/* Arbiter message record: one entry per message, carrying transaction IDs, CSN and sender state. */
8181typedef struct
8282{
8383 MtmMessageCode code ; /* Message code: MSG_READY, MSG_PREPARE, MSG_COMMIT, MSG_ABORT */
84- int node ; /* Sender node ID (1-based: the receiver indexes Mtm->nodes[] with node - 1) */
84+ int node ; /* Sender node ID (1-based: the receiver indexes Mtm->nodes[] with node - 1) */
8585 TransactionId dxid ; /* Transaction ID at destination node; the receiver uses it as the lookup key in MtmXid2State */
8686 TransactionId sxid ; /* Transaction ID at sender node */
8787 csn_t csn ; /* Local CSN when data is sent from replica to master; global CSN when sent from master to replica */
8888 nodemask_t disabledNodeMask ; /* Bitmask of nodes the sender considers disabled (bit index is node ID - 1) */
8989 csn_t oldestSnapshot ; /* Oldest snapshot used by active transactions at this node */
90+ uint64 seqno ;/* Message sequence number, incremented by the sender per destination node; the receiver ignores a message whose seqno is not greater than the last one accepted from that sender (eliminates duplicates) */
9091} MtmArbiterMessage ;
9192
9293typedef struct
@@ -112,6 +113,7 @@ static int busy_socket;
112113static void MtmTransSender (Datum arg );
113114static void MtmTransReceiver (Datum arg );
114115static void MtmSendHeartbeat (void );
116+ static bool MtmSendToNode (int node , void const * buf , int size );
115117
116118
117119static char const * const messageText [] =
@@ -248,6 +250,7 @@ static bool MtmWriteSocket(int sd, void const* buf, int size)
248250 if (rc == 1 ) {
249251 int n = send (sd , src , size , 0 );
250252 if (n < 0 ) {
253+ Assert (errno != EINTR ); /* should not happen in non-blocking call */
251254 busy_socket = -1 ;
252255 return false;
253256 }
@@ -266,6 +269,7 @@ static int MtmReadSocket(int sd, void* buf, int buf_size)
266269{
267270 int rc = recv (sd , buf , buf_size , 0 );
268271 if (rc <= 0 ) {
272+ Assert (errno != EINTR ); /* should not happen in non-blocking call */
269273 return -1 ;
270274 }
271275 return rc ;
@@ -346,9 +350,8 @@ static void MtmSendHeartbeat()
346350 {
347351 if (sockets [i ] >= 0 && sockets [i ] != busy_socket && !BIT_CHECK (Mtm -> disabledNodeMask |Mtm -> reconnectMask , i ))
348352 {
349- size_t rc = send (sockets [i ], & msg , sizeof (msg ), 0 );
350- if ((size_t )rc != sizeof (msg )) {
351- elog (LOG , "Failed to send heartbeat to node %d: %d" , i + 1 , errno );
353+ if (!MtmSendToNode (i , & msg , sizeof (msg ))) {
354+ elog (LOG , "Arbiter failed to send heartbeat to node %d" , i + 1 );
352355 }
353356 }
354357 }
@@ -629,6 +632,7 @@ static void MtmAppendBuffer(MtmBuffer* txBuffer, TransactionId xid, int node, Mt
629632 MTM_LOG3 ("Send %s message CSN=%ld to node %d from node %d for global transaction %d/local transaction %d" ,
630633 messageText [ts -> cmd ], ts -> csn , node + 1 , MtmNodeId , ts -> gtid .xid , ts -> xid );
631634 Assert (ts -> cmd != MSG_INVALID );
635+ buf -> data [buf -> used ].seqno = ++ Mtm -> nodes [node ].sendSeqNo ;
632636 buf -> data [buf -> used ].code = ts -> cmd ;
633637 buf -> data [buf -> used ].sxid = ts -> xid ;
634638 buf -> data [buf -> used ].csn = ts -> csn ;
@@ -845,10 +849,17 @@ static void MtmTransReceiver(Datum arg)
845849 elog (WARNING , "Ignore message from dead node %d\n" , msg -> node );
846850 continue ;
847851 }
852+ if (msg -> seqno <= Mtm -> nodes [msg -> node - 1 ].recvSeqNo ) {
853+ elog (WARNING , "Ignore duplicated message %ld from node %d" , msg -> seqno , msg -> node );
854+ continue ;
855+ }
856+ Mtm -> nodes [msg -> node - 1 ].recvSeqNo = msg -> seqno ;
848857
849858 ts = (MtmTransState * )hash_search (MtmXid2State , & msg -> dxid , HASH_FIND , NULL );
850- Assert (ts != NULL );
851-
859+ if (ts == NULL ) {
860+ elog (WARNING , "Ignore response for unexisted transaction %d from node %d" , msg -> dxid , msg -> node );
861+ continue ;
862+ }
852863 if (BIT_CHECK (msg -> disabledNodeMask , MtmNodeId - 1 ) && Mtm -> status != MTM_RECOVERY ) {
853864 elog (PANIC , "Node %d thinks that I was dead: perform hara-kiri not to be a zombie" , msg -> node );
854865 }
0 commit comments