5959
6060#ifndef USE_EPOLL
6161#ifdef __linux__
62- #define USE_EPOLL 1
62+ #define USE_EPOLL 0
6363#else
6464#define USE_EPOLL 0
6565#endif
@@ -105,7 +105,7 @@ typedef struct
/*
 * File-scope arbiter state (shown as a diff: the "-" line is removed, the "+" line is added).
 */
/* Socket descriptors indexed by 0-based node number (read as sockets[node] and sockets[i]).
 * NOTE(review): callers test sockets[i] >= 0 before using an entry, so a negative value
 * appears to mean "no open connection" -- confirm against the connect/disconnect code. */
105105static int * sockets ;
106106static int gateway ;
/* Set to true by MtmScheduleHeartbeat(). */
107107static bool send_heartbeat ;
/* System time (from MtmGetSystemTime()) recorded by MtmSendHeartbeat() on each send.
 * The diff renames the variable to correct the "hearbeat" misspelling. */
108- static timestamp_t last_sent_hearbeat ;
108+ static timestamp_t last_sent_heartbeat ;
/* Timeout id that MtmScheduleHeartbeat() passes to enable_timeout_after()
 * together with MtmHeartbeatSendTimeout. */
109109static TimeoutId heartbeat_timer ;
110110static int busy_socket ;
111111
@@ -266,17 +266,20 @@ static bool MtmWriteSocket(int sd, void const* buf, int size)
266266
/*
 * MtmReadSocket: read up to buf_size bytes from socket sd into buf.
 *
 * This span is a diff: "-" lines are the removed implementation, "+" lines the added one.
 *
 * Removed version: always called MtmWaitSocket(sd, false, MtmHeartbeatSendTimeout) first and
 * only called recv() when that returned 1; a recv() result <= 0 was reported as -1.
 *
 * Added version: calls recv() immediately. Only when that fails with errno == EAGAIN does it
 * call MtmWaitSocket() and, if that returns 1, recv() a second time.
 *
 * Returns (added version):
 *   - the result of the first recv() when it is >= 0, or when it is negative with errno
 *     other than EAGAIN;
 *   - the result of the second recv() when it is >= 0;
 *   - -1 when the second recv() returns a negative value;
 *   - 0 when MtmWaitSocket() returns anything other than 1.
 *
 * NOTE(review): recv() returns 0 when the peer has performed an orderly shutdown. The added
 * version passes that 0 through, which is the same value used for the MtmWaitSocket() != 1
 * path, whereas the removed version mapped it to -1. Confirm callers do not need to tell a
 * closed connection apart from "nothing received".
 * NOTE(review): only EAGAIN is tested; POSIX allows EWOULDBLOCK to be a distinct value --
 * confirm this is acceptable on all target platforms.
 */
267267static int MtmReadSocket (int sd , void * buf , int buf_size )
268268{
269- int rc = MtmWaitSocket (sd , false, MtmHeartbeatSendTimeout );
270- if (rc == 1 ) {
271- int rc = recv (sd , buf , buf_size , 0 );
272- if (rc <= 0 ) {
273- Assert (errno != EINTR ); /* should not happen in non-blocking call */
274- return -1 ;
/* Added version starts here: try to read first, wait on the socket only after EAGAIN. */
269+ int rc = recv (sd , buf , buf_size , 0 );
270+ if (rc < 0 && errno == EAGAIN ) {
271+ rc = MtmWaitSocket (sd , false, MtmHeartbeatSendTimeout );
272+ if (rc == 1 ) {
273+ rc = recv (sd , buf , buf_size , 0 );
274+ if (rc < 0 ) {
275+ Assert (errno != EINTR ); /* should not happen in non-blocking call */
276+ return -1 ;
277+ }
278+ } else {
279+ return 0 ;
275280 }
276- return rc ;
277- } else {
278- return 0 ;
279281 }
282+ return rc ;
280283}
281284
282285
@@ -343,7 +346,7 @@ static void MtmCheckResponse(MtmArbiterMessage* resp)
343346
344347static void MtmScheduleHeartbeat ()
345348{
346- // Assert(!last_sent_hearbeat || last_sent_hearbeat + MSEC_TO_USEC(MtmHeartbeatRecvTimeout) >= MtmGetSystemTime());
349+ // Assert(!last_sent_heartbeat || last_sent_heartbeat + MSEC_TO_USEC(MtmHeartbeatRecvTimeout) >= MtmGetSystemTime());
347350 enable_timeout_after (heartbeat_timer , MtmHeartbeatSendTimeout );
348351 send_heartbeat = true;
349352 PGSemaphoreUnlock (& Mtm -> votingSemaphore );
@@ -353,11 +356,16 @@ static void MtmSendHeartbeat()
353356{
354357 int i ;
355358 MtmArbiterMessage msg ;
359+ timestamp_t now = MtmGetSystemTime ();
356360 msg .code = MSG_HEARTBEAT ;
357361 msg .disabledNodeMask = Mtm -> disabledNodeMask ;
358362 msg .oldestSnapshot = Mtm -> nodes [MtmNodeId - 1 ].oldestSnapshot ;
359363 msg .node = MtmNodeId ;
360- last_sent_hearbeat = MtmGetSystemTime ();
364+ msg .csn = now ;
365+ if (last_sent_heartbeat + MSEC_TO_USEC (MtmHeartbeatSendTimeout )* 2 < now ) {
366+ MTM_LOG1 ("More than %ld microseconds since last heartbeat" , now - last_sent_heartbeat );
367+ }
368+ last_sent_heartbeat = now ;
361369
362370 for (i = 0 ; i < Mtm -> nAllNodes ; i ++ )
363371 {
@@ -366,6 +374,8 @@ static void MtmSendHeartbeat()
366374 {
367375 if (!MtmSendToNode (i , & msg , sizeof (msg ))) {
368376 elog (LOG , "Arbiter failed to send heartbeat to node %d" , i + 1 );
377+ } else {
378+ MTM_LOG1 ("Send heartbeat to node %d with timestamp %ld" , i + 1 , now );
369379 }
370380 }
371381 }
@@ -558,7 +568,7 @@ static bool MtmSendToNode(int node, void const* buf, int size)
558568static int MtmReadFromNode (int node , void * buf , int buf_size )
559569{
560570 int rc = MtmReadSocket (sockets [node ], buf , buf_size );
561- if (rc <= 0 ) {
571+ if (rc < 0 ) {
562572 elog (WARNING , "Arbiter failed to read from node=%d, rc=%d, errno=%d" , node + 1 , rc , errno );
563573 MtmDisconnect (node );
564574 }
@@ -812,6 +822,8 @@ static void MtmTransReceiver(Datum arg)
812822 }
813823
814824 while (!stop ) {
825+ timestamp_t startPolling = MtmGetSystemTime ();
826+ timestamp_t stopPolling ;
815827#if USE_EPOLL
816828 n = epoll_wait (epollfd , events , nNodes , MtmHeartbeatRecvTimeout );
817829 if (n < 0 ) {
@@ -820,13 +832,17 @@ static void MtmTransReceiver(Datum arg)
820832 }
821833 elog (ERROR , "Arbiter failed to poll sockets: %d" , errno );
822834 }
835+ stopPolling = MtmGetSystemTime ();
836+
823837 for (j = 0 ; j < n ; j ++ ) {
824838 i = events [j ].data .u32 ;
825839 if (events [j ].events & EPOLLERR ) {
826840 elog (WARNING , "Arbiter lost connection with node %d" , i + 1 );
827841 MtmDisconnect (i );
828842 }
829- else if (events [j ].events & EPOLLIN )
843+ }
844+ for (j = 0 ; j < n ; j ++ ) {
845+ if (events [j ].events & EPOLLIN )
830846#else
831847 fd_set events ;
832848 do {
@@ -842,6 +858,8 @@ static void MtmTransReceiver(Datum arg)
842858 if (n < 0 ) {
843859 elog (ERROR , "Arbiter failed to select sockets: %d" , errno );
844860 }
861+ stopPolling = MtmGetSystemTime ();
862+
845863 for (i = 0 ; i < nNodes ; i ++ ) {
846864 if (sockets [i ] >= 0 && FD_ISSET (sockets [i ], & events ))
847865#endif
@@ -871,7 +889,8 @@ static void MtmTransReceiver(Datum arg)
871889 Mtm -> nodes [msg -> node - 1 ].lastHeartbeat = MtmGetSystemTime ();
872890
873891 if (msg -> code == MSG_HEARTBEAT ) {
874- MTM_LOG3 ("Receive HEARTBEAT from node %d at %ld" , msg -> node , USEC_TO_MSEC (MtmGetSystemTime ()));
892+ MTM_LOG1 ("Receive HEARTBEAT from node %d with timestamp %ld delay %ld" ,
893+ msg -> node , msg -> csn , USEC_TO_MSEC (MtmGetSystemTime () - msg -> csn ));
875894 continue ;
876895 }
877896 if (BIT_CHECK (msg -> disabledNodeMask , msg -> node - 1 )) {
@@ -985,7 +1004,14 @@ static void MtmTransReceiver(Datum arg)
9851004 }
9861005 now = MtmGetSystemTime ();
9871006 if (now > lastHeartbeatCheck + MSEC_TO_USEC (MtmHeartbeatRecvTimeout )) {
988- MtmWatchdog ();
1007+ if (!MtmWatchdog (stopPolling )) {
1008+ for (i = 0 ; i < nNodes ; i ++ ) {
1009+ if (Mtm -> nodes [i ].lastHeartbeat != 0 && sockets [i ] >= 0 ) {
1010+ MTM_LOG1 ("Last hearbeat from node %d received %ld microseconds ago" , i + 1 , now - Mtm -> nodes [i ].lastHeartbeat );
1011+ }
1012+ }
1013+ MTM_LOG1 ("epoll started %ld and finished %ld microseconds ago" , now - startPolling , now - stopPolling );
1014+ }
9891015 lastHeartbeatCheck = now ;
9901016 }
9911017 if (n == 0 && Mtm -> disabledNodeMask != 0 ) {
0 commit comments