2828#include <arpa/inet.h>
2929#include <signal.h>
3030#include <time.h>
31- #ifdef HAVE_POLL_H
32- #include <poll.h>
33- #endif
34- #ifdef HAVE_SYS_POLL_H
35- #include <sys/poll.h>
36- #endif
3731
3832#include "pgstat.h"
3933
5549#include "storage/backendid.h"
5650#include "storage/fd.h"
5751#include "storage/ipc.h"
52+ #include "storage/latch.h"
5853#include "storage/pg_shmem.h"
59- #include "storage/pmsignal.h"
6054#include "storage/procsignal.h"
6155#include "utils/ascii.h"
6256#include "utils/guc.h"
9488 * failed statistics collector; in
9589 * seconds. */
9690
97- #define PGSTAT_SELECT_TIMEOUT 2 /* How often to check for postmaster
98- * death; in seconds. */
99-
10091#define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY)
10192#define PGSTAT_INQ_LOOP_COUNT (PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY)
10293
@@ -139,6 +130,8 @@ PgStat_MsgBgWriter BgWriterStats;
139130 */
140131NON_EXEC_STATIC pgsocket pgStatSock = PGINVALID_SOCKET ;
141132
133+ static Latch pgStatLatch ;
134+
142135static struct sockaddr_storage pgStatAddr ;
143136
144137static time_t last_pgstat_start_time ;
@@ -3009,15 +3002,7 @@ PgstatCollectorMain(int argc, char *argv[])
30093002{
30103003 int len ;
30113004 PgStat_Msg msg ;
3012-
3013- #ifndef WIN32
3014- #ifdef HAVE_POLL
3015- struct pollfd input_fd ;
3016- #else
3017- struct timeval sel_timeout ;
3018- fd_set rfds ;
3019- #endif
3020- #endif
3005+ int wr ;
30213006
30223007 IsUnderPostmaster = true; /* we are a postmaster subprocess now */
30233008
@@ -3036,9 +3021,13 @@ PgstatCollectorMain(int argc, char *argv[])
30363021 elog (FATAL , "setsid() failed: %m" );
30373022#endif
30383023
3024+ /* Initialize private latch for use by signal handlers */
3025+ InitLatch (& pgStatLatch );
3026+
30393027 /*
30403028 * Ignore all signals usually bound to some action in the postmaster,
3041- * except SIGQUIT.
3029+ * except SIGHUP and SIGQUIT. Note we don't need a SIGUSR1 handler to
3030+ * support latch operations, because pgStatLatch is local not shared.
30423031 */
30433032 pqsignal (SIGHUP , pgstat_sighup_handler );
30443033 pqsignal (SIGINT , SIG_IGN );
@@ -3073,26 +3062,24 @@ PgstatCollectorMain(int argc, char *argv[])
30733062 pgStatRunningInCollector = true;
30743063 pgStatDBHash = pgstat_read_statsfile (InvalidOid , true);
30753064
3076- /*
3077- * Setup the descriptor set for select(2). Since only one bit in the set
3078- * ever changes, we need not repeat FD_ZERO each time.
3079- */
3080- #if !defined(HAVE_POLL ) && !defined(WIN32 )
3081- FD_ZERO (& rfds );
3082- #endif
3083-
30843065 /*
30853066 * Loop to process messages until we get SIGQUIT or detect ungraceful
30863067 * death of our parent postmaster.
30873068 *
3088- * For performance reasons, we don't want to do a PostmasterIsAlive() test
3089- * after every message; instead, do it only when select()/poll() is
3090- * interrupted by timeout. In essence, we'll stay alive as long as
3091- * backends keep sending us stuff often, even if the postmaster is gone.
3069+ * For performance reasons, we don't want to do ResetLatch/WaitLatch after
3070+ * every message; instead, do that only after a recv() fails to obtain a
3071+ * message. (This effectively means that if backends are sending us stuff
3072+ * like mad, we won't notice postmaster death until things slack off a
3073+ * bit; which seems fine.) To do that, we have an inner loop that
3074+ * iterates as long as recv() succeeds. We do recognize got_SIGHUP inside
3075+ * the inner loop, which means that such interrupts will get serviced but
3076+ * the latch won't get cleared until next time there is a break in the
3077+ * action.
30923078 */
30933079 for (;;)
30943080 {
3095- int got_data ;
3081+ /* Clear any already-pending wakeups */
3082+ ResetLatch (& pgStatLatch );
30963083
30973084 /*
30983085 * Quit if we get SIGQUIT from the postmaster.
@@ -3101,87 +3088,37 @@ PgstatCollectorMain(int argc, char *argv[])
31013088 break ;
31023089
31033090 /*
3104- * Reload configuration if we got SIGHUP from the postmaster.
3105- */
3106- if (got_SIGHUP )
3107- {
3108- ProcessConfigFile (PGC_SIGHUP );
3109- got_SIGHUP = false;
3110- }
3111-
3112- /*
3113- * Write the stats file if a new request has arrived that is not
3114- * satisfied by existing file.
3091+ * Inner loop iterates as long as we keep getting messages, or until
3092+ * need_exit becomes set.
31153093 */
3116- if (last_statwrite < last_statrequest )
3117- pgstat_write_statsfile (false);
3118-
3119- /*
3120- * Wait for a message to arrive; but not for more than
3121- * PGSTAT_SELECT_TIMEOUT seconds. (This determines how quickly we will
3122- * shut down after an ungraceful postmaster termination; so it needn't
3123- * be very fast. However, on some systems SIGQUIT won't interrupt the
3124- * poll/select call, so this also limits speed of response to SIGQUIT,
3125- * which is more important.)
3126- *
3127- * We use poll(2) if available, otherwise select(2). Win32 has its own
3128- * implementation.
3129- */
3130- #ifndef WIN32
3131- #ifdef HAVE_POLL
3132- input_fd .fd = pgStatSock ;
3133- input_fd .events = POLLIN | POLLERR ;
3134- input_fd .revents = 0 ;
3135-
3136- if (poll (& input_fd , 1 , PGSTAT_SELECT_TIMEOUT * 1000 ) < 0 )
3137- {
3138- if (errno == EINTR )
3139- continue ;
3140- ereport (ERROR ,
3141- (errcode_for_socket_access (),
3142- errmsg ("poll() failed in statistics collector: %m" )));
3143- }
3144-
3145- got_data = (input_fd .revents != 0 );
3146- #else /* !HAVE_POLL */
3147-
3148- FD_SET (pgStatSock , & rfds );
3149-
3150- /*
3151- * timeout struct is modified by select() on some operating systems,
3152- * so re-fill it each time.
3153- */
3154- sel_timeout .tv_sec = PGSTAT_SELECT_TIMEOUT ;
3155- sel_timeout .tv_usec = 0 ;
3156-
3157- if (select (pgStatSock + 1 , & rfds , NULL , NULL , & sel_timeout ) < 0 )
3094+ while (!need_exit )
31583095 {
3159- if (errno == EINTR )
3160- continue ;
3161- ereport (ERROR ,
3162- (errcode_for_socket_access (),
3163- errmsg ("select() failed in statistics collector: %m" )));
3164- }
3096+ /*
3097+ * Reload configuration if we got SIGHUP from the postmaster.
3098+ */
3099+ if (got_SIGHUP )
3100+ {
3101+ got_SIGHUP = false;
3102+ ProcessConfigFile (PGC_SIGHUP );
3103+ }
31653104
3166- got_data = FD_ISSET ( pgStatSock , & rfds );
3167- #endif /* HAVE_POLL */
3168- #else /* WIN32 */
3169- got_data = pgwin32_waitforsinglesocket ( pgStatSock , FD_READ ,
3170- PGSTAT_SELECT_TIMEOUT * 1000 );
3171- #endif
3105+ /*
3106+ * Write the stats file if a new request has arrived that is not
3107+ * satisfied by existing file.
3108+ */
3109+ if ( last_statwrite < last_statrequest )
3110+ pgstat_write_statsfile (false);
31723111
3173- /*
3174- * If there is a message on the socket, read it and check for
3175- * validity.
3176- */
3177- if (got_data )
3178- {
3112+ /*
3113+ * Try to receive and process a message. This will not block,
3114+ * since the socket is set to non-blocking mode.
3115+ */
31793116 len = recv (pgStatSock , (char * ) & msg ,
31803117 sizeof (PgStat_Msg ), 0 );
31813118 if (len < 0 )
31823119 {
3183- if (errno == EINTR )
3184- continue ;
3120+ if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR )
3121+ break ; /* out of inner loop */
31853122 ereport (ERROR ,
31863123 (errcode_for_socket_access (),
31873124 errmsg ("could not read statistics message: %m" )));
@@ -3279,17 +3216,21 @@ PgstatCollectorMain(int argc, char *argv[])
32793216 default :
32803217 break ;
32813218 }
3282- }
3283- else
3284- {
3285- /*
3286- * We can only get here if the select/poll timeout elapsed. Check
3287- * for postmaster death.
3288- */
3289- if (!PostmasterIsAlive ())
3290- break ;
3291- }
3292- } /* end of message-processing loop */
3219+ } /* end of inner message-processing loop */
3220+
3221+ /* Sleep until there's something to do */
3222+ wr = WaitLatchOrSocket (& pgStatLatch ,
3223+ WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE ,
3224+ pgStatSock ,
3225+ -1L );
3226+
3227+ /*
3228+ * Emergency bailout if postmaster has died. This is to avoid the
3229+ * necessity for manual cleanup of all postmaster children.
3230+ */
3231+ if (wr & WL_POSTMASTER_DEATH )
3232+ break ;
3233+ } /* end of outer loop */
32933234
32943235 /*
32953236 * Save the final stats to reuse at next startup.
@@ -3304,14 +3245,24 @@ PgstatCollectorMain(int argc, char *argv[])
33043245static void
33053246pgstat_exit (SIGNAL_ARGS )
33063247{
3248+ int save_errno = errno ;
3249+
33073250 need_exit = true;
3251+ SetLatch (& pgStatLatch );
3252+
3253+ errno = save_errno ;
33083254}
33093255
33103256/* SIGHUP handler for collector process */
33113257static void
33123258pgstat_sighup_handler (SIGNAL_ARGS )
33133259{
3260+ int save_errno = errno ;
3261+
33143262 got_SIGHUP = true;
3263+ SetLatch (& pgStatLatch );
3264+
3265+ errno = save_errno ;
33153266}
33163267
33173268
0 commit comments