2828#include <arpa/inet.h>
2929#include <signal.h>
3030#include <time.h>
31+ #ifdef HAVE_POLL_H
32+ #include <poll.h>
33+ #endif
34+ #ifdef HAVE_SYS_POLL_H
35+ #include <sys/poll.h>
36+ #endif
3137
3238#include "pgstat.h"
3339
4955#include "storage/backendid.h"
5056#include "storage/fd.h"
5157#include "storage/ipc.h"
52- #include "storage/latch.h"
5358#include "storage/pg_shmem.h"
59+ #include "storage/pmsignal.h"
5460#include "storage/procsignal.h"
5561#include "utils/ascii.h"
5662#include "utils/guc.h"
8894 * failed statistics collector; in
8995 * seconds. */
9096
97+ #define PGSTAT_SELECT_TIMEOUT 2 /* How often to check for postmaster
98+ * death; in seconds. */
99+
91100#define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY)
92101#define PGSTAT_INQ_LOOP_COUNT (PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY)
93102
@@ -130,8 +139,6 @@ PgStat_MsgBgWriter BgWriterStats;
130139 */
131140NON_EXEC_STATIC pgsocket pgStatSock = PGINVALID_SOCKET ;
132141
133- static Latch pgStatLatch ;
134-
135142static struct sockaddr_storage pgStatAddr ;
136143
137144static time_t last_pgstat_start_time ;
@@ -3002,7 +3009,15 @@ PgstatCollectorMain(int argc, char *argv[])
30023009{
30033010 int len ;
30043011 PgStat_Msg msg ;
3005- int wr ;
3012+
3013+ #ifndef WIN32
3014+ #ifdef HAVE_POLL
3015+ struct pollfd input_fd ;
3016+ #else
3017+ struct timeval sel_timeout ;
3018+ fd_set rfds ;
3019+ #endif
3020+ #endif
30063021
30073022 IsUnderPostmaster = true; /* we are a postmaster subprocess now */
30083023
@@ -3021,13 +3036,9 @@ PgstatCollectorMain(int argc, char *argv[])
30213036 elog (FATAL , "setsid() failed: %m" );
30223037#endif
30233038
3024- /* Initialize private latch for use by signal handlers */
3025- InitLatch (& pgStatLatch );
3026-
30273039 /*
30283040 * Ignore all signals usually bound to some action in the postmaster,
3029- * except SIGHUP and SIGQUIT. Note we don't need a SIGUSR1 handler to
3030- * support latch operations, because pgStatLatch is local not shared.
3041+ * except SIGQUIT.
30313042 */
30323043 pqsignal (SIGHUP , pgstat_sighup_handler );
30333044 pqsignal (SIGINT , SIG_IGN );
@@ -3062,24 +3073,26 @@ PgstatCollectorMain(int argc, char *argv[])
30623073 pgStatRunningInCollector = true;
30633074 pgStatDBHash = pgstat_read_statsfile (InvalidOid , true);
30643075
3076+ /*
3077+ * Setup the descriptor set for select(2). Since only one bit in the set
3078+ * ever changes, we need not repeat FD_ZERO each time.
3079+ */
3080+ #if !defined(HAVE_POLL ) && !defined(WIN32 )
3081+ FD_ZERO (& rfds );
3082+ #endif
3083+
30653084 /*
30663085 * Loop to process messages until we get SIGQUIT or detect ungraceful
30673086 * death of our parent postmaster.
30683087 *
3069- * For performance reasons, we don't want to do ResetLatch/WaitLatch after
3070- * every message; instead, do that only after a recv() fails to obtain a
3071- * message. (This effectively means that if backends are sending us stuff
3072- * like mad, we won't notice postmaster death until things slack off a
3073- * bit; which seems fine.) To do that, we have an inner loop that
3074- * iterates as long as recv() succeeds. We do recognize got_SIGHUP inside
3075- * the inner loop, which means that such interrupts will get serviced but
3076- * the latch won't get cleared until next time there is a break in the
3077- * action.
3088+ * For performance reasons, we don't want to do a PostmasterIsAlive() test
3089+ * after every message; instead, do it only when select()/poll() is
3090+ * interrupted by timeout. In essence, we'll stay alive as long as
3091+ * backends keep sending us stuff often, even if the postmaster is gone.
30783092 */
30793093 for (;;)
30803094 {
3081- /* Clear any already-pending wakeups */
3082- ResetLatch (& pgStatLatch );
3095+ int got_data ;
30833096
30843097 /*
30853098 * Quit if we get SIGQUIT from the postmaster.
@@ -3088,37 +3101,87 @@ PgstatCollectorMain(int argc, char *argv[])
30883101 break ;
30893102
30903103 /*
3091- * Inner loop iterates as long as we keep getting messages, or until
3092- * need_exit becomes set.
3104+ * Reload configuration if we got SIGHUP from the postmaster.
30933105 */
3094- while (! need_exit )
3106+ if ( got_SIGHUP )
30953107 {
3096- /*
3097- * Reload configuration if we got SIGHUP from the postmaster.
3098- */
3099- if (got_SIGHUP )
3100- {
3101- got_SIGHUP = false;
3102- ProcessConfigFile (PGC_SIGHUP );
3103- }
3108+ ProcessConfigFile (PGC_SIGHUP );
3109+ got_SIGHUP = false;
3110+ }
31043111
3105- /*
3106- * Write the stats file if a new request has arrived that is not
3107- * satisfied by existing file.
3108- */
3109- if (last_statwrite < last_statrequest )
3110- pgstat_write_statsfile (false);
3112+ /*
3113+ * Write the stats file if a new request has arrived that is not
3114+ * satisfied by existing file.
3115+ */
3116+ if (last_statwrite < last_statrequest )
3117+ pgstat_write_statsfile (false);
31113118
3112- /*
3113- * Try to receive and process a message. This will not block,
3114- * since the socket is set to non-blocking mode.
3115- */
3119+ /*
3120+ * Wait for a message to arrive; but not for more than
3121+ * PGSTAT_SELECT_TIMEOUT seconds. (This determines how quickly we will
3122+ * shut down after an ungraceful postmaster termination; so it needn't
3123+ * be very fast. However, on some systems SIGQUIT won't interrupt the
3124+ * poll/select call, so this also limits speed of response to SIGQUIT,
3125+ * which is more important.)
3126+ *
3127+ * We use poll(2) if available, otherwise select(2). Win32 has its own
3128+ * implementation.
3129+ */
3130+ #ifndef WIN32
3131+ #ifdef HAVE_POLL
3132+ input_fd .fd = pgStatSock ;
3133+ input_fd .events = POLLIN | POLLERR ;
3134+ input_fd .revents = 0 ;
3135+
3136+ if (poll (& input_fd , 1 , PGSTAT_SELECT_TIMEOUT * 1000 ) < 0 )
3137+ {
3138+ if (errno == EINTR )
3139+ continue ;
3140+ ereport (ERROR ,
3141+ (errcode_for_socket_access (),
3142+ errmsg ("poll() failed in statistics collector: %m" )));
3143+ }
3144+
3145+ got_data = (input_fd .revents != 0 );
3146+ #else /* !HAVE_POLL */
3147+
3148+ FD_SET (pgStatSock , & rfds );
3149+
3150+ /*
3151+ * timeout struct is modified by select() on some operating systems,
3152+ * so re-fill it each time.
3153+ */
3154+ sel_timeout .tv_sec = PGSTAT_SELECT_TIMEOUT ;
3155+ sel_timeout .tv_usec = 0 ;
3156+
3157+ if (select (pgStatSock + 1 , & rfds , NULL , NULL , & sel_timeout ) < 0 )
3158+ {
3159+ if (errno == EINTR )
3160+ continue ;
3161+ ereport (ERROR ,
3162+ (errcode_for_socket_access (),
3163+ errmsg ("select() failed in statistics collector: %m" )));
3164+ }
3165+
3166+ got_data = FD_ISSET (pgStatSock , & rfds );
3167+ #endif /* HAVE_POLL */
3168+ #else /* WIN32 */
3169+ got_data = pgwin32_waitforsinglesocket (pgStatSock , FD_READ ,
3170+ PGSTAT_SELECT_TIMEOUT * 1000 );
3171+ #endif
3172+
3173+ /*
3174+ * If there is a message on the socket, read it and check for
3175+ * validity.
3176+ */
3177+ if (got_data )
3178+ {
31163179 len = recv (pgStatSock , (char * ) & msg ,
31173180 sizeof (PgStat_Msg ), 0 );
31183181 if (len < 0 )
31193182 {
3120- if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR )
3121- break ; /* out of inner loop */
3183+ if (errno == EINTR )
3184+ continue ;
31223185 ereport (ERROR ,
31233186 (errcode_for_socket_access (),
31243187 errmsg ("could not read statistics message: %m" )));
@@ -3216,21 +3279,17 @@ PgstatCollectorMain(int argc, char *argv[])
32163279 default :
32173280 break ;
32183281 }
3219- } /* end of inner message-processing loop */
3220-
3221- /* Sleep until there's something to do */
3222- wr = WaitLatchOrSocket (& pgStatLatch ,
3223- WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE ,
3224- pgStatSock ,
3225- -1L );
3226-
3227- /*
3228- * Emergency bailout if postmaster has died. This is to avoid the
3229- * necessity for manual cleanup of all postmaster children.
3230- */
3231- if (wr & WL_POSTMASTER_DEATH )
3232- break ;
3233- } /* end of outer loop */
3282+ }
3283+ else
3284+ {
3285+ /*
3286+ * We can only get here if the select/poll timeout elapsed. Check
3287+ * for postmaster death.
3288+ */
3289+ if (!PostmasterIsAlive ())
3290+ break ;
3291+ }
3292+ } /* end of message-processing loop */
32343293
32353294 /*
32363295 * Save the final stats to reuse at next startup.
@@ -3245,24 +3304,14 @@ PgstatCollectorMain(int argc, char *argv[])
32453304static void
32463305pgstat_exit (SIGNAL_ARGS )
32473306{
3248- int save_errno = errno ;
3249-
32503307 need_exit = true;
3251- SetLatch (& pgStatLatch );
3252-
3253- errno = save_errno ;
32543308}
32553309
32563310/* SIGHUP handler for collector process */
32573311static void
32583312pgstat_sighup_handler (SIGNAL_ARGS )
32593313{
3260- int save_errno = errno ;
3261-
32623314 got_SIGHUP = true;
3263- SetLatch (& pgStatLatch );
3264-
3265- errno = save_errno ;
32663315}
32673316
32683317
0 commit comments