33 * latch.c
44 * Routines for inter-process latches
55 *
6- * The Unix implementation uses the so-called self-pipe trick to overcome the
7- * race condition involved with poll() (or epoll_wait() on linux) and setting
8- * a global flag in the signal handler. When a latch is set and the current
9- * process is waiting for it, the signal handler wakes up the poll() in
10- * WaitLatch by writing a byte to a pipe. A signal by itself doesn't interrupt
11- * poll() on all platforms, and even on platforms where it does, a signal that
12- * arrives just before the poll() call does not prevent poll() from entering
13- * sleep. An incoming byte on a pipe however reliably interrupts the sleep,
14- * and causes poll() to return immediately even if the signal arrives before
15- * poll() begins.
6+ * The poll() and kqueue implementations use the so-called self-pipe trick to
7+ * overcome the race between poll() and setting a global flag in the signal
8+ * handler. When a latch is set and the current process is waiting for it, the
9+ * signal handler wakes up the poll() in WaitLatch by writing a byte to a pipe.
10+ * A signal by itself doesn't interrupt poll() on all platforms, and even on
11+ * platforms where it does, a signal that arrives just before the poll() call
12+ * does not prevent poll() from entering sleep. An incoming byte on a pipe
13+ * however reliably interrupts the sleep, and causes poll() to return
14+ * immediately even if the signal arrives before poll() begins.
1615 *
17- * When SetLatch is called from the same process that owns the latch,
18- * SetLatch writes the byte directly to the pipe. If it's owned by another
19- * process, SIGURG is sent and the signal handler in the waiting process
20- * writes the byte to the pipe on behalf of the signaling process .
16+ * The epoll() implementation overcomes the race with a different technique: it
17+ * keeps SIGURG blocked and consumes it from a signalfd() descriptor instead,
18+ * so there is no need to register a signal handler or create a self-pipe. We
19+ * assume that any system that has Linux epoll() also has Linux signalfd().
2120 *
2221 * The Windows implementation uses Windows events that are inherited by all
2322 * postmaster child processes. There's no need for the self-pipe trick there.
4645#include <poll.h>
4746#endif
4847
48+ #include "libpq/pqsignal.h"
4949#include "miscadmin.h"
5050#include "pgstat.h"
5151#include "port/atomics.h"
7979#error "no wait set implementation available"
8080#endif
8181
82+ #ifdef WAIT_USE_EPOLL
83+ #include <sys/signalfd.h>
84+ #endif
85+
8286/* typedef in latch.h */
8387struct WaitEventSet
8488{
@@ -139,7 +143,14 @@ static WaitEventSet *LatchWaitSet;
139143#ifndef WIN32
140144/* Are we currently in WaitLatch? The signal handler would like to know. */
141145static volatile sig_atomic_t waiting = false;
146+ #endif
142147
148+ #ifdef WAIT_USE_EPOLL
149+ /* On Linux, we'll receive SIGURG via a signalfd file descriptor. */
150+ static int signal_fd = -1 ;
151+ #endif
152+
153+ #if defined(WAIT_USE_POLL ) || defined(WAIT_USE_KQUEUE )
143154/* Read and write ends of the self-pipe */
144155static int selfpipe_readfd = -1 ;
145156static int selfpipe_writefd = -1 ;
@@ -150,8 +161,11 @@ static int selfpipe_owner_pid = 0;
150161/* Private function prototypes */
151162static void latch_sigurg_handler (SIGNAL_ARGS );
152163static void sendSelfPipeByte (void );
153- static void drainSelfPipe (void );
154- #endif /* WIN32 */
164+ #endif
165+
166+ #if defined(WAIT_USE_POLL ) || defined(WAIT_USE_EPOLL )
167+ static void drain (void );
168+ #endif
155169
156170#if defined(WAIT_USE_EPOLL )
157171static void WaitEventAdjustEpoll (WaitEventSet * set , WaitEvent * event , int action );
@@ -175,7 +189,7 @@ static inline int WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
175189void
176190InitializeLatchSupport (void )
177191{
178- #ifndef WIN32
192+ #if defined( WAIT_USE_POLL ) || defined( WAIT_USE_KQUEUE )
179193 int pipefd [2 ];
180194
181195 if (IsUnderPostmaster )
@@ -247,8 +261,21 @@ InitializeLatchSupport(void)
247261 ReserveExternalFD ();
248262
249263 pqsignal (SIGURG , latch_sigurg_handler );
250- #else
251- /* currently, nothing to do here for Windows */
264+ #endif
265+
266+ #ifdef WAIT_USE_EPOLL
267+ sigset_t signalfd_mask ;
268+
269+ /* Block SIGURG, because we'll receive it through a signalfd. */
270+ sigaddset (& UnBlockSig , SIGURG );
271+
272+ /* Set up the signalfd to receive SIGURG notifications. */
273+ sigemptyset (& signalfd_mask );
274+ sigaddset (& signalfd_mask , SIGURG );
275+ signal_fd = signalfd (-1 , & signalfd_mask , SFD_NONBLOCK | SFD_CLOEXEC );
276+ if (signal_fd < 0 )
277+ elog (FATAL , "signalfd() failed" );
278+ ReserveExternalFD ();
252279#endif
253280}
254281
@@ -273,19 +300,28 @@ InitializeLatchWaitSet(void)
273300void
274301ShutdownLatchSupport (void )
275302{
/*
 * Undo the per-process latch setup: stop handling SIGURG (self-pipe builds),
 * free the shared LatchWaitSet if one exists, and close whichever wakeup
 * descriptors this build uses (self-pipe for poll/kqueue, signalfd for epoll).
 */
/* Only the poll/kqueue builds install a SIGURG handler, so only they reset it. */
303+ #if defined(WAIT_USE_POLL ) || defined(WAIT_USE_KQUEUE )
276304 pqsignal (SIGURG , SIG_IGN );
305+ #endif
277306
/* Free the wait set before closing the descriptors below. */
278307 if (LatchWaitSet )
279308 {
280309 FreeWaitEventSet (LatchWaitSet );
281310 LatchWaitSet = NULL ;
282311 }
283312
/* Close both ends of the self-pipe and reset the bookkeeping to "unset". */
313+ #if defined(WAIT_USE_POLL ) || defined(WAIT_USE_KQUEUE )
284314 close (selfpipe_readfd );
285315 close (selfpipe_writefd );
286316 selfpipe_readfd = -1 ;
287317 selfpipe_writefd = -1 ;
288318 selfpipe_owner_pid = InvalidPid ;
319+ #endif
320+
/*
 * Close the signalfd used to receive SIGURG in the epoll build.
 * NOTE(review): InitializeLatchSupport calls ReserveExternalFD() for these
 * descriptors; no matching release is visible here -- confirm that is intended.
 */
321+ #if defined(WAIT_USE_EPOLL )
322+ close (signal_fd );
323+ signal_fd = -1 ;
324+ #endif
289325}
290326
291327/*
@@ -299,10 +335,10 @@ InitLatch(Latch *latch)
299335 latch -> owner_pid = MyProcPid ;
300336 latch -> is_shared = false;
301337
302- #ifndef WIN32
338+ #if defined( WAIT_USE_POLL ) || defined( WAIT_USE_KQUEUE )
303339 /* Assert InitializeLatchSupport has been called in this process */
304340 Assert (selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid );
305- #else
341+ #elif defined( WAIT_USE_WIN32 )
306342 latch -> event = CreateEvent (NULL , TRUE, FALSE, NULL );
307343 if (latch -> event == NULL )
308344 elog (ERROR , "CreateEvent failed: error code %lu" , GetLastError ());
@@ -363,7 +399,7 @@ OwnLatch(Latch *latch)
363399 /* Sanity checks */
364400 Assert (latch -> is_shared );
365401
366- #ifndef WIN32
402+ #if defined( WAIT_USE_POLL ) || defined( WAIT_USE_KQUEUE )
367403 /* Assert InitializeLatchSupport has been called in this process */
368404 Assert (selfpipe_readfd >= 0 && selfpipe_owner_pid == MyProcPid );
369405#endif
@@ -550,9 +586,9 @@ SetLatch(Latch *latch)
550586
551587 /*
552588 * See if anyone's waiting for the latch. It can be the current process if
553- * we're in a signal handler. We use the self-pipe to wake up the
554- * poll()/epoll_wait() in that case. If it's another process, send a
555- * signal.
589+ * we're in a signal handler. We write to the self-pipe or send SIGURG to
590+ * ourselves to wake up WaitEventSetWaitBlock() without races in that case.
591+ * If it's another process, send a signal.
556592 *
557593 * Fetch owner_pid only once, in case the latch is concurrently getting
558594 * owned or disowned. XXX: This assumes that pid_t is atomic, which isn't
@@ -575,11 +611,17 @@ SetLatch(Latch *latch)
575611 return ;
576612 else if (owner_pid == MyProcPid )
577613 {
614+ #if defined(WAIT_USE_POLL ) || defined(WAIT_USE_KQUEUE )
578615 if (waiting )
579616 sendSelfPipeByte ();
617+ #else
618+ if (waiting )
619+ kill (MyProcPid , SIGURG );
620+ #endif
580621 }
581622 else
582623 kill (owner_pid , SIGURG );
624+
583625#else
584626
585627 /*
@@ -856,8 +898,13 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
856898 {
857899 set -> latch = latch ;
858900 set -> latch_pos = event -> pos ;
859- #ifndef WIN32
901+ #if defined( WAIT_USE_POLL ) || defined( WAIT_USE_KQUEUE )
860902 event -> fd = selfpipe_readfd ;
903+ #elif defined(WAIT_USE_EPOLL )
904+ event -> fd = signal_fd ;
905+ #else
906+ event -> fd = PGINVALID_SOCKET ;
907+ return event -> pos ;
861908#endif
862909 }
863910 else if (events == WL_POSTMASTER_DEATH )
@@ -932,12 +979,13 @@ ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
932979 if (latch && latch -> owner_pid != MyProcPid )
933980 elog (ERROR , "cannot wait on a latch owned by another process" );
934981 set -> latch = latch ;
982+
935983 /*
936984 * On Unix, we don't need to modify the kernel object because the
937- * underlying pipe is the same for all latches so we can return
938- * immediately. On Windows, we need to update our array of handles,
939- * but we leave the old one in place and tolerate spurious wakeups if
940- * the latch is disabled.
985+ * underlying pipe (if there is one) is the same for all latches so we
986+ * can return immediately. On Windows, we need to update our array of
987+ * handles, but we leave the old one in place and tolerate spurious
988+ * wakeups if the latch is disabled.
941989 */
942990#if defined(WAIT_USE_WIN32 )
943991 if (!latch )
@@ -1421,8 +1469,8 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
14211469 if (cur_event -> events == WL_LATCH_SET &&
14221470 cur_epoll_event -> events & (EPOLLIN | EPOLLERR | EPOLLHUP ))
14231471 {
1424- /* There's data in the self-pipe, clear it . */
1425- drainSelfPipe ();
1472+ /* Drain the signalfd. */
1473+ drain ();
14261474
14271475 if (set -> latch && set -> latch -> is_set )
14281476 {
@@ -1575,7 +1623,7 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
15751623 cur_kqueue_event -> filter == EVFILT_READ )
15761624 {
15771625 /* There's data in the self-pipe, clear it. */
1578- drainSelfPipe ();
1626+ drain ();
15791627
15801628 if (set -> latch && set -> latch -> is_set )
15811629 {
@@ -1691,7 +1739,7 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
16911739 (cur_pollfd -> revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL )))
16921740 {
16931741 /* There's data in the self-pipe, clear it. */
1694- drainSelfPipe ();
1742+ drain ();
16951743
16961744 if (set -> latch && set -> latch -> is_set )
16971745 {
@@ -1951,7 +1999,8 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
19511999}
19522000#endif
19532001
1954- #ifndef WIN32
2002+ #if defined(WAIT_USE_POLL ) || defined(WAIT_USE_KQUEUE )
2003+
19552004/*
19562005 * SetLatch uses SIGURG to wake up the process waiting on the latch.
19572006 *
@@ -1967,10 +2016,8 @@ latch_sigurg_handler(SIGNAL_ARGS)
19672016
19682017 errno = save_errno ;
19692018}
1970- #endif /* !WIN32 */
19712019
19722020/* Send one byte to the self-pipe, to wake up WaitLatch */
1973- #ifndef WIN32
19742021static void
19752022sendSelfPipeByte (void )
19762023{
@@ -2000,45 +2047,58 @@ sendSelfPipeByte(void)
20002047 return ;
20012048 }
20022049}
2003- #endif /* !WIN32 */
2050+
2051+ #endif
2052+
2053+ #if defined(WAIT_USE_POLL ) || defined(WAIT_USE_EPOLL )
20042054
20052055/*
2006- * Read all available data from the self-pipe
2056+ * Read all available data from the self-pipe (poll) or the signalfd (epoll).
20072057 *
20082058 * Note: this is only called when waiting = true. If it fails and doesn't
20092059 * return, it must reset that flag first (though ideally, this will never
20102060 * happen).
20112061 */
2012- #ifndef WIN32
20132062static void
2014- drainSelfPipe (void )
2063+ drain (void )
20152064{
2016- /*
2017- * There shouldn't normally be more than one byte in the pipe, or maybe a
2018- * few bytes if multiple processes run SetLatch at the same instant.
2019- */
2020- char buf [16 ];
2065+ char buf [1024 ];
20212066 int rc ;
2067+ int fd ;
2068+
2069+ #ifdef WAIT_USE_POLL
2070+ fd = selfpipe_readfd ;
2071+ #else
2072+ fd = signal_fd ;
2073+ #endif
20222074
20232075 for (;;)
20242076 {
2025- rc = read (selfpipe_readfd , buf , sizeof (buf ));
2077+ rc = read (fd , buf , sizeof (buf ));
20262078 if (rc < 0 )
20272079 {
20282080 if (errno == EAGAIN || errno == EWOULDBLOCK )
2029- break ; /* the pipe is empty */
2081+ break ; /* the descriptor is empty */
20302082 else if (errno == EINTR )
20312083 continue ; /* retry */
20322084 else
20332085 {
20342086 waiting = false;
2087+ #ifdef WAIT_USE_POLL
20352088 elog (ERROR , "read() on self-pipe failed: %m" );
2089+ #else
2090+ elog (ERROR , "read() on signalfd failed: %m" );
2091+ #endif
20362092 }
20372093 }
20382094 else if (rc == 0 )
20392095 {
20402096 waiting = false;
2097+ #ifdef WAIT_USE_POLL
20412098 elog (ERROR , "unexpected EOF on self-pipe" );
2099+ #else
2100+ elog (ERROR , "unexpected EOF on signalfd" );
2101+ #endif
20422102 }
20432103 else if (rc < sizeof (buf ))
20442104 {
@@ -2048,4 +2108,5 @@ drainSelfPipe(void)
20482108 /* else buffer wasn't big enough, so read again */
20492109 }
20502110}
2051- #endif /* !WIN32 */
2111+
2112+ #endif
0 commit comments