@@ -53,18 +53,29 @@ static timeout_params *volatile active_timeouts[MAX_TIMEOUTS];
5353
5454/*
5555 * Flag controlling whether the signal handler is allowed to do anything.
56- * We leave this "false" when we're not expecting interrupts, just in case.
56+ * This is useful to avoid race conditions with the handler. Note in
57+ * particular that this lets us make changes in the data structures without
58+ * tediously disabling and re-enabling the timer signal. Most of the time,
59+ * no interrupt would happen anyway during such critical sections, but if
60+ * one does, this rule ensures it's safe. Leaving the signal enabled across
61+ * multiple operations can greatly reduce the number of kernel calls we make,
62+ * too. See comments in schedule_alarm() about that.
5763 *
58- * Note that we don't bother to reset any pending timer interrupt when we
59- * disable the signal handler; it's not really worth the cycles to do so,
60- * since the probability of the interrupt actually occurring while we have
61- * it disabled is low. See comments in schedule_alarm() about that.
64+ * We leave this "false" when we're not expecting interrupts, just in case.
6265 */
6366static volatile sig_atomic_t alarm_enabled = false;
6467
6568#define disable_alarm () (alarm_enabled = false)
6669#define enable_alarm () (alarm_enabled = true)
6770
71+ /*
72+ * State recording if and when we next expect the interrupt to fire.
73+ * Note that the signal handler will unconditionally reset signal_pending to
74+ * false, so that can change asynchronously even when alarm_enabled is false.
75+ */
76+ static volatile sig_atomic_t signal_pending = false;
77+ static TimestampTz signal_due_at = 0 ; /* valid only when signal_pending */
78+
6879
6980/*****************************************************************************
7081 * Internal helper functions
@@ -185,29 +196,50 @@ enable_timeout(TimeoutId id, TimestampTz now, TimestampTz fin_time)
185196 * Schedule alarm for the next active timeout, if any
186197 *
187198 * We assume the caller has obtained the current time, or a close-enough
188- * approximation.
199+ * approximation. (It's okay if a tick or two has passed since "now", or
200+ * if a little more time elapses before we reach the kernel call; that will
201+ * cause us to ask for an interrupt a tick or two later than the nearest
202+ * timeout, which is no big deal. Passing a "now" value that's in the future
203+ * would be bad though.)
189204 */
190205static void
191206schedule_alarm (TimestampTz now )
192207{
193208 if (num_active_timeouts > 0 )
194209 {
195210 struct itimerval timeval ;
211+ TimestampTz nearest_timeout ;
196212 long secs ;
197213 int usecs ;
198214
199215 MemSet (& timeval , 0 , sizeof (struct itimerval ));
200216
201- /* Get the time remaining till the nearest pending timeout */
202- TimestampDifference (now , active_timeouts [0 ]-> fin_time ,
203- & secs , & usecs );
204-
205217 /*
206- * It's possible that the difference is less than a microsecond;
207- * ensure we don't cancel, rather than set, the interrupt.
218+ * Get the time remaining till the nearest pending timeout. If it is
219+ * negative, assume that we somehow missed an interrupt, and force
220+ * signal_pending off. This gives us a chance to recover if the
221+ * kernel drops a timeout request for some reason.
208222 */
209- if (secs == 0 && usecs == 0 )
223+ nearest_timeout = active_timeouts [0 ]-> fin_time ;
224+ if (now > nearest_timeout )
225+ {
226+ signal_pending = false;
227+ /* force an interrupt as soon as possible */
228+ secs = 0 ;
210229 usecs = 1 ;
230+ }
231+ else
232+ {
233+ TimestampDifference (now , nearest_timeout ,
234+ & secs , & usecs );
235+
236+ /*
237+ * It's possible that the difference is less than a microsecond;
238+ * ensure we don't cancel, rather than set, the interrupt.
239+ */
240+ if (secs == 0 && usecs == 0 )
241+ usecs = 1 ;
242+ }
211243
212244 timeval .it_value .tv_sec = secs ;
213245 timeval .it_value .tv_usec = usecs ;
@@ -218,7 +250,7 @@ schedule_alarm(TimestampTz now)
218250 * interrupt could occur before we can set alarm_enabled, so that the
219251 * signal handler would fail to do anything.
220252 *
221- * Because we didn't bother to reset the timer in disable_alarm(),
253+ * Because we didn't bother to disable the timer in disable_alarm(),
222254 * it's possible that a previously-set interrupt will fire between
223255 * enable_alarm() and setitimer(). This is safe, however. There are
224256 * two possible outcomes:
@@ -244,9 +276,53 @@ schedule_alarm(TimestampTz now)
244276 */
245277 enable_alarm ();
246278
279+ /*
280+ * If there is already an interrupt pending that's at or before the
281+ * needed time, we need not do anything more. The signal handler will
282+ * do the right thing in the first case, and re-schedule the interrupt
283+ * for later in the second case. It might seem that the extra
284+ * interrupt is wasted work, but it's not terribly much work, and this
285+ * method has very significant advantages in the common use-case where
286+ * we repeatedly set a timeout that we don't expect to reach and then
287+ * cancel it. Instead of invoking setitimer() every time the timeout
288+ * is set or canceled, we perform one interrupt and a re-scheduling
289+ * setitimer() call at intervals roughly equal to the timeout delay.
290+ * For example, with statement_timeout = 1s and a throughput of
291+ * thousands of queries per second, this method requires an interrupt
292+ * and setitimer() call roughly once a second, rather than thousands
293+ * of setitimer() calls per second.
294+ *
295+ * Because of the possible passage of time between when we obtained
296+ * "now" and when we reach setitimer(), the kernel's opinion of when
297+ * to trigger the interrupt is likely to be a bit later than
298+ * signal_due_at. That's fine, for the same reasons described above.
299+ */
300+ if (signal_pending && nearest_timeout >= signal_due_at )
301+ return ;
302+
303+ /*
304+ * As with calling enable_alarm(), we must set signal_pending *before*
305+ * calling setitimer(); if we did it after, the signal handler could
306+ * trigger before we set it, leaving us with a false opinion that a
307+ * signal is still coming.
308+ *
309+ * Other race conditions involved with setting/checking signal_pending
310+ * are okay, for the reasons described above.
311+ */
312+ signal_due_at = nearest_timeout ;
313+ signal_pending = true;
314+
247315 /* Set the alarm timer */
248316 if (setitimer (ITIMER_REAL , & timeval , NULL ) != 0 )
317+ {
318+ /*
319+ * Clearing signal_pending here is a bit pro forma, but not
320+ * entirely so, since something in the FATAL exit path could try
321+ * to use timeout facilities.
322+ */
323+ signal_pending = false;
249324 elog (FATAL , "could not enable SIGALRM timer: %m" );
325+ }
250326 }
251327}
252328
@@ -279,6 +355,12 @@ handle_sig_alarm(SIGNAL_ARGS)
279355 */
280356 SetLatch (MyLatch );
281357
358+ /*
359+ * Always reset signal_pending, even if !alarm_enabled, since indeed no
360+ * signal is now pending.
361+ */
362+ signal_pending = false;
363+
282364 /*
283365 * Fire any pending timeouts, but only if we're enabled to do so.
284366 */
@@ -591,7 +673,7 @@ disable_timeouts(const DisableTimeoutParams *timeouts, int count)
591673}
592674
593675/*
594- * Disable SIGALRM and remove all timeouts from the active list,
676+ * Disable the signal handler, remove all timeouts from the active list,
595677 * and optionally reset their timeout indicators.
596678 */
597679void
@@ -602,18 +684,10 @@ disable_all_timeouts(bool keep_indicators)
602684 disable_alarm ();
603685
604686 /*
605- * Only bother to reset the timer if we think it's active. We could just
606- * let the interrupt happen anyway, but it's probably a bit cheaper to do
607- * setitimer() than to let the useless interrupt happen .
687+ * We used to disable the timer interrupt here, but in common usage
688+ * patterns it's cheaper to leave it enabled; that may save us from having
689+ * to enable it again shortly. See comments in schedule_alarm() .
608690 */
609- if (num_active_timeouts > 0 )
610- {
611- struct itimerval timeval ;
612-
613- MemSet (& timeval , 0 , sizeof (struct itimerval ));
614- if (setitimer (ITIMER_REAL , & timeval , NULL ) != 0 )
615- elog (FATAL , "could not disable SIGALRM timer: %m" );
616- }
617691
618692 num_active_timeouts = 0 ;
619693
0 commit comments