@@ -337,7 +337,7 @@ WalSummarizerMain(char *startup_data, size_t startup_data_len)
337337 *
338338 * If we discover that WAL summarization is not enabled, just exit.
339339 */
340- current_lsn = GetOldestUnsummarizedLSN (& current_tli , & exact , true );
340+ current_lsn = GetOldestUnsummarizedLSN (& current_tli , & exact );
341341 if (XLogRecPtrIsInvalid (current_lsn ))
342342 proc_exit (0 );
343343
@@ -479,46 +479,40 @@ GetWalSummarizerState(TimeLineID *summarized_tli, XLogRecPtr *summarized_lsn,
479479
480480/*
481481 * Get the oldest LSN in this server's timeline history that has not yet been
482- * summarized.
482+ * summarized, and update shared memory state as appropriate.
483483 *
484484 * If tli != NULL, *tli will be set to the TLI for the LSN that is returned.
485485 *
486486 * If lsn_is_exact != NULL, *lsn_is_exact will be set to true if the returned LSN is
487487 * necessarily the start of a WAL record and false if it's just the beginning
488488 * of a WAL segment.
489- *
490- * If reset_pending_lsn is true, resets the pending_lsn in shared memory to
491- * be equal to the summarized_lsn.
492489 */
493490XLogRecPtr
494- GetOldestUnsummarizedLSN (TimeLineID * tli , bool * lsn_is_exact ,
495- bool reset_pending_lsn )
491+ GetOldestUnsummarizedLSN (TimeLineID * tli , bool * lsn_is_exact )
496492{
497493 TimeLineID latest_tli ;
498- LWLockMode mode = reset_pending_lsn ? LW_EXCLUSIVE : LW_SHARED ;
499494 int n ;
500495 List * tles ;
501496 XLogRecPtr unsummarized_lsn = InvalidXLogRecPtr ;
502497 TimeLineID unsummarized_tli = 0 ;
503498 bool should_make_exact = false;
504499 List * existing_summaries ;
505500 ListCell * lc ;
501+ bool am_wal_summarizer = AmWalSummarizerProcess ();
506502
507503 /* If not summarizing WAL, do nothing. */
508504 if (!summarize_wal )
509505 return InvalidXLogRecPtr ;
510506
511507 /*
512- * Unless we need to reset the pending_lsn, we initially acquire the lock
513- * in shared mode and try to fetch the required information. If we acquire
514- * in shared mode and find that the data structure hasn't been
515- * initialized, we reacquire the lock in exclusive mode so that we can
516- * initialize it. However, if someone else does that first before we get
517- * the lock, then we can just return the requested information after all.
508+ * If we are not the WAL summarizer process, then we normally just want
509+ * to read the values from shared memory. However, as an exception, if
510+ * shared memory hasn't been initialized yet, then we need to do that so
511+ * that we can read legal values and not remove any WAL too early.
518512 */
519- while ( 1 )
513+ if (! am_wal_summarizer )
520514 {
521- LWLockAcquire (WALSummarizerLock , mode );
515+ LWLockAcquire (WALSummarizerLock , LW_SHARED );
522516
523517 if (WalSummarizerCtl -> initialized )
524518 {
@@ -527,27 +521,22 @@ GetOldestUnsummarizedLSN(TimeLineID *tli, bool *lsn_is_exact,
527521 * tli = WalSummarizerCtl -> summarized_tli ;
528522 if (lsn_is_exact != NULL )
529523 * lsn_is_exact = WalSummarizerCtl -> lsn_is_exact ;
530- if (reset_pending_lsn )
531- WalSummarizerCtl -> pending_lsn =
532- WalSummarizerCtl -> summarized_lsn ;
533524 LWLockRelease (WALSummarizerLock );
534525 return unsummarized_lsn ;
535526 }
536527
537- if (mode == LW_EXCLUSIVE )
538- break ;
539-
540528 LWLockRelease (WALSummarizerLock );
541- mode = LW_EXCLUSIVE ;
542529 }
543530
544531 /*
545- * The data structure needs to be initialized, and we are the first to
546- * obtain the lock in exclusive mode, so it's our job to do that
547- * initialization.
532+ * Find the oldest timeline on which WAL still exists, and the earliest
533+ * segment for which it exists.
548534 *
549- * So, find the oldest timeline on which WAL still exists, and the
550- * earliest segment for which it exists.
535+ * Note that we do this every time the WAL summarizer process restarts
536+ * or recovers from an error, in case the contents of pg_wal have changed
537+ * under us e.g. if some files were removed, either manually - which
538+ * shouldn't really happen, but might - or by postgres itself, if
539+ * summarize_wal was turned off and then back on again.
551540 */
552541 (void ) GetLatestLSN (& latest_tli );
553542 tles = readTimeLineHistory (latest_tli );
@@ -568,12 +557,6 @@ GetOldestUnsummarizedLSN(TimeLineID *tli, bool *lsn_is_exact,
568557 }
569558 }
570559
571- /* It really should not be possible for us to find no WAL. */
572- if (unsummarized_tli == 0 )
573- ereport (ERROR ,
574- errcode (ERRCODE_INTERNAL_ERROR ),
575- errmsg_internal ("no WAL found on timeline %u" , latest_tli ));
576-
577560 /*
578561 * Don't try to summarize anything older than the end LSN of the newest
579562 * summary file that exists for this timeline.
@@ -592,12 +575,32 @@ GetOldestUnsummarizedLSN(TimeLineID *tli, bool *lsn_is_exact,
592575 }
593576 }
594577
595- /* Update shared memory with the discovered values. */
596- WalSummarizerCtl -> initialized = true;
597- WalSummarizerCtl -> summarized_lsn = unsummarized_lsn ;
598- WalSummarizerCtl -> summarized_tli = unsummarized_tli ;
599- WalSummarizerCtl -> lsn_is_exact = should_make_exact ;
600- WalSummarizerCtl -> pending_lsn = unsummarized_lsn ;
578+ /* It really should not be possible for us to find no WAL. */
579+ if (unsummarized_tli == 0 )
580+ ereport (ERROR ,
581+ errcode (ERRCODE_INTERNAL_ERROR ),
582+ errmsg_internal ("no WAL found on timeline %u" , latest_tli ));
583+
584+ /*
585+ * If we're the WAL summarizer, we always want to store the values we
586+ * just computed into shared memory, because those are the values we're
587+ * going to use to drive our operation, and so they are the authoritative
588+ * values. Otherwise, we only store values into shared memory if shared
589+ * memory is uninitialized. Our values are not canonical in such a case,
590+ * but it's better to have something than nothing, to guide WAL
591+ * retention.
592+ */
593+ LWLockAcquire (WALSummarizerLock , LW_EXCLUSIVE );
594+ if (am_wal_summarizer || !WalSummarizerCtl -> initialized )
595+ {
596+ WalSummarizerCtl -> initialized = true;
597+ WalSummarizerCtl -> summarized_lsn = unsummarized_lsn ;
598+ WalSummarizerCtl -> summarized_tli = unsummarized_tli ;
599+ WalSummarizerCtl -> lsn_is_exact = should_make_exact ;
600+ WalSummarizerCtl -> pending_lsn = unsummarized_lsn ;
601+ }
602+ else
603+ unsummarized_lsn = WalSummarizerCtl -> summarized_lsn ;
601604
602605 /* Also return the values to the caller as required. */
603606 if (tli != NULL )
0 commit comments