@@ -442,11 +442,21 @@ typedef struct XLogwrtResult
442442 * the WAL record is just copied to the page and the lock is released. But
443443 * to avoid the deadlock-scenario explained above, the indicator is always
444444 * updated before sleeping while holding an insertion lock.
445+ *
446+ * lastImportantAt contains the LSN of the last important WAL record inserted
447+ * using a given lock. This value is used to detect if there has been
448+ * important WAL activity since the last time some action, like a checkpoint,
449+ * was performed, so that the action can be skipped if there was none. The LSN is
450+ * updated for all insertions, unless the XLOG_MARK_UNIMPORTANT flag was
451+ * set. lastImportantAt is never cleared, only overwritten by the LSN of newer
452+ * records. Tracking the WAL activity directly in WALInsertLock has the
453+ * advantage of not needing any additional locks to update the value.
445454 */
446455typedef struct
447456{
448457 LWLock lock ;
449458 XLogRecPtr insertingAt ;
459+ XLogRecPtr lastImportantAt ; /* LSN of last important record inserted under this lock; overwritten by newer records, never cleared */
450460} WALInsertLock ;
451461
452462/*
@@ -541,8 +551,9 @@ typedef struct XLogCtlData
541551 XLogRecPtr unloggedLSN ;
542552 slock_t ulsn_lck ;
543553
544- /* Time of last xlog segment switch. Protected by WALWriteLock. */
554+ /* Time and LSN of last xlog segment switch. Protected by WALWriteLock. */
545555 pg_time_t lastSegSwitchTime ;
556+ XLogRecPtr lastSegSwitchLSN ;
546557
547558 /*
548559 * Protected by info_lck and WALWriteLock (you must hold either lock to
@@ -884,6 +895,9 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt);
884895 * which pages need a full-page image, and retry. If fpw_lsn is invalid, the
885896 * record is always inserted.
886897 *
898+ * 'flags' gives more in-depth control over the record being inserted. See
899+ * XLogSetRecordFlags() for details.
900+ *
887901 * The first XLogRecData in the chain must be for the record header, and its
888902 * data must be MAXALIGNed. XLogInsertRecord fills in the xl_prev and
889903 * xl_crc fields in the header, the rest of the header must already be filled
@@ -896,7 +910,9 @@ static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt);
896910 * WAL rule "write the log before the data".)
897911 */
898912XLogRecPtr
899- XLogInsertRecord (XLogRecData * rdata , XLogRecPtr fpw_lsn )
913+ XLogInsertRecord (XLogRecData * rdata ,
914+ XLogRecPtr fpw_lsn ,
915+ uint8 flags )
900916{
901917 XLogCtlInsert * Insert = & XLogCtl -> Insert ;
902918 pg_crc32c rdata_crc ;
@@ -1013,6 +1029,18 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
10131029 */
10141030 CopyXLogRecordToWAL (rechdr -> xl_tot_len , isLogSwitch , rdata ,
10151031 StartPos , EndPos );
1032+
1033+ /*
1034+ * Unless the record is flagged as unimportant, update the LSN of the last
1035+ * important record in the current slot. When holding all locks, just
1036+ * update the first one.
1037+ */
1038+ if ((flags & XLOG_MARK_UNIMPORTANT ) == 0 )
1039+ {
1040+ int lockno = holdingAllLocks ? 0 : MyLockNo ;
1041+
1042+ WALInsertLocks [lockno ].l .lastImportantAt = StartPos ;
1043+ }
10161044 }
10171045 else
10181046 {
@@ -2332,6 +2360,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
23322360 XLogArchiveNotifySeg (openLogSegNo );
23332361
23342362 XLogCtl -> lastSegSwitchTime = (pg_time_t ) time (NULL );
2363+ XLogCtl -> lastSegSwitchLSN = LogwrtResult .Flush ;
23352364
23362365 /*
23372366 * Request a checkpoint if we've consumed too much xlog since
@@ -4715,6 +4744,7 @@ XLOGShmemInit(void)
47154744 {
47164745 LWLockInitialize (& WALInsertLocks [i ].l .lock , LWTRANCHE_WAL_INSERT );
47174746 WALInsertLocks [i ].l .insertingAt = InvalidXLogRecPtr ;
4747+ WALInsertLocks [i ].l .lastImportantAt = InvalidXLogRecPtr ;
47184748 }
47194749
47204750 /*
@@ -7431,8 +7461,9 @@ StartupXLOG(void)
74317461 */
74327462 InRecovery = false;
74337463
7434- /* start the archive_timeout timer running */
7464+ /* start the archive_timeout timer running and record the LSN it starts from */
74357465 XLogCtl -> lastSegSwitchTime = (pg_time_t ) time (NULL );
7466+ XLogCtl -> lastSegSwitchLSN = EndOfLog ;
74367467
74377468 /* also initialize latestCompletedXid, to nextXid - 1 */
74387469 LWLockAcquire (ProcArrayLock , LW_EXCLUSIVE );
@@ -7994,16 +8025,51 @@ GetFlushRecPtr(void)
79948025}
79958026
79968027/*
7997- * Get the time of the last xlog segment switch
8028+ * GetLastImportantRecPtr -- Returns the LSN of the last important record
8029+ * inserted. All records not explicitly marked as unimportant are considered
8030+ * important.
8031+ *
8032+ * The LSN is determined by computing the maximum of
8033+ * WALInsertLocks[i].lastImportantAt.
8034+ */
8035+ XLogRecPtr
8036+ GetLastImportantRecPtr (void )
8037+ {
8038+ XLogRecPtr res = InvalidXLogRecPtr ; /* running maximum over all insertion locks */
8039+ int i ;
8040+
8041+ for (i = 0 ; i < NUM_XLOGINSERT_LOCKS ; i ++ )
8042+ {
8043+ XLogRecPtr last_important ;
8044+
8045+ /*
8046+ * Need to take a lock to prevent torn reads of the LSN, which are
8047+ * possible on some of the supported platforms. WAL insert locks only
8048+ * support exclusive mode, so we have to use that.
8049+ */
8050+ LWLockAcquire (& WALInsertLocks [i ].l .lock , LW_EXCLUSIVE );
8051+ last_important = WALInsertLocks [i ].l .lastImportantAt ; /* copy out while the lock is held */
8052+ LWLockRelease (& WALInsertLocks [i ].l .lock );
8053+
8054+ if (res < last_important ) /* keep the largest LSN seen so far */
8055+ res = last_important ;
8056+ }
8057+
8058+ return res ; /* NOTE(review): stays InvalidXLogRecPtr if no lock holds a larger value - assumes InvalidXLogRecPtr compares lowest; confirm */
8059+ }
8060+
8061+ /*
8062+ * Return the time of the last xlog segment switch; its LSN is stored in *lastSwitchLSN
79988063 */
79998064pg_time_t
8000- GetLastSegSwitchTime ( void )
8065+ GetLastSegSwitchData ( XLogRecPtr * lastSwitchLSN )
80018066{
80028067 pg_time_t result ;
80038068
80048069 /* Need WALWriteLock, but shared lock is sufficient */
80058070 LWLockAcquire (WALWriteLock , LW_SHARED );
80068071 result = XLogCtl -> lastSegSwitchTime ;
8072+ * lastSwitchLSN = XLogCtl -> lastSegSwitchLSN ;
80078073 LWLockRelease (WALWriteLock );
80088074
80098075 return result ;
@@ -8065,7 +8131,7 @@ ShutdownXLOG(int code, Datum arg)
80658131 * record will go to the next XLOG file and won't be archived (yet).
80668132 */
80678133 if (XLogArchivingActive () && XLogArchiveCommandSet ())
8068- RequestXLogSwitch ();
8134+ RequestXLogSwitch (false );
80698135
80708136 CreateCheckPoint (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE );
80718137 }
@@ -8253,7 +8319,7 @@ CreateCheckPoint(int flags)
82538319 uint32 freespace ;
82548320 XLogRecPtr PriorRedoPtr ;
82558321 XLogRecPtr curInsert ;
8256- XLogRecPtr prevPtr ;
8322+ XLogRecPtr last_important_lsn ;
82578323 VirtualTransactionId * vxids ;
82588324 int nvxids ;
82598325
@@ -8333,39 +8399,34 @@ CreateCheckPoint(int flags)
83338399 else
83348400 checkPoint .oldestActiveXid = InvalidTransactionId ;
83358401
8402+ /*
8403+ * Get location of last important record before acquiring insert locks (as
8404+ * GetLastImportantRecPtr() itself acquires each WAL insertion lock in turn).
8405+ */
8406+ last_important_lsn = GetLastImportantRecPtr ();
8407+
83368408 /*
83378409 * We must block concurrent insertions while examining insert state to
83388410 * determine the checkpoint REDO pointer.
83398411 */
83408412 WALInsertLockAcquireExclusive ();
83418413 curInsert = XLogBytePosToRecPtr (Insert -> CurrBytePos );
8342- prevPtr = XLogBytePosToRecPtr (Insert -> PrevBytePos );
83438414
83448415 /*
8345- * If this isn't a shutdown or forced checkpoint, and we have not inserted
8346- * any XLOG records since the start of the last checkpoint, skip the
8347- * checkpoint. The idea here is to avoid inserting duplicate checkpoints
8348- * when the system is idle. That wastes log space, and more importantly it
8349- * exposes us to possible loss of both current and previous checkpoint
8350- * records if the machine crashes just as we're writing the update.
8351- * (Perhaps it'd make even more sense to checkpoint only when the previous
8352- * checkpoint record is in a different xlog page?)
8353- *
8354- * If the previous checkpoint crossed a WAL segment, however, we create
8355- * the checkpoint anyway, to have the latest checkpoint fully contained in
8356- * the new segment. This is for a little bit of extra robustness: it's
8357- * better if you don't need to keep two WAL segments around to recover the
8358- * checkpoint.
8416+ * If this isn't a shutdown or forced checkpoint, and if there has been no
8417+ * WAL activity requiring a checkpoint, skip it. The idea here is to
8418+ * avoid inserting duplicate checkpoints when the system is idle.
83598419 */
83608420 if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
83618421 CHECKPOINT_FORCE )) == 0 )
83628422 {
8363- if (prevPtr == ControlFile -> checkPointCopy .redo &&
8364- prevPtr / XLOG_SEG_SIZE == curInsert / XLOG_SEG_SIZE )
8423+ if (last_important_lsn == ControlFile -> checkPoint )
83658424 {
83668425 WALInsertLockRelease ();
83678426 LWLockRelease (CheckpointLock );
83688427 END_CRIT_SECTION ();
8428+ ereport (DEBUG1 ,
8429+ (errmsg ("checkpoint skipped due to an idle system" )));
83698430 return ;
83708431 }
83718432 }
@@ -9122,12 +9183,15 @@ XLogPutNextOid(Oid nextOid)
91229183 * write a switch record because we are already at segment start.
91239184 */
91249185XLogRecPtr
9125- RequestXLogSwitch (void )
9186+ RequestXLogSwitch (bool mark_unimportant )
91269187{
91279188 XLogRecPtr RecPtr ;
91289189
91299190 /* XLOG SWITCH has no data */
91309191 XLogBeginInsert ();
9192+
9193+ if (mark_unimportant )
9194+ XLogSetRecordFlags (XLOG_MARK_UNIMPORTANT );
91319195 RecPtr = XLogInsert (RM_XLOG_ID , XLOG_SWITCH );
91329196
91339197 return RecPtr ;
@@ -9997,7 +10061,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
999710061 * recovery case described above.
999810062 */
999910063 if (!backup_started_in_recovery )
10000- RequestXLogSwitch ();
10064+ RequestXLogSwitch (false );
1000110065
1000210066 do
1000310067 {
@@ -10582,7 +10646,7 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
1058210646 * Force a switch to a new xlog segment file, so that the backup is valid
1058310647 * as soon as archiver moves out the current segment file.
1058410648 */
10585- RequestXLogSwitch ();
10649+ RequestXLogSwitch (false );
1058610650
1058710651 XLByteToPrevSeg (stoppoint , _logSegNo );
1058810652 XLogFileName (stopxlogfilename , ThisTimeLineID , _logSegNo );
0 commit comments