6666#define RECOVERY_COMMAND_FILE "recovery.conf"
6767#define RECOVERY_COMMAND_DONE "recovery.done"
6868#define PROMOTE_SIGNAL_FILE "promote"
69+ #define FAST_PROMOTE_SIGNAL_FILE "fast_promote"
6970
7071
7172/* User-settable parameters */
@@ -210,6 +211,9 @@ bool StandbyMode = false;
210211static char * PrimaryConnInfo = NULL ;
211212static char * TriggerFile = NULL ;
212213
214+ /* whether a request for fast promotion has been made yet */
215+ static bool fast_promote = false;
216+
213217/* if recoveryStopsHere returns true, it saves actual stop xid/time/name here */
214218static TransactionId recoveryStopXid ;
215219static TimestampTz recoveryStopTime ;
@@ -611,6 +615,7 @@ static void CheckRequiredParameterValues(void);
611615static void XLogReportParameters (void );
612616static void checkTimeLineSwitch (XLogRecPtr lsn , TimeLineID newTLI );
613617static void LocalSetXLogInsertAllowed (void );
618+ static void CreateEndOfRecoveryRecord (void );
614619static void CheckPointGuts (XLogRecPtr checkPointRedo , int flags );
615620static void KeepLogSeg (XLogRecPtr recptr , XLogSegNo * logSegNo );
616621
@@ -642,7 +647,7 @@ static XLogRecord *ReadRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
642647 int emode , bool fetching_ckpt );
643648static void CheckRecoveryConsistency (void );
644649static XLogRecord * ReadCheckpointRecord (XLogReaderState * xlogreader ,
645- XLogRecPtr RecPtr , int whichChkpt );
650+ XLogRecPtr RecPtr , int whichChkpti , bool report );
646651static bool rescanLatestTimeLine (void );
647652static void WriteControlFile (void );
648653static void ReadControlFile (void );
@@ -4848,7 +4853,7 @@ StartupXLOG(void)
48484853 * When a backup_label file is present, we want to roll forward from
48494854 * the checkpoint it identifies, rather than using pg_control.
48504855 */
4851- record = ReadCheckpointRecord (xlogreader , checkPointLoc , 0 );
4856+ record = ReadCheckpointRecord (xlogreader , checkPointLoc , 0 , true );
48524857 if (record != NULL )
48534858 {
48544859 memcpy (& checkPoint , XLogRecGetData (record ), sizeof (CheckPoint ));
@@ -4890,7 +4895,7 @@ StartupXLOG(void)
48904895 */
48914896 checkPointLoc = ControlFile -> checkPoint ;
48924897 RedoStartLSN = ControlFile -> checkPointCopy .redo ;
4893- record = ReadCheckpointRecord (xlogreader , checkPointLoc , 1 );
4898+ record = ReadCheckpointRecord (xlogreader , checkPointLoc , 1 , true );
48944899 if (record != NULL )
48954900 {
48964901 ereport (DEBUG1 ,
@@ -4909,7 +4914,7 @@ StartupXLOG(void)
49094914 else
49104915 {
49114916 checkPointLoc = ControlFile -> prevCheckPoint ;
4912- record = ReadCheckpointRecord (xlogreader , checkPointLoc , 2 );
4917+ record = ReadCheckpointRecord (xlogreader , checkPointLoc , 2 , true );
49134918 if (record != NULL )
49144919 {
49154920 ereport (LOG ,
@@ -5393,22 +5398,33 @@ StartupXLOG(void)
53935398 }
53945399
53955400 /*
5396- * Before replaying this record, check if it is a shutdown
5397- * checkpoint record that causes the current timeline to
5398- * change. The checkpoint record is already considered to be
5399- * part of the new timeline, so we update ThisTimeLineID
5400- * before replaying it. That's important so that replayEndTLI,
5401- * which is recorded as the minimum recovery point's TLI if
5401+ * Before replaying this record, check if this record
5402+ * causes the current timeline to change. The record is
5403+ * already considered to be part of the new timeline,
5404+ * so we update ThisTimeLineID before replaying it.
5405+ * That's important so that replayEndTLI, which is
5406+ * recorded as the minimum recovery point's TLI if
54025407 * recovery stops after this record, is set correctly.
54035408 */
5404- if (record -> xl_rmid == RM_XLOG_ID &&
5405- (record -> xl_info & ~XLR_INFO_MASK ) == XLOG_CHECKPOINT_SHUTDOWN )
5409+ if (record -> xl_rmid == RM_XLOG_ID )
54065410 {
5407- CheckPoint checkPoint ;
5408- TimeLineID newTLI ;
5411+ TimeLineID newTLI = ThisTimeLineID ;
5412+ uint8 info = record -> xl_info & ~XLR_INFO_MASK ;
5413+
5414+ if (info == XLOG_CHECKPOINT_SHUTDOWN )
5415+ {
5416+ CheckPoint checkPoint ;
5417+
5418+ memcpy (& checkPoint , XLogRecGetData (record ), sizeof (CheckPoint ));
5419+ newTLI = checkPoint .ThisTimeLineID ;
5420+ }
5421+ else if (info == XLOG_END_OF_RECOVERY )
5422+ {
5423+ xl_end_of_recovery xlrec ;
54095424
5410- memcpy (& checkPoint , XLogRecGetData (record ), sizeof (CheckPoint ));
5411- newTLI = checkPoint .ThisTimeLineID ;
5425+ memcpy (& xlrec , XLogRecGetData (record ), sizeof (xl_end_of_recovery ));
5426+ newTLI = xlrec .ThisTimeLineID ;
5427+ }
54125428
54135429 if (newTLI != ThisTimeLineID )
54145430 {
@@ -5729,9 +5745,36 @@ StartupXLOG(void)
57295745 * allows some extra error checking in xlog_redo.
57305746 */
57315747 if (bgwriterLaunched )
5732- RequestCheckpoint (CHECKPOINT_END_OF_RECOVERY |
5733- CHECKPOINT_IMMEDIATE |
5734- CHECKPOINT_WAIT );
5748+ {
5749+ bool checkpoint_wait = true;
5750+
5751+ /*
5752+ * If we've been explicitly promoted with fast option,
5753+ * end recovery without a checkpoint if possible.
5754+ */
5755+ if (fast_promote )
5756+ {
5757+ checkPointLoc = ControlFile -> prevCheckPoint ;
5758+ record = ReadCheckpointRecord (xlogreader , checkPointLoc , 2 , false);
5759+ if (record != NULL )
5760+ {
5761+ checkpoint_wait = false;
5762+ CreateEndOfRecoveryRecord ();
5763+ }
5764+ }
5765+
5766+ /*
5767+ * In most cases we will wait for a full checkpoint to complete.
5768+ *
5769+ * If not, issue a normal, non-immediate checkpoint but don't wait.
5770+ */
5771+ if (checkpoint_wait )
5772+ RequestCheckpoint (CHECKPOINT_END_OF_RECOVERY |
5773+ CHECKPOINT_IMMEDIATE |
5774+ CHECKPOINT_WAIT );
5775+ else
5776+ RequestCheckpoint (0 ); /* No flags */
5777+ }
57355778 else
57365779 CreateCheckPoint (CHECKPOINT_END_OF_RECOVERY | CHECKPOINT_IMMEDIATE );
57375780
@@ -6060,12 +6103,15 @@ LocalSetXLogInsertAllowed(void)
60606103 */
60616104static XLogRecord *
60626105ReadCheckpointRecord (XLogReaderState * xlogreader , XLogRecPtr RecPtr ,
6063- int whichChkpt )
6106+ int whichChkpt , bool report )
60646107{
60656108 XLogRecord * record ;
60666109
60676110 if (!XRecOffIsValid (RecPtr ))
60686111 {
6112+ if (!report )
6113+ return NULL ;
6114+
60696115 switch (whichChkpt )
60706116 {
60716117 case 1 :
@@ -6088,6 +6134,9 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
60886134
60896135 if (record == NULL )
60906136 {
6137+ if (!report )
6138+ return NULL ;
6139+
60916140 switch (whichChkpt )
60926141 {
60936142 case 1 :
@@ -6882,6 +6931,44 @@ CreateCheckPoint(int flags)
68826931 LWLockRelease (CheckpointLock );
68836932}
68846933
6934+ /*
6935+ * Mark the end of recovery in WAL though without running a full checkpoint.
6936+ * We can expect that a restartpoint is likely to be in progress as we
6937+ * do this, though we are unwilling to wait for it to complete. So be
6938+ * careful to avoid taking the CheckpointLock anywhere here.
6939+ *
6940+ * CreateRestartPoint() allows for the case where recovery may end before
6941+ * the restartpoint completes so there is no concern of concurrent behaviour.
6942+ */
6943+ void
6944+ CreateEndOfRecoveryRecord (void )
6945+ {
6946+ xl_end_of_recovery xlrec ;
6947+ XLogRecData rdata ;
6948+
6949+ /* sanity check */
6950+ if (!RecoveryInProgress ())
6951+ elog (ERROR , "can only be used to end recovery" );
6952+
6953+ xlrec .end_time = time (NULL );
6954+ xlrec .ThisTimeLineID = ThisTimeLineID ;
6955+
6956+ LocalSetXLogInsertAllowed ();
6957+
6958+ START_CRIT_SECTION ();
6959+
6960+ rdata .data = (char * ) & xlrec ;
6961+ rdata .len = sizeof (xl_end_of_recovery );
6962+ rdata .buffer = InvalidBuffer ;
6963+ rdata .next = NULL ;
6964+
6965+ (void ) XLogInsert (RM_XLOG_ID , XLOG_END_OF_RECOVERY , & rdata );
6966+
6967+ END_CRIT_SECTION ();
6968+
6969+ LocalXLogInsertAllowed = -1 ; /* return to "check" state */
6970+ }
6971+
68856972/*
68866973 * Flush all data in shared memory to disk, and fsync
68876974 *
@@ -7613,6 +7700,27 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
76137700
76147701 RecoveryRestartPoint (& checkPoint );
76157702 }
7703+ else if (info == XLOG_END_OF_RECOVERY )
7704+ {
7705+ xl_end_of_recovery xlrec ;
7706+
7707+ memcpy (& xlrec , XLogRecGetData (record ), sizeof (xl_end_of_recovery ));
7708+
7709+ /*
7710+ * For Hot Standby, we could treat this like a Shutdown Checkpoint,
7711+ * but this case is rarer and harder to test, so the benefit doesn't
7712+ * outweigh the potential extra cost of maintenance.
7713+ */
7714+
7715+ /*
7716+ * We should've already switched to the new TLI before replaying this
7717+ * record.
7718+ */
7719+ if (xlrec .ThisTimeLineID != ThisTimeLineID )
7720+ ereport (PANIC ,
7721+ (errmsg ("unexpected timeline ID %u (should be %u) in checkpoint record" ,
7722+ xlrec .ThisTimeLineID , ThisTimeLineID )));
7723+ }
76167724 else if (info == XLOG_NOOP )
76177725 {
76187726 /* nothing to do here */
@@ -9405,8 +9513,39 @@ CheckForStandbyTrigger(void)
94059513
94069514 if (IsPromoteTriggered ())
94079515 {
9408- ereport (LOG ,
9516+ /*
9517+ * In 9.1 and 9.2 the postmaster unlinked the promote file
9518+ * inside the signal handler. We now leave the file in place
9519+ * and let the Startup process do the unlink. This allows
9520+ * Startup to know whether we're doing fast or normal
9521+ * promotion. Fast promotion takes precedence.
9522+ */
9523+ if (stat (FAST_PROMOTE_SIGNAL_FILE , & stat_buf ) == 0 )
9524+ {
9525+ unlink (FAST_PROMOTE_SIGNAL_FILE );
9526+ unlink (PROMOTE_SIGNAL_FILE );
9527+ fast_promote = true;
9528+ }
9529+ else if (stat (PROMOTE_SIGNAL_FILE , & stat_buf ) == 0 )
9530+ {
9531+ unlink (PROMOTE_SIGNAL_FILE );
9532+ fast_promote = false;
9533+ }
9534+
9535+ /*
9536+ * We only look for fast promote via the pg_ctl promote option.
9537+ * It would be possible to extend trigger file support for the
9538+ * fast promotion option but that wouldn't be backwards compatible
9539+ * anyway and we're looking to focus further work on the promote
9540+ * option as the right way to signal end of recovery.
9541+ */
9542+ if (fast_promote )
9543+ ereport (LOG ,
9544+ (errmsg ("received fast promote request" )));
9545+ else
9546+ ereport (LOG ,
94099547 (errmsg ("received promote request" )));
9548+
94109549 ResetPromoteTriggered ();
94119550 triggered = true;
94129551 return true;
@@ -9435,15 +9574,10 @@ CheckPromoteSignal(void)
94359574{
94369575 struct stat stat_buf ;
94379576
9438- if (stat (PROMOTE_SIGNAL_FILE , & stat_buf ) == 0 )
9439- {
9440- /*
9441- * Since we are in a signal handler, it's not safe to elog. We
9442- * silently ignore any error from unlink.
9443- */
9444- unlink (PROMOTE_SIGNAL_FILE );
9577+ if (stat (PROMOTE_SIGNAL_FILE , & stat_buf ) == 0 ||
9578+ stat (FAST_PROMOTE_SIGNAL_FILE , & stat_buf ) == 0 )
94459579 return true;
9446- }
9580+
94479581 return false;
94489582}
94499583
0 commit comments