@@ -5224,31 +5224,6 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog)
52245224 * happens in the middle of a segment, copy data from the last WAL segment
52255225 * of the old timeline up to the switch point, to the starting WAL segment
52265226 * on the new timeline.
5227- *
5228- * What to do with the partial segment on the old timeline? If we don't
5229- * archive it, and the server that created the WAL never archives it
5230- * either (e.g. because it was hit by a meteor), it will never make it to
5231- * the archive. That's OK from our point of view, because the new segment
5232- * that we created with the new TLI contains all the WAL from the old
5233- * timeline up to the switch point. But if you later try to do PITR to the
5234- * "missing" WAL on the old timeline, recovery won't find it in the
5235- * archive. It's physically present in the new file with new TLI, but
5236- * recovery won't look there when it's recovering to the older timeline.
5237- * On the other hand, if we archive the partial segment, and the original
5238- * server on that timeline is still running and archives the completed
5239- * version of the same segment later, it will fail. (We used to do that in
5240- * 9.4 and below, and it caused such problems).
5241- *
5242- * As a compromise, we archive the last segment with the .partial suffix.
5243- * Archive recovery will never try to read .partial segments, so they will
5244- * normally go unused. But in the odd PITR case, the administrator can
5245- * copy them manually to the pg_xlog directory (removing the suffix). They
5246- * can be useful in debugging, too.
5247- *
5248- * If a .done file already exists for the old timeline, however, there is
5249- * already a complete copy of the file in the archive, and there is no
5250- * need to archive the partial one. (In particular, if it was restored
5251- * from the archive to begin with, it's expected to have .done file).
52525227 */
52535228 if (endLogSegNo == startLogSegNo )
52545229 {
@@ -5266,31 +5241,6 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog)
52665241 tmpfname = XLogFileCopy (NULL , xlogfname , endOfLog % XLOG_SEG_SIZE );
52675242 if (!InstallXLogFileSegment (& endLogSegNo , tmpfname , false, 0 , false))
52685243 elog (ERROR , "InstallXLogFileSegment should not have failed" );
5269-
5270- /*
5271- * Make a .partial copy for the archive (unless the original file was
5272- * already archived)
5273- */
5274- if (XLogArchivingActive () && XLogArchiveIsBusy (xlogfname ))
5275- {
5276- char partialfname [MAXFNAMELEN ];
5277-
5278- snprintf (partialfname , MAXFNAMELEN , "%s.partial" , xlogfname );
5279-
5280- /* Make sure there's no .done or .ready file for it. */
5281- XLogArchiveCleanup (partialfname );
5282-
5283- /*
5284- * We copy the whole segment, not just upto the switch point.
5285- * The portion after the switch point might be garbage, but it
5286- * might also be valid WAL, if we stopped recovery at user's
5287- * request before reaching the end. Better to preserve the
5288- * file as it is, garbage and all, than lose the evidence if
5289- * something goes wrong.
5290- */
5291- (void ) XLogFileCopy (partialfname , xlogfname , XLOG_SEG_SIZE );
5292- XLogArchiveNotify (partialfname );
5293- }
52945244 }
52955245 else
52965246 {
@@ -5942,6 +5892,7 @@ StartupXLOG(void)
59425892 XLogRecPtr RecPtr ,
59435893 checkPointLoc ,
59445894 EndOfLog ;
5895+ TimeLineID EndOfLogTLI ;
59455896 TimeLineID PrevTimeLineID ;
59465897 XLogRecord * record ;
59475898 TransactionId oldestActiveXID ;
@@ -7032,6 +6983,15 @@ StartupXLOG(void)
70326983 record = ReadRecord (xlogreader , LastRec , PANIC , false);
70336984 EndOfLog = EndRecPtr ;
70346985
6986+ /*
6987+ * EndOfLogTLI is the TLI in the filename of the XLOG segment containing
6988+ * the end-of-log. It could be different from the timeline that EndOfLog
6989+ * nominally belongs to, if there was a timeline switch in that segment,
6990+ * and we were reading the old WAL from a segment belonging to a higher
6991+ * timeline.
6992+ */
6993+ EndOfLogTLI = xlogreader -> readPageTLI ;
6994+
70356995 /*
70366996 * Complain if we did not roll forward far enough to render the backup
70376997 * dump consistent. Note: it is indeed okay to look at the local variable
@@ -7131,7 +7091,7 @@ StartupXLOG(void)
71317091 * we will use that below.)
71327092 */
71337093 if (ArchiveRecoveryRequested )
7134- exitArchiveRecovery (xlogreader -> readPageTLI , EndOfLog );
7094+ exitArchiveRecovery (EndOfLogTLI , EndOfLog );
71357095
71367096 /*
71377097 * Prepare to write WAL starting at EndOfLog position, and init xlog
@@ -7262,12 +7222,82 @@ StartupXLOG(void)
72627222 true);
72637223 }
72647224
7265- /*
7266- * Clean up any (possibly bogus) future WAL segments on the old timeline.
7267- */
72687225 if (ArchiveRecoveryRequested )
7226+ {
7227+ /*
7228+ * We switched to a new timeline. Clean up segments on the old
7229+ * timeline.
7230+ *
7231+ * If there are any higher-numbered segments on the old timeline,
7232+ * remove them. They might contain valid WAL, but they might also be
7233+ * pre-allocated files containing garbage. In any case, they are not
7234+ * part of the new timeline's history so we don't need them.
7235+ */
72697236 RemoveNonParentXlogFiles (EndOfLog , ThisTimeLineID );
72707237
7238+ /*
7239+ * If the switch happened in the middle of a segment, what to do with
7240+ * the last, partial segment on the old timeline? If we don't archive
7241+ * it, and the server that created the WAL never archives it either
7242+ * (e.g. because it was hit by a meteor), it will never make it to the
7243+ * archive. That's OK from our point of view, because the new segment
7244+ * that we created with the new TLI contains all the WAL from the old
7245+ * timeline up to the switch point. But if you later try to do PITR to
7246+ * the "missing" WAL on the old timeline, recovery won't find it in
7247+ * the archive. It's physically present in the new file with new TLI,
7248+ * but recovery won't look there when it's recovering to the older
7249+ * timeline. On the other hand, if we archive the partial segment, and
7250+ * the original server on that timeline is still running and archives
7251+ * the completed version of the same segment later, it will fail. (We
7252+ * used to do that in 9.4 and below, and it caused such problems).
7253+ *
7254+ * As a compromise, we rename the last segment with the .partial
7255+ * suffix, and archive it. Archive recovery will never try to read
7256+ * .partial segments, so they will normally go unused. But in the odd
7257+ * PITR case, the administrator can copy them manually to the pg_xlog
7258+ * directory (removing the suffix). They can be useful in debugging,
7259+ * too.
7260+ *
7261+ * If a .done or .ready file already exists for the old timeline,
7262+ * however, we had already determined that the segment is complete,
7263+ * so we can let it be archived normally. (In particular, if it was
7264+ * restored from the archive to begin with, it's expected to have a
7265+ * .done file).
7266+ */
7267+ if (EndOfLog % XLOG_SEG_SIZE != 0 && XLogArchivingActive ())
7268+ {
7269+ char origfname [MAXFNAMELEN ];
7270+ XLogSegNo endLogSegNo ;
7271+
7272+ XLByteToPrevSeg (EndOfLog , endLogSegNo );
7273+ XLogFileName (origfname , EndOfLogTLI , endLogSegNo );
7274+
7275+ if (!XLogArchiveIsReadyOrDone (origfname ))
7276+ {
7277+ char origpath [MAXPGPATH ];
7278+ char partialfname [MAXFNAMELEN ];
7279+ char partialpath [MAXPGPATH ];
7280+
7281+ XLogFilePath (origpath , EndOfLogTLI , endLogSegNo );
7282+ snprintf (partialfname , MAXFNAMELEN , "%s.partial" , origfname ); /* limit must match partialfname's declared size */
7283+ snprintf (partialpath , MAXPGPATH , "%s.partial" , origpath );
7284+
7285+ /*
7286+ * Make sure there's no .done or .ready file for the .partial
7287+ * file.
7288+ */
7289+ XLogArchiveCleanup (partialfname );
7290+
7291+ if (rename (origpath , partialpath ) != 0 )
7292+ ereport (ERROR ,
7293+ (errcode_for_file_access (),
7294+ errmsg ("could not rename file \"%s\" to \"%s\": %m" ,
7295+ origpath , partialpath )));
7296+ XLogArchiveNotify (partialfname );
7297+ }
7298+ }
7299+ }
7300+
72717301 /*
72727302 * Preallocate additional log files, if wanted.
72737303 */
0 commit comments