77 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
88 * Portions Copyright (c) 1994, Regents of the University of California
99 *
10- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.317 2008/08/11 11:05:10 heikki Exp $
10+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.318 2008/09/08 16:42:15 tgl Exp $
1111 *
1212 *-------------------------------------------------------------------------
1313 */
@@ -391,7 +391,8 @@ static bool InRedo = false;
391391
392392static void XLogArchiveNotify (const char * xlog );
393393static void XLogArchiveNotifySeg (uint32 log , uint32 seg );
394- static bool XLogArchiveCheckDone (const char * xlog , bool create_if_missing );
394+ static bool XLogArchiveCheckDone (const char * xlog );
395+ static bool XLogArchiveIsBusy (const char * xlog );
395396static void XLogArchiveCleanup (const char * xlog );
396397static void readRecoveryCommandFile (void );
397398static void exitArchiveRecovery (TimeLineID endTLI ,
@@ -1137,7 +1138,7 @@ XLogArchiveNotifySeg(uint32 log, uint32 seg)
11371138 * create <XLOG>.ready fails, we'll retry during subsequent checkpoints.
11381139 */
11391140static bool
1140- XLogArchiveCheckDone (const char * xlog , bool create_if_missing )
1141+ XLogArchiveCheckDone (const char * xlog )
11411142{
11421143 char archiveStatusPath [MAXPGPATH ];
11431144 struct stat stat_buf ;
@@ -1162,12 +1163,54 @@ XLogArchiveCheckDone(const char *xlog, bool create_if_missing)
11621163 return true;
11631164
11641165 /* Retry creation of the .ready file */
1165- if (create_if_missing )
1166- XLogArchiveNotify (xlog );
1167-
1166+ XLogArchiveNotify (xlog );
11681167 return false;
11691168}
11701169
1170+ /*
1171+ * XLogArchiveIsBusy
1172+ *
1173+ * Check to see if an XLOG segment file is still unarchived.
1174+ * This is almost but not quite the inverse of XLogArchiveCheckDone: in
1175+ * the first place we aren't chartered to recreate the .ready file, and
1176+ * in the second place we should consider that if the file is already gone
1177+ * then it's not busy. (This check is needed to handle the race condition
1178+ * that a checkpoint already deleted the no-longer-needed file.)
1179+ */
1180+ static bool
1181+ XLogArchiveIsBusy (const char * xlog )
1182+ {
1183+ char archiveStatusPath [MAXPGPATH ];
1184+ struct stat stat_buf ;
1185+
1186+ /* First check for .done --- this means archiver is done with it */
1187+ StatusFilePath (archiveStatusPath , xlog , ".done" );
1188+ if (stat (archiveStatusPath , & stat_buf ) == 0 )
1189+ return false;
1190+
1191+ /* check for .ready --- this means archiver is still busy with it */
1192+ StatusFilePath (archiveStatusPath , xlog , ".ready" );
1193+ if (stat (archiveStatusPath , & stat_buf ) == 0 )
1194+ return true;
1195+
1196+ /* Race condition --- maybe archiver just finished, so recheck */
1197+ StatusFilePath (archiveStatusPath , xlog , ".done" );
1198+ if (stat (archiveStatusPath , & stat_buf ) == 0 )
1199+ return false;
1200+
1201+ /*
1202+ * Check to see if the WAL file has been removed by checkpoint,
1203+ * which implies it has already been archived, and explains why we
1204+ * can't see a status file for it.
1205+ */
1206+ snprintf (archiveStatusPath , MAXPGPATH , XLOGDIR "/%s" , xlog );
1207+ if (stat (archiveStatusPath , & stat_buf ) != 0 &&
1208+ errno == ENOENT )
1209+ return false;
1210+
1211+ return true;
1212+ }
1213+
11711214/*
11721215 * XLogArchiveCleanup
11731216 *
@@ -2499,14 +2542,14 @@ RestoreArchivedFile(char *path, const char *xlogfname,
24992542 *
25002543 * We initialise this with the filename of an InvalidXLogRecPtr, which
25012544 * will prevent the deletion of any WAL files from the archive
2502- * because of the alphabetic sorting property of WAL filenames.
2545+ * because of the alphabetic sorting property of WAL filenames.
25032546 *
25042547 * Once we have successfully located the redo pointer of the checkpoint
25052548 * from which we start recovery we never request a file prior to the redo
25062549 * pointer of the last restartpoint. When redo begins we know that we
25072550 * have successfully located it, so there is no need for additional
25082551 * status flags to signify the point when we can begin deleting WAL files
2509- * from the archive.
2552+ * from the archive.
25102553 */
25112554 if (InRedo )
25122555 {
@@ -2740,7 +2783,7 @@ RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr)
27402783 strspn (xlde -> d_name , "0123456789ABCDEF" ) == 24 &&
27412784 strcmp (xlde -> d_name + 8 , lastoff + 8 ) <= 0 )
27422785 {
2743- if (XLogArchiveCheckDone (xlde -> d_name , true ))
2786+ if (XLogArchiveCheckDone (xlde -> d_name ))
27442787 {
27452788 snprintf (path , MAXPGPATH , XLOGDIR "/%s" , xlde -> d_name );
27462789
@@ -2807,7 +2850,7 @@ CleanupBackupHistory(void)
28072850 strcmp (xlde -> d_name + strlen (xlde -> d_name ) - strlen (".backup" ),
28082851 ".backup" ) == 0 )
28092852 {
2810- if (XLogArchiveCheckDone (xlde -> d_name , true ))
2853+ if (XLogArchiveCheckDone (xlde -> d_name ))
28112854 {
28122855 ereport (DEBUG2 ,
28132856 (errmsg ("removing transaction log backup history file \"%s\"" ,
@@ -6623,6 +6666,12 @@ pg_stop_backup(PG_FUNCTION_ARGS)
66236666 (errcode (ERRCODE_INSUFFICIENT_PRIVILEGE ),
66246667 (errmsg ("must be superuser to run a backup" ))));
66256668
6669+ if (!XLogArchivingActive ())
6670+ ereport (ERROR ,
6671+ (errcode (ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE ),
6672+ errmsg ("WAL archiving is not active" ),
6673+ errhint ("archive_mode must be enabled at server start." )));
6674+
66266675 /*
66276676 * OK to clear forcePageWrites
66286677 */
@@ -6721,25 +6770,23 @@ pg_stop_backup(PG_FUNCTION_ARGS)
67216770 CleanupBackupHistory ();
67226771
67236772 /*
6724- * Wait until the history file has been archived. We assume that the
6725- * alphabetic sorting property of the WAL files ensures the last WAL
6726- * file is guaranteed archived by the time the history file is archived.
6773+ * Wait until both the last WAL file filled during backup and the history
6774+ * file have been archived. We assume that the alphabetic sorting
6775+ * property of the WAL files ensures any earlier WAL files are safely
6776+ * archived as well.
67276777 *
67286778 * We wait forever, since archive_command is supposed to work and
6729- * we assume the admin wanted his backup to work completely. If you
6730- * don't wish to wait, you can SET statement_timeout = xx;
6731- *
6732- * If the status file is missing, we assume that is because it was
6733- * set to .ready before we slept, then while asleep it has been set
6734- * to .done and then removed by a concurrent checkpoint.
6779+ * we assume the admin wanted his backup to work completely. If you
6780+ * don't wish to wait, you can set statement_timeout.
67356781 */
67366782 BackupHistoryFileName (histfilepath , ThisTimeLineID , _logId , _logSeg ,
67376783 startpoint .xrecoff % XLogSegSize );
67386784
67396785 seconds_before_warning = 60 ;
67406786 waits = 0 ;
67416787
6742- while (!XLogArchiveCheckDone (histfilepath , false))
6788+ while (XLogArchiveIsBusy (stopxlogfilename ) ||
6789+ XLogArchiveIsBusy (histfilepath ))
67436790 {
67446791 CHECK_FOR_INTERRUPTS ();
67456792
@@ -6748,8 +6795,9 @@ pg_stop_backup(PG_FUNCTION_ARGS)
67486795 if (++ waits >= seconds_before_warning )
67496796 {
67506797 seconds_before_warning *= 2 ; /* This wraps in >10 years... */
6751- elog (WARNING , "pg_stop_backup() waiting for archive to complete "
6752- "(%d seconds delay)" , waits );
6798+ ereport (WARNING ,
6799+ (errmsg ("pg_stop_backup still waiting for archive to complete (%d seconds elapsed)" ,
6800+ waits )));
67536801 }
67546802 }
67556803
0 commit comments