@@ -239,13 +239,13 @@ static bool RecoveryError = false; /* T if WAL recovery failed */
239239 * hot standby during archive recovery.
240240 *
241241 * When the startup process is ready to start archive recovery, it signals the
242- * postmaster, and we switch to PM_RECOVERY state. The background writer is
243- * launched, while the startup process continues applying WAL. If Hot Standby
244- * is enabled, then, after reaching a consistent point in WAL redo, startup
245- * process signals us again, and we switch to PM_HOT_STANDBY state and
246- * begin accepting connections to perform read-only queries. When archive
247- * recovery is finished, the startup process exits with exit code 0 and we
248- * switch to PM_RUN state.
242+ * postmaster, and we switch to PM_RECOVERY state. The background writer and
243+ * checkpointer are launched, while the startup process continues applying WAL.
244+ * If Hot Standby is enabled, then, after reaching a consistent point in WAL
245+ * redo, the startup process signals us again, and we switch to PM_HOT_STANDBY
246+ * state and begin accepting connections to perform read-only queries. When
247+ * archive recovery is finished, the startup process exits with exit code 0
248+ * and we switch to PM_RUN state.
249249 *
250250 * Normal child backends can only be launched when we are in PM_RUN or
251251 * PM_HOT_STANDBY state. (We also allow launch of normal
@@ -1038,7 +1038,7 @@ PostmasterMain(int argc, char *argv[])
10381038 * handling setup of child processes. See tcop/postgres.c,
10391039 * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
10401040 * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
1041- * postmaster/syslogger.c and postmaster/checkpointer.c
1041+ * postmaster/syslogger.c and postmaster/checkpointer.c.
10421042 */
10431043 pqinitmask ();
10441044 PG_SETMASK (& BlockSig );
@@ -1373,10 +1373,10 @@ ServerLoop(void)
13731373 /*
13741374 * If no background writer process is running, and we are not in a
13751375 * state that prevents it, start one. It doesn't matter if this
1376- * fails, we'll just try again later.
1376+ * fails, we'll just try again later. Likewise for the checkpointer.
13771377 */
13781378 if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1379- pmState == PM_HOT_STANDBY )
1379+ pmState == PM_HOT_STANDBY )
13801380 {
13811381 if (BgWriterPID == 0 )
13821382 BgWriterPID = StartBackgroundWriter ();
@@ -1386,7 +1386,8 @@ ServerLoop(void)
13861386
13871387 /*
13881388 * Likewise, if we have lost the walwriter process, try to start a new
1389- * one.
1389+ * one. But this is needed only in normal operation (else we cannot
1390+ * be writing any new WAL).
13901391 */
13911392 if (WalWriterPID == 0 && pmState == PM_RUN )
13921393 WalWriterPID = StartWalWriter ();
@@ -2131,11 +2132,12 @@ pmdie(SIGNAL_ARGS)
21312132 /* and the autovac launcher too */
21322133 if (AutoVacPID != 0 )
21332134 signal_child (AutoVacPID , SIGTERM );
2135+ /* and the bgwriter too */
2136+ if (BgWriterPID != 0 )
2137+ signal_child (BgWriterPID , SIGTERM );
21342138 /* and the walwriter too */
21352139 if (WalWriterPID != 0 )
21362140 signal_child (WalWriterPID , SIGTERM );
2137- if (BgWriterPID != 0 )
2138- signal_child (BgWriterPID , SIGTERM );
21392141
21402142 /*
21412143 * If we're in recovery, we can't kill the startup process
@@ -2174,13 +2176,17 @@ pmdie(SIGNAL_ARGS)
21742176
21752177 if (StartupPID != 0 )
21762178 signal_child (StartupPID , SIGTERM );
2177- if (WalReceiverPID != 0 )
2178- signal_child (WalReceiverPID , SIGTERM );
21792179 if (BgWriterPID != 0 )
21802180 signal_child (BgWriterPID , SIGTERM );
2181+ if (WalReceiverPID != 0 )
2182+ signal_child (WalReceiverPID , SIGTERM );
21812183 if (pmState == PM_RECOVERY )
21822184 {
2183- /* only checkpointer is active in this state */
2185+ /*
2186+ * Only startup, bgwriter, and checkpointer should be active
2187+ * in this state; we just signaled the first two, and we don't
2188+ * want to kill the checkpointer yet.
2189+ */
21842190 pmState = PM_WAIT_BACKENDS ;
21852191 }
21862192 else if (pmState == PM_RUN ||
@@ -2362,21 +2368,21 @@ reaper(SIGNAL_ARGS)
23622368 }
23632369
23642370 /*
2365- * Crank up background tasks, if we didn't do that already
2371+ * Crank up the background tasks, if we didn't do that already
23662372 * when we entered consistent recovery state. It doesn't matter
23672373 * if this fails, we'll just try again later.
23682374 */
23692375 if (BgWriterPID == 0 )
23702376 BgWriterPID = StartBackgroundWriter ();
23712377 if (CheckpointerPID == 0 )
23722378 CheckpointerPID = StartCheckpointer ();
2379+ if (WalWriterPID == 0 )
2380+ WalWriterPID = StartWalWriter ();
23732381
23742382 /*
23752383 * Likewise, start other special children as needed. In a restart
23762384 * situation, some of them may be alive already.
23772385 */
2378- if (WalWriterPID == 0 )
2379- WalWriterPID = StartWalWriter ();
23802386 if (!IsBinaryUpgrade && AutoVacuumingActive () && AutoVacPID == 0 )
23812387 AutoVacPID = StartAutoVacLauncher ();
23822388 if (XLogArchivingActive () && PgArchPID == 0 )
@@ -2392,7 +2398,9 @@ reaper(SIGNAL_ARGS)
23922398 }
23932399
23942400 /*
2395- * Was it the bgwriter?
2401+ * Was it the bgwriter? Normal exit can be ignored; we'll start a
2402+ * new one at the next iteration of the postmaster's main loop, if
2403+ * necessary. Any other exit condition is treated as a crash.
23962404 */
23972405 if (pid == BgWriterPID )
23982406 {
@@ -4228,13 +4236,13 @@ sigusr1_handler(SIGNAL_ARGS)
42284236 FatalError = false;
42294237
42304238 /*
4231- * Crank up the background writers. It doesn't matter if this fails,
4239+ * Crank up the background tasks. It doesn't matter if this fails,
42324240 * we'll just try again later.
42334241 */
4234- Assert (CheckpointerPID == 0 );
4235- CheckpointerPID = StartCheckpointer ();
42364242 Assert (BgWriterPID == 0 );
42374243 BgWriterPID = StartBackgroundWriter ();
4244+ Assert (CheckpointerPID == 0 );
4245+ CheckpointerPID = StartCheckpointer ();
42384246
42394247 pmState = PM_RECOVERY ;
42404248 }
0 commit comments