@@ -249,6 +249,17 @@ static pid_t StartupPID = 0,
249249 PgStatPID = 0 ,
250250 SysLoggerPID = 0 ;
251251
252+ /* Startup process's status */
253+ typedef enum
254+ {
255+ STARTUP_NOT_RUNNING ,
256+ STARTUP_RUNNING ,
257+ STARTUP_SIGNALED , /* we sent it a SIGQUIT or SIGKILL */
258+ STARTUP_CRASHED
259+ } StartupStatusEnum ;
260+
261+ static StartupStatusEnum StartupStatus = STARTUP_NOT_RUNNING ;
262+
252263/* Startup/shutdown state */
253264#define NoShutdown 0
254265#define SmartShutdown 1
@@ -258,7 +269,6 @@ static pid_t StartupPID = 0,
258269static int Shutdown = NoShutdown ;
259270
260271static bool FatalError = false; /* T if recovering from backend crash */
261- static bool RecoveryError = false; /* T if WAL recovery failed */
262272
263273/*
264274 * We use a simple state machine to control startup, shutdown, and
@@ -301,8 +311,6 @@ static bool RecoveryError = false; /* T if WAL recovery failed */
301311 * states, nor in PM_SHUTDOWN states (because we don't enter those states
302312 * when trying to recover from a crash). It can be true in PM_STARTUP state,
303313 * because we don't clear it until we've successfully started WAL redo.
304- * Similarly, RecoveryError means that we have crashed during recovery, and
305- * should not try to restart.
306314 */
307315typedef enum
308316{
@@ -1246,6 +1254,7 @@ PostmasterMain(int argc, char *argv[])
12461254 */
12471255 StartupPID = StartupDataBase ();
12481256 Assert (StartupPID != 0 );
1257+ StartupStatus = STARTUP_RUNNING ;
12491258 pmState = PM_STARTUP ;
12501259
12511260 /* Some workers may be scheduled to start now */
@@ -1666,7 +1675,7 @@ ServerLoop(void)
16661675
16671676 /* If we have lost the archiver, try to start a new one. */
16681677 if (PgArchPID == 0 && PgArchStartupAllowed ())
1669- PgArchPID = pgarch_start ();
1678+ PgArchPID = pgarch_start ();
16701679
16711680 /* If we need to signal the autovacuum launcher, do so now */
16721681 if (avlauncher_needs_signal )
@@ -2591,6 +2600,7 @@ reaper(SIGNAL_ARGS)
25912600 if (Shutdown > NoShutdown &&
25922601 (EXIT_STATUS_0 (exitstatus ) || EXIT_STATUS_1 (exitstatus )))
25932602 {
2603+ StartupStatus = STARTUP_NOT_RUNNING ;
25942604 pmState = PM_WAIT_BACKENDS ;
25952605 /* PostmasterStateMachine logic does the rest */
25962606 continue ;
@@ -2600,6 +2610,7 @@ reaper(SIGNAL_ARGS)
26002610 {
26012611 ereport (LOG ,
26022612 (errmsg ("shutdown at recovery target" )));
2613+ StartupStatus = STARTUP_NOT_RUNNING ;
26032614 Shutdown = SmartShutdown ;
26042615 TerminateChildren (SIGTERM );
26052616 pmState = PM_WAIT_BACKENDS ;
@@ -2624,16 +2635,18 @@ reaper(SIGNAL_ARGS)
26242635 /*
26252636 * After PM_STARTUP, any unexpected exit (including FATAL exit) of
26262637 * the startup process is catastrophic, so kill other children,
2627- * and set RecoveryError so we don't try to reinitialize after
2628- * they're gone. Exception: if FatalError is already set, that
2629- * implies we previously sent the startup process a SIGQUIT, so
2638+ * and set StartupStatus so we don't try to reinitialize after
2639+ * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
2640+ * then we previously sent the startup process a SIGQUIT; so
26302641 * that's probably the reason it died, and we do want to try to
26312642 * restart in that case.
26322643 */
26332644 if (!EXIT_STATUS_0 (exitstatus ))
26342645 {
2635- if (!FatalError )
2636- RecoveryError = true;
2646+ if (StartupStatus == STARTUP_SIGNALED )
2647+ StartupStatus = STARTUP_NOT_RUNNING ;
2648+ else
2649+ StartupStatus = STARTUP_CRASHED ;
26372650 HandleChildCrash (pid , exitstatus ,
26382651 _ ("startup process" ));
26392652 continue ;
@@ -2642,6 +2655,7 @@ reaper(SIGNAL_ARGS)
26422655 /*
26432656 * Startup succeeded, commence normal operations
26442657 */
2658+ StartupStatus = STARTUP_NOT_RUNNING ;
26452659 FatalError = false;
26462660 Assert (AbortStartTime == 0 );
26472661 ReachedNormalRunning = true;
@@ -2962,7 +2976,7 @@ CleanupBackgroundWorker(int pid,
29622976 ReportBackgroundWorkerPID (rw ); /* report child death */
29632977
29642978 LogChildExit (EXIT_STATUS_0 (exitstatus ) ? DEBUG1 : LOG ,
2965- namebuf , pid , exitstatus );
2979+ namebuf , pid , exitstatus );
29662980
29672981 return true;
29682982 }
@@ -3190,14 +3204,18 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
31903204
31913205 /* Take care of the startup process too */
31923206 if (pid == StartupPID )
3207+ {
31933208 StartupPID = 0 ;
3209+ StartupStatus = STARTUP_CRASHED ;
3210+ }
31943211 else if (StartupPID != 0 && take_action )
31953212 {
31963213 ereport (DEBUG2 ,
31973214 (errmsg_internal ("sending %s to process %d" ,
31983215 (SendStop ? "SIGSTOP" : "SIGQUIT" ),
31993216 (int ) StartupPID )));
32003217 signal_child (StartupPID , (SendStop ? SIGSTOP : SIGQUIT ));
3218+ StartupStatus = STARTUP_SIGNALED ;
32013219 }
32023220
32033221 /* Take care of the bgwriter too */
@@ -3589,13 +3607,14 @@ PostmasterStateMachine(void)
35893607 }
35903608
35913609 /*
3592- * If recovery failed, or the user does not want an automatic restart
3593- * after backend crashes, wait for all non-syslogger children to exit, and
3594- * then exit postmaster. We don't try to reinitialize when recovery fails,
3595- * because more than likely it will just fail again and we will keep
3596- * trying forever.
3610+ * If the startup process failed, or the user does not want an automatic
3611+ * restart after backend crashes, wait for all non-syslogger children to
3612+ * exit, and then exit postmaster. We don't try to reinitialize when the
3613+ * startup process fails, because more than likely it will just fail again
3614+ * and we will keep trying forever.
35973615 */
3598- if (pmState == PM_NO_CHILDREN && (RecoveryError || !restart_after_crash ))
3616+ if (pmState == PM_NO_CHILDREN &&
3617+ (StartupStatus == STARTUP_CRASHED || !restart_after_crash ))
35993618 ExitPostmaster (1 );
36003619
36013620 /*
@@ -3615,6 +3634,7 @@ PostmasterStateMachine(void)
36153634
36163635 StartupPID = StartupDataBase ();
36173636 Assert (StartupPID != 0 );
3637+ StartupStatus = STARTUP_RUNNING ;
36183638 pmState = PM_STARTUP ;
36193639 /* crash recovery started, reset SIGKILL flag */
36203640 AbortStartTime = 0 ;
@@ -3746,7 +3766,11 @@ TerminateChildren(int signal)
37463766{
37473767 SignalChildren (signal );
37483768 if (StartupPID != 0 )
3769+ {
37493770 signal_child (StartupPID , signal );
3771+ if (signal == SIGQUIT || signal == SIGKILL )
3772+ StartupStatus = STARTUP_SIGNALED ;
3773+ }
37503774 if (BgWriterPID != 0 )
37513775 signal_child (BgWriterPID , signal );
37523776 if (CheckpointerPID != 0 )
0 commit comments