2828#include <time.h>
2929#include <sys/types.h>
3030#include <sys/stat.h>
31+ #include <sys/wait.h>
3132#include <unistd.h>
3233
3334#ifdef HAVE_SYS_RESOURCE_H
@@ -156,10 +157,10 @@ static int CreateRestrictedProcess(char *cmd, PROCESS_INFORMATION *processInfo,
156157static pgpid_t get_pgpid (bool is_status_request );
157158static char * * readfile (const char * path );
158159static void free_readfile (char * * optlines );
159- static int start_postmaster (void );
160+ static pgpid_t start_postmaster (void );
160161static void read_post_opts (void );
161162
162- static PGPing test_postmaster_connection (bool );
163+ static PGPing test_postmaster_connection (pgpid_t pm_pid , bool do_checkpoint );
163164static bool postmaster_is_alive (pid_t pid );
164165
165166#if defined(HAVE_GETRLIMIT ) && defined(RLIMIT_CORE )
@@ -420,36 +421,73 @@ free_readfile(char **optlines)
420421 * start/test/stop routines
421422 */
422423
423- static int
424+ /*
425+ * Start the postmaster and return its PID.
426+ *
427+ * Currently, on Windows what we return is the PID of the shell process
428+ * that launched the postmaster (and, we trust, is waiting for it to exit).
429+ * So the PID is usable for "is the postmaster still running" checks,
430+ * but cannot be compared directly to postmaster.pid.
431+ *
432+ * On Windows, we also save aside a handle to the shell process in
433+ * "postmasterProcess", which the caller should close when done with it.
434+ */
435+ static pgpid_t
424436start_postmaster (void )
425437{
426438 char cmd [MAXPGPATH ];
427439
428440#ifndef WIN32
441+ pgpid_t pm_pid ;
442+
443+ /* Flush stdio channels just before fork, to avoid double-output problems */
444+ fflush (stdout );
445+ fflush (stderr );
446+
447+ pm_pid = fork ();
448+ if (pm_pid < 0 )
449+ {
450+ /* fork failed */
451+ write_stderr (_ ("%s: could not start server: %s\n" ),
452+ progname , strerror (errno ));
453+ exit (1 );
454+ }
455+ if (pm_pid > 0 )
456+ {
457+ /* fork succeeded, in parent */
458+ return pm_pid ;
459+ }
460+
461+ /* fork succeeded, in child */
429462
430463 /*
431464 * Since there might be quotes to handle here, it is easier simply to pass
432- * everything to a shell to process them.
433- *
434- * XXX it would be better to fork and exec so that we would know the child
435- * postmaster's PID directly; then test_postmaster_connection could use
436- * the PID without having to rely on reading it back from the pidfile.
465+ * everything to a shell to process them. Use exec so that the postmaster
466+ * has the same PID as the current child process.
437467 */
438468 if (log_file != NULL )
439- snprintf (cmd , MAXPGPATH , "\"%s\" %s%s < \"%s\" >> \"%s\" 2>&1 & " ,
469+ snprintf (cmd , MAXPGPATH , "exec \"%s\" %s%s < \"%s\" >> \"%s\" 2>&1" ,
440470 exec_path , pgdata_opt , post_opts ,
441471 DEVNULL , log_file );
442472 else
443- snprintf (cmd , MAXPGPATH , "\"%s\" %s%s < \"%s\" 2>&1 & " ,
473+ snprintf (cmd , MAXPGPATH , "exec \"%s\" %s%s < \"%s\" 2>&1" ,
444474 exec_path , pgdata_opt , post_opts , DEVNULL );
445475
446- return system (cmd );
476+ (void ) execl ("/bin/sh" , "/bin/sh" , "-c" , cmd , (char * ) NULL );
477+
478+ /* exec failed */
479+ write_stderr (_ ("%s: could not start server: %s\n" ),
480+ progname , strerror (errno ));
481+ exit (1 );
482+
483+ return 0 ; /* keep dumb compilers quiet */
484+
447485#else /* WIN32 */
448486
449487 /*
450- * On win32 we don't use system(). So we don't need to use & (which would
451- * be START /B on win32). However, we still call the shell ( CMD.EXE) with
452- * it to handle redirection etc .
488+ * As with the Unix case, it's easiest to use the shell (CMD.EXE) to
489+ * handle redirection etc. Unfortunately CMD.EXE lacks any equivalent of
490+ * "exec", so we don't get to find out the postmaster's PID immediately .
453491 */
454492 PROCESS_INFORMATION pi ;
455493
@@ -461,10 +499,15 @@ start_postmaster(void)
461499 exec_path , pgdata_opt , post_opts , DEVNULL );
462500
463501 if (!CreateRestrictedProcess (cmd , & pi , false))
464- return GetLastError ();
465- CloseHandle (pi .hProcess );
502+ {
503+ write_stderr (_ ("%s: could not start server: error code %lu\n" ),
504+ progname , (unsigned long ) GetLastError ());
505+ exit (1 );
506+ }
507+ /* Don't close command process handle here; caller must do so */
508+ postmasterProcess = pi .hProcess ;
466509 CloseHandle (pi .hThread );
467- return 0 ;
510+ return pi . dwProcessId ; /* Shell's PID, not postmaster's! */
468511#endif /* WIN32 */
469512}
470513
@@ -473,15 +516,21 @@ start_postmaster(void)
473516/*
474517 * Find the pgport and try a connection
475518 *
519+ * On Unix, pm_pid is the PID of the just-launched postmaster. On Windows,
520+ * it may be the PID of an ancestor shell process, so we can't check the
521+ * contents of postmaster.pid quite as carefully.
522+ *
523+ * On Windows, the static variable postmasterProcess is an implicit argument
524+ * to this routine; it contains a handle to the postmaster process or an
525+ * ancestor shell process thereof.
526+ *
476527 * Note that the checkpoint parameter enables a Windows service control
477528 * manager checkpoint, it's got nothing to do with database checkpoints!!
478529 */
479530static PGPing
480- test_postmaster_connection (bool do_checkpoint )
531+ test_postmaster_connection (pgpid_t pm_pid , bool do_checkpoint )
481532{
482533 PGPing ret = PQPING_NO_RESPONSE ;
483- bool found_stale_pidfile = false;
484- pgpid_t pm_pid = 0 ;
485534 char connstr [MAXPGPATH * 2 + 256 ];
486535 int i ;
487536
@@ -536,29 +585,27 @@ test_postmaster_connection(bool do_checkpoint)
536585 optlines [5 ] != NULL )
537586 {
538587 /* File is complete enough for us, parse it */
539- long pmpid ;
588+ pgpid_t pmpid ;
540589 time_t pmstart ;
541590
542591 /*
543- * Make sanity checks. If it's for a standalone backend
544- * (negative PID), or the recorded start time is before
545- * pg_ctl started, then either we are looking at the wrong
546- * data directory, or this is a pre-existing pidfile that
547- * hasn't (yet?) been overwritten by our child postmaster.
548- * Allow 2 seconds slop for possible cross-process clock
549- * skew.
592+ * Make sanity checks. If it's for the wrong PID, or the
593+ * recorded start time is before pg_ctl started, then
594+ * either we are looking at the wrong data directory, or
595+ * this is a pre-existing pidfile that hasn't (yet?) been
596+ * overwritten by our child postmaster. Allow 2 seconds
597+ * slop for possible cross-process clock skew.
550598 */
551599 pmpid = atol (optlines [LOCK_FILE_LINE_PID - 1 ]);
552600 pmstart = atol (optlines [LOCK_FILE_LINE_START_TIME - 1 ]);
553- if (pmpid <= 0 || pmstart < start_time - 2 )
554- {
555- /*
556- * Set flag to report stale pidfile if it doesn't get
557- * overwritten before we give up waiting.
558- */
559- found_stale_pidfile = true;
560- }
561- else
601+ if (pmstart >= start_time - 2 &&
602+ #ifndef WIN32
603+ pmpid == pm_pid
604+ #else
605+ /* Windows can only reject standalone-backend PIDs */
606+ pmpid > 0
607+ #endif
608+ )
562609 {
563610 /*
564611 * OK, seems to be a valid pidfile from our child.
@@ -568,9 +615,6 @@ test_postmaster_connection(bool do_checkpoint)
568615 char * hostaddr ;
569616 char host_str [MAXPGPATH ];
570617
571- found_stale_pidfile = false;
572- pm_pid = (pgpid_t ) pmpid ;
573-
574618 /*
575619 * Extract port number and host string to use. Prefer
576620 * using Unix socket if available.
@@ -636,37 +680,23 @@ test_postmaster_connection(bool do_checkpoint)
636680 }
637681
638682 /*
639- * The postmaster should create postmaster.pid very soon after being
640- * started. If it's not there after we've waited 5 or more seconds,
641- * assume startup failed and give up waiting. (Note this covers both
642- * cases where the pidfile was never created, and where it was created
643- * and then removed during postmaster exit.) Also, if there *is* a
644- * file there but it appears stale, issue a suitable warning and give
645- * up waiting.
683+ * Check whether the child postmaster process is still alive. This
684+ * lets us exit early if the postmaster fails during startup.
685+ *
686+ * On Windows, we may be checking the postmaster's parent shell, but
687+ * that's fine for this purpose.
646688 */
647- if ( i >= 5 )
689+ #ifndef WIN32
648690 {
649- struct stat statbuf ;
650-
651- if (stat (pid_file , & statbuf ) != 0 )
652- return PQPING_NO_RESPONSE ;
691+ int exitstatus ;
653692
654- if (found_stale_pidfile )
655- {
656- write_stderr (_ ("\n%s: this data directory appears to be running a pre-existing postmaster\n" ),
657- progname );
693+ if (waitpid ((pid_t ) pm_pid , & exitstatus , WNOHANG ) == (pid_t ) pm_pid )
658694 return PQPING_NO_RESPONSE ;
659- }
660695 }
661-
662- /*
663- * If we've been able to identify the child postmaster's PID, check
664- * the process is still alive. This covers cases where the postmaster
665- * successfully created the pidfile but then crashed without removing
666- * it.
667- */
668- if (pm_pid > 0 && !postmaster_is_alive ((pid_t ) pm_pid ))
696+ #else
697+ if (WaitForSingleObject (postmasterProcess , 0 ) == WAIT_OBJECT_0 )
669698 return PQPING_NO_RESPONSE ;
699+ #endif
670700
671701 /* No response, or startup still in process; wait */
672702#if defined(WIN32 )
@@ -832,7 +862,7 @@ static void
832862do_start (void )
833863{
834864 pgpid_t old_pid = 0 ;
835- int exitcode ;
865+ pgpid_t pm_pid ;
836866
837867 if (ctl_command != RESTART_COMMAND )
838868 {
@@ -872,19 +902,13 @@ do_start(void)
872902 }
873903#endif
874904
875- exitcode = start_postmaster ();
876- if (exitcode != 0 )
877- {
878- write_stderr (_ ("%s: could not start server: exit code was %d\n" ),
879- progname , exitcode );
880- exit (1 );
881- }
905+ pm_pid = start_postmaster ();
882906
883907 if (do_wait )
884908 {
885909 print_msg (_ ("waiting for server to start..." ));
886910
887- switch (test_postmaster_connection (false))
911+ switch (test_postmaster_connection (pm_pid , false))
888912 {
889913 case PQPING_OK :
890914 print_msg (_ (" done\n" ));
@@ -910,6 +934,12 @@ do_start(void)
910934 }
911935 else
912936 print_msg (_ ("server starting\n" ));
937+
938+ #ifdef WIN32
939+ /* Now we don't need the handle to the shell process anymore */
940+ CloseHandle (postmasterProcess );
941+ postmasterProcess = INVALID_HANDLE_VALUE ;
942+ #endif
913943}
914944
915945
@@ -1572,7 +1602,7 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv)
15721602 if (do_wait )
15731603 {
15741604 write_eventlog (EVENTLOG_INFORMATION_TYPE , _ ("Waiting for server startup...\n" ));
1575- if (test_postmaster_connection (true) != PQPING_OK )
1605+ if (test_postmaster_connection (postmasterPID , true) != PQPING_OK )
15761606 {
15771607 write_eventlog (EVENTLOG_ERROR_TYPE , _ ("Timed out waiting for server startup\n" ));
15781608 pgwin32_SetServiceStatus (SERVICE_STOPPED );
@@ -1593,10 +1623,9 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv)
15931623 {
15941624 /*
15951625 * status.dwCheckPoint can be incremented by
1596- * test_postmaster_connection(true), so it might not
1597- * start from 0.
1626+ * test_postmaster_connection(), so it might not start from 0.
15981627 */
1599- int maxShutdownCheckPoint = status .dwCheckPoint + 12 ; ;
1628+ int maxShutdownCheckPoint = status .dwCheckPoint + 12 ;
16001629
16011630 kill (postmasterPID , SIGINT );
16021631
0 commit comments