2828#include <time.h>
2929#include <sys/types.h>
3030#include <sys/stat.h>
31+ #include <sys/wait.h>
3132#include <unistd.h>
3233
3334#ifdef HAVE_SYS_RESOURCE_H
@@ -153,10 +154,10 @@ static int CreateRestrictedProcess(char *cmd, PROCESS_INFORMATION *processInfo,
153154static pgpid_t get_pgpid (bool is_status_request );
154155static char * * readfile (const char * path );
155156static void free_readfile (char * * optlines );
156- static int start_postmaster (void );
157+ static pgpid_t start_postmaster (void );
157158static void read_post_opts (void );
158159
159- static PGPing test_postmaster_connection (bool );
160+ static PGPing test_postmaster_connection (pgpid_t pm_pid , bool do_checkpoint );
160161static bool postmaster_is_alive (pid_t pid );
161162
162163#if defined(HAVE_GETRLIMIT ) && defined(RLIMIT_CORE )
@@ -419,36 +420,73 @@ free_readfile(char **optlines)
419420 * start/test/stop routines
420421 */
421422
422- static int
423+ /*
424+ * Start the postmaster and return its PID.
425+ *
426+ * Currently, on Windows what we return is the PID of the shell process
427+ * that launched the postmaster (and, we trust, is waiting for it to exit).
428+ * So the PID is usable for "is the postmaster still running" checks,
429+ * but cannot be compared directly to postmaster.pid.
430+ *
431+ * On Windows, we also save aside a handle to the shell process in
432+ * "postmasterProcess", which the caller should close when done with it.
433+ */
434+ static pgpid_t
423435start_postmaster (void )
424436{
425437 char cmd [MAXPGPATH ];
426438
427439#ifndef WIN32
440+ pgpid_t pm_pid ;
441+
442+ /* Flush stdio channels just before fork, to avoid double-output problems */
443+ fflush (stdout );
444+ fflush (stderr );
445+
446+ pm_pid = fork ();
447+ if (pm_pid < 0 )
448+ {
449+ /* fork failed */
450+ write_stderr (_ ("%s: could not start server: %s\n" ),
451+ progname , strerror (errno ));
452+ exit (1 );
453+ }
454+ if (pm_pid > 0 )
455+ {
456+ /* fork succeeded, in parent */
457+ return pm_pid ;
458+ }
459+
460+ /* fork succeeded, in child */
428461
429462 /*
430463 * Since there might be quotes to handle here, it is easier simply to pass
431- * everything to a shell to process them.
432- *
433- * XXX it would be better to fork and exec so that we would know the child
434- * postmaster's PID directly; then test_postmaster_connection could use
435- * the PID without having to rely on reading it back from the pidfile.
464+ * everything to a shell to process them. Use exec so that the postmaster
465+ * has the same PID as the current child process.
436466 */
437467 if (log_file != NULL )
438- snprintf (cmd , MAXPGPATH , "\"%s\" %s%s < \"%s\" >> \"%s\" 2>&1 & " ,
468+ snprintf (cmd , MAXPGPATH , "exec \"%s\" %s%s < \"%s\" >> \"%s\" 2>&1" ,
439469 exec_path , pgdata_opt , post_opts ,
440470 DEVNULL , log_file );
441471 else
442- snprintf (cmd , MAXPGPATH , "\"%s\" %s%s < \"%s\" 2>&1 & " ,
472+ snprintf (cmd , MAXPGPATH , "exec \"%s\" %s%s < \"%s\" 2>&1" ,
443473 exec_path , pgdata_opt , post_opts , DEVNULL );
444474
445- return system (cmd );
475+ (void ) execl ("/bin/sh" , "/bin/sh" , "-c" , cmd , (char * ) NULL );
476+
477+ /* exec failed */
478+ write_stderr (_ ("%s: could not start server: %s\n" ),
479+ progname , strerror (errno ));
480+ exit (1 );
481+
482+ return 0 ; /* keep dumb compilers quiet */
483+
446484#else /* WIN32 */
447485
448486 /*
449- * On win32 we don't use system(). So we don't need to use & (which would
450- * be START /B on win32). However, we still call the shell ( CMD.EXE) with
451- * it to handle redirection etc .
487+ * As with the Unix case, it's easiest to use the shell (CMD.EXE) to
488+ * handle redirection etc. Unfortunately CMD.EXE lacks any equivalent of
489+ * "exec", so we don't get to find out the postmaster's PID immediately .
452490 */
453491 PROCESS_INFORMATION pi ;
454492
@@ -460,10 +498,15 @@ start_postmaster(void)
460498 exec_path , pgdata_opt , post_opts , DEVNULL );
461499
462500 if (!CreateRestrictedProcess (cmd , & pi , false))
463- return GetLastError ();
464- CloseHandle (pi .hProcess );
501+ {
502+ write_stderr (_ ("%s: could not start server: error code %lu\n" ),
503+ progname , (unsigned long ) GetLastError ());
504+ exit (1 );
505+ }
506+ /* Don't close command process handle here; caller must do so */
507+ postmasterProcess = pi .hProcess ;
465508 CloseHandle (pi .hThread );
466- return 0 ;
509+ return pi . dwProcessId ; /* Shell's PID, not postmaster's! */
467510#endif /* WIN32 */
468511}
469512
@@ -472,15 +515,21 @@ start_postmaster(void)
472515/*
473516 * Find the pgport and try a connection
474517 *
518+ * On Unix, pm_pid is the PID of the just-launched postmaster. On Windows,
519+ * it may be the PID of an ancestor shell process, so we can't check the
520+ * contents of postmaster.pid quite as carefully.
521+ *
522+ * On Windows, the static variable postmasterProcess is an implicit argument
523+ * to this routine; it contains a handle to the postmaster process or an
524+ * ancestor shell process thereof.
525+ *
475526 * Note that the checkpoint parameter enables a Windows service control
476527 * manager checkpoint, it's got nothing to do with database checkpoints!!
477528 */
478529static PGPing
479- test_postmaster_connection (bool do_checkpoint )
530+ test_postmaster_connection (pgpid_t pm_pid , bool do_checkpoint )
480531{
481532 PGPing ret = PQPING_NO_RESPONSE ;
482- bool found_stale_pidfile = false;
483- pgpid_t pm_pid = 0 ;
484533 char connstr [MAXPGPATH * 2 + 256 ];
485534 int i ;
486535
@@ -535,29 +584,27 @@ test_postmaster_connection(bool do_checkpoint)
535584 optlines [5 ] != NULL )
536585 {
537586 /* File is complete enough for us, parse it */
538- long pmpid ;
587+ pgpid_t pmpid ;
539588 time_t pmstart ;
540589
541590 /*
542- * Make sanity checks. If it's for a standalone backend
543- * (negative PID), or the recorded start time is before
544- * pg_ctl started, then either we are looking at the wrong
545- * data directory, or this is a pre-existing pidfile that
546- * hasn't (yet?) been overwritten by our child postmaster.
547- * Allow 2 seconds slop for possible cross-process clock
548- * skew.
591+ * Make sanity checks. If it's for the wrong PID, or the
592+ * recorded start time is before pg_ctl started, then
593+ * either we are looking at the wrong data directory, or
594+ * this is a pre-existing pidfile that hasn't (yet?) been
595+ * overwritten by our child postmaster. Allow 2 seconds
596+ * slop for possible cross-process clock skew.
549597 */
550598 pmpid = atol (optlines [LOCK_FILE_LINE_PID - 1 ]);
551599 pmstart = atol (optlines [LOCK_FILE_LINE_START_TIME - 1 ]);
552- if (pmpid <= 0 || pmstart < start_time - 2 )
553- {
554- /*
555- * Set flag to report stale pidfile if it doesn't get
556- * overwritten before we give up waiting.
557- */
558- found_stale_pidfile = true;
559- }
560- else
600+ if (pmstart >= start_time - 2 &&
601+ #ifndef WIN32
602+ pmpid == pm_pid
603+ #else
604+ /* Windows can only reject standalone-backend PIDs */
605+ pmpid > 0
606+ #endif
607+ )
561608 {
562609 /*
563610 * OK, seems to be a valid pidfile from our child.
@@ -567,9 +614,6 @@ test_postmaster_connection(bool do_checkpoint)
567614 char * hostaddr ;
568615 char host_str [MAXPGPATH ];
569616
570- found_stale_pidfile = false;
571- pm_pid = (pgpid_t ) pmpid ;
572-
573617 /*
574618 * Extract port number and host string to use. Prefer
575619 * using Unix socket if available.
@@ -635,42 +679,23 @@ test_postmaster_connection(bool do_checkpoint)
635679 }
636680
637681 /*
638- * The postmaster should create postmaster.pid very soon after being
639- * started. If it's not there after we've waited 5 or more seconds,
640- * assume startup failed and give up waiting. (Note this covers both
641- * cases where the pidfile was never created, and where it was created
642- * and then removed during postmaster exit.) Also, if there *is* a
643- * file there but it appears stale, issue a suitable warning and give
644- * up waiting.
682+ * Check whether the child postmaster process is still alive. This
683+ * lets us exit early if the postmaster fails during startup.
684+ *
685+ * On Windows, we may be checking the postmaster's parent shell, but
686+ * that's fine for this purpose.
645687 */
646- if ( i >= 5 )
688+ #ifndef WIN32
647689 {
648- struct stat statbuf ;
690+ int exitstatus ;
649691
650- if (stat (pid_file , & statbuf ) != 0 )
651- {
652- if (errno != ENOENT )
653- write_stderr (_ ("\n%s: could not stat file \"%s\": %s\n" ),
654- progname , pid_file , strerror (errno ));
655- return PQPING_NO_RESPONSE ;
656- }
657-
658- if (found_stale_pidfile )
659- {
660- write_stderr (_ ("\n%s: this data directory appears to be running a pre-existing postmaster\n" ),
661- progname );
692+ if (waitpid ((pid_t ) pm_pid , & exitstatus , WNOHANG ) == (pid_t ) pm_pid )
662693 return PQPING_NO_RESPONSE ;
663- }
664694 }
665-
666- /*
667- * If we've been able to identify the child postmaster's PID, check
668- * the process is still alive. This covers cases where the postmaster
669- * successfully created the pidfile but then crashed without removing
670- * it.
671- */
672- if (pm_pid > 0 && !postmaster_is_alive ((pid_t ) pm_pid ))
695+ #else
696+ if (WaitForSingleObject (postmasterProcess , 0 ) == WAIT_OBJECT_0 )
673697 return PQPING_NO_RESPONSE ;
698+ #endif
674699
675700 /* No response, or startup still in process; wait */
676701#if defined(WIN32 )
@@ -836,7 +861,7 @@ static void
836861do_start (void )
837862{
838863 pgpid_t old_pid = 0 ;
839- int exitcode ;
864+ pgpid_t pm_pid ;
840865
841866 if (ctl_command != RESTART_COMMAND )
842867 {
@@ -876,19 +901,13 @@ do_start(void)
876901 }
877902#endif
878903
879- exitcode = start_postmaster ();
880- if (exitcode != 0 )
881- {
882- write_stderr (_ ("%s: could not start server: exit code was %d\n" ),
883- progname , exitcode );
884- exit (1 );
885- }
904+ pm_pid = start_postmaster ();
886905
887906 if (do_wait )
888907 {
889908 print_msg (_ ("waiting for server to start..." ));
890909
891- switch (test_postmaster_connection (false))
910+ switch (test_postmaster_connection (pm_pid , false))
892911 {
893912 case PQPING_OK :
894913 print_msg (_ (" done\n" ));
@@ -914,6 +933,12 @@ do_start(void)
914933 }
915934 else
916935 print_msg (_ ("server starting\n" ));
936+
937+ #ifdef WIN32
938+ /* Now we don't need the handle to the shell process anymore */
939+ CloseHandle (postmasterProcess );
940+ postmasterProcess = INVALID_HANDLE_VALUE ;
941+ #endif
917942}
918943
919944
@@ -1585,7 +1610,7 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv)
15851610 if (do_wait )
15861611 {
15871612 write_eventlog (EVENTLOG_INFORMATION_TYPE , _ ("Waiting for server startup...\n" ));
1588- if (test_postmaster_connection (true) != PQPING_OK )
1613+ if (test_postmaster_connection (postmasterPID , true) != PQPING_OK )
15891614 {
15901615 write_eventlog (EVENTLOG_ERROR_TYPE , _ ("Timed out waiting for server startup\n" ));
15911616 pgwin32_SetServiceStatus (SERVICE_STOPPED );
@@ -1606,10 +1631,9 @@ pgwin32_ServiceMain(DWORD argc, LPTSTR *argv)
16061631 {
16071632 /*
16081633 * status.dwCheckPoint can be incremented by
1609- * test_postmaster_connection(true), so it might not start
1610- * from 0.
1634+ * test_postmaster_connection(), so it might not start from 0.
16111635 */
1612- int maxShutdownCheckPoint = status .dwCheckPoint + 12 ;;
1636+ int maxShutdownCheckPoint = status .dwCheckPoint + 12 ;
16131637
16141638 kill (postmasterPID , SIGINT );
16151639
0 commit comments