1/*-------------------------------------------------------------------------
2 *
3 * postmaster.c
4 * This program acts as a clearing house for requests to the
5 * POSTGRES system. Frontend programs send a startup message
6 * to the Postmaster and the postmaster uses the info in the
7 * message to setup a backend process.
8 *
9 * The postmaster also manages system-wide operations such as
10 * startup and shutdown. The postmaster itself doesn't do those
11 * operations, mind you --- it just forks off a subprocess to do them
12 * at the right times. It also takes care of resetting the system
13 * if a backend crashes.
14 *
15 * The postmaster process creates the shared memory and semaphore
16 * pools during startup, but as a rule does not touch them itself.
17 * In particular, it is not a member of the PGPROC array of backends
18 * and so it cannot participate in lock-manager operations. Keeping
19 * the postmaster away from shared memory operations makes it simpler
20 * and more reliable. The postmaster is almost always able to recover
21 * from crashes of individual backends by resetting shared memory;
22 * if it did much with shared memory then it would be prone to crashing
23 * along with the backends.
24 *
25 * When a request message is received, we now fork() immediately.
26 * The child process performs authentication of the request, and
27 * then becomes a backend if successful. This allows the auth code
28 * to be written in a simple single-threaded style (as opposed to the
29 * crufty "poor man's multitasking" code that used to be needed).
30 * More importantly, it ensures that blockages in non-multithreaded
31 * libraries like SSL or PAM cannot cause denial of service to other
32 * clients.
33 *
34 *
35 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
36 * Portions Copyright (c) 1994, Regents of the University of California
37 *
38 *
39 * IDENTIFICATION
40 * src/backend/postmaster/postmaster.c
41 *
42 * NOTES
43 *
44 * Initialization:
45 * The Postmaster sets up shared memory data structures
46 * for the backends.
47 *
48 * Synchronization:
49 * The Postmaster shares memory with the backends but should avoid
50 * touching shared memory, so as not to become stuck if a crashing
51 * backend screws up locks or shared memory. Likewise, the Postmaster
52 * should never block on messages from frontend clients.
53 *
54 * Garbage Collection:
55 * The Postmaster cleans up after backends if they have an emergency
56 * exit and/or core dump.
57 *
58 * Error Reporting:
59 * Use write_stderr() only for reporting "interactive" errors
60 * (essentially, bogus arguments on the command line). Once the
61 * postmaster is launched, use ereport().
62 *
63 *-------------------------------------------------------------------------
64 */
65
66#include "postgres.h"
67
68#include <unistd.h>
69#include <signal.h>
70#include <time.h>
71#include <sys/wait.h>
72#include <ctype.h>
73#include <sys/stat.h>
74#include <sys/socket.h>
75#include <fcntl.h>
76#include <sys/param.h>
77#include <netdb.h>
78#include <limits.h>
79
80#ifdef HAVE_SYS_SELECT_H
81#include <sys/select.h>
82#endif
83
84#ifdef USE_BONJOUR
85#include <dns_sd.h>
86#endif
87
88#ifdef USE_SYSTEMD
89#include <systemd/sd-daemon.h>
90#endif
91
92#ifdef HAVE_PTHREAD_IS_THREADED_NP
93#include <pthread.h>
94#endif
95
96#include "access/transam.h"
97#include "access/xlog.h"
98#include "bootstrap/bootstrap.h"
99#include "catalog/pg_control.h"
100#include "common/file_perm.h"
101#include "common/ip.h"
102#include "common/string.h"
103#include "lib/ilist.h"
104#include "libpq/auth.h"
105#include "libpq/libpq.h"
106#include "libpq/pqformat.h"
107#include "libpq/pqsignal.h"
108#include "miscadmin.h"
109#include "pg_getopt.h"
110#include "pgstat.h"
111#include "port/pg_bswap.h"
112#include "postmaster/autovacuum.h"
113#include "postmaster/bgworker_internals.h"
114#include "postmaster/fork_process.h"
115#include "postmaster/pgarch.h"
116#include "postmaster/postmaster.h"
117#include "postmaster/syslogger.h"
118#include "replication/logicallauncher.h"
119#include "replication/walsender.h"
120#include "storage/fd.h"
121#include "storage/ipc.h"
122#include "storage/pg_shmem.h"
123#include "storage/pmsignal.h"
124#include "storage/proc.h"
125#include "tcop/tcopprot.h"
126#include "utils/builtins.h"
127#include "utils/datetime.h"
128#include "utils/memutils.h"
129#include "utils/pidfile.h"
130#include "utils/ps_status.h"
131#include "utils/timeout.h"
132#include "utils/timestamp.h"
133#include "utils/varlena.h"
134
135#ifdef EXEC_BACKEND
136#include "storage/spin.h"
137#endif
138
139
140/*
141 * Possible types of a backend. Each value is a distinct bit, so beyond being
142 * the possible bkend_type values in struct bkend, they can be OR'ed together
143 * into a request mask for SignalSomeChildren() and CountChildren().
144 */
145#define BACKEND_TYPE_NORMAL 0x0001 /* normal backend */
146#define BACKEND_TYPE_AUTOVAC 0x0002 /* autovacuum worker process */
147#define BACKEND_TYPE_WALSND 0x0004 /* walsender process */
148#define BACKEND_TYPE_BGWORKER 0x0008 /* bgworker process */
149#define BACKEND_TYPE_ALL 0x000F /* OR of all four type bits above */
150
/* Mask matching both kinds of worker: autovacuum workers and bgworkers */
151#define BACKEND_TYPE_WORKER (BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER)
152
153/*
154 * List of active backends (or child processes anyway; we don't actually
155 * know whether a given child has become a backend or is still in the
156 * authorization phase). This is used mainly to keep track of how many
157 * children we have and send them appropriate signals when necessary.
158 *
159 * "Special" children such as the startup, bgwriter and autovacuum launcher
160 * tasks are not in this list. Autovacuum worker and walsender are in it.
161 * Also, "dead_end" children are in it: these are children launched just for
162 * the purpose of sending a friendly rejection message to a would-be client.
163 * We must track them because they are attached to shared memory, but we know
164 * they will never become live backends. dead_end children are not assigned a
165 * PMChildSlot.
166 *
167 * Background workers are in this list, too. Every entry is a Backend struct,
168 * chained into BackendList through its "elem" member. */
169typedef struct bkend
170{
171 pid_t pid; /* process id of backend */
172 int32 cancel_key; /* cancel key for cancels for this backend */
173 int child_slot; /* PMChildSlot for this backend, if any (dead_end
 * children are not assigned one) */
174
175 /*
176 * Flavor of backend or auxiliary process, expressed as one of the
177 * BACKEND_TYPE_* values. Note that BACKEND_TYPE_WALSND backends initially
178 * announce themselves as BACKEND_TYPE_NORMAL, so if bkend_type is normal,
179 * you should check for a recent transition. */
180 int bkend_type;
181 bool dead_end; /* is it going to send an error and quit? */
182 bool bgworker_notify; /* gets bgworker start/stop notifications */
183 dlist_node elem; /* list link in BackendList */
184} Backend;
185
/* Head of the list of Backend structs; members are linked via Backend.elem */
186static dlist_head BackendList = DLIST_STATIC_INIT(BackendList);
187
188#ifdef EXEC_BACKEND
/*
 * Exists only in EXEC_BACKEND builds. The pointer value is also handed to
 * exec'ed children through the BackendParameters struct.
 */
189static Backend *ShmemBackendArray;
190#endif
191
/* NOTE(review): presumably this process's own bgworker entry; NULL until set -- confirm */
192BackgroundWorker *MyBgworkerEntry = NULL;
193
194
195
196/* The port number we are listening for connections on */
197int PostPortNumber;
198
199/* The directory names for Unix socket(s) */
200char *Unix_socket_directories;
201
202/* The TCP listen address(es) */
203char *ListenAddresses;
204
205/*
206 * ReservedBackends is the number of backends reserved for superuser use.
207 * This number is taken out of the pool size given by MaxConnections, so the
208 * number of backend slots available to non-superusers is
209 * (MaxConnections - ReservedBackends). Note what this really means is
210 * "if there are <= ReservedBackends connections available, only superusers
211 * can make new connections" --- pre-existing superuser connections don't
212 * count against the limit. PostmasterMain refuses to start unless
213 * ReservedBackends is less than MaxConnections. */
214int ReservedBackends;
215
216/* The socket(s) we're listening to; PostmasterMain initializes every entry to PGINVALID_SOCKET. */
217#define MAXLISTEN 64
218static pgsocket ListenSocket[MAXLISTEN];
219
220/*
221 * Set by the -o option: each -o argument is appended, preceded by one space.
222 */
223static char ExtraOptions[MAXPGPATH];
224
/*
 * These globals control the behavior of the postmaster in case some
 * backend dumps core.  Normally, it kills all peers of the dead backend
 * and reinitializes shared memory.  By specifying -T or -n, we can have
 * the postmaster stop (rather than kill) peers and not reinitialize
 * shared data structures.  (Reinit is currently dead code, though.)
 *
 * Reinit   - cleared by the -n command-line option.
 * SendStop - set by the -T command-line option; peers of a crashed backend
 *            are then sent SIGSTOP rather than SIGQUIT.
 *
 * Both are plain on/off flags, so both are declared bool.
 */
static bool Reinit = true;
static bool SendStop = false;
234
235/* still more option variables */
236bool EnableSSL = false; /* if true, PostmasterMain initializes the SSL library (USE_SSL builds only) */
237
/* NOTE(review): units of the next two settings are not visible in this part of the file -- confirm */
238int PreAuthDelay = 0;
239int AuthenticationTimeout = 60;
240
241bool log_hostname; /* for ps display and logging */
242bool Log_connections = false;
243bool Db_user_namespace = false;
244
245bool enable_bonjour = false;
246char *bonjour_name;
247bool restart_after_crash = true;
248
249/* PIDs of special child processes; 0 when not running */
250static pid_t StartupPID = 0,
251 BgWriterPID = 0,
252 CheckpointerPID = 0,
253 WalWriterPID = 0,
254 WalReceiverPID = 0,
255 AutoVacPID = 0,
256 PgArchPID = 0,
257 PgStatPID = 0,
258 SysLoggerPID = 0;
259
260/* Startup process's status, as recorded in the StartupStatus variable */
261typedef enum
262{
263 STARTUP_NOT_RUNNING, /* initial value of StartupStatus */
264 STARTUP_RUNNING,
265 STARTUP_SIGNALED, /* we sent it a SIGQUIT or SIGKILL */
266 STARTUP_CRASHED
267} StartupStatusEnum;
268
269static StartupStatusEnum StartupStatus = STARTUP_NOT_RUNNING;
270
271/* Startup/shutdown state: the possible values of the Shutdown variable */
272#define NoShutdown 0
273#define SmartShutdown 1
274#define FastShutdown 2
275#define ImmediateShutdown 3
276
277static int Shutdown = NoShutdown; /* holds one of the four values above */
278
279static bool FatalError = false; /* T if recovering from backend crash */
280
281/*
282 * We use a simple state machine to control startup, shutdown, and
283 * crash recovery (which is rather like shutdown followed by startup).
284 *
285 * After doing all the postmaster initialization work, we enter PM_STARTUP
286 * state and the startup process is launched. The startup process begins by
287 * reading the control file and other preliminary initialization steps.
288 * In a normal startup, or after crash recovery, the startup process exits
289 * with exit code 0 and we switch to PM_RUN state. However, archive recovery
290 * is handled specially since it takes much longer and we would like to support
291 * hot standby during archive recovery.
292 *
293 * When the startup process is ready to start archive recovery, it signals the
294 * postmaster, and we switch to PM_RECOVERY state. The background writer and
295 * checkpointer are launched, while the startup process continues applying WAL.
296 * If Hot Standby is enabled, then, after reaching a consistent point in WAL
297 * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
298 * state and begin accepting connections to perform read-only queries. When
299 * archive recovery is finished, the startup process exits with exit code 0
300 * and we switch to PM_RUN state.
301 *
302 * Normal child backends can only be launched when we are in PM_RUN or
303 * PM_HOT_STANDBY state. (We also allow launch of normal
304 * child backends in PM_WAIT_BACKUP state, but only for superusers.)
305 * In other states we handle connection requests by launching "dead_end"
306 * child processes, which will simply send the client an error message and
307 * quit. (We track these in the BackendList so that we can know when they
308 * are all gone; this is important because they're still connected to shared
309 * memory, and would interfere with an attempt to destroy the shmem segment,
310 * possibly leading to SHMALL failure when we try to make a new one.)
311 * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
312 * to drain out of the system, and therefore stop accepting connection
313 * requests at all until the last existing child has quit (which hopefully
314 * will not be very long).
315 *
316 * Notice that this state variable does not distinguish *why* we entered
317 * states later than PM_RUN --- Shutdown and FatalError must be consulted
318 * to find that out. FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY
319 * or PM_RUN state, nor in PM_SHUTDOWN states (because we don't enter those states
320 * when trying to recover from a crash). It can be true in PM_STARTUP state,
321 * because we don't clear it until we've successfully started WAL redo.
322 */
/*
 * States of the postmaster state machine. The long comment preceding this
 * enum describes the transitions between them.
 */
323typedef enum
324{
325 PM_INIT, /* postmaster starting; initial value of pmState */
326 PM_STARTUP, /* waiting for startup subprocess */
327 PM_RECOVERY, /* in archive recovery mode */
328 PM_HOT_STANDBY, /* in hot standby mode */
329 PM_RUN, /* normal "database is alive" state */
330 PM_WAIT_BACKUP, /* waiting for online backup mode to end */
331 PM_WAIT_READONLY, /* waiting for read only backends to exit */
332 PM_WAIT_BACKENDS, /* waiting for live backends to exit */
333 PM_SHUTDOWN, /* waiting for checkpointer to do shutdown
334 * ckpt */
335 PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
336 * finish */
337 PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */
338 PM_NO_CHILDREN /* all important children have exited */
339} PMState;
340
/* Current state of the state machine */
341static PMState pmState = PM_INIT;
342
343/* Start time of SIGKILL timeout during immediate shutdown or child crash */
344/* Zero means timeout is not running */
345static time_t AbortStartTime = 0;
346
347/* Length of said timeout, in seconds */
348#define SIGKILL_CHILDREN_AFTER_SECS 5
349
350static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
351
352bool ClientAuthInProgress = false; /* T during new-client
353 * authentication */
354
355bool redirection_done = false; /* stderr redirected for syslogger? */
356
357/* received START_AUTOVAC_LAUNCHER signal */
358static volatile sig_atomic_t start_autovac_launcher = false;
359
360/* the launcher needs to be signalled to communicate some condition */
361static volatile bool avlauncher_needs_signal = false;
362
363/* received START_WALRECEIVER signal */
364static volatile sig_atomic_t WalReceiverRequested = false;
365
366/* set when there's a worker that needs to be started up (note: starts out true) */
367static volatile bool StartWorkerNeeded = true;
/* NOTE(review): presumably set when a background worker has crashed -- confirm against its users */
368static volatile bool HaveCrashedWorker = false;
369
370#ifdef USE_SSL
371/* Set when and if SSL has been initialized properly */
372static bool LoadedSSL = false;
373#endif
374
375#ifdef USE_BONJOUR
/* Bonjour service reference; NULL initially */
376static DNSServiceRef bonjour_sdref = NULL;
377#endif
378
379/*
380 * postmaster.c - function prototypes
381 */
382static void CloseServerPorts(int status, Datum arg);
383static void unlink_external_pid_file(int status, Datum arg);
384static void getInstallationPaths(const char *argv0);
385static void checkControlFile(void);
386static Port *ConnCreate(int serverFd);
387static void ConnFree(Port *port);
388static void reset_shared(int port);
389static void SIGHUP_handler(SIGNAL_ARGS);
390static void pmdie(SIGNAL_ARGS);
391static void reaper(SIGNAL_ARGS);
392static void sigusr1_handler(SIGNAL_ARGS);
393static void startup_die(SIGNAL_ARGS);
394static void dummy_handler(SIGNAL_ARGS);
395static void StartupPacketTimeoutHandler(void);
396static void CleanupBackend(int pid, int exitstatus);
397static bool CleanupBackgroundWorker(int pid, int exitstatus);
398static void HandleChildCrash(int pid, int exitstatus, const char *procname);
399static void LogChildExit(int lev, const char *procname,
400 int pid, int exitstatus);
401static void PostmasterStateMachine(void);
402static void BackendInitialize(Port *port);
403static void BackendRun(Port *port) pg_attribute_noreturn();
404static void ExitPostmaster(int status) pg_attribute_noreturn();
405static int ServerLoop(void);
406static int BackendStartup(Port *port);
407static int ProcessStartupPacket(Port *port, bool secure_done);
408static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options);
409static void processCancelRequest(Port *port, void *pkt);
410static int initMasks(fd_set *rmask);
411static void report_fork_failure_to_client(Port *port, int errnum);
412static CAC_state canAcceptConnections(void);
413static bool RandomCancelKey(int32 *cancel_key);
414static void signal_child(pid_t pid, int signal);
415static bool SignalSomeChildren(int signal, int targets);
416static void TerminateChildren(int signal);
417
/* Shorthand: SignalSomeChildren() with a target mask covering every backend type */
418#define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
419
420static int CountChildren(int target);
421static bool assign_backendlist_entry(RegisteredBgWorker *rw);
422static void maybe_start_bgworkers(void);
423static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
424static pid_t StartChildProcess(AuxProcType type);
425static void StartAutovacuumWorker(void);
426static void MaybeStartWalReceiver(void);
427static void InitPostmasterDeathWatchHandle(void);
428
429/*
430 * Archiver is allowed to start up at the current postmaster state?
431 *
432 * True when pmState is PM_RUN and XLogArchivingActive() holds, or when
433 * pmState is PM_RECOVERY or PM_HOT_STANDBY and XLogArchivingAlways() holds;
 * that is, if WAL archiving is enabled always, the archiver may be started
 * even during recovery.
434 */
435#define PgArchStartupAllowed() \
436 ((XLogArchivingActive() && pmState == PM_RUN) || \
437 (XLogArchivingAlways() && \
438 (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY)))
439
440#ifdef EXEC_BACKEND
441
442#ifdef WIN32
443#define WNOHANG 0 /* ignored, so any integer value will do */
444
/* On WIN32 this file declares its own static waitpid() with the usual signature */
445static pid_t waitpid(pid_t pid, int *exitstatus, int options);
446static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);
447
/* NOTE(review): presumably the queue through which dead-child notifications reach waitpid() -- confirm */
448static HANDLE win32ChildQueue;
449
/*
 * Per-child bookkeeping record (WIN32 only).
 * NOTE(review): field meanings below are inferred from their names; confirm
 * against pgwin32_deadchild_callback().
 */
450typedef struct
451{
452 HANDLE waitHandle; /* presumably the registered wait on the child */
453 HANDLE procHandle; /* presumably the child's process handle */
454 DWORD procId; /* presumably the child's process id */
455} win32_deadchild_waitinfo;
456#endif /* WIN32 */
457
458static pid_t backend_forkexec(Port *port);
459static pid_t internal_forkexec(int argc, char *argv[], Port *port);
460
461/*
 * Type for a socket that can be inherited to a client process.
 *
 * On WIN32 it is a struct pairing the original socket with a
 * WSAPROTOCOL_INFO; on other platforms it is a plain int.
 */
462#ifdef WIN32
463typedef struct
464{
465 SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET
466 * if not a socket */
467 WSAPROTOCOL_INFO wsainfo; /* NOTE(review): presumably the data used to
 * re-create the socket in the child -- confirm */
468} InheritableSocket;
469#else
470typedef int InheritableSocket;
471#endif
472
473/*
474 * Structure contains all variables passed to exec:ed backends
475 *
 * Several members carry the same name as a variable declared in this file
 * (ListenSocket, ShmemBackendArray, redirection_done, postmaster_alive_fds,
 * PostmasterHandle, ExtraOptions). NOTE(review): presumably the struct is
 * filled by save_backend_variables() and unpacked by
 * restore_backend_variables() -- confirm against those functions.
 *
 * Members that differ by platform are selected with #ifdef WIN32, and
 * SpinlockSemaArray is present only when HAVE_SPINLOCKS is not defined. */
476typedef struct
477{
478 Port port;
479 InheritableSocket portsocket;
480 char DataDir[MAXPGPATH];
481 pgsocket ListenSocket[MAXLISTEN];
482 int32 MyCancelKey;
483 int MyPMChildSlot;
484#ifndef WIN32
485 unsigned long UsedShmemSegID;
486#else
487 void *ShmemProtectiveRegion;
488 HANDLE UsedShmemSegID;
489#endif
490 void *UsedShmemSegAddr;
491 slock_t *ShmemLock;
492 VariableCache ShmemVariableCache;
493 Backend *ShmemBackendArray;
494#ifndef HAVE_SPINLOCKS
495 PGSemaphore *SpinlockSemaArray;
496#endif
497 int NamedLWLockTrancheRequests;
498 NamedLWLockTranche *NamedLWLockTrancheArray;
499 LWLockPadded *MainLWLockArray;
500 slock_t *ProcStructLock;
501 PROC_HDR *ProcGlobal;
502 PGPROC *AuxiliaryProcs;
503 PGPROC *PreparedXactProcs;
504 PMSignalData *PMSignalState;
505 InheritableSocket pgStatSock;
506 pid_t PostmasterPid;
507 TimestampTz PgStartTime;
508 TimestampTz PgReloadTime;
509 pg_time_t first_syslogger_file_time;
510 bool redirection_done;
511 bool IsBinaryUpgrade;
512 int max_safe_fds;
513 int MaxBackends;
514#ifdef WIN32
515 HANDLE PostmasterHandle;
516 HANDLE initial_signal_pipe;
517 HANDLE syslogPipe[2];
518#else
519 int postmaster_alive_fds[2];
520 int syslogPipe[2];
521#endif
522 char my_exec_path[MAXPGPATH];
523 char pkglib_path[MAXPGPATH];
524 char ExtraOptions[MAXPGPATH];
525} BackendParameters;
526
527static void read_backend_variables(char *id, Port *port);
528static void restore_backend_variables(BackendParameters *param, Port *port);
529
530#ifndef WIN32
531static bool save_backend_variables(BackendParameters *param, Port *port);
532#else
533static bool save_backend_variables(BackendParameters *param, Port *port,
534 HANDLE childProcess, pid_t childPid);
535#endif
536
537static void ShmemBackendArrayAdd(Backend *bn);
538static void ShmemBackendArrayRemove(Backend *bn);
539#endif /* EXEC_BACKEND */
540
/*
 * Launch shorthands: each expands to StartChildProcess() with the matching
 * AuxProcType argument, and so evaluates to that call's pid_t result.
 */
541#define StartupDataBase() StartChildProcess(StartupProcess)
542#define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
543#define StartCheckpointer() StartChildProcess(CheckpointerProcess)
544#define StartWalWriter() StartChildProcess(WalWriterProcess)
545#define StartWalReceiver() StartChildProcess(WalReceiverProcess)
546
547/*
 * Macros to check exit status of a child process.
 *
 * EXIT_STATUS_0 compares the raw status word against zero. EXIT_STATUS_1 and
 * EXIT_STATUS_3 require WIFEXITED() to hold and the exit code to be 1 or 3
 * respectively.
 */
548#define EXIT_STATUS_0(st) ((st) == 0)
549#define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
550#define EXIT_STATUS_3(st) (WIFEXITED(st) && WEXITSTATUS(st) == 3)
551
552#ifndef WIN32
553/*
554 * File descriptors for pipe used to monitor if postmaster is alive.
555 * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
556 * Both entries start out as -1. */
557int postmaster_alive_fds[2] = {-1, -1};
558#else
559/* Process handle of postmaster used for the same purpose on Windows */
560HANDLE PostmasterHandle;
561#endif
562
563/*
564 * Postmaster main entry point
565 */
566void
567PostmasterMain(int argc, char *argv[])
568{
569 int opt;
570 int status;
571 char *userDoption = NULL;
572 bool listen_addr_saved = false;
573 int i;
574 char *output_config_variable = NULL;
575
576 InitProcessGlobals();
577
578 PostmasterPid = MyProcPid;
579
580 IsPostmasterEnvironment = true;
581
582 /*
583 * We should not be creating any files or directories before we check the
584 * data directory (see checkDataDir()), but just in case set the umask to
585 * the most restrictive (owner-only) permissions.
586 *
587 * checkDataDir() will reset the umask based on the data directory
588 * permissions.
589 */
590 umask(PG_MODE_MASK_OWNER);
591
592 /*
593 * By default, palloc() requests in the postmaster will be allocated in
594 * the PostmasterContext, which is space that can be recycled by backends.
595 * Allocated data that needs to be available to backends should be
596 * allocated in TopMemoryContext.
597 */
598 PostmasterContext = AllocSetContextCreate(TopMemoryContext,
599 "Postmaster",
600 ALLOCSET_DEFAULT_SIZES);
601 MemoryContextSwitchTo(PostmasterContext);
602
603 /* Initialize paths to installation files */
604 getInstallationPaths(argv[0]);
605
606 /*
607 * Set up signal handlers for the postmaster process.
608 *
609 * In the postmaster, we want to install non-ignored handlers *without*
610 * SA_RESTART. This is because they'll be blocked at all times except
611 * when ServerLoop is waiting for something to happen, and during that
612 * window, we want signals to exit the select(2) wait so that ServerLoop
613 * can respond if anything interesting happened. On some platforms,
614 * signals marked SA_RESTART would not cause the select() wait to end.
615 * Child processes will generally want SA_RESTART, but we expect them to
616 * set up their own handlers before unblocking signals.
617 *
618 * CAUTION: when changing this list, check for side-effects on the signal
619 * handling setup of child processes. See tcop/postgres.c,
620 * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
621 * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
622 * postmaster/syslogger.c, postmaster/bgworker.c and
623 * postmaster/checkpointer.c.
624 */
625 pqinitmask();
626 PG_SETMASK(&BlockSig);
627
628 pqsignal_no_restart(SIGHUP, SIGHUP_handler); /* reread config file and
629 * have children do same */
630 pqsignal_no_restart(SIGINT, pmdie); /* send SIGTERM and shut down */
631 pqsignal_no_restart(SIGQUIT, pmdie); /* send SIGQUIT and die */
632 pqsignal_no_restart(SIGTERM, pmdie); /* wait for children and shut down */
633 pqsignal(SIGALRM, SIG_IGN); /* ignored */
634 pqsignal(SIGPIPE, SIG_IGN); /* ignored */
635 pqsignal_no_restart(SIGUSR1, sigusr1_handler); /* message from child
636 * process */
637 pqsignal_no_restart(SIGUSR2, dummy_handler); /* unused, reserve for
638 * children */
639 pqsignal_no_restart(SIGCHLD, reaper); /* handle child termination */
640
641 /*
642 * No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We
643 * ignore those signals in a postmaster environment, so that there is no
644 * risk of a child process freezing up due to writing to stderr. But for
645 * a standalone backend, their default handling is reasonable. Hence, all
646 * child processes should just allow the inherited settings to stand.
647 */
648#ifdef SIGTTIN
649 pqsignal(SIGTTIN, SIG_IGN); /* ignored */
650#endif
651#ifdef SIGTTOU
652 pqsignal(SIGTTOU, SIG_IGN); /* ignored */
653#endif
654
655 /* ignore SIGXFSZ, so that ulimit violations work like disk full */
656#ifdef SIGXFSZ
657 pqsignal(SIGXFSZ, SIG_IGN); /* ignored */
658#endif
659
660 /*
661 * Options setup
662 */
663 InitializeGUCOptions();
664
665 opterr = 1;
666
667 /*
668 * Parse command-line options. CAUTION: keep this in sync with
669 * tcop/postgres.c (the option sets should not conflict) and with the
670 * common help() function in main/main.c.
671 */
672 while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:")) != -1)
673 {
674 switch (opt)
675 {
676 case 'B':
677 SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
678 break;
679
680 case 'b':
681 /* Undocumented flag used for binary upgrades */
682 IsBinaryUpgrade = true;
683 break;
684
685 case 'C':
686 output_config_variable = strdup(optarg);
687 break;
688
689 case 'D':
690 userDoption = strdup(optarg);
691 break;
692
693 case 'd':
694 set_debug_options(atoi(optarg), PGC_POSTMASTER, PGC_S_ARGV);
695 break;
696
697 case 'E':
698 SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
699 break;
700
701 case 'e':
702 SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
703 break;
704
705 case 'F':
706 SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
707 break;
708
709 case 'f':
710 if (!set_plan_disabling_options(optarg, PGC_POSTMASTER, PGC_S_ARGV))
711 {
712 write_stderr("%s: invalid argument for option -f: \"%s\"\n",
713 progname, optarg);
714 ExitPostmaster(1);
715 }
716 break;
717
718 case 'h':
719 SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
720 break;
721
722 case 'i':
723 SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
724 break;
725
726 case 'j':
727 /* only used by interactive backend */
728 break;
729
730 case 'k':
731 SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
732 break;
733
734 case 'l':
735 SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
736 break;
737
738 case 'N':
739 SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
740 break;
741
742 case 'n':
743 /* Don't reinit shared mem after abnormal exit */
744 Reinit = false;
745 break;
746
747 case 'O':
748 SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
749 break;
750
751 case 'o':
752 /* Other options to pass to the backend on the command line */
753 snprintf(ExtraOptions + strlen(ExtraOptions),
754 sizeof(ExtraOptions) - strlen(ExtraOptions),
755 " %s", optarg);
756 break;
757
758 case 'P':
759 SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
760 break;
761
762 case 'p':
763 SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
764 break;
765
766 case 'r':
767 /* only used by single-user backend */
768 break;
769
770 case 'S':
771 SetConfigOption("work_mem", optarg, PGC_POSTMASTER, PGC_S_ARGV);
772 break;
773
774 case 's':
775 SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
776 break;
777
778 case 'T':
779
780 /*
781 * In the event that some backend dumps core, send SIGSTOP,
782 * rather than SIGQUIT, to all its peers. This lets the wily
783 * post_hacker collect core dumps from everyone.
784 */
785 SendStop = true;
786 break;
787
788 case 't':
789 {
790 const char *tmp = get_stats_option_name(optarg);
791
792 if (tmp)
793 {
794 SetConfigOption(tmp, "true", PGC_POSTMASTER, PGC_S_ARGV);
795 }
796 else
797 {
798 write_stderr("%s: invalid argument for option -t: \"%s\"\n",
799 progname, optarg);
800 ExitPostmaster(1);
801 }
802 break;
803 }
804
805 case 'W':
806 SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
807 break;
808
809 case 'c':
810 case '-':
811 {
812 char *name,
813 *value;
814
815 ParseLongOption(optarg, &name, &value);
816 if (!value)
817 {
818 if (opt == '-')
819 ereport(ERROR,
820 (errcode(ERRCODE_SYNTAX_ERROR),
821 errmsg("--%s requires a value",
822 optarg)));
823 else
824 ereport(ERROR,
825 (errcode(ERRCODE_SYNTAX_ERROR),
826 errmsg("-c %s requires a value",
827 optarg)));
828 }
829
830 SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
831 free(name);
832 if (value)
833 free(value);
834 break;
835 }
836
837 default:
838 write_stderr("Try \"%s --help\" for more information.\n",
839 progname);
840 ExitPostmaster(1);
841 }
842 }
843
844 /*
845 * Postmaster accepts no non-option switch arguments.
846 */
847 if (optind < argc)
848 {
849 write_stderr("%s: invalid argument: \"%s\"\n",
850 progname, argv[optind]);
851 write_stderr("Try \"%s --help\" for more information.\n",
852 progname);
853 ExitPostmaster(1);
854 }
855
856 /*
857 * Locate the proper configuration files and data directory, and read
858 * postgresql.conf for the first time.
859 */
860 if (!SelectConfigFiles(userDoption, progname))
861 ExitPostmaster(2);
862
863 if (output_config_variable != NULL)
864 {
865 /*
866 * "-C guc" was specified, so print GUC's value and exit. No extra
867 * permission check is needed because the user is reading inside the
868 * data dir.
869 */
870 const char *config_val = GetConfigOption(output_config_variable,
871 false, false);
872
873 puts(config_val ? config_val : "");
874 ExitPostmaster(0);
875 }
876
877 /* Verify that DataDir looks reasonable */
878 checkDataDir();
879
880 /* Check that pg_control exists */
881 checkControlFile();
882
883 /* And switch working directory into it */
884 ChangeToDataDir();
885
886 /*
887 * Check for invalid combinations of GUC settings.
888 */
889 if (ReservedBackends >= MaxConnections)
890 {
891 write_stderr("%s: superuser_reserved_connections (%d) must be less than max_connections (%d)\n",
892 progname,
893 ReservedBackends, MaxConnections);
894 ExitPostmaster(1);
895 }
896 if (XLogArchiveMode > ARCHIVE_MODE_OFF && wal_level == WAL_LEVEL_MINIMAL)
897 ereport(ERROR,
898 (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
899 if (max_wal_senders > 0 && wal_level == WAL_LEVEL_MINIMAL)
900 ereport(ERROR,
901 (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
902
903 /*
904 * Other one-time internal sanity checks can go here, if they are fast.
905 * (Put any slow processing further down, after postmaster.pid creation.)
906 */
907 if (!CheckDateTokenTables())
908 {
909 write_stderr("%s: invalid datetoken tables, please fix\n", progname);
910 ExitPostmaster(1);
911 }
912
913 /*
914 * Now that we are done processing the postmaster arguments, reset
915 * getopt(3) library so that it will work correctly in subprocesses.
916 */
917 optind = 1;
918#ifdef HAVE_INT_OPTRESET
919 optreset = 1; /* some systems need this too */
920#endif
921
922 /* For debugging: display postmaster environment */
923 {
924 extern char **environ;
925 char **p;
926
927 ereport(DEBUG3,
928 (errmsg_internal("%s: PostmasterMain: initial environment dump:",
929 progname)));
930 ereport(DEBUG3,
931 (errmsg_internal("-----------------------------------------")));
932 for (p = environ; *p; ++p)
933 ereport(DEBUG3,
934 (errmsg_internal("\t%s", *p)));
935 ereport(DEBUG3,
936 (errmsg_internal("-----------------------------------------")));
937 }
938
939 /*
940 * Create lockfile for data directory.
941 *
942 * We want to do this before we try to grab the input sockets, because the
943 * data directory interlock is more reliable than the socket-file
944 * interlock (thanks to whoever decided to put socket files in /tmp :-().
945 * For the same reason, it's best to grab the TCP socket(s) before the
946 * Unix socket(s).
947 *
948 * Also note that this internally sets up the on_proc_exit function that
949 * is responsible for removing both data directory and socket lockfiles;
950 * so it must happen before opening sockets so that at exit, the socket
951 * lockfiles go away after CloseServerPorts runs.
952 */
953 CreateDataDirLockFile(true);
954
955 /*
956 * Read the control file (for error checking and config info).
957 *
958 * Since we verify the control file's CRC, this has a useful side effect
959 * on machines where we need a run-time test for CRC support instructions.
960 * The postmaster will do the test once at startup, and then its child
961 * processes will inherit the correct function pointer and not need to
962 * repeat the test.
963 */
964 LocalProcessControlFile(false);
965
966 /*
967 * Initialize SSL library, if specified.
968 */
969#ifdef USE_SSL
970 if (EnableSSL)
971 {
972 (void) secure_initialize(true);
973 LoadedSSL = true;
974 }
975#endif
976
977 /*
978 * Register the apply launcher. Since it registers a background worker,
979 * it needs to be called before InitializeMaxBackends(), and it's probably
980 * a good idea to call it before any modules had chance to take the
981 * background worker slots.
982 */
983 ApplyLauncherRegister();
984
985 /*
986 * process any libraries that should be preloaded at postmaster start
987 */
988 process_shared_preload_libraries();
989
990 /*
991 * Now that loadable modules have had their chance to register background
992 * workers, calculate MaxBackends.
993 */
994 InitializeMaxBackends();
995
996 /* Report server startup in log */
997 ereport(LOG,
998 (errmsg("starting %s", PG_VERSION_STR)));
999
1000 /*
1001 * Establish input sockets.
1002 *
1003 * First, mark them all closed, and set up an on_proc_exit function that's
1004 * charged with closing the sockets again at postmaster shutdown.
1005 */
1006 for (i = 0; i < MAXLISTEN; i++)
1007 ListenSocket[i] = PGINVALID_SOCKET;
1008
1009 on_proc_exit(CloseServerPorts, 0);
1010
1011 if (ListenAddresses)
1012 {
1013 char *rawstring;
1014 List *elemlist;
1015 ListCell *l;
1016 int success = 0;
1017
1018 /* Need a modifiable copy of ListenAddresses */
1019 rawstring = pstrdup(ListenAddresses);
1020
1021 /* Parse string into list of hostnames */
1022 if (!SplitIdentifierString(rawstring, ',', &elemlist))
1023 {
1024 /* syntax error in list */
1025 ereport(FATAL,
1026 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1027 errmsg("invalid list syntax in parameter \"%s\"",
1028 "listen_addresses")));
1029 }
1030
1031 foreach(l, elemlist)
1032 {
1033 char *curhost = (char *) lfirst(l);
1034
1035 if (strcmp(curhost, "*") == 0)
1036 status = StreamServerPort(AF_UNSPEC, NULL,
1037 (unsigned short) PostPortNumber,
1038 NULL,
1039 ListenSocket, MAXLISTEN);
1040 else
1041 status = StreamServerPort(AF_UNSPEC, curhost,
1042 (unsigned short) PostPortNumber,
1043 NULL,
1044 ListenSocket, MAXLISTEN);
1045
1046 if (status == STATUS_OK)
1047 {
1048 success++;
1049 /* record the first successful host addr in lockfile */
1050 if (!listen_addr_saved)
1051 {
1052 AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, curhost);
1053 listen_addr_saved = true;
1054 }
1055 }
1056 else
1057 ereport(WARNING,
1058 (errmsg("could not create listen socket for \"%s\"",
1059 curhost)));
1060 }
1061
1062 if (!success && elemlist != NIL)
1063 ereport(FATAL,
1064 (errmsg("could not create any TCP/IP sockets")));
1065
1066 list_free(elemlist);
1067 pfree(rawstring);
1068 }
1069
1070#ifdef USE_BONJOUR
1071 /* Register for Bonjour only if we opened TCP socket(s) */
1072 if (enable_bonjour && ListenSocket[0] != PGINVALID_SOCKET)
1073 {
1074 DNSServiceErrorType err;
1075
1076 /*
1077 * We pass 0 for interface_index, which will result in registering on
1078 * all "applicable" interfaces. It's not entirely clear from the
1079 * DNS-SD docs whether this would be appropriate if we have bound to
1080 * just a subset of the available network interfaces.
1081 */
1082 err = DNSServiceRegister(&bonjour_sdref,
1083 0,
1084 0,
1085 bonjour_name,
1086 "_postgresql._tcp.",
1087 NULL,
1088 NULL,
1089 pg_hton16(PostPortNumber),
1090 0,
1091 NULL,
1092 NULL,
1093 NULL);
1094 if (err != kDNSServiceErr_NoError)
1095 elog(LOG, "DNSServiceRegister() failed: error code %ld",
1096 (long) err);
1097
1098 /*
1099 * We don't bother to read the mDNS daemon's reply, and we expect that
1100 * it will automatically terminate our registration when the socket is
1101 * closed at postmaster termination. So there's nothing more to be
1102 * done here. However, the bonjour_sdref is kept around so that
1103 * forked children can close their copies of the socket.
1104 */
1105 }
1106#endif
1107
1108#ifdef HAVE_UNIX_SOCKETS
1109 if (Unix_socket_directories)
1110 {
1111 char *rawstring;
1112 List *elemlist;
1113 ListCell *l;
1114 int success = 0;
1115
1116 /* Need a modifiable copy of Unix_socket_directories */
1117 rawstring = pstrdup(Unix_socket_directories);
1118
1119 /* Parse string into list of directories */
1120 if (!SplitDirectoriesString(rawstring, ',', &elemlist))
1121 {
1122 /* syntax error in list */
1123 ereport(FATAL,
1124 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1125 errmsg("invalid list syntax in parameter \"%s\"",
1126 "unix_socket_directories")));
1127 }
1128
1129 foreach(l, elemlist)
1130 {
1131 char *socketdir = (char *) lfirst(l);
1132
1133 status = StreamServerPort(AF_UNIX, NULL,
1134 (unsigned short) PostPortNumber,
1135 socketdir,
1136 ListenSocket, MAXLISTEN);
1137
1138 if (status == STATUS_OK)
1139 {
1140 success++;
1141 /* record the first successful Unix socket in lockfile */
1142 if (success == 1)
1143 AddToDataDirLockFile(LOCK_FILE_LINE_SOCKET_DIR, socketdir);
1144 }
1145 else
1146 ereport(WARNING,
1147 (errmsg("could not create Unix-domain socket in directory \"%s\"",
1148 socketdir)));
1149 }
1150
1151 if (!success && elemlist != NIL)
1152 ereport(FATAL,
1153 (errmsg("could not create any Unix-domain sockets")));
1154
1155 list_free_deep(elemlist);
1156 pfree(rawstring);
1157 }
1158#endif
1159
1160 /*
1161 * check that we have some socket to listen on
1162 */
1163 if (ListenSocket[0] == PGINVALID_SOCKET)
1164 ereport(FATAL,
1165 (errmsg("no socket created for listening")));
1166
1167 /*
1168 * If no valid TCP ports, write an empty line for listen address,
1169 * indicating the Unix socket must be used. Note that this line is not
1170 * added to the lock file until there is a socket backing it.
1171 */
1172 if (!listen_addr_saved)
1173 AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, "");
1174
1175 /*
1176 * Set up shared memory and semaphores.
1177 */
1178 reset_shared(PostPortNumber);
1179
1180 /*
1181 * Estimate number of openable files. This must happen after setting up
1182 * semaphores, because on some platforms semaphores count as open files.
1183 */
1184 set_max_safe_fds();
1185
1186 /*
1187 * Set reference point for stack-depth checking.
1188 */
1189 set_stack_base();
1190
1191 /*
1192 * Initialize pipe (or process handle on Windows) that allows children to
1193 * wake up from sleep on postmaster death.
1194 */
1195 InitPostmasterDeathWatchHandle();
1196
1197#ifdef WIN32
1198
1199 /*
1200 * Initialize I/O completion port used to deliver list of dead children.
1201 */
1202 win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1);
1203 if (win32ChildQueue == NULL)
1204 ereport(FATAL,
1205 (errmsg("could not create I/O completion port for child queue")));
1206#endif
1207
1208 /*
1209 * Record postmaster options. We delay this till now to avoid recording
1210 * bogus options (eg, NBuffers too high for available memory).
1211 */
1212 if (!CreateOptsFile(argc, argv, my_exec_path))
1213 ExitPostmaster(1);
1214
1215#ifdef EXEC_BACKEND
1216 /* Write out nondefault GUC settings for child processes to use */
1217 write_nondefault_variables(PGC_POSTMASTER);
1218#endif
1219
1220 /*
1221 * Write the external PID file if requested
1222 */
1223 if (external_pid_file)
1224 {
1225 FILE *fpidfile = fopen(external_pid_file, "w");
1226
1227 if (fpidfile)
1228 {
1229 fprintf(fpidfile, "%d\n", MyProcPid);
1230 fclose(fpidfile);
1231
1232 /* Make PID file world readable */
1233 if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0)
1234 write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1235 progname, external_pid_file, strerror(errno));
1236 }
1237 else
1238 write_stderr("%s: could not write external PID file \"%s\": %s\n",
1239 progname, external_pid_file, strerror(errno));
1240
1241 on_proc_exit(unlink_external_pid_file, 0);
1242 }
1243
1244 /*
1245 * Remove old temporary files. At this point there can be no other
1246 * Postgres processes running in this directory, so this should be safe.
1247 */
1248 RemovePgTempFiles();
1249
1250 /*
1251 * Forcibly remove the files signaling a standby promotion request.
1252 * Otherwise, the existence of those files triggers a promotion too early,
1253 * whether a user wants that or not.
1254 *
1255 * This removal of files is usually unnecessary because they can exist
1256 * only during a few moments during a standby promotion. However there is
1257 * a race condition: if pg_ctl promote is executed and creates the files
1258 * during a promotion, the files can stay around even after the server is
1259 * brought up to new master. Then, if new standby starts by using the
1260 * backup taken from that master, the files can exist at the server
1261 * startup and should be removed in order to avoid an unexpected
1262 * promotion.
1263 *
1264 * Note that promotion signal files need to be removed before the startup
1265 * process is invoked. Because, after that, they can be used by
1266 * postmaster's SIGUSR1 signal handler.
1267 */
1268 RemovePromoteSignalFiles();
1269
1270 /* Do the same for logrotate signal file */
1271 RemoveLogrotateSignalFiles();
1272
1273 /* Remove any outdated file holding the current log filenames. */
1274 if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT)
1275 ereport(LOG,
1276 (errcode_for_file_access(),
1277 errmsg("could not remove file \"%s\": %m",
1278 LOG_METAINFO_DATAFILE)));
1279
1280 /*
1281 * If enabled, start up syslogger collection subprocess
1282 */
1283 SysLoggerPID = SysLogger_Start();
1284
1285 /*
1286 * Reset whereToSendOutput from DestDebug (its starting state) to
1287 * DestNone. This stops ereport from sending log messages to stderr unless
1288 * Log_destination permits. We don't do this until the postmaster is
1289 * fully launched, since startup failures may as well be reported to
1290 * stderr.
1291 *
1292 * If we are in fact disabling logging to stderr, first emit a log message
1293 * saying so, to provide a breadcrumb trail for users who may not remember
1294 * that their logging is configured to go somewhere else.
1295 */
1296 if (!(Log_destination & LOG_DESTINATION_STDERR))
1297 ereport(LOG,
1298 (errmsg("ending log output to stderr"),
1299 errhint("Future log output will go to log destination \"%s\".",
1300 Log_destination_string)));
1301
1302 whereToSendOutput = DestNone;
1303
1304 /*
1305 * Initialize stats collection subsystem (this does NOT start the
1306 * collector process!)
1307 */
1308 pgstat_init();
1309
1310 /*
1311 * Initialize the autovacuum subsystem (again, no process start yet)
1312 */
1313 autovac_init();
1314
1315 /*
1316 * Load configuration files for client authentication.
1317 */
1318 if (!load_hba())
1319 {
1320 /*
1321 * It makes no sense to continue if we fail to load the HBA file,
1322 * since there is no way to connect to the database in this case.
1323 */
1324 ereport(FATAL,
1325 (errmsg("could not load pg_hba.conf")));
1326 }
1327 if (!load_ident())
1328 {
1329 /*
1330 * We can start up without the IDENT file, although it means that you
1331 * cannot log in using any of the authentication methods that need a
1332 * user name mapping. load_ident() already logged the details of error
1333 * to the log.
1334 */
1335 }
1336
1337#ifdef HAVE_PTHREAD_IS_THREADED_NP
1338
1339 /*
1340 * On macOS, libintl replaces setlocale() with a version that calls
1341 * CFLocaleCopyCurrent() when its second argument is "" and every relevant
1342 * environment variable is unset or empty. CFLocaleCopyCurrent() makes
1343 * the process multithreaded. The postmaster calls sigprocmask() and
1344 * calls fork() without an immediate exec(), both of which have undefined
1345 * behavior in a multithreaded program. A multithreaded postmaster is the
1346 * normal case on Windows, which offers neither fork() nor sigprocmask().
1347 */
1348 if (pthread_is_threaded_np() != 0)
1349 ereport(FATAL,
1350 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1351 errmsg("postmaster became multithreaded during startup"),
1352 errhint("Set the LC_ALL environment variable to a valid locale.")));
1353#endif
1354
1355 /*
1356 * Remember postmaster startup time
1357 */
1358 PgStartTime = GetCurrentTimestamp();
1359
1360 /*
1361 * Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1362 * see what's happening.
1363 */
1364 AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STARTING);
1365
1366 /*
1367 * We're ready to rock and roll...
1368 */
1369 StartupPID = StartupDataBase();
1370 Assert(StartupPID != 0);
1371 StartupStatus = STARTUP_RUNNING;
1372 pmState = PM_STARTUP;
1373
1374 /* Some workers may be scheduled to start now */
1375 maybe_start_bgworkers();
1376
1377 status = ServerLoop();
1378
1379 /*
1380 * ServerLoop probably shouldn't ever return, but if it does, close down.
1381 */
1382 ExitPostmaster(status != STATUS_OK);
1383
1384 abort(); /* not reached */
1385}
1386
1387
1388/*
1389 * on_proc_exit callback to close server's listen sockets
1390 */
1391static void
1392CloseServerPorts(int status, Datum arg)
1393{
1394 int i;
1395
1396 /*
1397 * First, explicitly close all the socket FDs. We used to just let this
1398 * happen implicitly at postmaster exit, but it's better to close them
1399 * before we remove the postmaster.pid lockfile; otherwise there's a race
1400 * condition if a new postmaster wants to re-use the TCP port number.
1401 */
1402 for (i = 0; i < MAXLISTEN; i++)
1403 {
1404 if (ListenSocket[i] != PGINVALID_SOCKET)
1405 {
1406 StreamClose(ListenSocket[i]);
1407 ListenSocket[i] = PGINVALID_SOCKET;
1408 }
1409 }
1410
1411 /*
1412 * Next, remove any filesystem entries for Unix sockets. To avoid race
1413 * conditions against incoming postmasters, this must happen after closing
1414 * the sockets and before removing lock files.
1415 */
1416 RemoveSocketFiles();
1417
1418 /*
1419 * We don't do anything about socket lock files here; those will be
1420 * removed in a later on_proc_exit callback.
1421 */
1422}
1423
1424/*
1425 * on_proc_exit callback to delete external_pid_file
1426 */
1427static void
1428unlink_external_pid_file(int status, Datum arg)
1429{
1430 if (external_pid_file)
1431 unlink(external_pid_file);
1432}
1433
1434
1435/*
1436 * Compute and check the directory paths to files that are part of the
1437 * installation (as deduced from the postgres executable's own location)
1438 */
1439static void
1440getInstallationPaths(const char *argv0)
1441{
1442 DIR *pdir;
1443
1444 /* Locate the postgres executable itself */
1445 if (find_my_exec(argv0, my_exec_path) < 0)
1446 elog(FATAL, "%s: could not locate my own executable path", argv0);
1447
1448#ifdef EXEC_BACKEND
1449 /* Locate executable backend before we change working directory */
1450 if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1451 postgres_exec_path) < 0)
1452 ereport(FATAL,
1453 (errmsg("%s: could not locate matching postgres executable",
1454 argv0)));
1455#endif
1456
1457 /*
1458 * Locate the pkglib directory --- this has to be set early in case we try
1459 * to load any modules from it in response to postgresql.conf entries.
1460 */
1461 get_pkglib_path(my_exec_path, pkglib_path);
1462
1463 /*
1464 * Verify that there's a readable directory there; otherwise the Postgres
1465 * installation is incomplete or corrupt. (A typical cause of this
1466 * failure is that the postgres executable has been moved or hardlinked to
1467 * some directory that's not a sibling of the installation lib/
1468 * directory.)
1469 */
1470 pdir = AllocateDir(pkglib_path);
1471 if (pdir == NULL)
1472 ereport(ERROR,
1473 (errcode_for_file_access(),
1474 errmsg("could not open directory \"%s\": %m",
1475 pkglib_path),
1476 errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1477 my_exec_path)));
1478 FreeDir(pdir);
1479
1480 /*
1481 * XXX is it worth similarly checking the share/ directory? If the lib/
1482 * directory is there, then share/ probably is too.
1483 */
1484}
1485
1486/*
1487 * Check that pg_control exists in the correct location in the data directory.
1488 *
1489 * No attempt is made to validate the contents of pg_control here. This is
1490 * just a sanity check to see if we are looking at a real data directory.
1491 */
1492static void
1493checkControlFile(void)
1494{
1495 char path[MAXPGPATH];
1496 FILE *fp;
1497
1498 snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1499
1500 fp = AllocateFile(path, PG_BINARY_R);
1501 if (fp == NULL)
1502 {
1503 write_stderr("%s: could not find the database system\n"
1504 "Expected to find it in the directory \"%s\",\n"
1505 "but could not open file \"%s\": %s\n",
1506 progname, DataDir, path, strerror(errno));
1507 ExitPostmaster(2);
1508 }
1509 FreeFile(fp);
1510}
1511
1512/*
1513 * Determine how long should we let ServerLoop sleep.
1514 *
1515 * In normal conditions we wait at most one minute, to ensure that the other
1516 * background tasks handled by ServerLoop get done even when no requests are
1517 * arriving. However, if there are background workers waiting to be started,
1518 * we don't actually sleep so that they are quickly serviced. Other exception
1519 * cases are as shown in the code.
1520 */
1521static void
1522DetermineSleepTime(struct timeval *timeout)
1523{
1524 TimestampTz next_wakeup = 0;
1525
1526 /*
1527 * Normal case: either there are no background workers at all, or we're in
1528 * a shutdown sequence (during which we ignore bgworkers altogether).
1529 */
1530 if (Shutdown > NoShutdown ||
1531 (!StartWorkerNeeded && !HaveCrashedWorker))
1532 {
1533 if (AbortStartTime != 0)
1534 {
1535 /* time left to abort; clamp to 0 in case it already expired */
1536 timeout->tv_sec = SIGKILL_CHILDREN_AFTER_SECS -
1537 (time(NULL) - AbortStartTime);
1538 timeout->tv_sec = Max(timeout->tv_sec, 0);
1539 timeout->tv_usec = 0;
1540 }
1541 else
1542 {
1543 timeout->tv_sec = 60;
1544 timeout->tv_usec = 0;
1545 }
1546 return;
1547 }
1548
1549 if (StartWorkerNeeded)
1550 {
1551 timeout->tv_sec = 0;
1552 timeout->tv_usec = 0;
1553 return;
1554 }
1555
1556 if (HaveCrashedWorker)
1557 {
1558 slist_mutable_iter siter;
1559
1560 /*
1561 * When there are crashed bgworkers, we sleep just long enough that
1562 * they are restarted when they request to be. Scan the list to
1563 * determine the minimum of all wakeup times according to most recent
1564 * crash time and requested restart interval.
1565 */
1566 slist_foreach_modify(siter, &BackgroundWorkerList)
1567 {
1568 RegisteredBgWorker *rw;
1569 TimestampTz this_wakeup;
1570
1571 rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1572
1573 if (rw->rw_crashed_at == 0)
1574 continue;
1575
1576 if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART
1577 || rw->rw_terminate)
1578 {
1579 ForgetBackgroundWorker(&siter);
1580 continue;
1581 }
1582
1583 this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1584 1000L * rw->rw_worker.bgw_restart_time);
1585 if (next_wakeup == 0 || this_wakeup < next_wakeup)
1586 next_wakeup = this_wakeup;
1587 }
1588 }
1589
1590 if (next_wakeup != 0)
1591 {
1592 long secs;
1593 int microsecs;
1594
1595 TimestampDifference(GetCurrentTimestamp(), next_wakeup,
1596 &secs, &microsecs);
1597 timeout->tv_sec = secs;
1598 timeout->tv_usec = microsecs;
1599
1600 /* Ensure we don't exceed one minute */
1601 if (timeout->tv_sec > 60)
1602 {
1603 timeout->tv_sec = 60;
1604 timeout->tv_usec = 0;
1605 }
1606 }
1607 else
1608 {
1609 timeout->tv_sec = 60;
1610 timeout->tv_usec = 0;
1611 }
1612}
1613
1614/*
1615 * Main idle loop of postmaster
1616 *
1617 * NB: Needs to be called with signals blocked
1618 */
1619static int
1620ServerLoop(void)
1621{
1622 fd_set readmask;
1623 int nSockets;
1624 time_t last_lockfile_recheck_time,
1625 last_touch_time;
1626
1627 last_lockfile_recheck_time = last_touch_time = time(NULL);
1628
1629 nSockets = initMasks(&readmask);
1630
1631 for (;;)
1632 {
1633 fd_set rmask;
1634 int selres;
1635 time_t now;
1636
1637 /*
1638 * Wait for a connection request to arrive.
1639 *
1640 * We block all signals except while sleeping. That makes it safe for
1641 * signal handlers, which again block all signals while executing, to
1642 * do nontrivial work.
1643 *
1644 * If we are in PM_WAIT_DEAD_END state, then we don't want to accept
1645 * any new connections, so we don't call select(), and just sleep.
1646 */
1647 memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
1648
1649 if (pmState == PM_WAIT_DEAD_END)
1650 {
1651 PG_SETMASK(&UnBlockSig);
1652
1653 pg_usleep(100000L); /* 100 msec seems reasonable */
1654 selres = 0;
1655
1656 PG_SETMASK(&BlockSig);
1657 }
1658 else
1659 {
1660 /* must set timeout each time; some OSes change it! */
1661 struct timeval timeout;
1662
1663 /* Needs to run with blocked signals! */
1664 DetermineSleepTime(&timeout);
1665
1666 PG_SETMASK(&UnBlockSig);
1667
1668 selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1669
1670 PG_SETMASK(&BlockSig);
1671 }
1672
1673 /* Now check the select() result */
1674 if (selres < 0)
1675 {
1676 if (errno != EINTR && errno != EWOULDBLOCK)
1677 {
1678 ereport(LOG,
1679 (errcode_for_socket_access(),
1680 errmsg("select() failed in postmaster: %m")));
1681 return STATUS_ERROR;
1682 }
1683 }
1684
1685 /*
1686 * New connection pending on any of our sockets? If so, fork a child
1687 * process to deal with it.
1688 */
1689 if (selres > 0)
1690 {
1691 int i;
1692
1693 for (i = 0; i < MAXLISTEN; i++)
1694 {
1695 if (ListenSocket[i] == PGINVALID_SOCKET)
1696 break;
1697 if (FD_ISSET(ListenSocket[i], &rmask))
1698 {
1699 Port *port;
1700
1701 port = ConnCreate(ListenSocket[i]);
1702 if (port)
1703 {
1704 BackendStartup(port);
1705
1706 /*
1707 * We no longer need the open socket or port structure
1708 * in this process
1709 */
1710 StreamClose(port->sock);
1711 ConnFree(port);
1712 }
1713 }
1714 }
1715 }
1716
1717 /* If we have lost the log collector, try to start a new one */
1718 if (SysLoggerPID == 0 && Logging_collector)
1719 SysLoggerPID = SysLogger_Start();
1720
1721 /*
1722 * If no background writer process is running, and we are not in a
1723 * state that prevents it, start one. It doesn't matter if this
1724 * fails, we'll just try again later. Likewise for the checkpointer.
1725 */
1726 if (pmState == PM_RUN || pmState == PM_RECOVERY ||
1727 pmState == PM_HOT_STANDBY)
1728 {
1729 if (CheckpointerPID == 0)
1730 CheckpointerPID = StartCheckpointer();
1731 if (BgWriterPID == 0)
1732 BgWriterPID = StartBackgroundWriter();
1733 }
1734
1735 /*
1736 * Likewise, if we have lost the walwriter process, try to start a new
1737 * one. But this is needed only in normal operation (else we cannot
1738 * be writing any new WAL).
1739 */
1740 if (WalWriterPID == 0 && pmState == PM_RUN)
1741 WalWriterPID = StartWalWriter();
1742
1743 /*
1744 * If we have lost the autovacuum launcher, try to start a new one. We
1745 * don't want autovacuum to run in binary upgrade mode because
1746 * autovacuum might update relfrozenxid for empty tables before the
1747 * physical files are put in place.
1748 */
1749 if (!IsBinaryUpgrade && AutoVacPID == 0 &&
1750 (AutoVacuumingActive() || start_autovac_launcher) &&
1751 pmState == PM_RUN)
1752 {
1753 AutoVacPID = StartAutoVacLauncher();
1754 if (AutoVacPID != 0)
1755 start_autovac_launcher = false; /* signal processed */
1756 }
1757
1758 /* If we have lost the stats collector, try to start a new one */
1759 if (PgStatPID == 0 &&
1760 (pmState == PM_RUN || pmState == PM_HOT_STANDBY))
1761 PgStatPID = pgstat_start();
1762
1763 /* If we have lost the archiver, try to start a new one. */
1764 if (PgArchPID == 0 && PgArchStartupAllowed())
1765 PgArchPID = pgarch_start();
1766
1767 /* If we need to signal the autovacuum launcher, do so now */
1768 if (avlauncher_needs_signal)
1769 {
1770 avlauncher_needs_signal = false;
1771 if (AutoVacPID != 0)
1772 kill(AutoVacPID, SIGUSR2);
1773 }
1774
1775 /* If we need to start a WAL receiver, try to do that now */
1776 if (WalReceiverRequested)
1777 MaybeStartWalReceiver();
1778
1779 /* Get other worker processes running, if needed */
1780 if (StartWorkerNeeded || HaveCrashedWorker)
1781 maybe_start_bgworkers();
1782
1783#ifdef HAVE_PTHREAD_IS_THREADED_NP
1784
1785 /*
1786 * With assertions enabled, check regularly for appearance of
1787 * additional threads. All builds check at start and exit.
1788 */
1789 Assert(pthread_is_threaded_np() == 0);
1790#endif
1791
1792 /*
1793 * Lastly, check to see if it's time to do some things that we don't
1794 * want to do every single time through the loop, because they're a
1795 * bit expensive. Note that there's up to a minute of slop in when
1796 * these tasks will be performed, since DetermineSleepTime() will let
1797 * us sleep at most that long; except for SIGKILL timeout which has
1798 * special-case logic there.
1799 */
1800 now = time(NULL);
1801
1802 /*
1803 * If we already sent SIGQUIT to children and they are slow to shut
1804 * down, it's time to send them SIGKILL. This doesn't happen
1805 * normally, but under certain conditions backends can get stuck while
1806 * shutting down. This is a last measure to get them unwedged.
1807 *
1808 * Note we also do this during recovery from a process crash.
1809 */
1810 if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
1811 AbortStartTime != 0 &&
1812 (now - AbortStartTime) >= SIGKILL_CHILDREN_AFTER_SECS)
1813 {
1814 /* We were gentle with them before. Not anymore */
1815 TerminateChildren(SIGKILL);
1816 /* reset flag so we don't SIGKILL again */
1817 AbortStartTime = 0;
1818 }
1819
1820 /*
1821 * Once a minute, verify that postmaster.pid hasn't been removed or
1822 * overwritten. If it has, we force a shutdown. This avoids having
1823 * postmasters and child processes hanging around after their database
1824 * is gone, and maybe causing problems if a new database cluster is
1825 * created in the same place. It also provides some protection
1826 * against a DBA foolishly removing postmaster.pid and manually
1827 * starting a new postmaster. Data corruption is likely to ensue from
1828 * that anyway, but we can minimize the damage by aborting ASAP.
1829 */
1830 if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
1831 {
1832 if (!RecheckDataDirLockFile())
1833 {
1834 ereport(LOG,
1835 (errmsg("performing immediate shutdown because data directory lock file is invalid")));
1836 kill(MyProcPid, SIGQUIT);
1837 }
1838 last_lockfile_recheck_time = now;
1839 }
1840
1841 /*
1842 * Touch Unix socket and lock files every 58 minutes, to ensure that
1843 * they are not removed by overzealous /tmp-cleaning tasks. We assume
1844 * no one runs cleaners with cutoff times of less than an hour ...
1845 */
1846 if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
1847 {
1848 TouchSocketFiles();
1849 TouchSocketLockFiles();
1850 last_touch_time = now;
1851 }
1852 }
1853}
1854
1855/*
1856 * Initialise the masks for select() for the ports we are listening on.
1857 * Return the number of sockets to listen on.
1858 */
1859static int
1860initMasks(fd_set *rmask)
1861{
1862 int maxsock = -1;
1863 int i;
1864
1865 FD_ZERO(rmask);
1866
1867 for (i = 0; i < MAXLISTEN; i++)
1868 {
1869 int fd = ListenSocket[i];
1870
1871 if (fd == PGINVALID_SOCKET)
1872 break;
1873 FD_SET(fd, rmask);
1874
1875 if (fd > maxsock)
1876 maxsock = fd;
1877 }
1878
1879 return maxsock + 1;
1880}
1881
1882
1883/*
1884 * Read a client's startup packet and do something according to it.
1885 *
1886 * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1887 * not return at all.
1888 *
1889 * (Note that ereport(FATAL) stuff is sent to the client, so only use it
1890 * if that's what you want. Return STATUS_ERROR if you don't want to
1891 * send anything to the client, which would typically be appropriate
1892 * if we detect a communications failure.)
1893 *
1894 * Set secure_done when negotiation of an encrypted layer (currently, TLS or
1895 * GSSAPI) is already completed.
1896 */
1897static int
1898ProcessStartupPacket(Port *port, bool secure_done)
1899{
1900 int32 len;
1901 void *buf;
1902 ProtocolVersion proto;
1903 MemoryContext oldcontext;
1904
1905 pq_startmsgread();
1906
1907 /*
1908 * Grab the first byte of the length word separately, so that we can tell
1909 * whether we have no data at all or an incomplete packet. (This might
1910 * sound inefficient, but it's not really, because of buffering in
1911 * pqcomm.c.)
1912 */
1913 if (pq_getbytes((char *) &len, 1) == EOF)
1914 {
1915 /*
1916 * If we get no data at all, don't clutter the log with a complaint;
1917 * such cases often occur for legitimate reasons. An example is that
1918 * we might be here after responding to NEGOTIATE_SSL_CODE, and if the
1919 * client didn't like our response, it'll probably just drop the
1920 * connection. Service-monitoring software also often just opens and
1921 * closes a connection without sending anything. (So do port
1922 * scanners, which may be less benign, but it's not really our job to
1923 * notice those.)
1924 */
1925 return STATUS_ERROR;
1926 }
1927
1928 if (pq_getbytes(((char *) &len) + 1, 3) == EOF)
1929 {
1930 /* Got a partial length word, so bleat about that */
1931 if (!secure_done)
1932 ereport(COMMERROR,
1933 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1934 errmsg("incomplete startup packet")));
1935 return STATUS_ERROR;
1936 }
1937
1938 len = pg_ntoh32(len);
1939 len -= 4;
1940
1941 if (len < (int32) sizeof(ProtocolVersion) ||
1942 len > MAX_STARTUP_PACKET_LENGTH)
1943 {
1944 ereport(COMMERROR,
1945 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1946 errmsg("invalid length of startup packet")));
1947 return STATUS_ERROR;
1948 }
1949
1950 /*
1951 * Allocate at least the size of an old-style startup packet, plus one
1952 * extra byte, and make sure all are zeroes. This ensures we will have
1953 * null termination of all strings, in both fixed- and variable-length
1954 * packet layouts.
1955 */
1956 if (len <= (int32) sizeof(StartupPacket))
1957 buf = palloc0(sizeof(StartupPacket) + 1);
1958 else
1959 buf = palloc0(len + 1);
1960
1961 if (pq_getbytes(buf, len) == EOF)
1962 {
1963 ereport(COMMERROR,
1964 (errcode(ERRCODE_PROTOCOL_VIOLATION),
1965 errmsg("incomplete startup packet")));
1966 return STATUS_ERROR;
1967 }
1968 pq_endmsgread();
1969
1970 /*
1971 * The first field is either a protocol version number or a special
1972 * request code.
1973 */
1974 port->proto = proto = pg_ntoh32(*((ProtocolVersion *) buf));
1975
1976 if (proto == CANCEL_REQUEST_CODE)
1977 {
1978 processCancelRequest(port, buf);
1979 /* Not really an error, but we don't want to proceed further */
1980 return STATUS_ERROR;
1981 }
1982
1983 if (proto == NEGOTIATE_SSL_CODE && !secure_done)
1984 {
1985 char SSLok;
1986
1987#ifdef USE_SSL
1988 /* No SSL when disabled or on Unix sockets */
1989 if (!LoadedSSL || IS_AF_UNIX(port->laddr.addr.ss_family))
1990 SSLok = 'N';
1991 else
1992 SSLok = 'S'; /* Support for SSL */
1993#else
1994 SSLok = 'N'; /* No support for SSL */
1995#endif
1996
1997retry1:
1998 if (send(port->sock, &SSLok, 1, 0) != 1)
1999 {
2000 if (errno == EINTR)
2001 goto retry1; /* if interrupted, just retry */
2002 ereport(COMMERROR,
2003 (errcode_for_socket_access(),
2004 errmsg("failed to send SSL negotiation response: %m")));
2005 return STATUS_ERROR; /* close the connection */
2006 }
2007
2008#ifdef USE_SSL
2009 if (SSLok == 'S' && secure_open_server(port) == -1)
2010 return STATUS_ERROR;
2011#endif
2012 /* regular startup packet, cancel, etc packet should follow... */
2013 /* but not another SSL negotiation request */
2014 return ProcessStartupPacket(port, true);
2015 }
2016 else if (proto == NEGOTIATE_GSS_CODE && !secure_done)
2017 {
2018 char GSSok = 'N';
2019#ifdef ENABLE_GSS
2020 /* No GSSAPI encryption when on Unix socket */
2021 if (!IS_AF_UNIX(port->laddr.addr.ss_family))
2022 GSSok = 'G';
2023#endif
2024
2025 while (send(port->sock, &GSSok, 1, 0) != 1)
2026 {
2027 if (errno == EINTR)
2028 continue;
2029 ereport(COMMERROR,
2030 (errcode_for_socket_access(),
2031 errmsg("failed to send GSSAPI negotiation response: %m")));
2032 return STATUS_ERROR; /* close the connection */
2033 }
2034
2035#ifdef ENABLE_GSS
2036 if (GSSok == 'G' && secure_open_gssapi(port) == -1)
2037 return STATUS_ERROR;
2038#endif
2039 /* Won't ever see more than one negotiation request */
2040 return ProcessStartupPacket(port, true);
2041 }
2042
2043 /* Could add additional special packet types here */
2044
2045 /*
2046 * Set FrontendProtocol now so that ereport() knows what format to send if
2047 * we fail during startup.
2048 */
2049 FrontendProtocol = proto;
2050
2051 /* Check that the major protocol version is in range. */
2052 if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) ||
2053 PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST))
2054 ereport(FATAL,
2055 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2056 errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2057 PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2058 PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
2059 PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
2060 PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
2061
2062 /*
2063 * Now fetch parameters out of startup packet and save them into the Port
2064 * structure. All data structures attached to the Port struct must be
2065 * allocated in TopMemoryContext so that they will remain available in a
2066 * running backend (even after PostmasterContext is destroyed). We need
2067 * not worry about leaking this storage on failure, since we aren't in the
2068 * postmaster process anymore.
2069 */
2070 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
2071
2072 if (PG_PROTOCOL_MAJOR(proto) >= 3)
2073 {
2074 int32 offset = sizeof(ProtocolVersion);
2075 List *unrecognized_protocol_options = NIL;
2076
2077 /*
2078 * Scan packet body for name/option pairs. We can assume any string
2079 * beginning within the packet body is null-terminated, thanks to
2080 * zeroing extra byte above.
2081 */
2082 port->guc_options = NIL;
2083
2084 while (offset < len)
2085 {
2086 char *nameptr = ((char *) buf) + offset;
2087 int32 valoffset;
2088 char *valptr;
2089
2090 if (*nameptr == '\0')
2091 break; /* found packet terminator */
2092 valoffset = offset + strlen(nameptr) + 1;
2093 if (valoffset >= len)
2094 break; /* missing value, will complain below */
2095 valptr = ((char *) buf) + valoffset;
2096
2097 if (strcmp(nameptr, "database") == 0)
2098 port->database_name = pstrdup(valptr);
2099 else if (strcmp(nameptr, "user") == 0)
2100 port->user_name = pstrdup(valptr);
2101 else if (strcmp(nameptr, "options") == 0)
2102 port->cmdline_options = pstrdup(valptr);
2103 else if (strcmp(nameptr, "replication") == 0)
2104 {
2105 /*
2106 * Due to backward compatibility concerns the replication
2107 * parameter is a hybrid beast which allows the value to be
2108 * either boolean or the string 'database'. The latter
2109 * connects to a specific database which is e.g. required for
 * logical decoding.
2111 */
2112 if (strcmp(valptr, "database") == 0)
2113 {
2114 am_walsender = true;
2115 am_db_walsender = true;
2116 }
2117 else if (!parse_bool(valptr, &am_walsender))
2118 ereport(FATAL,
2119 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2120 errmsg("invalid value for parameter \"%s\": \"%s\"",
2121 "replication",
2122 valptr),
2123 errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2124 }
2125 else if (strncmp(nameptr, "_pq_.", 5) == 0)
2126 {
2127 /*
2128 * Any option beginning with _pq_. is reserved for use as a
2129 * protocol-level option, but at present no such options are
2130 * defined.
2131 */
2132 unrecognized_protocol_options =
2133 lappend(unrecognized_protocol_options, pstrdup(nameptr));
2134 }
2135 else
2136 {
2137 /* Assume it's a generic GUC option */
2138 port->guc_options = lappend(port->guc_options,
2139 pstrdup(nameptr));
2140 port->guc_options = lappend(port->guc_options,
2141 pstrdup(valptr));
2142
2143 /*
2144 * Copy application_name to port if we come across it. This
2145 * is done so we can log the application_name in the
2146 * connection authorization message. Note that the GUC would
2147 * be used but we haven't gone through GUC setup yet.
2148 */
2149 if (strcmp(nameptr, "application_name") == 0)
2150 {
2151 char *tmp_app_name = pstrdup(valptr);
2152
2153 pg_clean_ascii(tmp_app_name);
2154
2155 port->application_name = tmp_app_name;
2156 }
2157 }
2158 offset = valoffset + strlen(valptr) + 1;
2159 }
2160
2161 /*
2162 * If we didn't find a packet terminator exactly at the end of the
2163 * given packet length, complain.
2164 */
2165 if (offset != len - 1)
2166 ereport(FATAL,
2167 (errcode(ERRCODE_PROTOCOL_VIOLATION),
2168 errmsg("invalid startup packet layout: expected terminator as last byte")));
2169
2170 /*
2171 * If the client requested a newer protocol version or if the client
2172 * requested any protocol options we didn't recognize, let them know
2173 * the newest minor protocol version we do support and the names of
2174 * any unrecognized options.
2175 */
2176 if (PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST) ||
2177 unrecognized_protocol_options != NIL)
2178 SendNegotiateProtocolVersion(unrecognized_protocol_options);
2179 }
2180 else
2181 {
2182 /*
2183 * Get the parameters from the old-style, fixed-width-fields startup
2184 * packet as C strings. The packet destination was cleared first so a
2185 * short packet has zeros silently added. We have to be prepared to
2186 * truncate the pstrdup result for oversize fields, though.
2187 */
2188 StartupPacket *packet = (StartupPacket *) buf;
2189
2190 port->database_name = pstrdup(packet->database);
2191 if (strlen(port->database_name) > sizeof(packet->database))
2192 port->database_name[sizeof(packet->database)] = '\0';
2193 port->user_name = pstrdup(packet->user);
2194 if (strlen(port->user_name) > sizeof(packet->user))
2195 port->user_name[sizeof(packet->user)] = '\0';
2196 port->cmdline_options = pstrdup(packet->options);
2197 if (strlen(port->cmdline_options) > sizeof(packet->options))
2198 port->cmdline_options[sizeof(packet->options)] = '\0';
2199 port->guc_options = NIL;
2200 }
2201
2202 /* Check a user name was given. */
2203 if (port->user_name == NULL || port->user_name[0] == '\0')
2204 ereport(FATAL,
2205 (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2206 errmsg("no PostgreSQL user name specified in startup packet")));
2207
2208 /* The database defaults to the user name. */
2209 if (port->database_name == NULL || port->database_name[0] == '\0')
2210 port->database_name = pstrdup(port->user_name);
2211
2212 if (Db_user_namespace)
2213 {
2214 /*
2215 * If user@, it is a global user, remove '@'. We only want to do this
2216 * if there is an '@' at the end and no earlier in the user string or
2217 * they may fake as a local user of another database attaching to this
2218 * database.
2219 */
2220 if (strchr(port->user_name, '@') ==
2221 port->user_name + strlen(port->user_name) - 1)
2222 *strchr(port->user_name, '@') = '\0';
2223 else
2224 {
2225 /* Append '@' and dbname */
2226 port->user_name = psprintf("%s@%s", port->user_name, port->database_name);
2227 }
2228 }
2229
2230 /*
2231 * Truncate given database and user names to length of a Postgres name.
2232 * This avoids lookup failures when overlength names are given.
2233 */
2234 if (strlen(port->database_name) >= NAMEDATALEN)
2235 port->database_name[NAMEDATALEN - 1] = '\0';
2236 if (strlen(port->user_name) >= NAMEDATALEN)
2237 port->user_name[NAMEDATALEN - 1] = '\0';
2238
2239 /*
2240 * Normal walsender backends, e.g. for streaming replication, are not
2241 * connected to a particular database. But walsenders used for logical
2242 * replication need to connect to a specific database. We allow streaming
2243 * replication commands to be issued even if connected to a database as it
2244 * can make sense to first make a basebackup and then stream changes
2245 * starting from that.
2246 */
2247 if (am_walsender && !am_db_walsender)
2248 port->database_name[0] = '\0';
2249
2250 /*
2251 * Done putting stuff in TopMemoryContext.
2252 */
2253 MemoryContextSwitchTo(oldcontext);
2254
2255 /*
2256 * If we're going to reject the connection due to database state, say so
2257 * now instead of wasting cycles on an authentication exchange. (This also
2258 * allows a pg_ping utility to be written.)
2259 */
2260 switch (port->canAcceptConnections)
2261 {
2262 case CAC_STARTUP:
2263 ereport(FATAL,
2264 (errcode(ERRCODE_CANNOT_CONNECT_NOW),
2265 errmsg("the database system is starting up")));
2266 break;
2267 case CAC_SHUTDOWN:
2268 ereport(FATAL,
2269 (errcode(ERRCODE_CANNOT_CONNECT_NOW),
2270 errmsg("the database system is shutting down")));
2271 break;
2272 case CAC_RECOVERY:
2273 ereport(FATAL,
2274 (errcode(ERRCODE_CANNOT_CONNECT_NOW),
2275 errmsg("the database system is in recovery mode")));
2276 break;
2277 case CAC_TOOMANY:
2278 ereport(FATAL,
2279 (errcode(ERRCODE_TOO_MANY_CONNECTIONS),
2280 errmsg("sorry, too many clients already")));
2281 break;
2282 case CAC_WAITBACKUP:
2283 /* OK for now, will check in InitPostgres */
2284 break;
2285 case CAC_OK:
2286 break;
2287 }
2288
2289 return STATUS_OK;
2290}
2291
2292/*
2293 * Send a NegotiateProtocolVersion to the client. This lets the client know
2294 * that they have requested a newer minor protocol version than we are able
2295 * to speak. We'll speak the highest version we know about; the client can,
2296 * of course, abandon the connection if that's a problem.
2297 *
2298 * We also include in the response a list of protocol options we didn't
2299 * understand. This allows clients to include optional parameters that might
2300 * be present either in newer protocol versions or third-party protocol
2301 * extensions without fear of having to reconnect if those options are not
2302 * understood, while at the same time making certain that the client is aware
2303 * of which options were actually accepted.
2304 */
2305static void
2306SendNegotiateProtocolVersion(List *unrecognized_protocol_options)
2307{
2308 StringInfoData buf;
2309 ListCell *lc;
2310
2311 pq_beginmessage(&buf, 'v'); /* NegotiateProtocolVersion */
2312 pq_sendint32(&buf, PG_PROTOCOL_LATEST);
2313 pq_sendint32(&buf, list_length(unrecognized_protocol_options));
2314 foreach(lc, unrecognized_protocol_options)
2315 pq_sendstring(&buf, lfirst(lc));
2316 pq_endmessage(&buf);
2317
2318 /* no need to flush, some other message will follow */
2319}
2320
2321/*
2322 * The client has sent a cancel request packet, not a normal
2323 * start-a-new-connection packet. Perform the necessary processing.
2324 * Nothing is sent back to the client.
2325 */
2326static void
2327processCancelRequest(Port *port, void *pkt)
2328{
2329 CancelRequestPacket *canc = (CancelRequestPacket *) pkt;
2330 int backendPID;
2331 int32 cancelAuthCode;
2332 Backend *bp;
2333
2334#ifndef EXEC_BACKEND
2335 dlist_iter iter;
2336#else
2337 int i;
2338#endif
2339
2340 backendPID = (int) pg_ntoh32(canc->backendPID);
2341 cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode);
2342
2343 /*
2344 * See if we have a matching backend. In the EXEC_BACKEND case, we can no
2345 * longer access the postmaster's own backend list, and must rely on the
2346 * duplicate array in shared memory.
2347 */
2348#ifndef EXEC_BACKEND
2349 dlist_foreach(iter, &BackendList)
2350 {
2351 bp = dlist_container(Backend, elem, iter.cur);
2352#else
2353 for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--)
2354 {
2355 bp = (Backend *) &ShmemBackendArray[i];
2356#endif
2357 if (bp->pid == backendPID)
2358 {
2359 if (bp->cancel_key == cancelAuthCode)
2360 {
2361 /* Found a match; signal that backend to cancel current op */
2362 ereport(DEBUG2,
2363 (errmsg_internal("processing cancel request: sending SIGINT to process %d",
2364 backendPID)));
2365 signal_child(bp->pid, SIGINT);
2366 }
2367 else
2368 /* Right PID, wrong key: no way, Jose */
2369 ereport(LOG,
2370 (errmsg("wrong key in cancel request for process %d",
2371 backendPID)));
2372 return;
2373 }
2374#ifndef EXEC_BACKEND /* make GNU Emacs 26.1 see brace balance */
2375 }
2376#else
2377 }
2378#endif
2379
2380 /* No matching backend */
2381 ereport(LOG,
2382 (errmsg("PID %d in cancel request did not match any process",
2383 backendPID)));
2384}
2385
2386/*
2387 * canAcceptConnections --- check to see if database state allows connections.
2388 */
2389static CAC_state
2390canAcceptConnections(void)
2391{
2392 CAC_state result = CAC_OK;
2393
2394 /*
2395 * Can't start backends when in startup/shutdown/inconsistent recovery
2396 * state.
2397 *
2398 * In state PM_WAIT_BACKUP only superusers can connect (this must be
2399 * allowed so that a superuser can end online backup mode); we return
2400 * CAC_WAITBACKUP code to indicate that this must be checked later. Note
2401 * that neither CAC_OK nor CAC_WAITBACKUP can safely be returned until we
2402 * have checked for too many children.
2403 */
2404 if (pmState != PM_RUN)
2405 {
2406 if (pmState == PM_WAIT_BACKUP)
2407 result = CAC_WAITBACKUP; /* allow superusers only */
2408 else if (Shutdown > NoShutdown)
2409 return CAC_SHUTDOWN; /* shutdown is pending */
2410 else if (!FatalError &&
2411 (pmState == PM_STARTUP ||
2412 pmState == PM_RECOVERY))
2413 return CAC_STARTUP; /* normal startup */
2414 else if (!FatalError &&
2415 pmState == PM_HOT_STANDBY)
2416 result = CAC_OK; /* connection OK during hot standby */
2417 else
2418 return CAC_RECOVERY; /* else must be crash recovery */
2419 }
2420
2421 /*
2422 * Don't start too many children.
2423 *
2424 * We allow more connections than we can have backends here because some
2425 * might still be authenticating; they might fail auth, or some existing
2426 * backend might exit before the auth cycle is completed. The exact
2427 * MaxBackends limit is enforced when a new backend tries to join the
2428 * shared-inval backend array.
2429 *
2430 * The limit here must match the sizes of the per-child-process arrays;
2431 * see comments for MaxLivePostmasterChildren().
2432 */
2433 if (CountChildren(BACKEND_TYPE_ALL) >= MaxLivePostmasterChildren())
2434 result = CAC_TOOMANY;
2435
2436 return result;
2437}
2438
2439
2440/*
2441 * ConnCreate -- create a local connection data structure
2442 *
2443 * Returns NULL on failure, other than out-of-memory which is fatal.
2444 */
2445static Port *
2446ConnCreate(int serverFd)
2447{
2448 Port *port;
2449
2450 if (!(port = (Port *) calloc(1, sizeof(Port))))
2451 {
2452 ereport(LOG,
2453 (errcode(ERRCODE_OUT_OF_MEMORY),
2454 errmsg("out of memory")));
2455 ExitPostmaster(1);
2456 }
2457
2458 if (StreamConnection(serverFd, port) != STATUS_OK)
2459 {
2460 if (port->sock != PGINVALID_SOCKET)
2461 StreamClose(port->sock);
2462 ConnFree(port);
2463 return NULL;
2464 }
2465
2466 /*
2467 * Allocate GSSAPI specific state struct
2468 */
2469#ifndef EXEC_BACKEND
2470#if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
2471 port->gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
2472 if (!port->gss)
2473 {
2474 ereport(LOG,
2475 (errcode(ERRCODE_OUT_OF_MEMORY),
2476 errmsg("out of memory")));
2477 ExitPostmaster(1);
2478 }
2479#endif
2480#endif
2481
2482 return port;
2483}
2484
2485
2486/*
2487 * ConnFree -- free a local connection data structure
2488 */
2489static void
2490ConnFree(Port *conn)
2491{
2492#ifdef USE_SSL
2493 secure_close(conn);
2494#endif
2495 if (conn->gss)
2496 free(conn->gss);
2497 free(conn);
2498}
2499
2500
2501/*
2502 * ClosePostmasterPorts -- close all the postmaster's open sockets
2503 *
2504 * This is called during child process startup to release file descriptors
2505 * that are not needed by that child process. The postmaster still has
2506 * them open, of course.
2507 *
2508 * Note: we pass am_syslogger as a boolean because we don't want to set
2509 * the global variable yet when this is called.
2510 */
2511void
2512ClosePostmasterPorts(bool am_syslogger)
2513{
2514 int i;
2515
2516#ifndef WIN32
2517
2518 /*
2519 * Close the write end of postmaster death watch pipe. It's important to
2520 * do this as early as possible, so that if postmaster dies, others won't
2521 * think that it's still running because we're holding the pipe open.
2522 */
2523 if (close(postmaster_alive_fds[POSTMASTER_FD_OWN]))
2524 ereport(FATAL,
2525 (errcode_for_file_access(),
2526 errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2527 postmaster_alive_fds[POSTMASTER_FD_OWN] = -1;
2528#endif
2529
2530 /* Close the listen sockets */
2531 for (i = 0; i < MAXLISTEN; i++)
2532 {
2533 if (ListenSocket[i] != PGINVALID_SOCKET)
2534 {
2535 StreamClose(ListenSocket[i]);
2536 ListenSocket[i] = PGINVALID_SOCKET;
2537 }
2538 }
2539
2540 /* If using syslogger, close the read side of the pipe */
2541 if (!am_syslogger)
2542 {
2543#ifndef WIN32
2544 if (syslogPipe[0] >= 0)
2545 close(syslogPipe[0]);
2546 syslogPipe[0] = -1;
2547#else
2548 if (syslogPipe[0])
2549 CloseHandle(syslogPipe[0]);
2550 syslogPipe[0] = 0;
2551#endif
2552 }
2553
2554#ifdef USE_BONJOUR
2555 /* If using Bonjour, close the connection to the mDNS daemon */
2556 if (bonjour_sdref)
2557 close(DNSServiceRefSockFD(bonjour_sdref));
2558#endif
2559}
2560
2561
2562/*
2563 * InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds
2564 *
2565 * Called early in the postmaster and every backend.
2566 */
2567void
2568InitProcessGlobals(void)
2569{
2570 unsigned int rseed;
2571
2572 MyProcPid = getpid();
2573 MyStartTimestamp = GetCurrentTimestamp();
2574 MyStartTime = timestamptz_to_time_t(MyStartTimestamp);
2575
2576 /*
2577 * Set a different seed for random() in every process. We want something
2578 * unpredictable, so if possible, use high-quality random bits for the
2579 * seed. Otherwise, fall back to a seed based on timestamp and PID.
2580 */
2581 if (!pg_strong_random(&rseed, sizeof(rseed)))
2582 {
2583 /*
2584 * Since PIDs and timestamps tend to change more frequently in their
2585 * least significant bits, shift the timestamp left to allow a larger
2586 * total number of seeds in a given time period. Since that would
2587 * leave only 20 bits of the timestamp that cycle every ~1 second,
2588 * also mix in some higher bits.
2589 */
2590 rseed = ((uint64) MyProcPid) ^
2591 ((uint64) MyStartTimestamp << 12) ^
2592 ((uint64) MyStartTimestamp >> 20);
2593 }
2594 srandom(rseed);
2595}
2596
2597
/*
 * reset_shared -- reset shared memory and semaphores
 *
 * Simply passes "port" through to CreateSharedMemoryAndSemaphores(), which
 * creates or re-creates the shared memory and semaphores.
 */
static void
reset_shared(int port)
{
	/*
	 * The port number is used to determine IPC keys (if using SysV shmem
	 * and/or semas), so in each "cycle of life" we will normally assign the
	 * same keys again.  This helps ensure that dead IPC objects get cleaned
	 * up if the postmaster crashes and is restarted.
	 */
	CreateSharedMemoryAndSemaphores(port);
}
2614
2615
2616/*
2617 * SIGHUP -- reread config files, and tell children to do same
2618 */
2619static void
2620SIGHUP_handler(SIGNAL_ARGS)
2621{
2622 int save_errno = errno;
2623
2624 PG_SETMASK(&BlockSig);
2625
2626 if (Shutdown <= SmartShutdown)
2627 {
2628 ereport(LOG,
2629 (errmsg("received SIGHUP, reloading configuration files")));
2630 ProcessConfigFile(PGC_SIGHUP);
2631 SignalChildren(SIGHUP);
2632 if (StartupPID != 0)
2633 signal_child(StartupPID, SIGHUP);
2634 if (BgWriterPID != 0)
2635 signal_child(BgWriterPID, SIGHUP);
2636 if (CheckpointerPID != 0)
2637 signal_child(CheckpointerPID, SIGHUP);
2638 if (WalWriterPID != 0)
2639 signal_child(WalWriterPID, SIGHUP);
2640 if (WalReceiverPID != 0)
2641 signal_child(WalReceiverPID, SIGHUP);
2642 if (AutoVacPID != 0)
2643 signal_child(AutoVacPID, SIGHUP);
2644 if (PgArchPID != 0)
2645 signal_child(PgArchPID, SIGHUP);
2646 if (SysLoggerPID != 0)
2647 signal_child(SysLoggerPID, SIGHUP);
2648 if (PgStatPID != 0)
2649 signal_child(PgStatPID, SIGHUP);
2650
2651 /* Reload authentication config files too */
2652 if (!load_hba())
2653 ereport(LOG,
2654 /* translator: %s is a configuration file */
2655 (errmsg("%s was not reloaded", "pg_hba.conf")));
2656
2657 if (!load_ident())
2658 ereport(LOG,
2659 (errmsg("%s was not reloaded", "pg_ident.conf")));
2660
2661#ifdef USE_SSL
2662 /* Reload SSL configuration as well */
2663 if (EnableSSL)
2664 {
2665 if (secure_initialize(false) == 0)
2666 LoadedSSL = true;
2667 else
2668 ereport(LOG,
2669 (errmsg("SSL configuration was not reloaded")));
2670 }
2671 else
2672 {
2673 secure_destroy();
2674 LoadedSSL = false;
2675 }
2676#endif
2677
2678#ifdef EXEC_BACKEND
2679 /* Update the starting-point file for future children */
2680 write_nondefault_variables(PGC_SIGHUP);
2681#endif
2682 }
2683
2684 PG_SETMASK(&UnBlockSig);
2685
2686 errno = save_errno;
2687}
2688
2689
2690/*
2691 * pmdie -- signal handler for processing various postmaster signals.
2692 */
2693static void
2694pmdie(SIGNAL_ARGS)
2695{
2696 int save_errno = errno;
2697
2698 PG_SETMASK(&BlockSig);
2699
2700 ereport(DEBUG2,
2701 (errmsg_internal("postmaster received signal %d",
2702 postgres_signal_arg)));
2703
2704 switch (postgres_signal_arg)
2705 {
2706 case SIGTERM:
2707
2708 /*
2709 * Smart Shutdown:
2710 *
2711 * Wait for children to end their work, then shut down.
2712 */
2713 if (Shutdown >= SmartShutdown)
2714 break;
2715 Shutdown = SmartShutdown;
2716 ereport(LOG,
2717 (errmsg("received smart shutdown request")));
2718
2719 /* Report status */
2720 AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2721#ifdef USE_SYSTEMD
2722 sd_notify(0, "STOPPING=1");
2723#endif
2724
2725 if (pmState == PM_RUN || pmState == PM_RECOVERY ||
2726 pmState == PM_HOT_STANDBY || pmState == PM_STARTUP)
2727 {
2728 /* autovac workers are told to shut down immediately */
2729 /* and bgworkers too; does this need tweaking? */
2730 SignalSomeChildren(SIGTERM,
2731 BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER);
2732 /* and the autovac launcher too */
2733 if (AutoVacPID != 0)
2734 signal_child(AutoVacPID, SIGTERM);
2735 /* and the bgwriter too */
2736 if (BgWriterPID != 0)
2737 signal_child(BgWriterPID, SIGTERM);
2738 /* and the walwriter too */
2739 if (WalWriterPID != 0)
2740 signal_child(WalWriterPID, SIGTERM);
2741
2742 /*
2743 * If we're in recovery, we can't kill the startup process
2744 * right away, because at present doing so does not release
2745 * its locks. We might want to change this in a future
2746 * release. For the time being, the PM_WAIT_READONLY state
2747 * indicates that we're waiting for the regular (read only)
2748 * backends to die off; once they do, we'll kill the startup
2749 * and walreceiver processes.
2750 */
2751 pmState = (pmState == PM_RUN) ?
2752 PM_WAIT_BACKUP : PM_WAIT_READONLY;
2753 }
2754
2755 /*
2756 * Now wait for online backup mode to end and backends to exit. If
2757 * that is already the case, PostmasterStateMachine will take the
2758 * next step.
2759 */
2760 PostmasterStateMachine();
2761 break;
2762
2763 case SIGINT:
2764
2765 /*
2766 * Fast Shutdown:
2767 *
2768 * Abort all children with SIGTERM (rollback active transactions
2769 * and exit) and shut down when they are gone.
2770 */
2771 if (Shutdown >= FastShutdown)
2772 break;
2773 Shutdown = FastShutdown;
2774 ereport(LOG,
2775 (errmsg("received fast shutdown request")));
2776
2777 /* Report status */
2778 AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2779#ifdef USE_SYSTEMD
2780 sd_notify(0, "STOPPING=1");
2781#endif
2782
2783 if (StartupPID != 0)
2784 signal_child(StartupPID, SIGTERM);
2785 if (BgWriterPID != 0)
2786 signal_child(BgWriterPID, SIGTERM);
2787 if (WalReceiverPID != 0)
2788 signal_child(WalReceiverPID, SIGTERM);
2789 if (pmState == PM_STARTUP || pmState == PM_RECOVERY)
2790 {
2791 SignalSomeChildren(SIGTERM, BACKEND_TYPE_BGWORKER);
2792
2793 /*
2794 * Only startup, bgwriter, walreceiver, possibly bgworkers,
2795 * and/or checkpointer should be active in this state; we just
2796 * signaled the first four, and we don't want to kill
2797 * checkpointer yet.
2798 */
2799 pmState = PM_WAIT_BACKENDS;
2800 }
2801 else if (pmState == PM_RUN ||
2802 pmState == PM_WAIT_BACKUP ||
2803 pmState == PM_WAIT_READONLY ||
2804 pmState == PM_WAIT_BACKENDS ||
2805 pmState == PM_HOT_STANDBY)
2806 {
2807 ereport(LOG,
2808 (errmsg("aborting any active transactions")));
2809 /* shut down all backends and workers */
2810 SignalSomeChildren(SIGTERM,
2811 BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC |
2812 BACKEND_TYPE_BGWORKER);
2813 /* and the autovac launcher too */
2814 if (AutoVacPID != 0)
2815 signal_child(AutoVacPID, SIGTERM);
2816 /* and the walwriter too */
2817 if (WalWriterPID != 0)
2818 signal_child(WalWriterPID, SIGTERM);
2819 pmState = PM_WAIT_BACKENDS;
2820 }
2821
2822 /*
2823 * Now wait for backends to exit. If there are none,
2824 * PostmasterStateMachine will take the next step.
2825 */
2826 PostmasterStateMachine();
2827 break;
2828
2829 case SIGQUIT:
2830
2831 /*
2832 * Immediate Shutdown:
2833 *
2834 * abort all children with SIGQUIT, wait for them to exit,
2835 * terminate remaining ones with SIGKILL, then exit without
2836 * attempt to properly shut down the data base system.
2837 */
2838 if (Shutdown >= ImmediateShutdown)
2839 break;
2840 Shutdown = ImmediateShutdown;
2841 ereport(LOG,
2842 (errmsg("received immediate shutdown request")));
2843
2844 /* Report status */
2845 AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2846#ifdef USE_SYSTEMD
2847 sd_notify(0, "STOPPING=1");
2848#endif
2849
2850 TerminateChildren(SIGQUIT);
2851 pmState = PM_WAIT_BACKENDS;
2852
2853 /* set stopwatch for them to die */
2854 AbortStartTime = time(NULL);
2855
2856 /*
2857 * Now wait for backends to exit. If there are none,
2858 * PostmasterStateMachine will take the next step.
2859 */
2860 PostmasterStateMachine();
2861 break;
2862 }
2863
2864 PG_SETMASK(&UnBlockSig);
2865
2866 errno = save_errno;
2867}
2868
2869/*
2870 * Reaper -- signal handler to cleanup after a child process dies.
2871 */
2872static void
2873reaper(SIGNAL_ARGS)
2874{
2875 int save_errno = errno;
2876 int pid; /* process id of dead child process */
2877 int exitstatus; /* its exit status */
2878
2879 PG_SETMASK(&BlockSig);
2880
2881 ereport(DEBUG4,
2882 (errmsg_internal("reaping dead processes")));
2883
2884 while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
2885 {
2886 /*
2887 * Check if this child was a startup process.
2888 */
2889 if (pid == StartupPID)
2890 {
2891 StartupPID = 0;
2892
2893 /*
2894 * Startup process exited in response to a shutdown request (or it
2895 * completed normally regardless of the shutdown request).
2896 */
2897 if (Shutdown > NoShutdown &&
2898 (EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
2899 {
2900 StartupStatus = STARTUP_NOT_RUNNING;
2901 pmState = PM_WAIT_BACKENDS;
2902 /* PostmasterStateMachine logic does the rest */
2903 continue;
2904 }
2905
2906 if (EXIT_STATUS_3(exitstatus))
2907 {
2908 ereport(LOG,
2909 (errmsg("shutdown at recovery target")));
2910 StartupStatus = STARTUP_NOT_RUNNING;
2911 Shutdown = SmartShutdown;
2912 TerminateChildren(SIGTERM);
2913 pmState = PM_WAIT_BACKENDS;
2914 /* PostmasterStateMachine logic does the rest */
2915 continue;
2916 }
2917
2918 /*
2919 * Unexpected exit of startup process (including FATAL exit)
2920 * during PM_STARTUP is treated as catastrophic. There are no
2921 * other processes running yet, so we can just exit.
2922 */
2923 if (pmState == PM_STARTUP && !EXIT_STATUS_0(exitstatus))
2924 {
2925 LogChildExit(LOG, _("startup process"),
2926 pid, exitstatus);
2927 ereport(LOG,
2928 (errmsg("aborting startup due to startup process failure")));
2929 ExitPostmaster(1);
2930 }
2931
2932 /*
2933 * After PM_STARTUP, any unexpected exit (including FATAL exit) of
2934 * the startup process is catastrophic, so kill other children,
2935 * and set StartupStatus so we don't try to reinitialize after
2936 * they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
2937 * then we previously sent the startup process a SIGQUIT; so
2938 * that's probably the reason it died, and we do want to try to
2939 * restart in that case.
2940 */
2941 if (!EXIT_STATUS_0(exitstatus))
2942 {
2943 if (StartupStatus == STARTUP_SIGNALED)
2944 StartupStatus = STARTUP_NOT_RUNNING;
2945 else
2946 StartupStatus = STARTUP_CRASHED;
2947 HandleChildCrash(pid, exitstatus,
2948 _("startup process"));
2949 continue;
2950 }
2951
2952 /*
2953 * Startup succeeded, commence normal operations
2954 */
2955 StartupStatus = STARTUP_NOT_RUNNING;
2956 FatalError = false;
2957 Assert(AbortStartTime == 0);
2958 ReachedNormalRunning = true;
2959 pmState = PM_RUN;
2960
2961 /*
2962 * Crank up the background tasks, if we didn't do that already
2963 * when we entered consistent recovery state. It doesn't matter
2964 * if this fails, we'll just try again later.
2965 */
2966 if (CheckpointerPID == 0)
2967 CheckpointerPID = StartCheckpointer();
2968 if (BgWriterPID == 0)
2969 BgWriterPID = StartBackgroundWriter();
2970 if (WalWriterPID == 0)
2971 WalWriterPID = StartWalWriter();
2972
2973 /*
2974 * Likewise, start other special children as needed. In a restart
2975 * situation, some of them may be alive already.
2976 */
2977 if (!IsBinaryUpgrade && AutoVacuumingActive() && AutoVacPID == 0)
2978 AutoVacPID = StartAutoVacLauncher();
2979 if (PgArchStartupAllowed() && PgArchPID == 0)
2980 PgArchPID = pgarch_start();
2981 if (PgStatPID == 0)
2982 PgStatPID = pgstat_start();
2983
2984 /* workers may be scheduled to start now */
2985 maybe_start_bgworkers();
2986
2987 /* at this point we are really open for business */
2988 ereport(LOG,
2989 (errmsg("database system is ready to accept connections")));
2990
2991 /* Report status */
2992 AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY);
2993#ifdef USE_SYSTEMD
2994 sd_notify(0, "READY=1");
2995#endif
2996
2997 continue;
2998 }
2999
3000 /*
3001 * Was it the bgwriter? Normal exit can be ignored; we'll start a new
3002 * one at the next iteration of the postmaster's main loop, if
3003 * necessary. Any other exit condition is treated as a crash.
3004 */
3005 if (pid == BgWriterPID)
3006 {
3007 BgWriterPID = 0;
3008 if (!EXIT_STATUS_0(exitstatus))
3009 HandleChildCrash(pid, exitstatus,
3010 _("background writer process"));
3011 continue;
3012 }
3013
3014 /*
3015 * Was it the checkpointer?
3016 */
3017 if (pid == CheckpointerPID)
3018 {
3019 CheckpointerPID = 0;
3020 if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
3021 {
3022 /*
3023 * OK, we saw normal exit of the checkpointer after it's been
3024 * told to shut down. We expect that it wrote a shutdown
3025 * checkpoint. (If for some reason it didn't, recovery will
3026 * occur on next postmaster start.)
3027 *
3028 * At this point we should have no normal backend children
3029 * left (else we'd not be in PM_SHUTDOWN state) but we might
3030 * have dead_end children to wait for.
3031 *
3032 * If we have an archiver subprocess, tell it to do a last
3033 * archive cycle and quit. Likewise, if we have walsender
3034 * processes, tell them to send any remaining WAL and quit.
3035 */
3036 Assert(Shutdown > NoShutdown);
3037
3038 /* Waken archiver for the last time */
3039 if (PgArchPID != 0)
3040 signal_child(PgArchPID, SIGUSR2);
3041
3042 /*
3043 * Waken walsenders for the last time. No regular backends
3044 * should be around anymore.
3045 */
3046 SignalChildren(SIGUSR2);
3047
3048 pmState = PM_SHUTDOWN_2;
3049
3050 /*
3051 * We can also shut down the stats collector now; there's
3052 * nothing left for it to do.
3053 */
3054 if (PgStatPID != 0)
3055 signal_child(PgStatPID, SIGQUIT);
3056 }
3057 else
3058 {
3059 /*
3060 * Any unexpected exit of the checkpointer (including FATAL
3061 * exit) is treated as a crash.
3062 */
3063 HandleChildCrash(pid, exitstatus,
3064 _("checkpointer process"));
3065 }
3066
3067 continue;
3068 }
3069
3070 /*
3071 * Was it the wal writer? Normal exit can be ignored; we'll start a
3072 * new one at the next iteration of the postmaster's main loop, if
3073 * necessary. Any other exit condition is treated as a crash.
3074 */
3075 if (pid == WalWriterPID)
3076 {
3077 WalWriterPID = 0;
3078 if (!EXIT_STATUS_0(exitstatus))
3079 HandleChildCrash(pid, exitstatus,
3080 _("WAL writer process"));
3081 continue;
3082 }
3083
3084 /*
3085 * Was it the wal receiver? If exit status is zero (normal) or one
3086 * (FATAL exit), we assume everything is all right just like normal
3087 * backends. (If we need a new wal receiver, we'll start one at the
3088 * next iteration of the postmaster's main loop.)
3089 */
3090 if (pid == WalReceiverPID)
3091 {
3092 WalReceiverPID = 0;
3093 if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3094 HandleChildCrash(pid, exitstatus,
3095 _("WAL receiver process"));
3096 continue;
3097 }
3098
3099 /*
3100 * Was it the autovacuum launcher? Normal exit can be ignored; we'll
3101 * start a new one at the next iteration of the postmaster's main
3102 * loop, if necessary. Any other exit condition is treated as a
3103 * crash.
3104 */
3105 if (pid == AutoVacPID)
3106 {
3107 AutoVacPID = 0;
3108 if (!EXIT_STATUS_0(exitstatus))
3109 HandleChildCrash(pid, exitstatus,
3110 _("autovacuum launcher process"));
3111 continue;
3112 }
3113
3114 /*
3115 * Was it the archiver? If so, just try to start a new one; no need
3116 * to force reset of the rest of the system. (If fail, we'll try
3117 * again in future cycles of the main loop.). Unless we were waiting
3118 * for it to shut down; don't restart it in that case, and
3119 * PostmasterStateMachine() will advance to the next shutdown step.
3120 */
3121 if (pid == PgArchPID)
3122 {
3123 PgArchPID = 0;
3124 if (!EXIT_STATUS_0(exitstatus))
3125 LogChildExit(LOG, _("archiver process"),
3126 pid, exitstatus);
3127 if (PgArchStartupAllowed())
3128 PgArchPID = pgarch_start();
3129 continue;
3130 }
3131
3132 /*
3133 * Was it the statistics collector? If so, just try to start a new
3134 * one; no need to force reset of the rest of the system. (If fail,
3135 * we'll try again in future cycles of the main loop.)
3136 */
3137 if (pid == PgStatPID)
3138 {
3139 PgStatPID = 0;
3140 if (!EXIT_STATUS_0(exitstatus))
3141 LogChildExit(LOG, _("statistics collector process"),
3142 pid, exitstatus);
3143 if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
3144 PgStatPID = pgstat_start();
3145 continue;
3146 }
3147
3148 /* Was it the system logger? If so, try to start a new one */
3149 if (pid == SysLoggerPID)
3150 {
3151 SysLoggerPID = 0;
3152 /* for safety's sake, launch new logger *first* */
3153 SysLoggerPID = SysLogger_Start();
3154 if (!EXIT_STATUS_0(exitstatus))
3155 LogChildExit(LOG, _("system logger process"),
3156 pid, exitstatus);
3157 continue;
3158 }
3159
3160 /* Was it one of our background workers? */
3161 if (CleanupBackgroundWorker(pid, exitstatus))
3162 {
3163 /* have it be restarted */
3164 HaveCrashedWorker = true;
3165 continue;
3166 }
3167
3168 /*
3169 * Else do standard backend child cleanup.
3170 */
3171 CleanupBackend(pid, exitstatus);
3172 } /* loop over pending child-death reports */
3173
3174 /*
3175 * After cleaning out the SIGCHLD queue, see if we have any state changes
3176 * or actions to make.
3177 */
3178 PostmasterStateMachine();
3179
3180 /* Done with signal handler */
3181 PG_SETMASK(&UnBlockSig);
3182
3183 errno = save_errno;
3184}
3185
3186/*
3187 * Scan the bgworkers list and see if the given PID (which has just stopped
3188 * or crashed) is in it. Handle its shutdown if so, and return true. If not a
3189 * bgworker, return false.
3190 *
3191 * This is heavily based on CleanupBackend. One important difference is that
3192 * we don't know yet that the dying process is a bgworker, so we must be silent
3193 * until we're sure it is.
3194 */
3195static bool
3196CleanupBackgroundWorker(int pid,
3197 int exitstatus) /* child's exit status */
3198{
3199 char namebuf[MAXPGPATH];
3200 slist_mutable_iter iter;
3201
3202 slist_foreach_modify(iter, &BackgroundWorkerList)
3203 {
3204 RegisteredBgWorker *rw;
3205
3206 rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3207
3208 if (rw->rw_pid != pid)
3209 continue;
3210
3211#ifdef WIN32
3212 /* see CleanupBackend */
3213 if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3214 exitstatus = 0;
3215#endif
3216
3217 snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
3218 rw->rw_worker.bgw_type);
3219
3220
3221 if (!EXIT_STATUS_0(exitstatus))
3222 {
3223 /* Record timestamp, so we know when to restart the worker. */
3224 rw->rw_crashed_at = GetCurrentTimestamp();
3225 }
3226 else
3227 {
3228 /* Zero exit status means terminate */
3229 rw->rw_crashed_at = 0;
3230 rw->rw_terminate = true;
3231 }
3232
3233 /*
3234 * Additionally, for shared-memory-connected workers, just like a
3235 * backend, any exit status other than 0 or 1 is considered a crash
3236 * and causes a system-wide restart.
3237 */
3238 if ((rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0)
3239 {
3240 if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3241 {
3242 HandleChildCrash(pid, exitstatus, namebuf);
3243 return true;
3244 }
3245 }
3246
3247 /*
3248 * We must release the postmaster child slot whether this worker is
3249 * connected to shared memory or not, but we only treat it as a crash
3250 * if it is in fact connected.
3251 */
3252 if (!ReleasePostmasterChildSlot(rw->rw_child_slot) &&
3253 (rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0)
3254 {
3255 HandleChildCrash(pid, exitstatus, namebuf);
3256 return true;
3257 }
3258
3259 /* Get it out of the BackendList and clear out remaining data */
3260 dlist_delete(&rw->rw_backend->elem);
3261#ifdef EXEC_BACKEND
3262 ShmemBackendArrayRemove(rw->rw_backend);
3263#endif
3264
3265 /*
3266 * It's possible that this background worker started some OTHER
3267 * background worker and asked to be notified when that worker started
3268 * or stopped. If so, cancel any notifications destined for the
3269 * now-dead backend.
3270 */
3271 if (rw->rw_backend->bgworker_notify)
3272 BackgroundWorkerStopNotifications(rw->rw_pid);
3273 free(rw->rw_backend);
3274 rw->rw_backend = NULL;
3275 rw->rw_pid = 0;
3276 rw->rw_child_slot = 0;
3277 ReportBackgroundWorkerExit(&iter); /* report child death */
3278
3279 LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3280 namebuf, pid, exitstatus);
3281
3282 return true;
3283 }
3284
3285 return false;
3286}
3287
3288/*
3289 * CleanupBackend -- cleanup after terminated backend.
3290 *
3291 * Remove all local state associated with backend.
3292 *
3293 * If you change this, see also CleanupBackgroundWorker.
3294 */
3295static void
3296CleanupBackend(int pid,
3297 int exitstatus) /* child's exit status. */
3298{
3299 dlist_mutable_iter iter;
3300
3301 LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3302
3303 /*
3304 * If a backend dies in an ugly way then we must signal all other backends
3305 * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3306 * assume everything is all right and proceed to remove the backend from
3307 * the active backend list.
3308 */
3309
3310#ifdef WIN32
3311
3312 /*
3313 * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3314 * since that sometimes happens under load when the process fails to start
3315 * properly (long before it starts using shared memory). Microsoft reports
3316 * it is related to mutex failure:
3317 * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3318 */
3319 if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3320 {
3321 LogChildExit(LOG, _("server process"), pid, exitstatus);
3322 exitstatus = 0;
3323 }
3324#endif
3325
3326 if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3327 {
3328 HandleChildCrash(pid, exitstatus, _("server process"));
3329 return;
3330 }
3331
3332 dlist_foreach_modify(iter, &BackendList)
3333 {
3334 Backend *bp = dlist_container(Backend, elem, iter.cur);
3335
3336 if (bp->pid == pid)
3337 {
3338 if (!bp->dead_end)
3339 {
3340 if (!ReleasePostmasterChildSlot(bp->child_slot))
3341 {
3342 /*
3343 * Uh-oh, the child failed to clean itself up. Treat as a
3344 * crash after all.
3345 */
3346 HandleChildCrash(pid, exitstatus, _("server process"));
3347 return;
3348 }
3349#ifdef EXEC_BACKEND
3350 ShmemBackendArrayRemove(bp);
3351#endif
3352 }
3353 if (bp->bgworker_notify)
3354 {
3355 /*
3356 * This backend may have been slated to receive SIGUSR1 when
3357 * some background worker started or stopped. Cancel those
3358 * notifications, as we don't want to signal PIDs that are not
3359 * PostgreSQL backends. This gets skipped in the (probably
3360 * very common) case where the backend has never requested any
3361 * such notifications.
3362 */
3363 BackgroundWorkerStopNotifications(bp->pid);
3364 }
3365 dlist_delete(iter.cur);
3366 free(bp);
3367 break;
3368 }
3369 }
3370}
3371
3372/*
3373 * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3374 * walwriter, autovacuum, or background worker.
3375 *
3376 * The objectives here are to clean up our local state about the child
3377 * process, and to signal all other remaining children to quickdie.
3378 */
3379static void
3380HandleChildCrash(int pid, int exitstatus, const char *procname)
3381{
3382 dlist_mutable_iter iter;
3383 slist_iter siter;
3384 Backend *bp;
3385 bool take_action;
3386
3387 /*
3388 * We only log messages and send signals if this is the first process
3389 * crash and we're not doing an immediate shutdown; otherwise, we're only
3390 * here to update postmaster's idea of live processes. If we have already
3391 * signalled children, nonzero exit status is to be expected, so don't
3392 * clutter log.
3393 */
3394 take_action = !FatalError && Shutdown != ImmediateShutdown;
3395
3396 if (take_action)
3397 {
3398 LogChildExit(LOG, procname, pid, exitstatus);
3399 ereport(LOG,
3400 (errmsg("terminating any other active server processes")));
3401 }
3402
3403 /* Process background workers. */
3404 slist_foreach(siter, &BackgroundWorkerList)
3405 {
3406 RegisteredBgWorker *rw;
3407
3408 rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3409 if (rw->rw_pid == 0)
3410 continue; /* not running */
3411 if (rw->rw_pid == pid)
3412 {
3413 /*
3414 * Found entry for freshly-dead worker, so remove it.
3415 */
3416 (void) ReleasePostmasterChildSlot(rw->rw_child_slot);
3417 dlist_delete(&rw->rw_backend->elem);
3418#ifdef EXEC_BACKEND
3419 ShmemBackendArrayRemove(rw->rw_backend);
3420#endif
3421 free(rw->rw_backend);
3422 rw->rw_backend = NULL;
3423 rw->rw_pid = 0;
3424 rw->rw_child_slot = 0;
3425 /* don't reset crashed_at */
3426 /* don't report child stop, either */
3427 /* Keep looping so we can signal remaining workers */
3428 }
3429 else
3430 {
3431 /*
3432 * This worker is still alive. Unless we did so already, tell it
3433 * to commit hara-kiri.
3434 *
3435 * SIGQUIT is the special signal that says exit without proc_exit
3436 * and let the user know what's going on. But if SendStop is set
3437 * (-s on command line), then we send SIGSTOP instead, so that we
3438 * can get core dumps from all backends by hand.
3439 */
3440 if (take_action)
3441 {
3442 ereport(DEBUG2,
3443 (errmsg_internal("sending %s to process %d",
3444 (SendStop ? "SIGSTOP" : "SIGQUIT"),
3445 (int) rw->rw_pid)));
3446 signal_child(rw->rw_pid, (SendStop ? SIGSTOP : SIGQUIT));
3447 }
3448 }
3449 }
3450
3451 /* Process regular backends */
3452 dlist_foreach_modify(iter, &BackendList)
3453 {
3454 bp = dlist_container(Backend, elem, iter.cur);
3455
3456 if (bp->pid == pid)
3457 {
3458 /*
3459 * Found entry for freshly-dead backend, so remove it.
3460 */
3461 if (!bp->dead_end)
3462 {
3463 (void) ReleasePostmasterChildSlot(bp->child_slot);
3464#ifdef EXEC_BACKEND
3465 ShmemBackendArrayRemove(bp);
3466#endif
3467 }
3468 dlist_delete(iter.cur);
3469 free(bp);
3470 /* Keep looping so we can signal remaining backends */
3471 }
3472 else
3473 {
3474 /*
3475 * This backend is still alive. Unless we did so already, tell it
3476 * to commit hara-kiri.
3477 *
3478 * SIGQUIT is the special signal that says exit without proc_exit
3479 * and let the user know what's going on. But if SendStop is set
3480 * (-s on command line), then we send SIGSTOP instead, so that we
3481 * can get core dumps from all backends by hand.
3482 *
3483 * We could exclude dead_end children here, but at least in the
3484 * SIGSTOP case it seems better to include them.
3485 *
3486 * Background workers were already processed above; ignore them
3487 * here.
3488 */
3489 if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3490 continue;
3491
3492 if (take_action)
3493 {
3494 ereport(DEBUG2,
3495 (errmsg_internal("sending %s to process %d",
3496 (SendStop ? "SIGSTOP" : "SIGQUIT"),
3497 (int) bp->pid)));
3498 signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
3499 }
3500 }
3501 }
3502
3503 /* Take care of the startup process too */
3504 if (pid == StartupPID)
3505 {
3506 StartupPID = 0;
3507 StartupStatus = STARTUP_CRASHED;
3508 }
3509 else if (StartupPID != 0 && take_action)
3510 {
3511 ereport(DEBUG2,
3512 (errmsg_internal("sending %s to process %d",
3513 (SendStop ? "SIGSTOP" : "SIGQUIT"),
3514 (int) StartupPID)));
3515 signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT));
3516 StartupStatus = STARTUP_SIGNALED;
3517 }
3518
3519 /* Take care of the bgwriter too */
3520 if (pid == BgWriterPID)
3521 BgWriterPID = 0;
3522 else if (BgWriterPID != 0 && take_action)
3523 {
3524 ereport(DEBUG2,
3525 (errmsg_internal("sending %s to process %d",
3526 (SendStop ? "SIGSTOP" : "SIGQUIT"),
3527 (int) BgWriterPID)));
3528 signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3529 }
3530
3531 /* Take care of the checkpointer too */
3532 if (pid == CheckpointerPID)
3533 CheckpointerPID = 0;
3534 else if (CheckpointerPID != 0 && take_action)
3535 {
3536 ereport(DEBUG2,
3537 (errmsg_internal("sending %s to process %d",
3538 (SendStop ? "SIGSTOP" : "SIGQUIT"),
3539 (int) CheckpointerPID)));
3540 signal_child(CheckpointerPID, (SendStop ? SIGSTOP : SIGQUIT));
3541 }
3542
3543 /* Take care of the walwriter too */
3544 if (pid == WalWriterPID)
3545 WalWriterPID = 0;
3546 else if (WalWriterPID != 0 && take_action)
3547 {
3548 ereport(DEBUG2,
3549 (errmsg_internal("sending %s to process %d",
3550 (SendStop ? "SIGSTOP" : "SIGQUIT"),
3551 (int) WalWriterPID)));
3552 signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3553 }
3554
3555 /* Take care of the walreceiver too */
3556 if (pid == WalReceiverPID)
3557 WalReceiverPID = 0;
3558 else if (WalReceiverPID != 0 && take_action)
3559 {
3560 ereport(DEBUG2,
3561 (errmsg_internal("sending %s to process %d",
3562 (SendStop ? "SIGSTOP" : "SIGQUIT"),
3563 (int) WalReceiverPID)));
3564 signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT));
3565 }
3566
3567 /* Take care of the autovacuum launcher too */
3568 if (pid == AutoVacPID)
3569 AutoVacPID = 0;
3570 else if (AutoVacPID != 0 && take_action)
3571 {
3572 ereport(DEBUG2,
3573 (errmsg_internal("sending %s to process %d",
3574 (SendStop ? "SIGSTOP" : "SIGQUIT"),
3575 (int) AutoVacPID)));
3576 signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
3577 }
3578
3579 /*
3580 * Force a power-cycle of the pgarch process too. (This isn't absolutely
3581 * necessary, but it seems like a good idea for robustness, and it
3582 * simplifies the state-machine logic in the case where a shutdown request
3583 * arrives during crash processing.)
3584 */
3585 if (PgArchPID != 0 && take_action)
3586 {
3587 ereport(DEBUG2,
3588 (errmsg_internal("sending %s to process %d",
3589 "SIGQUIT",
3590 (int) PgArchPID)));
3591 signal_child(PgArchPID, SIGQUIT);
3592 }
3593
3594 /*
3595 * Force a power-cycle of the pgstat process too. (This isn't absolutely
3596 * necessary, but it seems like a good idea for robustness, and it
3597 * simplifies the state-machine logic in the case where a shutdown request
3598 * arrives during crash processing.)
3599 */
3600 if (PgStatPID != 0 && take_action)
3601 {
3602 ereport(DEBUG2,
3603 (errmsg_internal("sending %s to process %d",
3604 "SIGQUIT",
3605 (int) PgStatPID)));
3606 signal_child(PgStatPID, SIGQUIT);
3607 allow_immediate_pgstat_restart();
3608 }
3609
3610 /* We do NOT restart the syslogger */
3611
3612 if (Shutdown != ImmediateShutdown)
3613 FatalError = true;
3614
3615 /* We now transit into a state of waiting for children to die */
3616 if (pmState == PM_RECOVERY ||
3617 pmState == PM_HOT_STANDBY ||
3618 pmState == PM_RUN ||
3619 pmState == PM_WAIT_BACKUP ||
3620 pmState == PM_WAIT_READONLY ||
3621 pmState == PM_SHUTDOWN)
3622 pmState = PM_WAIT_BACKENDS;
3623
3624 /*
3625 * .. and if this doesn't happen quickly enough, now the clock is ticking
3626 * for us to kill them without mercy.
3627 */
3628 if (AbortStartTime == 0)
3629 AbortStartTime = time(NULL);
3630}
3631
3632/*
3633 * Log the death of a child process.
3634 */
3635static void
3636LogChildExit(int lev, const char *procname, int pid, int exitstatus)
3637{
3638 /*
3639 * size of activity_buffer is arbitrary, but set equal to default
3640 * track_activity_query_size
3641 */
3642 char activity_buffer[1024];
3643 const char *activity = NULL;
3644
3645 if (!EXIT_STATUS_0(exitstatus))
3646 activity = pgstat_get_crashed_backend_activity(pid,
3647 activity_buffer,
3648 sizeof(activity_buffer));
3649
3650 if (WIFEXITED(exitstatus))
3651 ereport(lev,
3652
3653 /*------
3654 translator: %s is a noun phrase describing a child process, such as
3655 "server process" */
3656 (errmsg("%s (PID %d) exited with exit code %d",
3657 procname, pid, WEXITSTATUS(exitstatus)),
3658 activity ? errdetail("Failed process was running: %s", activity) : 0));
3659 else if (WIFSIGNALED(exitstatus))
3660 {
3661#if defined(WIN32)
3662 ereport(lev,
3663
3664 /*------
3665 translator: %s is a noun phrase describing a child process, such as
3666 "server process" */
3667 (errmsg("%s (PID %d) was terminated by exception 0x%X",
3668 procname, pid, WTERMSIG(exitstatus)),
3669 errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
3670 activity ? errdetail("Failed process was running: %s", activity) : 0));
3671#else
3672 ereport(lev,
3673
3674 /*------
3675 translator: %s is a noun phrase describing a child process, such as
3676 "server process" */
3677 (errmsg("%s (PID %d) was terminated by signal %d: %s",
3678 procname, pid, WTERMSIG(exitstatus),
3679 pg_strsignal(WTERMSIG(exitstatus))),
3680 activity ? errdetail("Failed process was running: %s", activity) : 0));
3681#endif
3682 }
3683 else
3684 ereport(lev,
3685
3686 /*------
3687 translator: %s is a noun phrase describing a child process, such as
3688 "server process" */
3689 (errmsg("%s (PID %d) exited with unrecognized status %d",
3690 procname, pid, exitstatus),
3691 activity ? errdetail("Failed process was running: %s", activity) : 0));
3692}
3693
3694/*
3695 * Advance the postmaster's state machine and take actions as appropriate
3696 *
3697 * This is common code for pmdie(), reaper() and sigusr1_handler(), which
3698 * receive the signals that might mean we need to change state.
3699 */
3700static void
3701PostmasterStateMachine(void)
3702{
3703 if (pmState == PM_WAIT_BACKUP)
3704 {
3705 /*
3706 * PM_WAIT_BACKUP state ends when online backup mode is not active.
3707 */
3708 if (!BackupInProgress())
3709 pmState = PM_WAIT_BACKENDS;
3710 }
3711
3712 if (pmState == PM_WAIT_READONLY)
3713 {
3714 /*
3715 * PM_WAIT_READONLY state ends when we have no regular backends that
3716 * have been started during recovery. We kill the startup and
3717 * walreceiver processes and transition to PM_WAIT_BACKENDS. Ideally,
3718 * we might like to kill these processes first and then wait for
3719 * backends to die off, but that doesn't work at present because
3720 * killing the startup process doesn't release its locks.
3721 */
3722 if (CountChildren(BACKEND_TYPE_NORMAL) == 0)
3723 {
3724 if (StartupPID != 0)
3725 signal_child(StartupPID, SIGTERM);
3726 if (WalReceiverPID != 0)
3727 signal_child(WalReceiverPID, SIGTERM);
3728 pmState = PM_WAIT_BACKENDS;
3729 }
3730 }
3731
3732 /*
3733 * If we are in a state-machine state that implies waiting for backends to
3734 * exit, see if they're all gone, and change state if so.
3735 */
3736 if (pmState == PM_WAIT_BACKENDS)
3737 {
3738 /*
3739 * PM_WAIT_BACKENDS state ends when we have no regular backends
3740 * (including autovac workers), no bgworkers (including unconnected
3741 * ones), and no walwriter, autovac launcher or bgwriter. If we are
3742 * doing crash recovery or an immediate shutdown then we expect the
3743 * checkpointer to exit as well, otherwise not. The archiver, stats,
3744 * and syslogger processes are disregarded since they are not
3745 * connected to shared memory; we also disregard dead_end children
3746 * here. Walsenders are also disregarded, they will be terminated
3747 * later after writing the checkpoint record, like the archiver
3748 * process.
3749 */
3750 if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_WORKER) == 0 &&
3751 StartupPID == 0 &&
3752 WalReceiverPID == 0 &&
3753 BgWriterPID == 0 &&
3754 (CheckpointerPID == 0 ||
3755 (!FatalError && Shutdown < ImmediateShutdown)) &&
3756 WalWriterPID == 0 &&
3757 AutoVacPID == 0)
3758 {
3759 if (Shutdown >= ImmediateShutdown || FatalError)
3760 {
3761 /*
3762 * Start waiting for dead_end children to die. This state
3763 * change causes ServerLoop to stop creating new ones.
3764 */
3765 pmState = PM_WAIT_DEAD_END;
3766
3767 /*
3768 * We already SIGQUIT'd the archiver and stats processes, if
3769 * any, when we started immediate shutdown or entered
3770 * FatalError state.
3771 */
3772 }
3773 else
3774 {
3775 /*
3776 * If we get here, we are proceeding with normal shutdown. All
3777 * the regular children are gone, and it's time to tell the
3778 * checkpointer to do a shutdown checkpoint.
3779 */
3780 Assert(Shutdown > NoShutdown);
3781 /* Start the checkpointer if not running */
3782 if (CheckpointerPID == 0)
3783 CheckpointerPID = StartCheckpointer();
3784 /* And tell it to shut down */
3785 if (CheckpointerPID != 0)
3786 {
3787 signal_child(CheckpointerPID, SIGUSR2);
3788 pmState = PM_SHUTDOWN;
3789 }
3790 else
3791 {
3792 /*
3793 * If we failed to fork a checkpointer, just shut down.
3794 * Any required cleanup will happen at next restart. We
3795 * set FatalError so that an "abnormal shutdown" message
3796 * gets logged when we exit.
3797 */
3798 FatalError = true;
3799 pmState = PM_WAIT_DEAD_END;
3800
3801 /* Kill the walsenders, archiver and stats collector too */
3802 SignalChildren(SIGQUIT);
3803 if (PgArchPID != 0)
3804 signal_child(PgArchPID, SIGQUIT);
3805 if (PgStatPID != 0)
3806 signal_child(PgStatPID, SIGQUIT);
3807 }
3808 }
3809 }
3810 }
3811
3812 if (pmState == PM_SHUTDOWN_2)
3813 {
3814 /*
3815 * PM_SHUTDOWN_2 state ends when there's no other children than
3816 * dead_end children left. There shouldn't be any regular backends
3817 * left by now anyway; what we're really waiting for is walsenders and
3818 * archiver.
3819 */
3820 if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0)
3821 {
3822 pmState = PM_WAIT_DEAD_END;
3823 }
3824 }
3825
3826 if (pmState == PM_WAIT_DEAD_END)
3827 {
3828 /*
3829 * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3830 * (ie, no dead_end children remain), and the archiver and stats
3831 * collector are gone too.
3832 *
3833 * The reason we wait for those two is to protect them against a new
3834 * postmaster starting conflicting subprocesses; this isn't an
3835 * ironclad protection, but it at least helps in the
3836 * shutdown-and-immediately-restart scenario. Note that they have
3837 * already been sent appropriate shutdown signals, either during a
3838 * normal state transition leading up to PM_WAIT_DEAD_END, or during
3839 * FatalError processing.
3840 */
3841 if (dlist_is_empty(&BackendList) &&
3842 PgArchPID == 0 && PgStatPID == 0)
3843 {
3844 /* These other guys should be dead already */
3845 Assert(StartupPID == 0);
3846 Assert(WalReceiverPID == 0);
3847 Assert(BgWriterPID == 0);
3848 Assert(CheckpointerPID == 0);
3849 Assert(WalWriterPID == 0);
3850 Assert(AutoVacPID == 0);
3851 /* syslogger is not considered here */
3852 pmState = PM_NO_CHILDREN;
3853 }
3854 }
3855
3856 /*
3857 * If we've been told to shut down, we exit as soon as there are no
3858 * remaining children. If there was a crash, cleanup will occur at the
3859 * next startup. (Before PostgreSQL 8.3, we tried to recover from the
3860 * crash before exiting, but that seems unwise if we are quitting because
3861 * we got SIGTERM from init --- there may well not be time for recovery
3862 * before init decides to SIGKILL us.)
3863 *
3864 * Note that the syslogger continues to run. It will exit when it sees
3865 * EOF on its input pipe, which happens when there are no more upstream
3866 * processes.
3867 */
3868 if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN)
3869 {
3870 if (FatalError)
3871 {
3872 ereport(LOG, (errmsg("abnormal database system shutdown")));
3873 ExitPostmaster(1);
3874 }
3875 else
3876 {
3877 /*
3878 * Terminate exclusive backup mode to avoid recovery after a clean
3879 * fast shutdown. Since an exclusive backup can only be taken
3880 * during normal running (and not, for example, while running
3881 * under Hot Standby) it only makes sense to do this if we reached
3882 * normal running. If we're still in recovery, the backup file is
3883 * one we're recovering *from*, and we must keep it around so that
3884 * recovery restarts from the right place.
3885 */
3886 if (ReachedNormalRunning)
3887 CancelBackup();
3888
3889 /* Normal exit from the postmaster is here */
3890 ExitPostmaster(0);
3891 }
3892 }
3893
3894 /*
3895 * If the startup process failed, or the user does not want an automatic
3896 * restart after backend crashes, wait for all non-syslogger children to
3897 * exit, and then exit postmaster. We don't try to reinitialize when the
3898 * startup process fails, because more than likely it will just fail again
3899 * and we will keep trying forever.
3900 */
3901 if (pmState == PM_NO_CHILDREN &&
3902 (StartupStatus == STARTUP_CRASHED || !restart_after_crash))
3903 ExitPostmaster(1);
3904
3905 /*
3906 * If we need to recover from a crash, wait for all non-syslogger children
3907 * to exit, then reset shmem and StartupDataBase.
3908 */
3909 if (FatalError && pmState == PM_NO_CHILDREN)
3910 {
3911 ereport(LOG,
3912 (errmsg("all server processes terminated; reinitializing")));
3913
3914 /* allow background workers to immediately restart */
3915 ResetBackgroundWorkerCrashTimes();
3916
3917 shmem_exit(1);
3918
3919 /* re-read control file into local memory */
3920 LocalProcessControlFile(true);
3921
3922 reset_shared(PostPortNumber);
3923
3924 StartupPID = StartupDataBase();
3925 Assert(StartupPID != 0);
3926 StartupStatus = STARTUP_RUNNING;
3927 pmState = PM_STARTUP;
3928 /* crash recovery started, reset SIGKILL flag */
3929 AbortStartTime = 0;
3930 }
3931}
3932
3933
3934/*
3935 * Send a signal to a postmaster child process
3936 *
3937 * On systems that have setsid(), each child process sets itself up as a
3938 * process group leader. For signals that are generally interpreted in the
3939 * appropriate fashion, we signal the entire process group not just the
3940 * direct child process. This allows us to, for example, SIGQUIT a blocked
3941 * archive_recovery script, or SIGINT a script being run by a backend via
3942 * system().
3943 *
3944 * There is a race condition for recently-forked children: they might not
3945 * have executed setsid() yet. So we signal the child directly as well as
3946 * the group. We assume such a child will handle the signal before trying
3947 * to spawn any grandchild processes. We also assume that signaling the
3948 * child twice will not cause any problems.
3949 */
3950static void
3951signal_child(pid_t pid, int signal)
3952{
3953 if (kill(pid, signal) < 0)
3954 elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
3955#ifdef HAVE_SETSID
3956 switch (signal)
3957 {
3958 case SIGINT:
3959 case SIGTERM:
3960 case SIGQUIT:
3961 case SIGSTOP:
3962 case SIGKILL:
3963 if (kill(-pid, signal) < 0)
3964 elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
3965 break;
3966 default:
3967 break;
3968 }
3969#endif
3970}
3971
3972/*
3973 * Send a signal to the targeted children (but NOT special children;
3974 * dead_end children are never signaled, either).
3975 */
3976static bool
3977SignalSomeChildren(int signal, int target)
3978{
3979 dlist_iter iter;
3980 bool signaled = false;
3981
3982 dlist_foreach(iter, &BackendList)
3983 {
3984 Backend *bp = dlist_container(Backend, elem, iter.cur);
3985
3986 if (bp->dead_end)
3987 continue;
3988
3989 /*
3990 * Since target == BACKEND_TYPE_ALL is the most common case, we test
3991 * it first and avoid touching shared memory for every child.
3992 */
3993 if (target != BACKEND_TYPE_ALL)
3994 {
3995 /*
3996 * Assign bkend_type for any recently announced WAL Sender
3997 * processes.
3998 */
3999 if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
4000 IsPostmasterChildWalSender(bp->child_slot))
4001 bp->bkend_type = BACKEND_TYPE_WALSND;
4002
4003 if (!(target & bp->bkend_type))
4004 continue;
4005 }
4006
4007 ereport(DEBUG4,
4008 (errmsg_internal("sending signal %d to process %d",
4009 signal, (int) bp->pid)));
4010 signal_child(bp->pid, signal);
4011 signaled = true;
4012 }
4013 return signaled;
4014}
4015
4016/*
4017 * Send a termination signal to children. This considers all of our children
4018 * processes, except syslogger and dead_end backends.
4019 */
4020static void
4021TerminateChildren(int signal)
4022{
4023 SignalChildren(signal);
4024 if (StartupPID != 0)
4025 {
4026 signal_child(StartupPID, signal);
4027 if (signal == SIGQUIT || signal == SIGKILL)
4028 StartupStatus = STARTUP_SIGNALED;
4029 }
4030 if (BgWriterPID != 0)
4031 signal_child(BgWriterPID, signal);
4032 if (CheckpointerPID != 0)
4033 signal_child(CheckpointerPID, signal);
4034 if (WalWriterPID != 0)
4035 signal_child(WalWriterPID, signal);
4036 if (WalReceiverPID != 0)
4037 signal_child(WalReceiverPID, signal);
4038 if (AutoVacPID != 0)
4039 signal_child(AutoVacPID, signal);
4040 if (PgArchPID != 0)
4041 signal_child(PgArchPID, signal);
4042 if (PgStatPID != 0)
4043 signal_child(PgStatPID, signal);
4044}
4045
4046/*
4047 * BackendStartup -- start backend process
4048 *
4049 * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
4050 *
4051 * Note: if you change this code, also consider StartAutovacuumWorker.
4052 */
4053static int
4054BackendStartup(Port *port)
4055{
4056 Backend *bn; /* for backend cleanup */
4057 pid_t pid;
4058
4059 /*
4060 * Create backend data structure. Better before the fork() so we can
4061 * handle failure cleanly.
4062 */
4063 bn = (Backend *) malloc(sizeof(Backend));
4064 if (!bn)
4065 {
4066 ereport(LOG,
4067 (errcode(ERRCODE_OUT_OF_MEMORY),
4068 errmsg("out of memory")));
4069 return STATUS_ERROR;
4070 }
4071
4072 /*
4073 * Compute the cancel key that will be assigned to this backend. The
4074 * backend will have its own copy in the forked-off process' value of
4075 * MyCancelKey, so that it can transmit the key to the frontend.
4076 */
4077 if (!RandomCancelKey(&MyCancelKey))
4078 {
4079 free(bn);
4080 ereport(LOG,
4081 (errcode(ERRCODE_INTERNAL_ERROR),
4082 errmsg("could not generate random cancel key")));
4083 return STATUS_ERROR;
4084 }
4085
4086 bn->cancel_key = MyCancelKey;
4087
4088 /* Pass down canAcceptConnections state */
4089 port->canAcceptConnections = canAcceptConnections();
4090 bn->dead_end = (port->canAcceptConnections != CAC_OK &&
4091 port->canAcceptConnections != CAC_WAITBACKUP);
4092
4093 /*
4094 * Unless it's a dead_end child, assign it a child slot number
4095 */
4096 if (!bn->dead_end)
4097 bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
4098 else
4099 bn->child_slot = 0;
4100
4101 /* Hasn't asked to be notified about any bgworkers yet */
4102 bn->bgworker_notify = false;
4103
4104#ifdef EXEC_BACKEND
4105 pid = backend_forkexec(port);
4106#else /* !EXEC_BACKEND */
4107 pid = fork_process();
4108 if (pid == 0) /* child */
4109 {
4110 free(bn);
4111
4112 /* Detangle from postmaster */
4113 InitPostmasterChild();
4114
4115 /* Close the postmaster's sockets */
4116 ClosePostmasterPorts(false);
4117
4118 /* Perform additional initialization and collect startup packet */
4119 BackendInitialize(port);
4120
4121 /* And run the backend */
4122 BackendRun(port);
4123 }
4124#endif /* EXEC_BACKEND */
4125
4126 if (pid < 0)
4127 {
4128 /* in parent, fork failed */
4129 int save_errno = errno;
4130
4131 if (!bn->dead_end)
4132 (void) ReleasePostmasterChildSlot(bn->child_slot);
4133 free(bn);
4134 errno = save_errno;
4135 ereport(LOG,
4136 (errmsg("could not fork new process for connection: %m")));
4137 report_fork_failure_to_client(port, save_errno);
4138 return STATUS_ERROR;
4139 }
4140
4141 /* in parent, successful fork */
4142 ereport(DEBUG2,
4143 (errmsg_internal("forked new backend, pid=%d socket=%d",
4144 (int) pid, (int) port->sock)));
4145
4146 /*
4147 * Everything's been successful, it's safe to add this backend to our list
4148 * of backends.
4149 */
4150 bn->pid = pid;
4151 bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
4152 dlist_push_head(&BackendList, &bn->elem);
4153
4154#ifdef EXEC_BACKEND
4155 if (!bn->dead_end)
4156 ShmemBackendArrayAdd(bn);
4157#endif
4158
4159 return STATUS_OK;
4160}
4161
4162/*
4163 * Try to report backend fork() failure to client before we close the
4164 * connection. Since we do not care to risk blocking the postmaster on
4165 * this connection, we set the connection to non-blocking and try only once.
4166 *
4167 * This is grungy special-purpose code; we cannot use backend libpq since
4168 * it's not up and running.
4169 */
4170static void
4171report_fork_failure_to_client(Port *port, int errnum)
4172{
4173 char buffer[1000];
4174 int rc;
4175
4176 /* Format the error message packet (always V2 protocol) */
4177 snprintf(buffer, sizeof(buffer), "E%s%s\n",
4178 _("could not fork new process for connection: "),
4179 strerror(errnum));
4180
4181 /* Set port to non-blocking. Don't do send() if this fails */
4182 if (!pg_set_noblock(port->sock))
4183 return;
4184
4185 /* We'll retry after EINTR, but ignore all other failures */
4186 do
4187 {
4188 rc = send(port->sock, buffer, strlen(buffer) + 1, 0);
4189 } while (rc < 0 && errno == EINTR);
4190}
4191
4192
4193/*
4194 * BackendInitialize -- initialize an interactive (postmaster-child)
4195 * backend process, and collect the client's startup packet.
4196 *
4197 * returns: nothing. Will not return at all if there's any failure.
4198 *
4199 * Note: this code does not depend on having any access to shared memory.
4200 * In the EXEC_BACKEND case, we are physically attached to shared memory
4201 * but have not yet set up most of our local pointers to shmem structures.
4202 */
4203static void
4204BackendInitialize(Port *port)
4205{
4206 int status;
4207 int ret;
4208 char remote_host[NI_MAXHOST];
4209 char remote_port[NI_MAXSERV];
4210 char remote_ps_data[NI_MAXHOST];
4211
4212 /* Save port etc. for ps status */
4213 MyProcPort = port;
4214
4215 /*
4216 * PreAuthDelay is a debugging aid for investigating problems in the
4217 * authentication cycle: it can be set in postgresql.conf to allow time to
4218 * attach to the newly-forked backend with a debugger. (See also
4219 * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4220 * is not honored until after authentication.)
4221 */
4222 if (PreAuthDelay > 0)
4223 pg_usleep(PreAuthDelay * 1000000L);
4224
4225 /* This flag will remain set until InitPostgres finishes authentication */
4226 ClientAuthInProgress = true; /* limit visibility of log messages */
4227
4228 /* set these to empty in case they are needed before we set them up */
4229 port->remote_host = "";
4230 port->remote_port = "";
4231
4232 /*
4233 * Initialize libpq and enable reporting of ereport errors to the client.
4234 * Must do this now because authentication uses libpq to send messages.
4235 */
4236 pq_init(); /* initialize libpq to talk to client */
4237 whereToSendOutput = DestRemote; /* now safe to ereport to client */
4238
4239 /*
4240 * We arrange for a simple exit(1) if we receive SIGTERM or SIGQUIT or
4241 * timeout while trying to collect the startup packet. Otherwise the
4242 * postmaster cannot shutdown the database FAST or IMMED cleanly if a
4243 * buggy client fails to send the packet promptly. XXX it follows that
4244 * the remainder of this function must tolerate losing control at any
4245 * instant. Likewise, any pg_on_exit_callback registered before or during
4246 * this function must be prepared to execute at any instant between here
4247 * and the end of this function. Furthermore, affected callbacks execute
4248 * partially or not at all when a second exit-inducing signal arrives
4249 * after proc_exit_prepare() decrements on_proc_exit_index. (Thanks to
4250 * that mechanic, callbacks need not anticipate more than one call.) This
4251 * is fragile; it ought to instead follow the norm of handling interrupts
4252 * at selected, safe opportunities.
4253 */
4254 pqsignal(SIGTERM, startup_die);
4255 pqsignal(SIGQUIT, startup_die);
4256 InitializeTimeouts(); /* establishes SIGALRM handler */
4257 PG_SETMASK(&StartupBlockSig);
4258
4259 /*
4260 * Get the remote host name and port for logging and status display.
4261 */
4262 remote_host[0] = '\0';
4263 remote_port[0] = '\0';
4264 if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4265 remote_host, sizeof(remote_host),
4266 remote_port, sizeof(remote_port),
4267 (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0)
4268 ereport(WARNING,
4269 (errmsg_internal("pg_getnameinfo_all() failed: %s",
4270 gai_strerror(ret))));
4271 if (remote_port[0] == '\0')
4272 snprintf(remote_ps_data, sizeof(remote_ps_data), "%s", remote_host);
4273 else
4274 snprintf(remote_ps_data, sizeof(remote_ps_data), "%s(%s)", remote_host, remote_port);
4275
4276 /*
4277 * Save remote_host and remote_port in port structure (after this, they
4278 * will appear in log_line_prefix data for log messages).
4279 */
4280 port->remote_host = strdup(remote_host);
4281 port->remote_port = strdup(remote_port);
4282
4283 /* And now we can issue the Log_connections message, if wanted */
4284 if (Log_connections)
4285 {
4286 if (remote_port[0])
4287 ereport(LOG,
4288 (errmsg("connection received: host=%s port=%s",
4289 remote_host,
4290 remote_port)));
4291 else
4292 ereport(LOG,
4293 (errmsg("connection received: host=%s",
4294 remote_host)));
4295 }
4296
4297 /*
4298 * If we did a reverse lookup to name, we might as well save the results
4299 * rather than possibly repeating the lookup during authentication.
4300 *
4301 * Note that we don't want to specify NI_NAMEREQD above, because then we'd
4302 * get nothing useful for a client without an rDNS entry. Therefore, we
4303 * must check whether we got a numeric IPv4 or IPv6 address, and not save
4304 * it into remote_hostname if so. (This test is conservative and might
4305 * sometimes classify a hostname as numeric, but an error in that
4306 * direction is safe; it only results in a possible extra lookup.)
4307 */
4308 if (log_hostname &&
4309 ret == 0 &&
4310 strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4311 strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4312 port->remote_hostname = strdup(remote_host);
4313
4314 /*
4315 * Ready to begin client interaction. We will give up and exit(1) after a
4316 * time delay, so that a broken client can't hog a connection
4317 * indefinitely. PreAuthDelay and any DNS interactions above don't count
4318 * against the time limit.
4319 *
4320 * Note: AuthenticationTimeout is applied here while waiting for the
4321 * startup packet, and then again in InitPostgres for the duration of any
4322 * authentication operations. So a hostile client could tie up the
4323 * process for nearly twice AuthenticationTimeout before we kick him off.
4324 *
4325 * Note: because PostgresMain will call InitializeTimeouts again, the
4326 * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4327 * since we never use it again after this function.
4328 */
4329 RegisterTimeout(STARTUP_PACKET_TIMEOUT, StartupPacketTimeoutHandler);
4330 enable_timeout_after(STARTUP_PACKET_TIMEOUT, AuthenticationTimeout * 1000);
4331
4332 /*
4333 * Receive the startup packet (which might turn out to be a cancel request
4334 * packet).
4335 */
4336 status = ProcessStartupPacket(port, false);
4337
4338 /*
4339 * Stop here if it was bad or a cancel packet. ProcessStartupPacket
4340 * already did any appropriate error reporting.
4341 */
4342 if (status != STATUS_OK)
4343 proc_exit(0);
4344
4345 /*
4346 * Now that we have the user and database name, we can set the process
4347 * title for ps. It's good to do this as early as possible in startup.
4348 *
4349 * For a walsender, the ps display is set in the following form:
4350 *
4351 * postgres: walsender <user> <host> <activity>
4352 *
4353 * To achieve that, we pass "walsender" as username and username as dbname
4354 * to init_ps_display(). XXX: should add a new variant of
4355 * init_ps_display() to avoid abusing the parameters like this.
4356 */
4357 if (am_walsender)
4358 init_ps_display(pgstat_get_backend_desc(B_WAL_SENDER), port->user_name, remote_ps_data,
4359 update_process_title ? "authentication" : "");
4360 else
4361 init_ps_display(port->user_name, port->database_name, remote_ps_data,
4362 update_process_title ? "authentication" : "");
4363
4364 /*
4365 * Disable the timeout, and prevent SIGTERM/SIGQUIT again.
4366 */
4367 disable_timeout(STARTUP_PACKET_TIMEOUT, false);
4368 PG_SETMASK(&BlockSig);
4369}
4370
4371
4372/*
4373 * BackendRun -- set up the backend's argument list and invoke PostgresMain()
4374 *
4375 * returns:
4376 * Shouldn't return at all.
4377 * If PostgresMain() fails, return status.
4378 */
4379static void
4380BackendRun(Port *port)
4381{
4382 char **av;
4383 int maxac;
4384 int ac;
4385 int i;
4386
4387 /*
4388 * Now, build the argv vector that will be given to PostgresMain.
4389 *
4390 * The maximum possible number of commandline arguments that could come
4391 * from ExtraOptions is (strlen(ExtraOptions) + 1) / 2; see
4392 * pg_split_opts().
4393 */
4394 maxac = 2; /* for fixed args supplied below */
4395 maxac += (strlen(ExtraOptions) + 1) / 2;
4396
4397 av = (char **) MemoryContextAlloc(TopMemoryContext,
4398 maxac * sizeof(char *));
4399 ac = 0;
4400
4401 av[ac++] = "postgres";
4402
4403 /*
4404 * Pass any backend switches specified with -o on the postmaster's own
4405 * command line. We assume these are secure.
4406 */
4407 pg_split_opts(av, &ac, ExtraOptions);
4408
4409 av[ac] = NULL;
4410
4411 Assert(ac < maxac);
4412
4413 /*
4414 * Debug: print arguments being passed to backend
4415 */
4416 ereport(DEBUG3,
4417 (errmsg_internal("%s child[%d]: starting with (",
4418 progname, (int) getpid())));
4419 for (i = 0; i < ac; ++i)
4420 ereport(DEBUG3,
4421 (errmsg_internal("\t%s", av[i])));
4422 ereport(DEBUG3,
4423 (errmsg_internal(")")));
4424
4425 /*
4426 * Make sure we aren't in PostmasterContext anymore. (We can't delete it
4427 * just yet, though, because InitPostgres will need the HBA data.)
4428 */
4429 MemoryContextSwitchTo(TopMemoryContext);
4430
4431 PostgresMain(ac, av, port->database_name, port->user_name);
4432}
4433
4434
4435#ifdef EXEC_BACKEND
4436
4437/*
4438 * postmaster_forkexec -- fork and exec a postmaster subprocess
4439 *
4440 * The caller must have set up the argv array already, except for argv[2]
4441 * which will be filled with the name of the temp variable file.
4442 *
4443 * Returns the child process PID, or -1 on fork failure (a suitable error
4444 * message has been logged on failure).
4445 *
4446 * All uses of this routine will dispatch to SubPostmasterMain in the
4447 * child process.
4448 */
4449pid_t
4450postmaster_forkexec(int argc, char *argv[])
4451{
4452 Port port;
4453
4454 /* This entry point passes dummy values for the Port variables */
4455 memset(&port, 0, sizeof(port));
4456 return internal_forkexec(argc, argv, &port);
4457}
4458
4459/*
4460 * backend_forkexec -- fork/exec off a backend process
4461 *
4462 * Some operating systems (WIN32) don't have fork() so we have to simulate
4463 * it by storing parameters that need to be passed to the child and
4464 * then create a new child process.
4465 *
4466 * returns the pid of the fork/exec'd process, or -1 on failure
4467 */
4468static pid_t
4469backend_forkexec(Port *port)
4470{
4471 char *av[4];
4472 int ac = 0;
4473
4474 av[ac++] = "postgres";
4475 av[ac++] = "--forkbackend";
4476 av[ac++] = NULL; /* filled in by internal_forkexec */
4477
4478 av[ac] = NULL;
4479 Assert(ac < lengthof(av));
4480
4481 return internal_forkexec(ac, av, port);
4482}
4483
4484#ifndef WIN32
4485
4486/*
4487 * internal_forkexec non-win32 implementation
4488 *
4489 * - writes out backend variables to the parameter file
4490 * - fork():s, and then exec():s the child process
4491 */
4492static pid_t
4493internal_forkexec(int argc, char *argv[], Port *port)
4494{
4495 static unsigned long tmpBackendFileNum = 0;
4496 pid_t pid;
4497 char tmpfilename[MAXPGPATH];
4498 BackendParameters param;
4499 FILE *fp;
4500
4501 if (!save_backend_variables(&param, port))
4502 return -1; /* log made by save_backend_variables */
4503
4504 /* Calculate name for temp file */
4505 snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4506 PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
4507 MyProcPid, ++tmpBackendFileNum);
4508
4509 /* Open file */
4510 fp = AllocateFile(tmpfilename, PG_BINARY_W);
4511 if (!fp)
4512 {
4513 /*
4514 * As in OpenTemporaryFileInTablespace, try to make the temp-file
4515 * directory, ignoring errors.
4516 */
4517 (void) MakePGDirectory(PG_TEMP_FILES_DIR);
4518
4519 fp = AllocateFile(tmpfilename, PG_BINARY_W);
4520 if (!fp)
4521 {
4522 ereport(LOG,
4523 (errcode_for_file_access(),
4524 errmsg("could not create file \"%s\": %m",
4525 tmpfilename)));
4526 return -1;
4527 }
4528 }
4529
4530 if (fwrite(&param, sizeof(param), 1, fp) != 1)
4531 {
4532 ereport(LOG,
4533 (errcode_for_file_access(),
4534 errmsg("could not write to file \"%s\": %m", tmpfilename)));
4535 FreeFile(fp);
4536 return -1;
4537 }
4538
4539 /* Release file */
4540 if (FreeFile(fp))
4541 {
4542 ereport(LOG,
4543 (errcode_for_file_access(),
4544 errmsg("could not write to file \"%s\": %m", tmpfilename)));
4545 return -1;
4546 }
4547
4548 /* Make sure caller set up argv properly */
4549 Assert(argc >= 3);
4550 Assert(argv[argc] == NULL);
4551 Assert(strncmp(argv[1], "--fork", 6) == 0);
4552 Assert(argv[2] == NULL);
4553
4554 /* Insert temp file name after --fork argument */
4555 argv[2] = tmpfilename;
4556
4557 /* Fire off execv in child */
4558 if ((pid = fork_process()) == 0)
4559 {
4560 if (execv(postgres_exec_path, argv) < 0)
4561 {
4562 ereport(LOG,
4563 (errmsg("could not execute server process \"%s\": %m",
4564 postgres_exec_path)));
4565 /* We're already in the child process here, can't return */
4566 exit(1);
4567 }
4568 }
4569
4570 return pid; /* Parent returns pid, or -1 on fork failure */
4571}
4572#else /* WIN32 */
4573
4574/*
4575 * internal_forkexec win32 implementation
4576 *
4577 * - starts backend using CreateProcess(), in suspended state
4578 * - writes out backend variables to the parameter file
4579 * - during this, duplicates handles and sockets required for
4580 * inheritance into the new process
4581 * - resumes execution of the new process once the backend parameter
4582 * file is complete.
4583 */
4584static pid_t
4585internal_forkexec(int argc, char *argv[], Port *port)
4586{
4587 int retry_count = 0;
4588 STARTUPINFO si;
4589 PROCESS_INFORMATION pi;
4590 int i;
4591 int j;
4592 char cmdLine[MAXPGPATH * 2];
4593 HANDLE paramHandle;
4594 BackendParameters *param;
4595 SECURITY_ATTRIBUTES sa;
4596 char paramHandleStr[32];
4597 win32_deadchild_waitinfo *childinfo;
4598
4599 /* Make sure caller set up argv properly */
4600 Assert(argc >= 3);
4601 Assert(argv[argc] == NULL);
4602 Assert(strncmp(argv[1], "--fork", 6) == 0);
4603 Assert(argv[2] == NULL);
4604
4605 /* Resume here if we need to retry */
4606retry:
4607
4608 /* Set up shared memory for parameter passing */
4609 ZeroMemory(&sa, sizeof(sa));
4610 sa.nLength = sizeof(sa);
4611 sa.bInheritHandle = TRUE;
4612 paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4613 &sa,
4614 PAGE_READWRITE,
4615 0,
4616 sizeof(BackendParameters),
4617 NULL);
4618 if (paramHandle == INVALID_HANDLE_VALUE)
4619 {
4620 elog(LOG, "could not create backend parameter file mapping: error code %lu",
4621 GetLastError());
4622 return -1;
4623 }
4624
4625 param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters));
4626 if (!param)
4627 {
4628 elog(LOG, "could not map backend parameter memory: error code %lu",
4629 GetLastError());
4630 CloseHandle(paramHandle);
4631 return -1;
4632 }
4633
4634 /* Insert temp file name after --fork argument */
4635#ifdef _WIN64
4636 sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4637#else
4638 sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4639#endif
4640 argv[2] = paramHandleStr;
4641
4642 /* Format the cmd line */
4643 cmdLine[sizeof(cmdLine) - 1] = '\0';
4644 cmdLine[sizeof(cmdLine) - 2] = '\0';
4645 snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"", postgres_exec_path);
4646 i = 0;
4647 while (argv[++i] != NULL)
4648 {
4649 j = strlen(cmdLine);
4650 snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"", argv[i]);
4651 }
4652 if (cmdLine[sizeof(cmdLine) - 2] != '\0')
4653 {
4654 elog(LOG, "subprocess command line too long");
4655 return -1;
4656 }
4657
4658 memset(&pi, 0, sizeof(pi));
4659 memset(&si, 0, sizeof(si));
4660 si.cb = sizeof(si);
4661
4662 /*
4663 * Create the subprocess in a suspended state. This will be resumed later,
4664 * once we have written out the parameter file.
4665 */
4666 if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4667 NULL, NULL, &si, &pi))
4668 {
4669 elog(LOG, "CreateProcess call failed: %m (error code %lu)",
4670 GetLastError());
4671 return -1;
4672 }
4673
4674 if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
4675 {
4676 /*
4677 * log made by save_backend_variables, but we have to clean up the
4678 * mess with the half-started process
4679 */
4680 if (!TerminateProcess(pi.hProcess, 255))
4681 ereport(LOG,
4682 (errmsg_internal("could not terminate unstarted process: error code %lu",
4683 GetLastError())));
4684 CloseHandle(pi.hProcess);
4685 CloseHandle(pi.hThread);
4686 return -1; /* log made by save_backend_variables */
4687 }
4688
4689 /* Drop the parameter shared memory that is now inherited to the backend */
4690 if (!UnmapViewOfFile(param))
4691 elog(LOG, "could not unmap view of backend parameter file: error code %lu",
4692 GetLastError());
4693 if (!CloseHandle(paramHandle))
4694 elog(LOG, "could not close handle to backend parameter file: error code %lu",
4695 GetLastError());
4696
4697 /*
4698 * Reserve the memory region used by our main shared memory segment before
4699 * we resume the child process. Normally this should succeed, but if ASLR
4700 * is active then it might sometimes fail due to the stack or heap having
4701 * gotten mapped into that range. In that case, just terminate the
4702 * process and retry.
4703 */
4704 if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4705 {
4706 /* pgwin32_ReserveSharedMemoryRegion already made a log entry */
4707 if (!TerminateProcess(pi.hProcess, 255))
4708 ereport(LOG,
4709 (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4710 GetLastError())));
4711 CloseHandle(pi.hProcess);
4712 CloseHandle(pi.hThread);
4713 if (++retry_count < 100)
4714 goto retry;
4715 ereport(LOG,
4716 (errmsg("giving up after too many tries to reserve shared memory"),
4717 errhint("This might be caused by ASLR or antivirus software.")));
4718 return -1;
4719 }
4720
4721 /*
4722 * Now that the backend variables are written out, we start the child
4723 * thread so it can start initializing while we set up the rest of the
4724 * parent state.
4725 */
4726 if (ResumeThread(pi.hThread) == -1)
4727 {
4728 if (!TerminateProcess(pi.hProcess, 255))
4729 {
4730 ereport(LOG,
4731 (errmsg_internal("could not terminate unstartable process: error code %lu",
4732 GetLastError())));
4733 CloseHandle(pi.hProcess);
4734 CloseHandle(pi.hThread);
4735 return -1;
4736 }
4737 CloseHandle(pi.hProcess);
4738 CloseHandle(pi.hThread);
4739 ereport(LOG,
4740 (errmsg_internal("could not resume thread of unstarted process: error code %lu",
4741 GetLastError())));
4742 return -1;
4743 }
4744
4745 /*
4746 * Queue a waiter to signal when this child dies. The wait will be handled
4747 * automatically by an operating system thread pool.
4748 *
4749 * Note: use malloc instead of palloc, since it needs to be thread-safe.
4750 * Struct will be free():d from the callback function that runs on a
4751 * different thread.
4752 */
4753 childinfo = malloc(sizeof(win32_deadchild_waitinfo));
4754 if (!childinfo)
4755 ereport(FATAL,
4756 (errcode(ERRCODE_OUT_OF_MEMORY),
4757 errmsg("out of memory")));
4758
4759 childinfo->procHandle = pi.hProcess;
4760 childinfo->procId = pi.dwProcessId;
4761
4762 if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4763 pi.hProcess,
4764 pgwin32_deadchild_callback,
4765 childinfo,
4766 INFINITE,
4767 WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD))
4768 ereport(FATAL,
4769 (errmsg_internal("could not register process for wait: error code %lu",
4770 GetLastError())));
4771
4772 /* Don't close pi.hProcess here - the wait thread needs access to it */
4773
4774 CloseHandle(pi.hThread);
4775
4776 return pi.dwProcessId;
4777}
4778#endif /* WIN32 */
4779
4780
4781/*
4782 * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4783 * to what it would be if we'd simply forked on Unix, and then
4784 * dispatch to the appropriate place.
4785 *
4786 * The first two command line arguments are expected to be "--forkFOO"
4787 * (where FOO indicates which postmaster child we are to become), and
4788 * the name of a variables file that we can read to load data that would
4789 * have been inherited by fork() on Unix. Remaining arguments go to the
4790 * subprocess FooMain() routine.
4791 */
4792void
4793SubPostmasterMain(int argc, char *argv[])
4794{
4795 Port port;
4796
4797 /* In EXEC_BACKEND case we will not have inherited these settings */
4798 IsPostmasterEnvironment = true;
4799 whereToSendOutput = DestNone;
4800
4801 /* Setup as postmaster child */
4802 InitPostmasterChild();
4803
4804 /* Setup essential subsystems (to ensure elog() behaves sanely) */
4805 InitializeGUCOptions();
4806
4807 /* Check we got appropriate args */
4808 if (argc < 3)
4809 elog(FATAL, "invalid subpostmaster invocation");
4810
4811 /* Read in the variables file */
4812 memset(&port, 0, sizeof(Port));
4813 read_backend_variables(argv[2], &port);
4814
4815 /* Close the postmaster's sockets (as soon as we know them) */
4816 ClosePostmasterPorts(strcmp(argv[1], "--forklog") == 0);
4817
4818 /*
4819 * Set reference point for stack-depth checking
4820 */
4821 set_stack_base();
4822
4823 /*
4824 * Set up memory area for GSS information. Mirrors the code in ConnCreate
4825 * for the non-exec case.
4826 */
4827#if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
4828 port.gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
4829 if (!port.gss)
4830 ereport(FATAL,
4831 (errcode(ERRCODE_OUT_OF_MEMORY),
4832 errmsg("out of memory")));
4833#endif
4834
4835 /*
4836 * If appropriate, physically re-attach to shared memory segment. We want
4837 * to do this before going any further to ensure that we can attach at the
4838 * same address the postmaster used. On the other hand, if we choose not
4839 * to re-attach, we may have other cleanup to do.
4840 *
4841 * If testing EXEC_BACKEND on Linux, you should run this as root before
4842 * starting the postmaster:
4843 *
4844 * echo 0 >/proc/sys/kernel/randomize_va_space
4845 *
4846 * This prevents using randomized stack and code addresses that cause the
4847 * child process's memory map to be different from the parent's, making it
4848 * sometimes impossible to attach to shared memory at the desired address.
4849 * Return the setting to its old value (usually '1' or '2') when finished.
4850 */
4851 if (strcmp(argv[1], "--forkbackend") == 0 ||
4852 strcmp(argv[1], "--forkavlauncher") == 0 ||
4853 strcmp(argv[1], "--forkavworker") == 0 ||
4854 strcmp(argv[1], "--forkboot") == 0 ||
4855 strncmp(argv[1], "--forkbgworker=", 15) == 0)
4856 PGSharedMemoryReAttach();
4857 else
4858 PGSharedMemoryNoReAttach();
4859
4860 /* autovacuum needs this set before calling InitProcess */
4861 if (strcmp(argv[1], "--forkavlauncher") == 0)
4862 AutovacuumLauncherIAm();
4863 if (strcmp(argv[1], "--forkavworker") == 0)
4864 AutovacuumWorkerIAm();
4865
4866 /*
4867 * Start our win32 signal implementation. This has to be done after we
4868 * read the backend variables, because we need to pick up the signal pipe
4869 * from the parent process.
4870 */
4871#ifdef WIN32
4872 pgwin32_signal_initialize();
4873#endif
4874
4875 /* In EXEC_BACKEND case we will not have inherited these settings */
4876 pqinitmask();
4877 PG_SETMASK(&BlockSig);
4878
4879 /* Read in remaining GUC variables */
4880 read_nondefault_variables();
4881
4882 /*
4883 * Check that the data directory looks valid, which will also check the
4884 * privileges on the data directory and update our umask and file/group
4885 * variables for creating files later. Note: this should really be done
4886 * before we create any files or directories.
4887 */
4888 checkDataDir();
4889
4890 /*
4891 * (re-)read control file, as it contains config. The postmaster will
4892 * already have read this, but this process doesn't know about that.
4893 */
4894 LocalProcessControlFile(false);
4895
4896 /*
4897 * Reload any libraries that were preloaded by the postmaster. Since we
4898 * exec'd this process, those libraries didn't come along with us; but we
4899 * should load them into all child processes to be consistent with the
4900 * non-EXEC_BACKEND behavior.
4901 */
4902 process_shared_preload_libraries();
4903
4904 /* Run backend or appropriate child */
4905 if (strcmp(argv[1], "--forkbackend") == 0)
4906 {
4907 Assert(argc == 3); /* shouldn't be any more args */
4908
4909 /*
4910 * Need to reinitialize the SSL library in the backend, since the
4911 * context structures contain function pointers and cannot be passed
4912 * through the parameter file.
4913 *
4914 * If for some reason reload fails (maybe the user installed broken
4915 * key files), soldier on without SSL; that's better than all
4916 * connections becoming impossible.
4917 *
4918 * XXX should we do this in all child processes? For the moment it's
4919 * enough to do it in backend children.
4920 */
4921#ifdef USE_SSL
4922 if (EnableSSL)
4923 {
4924 if (secure_initialize(false) == 0)
4925 LoadedSSL = true;
4926 else
4927 ereport(LOG,
4928 (errmsg("SSL configuration could not be loaded in child process")));
4929 }
4930#endif
4931
4932 /*
4933 * Perform additional initialization and collect startup packet.
4934 *
4935 * We want to do this before InitProcess() for a couple of reasons: 1.
4936 * so that we aren't eating up a PGPROC slot while waiting on the
4937 * client. 2. so that if InitProcess() fails due to being out of
4938 * PGPROC slots, we have already initialized libpq and are able to
4939 * report the error to the client.
4940 */
4941 BackendInitialize(&port);
4942
4943 /* Restore basic shared memory pointers */
4944 InitShmemAccess(UsedShmemSegAddr);
4945
4946 /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4947 InitProcess();
4948
4949 /* Attach process to shared data structures */
4950 CreateSharedMemoryAndSemaphores(0);
4951
4952 /* And run the backend */
4953 BackendRun(&port); /* does not return */
4954 }
4955 if (strcmp(argv[1], "--forkboot") == 0)
4956 {
4957 /* Restore basic shared memory pointers */
4958 InitShmemAccess(UsedShmemSegAddr);
4959
4960 /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4961 InitAuxiliaryProcess();
4962
4963 /* Attach process to shared data structures */
4964 CreateSharedMemoryAndSemaphores(0);
4965
4966 AuxiliaryProcessMain(argc - 2, argv + 2); /* does not return */
4967 }
4968 if (strcmp(argv[1], "--forkavlauncher") == 0)
4969 {
4970 /* Restore basic shared memory pointers */
4971 InitShmemAccess(UsedShmemSegAddr);
4972
4973 /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4974 InitProcess();
4975
4976 /* Attach process to shared data structures */
4977 CreateSharedMemoryAndSemaphores(0);
4978
4979 AutoVacLauncherMain(argc - 2, argv + 2); /* does not return */
4980 }
4981 if (strcmp(argv[1], "--forkavworker") == 0)
4982 {
4983 /* Restore basic shared memory pointers */
4984 InitShmemAccess(UsedShmemSegAddr);
4985
4986 /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
4987 InitProcess();
4988
4989 /* Attach process to shared data structures */
4990 CreateSharedMemoryAndSemaphores(0);
4991
4992 AutoVacWorkerMain(argc - 2, argv + 2); /* does not return */
4993 }
4994 if (strncmp(argv[1], "--forkbgworker=", 15) == 0)
4995 {
4996 int shmem_slot;
4997
4998 /* do this as early as possible; in particular, before InitProcess() */
4999 IsBackgroundWorker = true;
5000
5001 /* Restore basic shared memory pointers */
5002 InitShmemAccess(UsedShmemSegAddr);
5003
5004 /* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
5005 InitProcess();
5006
5007 /* Attach process to shared data structures */
5008 CreateSharedMemoryAndSemaphores(0);
5009
5010 /* Fetch MyBgworkerEntry from shared memory */
5011 shmem_slot = atoi(argv[1] + 15);
5012 MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);
5013
5014 StartBackgroundWorker();
5015 }
5016 if (strcmp(argv[1], "--forkarch") == 0)
5017 {
5018 /* Do not want to attach to shared memory */
5019
5020 PgArchiverMain(argc, argv); /* does not return */
5021 }
5022 if (strcmp(argv[1], "--forkcol") == 0)
5023 {
5024 /* Do not want to attach to shared memory */
5025
5026 PgstatCollectorMain(argc, argv); /* does not return */
5027 }
5028 if (strcmp(argv[1], "--forklog") == 0)
5029 {
5030 /* Do not want to attach to shared memory */
5031
5032 SysLoggerMain(argc, argv); /* does not return */
5033 }
5034
5035 abort(); /* shouldn't get here */
5036}
5037#endif /* EXEC_BACKEND */
5038
5039
/*
 * ExitPostmaster -- common exit path for the postmaster
 *
 * status: the exit status handed to proc_exit().
 *
 * Do NOT call exit() directly --- always go through here!
 */
static void
ExitPostmaster(int status)
{
#ifdef HAVE_PTHREAD_IS_THREADED_NP

	/*
	 * Nothing is known that would make a postmaster multithreaded after
	 * startup, but recheck here in case of unknown causes.  LOG level is
	 * used because an unclean shutdown at this point would usually not look
	 * much different from a clean shutdown.
	 */
	if (pthread_is_threaded_np())
		ereport(LOG,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg_internal("postmaster became multithreaded"),
				 errdetail("Please report this to <pgsql-bugs@lists.postgresql.org>.")));
#endif

	/*
	 * No shared memory cleanup or signaling of backends happens in this
	 * function itself.  Whether the backends should all be killed when the
	 * postmaster exits has long been an open question ("probably not" versus
	 * "MUST" -- vadim 05-10-1999).
	 */

	proc_exit(status);
}
5074
/*
 * sigusr1_handler - handle signal conditions from child processes
 *
 * Signals are blocked (BlockSig) on entry and unblocked again on exit, and
 * the interrupted code's errno is saved and restored around the whole body.
 * Each CheckPostmasterSignal() call below tests for one PMSIGNAL_* request
 * and the matching branch reacts to it.  The order of the steps matters;
 * see in particular the comment ahead of the PostmasterStateMachine() call.
 */
static void
sigusr1_handler(SIGNAL_ARGS)
{
	int			save_errno = errno;

	PG_SETMASK(&BlockSig);

	/* Process background worker state change. */
	if (CheckPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE))
	{
		BackgroundWorkerStateChange();
		StartWorkerNeeded = true;
	}

	/*
	 * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
	 * unexpected states. If the startup process quickly starts up, completes
	 * recovery, exits, we might process the death of the startup process
	 * first. We don't want to go back to recovery in that case.
	 */
	if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED) &&
		pmState == PM_STARTUP && Shutdown == NoShutdown)
	{
		/* WAL redo has started. We're out of reinitialization. */
		FatalError = false;
		Assert(AbortStartTime == 0);

		/*
		 * Crank up the background tasks.  It doesn't matter if this fails,
		 * we'll just try again later.
		 */
		Assert(CheckpointerPID == 0);
		CheckpointerPID = StartCheckpointer();
		Assert(BgWriterPID == 0);
		BgWriterPID = StartBackgroundWriter();

		/*
		 * Start the archiver if we're responsible for (re-)archiving received
		 * files.
		 */
		Assert(PgArchPID == 0);
		if (XLogArchivingAlways())
			PgArchPID = pgarch_start();

		/*
		 * If we aren't planning to enter hot standby mode later, treat
		 * RECOVERY_STARTED as meaning we're out of startup, and report status
		 * accordingly.
		 */
		if (!EnableHotStandby)
		{
			AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STANDBY);
#ifdef USE_SYSTEMD
			sd_notify(0, "READY=1");
#endif
		}

		/* PM_STARTUP -> PM_RECOVERY */
		pmState = PM_RECOVERY;
	}
	if (CheckPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY) &&
		pmState == PM_RECOVERY && Shutdown == NoShutdown)
	{
		/*
		 * Likewise, start other special children as needed.
		 */
		Assert(PgStatPID == 0);
		PgStatPID = pgstat_start();

		ereport(LOG,
				(errmsg("database system is ready to accept read only connections")));

		/* Report status */
		AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY);
#ifdef USE_SYSTEMD
		sd_notify(0, "READY=1");
#endif

		/* PM_RECOVERY -> PM_HOT_STANDBY */
		pmState = PM_HOT_STANDBY;
		/* Some workers may be scheduled to start now */
		StartWorkerNeeded = true;
	}

	/*
	 * Launch background workers if any of the steps above asked for it, or
	 * if a previously crashed worker is still waiting to be restarted.
	 */
	if (StartWorkerNeeded || HaveCrashedWorker)
		maybe_start_bgworkers();

	if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER) &&
		PgArchPID != 0)
	{
		/*
		 * Send SIGUSR1 to archiver process, to wake it up and begin archiving
		 * next WAL file.
		 */
		signal_child(PgArchPID, SIGUSR1);
	}

	/*
	 * Tell syslogger to rotate logfile if requested.  Both request paths
	 * (the logrotate signal files and PMSIGNAL_ROTATE_LOGFILE) end up
	 * forwarding SIGUSR1 to the syslogger; the PMSIGNAL flag is only
	 * examined when CheckLogrotateSignal() reported nothing.
	 */
	if (SysLoggerPID != 0)
	{
		if (CheckLogrotateSignal())
		{
			signal_child(SysLoggerPID, SIGUSR1);
			RemoveLogrotateSignalFiles();
		}
		else if (CheckPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE))
		{
			signal_child(SysLoggerPID, SIGUSR1);
		}
	}

	if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER) &&
		Shutdown == NoShutdown)
	{
		/*
		 * Start one iteration of the autovacuum daemon, even if autovacuuming
		 * is nominally not enabled.  This is so we can have an active defense
		 * against transaction ID wraparound.  We set a flag for the main loop
		 * to do it rather than trying to do it here --- this is because the
		 * autovac process itself may send the signal, and we want to handle
		 * that by launching another iteration as soon as the current one
		 * completes.
		 */
		start_autovac_launcher = true;
	}

	if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER) &&
		Shutdown == NoShutdown)
	{
		/* The autovacuum launcher wants us to start a worker process. */
		StartAutovacuumWorker();
	}

	if (CheckPostmasterSignal(PMSIGNAL_START_WALRECEIVER))
	{
		/* Startup Process wants us to start the walreceiver process. */
		/* Start immediately if possible, else remember request for later. */
		WalReceiverRequested = true;
		MaybeStartWalReceiver();
	}

	/*
	 * Try to advance postmaster's state machine, if a child requests it.
	 *
	 * Be careful about the order of this action relative to sigusr1_handler's
	 * other actions.  Generally, this should be after other actions, in case
	 * they have effects PostmasterStateMachine would need to know about.
	 * However, we should do it before the CheckPromoteSignal step, which
	 * cannot have any (immediate) effect on the state machine, but does
	 * depend on what state we're in now.
	 */
	if (CheckPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE))
	{
		PostmasterStateMachine();
	}

	/*
	 * Promotion is only forwarded while a startup process exists and we are
	 * in one of the recovery-related states; CheckPromoteSignal() is tested
	 * last, so it is not even called otherwise.
	 */
	if (StartupPID != 0 &&
		(pmState == PM_STARTUP || pmState == PM_RECOVERY ||
		 pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY) &&
		CheckPromoteSignal())
	{
		/* Tell startup process to finish recovery */
		signal_child(StartupPID, SIGUSR2);
	}

	PG_SETMASK(&UnBlockSig);

	errno = save_errno;
}
5245
/*
 * SIGTERM or SIGQUIT while processing startup packet.
 *
 * This runs as a signal handler, so it must not call proc_exit(): that
 * would run the registered exit callbacks from inside the handler, and
 * they are not async-signal-safe (the interrupted code may be in the middle
 * of malloc, SSL setup, etc).  Instead pull the plug with _exit(1), which
 * terminates immediately without running any exit or atexit callbacks.
 *
 * NOTE(review): this relies on nothing that must be cleaned up having been
 * set up yet at the point where a child is still reading its startup
 * packet -- confirm against the code that installs this handler.
 *
 * XXX: possible future improvement: try to send a message indicating
 * why we are disconnecting.  Problem is to be sure we don't block while
 * doing so, nor mess up SSL initialization.  In practice, if the client
 * has wedged here, it probably couldn't do anything with the message anyway.
 */
static void
startup_die(SIGNAL_ARGS)
{
	_exit(1);
}
5260
/*
 * Dummy signal handler
 *
 * We use this for signals that we don't actually use in the postmaster,
 * but we do use in backends.  If we were to SIG_IGN such signals in the
 * postmaster, then a newly started backend might drop a signal that arrives
 * before it's able to reconfigure its signal processing.  (See notes in
 * tcop/postgres.c.)
 *
 * The body is intentionally empty: having a real, do-nothing handler
 * installed instead of SIG_IGN is the whole point.
 */
static void
dummy_handler(SIGNAL_ARGS)
{
}
5274
/*
 * Timeout while processing startup packet.
 *
 * Terminate the process with exit status 1.  We use _exit() rather than
 * proc_exit() so that no exit or atexit callbacks are run: timeout
 * callbacks are presumably invoked from the timer signal handler (TODO
 * confirm against the timeout module), and running the exit machinery from
 * signal context is not async-signal-safe.
 */
static void
StartupPacketTimeoutHandler(void)
{
	_exit(1);
}
5284
5285
5286/*
5287 * Generate a random cancel key.
5288 */
5289static bool
5290RandomCancelKey(int32 *cancel_key)
5291{
5292 return pg_strong_random(cancel_key, sizeof(int32));
5293}
5294
5295/*
5296 * Count up number of child processes of specified types (dead_end children
5297 * are always excluded).
5298 */
5299static int
5300CountChildren(int target)
5301{
5302 dlist_iter iter;
5303 int cnt = 0;
5304
5305 dlist_foreach(iter, &BackendList)
5306 {
5307 Backend *bp = dlist_container(Backend, elem, iter.cur);
5308
5309 if (bp->dead_end)
5310 continue;
5311
5312 /*
5313 * Since target == BACKEND_TYPE_ALL is the most common case, we test
5314 * it first and avoid touching shared memory for every child.
5315 */
5316 if (target != BACKEND_TYPE_ALL)
5317 {
5318 /*
5319 * Assign bkend_type for any recently announced WAL Sender
5320 * processes.
5321 */
5322 if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5323 IsPostmasterChildWalSender(bp->child_slot))
5324 bp->bkend_type = BACKEND_TYPE_WALSND;
5325
5326 if (!(target & bp->bkend_type))
5327 continue;
5328 }
5329
5330 cnt++;
5331 }
5332 return cnt;
5333}
5334
5335
5336/*
5337 * StartChildProcess -- start an auxiliary process for the postmaster
5338 *
5339 * "type" determines what kind of child will be started. All child types
5340 * initially go to AuxiliaryProcessMain, which will handle common setup.
5341 *
5342 * Return value of StartChildProcess is subprocess' PID, or 0 if failed
5343 * to start subprocess.
5344 */
5345static pid_t
5346StartChildProcess(AuxProcType type)
5347{
5348 pid_t pid;
5349 char *av[10];
5350 int ac = 0;
5351 char typebuf[32];
5352
5353 /*
5354 * Set up command-line arguments for subprocess
5355 */
5356 av[ac++] = "postgres";
5357
5358#ifdef EXEC_BACKEND
5359 av[ac++] = "--forkboot";
5360 av[ac++] = NULL; /* filled in by postmaster_forkexec */
5361#endif
5362
5363 snprintf(typebuf, sizeof(typebuf), "-x%d", type);
5364 av[ac++] = typebuf;
5365
5366 av[ac] = NULL;
5367 Assert(ac < lengthof(av));
5368
5369#ifdef EXEC_BACKEND
5370 pid = postmaster_forkexec(ac, av);
5371#else /* !EXEC_BACKEND */
5372 pid = fork_process();
5373
5374 if (pid == 0) /* child */
5375 {
5376 InitPostmasterChild();
5377
5378 /* Close the postmaster's sockets */
5379 ClosePostmasterPorts(false);
5380
5381 /* Release postmaster's working memory context */
5382 MemoryContextSwitchTo(TopMemoryContext);
5383 MemoryContextDelete(PostmasterContext);
5384 PostmasterContext = NULL;
5385
5386 AuxiliaryProcessMain(ac, av);
5387 ExitPostmaster(0);
5388 }
5389#endif /* EXEC_BACKEND */
5390
5391 if (pid < 0)
5392 {
5393 /* in parent, fork failed */
5394 int save_errno = errno;
5395
5396 errno = save_errno;
5397 switch (type)
5398 {
5399 case StartupProcess:
5400 ereport(LOG,
5401 (errmsg("could not fork startup process: %m")));
5402 break;
5403 case BgWriterProcess:
5404 ereport(LOG,
5405 (errmsg("could not fork background writer process: %m")));
5406 break;
5407 case CheckpointerProcess:
5408 ereport(LOG,
5409 (errmsg("could not fork checkpointer process: %m")));
5410 break;
5411 case WalWriterProcess:
5412 ereport(LOG,
5413 (errmsg("could not fork WAL writer process: %m")));
5414 break;
5415 case WalReceiverProcess:
5416 ereport(LOG,
5417 (errmsg("could not fork WAL receiver process: %m")));
5418 break;
5419 default:
5420 ereport(LOG,
5421 (errmsg("could not fork process: %m")));
5422 break;
5423 }
5424
5425 /*
5426 * fork failure is fatal during startup, but there's no need to choke
5427 * immediately if starting other child types fails.
5428 */
5429 if (type == StartupProcess)
5430 ExitPostmaster(1);
5431 return 0;
5432 }
5433
5434 /*
5435 * in parent, successful fork
5436 */
5437 return pid;
5438}
5439
5440/*
5441 * StartAutovacuumWorker
5442 * Start an autovac worker process.
5443 *
5444 * This function is here because it enters the resulting PID into the
5445 * postmaster's private backends list.
5446 *
5447 * NB -- this code very roughly matches BackendStartup.
5448 */
5449static void
5450StartAutovacuumWorker(void)
5451{
5452 Backend *bn;
5453
5454 /*
5455 * If not in condition to run a process, don't try, but handle it like a
5456 * fork failure. This does not normally happen, since the signal is only
5457 * supposed to be sent by autovacuum launcher when it's OK to do it, but
5458 * we have to check to avoid race-condition problems during DB state
5459 * changes.
5460 */
5461 if (canAcceptConnections() == CAC_OK)
5462 {
5463 /*
5464 * Compute the cancel key that will be assigned to this session. We
5465 * probably don't need cancel keys for autovac workers, but we'd
5466 * better have something random in the field to prevent unfriendly
5467 * people from sending cancels to them.
5468 */
5469 if (!RandomCancelKey(&MyCancelKey))
5470 {
5471 ereport(LOG,
5472 (errcode(ERRCODE_INTERNAL_ERROR),
5473 errmsg("could not generate random cancel key")));
5474 return;
5475 }
5476
5477 bn = (Backend *) malloc(sizeof(Backend));
5478 if (bn)
5479 {
5480 bn->cancel_key = MyCancelKey;
5481
5482 /* Autovac workers are not dead_end and need a child slot */
5483 bn->dead_end = false;
5484 bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
5485 bn->bgworker_notify = false;
5486
5487 bn->pid = StartAutoVacWorker();
5488 if (bn->pid > 0)
5489 {
5490 bn->bkend_type = BACKEND_TYPE_AUTOVAC;
5491 dlist_push_head(&BackendList, &bn->elem);
5492#ifdef EXEC_BACKEND
5493 ShmemBackendArrayAdd(bn);
5494#endif
5495 /* all OK */
5496 return;
5497 }
5498
5499 /*
5500 * fork failed, fall through to report -- actual error message was
5501 * logged by StartAutoVacWorker
5502 */
5503 (void) ReleasePostmasterChildSlot(bn->child_slot);
5504 free(bn);
5505 }
5506 else
5507 ereport(LOG,
5508 (errcode(ERRCODE_OUT_OF_MEMORY),
5509 errmsg("out of memory")));
5510 }
5511
5512 /*
5513 * Report the failure to the launcher, if it's running. (If it's not, we
5514 * might not even be connected to shared memory, so don't try to call
5515 * AutoVacWorkerFailed.) Note that we also need to signal it so that it
5516 * responds to the condition, but we don't do that here, instead waiting
5517 * for ServerLoop to do it. This way we avoid a ping-pong signalling in
5518 * quick succession between the autovac launcher and postmaster in case
5519 * things get ugly.
5520 */
5521 if (AutoVacPID != 0)
5522 {
5523 AutoVacWorkerFailed();
5524 avlauncher_needs_signal = true;
5525 }
5526}
5527
5528/*
5529 * MaybeStartWalReceiver
5530 * Start the WAL receiver process, if not running and our state allows.
5531 *
5532 * Note: if WalReceiverPID is already nonzero, it might seem that we should
5533 * clear WalReceiverRequested. However, there's a race condition if the
5534 * walreceiver terminates and the startup process immediately requests a new
5535 * one: it's quite possible to get the signal for the request before reaping
5536 * the dead walreceiver process. Better to risk launching an extra
5537 * walreceiver than to miss launching one we need. (The walreceiver code
5538 * has logic to recognize that it should go away if not needed.)
5539 */
5540static void
5541MaybeStartWalReceiver(void)
5542{
5543 if (WalReceiverPID == 0 &&
5544 (pmState == PM_STARTUP || pmState == PM_RECOVERY ||
5545 pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY) &&
5546 Shutdown == NoShutdown)
5547 {
5548 WalReceiverPID = StartWalReceiver();
5549 if (WalReceiverPID != 0)
5550 WalReceiverRequested = false;
5551 /* else leave the flag set, so we'll try again later */
5552 }
5553}
5554
5555
5556/*
5557 * Create the opts file
5558 */
5559static bool
5560CreateOptsFile(int argc, char *argv[], char *fullprogname)
5561{
5562 FILE *fp;
5563 int i;
5564
5565#define OPTS_FILE "postmaster.opts"
5566
5567 if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5568 {
5569 elog(LOG, "could not create file \"%s\": %m", OPTS_FILE);
5570 return false;
5571 }
5572
5573 fprintf(fp, "%s", fullprogname);
5574 for (i = 1; i < argc; i++)
5575 fprintf(fp, " \"%s\"", argv[i]);
5576 fputs("\n", fp);
5577
5578 if (fclose(fp))
5579 {
5580 elog(LOG, "could not write file \"%s\": %m", OPTS_FILE);
5581 return false;
5582 }
5583
5584 return true;
5585}
5586
5587
5588/*
5589 * MaxLivePostmasterChildren
5590 *
5591 * This reports the number of entries needed in per-child-process arrays
5592 * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5593 * These arrays include regular backends, autovac workers, walsenders
5594 * and background workers, but not special children nor dead_end children.
5595 * This allows the arrays to have a fixed maximum size, to wit the same
5596 * too-many-children limit enforced by canAcceptConnections(). The exact value
5597 * isn't too critical as long as it's more than MaxBackends.
5598 */
5599int
5600MaxLivePostmasterChildren(void)
5601{
5602 return 2 * (MaxConnections + autovacuum_max_workers + 1 +
5603 max_wal_senders + max_worker_processes);
5604}
5605
5606/*
5607 * Connect background worker to a database.
5608 */
5609void
5610BackgroundWorkerInitializeConnection(const char *dbname, const char *username, uint32 flags)
5611{
5612 BackgroundWorker *worker = MyBgworkerEntry;
5613
5614 /* XXX is this the right errcode? */
5615 if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
5616 ereport(FATAL,
5617 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5618 errmsg("database connection requirement not indicated during registration")));
5619
5620 InitPostgres(dbname, InvalidOid, username, InvalidOid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0);
5621
5622 /* it had better not gotten out of "init" mode yet */
5623 if (!IsInitProcessingMode())
5624 ereport(ERROR,
5625 (errmsg("invalid processing mode in background worker")));
5626 SetProcessingMode(NormalProcessing);
5627}
5628
5629/*
5630 * Connect background worker to a database using OIDs.
5631 */
5632void
5633BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags)
5634{
5635 BackgroundWorker *worker = MyBgworkerEntry;
5636
5637 /* XXX is this the right errcode? */
5638 if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
5639 ereport(FATAL,
5640 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5641 errmsg("database connection requirement not indicated during registration")));
5642
5643 InitPostgres(NULL, dboid, NULL, useroid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0);
5644
5645 /* it had better not gotten out of "init" mode yet */
5646 if (!IsInitProcessingMode())
5647 ereport(ERROR,
5648 (errmsg("invalid processing mode in background worker")));
5649 SetProcessingMode(NormalProcessing);
5650}
5651
/*
 * Block/unblock signals in a background worker
 *
 * BackgroundWorkerBlockSignals installs the BlockSig signal mask via
 * PG_SETMASK.
 */
void
BackgroundWorkerBlockSignals(void)
{
	PG_SETMASK(&BlockSig);
}
5660
/*
 * BackgroundWorkerUnblockSignals installs the UnBlockSig signal mask via
 * PG_SETMASK; it is the counterpart of BackgroundWorkerBlockSignals.
 */
void
BackgroundWorkerUnblockSignals(void)
{
	PG_SETMASK(&UnBlockSig);
}
5666
#ifdef EXEC_BACKEND
/*
 * bgworker_forkexec -- fork/exec a background worker (EXEC_BACKEND only)
 *
 * The worker's shared-memory slot number is passed on the command line as
 * "--forkbgworker=<slot>".  Returns whatever postmaster_forkexec() returns.
 */
static pid_t
bgworker_forkexec(int shmem_slot)
{
	char		slot_arg[MAXPGPATH];
	char	   *argvec[10];
	int			nargs = 0;

	snprintf(slot_arg, sizeof(slot_arg), "--forkbgworker=%d", shmem_slot);

	argvec[nargs++] = "postgres";
	argvec[nargs++] = slot_arg;
	argvec[nargs++] = NULL;		/* filled in by postmaster_forkexec */
	argvec[nargs] = NULL;

	Assert(nargs < lengthof(argvec));

	return postmaster_forkexec(nargs, argvec);
}
#endif
5687
5688/*
5689 * Start a new bgworker.
5690 * Starting time conditions must have been checked already.
5691 *
5692 * Returns true on success, false on failure.
5693 * In either case, update the RegisteredBgWorker's state appropriately.
5694 *
5695 * This code is heavily based on autovacuum.c, q.v.
5696 */
5697static bool
5698do_start_bgworker(RegisteredBgWorker *rw)
5699{
5700 pid_t worker_pid;
5701
5702 Assert(rw->rw_pid == 0);
5703
5704 /*
5705 * Allocate and assign the Backend element. Note we must do this before
5706 * forking, so that we can handle out of memory properly.
5707 *
5708 * Treat failure as though the worker had crashed. That way, the
5709 * postmaster will wait a bit before attempting to start it again; if it
5710 * tried again right away, most likely it'd find itself repeating the
5711 * out-of-memory or fork failure condition.
5712 */
5713 if (!assign_backendlist_entry(rw))
5714 {
5715 rw->rw_crashed_at = GetCurrentTimestamp();
5716 return false;
5717 }
5718
5719 ereport(DEBUG1,
5720 (errmsg("starting background worker process \"%s\"",
5721 rw->rw_worker.bgw_name)));
5722
5723#ifdef EXEC_BACKEND
5724 switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot)))
5725#else
5726 switch ((worker_pid = fork_process()))
5727#endif
5728 {
5729 case -1:
5730 /* in postmaster, fork failed ... */
5731 ereport(LOG,
5732 (errmsg("could not fork worker process: %m")));
5733 /* undo what assign_backendlist_entry did */
5734 ReleasePostmasterChildSlot(rw->rw_child_slot);
5735 rw->rw_child_slot = 0;
5736 free(rw->rw_backend);
5737 rw->rw_backend = NULL;
5738 /* mark entry as crashed, so we'll try again later */
5739 rw->rw_crashed_at = GetCurrentTimestamp();
5740 break;
5741
5742#ifndef EXEC_BACKEND
5743 case 0:
5744 /* in postmaster child ... */
5745 InitPostmasterChild();
5746
5747 /* Close the postmaster's sockets */
5748 ClosePostmasterPorts(false);
5749
5750 /*
5751 * Before blowing away PostmasterContext, save this bgworker's
5752 * data where it can find it.
5753 */
5754 MyBgworkerEntry = (BackgroundWorker *)
5755 MemoryContextAlloc(TopMemoryContext, sizeof(BackgroundWorker));
5756 memcpy(MyBgworkerEntry, &rw->rw_worker, sizeof(BackgroundWorker));
5757
5758 /* Release postmaster's working memory context */
5759 MemoryContextSwitchTo(TopMemoryContext);
5760 MemoryContextDelete(PostmasterContext);
5761 PostmasterContext = NULL;
5762
5763 StartBackgroundWorker();
5764
5765 exit(1); /* should not get here */
5766 break;
5767#endif
5768 default:
5769 /* in postmaster, fork successful ... */
5770 rw->rw_pid = worker_pid;
5771 rw->rw_backend->pid = rw->rw_pid;
5772 ReportBackgroundWorkerPID(rw);
5773 /* add new worker to lists of backends */
5774 dlist_push_head(&BackendList, &rw->rw_backend->elem);
5775#ifdef EXEC_BACKEND
5776 ShmemBackendArrayAdd(rw->rw_backend);
5777#endif
5778 return true;
5779 }
5780
5781 return false;
5782}
5783
5784/*
5785 * Does the current postmaster state require starting a worker with the
5786 * specified start_time?
5787 */
5788static bool
5789bgworker_should_start_now(BgWorkerStartTime start_time)
5790{
5791 switch (pmState)
5792 {
5793 case PM_NO_CHILDREN:
5794 case PM_WAIT_DEAD_END:
5795 case PM_SHUTDOWN_2:
5796 case PM_SHUTDOWN:
5797 case PM_WAIT_BACKENDS:
5798 case PM_WAIT_READONLY:
5799 case PM_WAIT_BACKUP:
5800 break;
5801
5802 case PM_RUN:
5803 if (start_time == BgWorkerStart_RecoveryFinished)
5804 return true;
5805 /* fall through */
5806
5807 case PM_HOT_STANDBY:
5808 if (start_time == BgWorkerStart_ConsistentState)
5809 return true;
5810 /* fall through */
5811
5812 case PM_RECOVERY:
5813 case PM_STARTUP:
5814 case PM_INIT:
5815 if (start_time == BgWorkerStart_PostmasterStart)
5816 return true;
5817 /* fall through */
5818
5819 }
5820
5821 return false;
5822}
5823
5824/*
5825 * Allocate the Backend struct for a connected background worker, but don't
5826 * add it to the list of backends just yet.
5827 *
5828 * On failure, return false without changing any worker state.
5829 *
5830 * Some info from the Backend is copied into the passed rw.
5831 */
5832static bool
5833assign_backendlist_entry(RegisteredBgWorker *rw)
5834{
5835 Backend *bn;
5836
5837 /*
5838 * Compute the cancel key that will be assigned to this session. We
5839 * probably don't need cancel keys for background workers, but we'd better
5840 * have something random in the field to prevent unfriendly people from
5841 * sending cancels to them.
5842 */
5843 if (!RandomCancelKey(&MyCancelKey))
5844 {
5845 ereport(LOG,
5846 (errcode(ERRCODE_INTERNAL_ERROR),
5847 errmsg("could not generate random cancel key")));
5848 return false;
5849 }
5850
5851 bn = malloc(sizeof(Backend));
5852 if (bn == NULL)
5853 {
5854 ereport(LOG,
5855 (errcode(ERRCODE_OUT_OF_MEMORY),
5856 errmsg("out of memory")));
5857 return false;
5858 }
5859
5860 bn->cancel_key = MyCancelKey;
5861 bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
5862 bn->bkend_type = BACKEND_TYPE_BGWORKER;
5863 bn->dead_end = false;
5864 bn->bgworker_notify = false;
5865
5866 rw->rw_backend = bn;
5867 rw->rw_child_slot = bn->child_slot;
5868
5869 return true;
5870}
5871
5872/*
5873 * If the time is right, start background worker(s).
5874 *
5875 * As a side effect, the bgworker control variables are set or reset
5876 * depending on whether more workers may need to be started.
5877 *
5878 * We limit the number of workers started per call, to avoid consuming the
5879 * postmaster's attention for too long when many such requests are pending.
5880 * As long as StartWorkerNeeded is true, ServerLoop will not block and will
5881 * call this function again after dealing with any other issues.
5882 */
5883static void
5884maybe_start_bgworkers(void)
5885{
5886#define MAX_BGWORKERS_TO_LAUNCH 100
5887 int num_launched = 0;
5888 TimestampTz now = 0;
5889 slist_mutable_iter iter;
5890
5891 /*
5892 * During crash recovery, we have no need to be called until the state
5893 * transition out of recovery.
5894 */
5895 if (FatalError)
5896 {
5897 StartWorkerNeeded = false;
5898 HaveCrashedWorker = false;
5899 return;
5900 }
5901
5902 /* Don't need to be called again unless we find a reason for it below */
5903 StartWorkerNeeded = false;
5904 HaveCrashedWorker = false;
5905
5906 slist_foreach_modify(iter, &BackgroundWorkerList)
5907 {
5908 RegisteredBgWorker *rw;
5909
5910 rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
5911
5912 /* ignore if already running */
5913 if (rw->rw_pid != 0)
5914 continue;
5915
5916 /* if marked for death, clean up and remove from list */
5917 if (rw->rw_terminate)
5918 {
5919 ForgetBackgroundWorker(&iter);
5920 continue;
5921 }
5922
5923 /*
5924 * If this worker has crashed previously, maybe it needs to be
5925 * restarted (unless on registration it specified it doesn't want to
5926 * be restarted at all). Check how long ago did a crash last happen.
5927 * If the last crash is too recent, don't start it right away; let it
5928 * be restarted once enough time has passed.
5929 */
5930 if (rw->rw_crashed_at != 0)
5931 {
5932 if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
5933 {
5934 int notify_pid;
5935
5936 notify_pid = rw->rw_worker.bgw_notify_pid;
5937
5938 ForgetBackgroundWorker(&iter);
5939
5940 /* Report worker is gone now. */
5941 if (notify_pid != 0)
5942 kill(notify_pid, SIGUSR1);
5943
5944 continue;
5945 }
5946
5947 /* read system time only when needed */
5948 if (now == 0)
5949 now = GetCurrentTimestamp();
5950
5951 if (!TimestampDifferenceExceeds(rw->rw_crashed_at, now,
5952 rw->rw_worker.bgw_restart_time * 1000))
5953 {
5954 /* Set flag to remember that we have workers to start later */
5955 HaveCrashedWorker = true;
5956 continue;
5957 }
5958 }
5959
5960 if (bgworker_should_start_now(rw->rw_worker.bgw_start_time))
5961 {
5962 /* reset crash time before trying to start worker */
5963 rw->rw_crashed_at = 0;
5964
5965 /*
5966 * Try to start the worker.
5967 *
5968 * On failure, give up processing workers for now, but set
5969 * StartWorkerNeeded so we'll come back here on the next iteration
5970 * of ServerLoop to try again. (We don't want to wait, because
5971 * there might be additional ready-to-run workers.) We could set
5972 * HaveCrashedWorker as well, since this worker is now marked
5973 * crashed, but there's no need because the next run of this
5974 * function will do that.
5975 */
5976 if (!do_start_bgworker(rw))
5977 {
5978 StartWorkerNeeded = true;
5979 return;
5980 }
5981
5982 /*
5983 * If we've launched as many workers as allowed, quit, but have
5984 * ServerLoop call us again to look for additional ready-to-run
5985 * workers. There might not be any, but we'll find out the next
5986 * time we run.
5987 */
5988 if (++num_launched >= MAX_BGWORKERS_TO_LAUNCH)
5989 {
5990 StartWorkerNeeded = true;
5991 return;
5992 }
5993 }
5994 }
5995}
5996
5997/*
5998 * When a backend asks to be notified about worker state changes, we
5999 * set a flag in its backend entry. The background worker machinery needs
6000 * to know when such backends exit.
6001 */
6002bool
6003PostmasterMarkPIDForWorkerNotify(int pid)
6004{
6005 dlist_iter iter;
6006 Backend *bp;
6007
6008 dlist_foreach(iter, &BackendList)
6009 {
6010 bp = dlist_container(Backend, elem, iter.cur);
6011 if (bp->pid == pid)
6012 {
6013 bp->bgworker_notify = true;
6014 return true;
6015 }
6016 }
6017 return false;
6018}
6019
6020#ifdef EXEC_BACKEND
6021
/*
 * The following need to be available to the save/restore_backend_variables
 * functions.  They are marked NON_EXEC_STATIC in their home modules.
 */
extern slock_t *ShmemLock;
extern slock_t *ProcStructLock;
extern PGPROC *AuxiliaryProcs;
extern PMSignalData *PMSignalState;
extern pgsocket pgStatSock;
extern pg_time_t first_syslogger_file_time;

/*
 * Socket hand-off helpers.
 *
 * Without WIN32 these are plain macros: "writing" merely stores the socket
 * value into the destination (the childpid argument is ignored) and yields
 * true, and "reading" copies the value back out.  With WIN32 they are real
 * functions, which are only declared here.
 */
#ifndef WIN32
#define write_inheritable_socket(dest, src, childpid) ((*(dest) = (src)), true)
#define read_inheritable_socket(dest, src) (*(dest) = *(src))
#else
static bool write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE child);
static bool write_inheritable_socket(InheritableSocket *dest, SOCKET src,
									 pid_t childPid);
static void read_inheritable_socket(SOCKET *dest, InheritableSocket *src);
#endif
6042
6043
/*
 * Save critical backend variables into the BackendParameters struct
 *
 * Copies *port and a set of postmaster globals into *param.  The WIN32
 * variant takes two extra arguments, childProcess and childPid, which are
 * needed to duplicate handles and sockets into the child.
 *
 * Returns true on success, false if a socket or handle could not be
 * prepared for the child (in which case *param is only partially filled).
 */
#ifndef WIN32
static bool
save_backend_variables(BackendParameters *param, Port *port)
#else
static bool
save_backend_variables(BackendParameters *param, Port *port,
					   HANDLE childProcess, pid_t childPid)
#endif
{
	/* client connection */
	memcpy(&param->port, port, sizeof(Port));

	/*
	 * Without WIN32, write_inheritable_socket is a macro that never
	 * evaluates its third argument, which is why childPid can be named here
	 * even though that variant of the function has no such parameter.
	 */
	if (!write_inheritable_socket(&param->portsocket, port->sock, childPid))
		return false;

	strlcpy(param->DataDir, DataDir, MAXPGPATH);

	memcpy(&param->ListenSocket, &ListenSocket, sizeof(ListenSocket));

	param->MyCancelKey = MyCancelKey;
	param->MyPMChildSlot = MyPMChildSlot;

	/* shared memory segment identification */
#ifdef WIN32
	param->ShmemProtectiveRegion = ShmemProtectiveRegion;
#endif
	param->UsedShmemSegID = UsedShmemSegID;
	param->UsedShmemSegAddr = UsedShmemSegAddr;

	/* pointers to shared structures and locks */
	param->ShmemLock = ShmemLock;
	param->ShmemVariableCache = ShmemVariableCache;
	param->ShmemBackendArray = ShmemBackendArray;

#ifndef HAVE_SPINLOCKS
	param->SpinlockSemaArray = SpinlockSemaArray;
#endif
	param->NamedLWLockTrancheRequests = NamedLWLockTrancheRequests;
	param->NamedLWLockTrancheArray = NamedLWLockTrancheArray;
	param->MainLWLockArray = MainLWLockArray;
	param->ProcStructLock = ProcStructLock;
	param->ProcGlobal = ProcGlobal;
	param->AuxiliaryProcs = AuxiliaryProcs;
	param->PreparedXactProcs = PreparedXactProcs;
	param->PMSignalState = PMSignalState;
	if (!write_inheritable_socket(&param->pgStatSock, pgStatSock, childPid))
		return false;

	/* process identity and timestamps */
	param->PostmasterPid = PostmasterPid;
	param->PgStartTime = PgStartTime;
	param->PgReloadTime = PgReloadTime;
	param->first_syslogger_file_time = first_syslogger_file_time;

	param->redirection_done = redirection_done;
	param->IsBinaryUpgrade = IsBinaryUpgrade;
	param->max_safe_fds = max_safe_fds;

	param->MaxBackends = MaxBackends;

	/*
	 * WIN32 passes the postmaster's handle plus a signal pipe handle
	 * duplicated into the child; otherwise the postmaster_alive_fds array
	 * is copied as-is.
	 */
#ifdef WIN32
	param->PostmasterHandle = PostmasterHandle;
	if (!write_duplicated_handle(&param->initial_signal_pipe,
								 pgwin32_create_signal_listener(childPid),
								 childProcess))
		return false;
#else
	memcpy(&param->postmaster_alive_fds, &postmaster_alive_fds,
		   sizeof(postmaster_alive_fds));
#endif

	memcpy(&param->syslogPipe, &syslogPipe, sizeof(syslogPipe));

	/* path strings, each copied with a MAXPGPATH size limit */
	strlcpy(param->my_exec_path, my_exec_path, MAXPGPATH);

	strlcpy(param->pkglib_path, pkglib_path, MAXPGPATH);

	strlcpy(param->ExtraOptions, ExtraOptions, MAXPGPATH);

	return true;
}
6121
6122
6123#ifdef WIN32
6124/*
6125 * Duplicate a handle for usage in a child process, and write the child
6126 * process instance of the handle to the parameter file.
6127 */
6128static bool
6129write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE childProcess)
6130{
6131 HANDLE hChild = INVALID_HANDLE_VALUE;
6132
6133 if (!DuplicateHandle(GetCurrentProcess(),
6134 src,
6135 childProcess,
6136 &hChild,
6137 0,
6138 TRUE,
6139 DUPLICATE_CLOSE_SOURCE | DUPLICATE_SAME_ACCESS))
6140 {
6141 ereport(LOG,
6142 (errmsg_internal("could not duplicate handle to be written to backend parameter file: error code %lu",
6143 GetLastError())));
6144 return false;
6145 }
6146
6147 *dest = hChild;
6148 return true;
6149}
6150
6151/*
6152 * Duplicate a socket for usage in a child process, and write the resulting
6153 * structure to the parameter file.
6154 * This is required because a number of LSPs (Layered Service Providers) very
6155 * common on Windows (antivirus, firewalls, download managers etc) break
6156 * straight socket inheritance.
6157 */
6158static bool
6159write_inheritable_socket(InheritableSocket *dest, SOCKET src, pid_t childpid)
6160{
6161 dest->origsocket = src;
6162 if (src != 0 && src != PGINVALID_SOCKET)
6163 {
6164 /* Actual socket */
6165 if (WSADuplicateSocket(src, childpid, &dest->wsainfo) != 0)
6166 {
6167 ereport(LOG,
6168 (errmsg("could not duplicate socket %d for use in backend: error code %d",
6169 (int) src, WSAGetLastError())));
6170 return false;
6171 }
6172 }
6173 return true;
6174}
6175
6176/*
6177 * Read a duplicate socket structure back, and get the socket descriptor.
6178 */
6179static void
6180read_inheritable_socket(SOCKET *dest, InheritableSocket *src)
6181{
6182 SOCKET s;
6183
6184 if (src->origsocket == PGINVALID_SOCKET || src->origsocket == 0)
6185 {
6186 /* Not a real socket! */
6187 *dest = src->origsocket;
6188 }
6189 else
6190 {
6191 /* Actual socket, so create from structure */
6192 s = WSASocket(FROM_PROTOCOL_INFO,
6193 FROM_PROTOCOL_INFO,
6194 FROM_PROTOCOL_INFO,
6195 &src->wsainfo,
6196 0,
6197 0);
6198 if (s == INVALID_SOCKET)
6199 {
6200 write_stderr("could not create inherited socket: error code %d\n",
6201 WSAGetLastError());
6202 exit(1);
6203 }
6204 *dest = s;
6205
6206 /*
6207 * To make sure we don't get two references to the same socket, close
6208 * the original one. (This would happen when inheritance actually
6209 * works..
6210 */
6211 closesocket(src->origsocket);
6212 }
6213}
6214#endif
6215
6216static void
6217read_backend_variables(char *id, Port *port)
6218{
6219 BackendParameters param;
6220
6221#ifndef WIN32
6222 /* Non-win32 implementation reads from file */
6223 FILE *fp;
6224
6225 /* Open file */
6226 fp = AllocateFile(id, PG_BINARY_R);
6227 if (!fp)
6228 {
6229 write_stderr("could not open backend variables file \"%s\": %s\n",
6230 id, strerror(errno));
6231 exit(1);
6232 }
6233
6234 if (fread(&param, sizeof(param), 1, fp) != 1)
6235 {
6236 write_stderr("could not read from backend variables file \"%s\": %s\n",
6237 id, strerror(errno));
6238 exit(1);
6239 }
6240
6241 /* Release file */
6242 FreeFile(fp);
6243 if (unlink(id) != 0)
6244 {
6245 write_stderr("could not remove file \"%s\": %s\n",
6246 id, strerror(errno));
6247 exit(1);
6248 }
6249#else
6250 /* Win32 version uses mapped file */
6251 HANDLE paramHandle;
6252 BackendParameters *paramp;
6253
6254#ifdef _WIN64
6255 paramHandle = (HANDLE) _atoi64(id);
6256#else
6257 paramHandle = (HANDLE) atol(id);
6258#endif
6259 paramp = MapViewOfFile(paramHandle, FILE_MAP_READ, 0, 0, 0);
6260 if (!paramp)
6261 {
6262 write_stderr("could not map view of backend variables: error code %lu\n",
6263 GetLastError());
6264 exit(1);
6265 }
6266
6267 memcpy(&param, paramp, sizeof(BackendParameters));
6268
6269 if (!UnmapViewOfFile(paramp))
6270 {
6271 write_stderr("could not unmap view of backend variables: error code %lu\n",
6272 GetLastError());
6273 exit(1);
6274 }
6275
6276 if (!CloseHandle(paramHandle))
6277 {
6278 write_stderr("could not close handle to backend parameter variables: error code %lu\n",
6279 GetLastError());
6280 exit(1);
6281 }
6282#endif
6283
6284 restore_backend_variables(&param, port);
6285}
6286
6287/* Restore critical backend variables from the BackendParameters struct */
6288static void
6289restore_backend_variables(BackendParameters *param, Port *port)
6290{
6291 memcpy(port, &param->port, sizeof(Port));
6292 read_inheritable_socket(&port->sock, &param->portsocket);
6293
6294 SetDataDir(param->DataDir);
6295
6296 memcpy(&ListenSocket, &param->ListenSocket, sizeof(ListenSocket));
6297
6298 MyCancelKey = param->MyCancelKey;
6299 MyPMChildSlot = param->MyPMChildSlot;
6300
6301#ifdef WIN32
6302 ShmemProtectiveRegion = param->ShmemProtectiveRegion;
6303#endif
6304 UsedShmemSegID = param->UsedShmemSegID;
6305 UsedShmemSegAddr = param->UsedShmemSegAddr;
6306
6307 ShmemLock = param->ShmemLock;
6308 ShmemVariableCache = param->ShmemVariableCache;
6309 ShmemBackendArray = param->ShmemBackendArray;
6310
6311#ifndef HAVE_SPINLOCKS
6312 SpinlockSemaArray = param->SpinlockSemaArray;
6313#endif
6314 NamedLWLockTrancheRequests = param->NamedLWLockTrancheRequests;
6315 NamedLWLockTrancheArray = param->NamedLWLockTrancheArray;
6316 MainLWLockArray = param->MainLWLockArray;
6317 ProcStructLock = param->ProcStructLock;
6318 ProcGlobal = param->ProcGlobal;
6319 AuxiliaryProcs = param->AuxiliaryProcs;
6320 PreparedXactProcs = param->PreparedXactProcs;
6321 PMSignalState = param->PMSignalState;
6322 read_inheritable_socket(&pgStatSock, &param->pgStatSock);
6323
6324 PostmasterPid = param->PostmasterPid;
6325 PgStartTime = param->PgStartTime;
6326 PgReloadTime = param->PgReloadTime;
6327 first_syslogger_file_time = param->first_syslogger_file_time;
6328
6329 redirection_done = param->redirection_done;
6330 IsBinaryUpgrade = param->IsBinaryUpgrade;
6331 max_safe_fds = param->max_safe_fds;
6332
6333 MaxBackends = param->MaxBackends;
6334
6335#ifdef WIN32
6336 PostmasterHandle = param->PostmasterHandle;
6337 pgwin32_initial_signal_pipe = param->initial_signal_pipe;
6338#else
6339 memcpy(&postmaster_alive_fds, &param->postmaster_alive_fds,
6340 sizeof(postmaster_alive_fds));
6341#endif
6342
6343 memcpy(&syslogPipe, &param->syslogPipe, sizeof(syslogPipe));
6344
6345 strlcpy(my_exec_path, param->my_exec_path, MAXPGPATH);
6346
6347 strlcpy(pkglib_path, param->pkglib_path, MAXPGPATH);
6348
6349 strlcpy(ExtraOptions, param->ExtraOptions, MAXPGPATH);
6350}
6351
6352
6353Size
6354ShmemBackendArraySize(void)
6355{
6356 return mul_size(MaxLivePostmasterChildren(), sizeof(Backend));
6357}
6358
6359void
6360ShmemBackendArrayAllocation(void)
6361{
6362 Size size = ShmemBackendArraySize();
6363
6364 ShmemBackendArray = (Backend *) ShmemAlloc(size);
6365 /* Mark all slots as empty */
6366 memset(ShmemBackendArray, 0, size);
6367}
6368
6369static void
6370ShmemBackendArrayAdd(Backend *bn)
6371{
6372 /* The array slot corresponding to my PMChildSlot should be free */
6373 int i = bn->child_slot - 1;
6374
6375 Assert(ShmemBackendArray[i].pid == 0);
6376 ShmemBackendArray[i] = *bn;
6377}
6378
6379static void
6380ShmemBackendArrayRemove(Backend *bn)
6381{
6382 int i = bn->child_slot - 1;
6383
6384 Assert(ShmemBackendArray[i].pid == bn->pid);
6385 /* Mark the slot as empty */
6386 ShmemBackendArray[i].pid = 0;
6387}
6388#endif /* EXEC_BACKEND */
6389
6390
6391#ifdef WIN32
6392
6393/*
6394 * Subset implementation of waitpid() for Windows. We assume pid is -1
6395 * (that is, check all child processes) and options is WNOHANG (don't wait).
6396 */
6397static pid_t
6398waitpid(pid_t pid, int *exitstatus, int options)
6399{
6400 DWORD dwd;
6401 ULONG_PTR key;
6402 OVERLAPPED *ovl;
6403
6404 /*
6405 * Check if there are any dead children. If there are, return the pid of
6406 * the first one that died.
6407 */
6408 if (GetQueuedCompletionStatus(win32ChildQueue, &dwd, &key, &ovl, 0))
6409 {
6410 *exitstatus = (int) key;
6411 return dwd;
6412 }
6413
6414 return -1;
6415}
6416
6417/*
6418 * Note! Code below executes on a thread pool! All operations must
6419 * be thread safe! Note that elog() and friends must *not* be used.
6420 */
6421static void WINAPI
6422pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
6423{
6424 win32_deadchild_waitinfo *childinfo = (win32_deadchild_waitinfo *) lpParameter;
6425 DWORD exitcode;
6426
6427 if (TimerOrWaitFired)
6428 return; /* timeout. Should never happen, since we use
6429 * INFINITE as timeout value. */
6430
6431 /*
6432 * Remove handle from wait - required even though it's set to wait only
6433 * once
6434 */
6435 UnregisterWaitEx(childinfo->waitHandle, NULL);
6436
6437 if (!GetExitCodeProcess(childinfo->procHandle, &exitcode))
6438 {
6439 /*
6440 * Should never happen. Inform user and set a fixed exitcode.
6441 */
6442 write_stderr("could not read exit code for process\n");
6443 exitcode = 255;
6444 }
6445
6446 if (!PostQueuedCompletionStatus(win32ChildQueue, childinfo->procId, (ULONG_PTR) exitcode, NULL))
6447 write_stderr("could not post child completion status\n");
6448
6449 /*
6450 * Handle is per-process, so we close it here instead of in the
6451 * originating thread
6452 */
6453 CloseHandle(childinfo->procHandle);
6454
6455 /*
6456 * Free struct that was allocated before the call to
6457 * RegisterWaitForSingleObject()
6458 */
6459 free(childinfo);
6460
6461 /* Queue SIGCHLD signal */
6462 pg_queue_signal(SIGCHLD);
6463}
6464#endif /* WIN32 */
6465
6466/*
6467 * Initialize one and only handle for monitoring postmaster death.
6468 *
6469 * Called once in the postmaster, so that child processes can subsequently
6470 * monitor if their parent is dead.
6471 */
6472static void
6473InitPostmasterDeathWatchHandle(void)
6474{
6475#ifndef WIN32
6476
6477 /*
6478 * Create a pipe. Postmaster holds the write end of the pipe open
6479 * (POSTMASTER_FD_OWN), and children hold the read end. Children can pass
6480 * the read file descriptor to select() to wake up in case postmaster
6481 * dies, or check for postmaster death with a (read() == 0). Children must
6482 * close the write end as soon as possible after forking, because EOF
6483 * won't be signaled in the read end until all processes have closed the
6484 * write fd. That is taken care of in ClosePostmasterPorts().
6485 */
6486 Assert(MyProcPid == PostmasterPid);
6487 if (pipe(postmaster_alive_fds) < 0)
6488 ereport(FATAL,
6489 (errcode_for_file_access(),
6490 errmsg_internal("could not create pipe to monitor postmaster death: %m")));
6491
6492 /*
6493 * Set O_NONBLOCK to allow testing for the fd's presence with a read()
6494 * call.
6495 */
6496 if (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFL, O_NONBLOCK) == -1)
6497 ereport(FATAL,
6498 (errcode_for_socket_access(),
6499 errmsg_internal("could not set postmaster death monitoring pipe to nonblocking mode: %m")));
6500#else
6501
6502 /*
6503 * On Windows, we use a process handle for the same purpose.
6504 */
6505 if (DuplicateHandle(GetCurrentProcess(),
6506 GetCurrentProcess(),
6507 GetCurrentProcess(),
6508 &PostmasterHandle,
6509 0,
6510 TRUE,
6511 DUPLICATE_SAME_ACCESS) == 0)
6512 ereport(FATAL,
6513 (errmsg_internal("could not duplicate postmaster handle: error code %lu",
6514 GetLastError())));
6515#endif /* WIN32 */
6516}
6517