postmaster.c source code [PostgreSQL/src/backend/postmaster/postmaster.c]

1	/*-------------------------------------------------------------------------
2	*
3	* postmaster.c
4	* This program acts as a clearing house for requests to the
5	* POSTGRES system. Frontend programs send a startup message
6	* to the Postmaster and the postmaster uses the info in the
7	* message to setup a backend process.
8	*
9	* The postmaster also manages system-wide operations such as
10	* startup and shutdown. The postmaster itself doesn't do those
11	* operations, mind you --- it just forks off a subprocess to do them
12	* at the right times. It also takes care of resetting the system
13	* if a backend crashes.
14	*
15	* The postmaster process creates the shared memory and semaphore
16	* pools during startup, but as a rule does not touch them itself.
17	* In particular, it is not a member of the PGPROC array of backends
18	* and so it cannot participate in lock-manager operations. Keeping
19	* the postmaster away from shared memory operations makes it simpler
20	* and more reliable. The postmaster is almost always able to recover
21	* from crashes of individual backends by resetting shared memory;
22	* if it did much with shared memory then it would be prone to crashing
23	* along with the backends.
24	*
25	* When a request message is received, we now fork() immediately.
26	* The child process performs authentication of the request, and
27	* then becomes a backend if successful. This allows the auth code
28	* to be written in a simple single-threaded style (as opposed to the
29	* crufty "poor man's multitasking" code that used to be needed).
30	* More importantly, it ensures that blockages in non-multithreaded
31	* libraries like SSL or PAM cannot cause denial of service to other
32	* clients.
33	*
34	*
35	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
36	* Portions Copyright (c) 1994, Regents of the University of California
37	*
38	*
39	* IDENTIFICATION
40	* src/backend/postmaster/postmaster.c
41	*
42	* NOTES
43	*
44	* Initialization:
45	* The Postmaster sets up shared memory data structures
46	* for the backends.
47	*
48	* Synchronization:
49	* The Postmaster shares memory with the backends but should avoid
50	* touching shared memory, so as not to become stuck if a crashing
51	* backend screws up locks or shared memory. Likewise, the Postmaster
52	* should never block on messages from frontend clients.
53	*
54	* Garbage Collection:
55	* The Postmaster cleans up after backends if they have an emergency
56	* exit and/or core dump.
57	*
58	* Error Reporting:
59	* Use write_stderr() only for reporting "interactive" errors
60	* (essentially, bogus arguments on the command line). Once the
61	* postmaster is launched, use ereport().
62	*
63	*-------------------------------------------------------------------------
64	*/
65
66	#include "postgres.h"
67
68	#include <unistd.h>
69	#include <signal.h>
70	#include <time.h>
71	#include <sys/wait.h>
72	#include <ctype.h>
73	#include <sys/stat.h>
74	#include <sys/socket.h>
75	#include <fcntl.h>
76	#include <sys/param.h>
77	#include <netdb.h>
78	#include <limits.h>
79
80	#ifdef HAVE_SYS_SELECT_H
81	#include <sys/select.h>
82	#endif
83
84	#ifdef USE_BONJOUR
85	#include <dns_sd.h>
86	#endif
87
88	#ifdef USE_SYSTEMD
89	#include <systemd/sd-daemon.h>
90	#endif
91
92	#ifdef HAVE_PTHREAD_IS_THREADED_NP
93	#include <pthread.h>
94	#endif
95
96	#include "access/transam.h"
97	#include "access/xlog.h"
98	#include "bootstrap/bootstrap.h"
99	#include "catalog/pg_control.h"
100	#include "common/file_perm.h"
101	#include "common/ip.h"
102	#include "common/string.h"
103	#include "lib/ilist.h"
104	#include "libpq/auth.h"
105	#include "libpq/libpq.h"
106	#include "libpq/pqformat.h"
107	#include "libpq/pqsignal.h"
108	#include "miscadmin.h"
109	#include "pg_getopt.h"
110	#include "pgstat.h"
111	#include "port/pg_bswap.h"
112	#include "postmaster/autovacuum.h"
113	#include "postmaster/bgworker_internals.h"
114	#include "postmaster/fork_process.h"
115	#include "postmaster/pgarch.h"
116	#include "postmaster/postmaster.h"
117	#include "postmaster/syslogger.h"
118	#include "replication/logicallauncher.h"
119	#include "replication/walsender.h"
120	#include "storage/fd.h"
121	#include "storage/ipc.h"
122	#include "storage/pg_shmem.h"
123	#include "storage/pmsignal.h"
124	#include "storage/proc.h"
125	#include "tcop/tcopprot.h"
126	#include "utils/builtins.h"
127	#include "utils/datetime.h"
128	#include "utils/memutils.h"
129	#include "utils/pidfile.h"
130	#include "utils/ps_status.h"
131	#include "utils/timeout.h"
132	#include "utils/timestamp.h"
133	#include "utils/varlena.h"
134
135	#ifdef EXEC_BACKEND
136	#include "storage/spin.h"
137	#endif
138
139
/*
 * Possible types of a backend.  Beyond being the possible bkend_type values
 * in struct bkend, these are OR-able request flag bits for
 * SignalSomeChildren() and CountChildren().
 *
 * Each type occupies its own bit so that several types can be combined in a
 * single request mask.
 */
#define BACKEND_TYPE_NORMAL		0x0001	/* normal backend */
#define BACKEND_TYPE_AUTOVAC	0x0002	/* autovacuum worker process */
#define BACKEND_TYPE_WALSND		0x0004	/* walsender process */
#define BACKEND_TYPE_BGWORKER	0x0008	/* bgworker process */
#define BACKEND_TYPE_ALL		0x000F	/* OR of all the above */

/* Mask selecting both kinds of worker process */
#define BACKEND_TYPE_WORKER		(BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER)
152
153	/*
154	* List of active backends (or child processes anyway; we don't actually
155	* know whether a given child has become a backend or is still in the
156	* authorization phase). This is used mainly to keep track of how many
157	* children we have and send them appropriate signals when necessary.
158	*
159	* "Special" children such as the startup, bgwriter and autovacuum launcher
160	* tasks are not in this list. Autovacuum worker and walsender are in it.
161	* Also, "dead_end" children are in it: these are children launched just for
162	* the purpose of sending a friendly rejection message to a would-be client.
163	* We must track them because they are attached to shared memory, but we know
164	* they will never become live backends. dead_end children are not assigned a
165	* PMChildSlot.
166	*
167	* Background workers are in this list, too.
168	*/
169	typedef struct bkend
170	{
171	pid_t pid; / process id of backend /
172	int32 cancel_key; / cancel key for cancels for this backend /
173	int child_slot; / PMChildSlot for this backend, if any /
174
175	/*
176	* Flavor of backend or auxiliary process. Note that BACKEND_TYPE_WALSND
177	* backends initially announce themselves as BACKEND_TYPE_NORMAL, so if
178	* bkend_type is normal, you should check for a recent transition.
179	*/
180	int bkend_type;
181	bool dead_end; / is it going to send an error and quit? /
182	bool bgworker_notify; / gets bgworker start/stop notifications /
183	dlist_node elem; / list link in BackendList /
184	} Backend;
185
186	static dlist_head BackendList = DLIST_STATIC_INIT(BackendList);
187
188	#ifdef EXEC_BACKEND
189	static Backend *ShmemBackendArray;
190	#endif
191
192	BackgroundWorker *MyBgworkerEntry = NULL;
193
194
195
196	/ The socket number we are listening for connections on /
197	int PostPortNumber;
198
199	/ The directory names for Unix socket(s) /
200	char *Unix_socket_directories;
201
202	/ The TCP listen address(es) /
203	char *ListenAddresses;
204
205	/*
206	* ReservedBackends is the number of backends reserved for superuser use.
207	* This number is taken out of the pool size given by MaxConnections so
208	* number of backend slots available to non-superusers is
209	* (MaxConnections - ReservedBackends). Note what this really means is
210	* "if there are <= ReservedBackends connections available, only superusers
211	* can make new connections" --- pre-existing superuser connections don't
212	* count against the limit.
213	*/
214	int ReservedBackends;
215
216	/ The socket(s) we're listening to. /
217	#define MAXLISTEN 64
218	static pgsocket ListenSocket[MAXLISTEN];
219
220	/*
221	* Set by the -o option
222	*/
223	static char ExtraOptions[MAXPGPATH];
224
/*
 * These globals control the behavior of the postmaster in case some
 * backend dumps core.  Normally, it kills all peers of the dead backend
 * and reinitializes shared memory.  By specifying -T or -n, we can have
 * the postmaster stop (rather than kill) peers and not reinitialize
 * shared data structures.  (Reinit is currently dead code, though.)
 */
static bool Reinit = true;		/* cleared by the -n switch */
static int	SendStop = false;	/* set by the -T switch; used as a boolean */

/* still more option variables */
bool		EnableSSL = false;

int			PreAuthDelay = 0;
int			AuthenticationTimeout = 60;

bool		log_hostname;		/* for ps display and logging */
bool		Log_connections = false;
bool		Db_user_namespace = false;

bool		enable_bonjour = false;
char	   *bonjour_name;
bool		restart_after_crash = true;

/* PIDs of special child processes; 0 when not running */
static pid_t StartupPID = 0,
			BgWriterPID = 0,
			CheckpointerPID = 0,
			WalWriterPID = 0,
			WalReceiverPID = 0,
			AutoVacPID = 0,
			PgArchPID = 0,
			PgStatPID = 0,
			SysLoggerPID = 0;
259
/* Startup process's status */
typedef enum
{
	STARTUP_NOT_RUNNING,
	STARTUP_RUNNING,
	STARTUP_SIGNALED,			/* we sent it a SIGQUIT or SIGKILL */
	STARTUP_CRASHED
} StartupStatusEnum;

/* Current status of the startup process; initially it is not running */
static StartupStatusEnum StartupStatus = STARTUP_NOT_RUNNING;
270
/* Startup/shutdown state */
#define NoShutdown		0
#define SmartShutdown	1
#define FastShutdown	2
#define ImmediateShutdown	3

/* Requested shutdown mode; one of the four values above */
static int	Shutdown = NoShutdown;

static bool FatalError = false; /* T if recovering from backend crash */
280
/*
 * We use a simple state machine to control startup, shutdown, and
 * crash recovery (which is rather like shutdown followed by startup).
 *
 * After doing all the postmaster initialization work, we enter PM_STARTUP
 * state and the startup process is launched.  The startup process begins by
 * reading the control file and other preliminary initialization steps.
 * In a normal startup, or after crash recovery, the startup process exits
 * with exit code 0 and we switch to PM_RUN state.  However, archive recovery
 * is handled specially since it takes much longer and we would like to support
 * hot standby during archive recovery.
 *
 * When the startup process is ready to start archive recovery, it signals the
 * postmaster, and we switch to PM_RECOVERY state.  The background writer and
 * checkpointer are launched, while the startup process continues applying WAL.
 * If Hot Standby is enabled, then, after reaching a consistent point in WAL
 * redo, startup process signals us again, and we switch to PM_HOT_STANDBY
 * state and begin accepting connections to perform read-only queries.  When
 * archive recovery is finished, the startup process exits with exit code 0
 * and we switch to PM_RUN state.
 *
 * Normal child backends can only be launched when we are in PM_RUN or
 * PM_HOT_STANDBY state.  (We also allow launch of normal
 * child backends in PM_WAIT_BACKUP state, but only for superusers.)
 * In other states we handle connection requests by launching "dead_end"
 * child processes, which will simply send the client an error message and
 * quit.  (We track these in the BackendList so that we can know when they
 * are all gone; this is important because they're still connected to shared
 * memory, and would interfere with an attempt to destroy the shmem segment,
 * possibly leading to SHMALL failure when we try to make a new one.)
 * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
 * to drain out of the system, and therefore stop accepting connection
 * requests at all until the last existing child has quit (which hopefully
 * will not be very long).
 *
 * Notice that this state variable does not distinguish *why* we entered
 * states later than PM_RUN --- Shutdown and FatalError must be consulted
 * to find that out.  FatalError is never true in PM_RECOVERY_* or PM_RUN
 * states, nor in PM_SHUTDOWN states (because we don't enter those states
 * when trying to recover from a crash).  It can be true in PM_STARTUP state,
 * because we don't clear it until we've successfully started WAL redo.
 */
typedef enum
{
	PM_INIT,					/* postmaster starting */
	PM_STARTUP,					/* waiting for startup subprocess */
	PM_RECOVERY,				/* in archive recovery mode */
	PM_HOT_STANDBY,				/* in hot standby mode */
	PM_RUN,						/* normal "database is alive" state */
	PM_WAIT_BACKUP,				/* waiting for online backup mode to end */
	PM_WAIT_READONLY,			/* waiting for read only backends to exit */
	PM_WAIT_BACKENDS,			/* waiting for live backends to exit */
	PM_SHUTDOWN,				/* waiting for checkpointer to do shutdown
								 * ckpt */
	PM_SHUTDOWN_2,				/* waiting for archiver and walsenders to
								 * finish */
	PM_WAIT_DEAD_END,			/* waiting for dead_end children to exit */
	PM_NO_CHILDREN				/* all important children have exited */
} PMState;

/* Current state of the machine described above */
static PMState pmState = PM_INIT;
342
/* Start time of SIGKILL timeout during immediate shutdown or child crash */
/* Zero means timeout is not running */
static time_t AbortStartTime = 0;

/* Length of said timeout */
#define SIGKILL_CHILDREN_AFTER_SECS		5

static bool ReachedNormalRunning = false;	/* T if we've reached PM_RUN */

bool		ClientAuthInProgress = false;	/* T during new-client
											 * authentication */

bool		redirection_done = false;	/* stderr redirected for syslogger? */

/* received START_AUTOVAC_LAUNCHER signal */
static volatile sig_atomic_t start_autovac_launcher = false;

/* the launcher needs to be signalled to communicate some condition */
static volatile bool avlauncher_needs_signal = false;

/* received START_WALRECEIVER signal */
static volatile sig_atomic_t WalReceiverRequested = false;

/* set when there's a worker that needs to be started up */
static volatile bool StartWorkerNeeded = true;
static volatile bool HaveCrashedWorker = false;

#ifdef USE_SSL
/* Set when and if SSL has been initialized properly */
static bool LoadedSSL = false;
#endif

#ifdef USE_BONJOUR
static DNSServiceRef bonjour_sdref = NULL;
#endif
378
379	/*
380	* postmaster.c - function prototypes
381	*/
382	static void CloseServerPorts(int status, Datum arg);
383	static void unlink_external_pid_file(int status, Datum arg);
384	static void getInstallationPaths(const char *argv0);
385	static void checkControlFile(void);
386	static Port ConnCreate(int* serverFd);
387	static void ConnFree(Port *port);
388	static void reset_shared(int port);
389	static void SIGHUP_handler(SIGNAL_ARGS);
390	static void pmdie(SIGNAL_ARGS);
391	static void reaper(SIGNAL_ARGS);
392	static void sigusr1_handler(SIGNAL_ARGS);
393	static void startup_die(SIGNAL_ARGS);
394	static void dummy_handler(SIGNAL_ARGS);
395	static void StartupPacketTimeoutHandler(void);
396	static void CleanupBackend(int pid, int exitstatus);
397	static bool CleanupBackgroundWorker(int pid, int exitstatus);
398	static void HandleChildCrash(int pid, int exitstatus, const char *procname);
399	static void LogChildExit(int lev, const char *procname,
400	int pid, int exitstatus);
401	static void PostmasterStateMachine(void);
402	static void BackendInitialize(Port *port);
403	static void BackendRun(Port *port) pg_attribute_noreturn();
404	static void ExitPostmaster(int status) pg_attribute_noreturn();
405	static int ServerLoop(void);
406	static int BackendStartup(Port *port);
407	static int ProcessStartupPacket(Port *port, bool secure_done);
408	static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options);
409	static void processCancelRequest(Port port, void* *pkt);
410	static int initMasks(fd_set *rmask);
411	static void report_fork_failure_to_client(Port port, int* errnum);
412	static CAC_state canAcceptConnections(void);
413	static bool RandomCancelKey(int32 *cancel_key);
414	static void signal_child(pid_t pid, int signal);
415	static bool SignalSomeChildren(int signal, int targets);
416	static void TerminateChildren(int signal);
417
418	#define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
419
420	static int CountChildren(int target);
421	static bool assign_backendlist_entry(RegisteredBgWorker *rw);
422	static void maybe_start_bgworkers(void);
423	static bool CreateOptsFile(int argc, char argv[], char* *fullprogname);
424	static pid_t StartChildProcess(AuxProcType type);
425	static void StartAutovacuumWorker(void);
426	static void MaybeStartWalReceiver(void);
427	static void InitPostmasterDeathWatchHandle(void);
428
/*
 * Archiver is allowed to start up at the current postmaster state?
 *
 * If WAL archiving is enabled always, we are allowed to start archiver
 * even during recovery.
 *
 * Evaluates to nonzero when either archiving is active and we are in PM_RUN,
 * or archiving is "always" and we are in PM_RECOVERY or PM_HOT_STANDBY.
 */
#define PgArchStartupAllowed()	\
	((XLogArchivingActive() && pmState == PM_RUN) ||	\
	 (XLogArchivingAlways() &&	\
	  (pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY)))
439
440	#ifdef EXEC_BACKEND
441
#ifdef WIN32
#define WNOHANG 0				/* ignored, so any integer value will do */

/*
 * File-local replacement for waitpid() on Windows.  The exit status is
 * returned through the exitstatus pointer; options is accepted only for
 * call compatibility (see WNOHANG above).
 */
static pid_t waitpid(pid_t pid, int *exitstatus, int options);
static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired);

static HANDLE win32ChildQueue;

/* Per-child bookkeeping handed to pgwin32_deadchild_callback() */
typedef struct
{
	HANDLE		waitHandle;
	HANDLE		procHandle;
	DWORD		procId;
} win32_deadchild_waitinfo;
#endif							/* WIN32 */
457
458	static pid_t backend_forkexec(Port *port);
459	static pid_t internal_forkexec(int argc, char argv[], Port port);
460
/* Type for a socket that can be inherited to a client process */
#ifdef WIN32
typedef struct
{
	SOCKET		origsocket;		/* Original socket value, or PGINVALID_SOCKET
								 * if not a socket */
	WSAPROTOCOL_INFO wsainfo;
} InheritableSocket;
#else
/* Elsewhere a plain int descriptor is used as-is */
typedef int InheritableSocket;
#endif
472
473	/*
474	* Structure contains all variables passed to exec:ed backends
475	*/
476	typedef struct
477	{
478	Port port;
479	InheritableSocket portsocket;
480	char DataDir[MAXPGPATH];
481	pgsocket ListenSocket[MAXLISTEN];
482	int32 MyCancelKey;
483	int MyPMChildSlot;
484	#ifndef WIN32
485	unsigned long UsedShmemSegID;
486	#else
487	void *ShmemProtectiveRegion;
488	HANDLE UsedShmemSegID;
489	#endif
490	void *UsedShmemSegAddr;
491	slock_t *ShmemLock;
492	VariableCache ShmemVariableCache;
493	Backend *ShmemBackendArray;
494	#ifndef HAVE_SPINLOCKS
495	PGSemaphore *SpinlockSemaArray;
496	#endif
497	int NamedLWLockTrancheRequests;
498	NamedLWLockTranche *NamedLWLockTrancheArray;
499	LWLockPadded *MainLWLockArray;
500	slock_t *ProcStructLock;
501	PROC_HDR *ProcGlobal;
502	PGPROC *AuxiliaryProcs;
503	PGPROC *PreparedXactProcs;
504	PMSignalData *PMSignalState;
505	InheritableSocket pgStatSock;
506	pid_t PostmasterPid;
507	TimestampTz PgStartTime;
508	TimestampTz PgReloadTime;
509	pg_time_t first_syslogger_file_time;
510	bool redirection_done;
511	bool IsBinaryUpgrade;
512	int max_safe_fds;
513	int MaxBackends;
514	#ifdef WIN32
515	HANDLE PostmasterHandle;
516	HANDLE initial_signal_pipe;
517	HANDLE syslogPipe[`2`];
518	#else
519	int postmaster_alive_fds[`2`];
520	int syslogPipe[`2`];
521	#endif
522	char my_exec_path[MAXPGPATH];
523	char pkglib_path[MAXPGPATH];
524	char ExtraOptions[MAXPGPATH];
525	} BackendParameters;
526
527	static void read_backend_variables(char id, Port port);
528	static void restore_backend_variables(BackendParameters param, Port port);
529
530	#ifndef WIN32
531	static bool save_backend_variables(BackendParameters param, Port port);
532	#else
533	static bool save_backend_variables(BackendParameters param, Port port,
534	HANDLE childProcess, pid_t childPid);
535	#endif
536
537	static void ShmemBackendArrayAdd(Backend *bn);
538	static void ShmemBackendArrayRemove(Backend *bn);
539	#endif /* EXEC_BACKEND */
540
/*
 * Shorthands for launching one specific auxiliary process; each expands to
 * a StartChildProcess() call with the matching AuxProcType value and so
 * yields whatever StartChildProcess() returns.
 */
#define StartupDataBase()		StartChildProcess(StartupProcess)
#define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
#define StartCheckpointer()		StartChildProcess(CheckpointerProcess)
#define StartWalWriter()		StartChildProcess(WalWriterProcess)
#define StartWalReceiver()		StartChildProcess(WalReceiverProcess)
546
/*
 * Macros to check exit status of a child process
 *
 * "st" is a raw wait status.  EXIT_STATUS_0 compares the whole status word
 * with zero; the other two require a normal exit with the given exit code.
 */
#define EXIT_STATUS_0(st)  ((st) == 0)
#define EXIT_STATUS_1(st)  (WIFEXITED(st) && WEXITSTATUS(st) == 1)
#define EXIT_STATUS_3(st)  (WIFEXITED(st) && WEXITSTATUS(st) == 3)
551
#ifndef WIN32
/*
 * File descriptors for pipe used to monitor if postmaster is alive.
 * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
 * Both start out as -1.
 */
int			postmaster_alive_fds[2] = {-1, -1};
#else
/* Process handle of postmaster used for the same purpose on Windows */
HANDLE		PostmasterHandle;
#endif
562
563	/*
564	* Postmaster main entry point
565	*/
566	void
567	PostmasterMain(int argc, char *argv[])
568	{
569	int opt;
570	int status;
571	char *userDoption = NULL;
572	bool listen_addr_saved = false;
573	int i;
574	char *output_config_variable = NULL;
575
576	InitProcessGlobals();
577
578	PostmasterPid = MyProcPid;
579
580	IsPostmasterEnvironment = true;
581
582	/*
583	* We should not be creating any files or directories before we check the
584	* data directory (see checkDataDir()), but just in case set the umask to
585	* the most restrictive (owner-only) permissions.
586	*
587	* checkDataDir() will reset the umask based on the data directory
588	* permissions.
589	*/
590	umask(PG_MODE_MASK_OWNER);
591
592	/*
593	* By default, palloc() requests in the postmaster will be allocated in
594	* the PostmasterContext, which is space that can be recycled by backends.
595	* Allocated data that needs to be available to backends should be
596	* allocated in TopMemoryContext.
597	*/
598	PostmasterContext = AllocSetContextCreate(TopMemoryContext,
599	"Postmaster",
600	ALLOCSET_DEFAULT_SIZES);
601	MemoryContextSwitchTo(PostmasterContext);
602
603	/ Initialize paths to installation files /
604	getInstallationPaths(argv[`0`]);
605
606	/*
607	* Set up signal handlers for the postmaster process.
608	*
609	* In the postmaster, we want to install non-ignored handlers without
610	* SA_RESTART. This is because they'll be blocked at all times except
611	* when ServerLoop is waiting for something to happen, and during that
612	* window, we want signals to exit the select(2) wait so that ServerLoop
613	* can respond if anything interesting happened. On some platforms,
614	* signals marked SA_RESTART would not cause the select() wait to end.
615	* Child processes will generally want SA_RESTART, but we expect them to
616	* set up their own handlers before unblocking signals.
617	*
618	* CAUTION: when changing this list, check for side-effects on the signal
619	* handling setup of child processes. See tcop/postgres.c,
620	* bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
621	* postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
622	* postmaster/syslogger.c, postmaster/bgworker.c and
623	* postmaster/checkpointer.c.
624	*/
625	pqinitmask();
626	PG_SETMASK(&BlockSig);
627
628	pqsignal_no_restart(SIGHUP, SIGHUP_handler); / reread config file and*
629	* have children do same */
630	pqsignal_no_restart(SIGINT, pmdie); / send SIGTERM and shut down /
631	pqsignal_no_restart(SIGQUIT, pmdie); / send SIGQUIT and die /
632	pqsignal_no_restart(SIGTERM, pmdie); / wait for children and shut down /
633	pqsignal(SIGALRM, SIG_IGN); / ignored /
634	pqsignal(SIGPIPE, SIG_IGN); / ignored /
635	pqsignal_no_restart(SIGUSR1, sigusr1_handler); / message from child*
636	* process */
637	pqsignal_no_restart(SIGUSR2, dummy_handler); / unused, reserve for*
638	* children */
639	pqsignal_no_restart(SIGCHLD, reaper); / handle child termination /
640
641	/*
642	* No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We
643	* ignore those signals in a postmaster environment, so that there is no
644	* risk of a child process freezing up due to writing to stderr. But for
645	* a standalone backend, their default handling is reasonable. Hence, all
646	* child processes should just allow the inherited settings to stand.
647	*/
648	#ifdef SIGTTIN
649	pqsignal(SIGTTIN, SIG_IGN); / ignored /
650	#endif
651	#ifdef SIGTTOU
652	pqsignal(SIGTTOU, SIG_IGN); / ignored /
653	#endif
654
655	/ ignore SIGXFSZ, so that ulimit violations work like disk full /
656	#ifdef SIGXFSZ
657	pqsignal(SIGXFSZ, SIG_IGN); / ignored /
658	#endif
659
660	/*
661	* Options setup
662	*/
663	InitializeGUCOptions();
664
665	opterr = `1`;
666
667	/*
668	* Parse command-line options. CAUTION: keep this in sync with
669	* tcop/postgres.c (the option sets should not conflict) and with the
670	* common help() function in main/main.c.
671	*/
672	while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:")) != -`1`)
673	{
674	switch (opt)
675	{
676	case `'B'`:
677	SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
678	break;
679
680	case `'b'`:
681	/ Undocumented flag used for binary upgrades /
682	IsBinaryUpgrade = true;
683	break;
684
685	case `'C'`:
686	output_config_variable = strdup(optarg);
687	break;
688
689	case `'D'`:
690	userDoption = strdup(optarg);
691	break;
692
693	case `'d'`:
694	set_debug_options(atoi(optarg), PGC_POSTMASTER, PGC_S_ARGV);
695	break;
696
697	case `'E'`:
698	SetConfigOption("log_statement", "all", PGC_POSTMASTER, PGC_S_ARGV);
699	break;
700
701	case `'e'`:
702	SetConfigOption("datestyle", "euro", PGC_POSTMASTER, PGC_S_ARGV);
703	break;
704
705	case `'F'`:
706	SetConfigOption("fsync", "false", PGC_POSTMASTER, PGC_S_ARGV);
707	break;
708
709	case `'f'`:
710	if (!set_plan_disabling_options(optarg, PGC_POSTMASTER, PGC_S_ARGV))
711	{
712	write_stderr("%s: invalid argument for option -f: \"%s\"\n",
713	progname, optarg);
714	ExitPostmaster(`1`);
715	}
716	break;
717
718	case `'h'`:
719	SetConfigOption("listen_addresses", optarg, PGC_POSTMASTER, PGC_S_ARGV);
720	break;
721
722	case `'i'`:
723	SetConfigOption("listen_addresses", "*", PGC_POSTMASTER, PGC_S_ARGV);
724	break;
725
726	case `'j'`:
727	/ only used by interactive backend /
728	break;
729
730	case `'k'`:
731	SetConfigOption("unix_socket_directories", optarg, PGC_POSTMASTER, PGC_S_ARGV);
732	break;
733
734	case `'l'`:
735	SetConfigOption("ssl", "true", PGC_POSTMASTER, PGC_S_ARGV);
736	break;
737
738	case `'N'`:
739	SetConfigOption("max_connections", optarg, PGC_POSTMASTER, PGC_S_ARGV);
740	break;
741
742	case `'n'`:
743	/ Don't reinit shared mem after abnormal exit /
744	Reinit = false;
745	break;
746
747	case `'O'`:
748	SetConfigOption("allow_system_table_mods", "true", PGC_POSTMASTER, PGC_S_ARGV);
749	break;
750
751	case `'o'`:
752	/ Other options to pass to the backend on the command line /
753	snprintf(ExtraOptions + strlen(ExtraOptions),
754	sizeof(ExtraOptions) - strlen(ExtraOptions),
755	" %s", optarg);
756	break;
757
758	case `'P'`:
759	SetConfigOption("ignore_system_indexes", "true", PGC_POSTMASTER, PGC_S_ARGV);
760	break;
761
762	case `'p'`:
763	SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
764	break;
765
766	case `'r'`:
767	/ only used by single-user backend /
768	break;
769
770	case `'S'`:
771	SetConfigOption("work_mem", optarg, PGC_POSTMASTER, PGC_S_ARGV);
772	break;
773
774	case `'s'`:
775	SetConfigOption("log_statement_stats", "true", PGC_POSTMASTER, PGC_S_ARGV);
776	break;
777
778	case `'T'`:
779
780	/*
781	* In the event that some backend dumps core, send SIGSTOP,
782	* rather than SIGQUIT, to all its peers. This lets the wily
783	* post_hacker collect core dumps from everyone.
784	*/
785	SendStop = true;
786	break;
787
788	case `'t'`:
789	{
790	const char *tmp = get_stats_option_name(optarg);
791
792	if (tmp)
793	{
794	SetConfigOption(tmp, "true", PGC_POSTMASTER, PGC_S_ARGV);
795	}
796	else
797	{
798	write_stderr("%s: invalid argument for option -t: \"%s\"\n",
799	progname, optarg);
800	ExitPostmaster(`1`);
801	}
802	break;
803	}
804
805	case `'W'`:
806	SetConfigOption("post_auth_delay", optarg, PGC_POSTMASTER, PGC_S_ARGV);
807	break;
808
809	case `'c'`:
810	case `'-'`:
811	{
812	char *name,
813	*value;
814
815	ParseLongOption(optarg, &name, &value);
816	if (!value)
817	{
818	if (opt == `'-'`)
819	ereport(ERROR,
820	(errcode(ERRCODE_SYNTAX_ERROR),
821	errmsg("--%s requires a value",
822	optarg)));
823	else
824	ereport(ERROR,
825	(errcode(ERRCODE_SYNTAX_ERROR),
826	errmsg("-c %s requires a value",
827	optarg)));
828	}
829
830	SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV);
831	free(name);
832	if (value)
833	free(value);
834	break;
835	}
836
837	default:
838	write_stderr("Try \"%s --help\" for more information.\n",
839	progname);
840	ExitPostmaster(`1`);
841	}
842	}
843
844	/*
845	* Postmaster accepts no non-option switch arguments.
846	*/
847	if (optind < argc)
848	{
849	write_stderr("%s: invalid argument: \"%s\"\n",
850	progname, argv[optind]);
851	write_stderr("Try \"%s --help\" for more information.\n",
852	progname);
853	ExitPostmaster(`1`);
854	}
855
856	/*
857	* Locate the proper configuration files and data directory, and read
858	* postgresql.conf for the first time.
859	*/
860	if (!SelectConfigFiles(userDoption, progname))
861	ExitPostmaster(`2`);
862
863	if (output_config_variable != NULL)
864	{
865	/*
866	* "-C guc" was specified, so print GUC's value and exit. No extra
867	* permission check is needed because the user is reading inside the
868	* data dir.
869	*/
870	const char *config_val = GetConfigOption(output_config_variable,
871	false, false);
872
873	puts(config_val ? config_val : "");
874	ExitPostmaster(`0`);
875	}
876
877	/ Verify that DataDir looks reasonable /
878	checkDataDir();
879
880	/ Check that pg_control exists /
881	checkControlFile();
882
883	/ And switch working directory into it /
884	ChangeToDataDir();
885
886	/*
887	* Check for invalid combinations of GUC settings.
888	*/
889	if (ReservedBackends >= MaxConnections)
890	{
891	write_stderr("%s: superuser_reserved_connections (%d) must be less than max_connections (%d)\n",
892	progname,
893	ReservedBackends, MaxConnections);
894	ExitPostmaster(`1`);
895	}
896	if (XLogArchiveMode > ARCHIVE_MODE_OFF && wal_level == WAL_LEVEL_MINIMAL)
897	ereport(ERROR,
898	(errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"")));
899	if (max_wal_senders > `0` && wal_level == WAL_LEVEL_MINIMAL)
900	ereport(ERROR,
901	(errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
902
903	/*
904	* Other one-time internal sanity checks can go here, if they are fast.
905	* (Put any slow processing further down, after postmaster.pid creation.)
906	*/
907	if (!CheckDateTokenTables())
908	{
909	write_stderr("%s: invalid datetoken tables, please fix\n", progname);
910	ExitPostmaster(`1`);
911	}
912
913	/*
914	* Now that we are done processing the postmaster arguments, reset
915	* getopt(3) library so that it will work correctly in subprocesses.
916	*/
917	optind = `1`;
918	#ifdef HAVE_INT_OPTRESET
919	optreset = `1`; / some systems need this too /
920	#endif
921
922	/ For debugging: display postmaster environment /
923	{
924	extern char **environ;
925	char **p;
926
927	ereport(DEBUG3,
928	(errmsg_internal("%s: PostmasterMain: initial environment dump:",
929	progname)));
930	ereport(DEBUG3,
931	(errmsg_internal("-----------------------------------------")));
932	for (p = environ; *p; ++p)
933	ereport(DEBUG3,
934	(errmsg_internal("\t%s", *p)));
935	ereport(DEBUG3,
936	(errmsg_internal("-----------------------------------------")));
937	}
938
939	/*
940	* Create lockfile for data directory.
941	*
942	* We want to do this before we try to grab the input sockets, because the
943	* data directory interlock is more reliable than the socket-file
944	* interlock (thanks to whoever decided to put socket files in /tmp :-().
945	* For the same reason, it's best to grab the TCP socket(s) before the
946	* Unix socket(s).
947	*
948	* Also note that this internally sets up the on_proc_exit function that
949	* is responsible for removing both data directory and socket lockfiles;
950	* so it must happen before opening sockets so that at exit, the socket
951	* lockfiles go away after CloseServerPorts runs.
952	*/
953	CreateDataDirLockFile(true);
954
955	/*
956	* Read the control file (for error checking and config info).
957	*
958	* Since we verify the control file's CRC, this has a useful side effect
959	* on machines where we need a run-time test for CRC support instructions.
960	* The postmaster will do the test once at startup, and then its child
961	* processes will inherit the correct function pointer and not need to
962	* repeat the test.
963	*/
964	LocalProcessControlFile(false);
965
966	/*
967	* Initialize SSL library, if specified.
968	*/
969	#ifdef USE_SSL
970	if (EnableSSL)
971	{
972	(void) secure_initialize(true);
973	LoadedSSL = true;
974	}
975	#endif
976
977	/*
978	* Register the apply launcher. Since it registers a background worker,
979	* it needs to be called before InitializeMaxBackends(), and it's probably
980	* a good idea to call it before any modules had chance to take the
981	* background worker slots.
982	*/
983	ApplyLauncherRegister();
984
985	/*
986	* process any libraries that should be preloaded at postmaster start
987	*/
988	process_shared_preload_libraries();
989
990	/*
991	* Now that loadable modules have had their chance to register background
992	* workers, calculate MaxBackends.
993	*/
994	InitializeMaxBackends();
995
996	/ Report server startup in log /
997	ereport(LOG,
998	(errmsg("starting %s", PG_VERSION_STR)));
999
1000	/*
1001	* Establish input sockets.
1002	*
1003	* First, mark them all closed, and set up an on_proc_exit function that's
1004	* charged with closing the sockets again at postmaster shutdown.
1005	*/
1006	for (i = `0`; i < MAXLISTEN; i++)
1007	ListenSocket[i] = PGINVALID_SOCKET;
1008
1009	on_proc_exit(CloseServerPorts, `0`);
1010
1011	if (ListenAddresses)
1012	{
1013	char *rawstring;
1014	List *elemlist;
1015	ListCell *l;
1016	int success = `0`;
1017
1018	/ Need a modifiable copy of ListenAddresses /
1019	rawstring = pstrdup(ListenAddresses);
1020
1021	/ Parse string into list of hostnames /
1022	if (!SplitIdentifierString(rawstring, `','`, &elemlist))
1023	{
1024	/ syntax error in list /
1025	ereport(FATAL,
1026	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1027	errmsg("invalid list syntax in parameter \"%s\"",
1028	"listen_addresses")));
1029	}
1030
1031	foreach(l, elemlist)
1032	{
1033	char curhost = (char* *) lfirst(l);
1034
1035	if (strcmp(curhost, "*") == `0`)
1036	status = StreamServerPort(AF_UNSPEC, NULL,
1037	(unsigned short) PostPortNumber,
1038	NULL,
1039	ListenSocket, MAXLISTEN);
1040	else
1041	status = StreamServerPort(AF_UNSPEC, curhost,
1042	(unsigned short) PostPortNumber,
1043	NULL,
1044	ListenSocket, MAXLISTEN);
1045
1046	if (status == STATUS_OK)
1047	{
1048	success++;
1049	/ record the first successful host addr in lockfile /
1050	if (!listen_addr_saved)
1051	{
1052	AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, curhost);
1053	listen_addr_saved = true;
1054	}
1055	}
1056	else
1057	ereport(WARNING,
1058	(errmsg("could not create listen socket for \"%s\"",
1059	curhost)));
1060	}
1061
1062	if (!success && elemlist != NIL)
1063	ereport(FATAL,
1064	(errmsg("could not create any TCP/IP sockets")));
1065
1066	list_free(elemlist);
1067	pfree(rawstring);
1068	}
1069
1070	#ifdef USE_BONJOUR
1071	/ Register for Bonjour only if we opened TCP socket(s) /
1072	if (enable_bonjour && ListenSocket[`0`] != PGINVALID_SOCKET)
1073	{
1074	DNSServiceErrorType err;
1075
1076	/*
1077	* We pass 0 for interface_index, which will result in registering on
1078	* all "applicable" interfaces. It's not entirely clear from the
1079	* DNS-SD docs whether this would be appropriate if we have bound to
1080	* just a subset of the available network interfaces.
1081	*/
1082	err = DNSServiceRegister(&bonjour_sdref,
1083	`0`,
1084	`0`,
1085	bonjour_name,
1086	"_postgresql._tcp.",
1087	NULL,
1088	NULL,
1089	pg_hton16(PostPortNumber),
1090	`0`,
1091	NULL,
1092	NULL,
1093	NULL);
1094	if (err != kDNSServiceErr_NoError)
1095	elog(LOG, "DNSServiceRegister() failed: error code %ld",
1096	(long) err);
1097
1098	/*
1099	* We don't bother to read the mDNS daemon's reply, and we expect that
1100	* it will automatically terminate our registration when the socket is
1101	* closed at postmaster termination. So there's nothing more to be
1102	* done here. However, the bonjour_sdref is kept around so that
1103	* forked children can close their copies of the socket.
1104	*/
1105	}
1106	#endif
1107
1108	#ifdef HAVE_UNIX_SOCKETS
1109	if (Unix_socket_directories)
1110	{
1111	char *rawstring;
1112	List *elemlist;
1113	ListCell *l;
1114	int success = `0`;
1115
1116	/ Need a modifiable copy of Unix_socket_directories /
1117	rawstring = pstrdup(Unix_socket_directories);
1118
1119	/ Parse string into list of directories /
1120	if (!SplitDirectoriesString(rawstring, `','`, &elemlist))
1121	{
1122	/ syntax error in list /
1123	ereport(FATAL,
1124	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1125	errmsg("invalid list syntax in parameter \"%s\"",
1126	"unix_socket_directories")));
1127	}
1128
1129	foreach(l, elemlist)
1130	{
1131	char socketdir = (char* *) lfirst(l);
1132
1133	status = StreamServerPort(AF_UNIX, NULL,
1134	(unsigned short) PostPortNumber,
1135	socketdir,
1136	ListenSocket, MAXLISTEN);
1137
1138	if (status == STATUS_OK)
1139	{
1140	success++;
1141	/ record the first successful Unix socket in lockfile /
1142	if (success == `1`)
1143	AddToDataDirLockFile(LOCK_FILE_LINE_SOCKET_DIR, socketdir);
1144	}
1145	else
1146	ereport(WARNING,
1147	(errmsg("could not create Unix-domain socket in directory \"%s\"",
1148	socketdir)));
1149	}
1150
1151	if (!success && elemlist != NIL)
1152	ereport(FATAL,
1153	(errmsg("could not create any Unix-domain sockets")));
1154
1155	list_free_deep(elemlist);
1156	pfree(rawstring);
1157	}
1158	#endif
1159
1160	/*
1161	* check that we have some socket to listen on
1162	*/
1163	if (ListenSocket[`0`] == PGINVALID_SOCKET)
1164	ereport(FATAL,
1165	(errmsg("no socket created for listening")));
1166
1167	/*
1168	* If no valid TCP ports, write an empty line for listen address,
1169	* indicating the Unix socket must be used. Note that this line is not
1170	* added to the lock file until there is a socket backing it.
1171	*/
1172	if (!listen_addr_saved)
1173	AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, "");
1174
1175	/*
1176	* Set up shared memory and semaphores.
1177	*/
1178	reset_shared(PostPortNumber);
1179
1180	/*
1181	* Estimate number of openable files. This must happen after setting up
1182	* semaphores, because on some platforms semaphores count as open files.
1183	*/
1184	set_max_safe_fds();
1185
1186	/*
1187	* Set reference point for stack-depth checking.
1188	*/
1189	set_stack_base();
1190
1191	/*
1192	* Initialize pipe (or process handle on Windows) that allows children to
1193	* wake up from sleep on postmaster death.
1194	*/
1195	InitPostmasterDeathWatchHandle();
1196
1197	#ifdef WIN32
1198
1199	/*
1200	* Initialize I/O completion port used to deliver list of dead children.
1201	*/
1202	win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, `0`, `1`);
1203	if (win32ChildQueue == NULL)
1204	ereport(FATAL,
1205	(errmsg("could not create I/O completion port for child queue")));
1206	#endif
1207
1208	/*
1209	* Record postmaster options. We delay this till now to avoid recording
1210	* bogus options (eg, NBuffers too high for available memory).
1211	*/
1212	if (!CreateOptsFile(argc, argv, my_exec_path))
1213	ExitPostmaster(`1`);
1214
1215	#ifdef EXEC_BACKEND
1216	/ Write out nondefault GUC settings for child processes to use /
1217	write_nondefault_variables(PGC_POSTMASTER);
1218	#endif
1219
1220	/*
1221	* Write the external PID file if requested
1222	*/
1223	if (external_pid_file)
1224	{
1225	FILE *fpidfile = fopen(external_pid_file, "w");
1226
1227	if (fpidfile)
1228	{
1229	fprintf(fpidfile, "%d\n", MyProcPid);
1230	fclose(fpidfile);
1231
1232	/ Make PID file world readable /
1233	if (chmod(external_pid_file, S_IRUSR \| S_IWUSR \| S_IRGRP \| S_IROTH) != `0`)
1234	write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n",
1235	progname, external_pid_file, strerror(errno));
1236	}
1237	else
1238	write_stderr("%s: could not write external PID file \"%s\": %s\n",
1239	progname, external_pid_file, strerror(errno));
1240
1241	on_proc_exit(unlink_external_pid_file, `0`);
1242	}
1243
1244	/*
1245	* Remove old temporary files. At this point there can be no other
1246	* Postgres processes running in this directory, so this should be safe.
1247	*/
1248	RemovePgTempFiles();
1249
1250	/*
1251	* Forcibly remove the files signaling a standby promotion request.
1252	* Otherwise, the existence of those files triggers a promotion too early,
1253	* whether a user wants that or not.
1254	*
1255	* This removal of files is usually unnecessary because they can exist
1256	* only during a few moments during a standby promotion. However there is
1257	* a race condition: if pg_ctl promote is executed and creates the files
1258	* during a promotion, the files can stay around even after the server is
1259	* brought up to new master. Then, if new standby starts by using the
1260	* backup taken from that master, the files can exist at the server
1261	* startup and should be removed in order to avoid an unexpected
1262	* promotion.
1263	*
1264	* Note that promotion signal files need to be removed before the startup
1265	* process is invoked. Because, after that, they can be used by
1266	* postmaster's SIGUSR1 signal handler.
1267	*/
1268	RemovePromoteSignalFiles();
1269
1270	/ Do the same for logrotate signal file /
1271	RemoveLogrotateSignalFiles();
1272
1273	/ Remove any outdated file holding the current log filenames. /
1274	if (unlink(LOG_METAINFO_DATAFILE) < `0` && errno != ENOENT)
1275	ereport(LOG,
1276	(errcode_for_file_access(),
1277	errmsg("could not remove file \"%s\": %m",
1278	LOG_METAINFO_DATAFILE)));
1279
1280	/*
1281	* If enabled, start up syslogger collection subprocess
1282	*/
1283	SysLoggerPID = SysLogger_Start();
1284
1285	/*
1286	* Reset whereToSendOutput from DestDebug (its starting state) to
1287	* DestNone. This stops ereport from sending log messages to stderr unless
1288	* Log_destination permits. We don't do this until the postmaster is
1289	* fully launched, since startup failures may as well be reported to
1290	* stderr.
1291	*
1292	* If we are in fact disabling logging to stderr, first emit a log message
1293	* saying so, to provide a breadcrumb trail for users who may not remember
1294	* that their logging is configured to go somewhere else.
1295	*/
1296	if (!(Log_destination & LOG_DESTINATION_STDERR))
1297	ereport(LOG,
1298	(errmsg("ending log output to stderr"),
1299	errhint("Future log output will go to log destination \"%s\".",
1300	Log_destination_string)));
1301
1302	whereToSendOutput = DestNone;
1303
1304	/*
1305	* Initialize stats collection subsystem (this does NOT start the
1306	* collector process!)
1307	*/
1308	pgstat_init();
1309
1310	/*
1311	* Initialize the autovacuum subsystem (again, no process start yet)
1312	*/
1313	autovac_init();
1314
1315	/*
1316	* Load configuration files for client authentication.
1317	*/
1318	if (!load_hba())
1319	{
1320	/*
1321	* It makes no sense to continue if we fail to load the HBA file,
1322	* since there is no way to connect to the database in this case.
1323	*/
1324	ereport(FATAL,
1325	(errmsg("could not load pg_hba.conf")));
1326	}
1327	if (!load_ident())
1328	{
1329	/*
1330	* We can start up without the IDENT file, although it means that you
1331	* cannot log in using any of the authentication methods that need a
1332	* user name mapping. load_ident() already logged the details of error
1333	* to the log.
1334	*/
1335	}
1336
1337	#ifdef HAVE_PTHREAD_IS_THREADED_NP
1338
1339	/*
1340	* On macOS, libintl replaces setlocale() with a version that calls
1341	* CFLocaleCopyCurrent() when its second argument is "" and every relevant
1342	* environment variable is unset or empty. CFLocaleCopyCurrent() makes
1343	* the process multithreaded. The postmaster calls sigprocmask() and
1344	* calls fork() without an immediate exec(), both of which have undefined
1345	* behavior in a multithreaded program. A multithreaded postmaster is the
1346	* normal case on Windows, which offers neither fork() nor sigprocmask().
1347	*/
1348	if (pthread_is_threaded_np() != `0`)
1349	ereport(FATAL,
1350	(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1351	errmsg("postmaster became multithreaded during startup"),
1352	errhint("Set the LC_ALL environment variable to a valid locale.")));
1353	#endif
1354
1355	/*
1356	* Remember postmaster startup time
1357	*/
1358	PgStartTime = GetCurrentTimestamp();
1359
1360	/*
1361	* Report postmaster status in the postmaster.pid file, to allow pg_ctl to
1362	* see what's happening.
1363	*/
1364	AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STARTING);
1365
1366	/*
1367	* We're ready to rock and roll...
1368	*/
1369	StartupPID = StartupDataBase();
1370	Assert(StartupPID != `0`);
1371	StartupStatus = STARTUP_RUNNING;
1372	pmState = PM_STARTUP;
1373
1374	/ Some workers may be scheduled to start now /
1375	maybe_start_bgworkers();
1376
1377	status = ServerLoop();
1378
1379	/*
1380	* ServerLoop probably shouldn't ever return, but if it does, close down.
1381	*/
1382	ExitPostmaster(status != STATUS_OK);
1383
1384	abort(); / not reached /
1385	}
1386
1387
1388	/*
1389	* on_proc_exit callback to close server's listen sockets
1390	*/
1391	static void
1392	CloseServerPorts(int status, Datum arg)
1393	{
1394	int i;
1395
1396	/*
1397	* First, explicitly close all the socket FDs. We used to just let this
1398	* happen implicitly at postmaster exit, but it's better to close them
1399	* before we remove the postmaster.pid lockfile; otherwise there's a race
1400	* condition if a new postmaster wants to re-use the TCP port number.
1401	*/
1402	for (i = `0`; i < MAXLISTEN; i++)
1403	{
1404	if (ListenSocket[i] != PGINVALID_SOCKET)
1405	{
1406	StreamClose(ListenSocket[i]);
1407	ListenSocket[i] = PGINVALID_SOCKET;
1408	}
1409	}
1410
1411	/*
1412	* Next, remove any filesystem entries for Unix sockets. To avoid race
1413	* conditions against incoming postmasters, this must happen after closing
1414	* the sockets and before removing lock files.
1415	*/
1416	RemoveSocketFiles();
1417
1418	/*
1419	* We don't do anything about socket lock files here; those will be
1420	* removed in a later on_proc_exit callback.
1421	*/
1422	}
1423
1424	/*
1425	* on_proc_exit callback to delete external_pid_file
1426	*/
1427	static void
1428	unlink_external_pid_file(int status, Datum arg)
1429	{
1430	if (external_pid_file)
1431	unlink(external_pid_file);
1432	}
1433
1434
1435	/*
1436	* Compute and check the directory paths to files that are part of the
1437	* installation (as deduced from the postgres executable's own location)
1438	*/
1439	static void
1440	getInstallationPaths(const char *argv0)
1441	{
1442	DIR *pdir;
1443
1444	/ Locate the postgres executable itself /
1445	if (find_my_exec(argv0, my_exec_path) < `0`)
1446	elog(FATAL, "%s: could not locate my own executable path", argv0);
1447
1448	#ifdef EXEC_BACKEND
1449	/ Locate executable backend before we change working directory /
1450	if (find_other_exec(argv0, "postgres", PG_BACKEND_VERSIONSTR,
1451	postgres_exec_path) < `0`)
1452	ereport(FATAL,
1453	(errmsg("%s: could not locate matching postgres executable",
1454	argv0)));
1455	#endif
1456
1457	/*
1458	* Locate the pkglib directory --- this has to be set early in case we try
1459	* to load any modules from it in response to postgresql.conf entries.
1460	*/
1461	get_pkglib_path(my_exec_path, pkglib_path);
1462
1463	/*
1464	* Verify that there's a readable directory there; otherwise the Postgres
1465	* installation is incomplete or corrupt. (A typical cause of this
1466	* failure is that the postgres executable has been moved or hardlinked to
1467	* some directory that's not a sibling of the installation lib/
1468	* directory.)
1469	*/
1470	pdir = AllocateDir(pkglib_path);
1471	if (pdir == NULL)
1472	ereport(ERROR,
1473	(errcode_for_file_access(),
1474	errmsg("could not open directory \"%s\": %m",
1475	pkglib_path),
1476	errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location.",
1477	my_exec_path)));
1478	FreeDir(pdir);
1479
1480	/*
1481	* XXX is it worth similarly checking the share/ directory? If the lib/
1482	* directory is there, then share/ probably is too.
1483	*/
1484	}
1485
1486	/*
1487	* Check that pg_control exists in the correct location in the data directory.
1488	*
1489	* No attempt is made to validate the contents of pg_control here. This is
1490	* just a sanity check to see if we are looking at a real data directory.
1491	*/
1492	static void
1493	checkControlFile(void)
1494	{
1495	char path[MAXPGPATH];
1496	FILE *fp;
1497
1498	snprintf(path, sizeof(path), "%s/global/pg_control", DataDir);
1499
1500	fp = AllocateFile(path, PG_BINARY_R);
1501	if (fp == NULL)
1502	{
1503	write_stderr("%s: could not find the database system\n"
1504	"Expected to find it in the directory \"%s\",\n"
1505	"but could not open file \"%s\": %s\n",
1506	progname, DataDir, path, strerror(errno));
1507	ExitPostmaster(`2`);
1508	}
1509	FreeFile(fp);
1510	}
1511
1512	/*
1513	* Determine how long should we let ServerLoop sleep.
1514	*
1515	* In normal conditions we wait at most one minute, to ensure that the other
1516	* background tasks handled by ServerLoop get done even when no requests are
1517	* arriving. However, if there are background workers waiting to be started,
1518	* we don't actually sleep so that they are quickly serviced. Other exception
1519	* cases are as shown in the code.
1520	*/
1521	static void
1522	DetermineSleepTime(struct timeval *timeout)
1523	{
1524	TimestampTz next_wakeup = `0`;
1525
1526	/*
1527	* Normal case: either there are no background workers at all, or we're in
1528	* a shutdown sequence (during which we ignore bgworkers altogether).
1529	*/
1530	if (Shutdown > NoShutdown \|\|
1531	(!StartWorkerNeeded && !HaveCrashedWorker))
1532	{
1533	if (AbortStartTime != `0`)
1534	{
1535	/ time left to abort; clamp to 0 in case it already expired /
1536	timeout->tv_sec = SIGKILL_CHILDREN_AFTER_SECS -
1537	(time(NULL) - AbortStartTime);
1538	timeout->tv_sec = Max(timeout->tv_sec, `0`);
1539	timeout->tv_usec = `0`;
1540	}
1541	else
1542	{
1543	timeout->tv_sec = `60`;
1544	timeout->tv_usec = `0`;
1545	}
1546	return;
1547	}
1548
1549	if (StartWorkerNeeded)
1550	{
1551	timeout->tv_sec = `0`;
1552	timeout->tv_usec = `0`;
1553	return;
1554	}
1555
1556	if (HaveCrashedWorker)
1557	{
1558	slist_mutable_iter siter;
1559
1560	/*
1561	* When there are crashed bgworkers, we sleep just long enough that
1562	* they are restarted when they request to be. Scan the list to
1563	* determine the minimum of all wakeup times according to most recent
1564	* crash time and requested restart interval.
1565	*/
1566	slist_foreach_modify(siter, &BackgroundWorkerList)
1567	{
1568	RegisteredBgWorker *rw;
1569	TimestampTz this_wakeup;
1570
1571	rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
1572
1573	if (rw->rw_crashed_at == `0`)
1574	continue;
1575
1576	if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART
1577	\|\| rw->rw_terminate)
1578	{
1579	ForgetBackgroundWorker(&siter);
1580	continue;
1581	}
1582
1583	this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at,
1584	`1000L` * rw->rw_worker.bgw_restart_time);
1585	if (next_wakeup == `0` \|\| this_wakeup < next_wakeup)
1586	next_wakeup = this_wakeup;
1587	}
1588	}
1589
1590	if (next_wakeup != `0`)
1591	{
1592	long secs;
1593	int microsecs;
1594
1595	TimestampDifference(GetCurrentTimestamp(), next_wakeup,
1596	&secs, &microsecs);
1597	timeout->tv_sec = secs;
1598	timeout->tv_usec = microsecs;
1599
1600	/ Ensure we don't exceed one minute /
1601	if (timeout->tv_sec > `60`)
1602	{
1603	timeout->tv_sec = `60`;
1604	timeout->tv_usec = `0`;
1605	}
1606	}
1607	else
1608	{
1609	timeout->tv_sec = `60`;
1610	timeout->tv_usec = `0`;
1611	}
1612	}
1613
1614	/*
1615	* Main idle loop of postmaster
1616	*
1617	* NB: Needs to be called with signals blocked
1618	*/
1619	static int
1620	ServerLoop(void)
1621	{
1622	fd_set readmask;
1623	int nSockets;
1624	time_t last_lockfile_recheck_time,
1625	last_touch_time;
1626
1627	last_lockfile_recheck_time = last_touch_time = time(NULL);
1628
1629	nSockets = initMasks(&readmask);
1630
1631	for (;;)
1632	{
1633	fd_set rmask;
1634	int selres;
1635	time_t now;
1636
1637	/*
1638	* Wait for a connection request to arrive.
1639	*
1640	* We block all signals except while sleeping. That makes it safe for
1641	* signal handlers, which again block all signals while executing, to
1642	* do nontrivial work.
1643	*
1644	* If we are in PM_WAIT_DEAD_END state, then we don't want to accept
1645	* any new connections, so we don't call select(), and just sleep.
1646	*/
1647	memcpy((char ) &rmask, (char* ) &readmask, sizeof*(fd_set));
1648
1649	if (pmState == PM_WAIT_DEAD_END)
1650	{
1651	PG_SETMASK(&UnBlockSig);
1652
1653	pg_usleep(`100000L`); / 100 msec seems reasonable /
1654	selres = `0`;
1655
1656	PG_SETMASK(&BlockSig);
1657	}
1658	else
1659	{
1660	/ must set timeout each time; some OSes change it! /
1661	struct timeval timeout;
1662
1663	/ Needs to run with blocked signals! /
1664	DetermineSleepTime(&timeout);
1665
1666	PG_SETMASK(&UnBlockSig);
1667
1668	selres = select(nSockets, &rmask, NULL, NULL, &timeout);
1669
1670	PG_SETMASK(&BlockSig);
1671	}
1672
1673	/ Now check the select() result /
1674	if (selres < `0`)
1675	{
1676	if (errno != EINTR && errno != EWOULDBLOCK)
1677	{
1678	ereport(LOG,
1679	(errcode_for_socket_access(),
1680	errmsg("select() failed in postmaster: %m")));
1681	return STATUS_ERROR;
1682	}
1683	}
1684
1685	/*
1686	* New connection pending on any of our sockets? If so, fork a child
1687	* process to deal with it.
1688	*/
1689	if (selres > `0`)
1690	{
1691	int i;
1692
1693	for (i = `0`; i < MAXLISTEN; i++)
1694	{
1695	if (ListenSocket[i] == PGINVALID_SOCKET)
1696	break;
1697	if (FD_ISSET(ListenSocket[i], &rmask))
1698	{
1699	Port *port;
1700
1701	port = ConnCreate(ListenSocket[i]);
1702	if (port)
1703	{
1704	BackendStartup(port);
1705
1706	/*
1707	* We no longer need the open socket or port structure
1708	* in this process
1709	*/
1710	StreamClose(port->sock);
1711	ConnFree(port);
1712	}
1713	}
1714	}
1715	}
1716
1717	/ If we have lost the log collector, try to start a new one /
1718	if (SysLoggerPID == `0` && Logging_collector)
1719	SysLoggerPID = SysLogger_Start();
1720
1721	/*
1722	* If no background writer process is running, and we are not in a
1723	* state that prevents it, start one. It doesn't matter if this
1724	* fails, we'll just try again later. Likewise for the checkpointer.
1725	*/
1726	if (pmState == PM_RUN \|\| pmState == PM_RECOVERY \|\|
1727	pmState == PM_HOT_STANDBY)
1728	{
1729	if (CheckpointerPID == `0`)
1730	CheckpointerPID = StartCheckpointer();
1731	if (BgWriterPID == `0`)
1732	BgWriterPID = StartBackgroundWriter();
1733	}
1734
1735	/*
1736	* Likewise, if we have lost the walwriter process, try to start a new
1737	* one. But this is needed only in normal operation (else we cannot
1738	* be writing any new WAL).
1739	*/
1740	if (WalWriterPID == `0` && pmState == PM_RUN)
1741	WalWriterPID = StartWalWriter();
1742
1743	/*
1744	* If we have lost the autovacuum launcher, try to start a new one. We
1745	* don't want autovacuum to run in binary upgrade mode because
1746	* autovacuum might update relfrozenxid for empty tables before the
1747	* physical files are put in place.
1748	*/
1749	if (!IsBinaryUpgrade && AutoVacPID == `0` &&
1750	(AutoVacuumingActive() \|\| start_autovac_launcher) &&
1751	pmState == PM_RUN)
1752	{
1753	AutoVacPID = StartAutoVacLauncher();
1754	if (AutoVacPID != `0`)
1755	start_autovac_launcher = false; / signal processed /
1756	}
1757
1758	/ If we have lost the stats collector, try to start a new one /
1759	if (PgStatPID == `0` &&
1760	(pmState == PM_RUN \|\| pmState == PM_HOT_STANDBY))
1761	PgStatPID = pgstat_start();
1762
1763	/ If we have lost the archiver, try to start a new one. /
1764	if (PgArchPID == `0` && PgArchStartupAllowed())
1765	PgArchPID = pgarch_start();
1766
1767	/ If we need to signal the autovacuum launcher, do so now /
1768	if (avlauncher_needs_signal)
1769	{
1770	avlauncher_needs_signal = false;
1771	if (AutoVacPID != `0`)
1772	kill(AutoVacPID, SIGUSR2);
1773	}
1774
1775	/ If we need to start a WAL receiver, try to do that now /
1776	if (WalReceiverRequested)
1777	MaybeStartWalReceiver();
1778
1779	/ Get other worker processes running, if needed /
1780	if (StartWorkerNeeded \|\| HaveCrashedWorker)
1781	maybe_start_bgworkers();
1782
1783	#ifdef HAVE_PTHREAD_IS_THREADED_NP
1784
1785	/*
1786	* With assertions enabled, check regularly for appearance of
1787	* additional threads. All builds check at start and exit.
1788	*/
1789	Assert(pthread_is_threaded_np() == `0`);
1790	#endif
1791
1792	/*
1793	* Lastly, check to see if it's time to do some things that we don't
1794	* want to do every single time through the loop, because they're a
1795	* bit expensive. Note that there's up to a minute of slop in when
1796	* these tasks will be performed, since DetermineSleepTime() will let
1797	* us sleep at most that long; except for SIGKILL timeout which has
1798	* special-case logic there.
1799	*/
1800	now = time(NULL);
1801
1802	/*
1803	* If we already sent SIGQUIT to children and they are slow to shut
1804	* down, it's time to send them SIGKILL. This doesn't happen
1805	* normally, but under certain conditions backends can get stuck while
1806	* shutting down. This is a last measure to get them unwedged.
1807	*
1808	* Note we also do this during recovery from a process crash.
1809	*/
1810	if ((Shutdown >= ImmediateShutdown \|\| (FatalError && !SendStop)) &&
1811	AbortStartTime != `0` &&
1812	(now - AbortStartTime) >= SIGKILL_CHILDREN_AFTER_SECS)
1813	{
1814	/ We were gentle with them before. Not anymore /
1815	TerminateChildren(SIGKILL);
1816	/ reset flag so we don't SIGKILL again /
1817	AbortStartTime = `0`;
1818	}
1819
1820	/*
1821	* Once a minute, verify that postmaster.pid hasn't been removed or
1822	* overwritten. If it has, we force a shutdown. This avoids having
1823	* postmasters and child processes hanging around after their database
1824	* is gone, and maybe causing problems if a new database cluster is
1825	* created in the same place. It also provides some protection
1826	* against a DBA foolishly removing postmaster.pid and manually
1827	* starting a new postmaster. Data corruption is likely to ensue from
1828	* that anyway, but we can minimize the damage by aborting ASAP.
1829	*/
1830	if (now - last_lockfile_recheck_time >= `1` * SECS_PER_MINUTE)
1831	{
1832	if (!RecheckDataDirLockFile())
1833	{
1834	ereport(LOG,
1835	(errmsg("performing immediate shutdown because data directory lock file is invalid")));
1836	kill(MyProcPid, SIGQUIT);
1837	}
1838	last_lockfile_recheck_time = now;
1839	}
1840
1841	/*
1842	* Touch Unix socket and lock files every 58 minutes, to ensure that
1843	* they are not removed by overzealous /tmp-cleaning tasks. We assume
1844	* no one runs cleaners with cutoff times of less than an hour ...
1845	*/
1846	if (now - last_touch_time >= `58` * SECS_PER_MINUTE)
1847	{
1848	TouchSocketFiles();
1849	TouchSocketLockFiles();
1850	last_touch_time = now;
1851	}
1852	}
1853	}
1854
1855	/*
1856	* Initialise the masks for select() for the ports we are listening on.
1857	* Return the number of sockets to listen on.
1858	*/
1859	static int
1860	initMasks(fd_set *rmask)
1861	{
1862	int maxsock = -`1`;
1863	int i;
1864
1865	FD_ZERO(rmask);
1866
1867	for (i = `0`; i < MAXLISTEN; i++)
1868	{
1869	int fd = ListenSocket[i];
1870
1871	if (fd == PGINVALID_SOCKET)
1872	break;
1873	FD_SET(fd, rmask);
1874
1875	if (fd > maxsock)
1876	maxsock = fd;
1877	}
1878
1879	return maxsock + `1`;
1880	}
1881
1882
1883	/*
1884	* Read a client's startup packet and do something according to it.
1885	*
1886	* Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
1887	* not return at all.
1888	*
1889	* (Note that ereport(FATAL) stuff is sent to the client, so only use it
1890	* if that's what you want. Return STATUS_ERROR if you don't want to
1891	* send anything to the client, which would typically be appropriate
1892	* if we detect a communications failure.)
1893	*
1894	* Set secure_done when negotiation of an encrypted layer (currently, TLS or
1895	* GSSAPI) is already completed.
1896	*/
1897	static int
1898	ProcessStartupPacket(Port *port, bool secure_done)
1899	{
1900	int32 len;
1901	void *buf;
1902	ProtocolVersion proto;
1903	MemoryContext oldcontext;
1904
1905	pq_startmsgread();
1906
1907	/*
1908	* Grab the first byte of the length word separately, so that we can tell
1909	* whether we have no data at all or an incomplete packet. (This might
1910	* sound inefficient, but it's not really, because of buffering in
1911	* pqcomm.c.)
1912	*/
1913	if (pq_getbytes((char *) &len, `1`) == EOF)
1914	{
1915	/*
1916	* If we get no data at all, don't clutter the log with a complaint;
1917	* such cases often occur for legitimate reasons. An example is that
1918	* we might be here after responding to NEGOTIATE_SSL_CODE, and if the
1919	* client didn't like our response, it'll probably just drop the
1920	* connection. Service-monitoring software also often just opens and
1921	* closes a connection without sending anything. (So do port
1922	* scanners, which may be less benign, but it's not really our job to
1923	* notice those.)
1924	*/
1925	return STATUS_ERROR;
1926	}
1927
1928	if (pq_getbytes(((char *) &len) + `1`, `3`) == EOF)
1929	{
1930	/ Got a partial length word, so bleat about that /
1931	if (!secure_done)
1932	ereport(COMMERROR,
1933	(errcode(ERRCODE_PROTOCOL_VIOLATION),
1934	errmsg("incomplete startup packet")));
1935	return STATUS_ERROR;
1936	}
1937
1938	len = pg_ntoh32(len);
1939	len -= `4`;
1940
1941	if (len < (int32) sizeof(ProtocolVersion) \|\|
1942	len > MAX_STARTUP_PACKET_LENGTH)
1943	{
1944	ereport(COMMERROR,
1945	(errcode(ERRCODE_PROTOCOL_VIOLATION),
1946	errmsg("invalid length of startup packet")));
1947	return STATUS_ERROR;
1948	}
1949
1950	/*
1951	* Allocate at least the size of an old-style startup packet, plus one
1952	* extra byte, and make sure all are zeroes. This ensures we will have
1953	* null termination of all strings, in both fixed- and variable-length
1954	* packet layouts.
1955	*/
1956	if (len <= (int32) sizeof(StartupPacket))
1957	buf = palloc0(sizeof(StartupPacket) + `1`);
1958	else
1959	buf = palloc0(len + `1`);
1960
1961	if (pq_getbytes(buf, len) == EOF)
1962	{
1963	ereport(COMMERROR,
1964	(errcode(ERRCODE_PROTOCOL_VIOLATION),
1965	errmsg("incomplete startup packet")));
1966	return STATUS_ERROR;
1967	}
1968	pq_endmsgread();
1969
1970	/*
1971	* The first field is either a protocol version number or a special
1972	* request code.
1973	*/
1974	port->proto = proto = pg_ntoh32(((ProtocolVersion ) buf));
1975
1976	if (proto == CANCEL_REQUEST_CODE)
1977	{
1978	processCancelRequest(port, buf);
1979	/ Not really an error, but we don't want to proceed further /
1980	return STATUS_ERROR;
1981	}
1982
1983	if (proto == NEGOTIATE_SSL_CODE && !secure_done)
1984	{
1985	char SSLok;
1986
1987	#ifdef USE_SSL
1988	/ No SSL when disabled or on Unix sockets /
1989	if (!LoadedSSL \|\| IS_AF_UNIX(port->laddr.addr.ss_family))
1990	SSLok = `'N'`;
1991	else
1992	SSLok = `'S'`; / Support for SSL /
1993	#else
1994	SSLok = `'N'`; / No support for SSL /
1995	#endif
1996
1997	retry1:
1998	if (send(port->sock, &SSLok, `1`, `0`) != `1`)
1999	{
2000	if (errno == EINTR)
2001	goto retry1; / if interrupted, just retry /
2002	ereport(COMMERROR,
2003	(errcode_for_socket_access(),
2004	errmsg("failed to send SSL negotiation response: %m")));
2005	return STATUS_ERROR; / close the connection /
2006	}
2007
2008	#ifdef USE_SSL
2009	if (SSLok == `'S'` && secure_open_server(port) == -`1`)
2010	return STATUS_ERROR;
2011	#endif
2012	/ regular startup packet, cancel, etc packet should follow... /
2013	/ but not another SSL negotiation request /
2014	return ProcessStartupPacket(port, true);
2015	}
2016	else if (proto == NEGOTIATE_GSS_CODE && !secure_done)
2017	{
2018	char GSSok = `'N'`;
2019	#ifdef ENABLE_GSS
2020	/ No GSSAPI encryption when on Unix socket /
2021	if (!IS_AF_UNIX(port->laddr.addr.ss_family))
2022	GSSok = `'G'`;
2023	#endif
2024
2025	while (send(port->sock, &GSSok, `1`, `0`) != `1`)
2026	{
2027	if (errno == EINTR)
2028	continue;
2029	ereport(COMMERROR,
2030	(errcode_for_socket_access(),
2031	errmsg("failed to send GSSAPI negotiation response: %m")));
2032	return STATUS_ERROR; / close the connection /
2033	}
2034
2035	#ifdef ENABLE_GSS
2036	if (GSSok == `'G'` && secure_open_gssapi(port) == -`1`)
2037	return STATUS_ERROR;
2038	#endif
2039	/ Won't ever see more than one negotiation request /
2040	return ProcessStartupPacket(port, true);
2041	}
2042
2043	/ Could add additional special packet types here /
2044
2045	/*
2046	* Set FrontendProtocol now so that ereport() knows what format to send if
2047	* we fail during startup.
2048	*/
2049	FrontendProtocol = proto;
2050
2051	/ Check that the major protocol version is in range. /
2052	if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) \|\|
2053	PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST))
2054	ereport(FATAL,
2055	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2056	errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u",
2057	PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto),
2058	PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST),
2059	PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST),
2060	PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST))));
2061
2062	/*
2063	* Now fetch parameters out of startup packet and save them into the Port
2064	* structure. All data structures attached to the Port struct must be
2065	* allocated in TopMemoryContext so that they will remain available in a
2066	* running backend (even after PostmasterContext is destroyed). We need
2067	* not worry about leaking this storage on failure, since we aren't in the
2068	* postmaster process anymore.
2069	*/
2070	oldcontext = MemoryContextSwitchTo(TopMemoryContext);
2071
2072	if (PG_PROTOCOL_MAJOR(proto) >= `3`)
2073	{
2074	int32 offset = sizeof(ProtocolVersion);
2075	List *unrecognized_protocol_options = NIL;
2076
2077	/*
2078	* Scan packet body for name/option pairs. We can assume any string
2079	* beginning within the packet body is null-terminated, thanks to
2080	* zeroing extra byte above.
2081	*/
2082	port->guc_options = NIL;
2083
2084	while (offset < len)
2085	{
2086	char nameptr = ((char* *) buf) + offset;
2087	int32 valoffset;
2088	char *valptr;
2089
2090	if (*nameptr == `'\0'`)
2091	break; / found packet terminator /
2092	valoffset = offset + strlen(nameptr) + `1`;
2093	if (valoffset >= len)
2094	break; / missing value, will complain below /
2095	valptr = ((char *) buf) + valoffset;
2096
2097	if (strcmp(nameptr, "database") == `0`)
2098	port->database_name = pstrdup(valptr);
2099	else if (strcmp(nameptr, "user") == `0`)
2100	port->user_name = pstrdup(valptr);
2101	else if (strcmp(nameptr, "options") == `0`)
2102	port->cmdline_options = pstrdup(valptr);
2103	else if (strcmp(nameptr, "replication") == `0`)
2104	{
2105	/*
2106	* Due to backward compatibility concerns the replication
2107	* parameter is a hybrid beast which allows the value to be
2108	* either boolean or the string 'database'. The latter
2109	* connects to a specific database which is e.g. required for
2110	* logical decoding while.
2111	*/
2112	if (strcmp(valptr, "database") == `0`)
2113	{
2114	am_walsender = true;
2115	am_db_walsender = true;
2116	}
2117	else if (!parse_bool(valptr, &am_walsender))
2118	ereport(FATAL,
2119	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2120	errmsg("invalid value for parameter \"%s\": \"%s\"",
2121	"replication",
2122	valptr),
2123	errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\".")));
2124	}
2125	else if (strncmp(nameptr, "_pq_.", `5`) == `0`)
2126	{
2127	/*
2128	* Any option beginning with _pq_. is reserved for use as a
2129	* protocol-level option, but at present no such options are
2130	* defined.
2131	*/
2132	unrecognized_protocol_options =
2133	lappend(unrecognized_protocol_options, pstrdup(nameptr));
2134	}
2135	else
2136	{
2137	/ Assume it's a generic GUC option /
2138	port->guc_options = lappend(port->guc_options,
2139	pstrdup(nameptr));
2140	port->guc_options = lappend(port->guc_options,
2141	pstrdup(valptr));
2142
2143	/*
2144	* Copy application_name to port if we come across it. This
2145	* is done so we can log the application_name in the
2146	* connection authorization message. Note that the GUC would
2147	* be used but we haven't gone through GUC setup yet.
2148	*/
2149	if (strcmp(nameptr, "application_name") == `0`)
2150	{
2151	char *tmp_app_name = pstrdup(valptr);
2152
2153	pg_clean_ascii(tmp_app_name);
2154
2155	port->application_name = tmp_app_name;
2156	}
2157	}
2158	offset = valoffset + strlen(valptr) + `1`;
2159	}
2160
2161	/*
2162	* If we didn't find a packet terminator exactly at the end of the
2163	* given packet length, complain.
2164	*/
2165	if (offset != len - `1`)
2166	ereport(FATAL,
2167	(errcode(ERRCODE_PROTOCOL_VIOLATION),
2168	errmsg("invalid startup packet layout: expected terminator as last byte")));
2169
2170	/*
2171	* If the client requested a newer protocol version or if the client
2172	* requested any protocol options we didn't recognize, let them know
2173	* the newest minor protocol version we do support and the names of
2174	* any unrecognized options.
2175	*/
2176	if (PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST) \|\|
2177	unrecognized_protocol_options != NIL)
2178	SendNegotiateProtocolVersion(unrecognized_protocol_options);
2179	}
2180	else
2181	{
2182	/*
2183	* Get the parameters from the old-style, fixed-width-fields startup
2184	* packet as C strings. The packet destination was cleared first so a
2185	* short packet has zeros silently added. We have to be prepared to
2186	* truncate the pstrdup result for oversize fields, though.
2187	*/
2188	StartupPacket packet = (StartupPacket ) buf;
2189
2190	port->database_name = pstrdup(packet->database);
2191	if (strlen(port->database_name) > sizeof(packet->database))
2192	port->database_name[sizeof(packet->database)] = `'\0'`;
2193	port->user_name = pstrdup(packet->user);
2194	if (strlen(port->user_name) > sizeof(packet->user))
2195	port->user_name[sizeof(packet->user)] = `'\0'`;
2196	port->cmdline_options = pstrdup(packet->options);
2197	if (strlen(port->cmdline_options) > sizeof(packet->options))
2198	port->cmdline_options[sizeof(packet->options)] = `'\0'`;
2199	port->guc_options = NIL;
2200	}
2201
2202	/ Check a user name was given. /
2203	if (port->user_name == NULL \|\| port->user_name[`0`] == `'\0'`)
2204	ereport(FATAL,
2205	(errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION),
2206	errmsg("no PostgreSQL user name specified in startup packet")));
2207
2208	/ The database defaults to the user name. /
2209	if (port->database_name == NULL \|\| port->database_name[`0`] == `'\0'`)
2210	port->database_name = pstrdup(port->user_name);
2211
2212	if (Db_user_namespace)
2213	{
2214	/*
2215	* If user@, it is a global user, remove '@'. We only want to do this
2216	* if there is an '@' at the end and no earlier in the user string or
2217	* they may fake as a local user of another database attaching to this
2218	* database.
2219	*/
2220	if (strchr(port->user_name, `'@'`) ==
2221	port->user_name + strlen(port->user_name) - `1`)
2222	*strchr(port->user_name, `'@'`) = `'\0'`;
2223	else
2224	{
2225	/ Append '@' and dbname /
2226	port->user_name = psprintf("%s@%s", port->user_name, port->database_name);
2227	}
2228	}
2229
2230	/*
2231	* Truncate given database and user names to length of a Postgres name.
2232	* This avoids lookup failures when overlength names are given.
2233	*/
2234	if (strlen(port->database_name) >= NAMEDATALEN)
2235	port->database_name[NAMEDATALEN - `1`] = `'\0'`;
2236	if (strlen(port->user_name) >= NAMEDATALEN)
2237	port->user_name[NAMEDATALEN - `1`] = `'\0'`;
2238
2239	/*
2240	* Normal walsender backends, e.g. for streaming replication, are not
2241	* connected to a particular database. But walsenders used for logical
2242	* replication need to connect to a specific database. We allow streaming
2243	* replication commands to be issued even if connected to a database as it
2244	* can make sense to first make a basebackup and then stream changes
2245	* starting from that.
2246	*/
2247	if (am_walsender && !am_db_walsender)
2248	port->database_name[`0`] = `'\0'`;
2249
2250	/*
2251	* Done putting stuff in TopMemoryContext.
2252	*/
2253	MemoryContextSwitchTo(oldcontext);
2254
2255	/*
2256	* If we're going to reject the connection due to database state, say so
2257	* now instead of wasting cycles on an authentication exchange. (This also
2258	* allows a pg_ping utility to be written.)
2259	*/
2260	switch (port->canAcceptConnections)
2261	{
2262	case CAC_STARTUP:
2263	ereport(FATAL,
2264	(errcode(ERRCODE_CANNOT_CONNECT_NOW),
2265	errmsg("the database system is starting up")));
2266	break;
2267	case CAC_SHUTDOWN:
2268	ereport(FATAL,
2269	(errcode(ERRCODE_CANNOT_CONNECT_NOW),
2270	errmsg("the database system is shutting down")));
2271	break;
2272	case CAC_RECOVERY:
2273	ereport(FATAL,
2274	(errcode(ERRCODE_CANNOT_CONNECT_NOW),
2275	errmsg("the database system is in recovery mode")));
2276	break;
2277	case CAC_TOOMANY:
2278	ereport(FATAL,
2279	(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
2280	errmsg("sorry, too many clients already")));
2281	break;
2282	case CAC_WAITBACKUP:
2283	/ OK for now, will check in InitPostgres /
2284	break;
2285	case CAC_OK:
2286	break;
2287	}
2288
2289	return STATUS_OK;
2290	}
2291
2292	/*
2293	* Send a NegotiateProtocolVersion to the client. This lets the client know
2294	* that they have requested a newer minor protocol version than we are able
2295	* to speak. We'll speak the highest version we know about; the client can,
2296	* of course, abandon the connection if that's a problem.
2297	*
2298	* We also include in the response a list of protocol options we didn't
2299	* understand. This allows clients to include optional parameters that might
2300	* be present either in newer protocol versions or third-party protocol
2301	* extensions without fear of having to reconnect if those options are not
2302	* understood, while at the same time making certain that the client is aware
2303	* of which options were actually accepted.
2304	*/
2305	static void
2306	SendNegotiateProtocolVersion(List *unrecognized_protocol_options)
2307	{
2308	StringInfoData buf;
2309	ListCell *lc;
2310
2311	pq_beginmessage(&buf, `'v'`); / NegotiateProtocolVersion /
2312	pq_sendint32(&buf, PG_PROTOCOL_LATEST);
2313	pq_sendint32(&buf, list_length(unrecognized_protocol_options));
2314	foreach(lc, unrecognized_protocol_options)
2315	pq_sendstring(&buf, lfirst(lc));
2316	pq_endmessage(&buf);
2317
2318	/ no need to flush, some other message will follow /
2319	}
2320
2321	/*
2322	* The client has sent a cancel request packet, not a normal
2323	* start-a-new-connection packet. Perform the necessary processing.
2324	* Nothing is sent back to the client.
2325	*/
2326	static void
2327	processCancelRequest(Port port, void* *pkt)
2328	{
2329	CancelRequestPacket canc = (CancelRequestPacket ) pkt;
2330	int backendPID;
2331	int32 cancelAuthCode;
2332	Backend *bp;
2333
2334	#ifndef EXEC_BACKEND
2335	dlist_iter iter;
2336	#else
2337	int i;
2338	#endif
2339
2340	backendPID = (int) pg_ntoh32(canc->backendPID);
2341	cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode);
2342
2343	/*
2344	* See if we have a matching backend. In the EXEC_BACKEND case, we can no
2345	* longer access the postmaster's own backend list, and must rely on the
2346	* duplicate array in shared memory.
2347	*/
2348	#ifndef EXEC_BACKEND
2349	dlist_foreach(iter, &BackendList)
2350	{
2351	bp = dlist_container(Backend, elem, iter.cur);
2352	#else
2353	for (i = MaxLivePostmasterChildren() - `1`; i >= `0`; i--)
2354	{
2355	bp = (Backend *) &ShmemBackendArray[i];
2356	#endif
2357	if (bp->pid == backendPID)
2358	{
2359	if (bp->cancel_key == cancelAuthCode)
2360	{
2361	/ Found a match; signal that backend to cancel current op /
2362	ereport(DEBUG2,
2363	(errmsg_internal("processing cancel request: sending SIGINT to process %d",
2364	backendPID)));
2365	signal_child(bp->pid, SIGINT);
2366	}
2367	else
2368	/ Right PID, wrong key: no way, Jose /
2369	ereport(LOG,
2370	(errmsg("wrong key in cancel request for process %d",
2371	backendPID)));
2372	return;
2373	}
2374	#ifndef EXEC_BACKEND /* make GNU Emacs 26.1 see brace balance */
2375	}
2376	#else
2377	}
2378	#endif
2379
2380	/ No matching backend /
2381	ereport(LOG,
2382	(errmsg("PID %d in cancel request did not match any process",
2383	backendPID)));
2384	}
2385
2386	/*
2387	* canAcceptConnections --- check to see if database state allows connections.
2388	*/
2389	static CAC_state
2390	canAcceptConnections(void)
2391	{
2392	CAC_state result = CAC_OK;
2393
2394	/*
2395	* Can't start backends when in startup/shutdown/inconsistent recovery
2396	* state.
2397	*
2398	* In state PM_WAIT_BACKUP only superusers can connect (this must be
2399	* allowed so that a superuser can end online backup mode); we return
2400	* CAC_WAITBACKUP code to indicate that this must be checked later. Note
2401	* that neither CAC_OK nor CAC_WAITBACKUP can safely be returned until we
2402	* have checked for too many children.
2403	*/
2404	if (pmState != PM_RUN)
2405	{
2406	if (pmState == PM_WAIT_BACKUP)
2407	result = CAC_WAITBACKUP; / allow superusers only /
2408	else if (Shutdown > NoShutdown)
2409	return CAC_SHUTDOWN; / shutdown is pending /
2410	else if (!FatalError &&
2411	(pmState == PM_STARTUP \|\|
2412	pmState == PM_RECOVERY))
2413	return CAC_STARTUP; / normal startup /
2414	else if (!FatalError &&
2415	pmState == PM_HOT_STANDBY)
2416	result = CAC_OK; / connection OK during hot standby /
2417	else
2418	return CAC_RECOVERY; / else must be crash recovery /
2419	}
2420
2421	/*
2422	* Don't start too many children.
2423	*
2424	* We allow more connections than we can have backends here because some
2425	* might still be authenticating; they might fail auth, or some existing
2426	* backend might exit before the auth cycle is completed. The exact
2427	* MaxBackends limit is enforced when a new backend tries to join the
2428	* shared-inval backend array.
2429	*
2430	* The limit here must match the sizes of the per-child-process arrays;
2431	* see comments for MaxLivePostmasterChildren().
2432	*/
2433	if (CountChildren(BACKEND_TYPE_ALL) >= MaxLivePostmasterChildren())
2434	result = CAC_TOOMANY;
2435
2436	return result;
2437	}
2438
2439
2440	/*
2441	* ConnCreate -- create a local connection data structure
2442	*
2443	* Returns NULL on failure, other than out-of-memory which is fatal.
2444	*/
2445	static Port *
2446	ConnCreate(int serverFd)
2447	{
2448	Port *port;
2449
2450	if (!(port = (Port ) calloc(`1`, sizeof*(Port))))
2451	{
2452	ereport(LOG,
2453	(errcode(ERRCODE_OUT_OF_MEMORY),
2454	errmsg("out of memory")));
2455	ExitPostmaster(`1`);
2456	}
2457
2458	if (StreamConnection(serverFd, port) != STATUS_OK)
2459	{
2460	if (port->sock != PGINVALID_SOCKET)
2461	StreamClose(port->sock);
2462	ConnFree(port);
2463	return NULL;
2464	}
2465
2466	/*
2467	* Allocate GSSAPI specific state struct
2468	*/
2469	#ifndef EXEC_BACKEND
2470	#if defined(ENABLE_GSS) \|\| defined(ENABLE_SSPI)
2471	port->gss = (pg_gssinfo ) calloc(`1`, sizeof*(pg_gssinfo));
2472	if (!port->gss)
2473	{
2474	ereport(LOG,
2475	(errcode(ERRCODE_OUT_OF_MEMORY),
2476	errmsg("out of memory")));
2477	ExitPostmaster(`1`);
2478	}
2479	#endif
2480	#endif
2481
2482	return port;
2483	}
2484
2485
2486	/*
2487	* ConnFree -- free a local connection data structure
2488	*/
2489	static void
2490	ConnFree(Port *conn)
2491	{
2492	#ifdef USE_SSL
2493	secure_close(conn);
2494	#endif
2495	if (conn->gss)
2496	free(conn->gss);
2497	free(conn);
2498	}
2499
2500
2501	/*
2502	* ClosePostmasterPorts -- close all the postmaster's open sockets
2503	*
2504	* This is called during child process startup to release file descriptors
2505	* that are not needed by that child process. The postmaster still has
2506	* them open, of course.
2507	*
2508	* Note: we pass am_syslogger as a boolean because we don't want to set
2509	* the global variable yet when this is called.
2510	*/
2511	void
2512	ClosePostmasterPorts(bool am_syslogger)
2513	{
2514	int i;
2515
2516	#ifndef WIN32
2517
2518	/*
2519	* Close the write end of postmaster death watch pipe. It's important to
2520	* do this as early as possible, so that if postmaster dies, others won't
2521	* think that it's still running because we're holding the pipe open.
2522	*/
2523	if (close(postmaster_alive_fds[POSTMASTER_FD_OWN]))
2524	ereport(FATAL,
2525	(errcode_for_file_access(),
2526	errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
2527	postmaster_alive_fds[POSTMASTER_FD_OWN] = -`1`;
2528	#endif
2529
2530	/ Close the listen sockets /
2531	for (i = `0`; i < MAXLISTEN; i++)
2532	{
2533	if (ListenSocket[i] != PGINVALID_SOCKET)
2534	{
2535	StreamClose(ListenSocket[i]);
2536	ListenSocket[i] = PGINVALID_SOCKET;
2537	}
2538	}
2539
2540	/ If using syslogger, close the read side of the pipe /
2541	if (!am_syslogger)
2542	{
2543	#ifndef WIN32
2544	if (syslogPipe[`0`] >= `0`)
2545	close(syslogPipe[`0`]);
2546	syslogPipe[`0`] = -`1`;
2547	#else
2548	if (syslogPipe[`0`])
2549	CloseHandle(syslogPipe[`0`]);
2550	syslogPipe[`0`] = `0`;
2551	#endif
2552	}
2553
2554	#ifdef USE_BONJOUR
2555	/ If using Bonjour, close the connection to the mDNS daemon /
2556	if (bonjour_sdref)
2557	close(DNSServiceRefSockFD(bonjour_sdref));
2558	#endif
2559	}
2560
2561
2562	/*
2563	* InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds
2564	*
2565	* Called early in the postmaster and every backend.
2566	*/
2567	void
2568	InitProcessGlobals(void)
2569	{
2570	unsigned int rseed;
2571
2572	MyProcPid = getpid();
2573	MyStartTimestamp = GetCurrentTimestamp();
2574	MyStartTime = timestamptz_to_time_t(MyStartTimestamp);
2575
2576	/*
2577	* Set a different seed for random() in every process. We want something
2578	* unpredictable, so if possible, use high-quality random bits for the
2579	* seed. Otherwise, fall back to a seed based on timestamp and PID.
2580	*/
2581	if (!pg_strong_random(&rseed, sizeof(rseed)))
2582	{
2583	/*
2584	* Since PIDs and timestamps tend to change more frequently in their
2585	* least significant bits, shift the timestamp left to allow a larger
2586	* total number of seeds in a given time period. Since that would
2587	* leave only 20 bits of the timestamp that cycle every ~1 second,
2588	* also mix in some higher bits.
2589	*/
2590	rseed = ((uint64) MyProcPid) ^
2591	((uint64) MyStartTimestamp << `12`) ^
2592	((uint64) MyStartTimestamp >> `20`);
2593	}
2594	srandom(rseed);
2595	}
2596
2597
/*
 * reset_shared -- reset shared memory and semaphores
 *
 * port is handed unchanged to CreateSharedMemoryAndSemaphores().
 */
static void
reset_shared(int port)
{
	/*
	 * Create or re-create shared memory and semaphores.
	 *
	 * Note: in each "cycle of life" we will normally assign the same IPC keys
	 * (if using SysV shmem and/or semas), since the port number is used to
	 * determine IPC keys.  This helps ensure that we will clean up dead IPC
	 * objects if the postmaster crashes and is restarted.
	 */
	CreateSharedMemoryAndSemaphores(port);
}
2614
2615
2616	/*
2617	* SIGHUP -- reread config files, and tell children to do same
2618	*/
2619	static void
2620	SIGHUP_handler(SIGNAL_ARGS)
2621	{
2622	int save_errno = errno;
2623
2624	PG_SETMASK(&BlockSig);
2625
2626	if (Shutdown <= SmartShutdown)
2627	{
2628	ereport(LOG,
2629	(errmsg("received SIGHUP, reloading configuration files")));
2630	ProcessConfigFile(PGC_SIGHUP);
2631	SignalChildren(SIGHUP);
2632	if (StartupPID != `0`)
2633	signal_child(StartupPID, SIGHUP);
2634	if (BgWriterPID != `0`)
2635	signal_child(BgWriterPID, SIGHUP);
2636	if (CheckpointerPID != `0`)
2637	signal_child(CheckpointerPID, SIGHUP);
2638	if (WalWriterPID != `0`)
2639	signal_child(WalWriterPID, SIGHUP);
2640	if (WalReceiverPID != `0`)
2641	signal_child(WalReceiverPID, SIGHUP);
2642	if (AutoVacPID != `0`)
2643	signal_child(AutoVacPID, SIGHUP);
2644	if (PgArchPID != `0`)
2645	signal_child(PgArchPID, SIGHUP);
2646	if (SysLoggerPID != `0`)
2647	signal_child(SysLoggerPID, SIGHUP);
2648	if (PgStatPID != `0`)
2649	signal_child(PgStatPID, SIGHUP);
2650
2651	/ Reload authentication config files too /
2652	if (!load_hba())
2653	ereport(LOG,
2654	/ translator: %s is a configuration file /
2655	(errmsg("%s was not reloaded", "pg_hba.conf")));
2656
2657	if (!load_ident())
2658	ereport(LOG,
2659	(errmsg("%s was not reloaded", "pg_ident.conf")));
2660
2661	#ifdef USE_SSL
2662	/ Reload SSL configuration as well /
2663	if (EnableSSL)
2664	{
2665	if (secure_initialize(false) == `0`)
2666	LoadedSSL = true;
2667	else
2668	ereport(LOG,
2669	(errmsg("SSL configuration was not reloaded")));
2670	}
2671	else
2672	{
2673	secure_destroy();
2674	LoadedSSL = false;
2675	}
2676	#endif
2677
2678	#ifdef EXEC_BACKEND
2679	/ Update the starting-point file for future children /
2680	write_nondefault_variables(PGC_SIGHUP);
2681	#endif
2682	}
2683
2684	PG_SETMASK(&UnBlockSig);
2685
2686	errno = save_errno;
2687	}
2688
2689
2690	/*
2691	* pmdie -- signal handler for processing various postmaster signals.
2692	*/
2693	static void
2694	pmdie(SIGNAL_ARGS)
2695	{
2696	int save_errno = errno;
2697
2698	PG_SETMASK(&BlockSig);
2699
2700	ereport(DEBUG2,
2701	(errmsg_internal("postmaster received signal %d",
2702	postgres_signal_arg)));
2703
2704	switch (postgres_signal_arg)
2705	{
2706	case SIGTERM:
2707
2708	/*
2709	* Smart Shutdown:
2710	*
2711	* Wait for children to end their work, then shut down.
2712	*/
2713	if (Shutdown >= SmartShutdown)
2714	break;
2715	Shutdown = SmartShutdown;
2716	ereport(LOG,
2717	(errmsg("received smart shutdown request")));
2718
2719	/ Report status /
2720	AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2721	#ifdef USE_SYSTEMD
2722	sd_notify(`0`, "STOPPING=1");
2723	#endif
2724
2725	if (pmState == PM_RUN \|\| pmState == PM_RECOVERY \|\|
2726	pmState == PM_HOT_STANDBY \|\| pmState == PM_STARTUP)
2727	{
2728	/ autovac workers are told to shut down immediately /
2729	/ and bgworkers too; does this need tweaking? /
2730	SignalSomeChildren(SIGTERM,
2731	BACKEND_TYPE_AUTOVAC \| BACKEND_TYPE_BGWORKER);
2732	/ and the autovac launcher too /
2733	if (AutoVacPID != `0`)
2734	signal_child(AutoVacPID, SIGTERM);
2735	/ and the bgwriter too /
2736	if (BgWriterPID != `0`)
2737	signal_child(BgWriterPID, SIGTERM);
2738	/ and the walwriter too /
2739	if (WalWriterPID != `0`)
2740	signal_child(WalWriterPID, SIGTERM);
2741
2742	/*
2743	* If we're in recovery, we can't kill the startup process
2744	* right away, because at present doing so does not release
2745	* its locks. We might want to change this in a future
2746	* release. For the time being, the PM_WAIT_READONLY state
2747	* indicates that we're waiting for the regular (read only)
2748	* backends to die off; once they do, we'll kill the startup
2749	* and walreceiver processes.
2750	*/
2751	pmState = (pmState == PM_RUN) ?
2752	PM_WAIT_BACKUP : PM_WAIT_READONLY;
2753	}
2754
2755	/*
2756	* Now wait for online backup mode to end and backends to exit. If
2757	* that is already the case, PostmasterStateMachine will take the
2758	* next step.
2759	*/
2760	PostmasterStateMachine();
2761	break;
2762
2763	case SIGINT:
2764
2765	/*
2766	* Fast Shutdown:
2767	*
2768	* Abort all children with SIGTERM (rollback active transactions
2769	* and exit) and shut down when they are gone.
2770	*/
2771	if (Shutdown >= FastShutdown)
2772	break;
2773	Shutdown = FastShutdown;
2774	ereport(LOG,
2775	(errmsg("received fast shutdown request")));
2776
2777	/ Report status /
2778	AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2779	#ifdef USE_SYSTEMD
2780	sd_notify(`0`, "STOPPING=1");
2781	#endif
2782
2783	if (StartupPID != `0`)
2784	signal_child(StartupPID, SIGTERM);
2785	if (BgWriterPID != `0`)
2786	signal_child(BgWriterPID, SIGTERM);
2787	if (WalReceiverPID != `0`)
2788	signal_child(WalReceiverPID, SIGTERM);
2789	if (pmState == PM_STARTUP \|\| pmState == PM_RECOVERY)
2790	{
2791	SignalSomeChildren(SIGTERM, BACKEND_TYPE_BGWORKER);
2792
2793	/*
2794	* Only startup, bgwriter, walreceiver, possibly bgworkers,
2795	* and/or checkpointer should be active in this state; we just
2796	* signaled the first four, and we don't want to kill
2797	* checkpointer yet.
2798	*/
2799	pmState = PM_WAIT_BACKENDS;
2800	}
2801	else if (pmState == PM_RUN \|\|
2802	pmState == PM_WAIT_BACKUP \|\|
2803	pmState == PM_WAIT_READONLY \|\|
2804	pmState == PM_WAIT_BACKENDS \|\|
2805	pmState == PM_HOT_STANDBY)
2806	{
2807	ereport(LOG,
2808	(errmsg("aborting any active transactions")));
2809	/ shut down all backends and workers /
2810	SignalSomeChildren(SIGTERM,
2811	BACKEND_TYPE_NORMAL \| BACKEND_TYPE_AUTOVAC \|
2812	BACKEND_TYPE_BGWORKER);
2813	/ and the autovac launcher too /
2814	if (AutoVacPID != `0`)
2815	signal_child(AutoVacPID, SIGTERM);
2816	/ and the walwriter too /
2817	if (WalWriterPID != `0`)
2818	signal_child(WalWriterPID, SIGTERM);
2819	pmState = PM_WAIT_BACKENDS;
2820	}
2821
2822	/*
2823	* Now wait for backends to exit. If there are none,
2824	* PostmasterStateMachine will take the next step.
2825	*/
2826	PostmasterStateMachine();
2827	break;
2828
2829	case SIGQUIT:
2830
2831	/*
2832	* Immediate Shutdown:
2833	*
2834	* abort all children with SIGQUIT, wait for them to exit,
2835	* terminate remaining ones with SIGKILL, then exit without
2836	* attempt to properly shut down the data base system.
2837	*/
2838	if (Shutdown >= ImmediateShutdown)
2839	break;
2840	Shutdown = ImmediateShutdown;
2841	ereport(LOG,
2842	(errmsg("received immediate shutdown request")));
2843
2844	/ Report status /
2845	AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING);
2846	#ifdef USE_SYSTEMD
2847	sd_notify(`0`, "STOPPING=1");
2848	#endif
2849
2850	TerminateChildren(SIGQUIT);
2851	pmState = PM_WAIT_BACKENDS;
2852
2853	/ set stopwatch for them to die /
2854	AbortStartTime = time(NULL);
2855
2856	/*
2857	* Now wait for backends to exit. If there are none,
2858	* PostmasterStateMachine will take the next step.
2859	*/
2860	PostmasterStateMachine();
2861	break;
2862	}
2863
2864	PG_SETMASK(&UnBlockSig);
2865
2866	errno = save_errno;
2867	}
2868
2869	/*
2870	* Reaper -- signal handler to cleanup after a child process dies.
2871	*/
2872	static void
2873	reaper(SIGNAL_ARGS)
2874	{
2875	int save_errno = errno;
2876	int pid; / process id of dead child process /
2877	int exitstatus; / its exit status /
2878
2879	PG_SETMASK(&BlockSig);
2880
2881	ereport(DEBUG4,
2882	(errmsg_internal("reaping dead processes")));
2883
2884	while ((pid = waitpid(-`1`, &exitstatus, WNOHANG)) > `0`)
2885	{
2886	/*
2887	* Check if this child was a startup process.
2888	*/
2889	if (pid == StartupPID)
2890	{
2891	StartupPID = `0`;
2892
2893	/*
2894	* Startup process exited in response to a shutdown request (or it
2895	* completed normally regardless of the shutdown request).
2896	*/
2897	if (Shutdown > NoShutdown &&
2898	(EXIT_STATUS_0(exitstatus) \|\| EXIT_STATUS_1(exitstatus)))
2899	{
2900	StartupStatus = STARTUP_NOT_RUNNING;
2901	pmState = PM_WAIT_BACKENDS;
2902	/ PostmasterStateMachine logic does the rest /
2903	continue;
2904	}
2905
2906	if (EXIT_STATUS_3(exitstatus))
2907	{
2908	ereport(LOG,
2909	(errmsg("shutdown at recovery target")));
2910	StartupStatus = STARTUP_NOT_RUNNING;
2911	Shutdown = SmartShutdown;
2912	TerminateChildren(SIGTERM);
2913	pmState = PM_WAIT_BACKENDS;
2914	/ PostmasterStateMachine logic does the rest /
2915	continue;
2916	}
2917
2918	/*
2919	* Unexpected exit of startup process (including FATAL exit)
2920	* during PM_STARTUP is treated as catastrophic. There are no
2921	* other processes running yet, so we can just exit.
2922	*/
2923	if (pmState == PM_STARTUP && !EXIT_STATUS_0(exitstatus))
2924	{
2925	LogChildExit(LOG, _("startup process"),
2926	pid, exitstatus);
2927	ereport(LOG,
2928	(errmsg("aborting startup due to startup process failure")));
2929	ExitPostmaster(`1`);
2930	}
2931
2932	/*
2933	* After PM_STARTUP, any unexpected exit (including FATAL exit) of
2934	* the startup process is catastrophic, so kill other children,
2935	* and set StartupStatus so we don't try to reinitialize after
2936	* they're gone. Exception: if StartupStatus is STARTUP_SIGNALED,
2937	* then we previously sent the startup process a SIGQUIT; so
2938	* that's probably the reason it died, and we do want to try to
2939	* restart in that case.
2940	*/
2941	if (!EXIT_STATUS_0(exitstatus))
2942	{
2943	if (StartupStatus == STARTUP_SIGNALED)
2944	StartupStatus = STARTUP_NOT_RUNNING;
2945	else
2946	StartupStatus = STARTUP_CRASHED;
2947	HandleChildCrash(pid, exitstatus,
2948	_("startup process"));
2949	continue;
2950	}
2951
2952	/*
2953	* Startup succeeded, commence normal operations
2954	*/
2955	StartupStatus = STARTUP_NOT_RUNNING;
2956	FatalError = false;
2957	Assert(AbortStartTime == `0`);
2958	ReachedNormalRunning = true;
2959	pmState = PM_RUN;
2960
2961	/*
2962	* Crank up the background tasks, if we didn't do that already
2963	* when we entered consistent recovery state. It doesn't matter
2964	* if this fails, we'll just try again later.
2965	*/
2966	if (CheckpointerPID == `0`)
2967	CheckpointerPID = StartCheckpointer();
2968	if (BgWriterPID == `0`)
2969	BgWriterPID = StartBackgroundWriter();
2970	if (WalWriterPID == `0`)
2971	WalWriterPID = StartWalWriter();
2972
2973	/*
2974	* Likewise, start other special children as needed. In a restart
2975	* situation, some of them may be alive already.
2976	*/
2977	if (!IsBinaryUpgrade && AutoVacuumingActive() && AutoVacPID == `0`)
2978	AutoVacPID = StartAutoVacLauncher();
2979	if (PgArchStartupAllowed() && PgArchPID == `0`)
2980	PgArchPID = pgarch_start();
2981	if (PgStatPID == `0`)
2982	PgStatPID = pgstat_start();
2983
2984	/ workers may be scheduled to start now /
2985	maybe_start_bgworkers();
2986
2987	/ at this point we are really open for business /
2988	ereport(LOG,
2989	(errmsg("database system is ready to accept connections")));
2990
2991	/ Report status /
2992	AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY);
2993	#ifdef USE_SYSTEMD
2994	sd_notify(`0`, "READY=1");
2995	#endif
2996
2997	continue;
2998	}
2999
3000	/*
3001	* Was it the bgwriter? Normal exit can be ignored; we'll start a new
3002	* one at the next iteration of the postmaster's main loop, if
3003	* necessary. Any other exit condition is treated as a crash.
3004	*/
3005	if (pid == BgWriterPID)
3006	{
3007	BgWriterPID = `0`;
3008	if (!EXIT_STATUS_0(exitstatus))
3009	HandleChildCrash(pid, exitstatus,
3010	_("background writer process"));
3011	continue;
3012	}
3013
3014	/*
3015	* Was it the checkpointer?
3016	*/
3017	if (pid == CheckpointerPID)
3018	{
3019	CheckpointerPID = `0`;
3020	if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
3021	{
3022	/*
3023	* OK, we saw normal exit of the checkpointer after it's been
3024	* told to shut down. We expect that it wrote a shutdown
3025	* checkpoint. (If for some reason it didn't, recovery will
3026	* occur on next postmaster start.)
3027	*
3028	* At this point we should have no normal backend children
3029	* left (else we'd not be in PM_SHUTDOWN state) but we might
3030	* have dead_end children to wait for.
3031	*
3032	* If we have an archiver subprocess, tell it to do a last
3033	* archive cycle and quit. Likewise, if we have walsender
3034	* processes, tell them to send any remaining WAL and quit.
3035	*/
3036	Assert(Shutdown > NoShutdown);
3037
3038	/ Waken archiver for the last time /
3039	if (PgArchPID != `0`)
3040	signal_child(PgArchPID, SIGUSR2);
3041
3042	/*
3043	* Waken walsenders for the last time. No regular backends
3044	* should be around anymore.
3045	*/
3046	SignalChildren(SIGUSR2);
3047
3048	pmState = PM_SHUTDOWN_2;
3049
3050	/*
3051	* We can also shut down the stats collector now; there's
3052	* nothing left for it to do.
3053	*/
3054	if (PgStatPID != `0`)
3055	signal_child(PgStatPID, SIGQUIT);
3056	}
3057	else
3058	{
3059	/*
3060	* Any unexpected exit of the checkpointer (including FATAL
3061	* exit) is treated as a crash.
3062	*/
3063	HandleChildCrash(pid, exitstatus,
3064	_("checkpointer process"));
3065	}
3066
3067	continue;
3068	}
3069
3070	/*
3071	* Was it the wal writer? Normal exit can be ignored; we'll start a
3072	* new one at the next iteration of the postmaster's main loop, if
3073	* necessary. Any other exit condition is treated as a crash.
3074	*/
3075	if (pid == WalWriterPID)
3076	{
3077	WalWriterPID = `0`;
3078	if (!EXIT_STATUS_0(exitstatus))
3079	HandleChildCrash(pid, exitstatus,
3080	_("WAL writer process"));
3081	continue;
3082	}
3083
3084	/*
3085	* Was it the wal receiver? If exit status is zero (normal) or one
3086	* (FATAL exit), we assume everything is all right just like normal
3087	* backends. (If we need a new wal receiver, we'll start one at the
3088	* next iteration of the postmaster's main loop.)
3089	*/
3090	if (pid == WalReceiverPID)
3091	{
3092	WalReceiverPID = `0`;
3093	if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3094	HandleChildCrash(pid, exitstatus,
3095	_("WAL receiver process"));
3096	continue;
3097	}
3098
3099	/*
3100	* Was it the autovacuum launcher? Normal exit can be ignored; we'll
3101	* start a new one at the next iteration of the postmaster's main
3102	* loop, if necessary. Any other exit condition is treated as a
3103	* crash.
3104	*/
3105	if (pid == AutoVacPID)
3106	{
3107	AutoVacPID = `0`;
3108	if (!EXIT_STATUS_0(exitstatus))
3109	HandleChildCrash(pid, exitstatus,
3110	_("autovacuum launcher process"));
3111	continue;
3112	}
3113
3114	/*
3115	* Was it the archiver? If so, just try to start a new one; no need
3116	* to force reset of the rest of the system. (If fail, we'll try
3117	* again in future cycles of the main loop.). Unless we were waiting
3118	* for it to shut down; don't restart it in that case, and
3119	* PostmasterStateMachine() will advance to the next shutdown step.
3120	*/
3121	if (pid == PgArchPID)
3122	{
3123	PgArchPID = `0`;
3124	if (!EXIT_STATUS_0(exitstatus))
3125	LogChildExit(LOG, _("archiver process"),
3126	pid, exitstatus);
3127	if (PgArchStartupAllowed())
3128	PgArchPID = pgarch_start();
3129	continue;
3130	}
3131
3132	/*
3133	* Was it the statistics collector? If so, just try to start a new
3134	* one; no need to force reset of the rest of the system. (If fail,
3135	* we'll try again in future cycles of the main loop.)
3136	*/
3137	if (pid == PgStatPID)
3138	{
3139	PgStatPID = `0`;
3140	if (!EXIT_STATUS_0(exitstatus))
3141	LogChildExit(LOG, _("statistics collector process"),
3142	pid, exitstatus);
3143	if (pmState == PM_RUN \|\| pmState == PM_HOT_STANDBY)
3144	PgStatPID = pgstat_start();
3145	continue;
3146	}
3147
3148	/ Was it the system logger? If so, try to start a new one /
3149	if (pid == SysLoggerPID)
3150	{
3151	SysLoggerPID = `0`;
3152	/ for safety's sake, launch new logger first /
3153	SysLoggerPID = SysLogger_Start();
3154	if (!EXIT_STATUS_0(exitstatus))
3155	LogChildExit(LOG, _("system logger process"),
3156	pid, exitstatus);
3157	continue;
3158	}
3159
3160	/ Was it one of our background workers? /
3161	if (CleanupBackgroundWorker(pid, exitstatus))
3162	{
3163	/ have it be restarted /
3164	HaveCrashedWorker = true;
3165	continue;
3166	}
3167
3168	/*
3169	* Else do standard backend child cleanup.
3170	*/
3171	CleanupBackend(pid, exitstatus);
3172	} / loop over pending child-death reports /
3173
3174	/*
3175	* After cleaning out the SIGCHLD queue, see if we have any state changes
3176	* or actions to make.
3177	*/
3178	PostmasterStateMachine();
3179
3180	/ Done with signal handler /
3181	PG_SETMASK(&UnBlockSig);
3182
3183	errno = save_errno;
3184	}
3185
3186	/*
3187	* Scan the bgworkers list and see if the given PID (which has just stopped
3188	* or crashed) is in it. Handle its shutdown if so, and return true. If not a
3189	* bgworker, return false.
3190	*
3191	* This is heavily based on CleanupBackend. One important difference is that
3192	* we don't know yet that the dying process is a bgworker, so we must be silent
3193	* until we're sure it is.
3194	*/
3195	static bool
3196	CleanupBackgroundWorker(int pid,
3197	int exitstatus) / child's exit status /
3198	{
3199	char namebuf[MAXPGPATH];
3200	slist_mutable_iter iter;
3201
3202	slist_foreach_modify(iter, &BackgroundWorkerList)
3203	{
3204	RegisteredBgWorker *rw;
3205
3206	rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
3207
3208	if (rw->rw_pid != pid)
3209	continue;
3210
3211	#ifdef WIN32
3212	/ see CleanupBackend /
3213	if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3214	exitstatus = `0`;
3215	#endif
3216
3217	snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
3218	rw->rw_worker.bgw_type);
3219
3220
3221	if (!EXIT_STATUS_0(exitstatus))
3222	{
3223	/ Record timestamp, so we know when to restart the worker. /
3224	rw->rw_crashed_at = GetCurrentTimestamp();
3225	}
3226	else
3227	{
3228	/ Zero exit status means terminate /
3229	rw->rw_crashed_at = `0`;
3230	rw->rw_terminate = true;
3231	}
3232
3233	/*
3234	* Additionally, for shared-memory-connected workers, just like a
3235	* backend, any exit status other than 0 or 1 is considered a crash
3236	* and causes a system-wide restart.
3237	*/
3238	if ((rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != `0`)
3239	{
3240	if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3241	{
3242	HandleChildCrash(pid, exitstatus, namebuf);
3243	return true;
3244	}
3245	}
3246
3247	/*
3248	* We must release the postmaster child slot whether this worker is
3249	* connected to shared memory or not, but we only treat it as a crash
3250	* if it is in fact connected.
3251	*/
3252	if (!ReleasePostmasterChildSlot(rw->rw_child_slot) &&
3253	(rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != `0`)
3254	{
3255	HandleChildCrash(pid, exitstatus, namebuf);
3256	return true;
3257	}
3258
3259	/ Get it out of the BackendList and clear out remaining data /
3260	dlist_delete(&rw->rw_backend->elem);
3261	#ifdef EXEC_BACKEND
3262	ShmemBackendArrayRemove(rw->rw_backend);
3263	#endif
3264
3265	/*
3266	* It's possible that this background worker started some OTHER
3267	* background worker and asked to be notified when that worker started
3268	* or stopped. If so, cancel any notifications destined for the
3269	* now-dead backend.
3270	*/
3271	if (rw->rw_backend->bgworker_notify)
3272	BackgroundWorkerStopNotifications(rw->rw_pid);
3273	free(rw->rw_backend);
3274	rw->rw_backend = NULL;
3275	rw->rw_pid = `0`;
3276	rw->rw_child_slot = `0`;
3277	ReportBackgroundWorkerExit(&iter); / report child death /
3278
3279	LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
3280	namebuf, pid, exitstatus);
3281
3282	return true;
3283	}
3284
3285	return false;
3286	}
3287
3288	/*
3289	* CleanupBackend -- cleanup after terminated backend.
3290	*
3291	* Remove all local state associated with backend.
3292	*
3293	* If you change this, see also CleanupBackgroundWorker.
3294	*/
3295	static void
3296	CleanupBackend(int pid,
3297	int exitstatus) / child's exit status. /
3298	{
3299	dlist_mutable_iter iter;
3300
3301	LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
3302
3303	/*
3304	* If a backend dies in an ugly way then we must signal all other backends
3305	* to quickdie. If exit status is zero (normal) or one (FATAL exit), we
3306	* assume everything is all right and proceed to remove the backend from
3307	* the active backend list.
3308	*/
3309
3310	#ifdef WIN32
3311
3312	/*
3313	* On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case,
3314	* since that sometimes happens under load when the process fails to start
3315	* properly (long before it starts using shared memory). Microsoft reports
3316	* it is related to mutex failure:
3317	* http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php
3318	*/
3319	if (exitstatus == ERROR_WAIT_NO_CHILDREN)
3320	{
3321	LogChildExit(LOG, _("server process"), pid, exitstatus);
3322	exitstatus = `0`;
3323	}
3324	#endif
3325
3326	if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
3327	{
3328	HandleChildCrash(pid, exitstatus, _("server process"));
3329	return;
3330	}
3331
3332	dlist_foreach_modify(iter, &BackendList)
3333	{
3334	Backend *bp = dlist_container(Backend, elem, iter.cur);
3335
3336	if (bp->pid == pid)
3337	{
3338	if (!bp->dead_end)
3339	{
3340	if (!ReleasePostmasterChildSlot(bp->child_slot))
3341	{
3342	/*
3343	* Uh-oh, the child failed to clean itself up. Treat as a
3344	* crash after all.
3345	*/
3346	HandleChildCrash(pid, exitstatus, _("server process"));
3347	return;
3348	}
3349	#ifdef EXEC_BACKEND
3350	ShmemBackendArrayRemove(bp);
3351	#endif
3352	}
3353	if (bp->bgworker_notify)
3354	{
3355	/*
3356	* This backend may have been slated to receive SIGUSR1 when
3357	* some background worker started or stopped. Cancel those
3358	* notifications, as we don't want to signal PIDs that are not
3359	* PostgreSQL backends. This gets skipped in the (probably
3360	* very common) case where the backend has never requested any
3361	* such notifications.
3362	*/
3363	BackgroundWorkerStopNotifications(bp->pid);
3364	}
3365	dlist_delete(iter.cur);
3366	free(bp);
3367	break;
3368	}
3369	}
3370	}
3371
3372	/*
3373	* HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
3374	* walwriter, autovacuum, or background worker.
3375	*
3376	* The objectives here are to clean up our local state about the child
3377	* process, and to signal all other remaining children to quickdie.
3378	*/
3379	static void
3380	HandleChildCrash(int pid, int exitstatus, const char *procname)
3381	{
3382	dlist_mutable_iter iter;
3383	slist_iter siter;
3384	Backend *bp;
3385	bool take_action;
3386
3387	/*
3388	* We only log messages and send signals if this is the first process
3389	* crash and we're not doing an immediate shutdown; otherwise, we're only
3390	* here to update postmaster's idea of live processes. If we have already
3391	* signalled children, nonzero exit status is to be expected, so don't
3392	* clutter log.
3393	*/
3394	take_action = !FatalError && Shutdown != ImmediateShutdown;
3395
3396	if (take_action)
3397	{
3398	LogChildExit(LOG, procname, pid, exitstatus);
3399	ereport(LOG,
3400	(errmsg("terminating any other active server processes")));
3401	}
3402
3403	/ Process background workers. /
3404	slist_foreach(siter, &BackgroundWorkerList)
3405	{
3406	RegisteredBgWorker *rw;
3407
3408	rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
3409	if (rw->rw_pid == `0`)
3410	continue; / not running /
3411	if (rw->rw_pid == pid)
3412	{
3413	/*
3414	* Found entry for freshly-dead worker, so remove it.
3415	*/
3416	(void) ReleasePostmasterChildSlot(rw->rw_child_slot);
3417	dlist_delete(&rw->rw_backend->elem);
3418	#ifdef EXEC_BACKEND
3419	ShmemBackendArrayRemove(rw->rw_backend);
3420	#endif
3421	free(rw->rw_backend);
3422	rw->rw_backend = NULL;
3423	rw->rw_pid = `0`;
3424	rw->rw_child_slot = `0`;
3425	/ don't reset crashed_at /
3426	/ don't report child stop, either /
3427	/ Keep looping so we can signal remaining workers /
3428	}
3429	else
3430	{
3431	/*
3432	* This worker is still alive. Unless we did so already, tell it
3433	* to commit hara-kiri.
3434	*
3435	* SIGQUIT is the special signal that says exit without proc_exit
3436	* and let the user know what's going on. But if SendStop is set
3437	* (-s on command line), then we send SIGSTOP instead, so that we
3438	* can get core dumps from all backends by hand.
3439	*/
3440	if (take_action)
3441	{
3442	ereport(DEBUG2,
3443	(errmsg_internal("sending %s to process %d",
3444	(SendStop ? "SIGSTOP" : "SIGQUIT"),
3445	(int) rw->rw_pid)));
3446	signal_child(rw->rw_pid, (SendStop ? SIGSTOP : SIGQUIT));
3447	}
3448	}
3449	}
3450
3451	/ Process regular backends /
3452	dlist_foreach_modify(iter, &BackendList)
3453	{
3454	bp = dlist_container(Backend, elem, iter.cur);
3455
3456	if (bp->pid == pid)
3457	{
3458	/*
3459	* Found entry for freshly-dead backend, so remove it.
3460	*/
3461	if (!bp->dead_end)
3462	{
3463	(void) ReleasePostmasterChildSlot(bp->child_slot);
3464	#ifdef EXEC_BACKEND
3465	ShmemBackendArrayRemove(bp);
3466	#endif
3467	}
3468	dlist_delete(iter.cur);
3469	free(bp);
3470	/ Keep looping so we can signal remaining backends /
3471	}
3472	else
3473	{
3474	/*
3475	* This backend is still alive. Unless we did so already, tell it
3476	* to commit hara-kiri.
3477	*
3478	* SIGQUIT is the special signal that says exit without proc_exit
3479	* and let the user know what's going on. But if SendStop is set
3480	* (-s on command line), then we send SIGSTOP instead, so that we
3481	* can get core dumps from all backends by hand.
3482	*
3483	* We could exclude dead_end children here, but at least in the
3484	* SIGSTOP case it seems better to include them.
3485	*
3486	* Background workers were already processed above; ignore them
3487	* here.
3488	*/
3489	if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
3490	continue;
3491
3492	if (take_action)
3493	{
3494	ereport(DEBUG2,
3495	(errmsg_internal("sending %s to process %d",
3496	(SendStop ? "SIGSTOP" : "SIGQUIT"),
3497	(int) bp->pid)));
3498	signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
3499	}
3500	}
3501	}
3502
3503	/ Take care of the startup process too /
3504	if (pid == StartupPID)
3505	{
3506	StartupPID = `0`;
3507	StartupStatus = STARTUP_CRASHED;
3508	}
3509	else if (StartupPID != `0` && take_action)
3510	{
3511	ereport(DEBUG2,
3512	(errmsg_internal("sending %s to process %d",
3513	(SendStop ? "SIGSTOP" : "SIGQUIT"),
3514	(int) StartupPID)));
3515	signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT));
3516	StartupStatus = STARTUP_SIGNALED;
3517	}
3518
3519	/ Take care of the bgwriter too /
3520	if (pid == BgWriterPID)
3521	BgWriterPID = `0`;
3522	else if (BgWriterPID != `0` && take_action)
3523	{
3524	ereport(DEBUG2,
3525	(errmsg_internal("sending %s to process %d",
3526	(SendStop ? "SIGSTOP" : "SIGQUIT"),
3527	(int) BgWriterPID)));
3528	signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3529	}
3530
3531	/ Take care of the checkpointer too /
3532	if (pid == CheckpointerPID)
3533	CheckpointerPID = `0`;
3534	else if (CheckpointerPID != `0` && take_action)
3535	{
3536	ereport(DEBUG2,
3537	(errmsg_internal("sending %s to process %d",
3538	(SendStop ? "SIGSTOP" : "SIGQUIT"),
3539	(int) CheckpointerPID)));
3540	signal_child(CheckpointerPID, (SendStop ? SIGSTOP : SIGQUIT));
3541	}
3542
3543	/ Take care of the walwriter too /
3544	if (pid == WalWriterPID)
3545	WalWriterPID = `0`;
3546	else if (WalWriterPID != `0` && take_action)
3547	{
3548	ereport(DEBUG2,
3549	(errmsg_internal("sending %s to process %d",
3550	(SendStop ? "SIGSTOP" : "SIGQUIT"),
3551	(int) WalWriterPID)));
3552	signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
3553	}
3554
3555	/ Take care of the walreceiver too /
3556	if (pid == WalReceiverPID)
3557	WalReceiverPID = `0`;
3558	else if (WalReceiverPID != `0` && take_action)
3559	{
3560	ereport(DEBUG2,
3561	(errmsg_internal("sending %s to process %d",
3562	(SendStop ? "SIGSTOP" : "SIGQUIT"),
3563	(int) WalReceiverPID)));
3564	signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT));
3565	}
3566
3567	/ Take care of the autovacuum launcher too /
3568	if (pid == AutoVacPID)
3569	AutoVacPID = `0`;
3570	else if (AutoVacPID != `0` && take_action)
3571	{
3572	ereport(DEBUG2,
3573	(errmsg_internal("sending %s to process %d",
3574	(SendStop ? "SIGSTOP" : "SIGQUIT"),
3575	(int) AutoVacPID)));
3576	signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
3577	}
3578
3579	/*
3580	* Force a power-cycle of the pgarch process too. (This isn't absolutely
3581	* necessary, but it seems like a good idea for robustness, and it
3582	* simplifies the state-machine logic in the case where a shutdown request
3583	* arrives during crash processing.)
3584	*/
3585	if (PgArchPID != `0` && take_action)
3586	{
3587	ereport(DEBUG2,
3588	(errmsg_internal("sending %s to process %d",
3589	"SIGQUIT",
3590	(int) PgArchPID)));
3591	signal_child(PgArchPID, SIGQUIT);
3592	}
3593
3594	/*
3595	* Force a power-cycle of the pgstat process too. (This isn't absolutely
3596	* necessary, but it seems like a good idea for robustness, and it
3597	* simplifies the state-machine logic in the case where a shutdown request
3598	* arrives during crash processing.)
3599	*/
3600	if (PgStatPID != `0` && take_action)
3601	{
3602	ereport(DEBUG2,
3603	(errmsg_internal("sending %s to process %d",
3604	"SIGQUIT",
3605	(int) PgStatPID)));
3606	signal_child(PgStatPID, SIGQUIT);
3607	allow_immediate_pgstat_restart();
3608	}
3609
3610	/ We do NOT restart the syslogger /
3611
3612	if (Shutdown != ImmediateShutdown)
3613	FatalError = true;
3614
3615	/ We now transit into a state of waiting for children to die /
3616	if (pmState == PM_RECOVERY \|\|
3617	pmState == PM_HOT_STANDBY \|\|
3618	pmState == PM_RUN \|\|
3619	pmState == PM_WAIT_BACKUP \|\|
3620	pmState == PM_WAIT_READONLY \|\|
3621	pmState == PM_SHUTDOWN)
3622	pmState = PM_WAIT_BACKENDS;
3623
3624	/*
3625	* .. and if this doesn't happen quickly enough, now the clock is ticking
3626	* for us to kill them without mercy.
3627	*/
3628	if (AbortStartTime == `0`)
3629	AbortStartTime = time(NULL);
3630	}
3631
/*
 * Log the death of a child process.
 *
 * lev: message level handed to ereport().
 * procname: noun phrase describing the child (substituted for %s).
 * pid: PID of the child that exited.
 * exitstatus: the child's exit status; it is decoded with WIFEXITED /
 *		WIFSIGNALED to choose the message wording.
 *
 * When the exit status is nonzero, the message also carries a detail line
 * with whatever pgstat_get_crashed_backend_activity() returns for the PID,
 * provided that is not NULL.
 */
static void
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
{
	/*
	 * size of activity_buffer is arbitrary, but set equal to default
	 * track_activity_query_size
	 */
	char		activity_buffer[1024];
	const char *activity = NULL;

	if (!EXIT_STATUS_0(exitstatus))
		activity = pgstat_get_crashed_backend_activity(pid,
													   activity_buffer,
													   sizeof(activity_buffer));

	if (WIFEXITED(exitstatus))
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) exited with exit code %d",
						procname, pid, WEXITSTATUS(exitstatus)),
				 activity ? errdetail("Failed process was running: %s", activity) : 0));
	else if (WIFSIGNALED(exitstatus))
	{
#if defined(WIN32)
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) was terminated by exception 0x%X",
						procname, pid, WTERMSIG(exitstatus)),
				 errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
				 activity ? errdetail("Failed process was running: %s", activity) : 0));
#else
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) was terminated by signal %d: %s",
						procname, pid, WTERMSIG(exitstatus),
						pg_strsignal(WTERMSIG(exitstatus))),
				 activity ? errdetail("Failed process was running: %s", activity) : 0));
#endif
	}
	else
		ereport(lev,

		/*------
		  translator: %s is a noun phrase describing a child process, such as
		  "server process" */
				(errmsg("%s (PID %d) exited with unrecognized status %d",
						procname, pid, exitstatus),
				 activity ? errdetail("Failed process was running: %s", activity) : 0));
}
3693
3694	/*
3695	* Advance the postmaster's state machine and take actions as appropriate
3696	*
3697	* This is common code for pmdie(), reaper() and sigusr1_handler(), which
3698	* receive the signals that might mean we need to change state.
3699	*/
3700	static void
3701	PostmasterStateMachine(void)
3702	{
3703	if (pmState == PM_WAIT_BACKUP)
3704	{
3705	/*
3706	* PM_WAIT_BACKUP state ends when online backup mode is not active.
3707	*/
3708	if (!BackupInProgress())
3709	pmState = PM_WAIT_BACKENDS;
3710	}
3711
3712	if (pmState == PM_WAIT_READONLY)
3713	{
3714	/*
3715	* PM_WAIT_READONLY state ends when we have no regular backends that
3716	* have been started during recovery. We kill the startup and
3717	* walreceiver processes and transition to PM_WAIT_BACKENDS. Ideally,
3718	* we might like to kill these processes first and then wait for
3719	* backends to die off, but that doesn't work at present because
3720	* killing the startup process doesn't release its locks.
3721	*/
3722	if (CountChildren(BACKEND_TYPE_NORMAL) == `0`)
3723	{
3724	if (StartupPID != `0`)
3725	signal_child(StartupPID, SIGTERM);
3726	if (WalReceiverPID != `0`)
3727	signal_child(WalReceiverPID, SIGTERM);
3728	pmState = PM_WAIT_BACKENDS;
3729	}
3730	}
3731
3732	/*
3733	* If we are in a state-machine state that implies waiting for backends to
3734	* exit, see if they're all gone, and change state if so.
3735	*/
3736	if (pmState == PM_WAIT_BACKENDS)
3737	{
3738	/*
3739	* PM_WAIT_BACKENDS state ends when we have no regular backends
3740	* (including autovac workers), no bgworkers (including unconnected
3741	* ones), and no walwriter, autovac launcher or bgwriter. If we are
3742	* doing crash recovery or an immediate shutdown then we expect the
3743	* checkpointer to exit as well, otherwise not. The archiver, stats,
3744	* and syslogger processes are disregarded since they are not
3745	* connected to shared memory; we also disregard dead_end children
3746	* here. Walsenders are also disregarded, they will be terminated
3747	* later after writing the checkpoint record, like the archiver
3748	* process.
3749	*/
3750	if (CountChildren(BACKEND_TYPE_NORMAL \| BACKEND_TYPE_WORKER) == `0` &&
3751	StartupPID == `0` &&
3752	WalReceiverPID == `0` &&
3753	BgWriterPID == `0` &&
3754	(CheckpointerPID == `0` \|\|
3755	(!FatalError && Shutdown < ImmediateShutdown)) &&
3756	WalWriterPID == `0` &&
3757	AutoVacPID == `0`)
3758	{
3759	if (Shutdown >= ImmediateShutdown \|\| FatalError)
3760	{
3761	/*
3762	* Start waiting for dead_end children to die. This state
3763	* change causes ServerLoop to stop creating new ones.
3764	*/
3765	pmState = PM_WAIT_DEAD_END;
3766
3767	/*
3768	* We already SIGQUIT'd the archiver and stats processes, if
3769	* any, when we started immediate shutdown or entered
3770	* FatalError state.
3771	*/
3772	}
3773	else
3774	{
3775	/*
3776	* If we get here, we are proceeding with normal shutdown. All
3777	* the regular children are gone, and it's time to tell the
3778	* checkpointer to do a shutdown checkpoint.
3779	*/
3780	Assert(Shutdown > NoShutdown);
3781	/ Start the checkpointer if not running /
3782	if (CheckpointerPID == `0`)
3783	CheckpointerPID = StartCheckpointer();
3784	/ And tell it to shut down /
3785	if (CheckpointerPID != `0`)
3786	{
3787	signal_child(CheckpointerPID, SIGUSR2);
3788	pmState = PM_SHUTDOWN;
3789	}
3790	else
3791	{
3792	/*
3793	* If we failed to fork a checkpointer, just shut down.
3794	* Any required cleanup will happen at next restart. We
3795	* set FatalError so that an "abnormal shutdown" message
3796	* gets logged when we exit.
3797	*/
3798	FatalError = true;
3799	pmState = PM_WAIT_DEAD_END;
3800
3801	/ Kill the walsenders, archiver and stats collector too /
3802	SignalChildren(SIGQUIT);
3803	if (PgArchPID != `0`)
3804	signal_child(PgArchPID, SIGQUIT);
3805	if (PgStatPID != `0`)
3806	signal_child(PgStatPID, SIGQUIT);
3807	}
3808	}
3809	}
3810	}
3811
3812	if (pmState == PM_SHUTDOWN_2)
3813	{
3814	/*
3815	* PM_SHUTDOWN_2 state ends when there's no other children than
3816	* dead_end children left. There shouldn't be any regular backends
3817	* left by now anyway; what we're really waiting for is walsenders and
3818	* archiver.
3819	*/
3820	if (PgArchPID == `0` && CountChildren(BACKEND_TYPE_ALL) == `0`)
3821	{
3822	pmState = PM_WAIT_DEAD_END;
3823	}
3824	}
3825
3826	if (pmState == PM_WAIT_DEAD_END)
3827	{
3828	/*
3829	* PM_WAIT_DEAD_END state ends when the BackendList is entirely empty
3830	* (ie, no dead_end children remain), and the archiver and stats
3831	* collector are gone too.
3832	*
3833	* The reason we wait for those two is to protect them against a new
3834	* postmaster starting conflicting subprocesses; this isn't an
3835	* ironclad protection, but it at least helps in the
3836	* shutdown-and-immediately-restart scenario. Note that they have
3837	* already been sent appropriate shutdown signals, either during a
3838	* normal state transition leading up to PM_WAIT_DEAD_END, or during
3839	* FatalError processing.
3840	*/
3841	if (dlist_is_empty(&BackendList) &&
3842	PgArchPID == `0` && PgStatPID == `0`)
3843	{
3844	/ These other guys should be dead already /
3845	Assert(StartupPID == `0`);
3846	Assert(WalReceiverPID == `0`);
3847	Assert(BgWriterPID == `0`);
3848	Assert(CheckpointerPID == `0`);
3849	Assert(WalWriterPID == `0`);
3850	Assert(AutoVacPID == `0`);
3851	/ syslogger is not considered here /
3852	pmState = PM_NO_CHILDREN;
3853	}
3854	}
3855
3856	/*
3857	* If we've been told to shut down, we exit as soon as there are no
3858	* remaining children. If there was a crash, cleanup will occur at the
3859	* next startup. (Before PostgreSQL 8.3, we tried to recover from the
3860	* crash before exiting, but that seems unwise if we are quitting because
3861	* we got SIGTERM from init --- there may well not be time for recovery
3862	* before init decides to SIGKILL us.)
3863	*
3864	* Note that the syslogger continues to run. It will exit when it sees
3865	* EOF on its input pipe, which happens when there are no more upstream
3866	* processes.
3867	*/
3868	if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN)
3869	{
3870	if (FatalError)
3871	{
3872	ereport(LOG, (errmsg("abnormal database system shutdown")));
3873	ExitPostmaster(`1`);
3874	}
3875	else
3876	{
3877	/*
3878	* Terminate exclusive backup mode to avoid recovery after a clean
3879	* fast shutdown. Since an exclusive backup can only be taken
3880	* during normal running (and not, for example, while running
3881	* under Hot Standby) it only makes sense to do this if we reached
3882	* normal running. If we're still in recovery, the backup file is
3883	* one we're recovering from, and we must keep it around so that
3884	* recovery restarts from the right place.
3885	*/
3886	if (ReachedNormalRunning)
3887	CancelBackup();
3888
3889	/ Normal exit from the postmaster is here /
3890	ExitPostmaster(`0`);
3891	}
3892	}
3893
3894	/*
3895	* If the startup process failed, or the user does not want an automatic
3896	* restart after backend crashes, wait for all non-syslogger children to
3897	* exit, and then exit postmaster. We don't try to reinitialize when the
3898	* startup process fails, because more than likely it will just fail again
3899	* and we will keep trying forever.
3900	*/
3901	if (pmState == PM_NO_CHILDREN &&
3902	(StartupStatus == STARTUP_CRASHED \|\| !restart_after_crash))
3903	ExitPostmaster(`1`);
3904
3905	/*
3906	* If we need to recover from a crash, wait for all non-syslogger children
3907	* to exit, then reset shmem and StartupDataBase.
3908	*/
3909	if (FatalError && pmState == PM_NO_CHILDREN)
3910	{
3911	ereport(LOG,
3912	(errmsg("all server processes terminated; reinitializing")));
3913
3914	/ allow background workers to immediately restart /
3915	ResetBackgroundWorkerCrashTimes();
3916
3917	shmem_exit(`1`);
3918
3919	/ re-read control file into local memory /
3920	LocalProcessControlFile(true);
3921
3922	reset_shared(PostPortNumber);
3923
3924	StartupPID = StartupDataBase();
3925	Assert(StartupPID != `0`);
3926	StartupStatus = STARTUP_RUNNING;
3927	pmState = PM_STARTUP;
3928	/ crash recovery started, reset SIGKILL flag /
3929	AbortStartTime = `0`;
3930	}
3931	}
3932
3933
3934	/*
3935	* Send a signal to a postmaster child process
3936	*
3937	* On systems that have setsid(), each child process sets itself up as a
3938	* process group leader. For signals that are generally interpreted in the
3939	* appropriate fashion, we signal the entire process group not just the
3940	* direct child process. This allows us to, for example, SIGQUIT a blocked
3941	* archive_recovery script, or SIGINT a script being run by a backend via
3942	* system().
3943	*
3944	* There is a race condition for recently-forked children: they might not
3945	* have executed setsid() yet. So we signal the child directly as well as
3946	* the group. We assume such a child will handle the signal before trying
3947	* to spawn any grandchild processes. We also assume that signaling the
3948	* child twice will not cause any problems.
3949	*/
3950	static void
3951	signal_child(pid_t pid, int signal)
3952	{
3953	if (kill(pid, signal) < `0`)
3954	elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);
3955	#ifdef HAVE_SETSID
3956	switch (signal)
3957	{
3958	case SIGINT:
3959	case SIGTERM:
3960	case SIGQUIT:
3961	case SIGSTOP:
3962	case SIGKILL:
3963	if (kill(-pid, signal) < `0`)
3964	elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
3965	break;
3966	default:
3967	break;
3968	}
3969	#endif
3970	}
3971
3972	/*
3973	* Send a signal to the targeted children (but NOT special children;
3974	* dead_end children are never signaled, either).
3975	*/
3976	static bool
3977	SignalSomeChildren(int signal, int target)
3978	{
3979	dlist_iter iter;
3980	bool signaled = false;
3981
3982	dlist_foreach(iter, &BackendList)
3983	{
3984	Backend *bp = dlist_container(Backend, elem, iter.cur);
3985
3986	if (bp->dead_end)
3987	continue;
3988
3989	/*
3990	* Since target == BACKEND_TYPE_ALL is the most common case, we test
3991	* it first and avoid touching shared memory for every child.
3992	*/
3993	if (target != BACKEND_TYPE_ALL)
3994	{
3995	/*
3996	* Assign bkend_type for any recently announced WAL Sender
3997	* processes.
3998	*/
3999	if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
4000	IsPostmasterChildWalSender(bp->child_slot))
4001	bp->bkend_type = BACKEND_TYPE_WALSND;
4002
4003	if (!(target & bp->bkend_type))
4004	continue;
4005	}
4006
4007	ereport(DEBUG4,
4008	(errmsg_internal("sending signal %d to process %d",
4009	signal, (int) bp->pid)));
4010	signal_child(bp->pid, signal);
4011	signaled = true;
4012	}
4013	return signaled;
4014	}
4015
4016	/*
4017	* Send a termination signal to children. This considers all of our children
4018	* processes, except syslogger and dead_end backends.
4019	*/
4020	static void
4021	TerminateChildren(int signal)
4022	{
4023	SignalChildren(signal);
4024	if (StartupPID != `0`)
4025	{
4026	signal_child(StartupPID, signal);
4027	if (signal == SIGQUIT \|\| signal == SIGKILL)
4028	StartupStatus = STARTUP_SIGNALED;
4029	}
4030	if (BgWriterPID != `0`)
4031	signal_child(BgWriterPID, signal);
4032	if (CheckpointerPID != `0`)
4033	signal_child(CheckpointerPID, signal);
4034	if (WalWriterPID != `0`)
4035	signal_child(WalWriterPID, signal);
4036	if (WalReceiverPID != `0`)
4037	signal_child(WalReceiverPID, signal);
4038	if (AutoVacPID != `0`)
4039	signal_child(AutoVacPID, signal);
4040	if (PgArchPID != `0`)
4041	signal_child(PgArchPID, signal);
4042	if (PgStatPID != `0`)
4043	signal_child(PgStatPID, signal);
4044	}
4045
4046	/*
4047	* BackendStartup -- start backend process
4048	*
4049	* returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
4050	*
4051	* Note: if you change this code, also consider StartAutovacuumWorker.
4052	*/
4053	static int
4054	BackendStartup(Port *port)
4055	{
4056	Backend bn; /* for backend cleanup /
4057	pid_t pid;
4058
4059	/*
4060	* Create backend data structure. Better before the fork() so we can
4061	* handle failure cleanly.
4062	*/
4063	bn = (Backend ) malloc(sizeof*(Backend));
4064	if (!bn)
4065	{
4066	ereport(LOG,
4067	(errcode(ERRCODE_OUT_OF_MEMORY),
4068	errmsg("out of memory")));
4069	return STATUS_ERROR;
4070	}
4071
4072	/*
4073	* Compute the cancel key that will be assigned to this backend. The
4074	* backend will have its own copy in the forked-off process' value of
4075	* MyCancelKey, so that it can transmit the key to the frontend.
4076	*/
4077	if (!RandomCancelKey(&MyCancelKey))
4078	{
4079	free(bn);
4080	ereport(LOG,
4081	(errcode(ERRCODE_INTERNAL_ERROR),
4082	errmsg("could not generate random cancel key")));
4083	return STATUS_ERROR;
4084	}
4085
4086	bn->cancel_key = MyCancelKey;
4087
4088	/ Pass down canAcceptConnections state /
4089	port->canAcceptConnections = canAcceptConnections();
4090	bn->dead_end = (port->canAcceptConnections != CAC_OK &&
4091	port->canAcceptConnections != CAC_WAITBACKUP);
4092
4093	/*
4094	* Unless it's a dead_end child, assign it a child slot number
4095	*/
4096	if (!bn->dead_end)
4097	bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
4098	else
4099	bn->child_slot = `0`;
4100
4101	/ Hasn't asked to be notified about any bgworkers yet /
4102	bn->bgworker_notify = false;
4103
4104	#ifdef EXEC_BACKEND
4105	pid = backend_forkexec(port);
4106	#else /* !EXEC_BACKEND */
4107	pid = fork_process();
4108	if (pid == `0`) / child /
4109	{
4110	free(bn);
4111
4112	/ Detangle from postmaster /
4113	InitPostmasterChild();
4114
4115	/ Close the postmaster's sockets /
4116	ClosePostmasterPorts(false);
4117
4118	/ Perform additional initialization and collect startup packet /
4119	BackendInitialize(port);
4120
4121	/ And run the backend /
4122	BackendRun(port);
4123	}
4124	#endif /* EXEC_BACKEND */
4125
4126	if (pid < `0`)
4127	{
4128	/ in parent, fork failed /
4129	int save_errno = errno;
4130
4131	if (!bn->dead_end)
4132	(void) ReleasePostmasterChildSlot(bn->child_slot);
4133	free(bn);
4134	errno = save_errno;
4135	ereport(LOG,
4136	(errmsg("could not fork new process for connection: %m")));
4137	report_fork_failure_to_client(port, save_errno);
4138	return STATUS_ERROR;
4139	}
4140
4141	/ in parent, successful fork /
4142	ereport(DEBUG2,
4143	(errmsg_internal("forked new backend, pid=%d socket=%d",
4144	(int) pid, (int) port->sock)));
4145
4146	/*
4147	* Everything's been successful, it's safe to add this backend to our list
4148	* of backends.
4149	*/
4150	bn->pid = pid;
4151	bn->bkend_type = BACKEND_TYPE_NORMAL; / Can change later to WALSND /
4152	dlist_push_head(&BackendList, &bn->elem);
4153
4154	#ifdef EXEC_BACKEND
4155	if (!bn->dead_end)
4156	ShmemBackendArrayAdd(bn);
4157	#endif
4158
4159	return STATUS_OK;
4160	}
4161
4162	/*
4163	* Try to report backend fork() failure to client before we close the
4164	* connection. Since we do not care to risk blocking the postmaster on
4165	* this connection, we set the connection to non-blocking and try only once.
4166	*
4167	* This is grungy special-purpose code; we cannot use backend libpq since
4168	* it's not up and running.
4169	*/
4170	static void
4171	report_fork_failure_to_client(Port port, int* errnum)
4172	{
4173	char buffer[`1000`];
4174	int rc;
4175
4176	/ Format the error message packet (always V2 protocol) /
4177	snprintf(buffer, sizeof(buffer), "E%s%s\n",
4178	_("could not fork new process for connection: "),
4179	strerror(errnum));
4180
4181	/ Set port to non-blocking. Don't do send() if this fails /
4182	if (!pg_set_noblock(port->sock))
4183	return;
4184
4185	/ We'll retry after EINTR, but ignore all other failures /
4186	do
4187	{
4188	rc = send(port->sock, buffer, strlen(buffer) + `1`, `0`);
4189	} while (rc < `0` && errno == EINTR);
4190	}
4191
4192
4193	/*
4194	* BackendInitialize -- initialize an interactive (postmaster-child)
4195	* backend process, and collect the client's startup packet.
4196	*
4197	* returns: nothing. Will not return at all if there's any failure.
4198	*
4199	* Note: this code does not depend on having any access to shared memory.
4200	* In the EXEC_BACKEND case, we are physically attached to shared memory
4201	* but have not yet set up most of our local pointers to shmem structures.
4202	*/
4203	static void
4204	BackendInitialize(Port *port)
4205	{
4206	int status;
4207	int ret;
4208	char remote_host[NI_MAXHOST];
4209	char remote_port[NI_MAXSERV];
4210	char remote_ps_data[NI_MAXHOST];
4211
4212	/ Save port etc. for ps status /
4213	MyProcPort = port;
4214
4215	/*
4216	* PreAuthDelay is a debugging aid for investigating problems in the
4217	* authentication cycle: it can be set in postgresql.conf to allow time to
4218	* attach to the newly-forked backend with a debugger. (See also
4219	* PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it
4220	* is not honored until after authentication.)
4221	*/
4222	if (PreAuthDelay > `0`)
4223	pg_usleep(PreAuthDelay * `1000000L`);
4224
4225	/ This flag will remain set until InitPostgres finishes authentication /
4226	ClientAuthInProgress = true; / limit visibility of log messages /
4227
4228	/ set these to empty in case they are needed before we set them up /
4229	port->remote_host = "";
4230	port->remote_port = "";
4231
4232	/*
4233	* Initialize libpq and enable reporting of ereport errors to the client.
4234	* Must do this now because authentication uses libpq to send messages.
4235	*/
4236	pq_init(); / initialize libpq to talk to client /
4237	whereToSendOutput = DestRemote; / now safe to ereport to client /
4238
4239	/*
4240	* We arrange for a simple exit(1) if we receive SIGTERM or SIGQUIT or
4241	* timeout while trying to collect the startup packet. Otherwise the
4242	* postmaster cannot shutdown the database FAST or IMMED cleanly if a
4243	* buggy client fails to send the packet promptly. XXX it follows that
4244	* the remainder of this function must tolerate losing control at any
4245	* instant. Likewise, any pg_on_exit_callback registered before or during
4246	* this function must be prepared to execute at any instant between here
4247	* and the end of this function. Furthermore, affected callbacks execute
4248	* partially or not at all when a second exit-inducing signal arrives
4249	* after proc_exit_prepare() decrements on_proc_exit_index. (Thanks to
4250	* that mechanic, callbacks need not anticipate more than one call.) This
4251	* is fragile; it ought to instead follow the norm of handling interrupts
4252	* at selected, safe opportunities.
4253	*/
4254	pqsignal(SIGTERM, startup_die);
4255	pqsignal(SIGQUIT, startup_die);
4256	InitializeTimeouts(); / establishes SIGALRM handler /
4257	PG_SETMASK(&StartupBlockSig);
4258
4259	/*
4260	* Get the remote host name and port for logging and status display.
4261	*/
4262	remote_host[`0`] = `'\0'`;
4263	remote_port[`0`] = `'\0'`;
4264	if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
4265	remote_host, sizeof(remote_host),
4266	remote_port, sizeof(remote_port),
4267	(log_hostname ? `0` : NI_NUMERICHOST) \| NI_NUMERICSERV)) != `0`)
4268	ereport(WARNING,
4269	(errmsg_internal("pg_getnameinfo_all() failed: %s",
4270	gai_strerror(ret))));
4271	if (remote_port[`0`] == `'\0'`)
4272	snprintf(remote_ps_data, sizeof(remote_ps_data), "%s", remote_host);
4273	else
4274	snprintf(remote_ps_data, sizeof(remote_ps_data), "%s(%s)", remote_host, remote_port);
4275
4276	/*
4277	* Save remote_host and remote_port in port structure (after this, they
4278	* will appear in log_line_prefix data for log messages).
4279	*/
4280	port->remote_host = strdup(remote_host);
4281	port->remote_port = strdup(remote_port);
4282
4283	/ And now we can issue the Log_connections message, if wanted /
4284	if (Log_connections)
4285	{
4286	if (remote_port[`0`])
4287	ereport(LOG,
4288	(errmsg("connection received: host=%s port=%s",
4289	remote_host,
4290	remote_port)));
4291	else
4292	ereport(LOG,
4293	(errmsg("connection received: host=%s",
4294	remote_host)));
4295	}
4296
4297	/*
4298	* If we did a reverse lookup to name, we might as well save the results
4299	* rather than possibly repeating the lookup during authentication.
4300	*
4301	* Note that we don't want to specify NI_NAMEREQD above, because then we'd
4302	* get nothing useful for a client without an rDNS entry. Therefore, we
4303	* must check whether we got a numeric IPv4 or IPv6 address, and not save
4304	* it into remote_hostname if so. (This test is conservative and might
4305	* sometimes classify a hostname as numeric, but an error in that
4306	* direction is safe; it only results in a possible extra lookup.)
4307	*/
4308	if (log_hostname &&
4309	ret == `0` &&
4310	strspn(remote_host, "0123456789.") < strlen(remote_host) &&
4311	strspn(remote_host, "0123456789ABCDEFabcdef:") < strlen(remote_host))
4312	port->remote_hostname = strdup(remote_host);
4313
4314	/*
4315	* Ready to begin client interaction. We will give up and exit(1) after a
4316	* time delay, so that a broken client can't hog a connection
4317	* indefinitely. PreAuthDelay and any DNS interactions above don't count
4318	* against the time limit.
4319	*
4320	* Note: AuthenticationTimeout is applied here while waiting for the
4321	* startup packet, and then again in InitPostgres for the duration of any
4322	* authentication operations. So a hostile client could tie up the
4323	* process for nearly twice AuthenticationTimeout before we kick him off.
4324	*
4325	* Note: because PostgresMain will call InitializeTimeouts again, the
4326	* registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay
4327	* since we never use it again after this function.
4328	*/
4329	RegisterTimeout(STARTUP_PACKET_TIMEOUT, StartupPacketTimeoutHandler);
4330	enable_timeout_after(STARTUP_PACKET_TIMEOUT, AuthenticationTimeout * `1000`);
4331
4332	/*
4333	* Receive the startup packet (which might turn out to be a cancel request
4334	* packet).
4335	*/
4336	status = ProcessStartupPacket(port, false);
4337
4338	/*
4339	* Stop here if it was bad or a cancel packet. ProcessStartupPacket
4340	* already did any appropriate error reporting.
4341	*/
4342	if (status != STATUS_OK)
4343	proc_exit(`0`);
4344
4345	/*
4346	* Now that we have the user and database name, we can set the process
4347	* title for ps. It's good to do this as early as possible in startup.
4348	*
4349	* For a walsender, the ps display is set in the following form:
4350	*
4351	* postgres: walsender <user> <host> <activity>
4352	*
4353	* To achieve that, we pass "walsender" as username and username as dbname
4354	* to init_ps_display(). XXX: should add a new variant of
4355	* init_ps_display() to avoid abusing the parameters like this.
4356	*/
4357	if (am_walsender)
4358	init_ps_display(pgstat_get_backend_desc(B_WAL_SENDER), port->user_name, remote_ps_data,
4359	update_process_title ? "authentication" : "");
4360	else
4361	init_ps_display(port->user_name, port->database_name, remote_ps_data,
4362	update_process_title ? "authentication" : "");
4363
4364	/*
4365	* Disable the timeout, and prevent SIGTERM/SIGQUIT again.
4366	*/
4367	disable_timeout(STARTUP_PACKET_TIMEOUT, false);
4368	PG_SETMASK(&BlockSig);
4369	}
4370
4371
4372	/*
4373	* BackendRun -- set up the backend's argument list and invoke PostgresMain()
4374	*
4375	* returns:
4376	* Shouldn't return at all.
4377	* If PostgresMain() fails, return status.
4378	*/
4379	static void
4380	BackendRun(Port *port)
4381	{
4382	char **av;
4383	int maxac;
4384	int ac;
4385	int i;
4386
4387	/*
4388	* Now, build the argv vector that will be given to PostgresMain.
4389	*
4390	* The maximum possible number of commandline arguments that could come
4391	* from ExtraOptions is (strlen(ExtraOptions) + 1) / 2; see
4392	* pg_split_opts().
4393	*/
4394	maxac = `2`; / for fixed args supplied below /
4395	maxac += (strlen(ExtraOptions) + `1`) / `2`;
4396
4397	av = (char **) MemoryContextAlloc(TopMemoryContext,
4398	maxac * sizeof(char *));
4399	ac = `0`;
4400
4401	av[ac++] = "postgres";
4402
4403	/*
4404	* Pass any backend switches specified with -o on the postmaster's own
4405	* command line. We assume these are secure.
4406	*/
4407	pg_split_opts(av, &ac, ExtraOptions);
4408
4409	av[ac] = NULL;
4410
4411	Assert(ac < maxac);
4412
4413	/*
4414	* Debug: print arguments being passed to backend
4415	*/
4416	ereport(DEBUG3,
4417	(errmsg_internal("%s child[%d]: starting with (",
4418	progname, (int) getpid())));
4419	for (i = `0`; i < ac; ++i)
4420	ereport(DEBUG3,
4421	(errmsg_internal("\t%s", av[i])));
4422	ereport(DEBUG3,
4423	(errmsg_internal(")")));
4424
4425	/*
4426	* Make sure we aren't in PostmasterContext anymore. (We can't delete it
4427	* just yet, though, because InitPostgres will need the HBA data.)
4428	*/
4429	MemoryContextSwitchTo(TopMemoryContext);
4430
4431	PostgresMain(ac, av, port->database_name, port->user_name);
4432	}
4433
4434
4435	#ifdef EXEC_BACKEND
4436
4437	/*
4438	* postmaster_forkexec -- fork and exec a postmaster subprocess
4439	*
4440	* The caller must have set up the argv array already, except for argv[2]
4441	* which will be filled with the name of the temp variable file.
4442	*
4443	* Returns the child process PID, or -1 on fork failure (a suitable error
4444	* message has been logged on failure).
4445	*
4446	* All uses of this routine will dispatch to SubPostmasterMain in the
4447	* child process.
4448	*/
4449	pid_t
4450	postmaster_forkexec(int argc, char *argv[])
4451	{
4452	Port port;
4453
4454	/ This entry point passes dummy values for the Port variables /
4455	memset(&port, `0`, sizeof(port));
4456	return internal_forkexec(argc, argv, &port);
4457	}
4458
4459	/*
4460	* backend_forkexec -- fork/exec off a backend process
4461	*
4462	* Some operating systems (WIN32) don't have fork() so we have to simulate
4463	* it by storing parameters that need to be passed to the child and
4464	* then create a new child process.
4465	*
4466	* returns the pid of the fork/exec'd process, or -1 on failure
4467	*/
4468	static pid_t
4469	backend_forkexec(Port *port)
4470	{
4471	char *av[`4`];
4472	int ac = `0`;
4473
4474	av[ac++] = "postgres";
4475	av[ac++] = "--forkbackend";
4476	av[ac++] = NULL; / filled in by internal_forkexec /
4477
4478	av[ac] = NULL;
4479	Assert(ac < lengthof(av));
4480
4481	return internal_forkexec(ac, av, port);
4482	}
4483
4484	#ifndef WIN32
4485
4486	/*
4487	* internal_forkexec non-win32 implementation
4488	*
4489	* - writes out backend variables to the parameter file
4490	* - fork():s, and then exec():s the child process
4491	*/
4492	static pid_t
4493	internal_forkexec(int argc, char argv[], Port port)
4494	{
4495	static unsigned long tmpBackendFileNum = `0`;
4496	pid_t pid;
4497	char tmpfilename[MAXPGPATH];
4498	BackendParameters param;
4499	FILE *fp;
4500
4501	if (!save_backend_variables(&param, port))
4502	return -`1`; / log made by save_backend_variables /
4503
4504	/ Calculate name for temp file /
4505	snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu",
4506	PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
4507	MyProcPid, ++tmpBackendFileNum);
4508
4509	/ Open file /
4510	fp = AllocateFile(tmpfilename, PG_BINARY_W);
4511	if (!fp)
4512	{
4513	/*
4514	* As in OpenTemporaryFileInTablespace, try to make the temp-file
4515	* directory, ignoring errors.
4516	*/
4517	(void) MakePGDirectory(PG_TEMP_FILES_DIR);
4518
4519	fp = AllocateFile(tmpfilename, PG_BINARY_W);
4520	if (!fp)
4521	{
4522	ereport(LOG,
4523	(errcode_for_file_access(),
4524	errmsg("could not create file \"%s\": %m",
4525	tmpfilename)));
4526	return -`1`;
4527	}
4528	}
4529
4530	if (fwrite(&param, sizeof(param), `1`, fp) != `1`)
4531	{
4532	ereport(LOG,
4533	(errcode_for_file_access(),
4534	errmsg("could not write to file \"%s\": %m", tmpfilename)));
4535	FreeFile(fp);
4536	return -`1`;
4537	}
4538
4539	/ Release file /
4540	if (FreeFile(fp))
4541	{
4542	ereport(LOG,
4543	(errcode_for_file_access(),
4544	errmsg("could not write to file \"%s\": %m", tmpfilename)));
4545	return -`1`;
4546	}
4547
4548	/ Make sure caller set up argv properly /
4549	Assert(argc >= `3`);
4550	Assert(argv[argc] == NULL);
4551	Assert(strncmp(argv[`1`], "--fork", `6`) == `0`);
4552	Assert(argv[`2`] == NULL);
4553
4554	/ Insert temp file name after --fork argument /
4555	argv[`2`] = tmpfilename;
4556
4557	/ Fire off execv in child /
4558	if ((pid = fork_process()) == `0`)
4559	{
4560	if (execv(postgres_exec_path, argv) < `0`)
4561	{
4562	ereport(LOG,
4563	(errmsg("could not execute server process \"%s\": %m",
4564	postgres_exec_path)));
4565	/ We're already in the child process here, can't return /
4566	exit(`1`);
4567	}
4568	}
4569
4570	return pid; / Parent returns pid, or -1 on fork failure /
4571	}
4572	#else /* WIN32 */
4573
4574	/*
4575	* internal_forkexec win32 implementation
4576	*
4577	* - starts backend using CreateProcess(), in suspended state
4578	* - writes out backend variables to the parameter file
4579	* - during this, duplicates handles and sockets required for
4580	* inheritance into the new process
4581	* - resumes execution of the new process once the backend parameter
4582	* file is complete.
4583	*/
4584	static pid_t
4585	internal_forkexec(int argc, char argv[], Port port)
4586	{
4587	int retry_count = `0`;
4588	STARTUPINFO si;
4589	PROCESS_INFORMATION pi;
4590	int i;
4591	int j;
4592	char cmdLine[MAXPGPATH * `2`];
4593	HANDLE paramHandle;
4594	BackendParameters *param;
4595	SECURITY_ATTRIBUTES sa;
4596	char paramHandleStr[`32`];
4597	win32_deadchild_waitinfo *childinfo;
4598
4599	/ Make sure caller set up argv properly /
4600	Assert(argc >= `3`);
4601	Assert(argv[argc] == NULL);
4602	Assert(strncmp(argv[`1`], "--fork", `6`) == `0`);
4603	Assert(argv[`2`] == NULL);
4604
4605	/ Resume here if we need to retry /
4606	retry:
4607
4608	/ Set up shared memory for parameter passing /
4609	ZeroMemory(&sa, sizeof(sa));
4610	sa.nLength = sizeof(sa);
4611	sa.bInheritHandle = TRUE;
4612	paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE,
4613	&sa,
4614	PAGE_READWRITE,
4615	`0`,
4616	sizeof(BackendParameters),
4617	NULL);
4618	if (paramHandle == INVALID_HANDLE_VALUE)
4619	{
4620	elog(LOG, "could not create backend parameter file mapping: error code %lu",
4621	GetLastError());
4622	return -`1`;
4623	}
4624
4625	param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, `0`, `0`, sizeof(BackendParameters));
4626	if (!param)
4627	{
4628	elog(LOG, "could not map backend parameter memory: error code %lu",
4629	GetLastError());
4630	CloseHandle(paramHandle);
4631	return -`1`;
4632	}
4633
4634	/ Insert temp file name after --fork argument /
4635	#ifdef _WIN64
4636	sprintf(paramHandleStr, "%llu", (LONG_PTR) paramHandle);
4637	#else
4638	sprintf(paramHandleStr, "%lu", (DWORD) paramHandle);
4639	#endif
4640	argv[`2`] = paramHandleStr;
4641
4642	/ Format the cmd line /
4643	cmdLine[sizeof(cmdLine) - `1`] = `'\0'`;
4644	cmdLine[sizeof(cmdLine) - `2`] = `'\0'`;
4645	snprintf(cmdLine, sizeof(cmdLine) - `1`, "\"%s\"", postgres_exec_path);
4646	i = `0`;
4647	while (argv[++i] != NULL)
4648	{
4649	j = strlen(cmdLine);
4650	snprintf(cmdLine + j, sizeof(cmdLine) - `1` - j, " \"%s\"", argv[i]);
4651	}
4652	if (cmdLine[sizeof(cmdLine) - `2`] != `'\0'`)
4653	{
4654	elog(LOG, "subprocess command line too long");
4655	return -`1`;
4656	}
4657
4658	memset(&pi, `0`, sizeof(pi));
4659	memset(&si, `0`, sizeof(si));
4660	si.cb = sizeof(si);
4661
4662	/*
4663	* Create the subprocess in a suspended state. This will be resumed later,
4664	* once we have written out the parameter file.
4665	*/
4666	if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED,
4667	NULL, NULL, &si, &pi))
4668	{
4669	elog(LOG, "CreateProcess call failed: %m (error code %lu)",
4670	GetLastError());
4671	return -`1`;
4672	}
4673
4674	if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId))
4675	{
4676	/*
4677	* log made by save_backend_variables, but we have to clean up the
4678	* mess with the half-started process
4679	*/
4680	if (!TerminateProcess(pi.hProcess, `255`))
4681	ereport(LOG,
4682	(errmsg_internal("could not terminate unstarted process: error code %lu",
4683	GetLastError())));
4684	CloseHandle(pi.hProcess);
4685	CloseHandle(pi.hThread);
4686	return -`1`; / log made by save_backend_variables /
4687	}
4688
4689	/ Drop the parameter shared memory that is now inherited to the backend /
4690	if (!UnmapViewOfFile(param))
4691	elog(LOG, "could not unmap view of backend parameter file: error code %lu",
4692	GetLastError());
4693	if (!CloseHandle(paramHandle))
4694	elog(LOG, "could not close handle to backend parameter file: error code %lu",
4695	GetLastError());
4696
4697	/*
4698	* Reserve the memory region used by our main shared memory segment before
4699	* we resume the child process. Normally this should succeed, but if ASLR
4700	* is active then it might sometimes fail due to the stack or heap having
4701	* gotten mapped into that range. In that case, just terminate the
4702	* process and retry.
4703	*/
4704	if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess))
4705	{
4706	/ pgwin32_ReserveSharedMemoryRegion already made a log entry /
4707	if (!TerminateProcess(pi.hProcess, `255`))
4708	ereport(LOG,
4709	(errmsg_internal("could not terminate process that failed to reserve memory: error code %lu",
4710	GetLastError())));
4711	CloseHandle(pi.hProcess);
4712	CloseHandle(pi.hThread);
4713	if (++retry_count < `100`)
4714	goto retry;
4715	ereport(LOG,
4716	(errmsg("giving up after too many tries to reserve shared memory"),
4717	errhint("This might be caused by ASLR or antivirus software.")));
4718	return -`1`;
4719	}
4720
4721	/*
4722	* Now that the backend variables are written out, we start the child
4723	* thread so it can start initializing while we set up the rest of the
4724	* parent state.
4725	*/
4726	if (ResumeThread(pi.hThread) == -`1`)
4727	{
4728	if (!TerminateProcess(pi.hProcess, `255`))
4729	{
4730	ereport(LOG,
4731	(errmsg_internal("could not terminate unstartable process: error code %lu",
4732	GetLastError())));
4733	CloseHandle(pi.hProcess);
4734	CloseHandle(pi.hThread);
4735	return -`1`;
4736	}
4737	CloseHandle(pi.hProcess);
4738	CloseHandle(pi.hThread);
4739	ereport(LOG,
4740	(errmsg_internal("could not resume thread of unstarted process: error code %lu",
4741	GetLastError())));
4742	return -`1`;
4743	}
4744
4745	/*
4746	* Queue a waiter to signal when this child dies. The wait will be handled
4747	* automatically by an operating system thread pool.
4748	*
4749	* Note: use malloc instead of palloc, since it needs to be thread-safe.
4750	* Struct will be free():d from the callback function that runs on a
4751	* different thread.
4752	*/
4753	childinfo = malloc(sizeof(win32_deadchild_waitinfo));
4754	if (!childinfo)
4755	ereport(FATAL,
4756	(errcode(ERRCODE_OUT_OF_MEMORY),
4757	errmsg("out of memory")));
4758
4759	childinfo->procHandle = pi.hProcess;
4760	childinfo->procId = pi.dwProcessId;
4761
4762	if (!RegisterWaitForSingleObject(&childinfo->waitHandle,
4763	pi.hProcess,
4764	pgwin32_deadchild_callback,
4765	childinfo,
4766	INFINITE,
4767	WT_EXECUTEONLYONCE \| WT_EXECUTEINWAITTHREAD))
4768	ereport(FATAL,
4769	(errmsg_internal("could not register process for wait: error code %lu",
4770	GetLastError())));
4771
4772	/ Don't close pi.hProcess here - the wait thread needs access to it /
4773
4774	CloseHandle(pi.hThread);
4775
4776	return pi.dwProcessId;
4777	}
4778	#endif /* WIN32 */
4779
4780
4781	/*
4782	* SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
4783	* to what it would be if we'd simply forked on Unix, and then
4784	* dispatch to the appropriate place.
4785	*
4786	* The first two command line arguments are expected to be "--forkFOO"
4787	* (where FOO indicates which postmaster child we are to become), and
4788	* the name of a variables file that we can read to load data that would
4789	* have been inherited by fork() on Unix. Remaining arguments go to the
4790	* subprocess FooMain() routine.
4791	*/
4792	void
4793	SubPostmasterMain(int argc, char *argv[])
4794	{
4795	Port port;
4796
4797	/ In EXEC_BACKEND case we will not have inherited these settings /
4798	IsPostmasterEnvironment = true;
4799	whereToSendOutput = DestNone;
4800
4801	/ Setup as postmaster child /
4802	InitPostmasterChild();
4803
4804	/ Setup essential subsystems (to ensure elog() behaves sanely) /
4805	InitializeGUCOptions();
4806
4807	/ Check we got appropriate args /
4808	if (argc < `3`)
4809	elog(FATAL, "invalid subpostmaster invocation");
4810
4811	/ Read in the variables file /
4812	memset(&port, `0`, sizeof(Port));
4813	read_backend_variables(argv[`2`], &port);
4814
4815	/ Close the postmaster's sockets (as soon as we know them) /
4816	ClosePostmasterPorts(strcmp(argv[`1`], "--forklog") == `0`);
4817
4818	/*
4819	* Set reference point for stack-depth checking
4820	*/
4821	set_stack_base();
4822
4823	/*
4824	* Set up memory area for GSS information. Mirrors the code in ConnCreate
4825	* for the non-exec case.
4826	*/
4827	#if defined(ENABLE_GSS) \|\| defined(ENABLE_SSPI)
4828	port.gss = (pg_gssinfo ) calloc(`1`, sizeof*(pg_gssinfo));
4829	if (!port.gss)
4830	ereport(FATAL,
4831	(errcode(ERRCODE_OUT_OF_MEMORY),
4832	errmsg("out of memory")));
4833	#endif
4834
4835	/*
4836	* If appropriate, physically re-attach to shared memory segment. We want
4837	* to do this before going any further to ensure that we can attach at the
4838	* same address the postmaster used. On the other hand, if we choose not
4839	* to re-attach, we may have other cleanup to do.
4840	*
4841	* If testing EXEC_BACKEND on Linux, you should run this as root before
4842	* starting the postmaster:
4843	*
4844	* echo 0 >/proc/sys/kernel/randomize_va_space
4845	*
4846	* This prevents using randomized stack and code addresses that cause the
4847	* child process's memory map to be different from the parent's, making it
4848	* sometimes impossible to attach to shared memory at the desired address.
4849	* Return the setting to its old value (usually '1' or '2') when finished.
4850	*/
4851	if (strcmp(argv[`1`], "--forkbackend") == `0` \|\|
4852	strcmp(argv[`1`], "--forkavlauncher") == `0` \|\|
4853	strcmp(argv[`1`], "--forkavworker") == `0` \|\|
4854	strcmp(argv[`1`], "--forkboot") == `0` \|\|
4855	strncmp(argv[`1`], "--forkbgworker=", `15`) == `0`)
4856	PGSharedMemoryReAttach();
4857	else
4858	PGSharedMemoryNoReAttach();
4859
4860	/ autovacuum needs this set before calling InitProcess /
4861	if (strcmp(argv[`1`], "--forkavlauncher") == `0`)
4862	AutovacuumLauncherIAm();
4863	if (strcmp(argv[`1`], "--forkavworker") == `0`)
4864	AutovacuumWorkerIAm();
4865
4866	/*
4867	* Start our win32 signal implementation. This has to be done after we
4868	* read the backend variables, because we need to pick up the signal pipe
4869	* from the parent process.
4870	*/
4871	#ifdef WIN32
4872	pgwin32_signal_initialize();
4873	#endif
4874
4875	/ In EXEC_BACKEND case we will not have inherited these settings /
4876	pqinitmask();
4877	PG_SETMASK(&BlockSig);
4878
4879	/ Read in remaining GUC variables /
4880	read_nondefault_variables();
4881
4882	/*
4883	* Check that the data directory looks valid, which will also check the
4884	* privileges on the data directory and update our umask and file/group
4885	* variables for creating files later. Note: this should really be done
4886	* before we create any files or directories.
4887	*/
4888	checkDataDir();
4889
4890	/*
4891	* (re-)read control file, as it contains config. The postmaster will
4892	* already have read this, but this process doesn't know about that.
4893	*/
4894	LocalProcessControlFile(false);
4895
4896	/*
4897	* Reload any libraries that were preloaded by the postmaster. Since we
4898	* exec'd this process, those libraries didn't come along with us; but we
4899	* should load them into all child processes to be consistent with the
4900	* non-EXEC_BACKEND behavior.
4901	*/
4902	process_shared_preload_libraries();
4903
4904	/ Run backend or appropriate child /
4905	if (strcmp(argv[`1`], "--forkbackend") == `0`)
4906	{
4907	Assert(argc == `3`); / shouldn't be any more args /
4908
4909	/*
4910	* Need to reinitialize the SSL library in the backend, since the
4911	* context structures contain function pointers and cannot be passed
4912	* through the parameter file.
4913	*
4914	* If for some reason reload fails (maybe the user installed broken
4915	* key files), soldier on without SSL; that's better than all
4916	* connections becoming impossible.
4917	*
4918	* XXX should we do this in all child processes? For the moment it's
4919	* enough to do it in backend children.
4920	*/
4921	#ifdef USE_SSL
4922	if (EnableSSL)
4923	{
4924	if (secure_initialize(false) == `0`)
4925	LoadedSSL = true;
4926	else
4927	ereport(LOG,
4928	(errmsg("SSL configuration could not be loaded in child process")));
4929	}
4930	#endif
4931
4932	/*
4933	* Perform additional initialization and collect startup packet.
4934	*
4935	* We want to do this before InitProcess() for a couple of reasons: 1.
4936	* so that we aren't eating up a PGPROC slot while waiting on the
4937	* client. 2. so that if InitProcess() fails due to being out of
4938	* PGPROC slots, we have already initialized libpq and are able to
4939	* report the error to the client.
4940	*/
4941	BackendInitialize(&port);
4942
4943	/ Restore basic shared memory pointers /
4944	InitShmemAccess(UsedShmemSegAddr);
4945
4946	/ Need a PGPROC to run CreateSharedMemoryAndSemaphores /
4947	InitProcess();
4948
4949	/ Attach process to shared data structures /
4950	CreateSharedMemoryAndSemaphores(`0`);
4951
4952	/ And run the backend /
4953	BackendRun(&port); / does not return /
4954	}
4955	if (strcmp(argv[`1`], "--forkboot") == `0`)
4956	{
4957	/ Restore basic shared memory pointers /
4958	InitShmemAccess(UsedShmemSegAddr);
4959
4960	/ Need a PGPROC to run CreateSharedMemoryAndSemaphores /
4961	InitAuxiliaryProcess();
4962
4963	/ Attach process to shared data structures /
4964	CreateSharedMemoryAndSemaphores(`0`);
4965
4966	AuxiliaryProcessMain(argc - `2`, argv + `2`); / does not return /
4967	}
4968	if (strcmp(argv[`1`], "--forkavlauncher") == `0`)
4969	{
4970	/ Restore basic shared memory pointers /
4971	InitShmemAccess(UsedShmemSegAddr);
4972
4973	/ Need a PGPROC to run CreateSharedMemoryAndSemaphores /
4974	InitProcess();
4975
4976	/ Attach process to shared data structures /
4977	CreateSharedMemoryAndSemaphores(`0`);
4978
4979	AutoVacLauncherMain(argc - `2`, argv + `2`); / does not return /
4980	}
4981	if (strcmp(argv[`1`], "--forkavworker") == `0`)
4982	{
4983	/ Restore basic shared memory pointers /
4984	InitShmemAccess(UsedShmemSegAddr);
4985
4986	/ Need a PGPROC to run CreateSharedMemoryAndSemaphores /
4987	InitProcess();
4988
4989	/ Attach process to shared data structures /
4990	CreateSharedMemoryAndSemaphores(`0`);
4991
4992	AutoVacWorkerMain(argc - `2`, argv + `2`); / does not return /
4993	}
4994	if (strncmp(argv[`1`], "--forkbgworker=", `15`) == `0`)
4995	{
4996	int shmem_slot;
4997
4998	/ do this as early as possible; in particular, before InitProcess() /
4999	IsBackgroundWorker = true;
5000
5001	/ Restore basic shared memory pointers /
5002	InitShmemAccess(UsedShmemSegAddr);
5003
5004	/ Need a PGPROC to run CreateSharedMemoryAndSemaphores /
5005	InitProcess();
5006
5007	/ Attach process to shared data structures /
5008	CreateSharedMemoryAndSemaphores(`0`);
5009
5010	/ Fetch MyBgworkerEntry from shared memory /
5011	shmem_slot = atoi(argv[`1`] + `15`);
5012	MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);
5013
5014	StartBackgroundWorker();
5015	}
5016	if (strcmp(argv[`1`], "--forkarch") == `0`)
5017	{
5018	/ Do not want to attach to shared memory /
5019
5020	PgArchiverMain(argc, argv); / does not return /
5021	}
5022	if (strcmp(argv[`1`], "--forkcol") == `0`)
5023	{
5024	/ Do not want to attach to shared memory /
5025
5026	PgstatCollectorMain(argc, argv); / does not return /
5027	}
5028	if (strcmp(argv[`1`], "--forklog") == `0`)
5029	{
5030	/ Do not want to attach to shared memory /
5031
5032	SysLoggerMain(argc, argv); / does not return /
5033	}
5034
5035	abort(); / shouldn't get here /
5036	}
5037	#endif /* EXEC_BACKEND */
5038
5039
/*
 * ExitPostmaster -- cleanup
 *
 * Common exit path for the postmaster: do NOT call exit() directly, always
 * go through here.  "status" is passed on to proc_exit() unchanged.
 */
static void
ExitPostmaster(int status)
{
#ifdef HAVE_PTHREAD_IS_THREADED_NP

	/*
	 * There is no known cause for a postmaster to become multithreaded after
	 * startup.  Recheck to account for the possibility of unknown causes.
	 * The report is only LOG level, because an unclean shutdown at this point
	 * would usually not look much different from a clean shutdown.
	 */
	if (pthread_is_threaded_np() != 0)
	{
		ereport(LOG,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg_internal("postmaster became multithreaded"),
				 errdetail("Please report this to <pgsql-bugs@lists.postgresql.org>.")));
	}
#endif

	/* should cleanup shared memory and kill all backends */

	/*
	 * Not sure of the semantics here.  When the Postmaster dies, should the
	 * backends all be killed? probably not.
	 *
	 * MUST		-- vadim 05-10-1999
	 */

	proc_exit(status);
}
5074
5075	/*
5076	* sigusr1_handler - handle signal conditions from child processes
5077	*/
5078	static void
5079	sigusr1_handler(SIGNAL_ARGS)
5080	{
5081	int save_errno = errno;
5082
5083	PG_SETMASK(&BlockSig);
5084
5085	/ Process background worker state change. /
5086	if (CheckPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE))
5087	{
5088	BackgroundWorkerStateChange();
5089	StartWorkerNeeded = true;
5090	}
5091
5092	/*
5093	* RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
5094	* unexpected states. If the startup process quickly starts up, completes
5095	* recovery, exits, we might process the death of the startup process
5096	* first. We don't want to go back to recovery in that case.
5097	*/
5098	if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED) &&
5099	pmState == PM_STARTUP && Shutdown == NoShutdown)
5100	{
5101	/ WAL redo has started. We're out of reinitialization. /
5102	FatalError = false;
5103	Assert(AbortStartTime == `0`);
5104
5105	/*
5106	* Crank up the background tasks. It doesn't matter if this fails,
5107	* we'll just try again later.
5108	*/
5109	Assert(CheckpointerPID == `0`);
5110	CheckpointerPID = StartCheckpointer();
5111	Assert(BgWriterPID == `0`);
5112	BgWriterPID = StartBackgroundWriter();
5113
5114	/*
5115	* Start the archiver if we're responsible for (re-)archiving received
5116	* files.
5117	*/
5118	Assert(PgArchPID == `0`);
5119	if (XLogArchivingAlways())
5120	PgArchPID = pgarch_start();
5121
5122	/*
5123	* If we aren't planning to enter hot standby mode later, treat
5124	* RECOVERY_STARTED as meaning we're out of startup, and report status
5125	* accordingly.
5126	*/
5127	if (!EnableHotStandby)
5128	{
5129	AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STANDBY);
5130	#ifdef USE_SYSTEMD
5131	sd_notify(`0`, "READY=1");
5132	#endif
5133	}
5134
5135	pmState = PM_RECOVERY;
5136	}
5137	if (CheckPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY) &&
5138	pmState == PM_RECOVERY && Shutdown == NoShutdown)
5139	{
5140	/*
5141	* Likewise, start other special children as needed.
5142	*/
5143	Assert(PgStatPID == `0`);
5144	PgStatPID = pgstat_start();
5145
5146	ereport(LOG,
5147	(errmsg("database system is ready to accept read only connections")));
5148
5149	/ Report status /
5150	AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY);
5151	#ifdef USE_SYSTEMD
5152	sd_notify(`0`, "READY=1");
5153	#endif
5154
5155	pmState = PM_HOT_STANDBY;
5156	/ Some workers may be scheduled to start now /
5157	StartWorkerNeeded = true;
5158	}
5159
5160	if (StartWorkerNeeded \|\| HaveCrashedWorker)
5161	maybe_start_bgworkers();
5162
5163	if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER) &&
5164	PgArchPID != `0`)
5165	{
5166	/*
5167	* Send SIGUSR1 to archiver process, to wake it up and begin archiving
5168	* next WAL file.
5169	*/
5170	signal_child(PgArchPID, SIGUSR1);
5171	}
5172
5173	/ Tell syslogger to rotate logfile if requested /
5174	if (SysLoggerPID != `0`)
5175	{
5176	if (CheckLogrotateSignal())
5177	{
5178	signal_child(SysLoggerPID, SIGUSR1);
5179	RemoveLogrotateSignalFiles();
5180	}
5181	else if (CheckPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE))
5182	{
5183	signal_child(SysLoggerPID, SIGUSR1);
5184	}
5185	}
5186
5187	if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER) &&
5188	Shutdown == NoShutdown)
5189	{
5190	/*
5191	* Start one iteration of the autovacuum daemon, even if autovacuuming
5192	* is nominally not enabled. This is so we can have an active defense
5193	* against transaction ID wraparound. We set a flag for the main loop
5194	* to do it rather than trying to do it here --- this is because the
5195	* autovac process itself may send the signal, and we want to handle
5196	* that by launching another iteration as soon as the current one
5197	* completes.
5198	*/
5199	start_autovac_launcher = true;
5200	}
5201
5202	if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER) &&
5203	Shutdown == NoShutdown)
5204	{
5205	/ The autovacuum launcher wants us to start a worker process. /
5206	StartAutovacuumWorker();
5207	}
5208
5209	if (CheckPostmasterSignal(PMSIGNAL_START_WALRECEIVER))
5210	{
5211	/ Startup Process wants us to start the walreceiver process. /
5212	/ Start immediately if possible, else remember request for later. /
5213	WalReceiverRequested = true;
5214	MaybeStartWalReceiver();
5215	}
5216
5217	/*
5218	* Try to advance postmaster's state machine, if a child requests it.
5219	*
5220	* Be careful about the order of this action relative to sigusr1_handler's
5221	* other actions. Generally, this should be after other actions, in case
5222	* they have effects PostmasterStateMachine would need to know about.
5223	* However, we should do it before the CheckPromoteSignal step, which
5224	* cannot have any (immediate) effect on the state machine, but does
5225	* depend on what state we're in now.
5226	*/
5227	if (CheckPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE))
5228	{
5229	PostmasterStateMachine();
5230	}
5231
5232	if (StartupPID != `0` &&
5233	(pmState == PM_STARTUP \|\| pmState == PM_RECOVERY \|\|
5234	pmState == PM_HOT_STANDBY \|\| pmState == PM_WAIT_READONLY) &&
5235	CheckPromoteSignal())
5236	{
5237	/ Tell startup process to finish recovery /
5238	signal_child(StartupPID, SIGUSR2);
5239	}
5240
5241	PG_SETMASK(&UnBlockSig);
5242
5243	errno = save_errno;
5244	}
5245
/*
 * SIGTERM or SIGQUIT while processing startup packet.
 * Clean up and exit(1).
 *
 * XXX: possible future improvement: try to send a message indicating
 * why we are disconnecting.  Problem is to be sure we don't block while
 * doing so, nor mess up SSL initialization.  In practice, if the client
 * has wedged here, it probably couldn't do anything with the message anyway.
 *
 * NOTE(review): proc_exit() is invoked from signal-handler context here;
 * confirm that everything it runs is safe to execute at that point.
 */
static void
startup_die(SIGNAL_ARGS)
{
	proc_exit(1);
}
5260
/*
 * Dummy signal handler
 *
 * Installed for signals that the postmaster itself has no use for but that
 * backends do use.  If the postmaster were to SIG_IGN such signals, a newly
 * started backend might drop a signal that arrives before it is able to
 * reconfigure its signal processing.  (See notes in tcop/postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* intentionally empty */
}
5274
/*
 * Timeout while processing startup packet.
 *
 * Handled the same way as startup_die(): clean up and exit(1).
 */
static void
StartupPacketTimeoutHandler(void)
{
	proc_exit(1);
}
5284
5285
5286	/*
5287	* Generate a random cancel key.
5288	*/
5289	static bool
5290	RandomCancelKey(int32 *cancel_key)
5291	{
5292	return pg_strong_random(cancel_key, sizeof(int32));
5293	}
5294
5295	/*
5296	* Count up number of child processes of specified types (dead_end children
5297	* are always excluded).
5298	*/
5299	static int
5300	CountChildren(int target)
5301	{
5302	dlist_iter iter;
5303	int cnt = `0`;
5304
5305	dlist_foreach(iter, &BackendList)
5306	{
5307	Backend *bp = dlist_container(Backend, elem, iter.cur);
5308
5309	if (bp->dead_end)
5310	continue;
5311
5312	/*
5313	* Since target == BACKEND_TYPE_ALL is the most common case, we test
5314	* it first and avoid touching shared memory for every child.
5315	*/
5316	if (target != BACKEND_TYPE_ALL)
5317	{
5318	/*
5319	* Assign bkend_type for any recently announced WAL Sender
5320	* processes.
5321	*/
5322	if (bp->bkend_type == BACKEND_TYPE_NORMAL &&
5323	IsPostmasterChildWalSender(bp->child_slot))
5324	bp->bkend_type = BACKEND_TYPE_WALSND;
5325
5326	if (!(target & bp->bkend_type))
5327	continue;
5328	}
5329
5330	cnt++;
5331	}
5332	return cnt;
5333	}
5334
5335
5336	/*
5337	* StartChildProcess -- start an auxiliary process for the postmaster
5338	*
5339	* "type" determines what kind of child will be started. All child types
5340	* initially go to AuxiliaryProcessMain, which will handle common setup.
5341	*
5342	* Return value of StartChildProcess is subprocess' PID, or 0 if failed
5343	* to start subprocess.
5344	*/
5345	static pid_t
5346	StartChildProcess(AuxProcType type)
5347	{
5348	pid_t pid;
5349	char *av[`10`];
5350	int ac = `0`;
5351	char typebuf[`32`];
5352
5353	/*
5354	* Set up command-line arguments for subprocess
5355	*/
5356	av[ac++] = "postgres";
5357
5358	#ifdef EXEC_BACKEND
5359	av[ac++] = "--forkboot";
5360	av[ac++] = NULL; / filled in by postmaster_forkexec /
5361	#endif
5362
5363	snprintf(typebuf, sizeof(typebuf), "-x%d", type);
5364	av[ac++] = typebuf;
5365
5366	av[ac] = NULL;
5367	Assert(ac < lengthof(av));
5368
5369	#ifdef EXEC_BACKEND
5370	pid = postmaster_forkexec(ac, av);
5371	#else /* !EXEC_BACKEND */
5372	pid = fork_process();
5373
5374	if (pid == `0`) / child /
5375	{
5376	InitPostmasterChild();
5377
5378	/ Close the postmaster's sockets /
5379	ClosePostmasterPorts(false);
5380
5381	/ Release postmaster's working memory context /
5382	MemoryContextSwitchTo(TopMemoryContext);
5383	MemoryContextDelete(PostmasterContext);
5384	PostmasterContext = NULL;
5385
5386	AuxiliaryProcessMain(ac, av);
5387	ExitPostmaster(`0`);
5388	}
5389	#endif /* EXEC_BACKEND */
5390
5391	if (pid < `0`)
5392	{
5393	/ in parent, fork failed /
5394	int save_errno = errno;
5395
5396	errno = save_errno;
5397	switch (type)
5398	{
5399	case StartupProcess:
5400	ereport(LOG,
5401	(errmsg("could not fork startup process: %m")));
5402	break;
5403	case BgWriterProcess:
5404	ereport(LOG,
5405	(errmsg("could not fork background writer process: %m")));
5406	break;
5407	case CheckpointerProcess:
5408	ereport(LOG,
5409	(errmsg("could not fork checkpointer process: %m")));
5410	break;
5411	case WalWriterProcess:
5412	ereport(LOG,
5413	(errmsg("could not fork WAL writer process: %m")));
5414	break;
5415	case WalReceiverProcess:
5416	ereport(LOG,
5417	(errmsg("could not fork WAL receiver process: %m")));
5418	break;
5419	default:
5420	ereport(LOG,
5421	(errmsg("could not fork process: %m")));
5422	break;
5423	}
5424
5425	/*
5426	* fork failure is fatal during startup, but there's no need to choke
5427	* immediately if starting other child types fails.
5428	*/
5429	if (type == StartupProcess)
5430	ExitPostmaster(`1`);
5431	return `0`;
5432	}
5433
5434	/*
5435	* in parent, successful fork
5436	*/
5437	return pid;
5438	}
5439
5440	/*
5441	* StartAutovacuumWorker
5442	* Start an autovac worker process.
5443	*
5444	* This function is here because it enters the resulting PID into the
5445	* postmaster's private backends list.
5446	*
5447	* NB -- this code very roughly matches BackendStartup.
5448	*/
5449	static void
5450	StartAutovacuumWorker(void)
5451	{
5452	Backend *bn;
5453
5454	/*
5455	* If not in condition to run a process, don't try, but handle it like a
5456	* fork failure. This does not normally happen, since the signal is only
5457	* supposed to be sent by autovacuum launcher when it's OK to do it, but
5458	* we have to check to avoid race-condition problems during DB state
5459	* changes.
5460	*/
5461	if (canAcceptConnections() == CAC_OK)
5462	{
5463	/*
5464	* Compute the cancel key that will be assigned to this session. We
5465	* probably don't need cancel keys for autovac workers, but we'd
5466	* better have something random in the field to prevent unfriendly
5467	* people from sending cancels to them.
5468	*/
5469	if (!RandomCancelKey(&MyCancelKey))
5470	{
5471	ereport(LOG,
5472	(errcode(ERRCODE_INTERNAL_ERROR),
5473	errmsg("could not generate random cancel key")));
5474	return;
5475	}
5476
5477	bn = (Backend ) malloc(sizeof*(Backend));
5478	if (bn)
5479	{
5480	bn->cancel_key = MyCancelKey;
5481
5482	/ Autovac workers are not dead_end and need a child slot /
5483	bn->dead_end = false;
5484	bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
5485	bn->bgworker_notify = false;
5486
5487	bn->pid = StartAutoVacWorker();
5488	if (bn->pid > `0`)
5489	{
5490	bn->bkend_type = BACKEND_TYPE_AUTOVAC;
5491	dlist_push_head(&BackendList, &bn->elem);
5492	#ifdef EXEC_BACKEND
5493	ShmemBackendArrayAdd(bn);
5494	#endif
5495	/ all OK /
5496	return;
5497	}
5498
5499	/*
5500	* fork failed, fall through to report -- actual error message was
5501	* logged by StartAutoVacWorker
5502	*/
5503	(void) ReleasePostmasterChildSlot(bn->child_slot);
5504	free(bn);
5505	}
5506	else
5507	ereport(LOG,
5508	(errcode(ERRCODE_OUT_OF_MEMORY),
5509	errmsg("out of memory")));
5510	}
5511
5512	/*
5513	* Report the failure to the launcher, if it's running. (If it's not, we
5514	* might not even be connected to shared memory, so don't try to call
5515	* AutoVacWorkerFailed.) Note that we also need to signal it so that it
5516	* responds to the condition, but we don't do that here, instead waiting
5517	* for ServerLoop to do it. This way we avoid a ping-pong signalling in
5518	* quick succession between the autovac launcher and postmaster in case
5519	* things get ugly.
5520	*/
5521	if (AutoVacPID != `0`)
5522	{
5523	AutoVacWorkerFailed();
5524	avlauncher_needs_signal = true;
5525	}
5526	}
5527
5528	/*
5529	* MaybeStartWalReceiver
5530	* Start the WAL receiver process, if not running and our state allows.
5531	*
5532	* Note: if WalReceiverPID is already nonzero, it might seem that we should
5533	* clear WalReceiverRequested. However, there's a race condition if the
5534	* walreceiver terminates and the startup process immediately requests a new
5535	* one: it's quite possible to get the signal for the request before reaping
5536	* the dead walreceiver process. Better to risk launching an extra
5537	* walreceiver than to miss launching one we need. (The walreceiver code
5538	* has logic to recognize that it should go away if not needed.)
5539	*/
5540	static void
5541	MaybeStartWalReceiver(void)
5542	{
5543	if (WalReceiverPID == `0` &&
5544	(pmState == PM_STARTUP \|\| pmState == PM_RECOVERY \|\|
5545	pmState == PM_HOT_STANDBY \|\| pmState == PM_WAIT_READONLY) &&
5546	Shutdown == NoShutdown)
5547	{
5548	WalReceiverPID = StartWalReceiver();
5549	if (WalReceiverPID != `0`)
5550	WalReceiverRequested = false;
5551	/ else leave the flag set, so we'll try again later /
5552	}
5553	}
5554
5555
5556	/*
5557	* Create the opts file
5558	*/
5559	static bool
5560	CreateOptsFile(int argc, char argv[], char* *fullprogname)
5561	{
5562	FILE *fp;
5563	int i;
5564
5565	#define OPTS_FILE "postmaster.opts"
5566
5567	if ((fp = fopen(OPTS_FILE, "w")) == NULL)
5568	{
5569	elog(LOG, "could not create file \"%s\": %m", OPTS_FILE);
5570	return false;
5571	}
5572
5573	fprintf(fp, "%s", fullprogname);
5574	for (i = `1`; i < argc; i++)
5575	fprintf(fp, " \"%s\"", argv[i]);
5576	fputs("\n", fp);
5577
5578	if (fclose(fp))
5579	{
5580	elog(LOG, "could not write file \"%s\": %m", OPTS_FILE);
5581	return false;
5582	}
5583
5584	return true;
5585	}
5586
5587
5588	/*
5589	* MaxLivePostmasterChildren
5590	*
5591	* This reports the number of entries needed in per-child-process arrays
5592	* (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray).
5593	* These arrays include regular backends, autovac workers, walsenders
5594	* and background workers, but not special children nor dead_end children.
5595	* This allows the arrays to have a fixed maximum size, to wit the same
5596	* too-many-children limit enforced by canAcceptConnections(). The exact value
5597	* isn't too critical as long as it's more than MaxBackends.
5598	*/
5599	int
5600	MaxLivePostmasterChildren(void)
5601	{
5602	return `2` * (MaxConnections + autovacuum_max_workers + `1` +
5603	max_wal_senders + max_worker_processes);
5604	}
5605
5606	/*
5607	* Connect background worker to a database.
5608	*/
5609	void
5610	BackgroundWorkerInitializeConnection(const char dbname, const* char *username, uint32 flags)
5611	{
5612	BackgroundWorker *worker = MyBgworkerEntry;
5613
5614	/ XXX is this the right errcode? /
5615	if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
5616	ereport(FATAL,
5617	(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5618	errmsg("database connection requirement not indicated during registration")));
5619
5620	InitPostgres(dbname, InvalidOid, username, InvalidOid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != `0`);
5621
5622	/ it had better not gotten out of "init" mode yet /
5623	if (!IsInitProcessingMode())
5624	ereport(ERROR,
5625	(errmsg("invalid processing mode in background worker")));
5626	SetProcessingMode(NormalProcessing);
5627	}
5628
5629	/*
5630	* Connect background worker to a database using OIDs.
5631	*/
5632	void
5633	BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags)
5634	{
5635	BackgroundWorker *worker = MyBgworkerEntry;
5636
5637	/ XXX is this the right errcode? /
5638	if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION))
5639	ereport(FATAL,
5640	(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
5641	errmsg("database connection requirement not indicated during registration")));
5642
5643	InitPostgres(NULL, dboid, NULL, useroid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != `0`);
5644
5645	/ it had better not gotten out of "init" mode yet /
5646	if (!IsInitProcessingMode())
5647	ereport(ERROR,
5648	(errmsg("invalid processing mode in background worker")));
5649	SetProcessingMode(NormalProcessing);
5650	}
5651
5652	/*
5653	* Block/unblock signals in a background worker
5654	*/
5655	void
5656	BackgroundWorkerBlockSignals(void)
5657	{
5658	PG_SETMASK(&BlockSig);
5659	}
5660
5661	void
5662	BackgroundWorkerUnblockSignals(void)
5663	{
5664	PG_SETMASK(&UnBlockSig);
5665	}
5666
#ifdef EXEC_BACKEND
/*
 * bgworker_forkexec -- EXEC_BACKEND launch path for a background worker
 *
 * Builds the argument vector "postgres --forkbgworker=<shmem_slot>" plus one
 * placeholder entry, and hands it to postmaster_forkexec(), whose result is
 * returned.
 */
static pid_t
bgworker_forkexec(int shmem_slot)
{
	char	   *av[10];
	int			ac = 0;
	char		forkav[MAXPGPATH];

	snprintf(forkav, MAXPGPATH, "--forkbgworker=%d", shmem_slot);

	av[ac++] = "postgres";
	av[ac++] = forkav;
	av[ac++] = NULL;			/* filled in by postmaster_forkexec */
	av[ac] = NULL;

	Assert(ac < lengthof(av));

	return postmaster_forkexec(ac, av);
}
#endif
5687
5688	/*
5689	* Start a new bgworker.
5690	* Starting time conditions must have been checked already.
5691	*
5692	* Returns true on success, false on failure.
5693	* In either case, update the RegisteredBgWorker's state appropriately.
5694	*
5695	* This code is heavily based on autovacuum.c, q.v.
5696	*/
5697	static bool
5698	do_start_bgworker(RegisteredBgWorker *rw)
5699	{
5700	pid_t worker_pid;
5701
5702	Assert(rw->rw_pid == `0`);
5703
5704	/*
5705	* Allocate and assign the Backend element. Note we must do this before
5706	* forking, so that we can handle out of memory properly.
5707	*
5708	* Treat failure as though the worker had crashed. That way, the
5709	* postmaster will wait a bit before attempting to start it again; if it
5710	* tried again right away, most likely it'd find itself repeating the
5711	* out-of-memory or fork failure condition.
5712	*/
5713	if (!assign_backendlist_entry(rw))
5714	{
5715	rw->rw_crashed_at = GetCurrentTimestamp();
5716	return false;
5717	}
5718
5719	ereport(DEBUG1,
5720	(errmsg("starting background worker process \"%s\"",
5721	rw->rw_worker.bgw_name)));
5722
5723	#ifdef EXEC_BACKEND
5724	switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot)))
5725	#else
5726	switch ((worker_pid = fork_process()))
5727	#endif
5728	{
5729	case -`1`:
5730	/ in postmaster, fork failed ... /
5731	ereport(LOG,
5732	(errmsg("could not fork worker process: %m")));
5733	/ undo what assign_backendlist_entry did /
5734	ReleasePostmasterChildSlot(rw->rw_child_slot);
5735	rw->rw_child_slot = `0`;
5736	free(rw->rw_backend);
5737	rw->rw_backend = NULL;
5738	/ mark entry as crashed, so we'll try again later /
5739	rw->rw_crashed_at = GetCurrentTimestamp();
5740	break;
5741
5742	#ifndef EXEC_BACKEND
5743	case `0`:
5744	/ in postmaster child ... /
5745	InitPostmasterChild();
5746
5747	/ Close the postmaster's sockets /
5748	ClosePostmasterPorts(false);
5749
5750	/*
5751	* Before blowing away PostmasterContext, save this bgworker's
5752	* data where it can find it.
5753	*/
5754	MyBgworkerEntry = (BackgroundWorker *)
5755	MemoryContextAlloc(TopMemoryContext, sizeof(BackgroundWorker));
5756	memcpy(MyBgworkerEntry, &rw->rw_worker, sizeof(BackgroundWorker));
5757
5758	/ Release postmaster's working memory context /
5759	MemoryContextSwitchTo(TopMemoryContext);
5760	MemoryContextDelete(PostmasterContext);
5761	PostmasterContext = NULL;
5762
5763	StartBackgroundWorker();
5764
5765	exit(`1`); / should not get here /
5766	break;
5767	#endif
5768	default:
5769	/ in postmaster, fork successful ... /
5770	rw->rw_pid = worker_pid;
5771	rw->rw_backend->pid = rw->rw_pid;
5772	ReportBackgroundWorkerPID(rw);
5773	/ add new worker to lists of backends /
5774	dlist_push_head(&BackendList, &rw->rw_backend->elem);
5775	#ifdef EXEC_BACKEND
5776	ShmemBackendArrayAdd(rw->rw_backend);
5777	#endif
5778	return true;
5779	}
5780
5781	return false;
5782	}
5783
5784	/*
5785	* Does the current postmaster state require starting a worker with the
5786	* specified start_time?
5787	*/
5788	static bool
5789	bgworker_should_start_now(BgWorkerStartTime start_time)
5790	{
5791	switch (pmState)
5792	{
5793	case PM_NO_CHILDREN:
5794	case PM_WAIT_DEAD_END:
5795	case PM_SHUTDOWN_2:
5796	case PM_SHUTDOWN:
5797	case PM_WAIT_BACKENDS:
5798	case PM_WAIT_READONLY:
5799	case PM_WAIT_BACKUP:
5800	break;
5801
5802	case PM_RUN:
5803	if (start_time == BgWorkerStart_RecoveryFinished)
5804	return true;
5805	/ fall through /
5806
5807	case PM_HOT_STANDBY:
5808	if (start_time == BgWorkerStart_ConsistentState)
5809	return true;
5810	/ fall through /
5811
5812	case PM_RECOVERY:
5813	case PM_STARTUP:
5814	case PM_INIT:
5815	if (start_time == BgWorkerStart_PostmasterStart)
5816	return true;
5817	/ fall through /
5818
5819	}
5820
5821	return false;
5822	}
5823
5824	/*
5825	* Allocate the Backend struct for a connected background worker, but don't
5826	* add it to the list of backends just yet.
5827	*
5828	* On failure, return false without changing any worker state.
5829	*
5830	* Some info from the Backend is copied into the passed rw.
5831	*/
5832	static bool
5833	assign_backendlist_entry(RegisteredBgWorker *rw)
5834	{
5835	Backend *bn;
5836
5837	/*
5838	* Compute the cancel key that will be assigned to this session. We
5839	* probably don't need cancel keys for background workers, but we'd better
5840	* have something random in the field to prevent unfriendly people from
5841	* sending cancels to them.
5842	*/
5843	if (!RandomCancelKey(&MyCancelKey))
5844	{
5845	ereport(LOG,
5846	(errcode(ERRCODE_INTERNAL_ERROR),
5847	errmsg("could not generate random cancel key")));
5848	return false;
5849	}
5850
5851	bn = malloc(sizeof(Backend));
5852	if (bn == NULL)
5853	{
5854	ereport(LOG,
5855	(errcode(ERRCODE_OUT_OF_MEMORY),
5856	errmsg("out of memory")));
5857	return false;
5858	}
5859
5860	bn->cancel_key = MyCancelKey;
5861	bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();
5862	bn->bkend_type = BACKEND_TYPE_BGWORKER;
5863	bn->dead_end = false;
5864	bn->bgworker_notify = false;
5865
5866	rw->rw_backend = bn;
5867	rw->rw_child_slot = bn->child_slot;
5868
5869	return true;
5870	}
5871
5872	/*
5873	* If the time is right, start background worker(s).
5874	*
5875	* As a side effect, the bgworker control variables are set or reset
5876	* depending on whether more workers may need to be started.
5877	*
5878	* We limit the number of workers started per call, to avoid consuming the
5879	* postmaster's attention for too long when many such requests are pending.
5880	* As long as StartWorkerNeeded is true, ServerLoop will not block and will
5881	* call this function again after dealing with any other issues.
5882	*/
5883	static void
5884	maybe_start_bgworkers(void)
5885	{
5886	#define MAX_BGWORKERS_TO_LAUNCH 100
5887	int num_launched = `0`;
5888	TimestampTz now = `0`;
5889	slist_mutable_iter iter;
5890
5891	/*
5892	* During crash recovery, we have no need to be called until the state
5893	* transition out of recovery.
5894	*/
5895	if (FatalError)
5896	{
5897	StartWorkerNeeded = false;
5898	HaveCrashedWorker = false;
5899	return;
5900	}
5901
5902	/ Don't need to be called again unless we find a reason for it below /
5903	StartWorkerNeeded = false;
5904	HaveCrashedWorker = false;
5905
5906	slist_foreach_modify(iter, &BackgroundWorkerList)
5907	{
5908	RegisteredBgWorker *rw;
5909
5910	rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
5911
5912	/ ignore if already running /
5913	if (rw->rw_pid != `0`)
5914	continue;
5915
5916	/ if marked for death, clean up and remove from list /
5917	if (rw->rw_terminate)
5918	{
5919	ForgetBackgroundWorker(&iter);
5920	continue;
5921	}
5922
5923	/*
5924	* If this worker has crashed previously, maybe it needs to be
5925	* restarted (unless on registration it specified it doesn't want to
5926	* be restarted at all). Check how long ago did a crash last happen.
5927	* If the last crash is too recent, don't start it right away; let it
5928	* be restarted once enough time has passed.
5929	*/
5930	if (rw->rw_crashed_at != `0`)
5931	{
5932	if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
5933	{
5934	int notify_pid;
5935
5936	notify_pid = rw->rw_worker.bgw_notify_pid;
5937
5938	ForgetBackgroundWorker(&iter);
5939
5940	/ Report worker is gone now. /
5941	if (notify_pid != `0`)
5942	kill(notify_pid, SIGUSR1);
5943
5944	continue;
5945	}
5946
5947	/ read system time only when needed /
5948	if (now == `0`)
5949	now = GetCurrentTimestamp();
5950
5951	if (!TimestampDifferenceExceeds(rw->rw_crashed_at, now,
5952	rw->rw_worker.bgw_restart_time * `1000`))
5953	{
5954	/ Set flag to remember that we have workers to start later /
5955	HaveCrashedWorker = true;
5956	continue;
5957	}
5958	}
5959
5960	if (bgworker_should_start_now(rw->rw_worker.bgw_start_time))
5961	{
5962	/ reset crash time before trying to start worker /
5963	rw->rw_crashed_at = `0`;
5964
5965	/*
5966	* Try to start the worker.
5967	*
5968	* On failure, give up processing workers for now, but set
5969	* StartWorkerNeeded so we'll come back here on the next iteration
5970	* of ServerLoop to try again. (We don't want to wait, because
5971	* there might be additional ready-to-run workers.) We could set
5972	* HaveCrashedWorker as well, since this worker is now marked
5973	* crashed, but there's no need because the next run of this
5974	* function will do that.
5975	*/
5976	if (!do_start_bgworker(rw))
5977	{
5978	StartWorkerNeeded = true;
5979	return;
5980	}
5981
5982	/*
5983	* If we've launched as many workers as allowed, quit, but have
5984	* ServerLoop call us again to look for additional ready-to-run
5985	* workers. There might not be any, but we'll find out the next
5986	* time we run.
5987	*/
5988	if (++num_launched >= MAX_BGWORKERS_TO_LAUNCH)
5989	{
5990	StartWorkerNeeded = true;
5991	return;
5992	}
5993	}
5994	}
5995	}
5996
5997	/*
5998	* When a backend asks to be notified about worker state changes, we
5999	* set a flag in its backend entry. The background worker machinery needs
6000	* to know when such backends exit.
6001	*/
6002	bool
6003	PostmasterMarkPIDForWorkerNotify(int pid)
6004	{
6005	dlist_iter iter;
6006	Backend *bp;
6007
6008	dlist_foreach(iter, &BackendList)
6009	{
6010	bp = dlist_container(Backend, elem, iter.cur);
6011	if (bp->pid == pid)
6012	{
6013	bp->bgworker_notify = true;
6014	return true;
6015	}
6016	}
6017	return false;
6018	}
6019
6020	#ifdef EXEC_BACKEND
6021
6022	/*
6023	* The following need to be available to the save/restore_backend_variables
6024	* functions. They are marked NON_EXEC_STATIC in their home modules.
6025	*/
6026	extern slock_t *ShmemLock;
6027	extern slock_t *ProcStructLock;
6028	extern PGPROC *AuxiliaryProcs;
6029	extern PMSignalData *PMSignalState;
6030	extern pgsocket pgStatSock;
6031	extern pg_time_t first_syslogger_file_time;
6032
6033	#ifndef WIN32
6034	#define write_inheritable_socket(dest, src, childpid) ((*(dest) = (src)), true)
6035	#define read_inheritable_socket(dest, src) ((dest) = (src))
6036	#else
6037	static bool write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE child);
6038	static bool write_inheritable_socket(InheritableSocket *dest, SOCKET src,
6039	pid_t childPid);
6040	static void read_inheritable_socket(SOCKET dest, InheritableSocket src);
6041	#endif
6042
6043
6044	/ Save critical backend variables into the BackendParameters struct /
6045	#ifndef WIN32
6046	static bool
6047	save_backend_variables(BackendParameters param, Port port)
6048	#else
6049	static bool
6050	save_backend_variables(BackendParameters param, Port port,
6051	HANDLE childProcess, pid_t childPid)
6052	#endif
6053	{
6054	memcpy(&param->port, port, sizeof(Port));
6055	if (!write_inheritable_socket(&param->portsocket, port->sock, childPid))
6056	return false;
6057
6058	strlcpy(param->DataDir, DataDir, MAXPGPATH);
6059
6060	memcpy(&param->ListenSocket, &ListenSocket, sizeof(ListenSocket));
6061
6062	param->MyCancelKey = MyCancelKey;
6063	param->MyPMChildSlot = MyPMChildSlot;
6064
6065	#ifdef WIN32
6066	param->ShmemProtectiveRegion = ShmemProtectiveRegion;
6067	#endif
6068	param->UsedShmemSegID = UsedShmemSegID;
6069	param->UsedShmemSegAddr = UsedShmemSegAddr;
6070
6071	param->ShmemLock = ShmemLock;
6072	param->ShmemVariableCache = ShmemVariableCache;
6073	param->ShmemBackendArray = ShmemBackendArray;
6074
6075	#ifndef HAVE_SPINLOCKS
6076	param->SpinlockSemaArray = SpinlockSemaArray;
6077	#endif
6078	param->NamedLWLockTrancheRequests = NamedLWLockTrancheRequests;
6079	param->NamedLWLockTrancheArray = NamedLWLockTrancheArray;
6080	param->MainLWLockArray = MainLWLockArray;
6081	param->ProcStructLock = ProcStructLock;
6082	param->ProcGlobal = ProcGlobal;
6083	param->AuxiliaryProcs = AuxiliaryProcs;
6084	param->PreparedXactProcs = PreparedXactProcs;
6085	param->PMSignalState = PMSignalState;
6086	if (!write_inheritable_socket(&param->pgStatSock, pgStatSock, childPid))
6087	return false;
6088
6089	param->PostmasterPid = PostmasterPid;
6090	param->PgStartTime = PgStartTime;
6091	param->PgReloadTime = PgReloadTime;
6092	param->first_syslogger_file_time = first_syslogger_file_time;
6093
6094	param->redirection_done = redirection_done;
6095	param->IsBinaryUpgrade = IsBinaryUpgrade;
6096	param->max_safe_fds = max_safe_fds;
6097
6098	param->MaxBackends = MaxBackends;
6099
6100	#ifdef WIN32
6101	param->PostmasterHandle = PostmasterHandle;
6102	if (!write_duplicated_handle(&param->initial_signal_pipe,
6103	pgwin32_create_signal_listener(childPid),
6104	childProcess))
6105	return false;
6106	#else
6107	memcpy(&param->postmaster_alive_fds, &postmaster_alive_fds,
6108	sizeof(postmaster_alive_fds));
6109	#endif
6110
6111	memcpy(&param->syslogPipe, &syslogPipe, sizeof(syslogPipe));
6112
6113	strlcpy(param->my_exec_path, my_exec_path, MAXPGPATH);
6114
6115	strlcpy(param->pkglib_path, pkglib_path, MAXPGPATH);
6116
6117	strlcpy(param->ExtraOptions, ExtraOptions, MAXPGPATH);
6118
6119	return true;
6120	}
6121
6122
6123	#ifdef WIN32
6124	/*
6125	* Duplicate a handle for usage in a child process, and write the child
6126	* process instance of the handle to the parameter file.
6127	*/
6128	static bool
6129	write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE childProcess)
6130	{
6131	HANDLE hChild = INVALID_HANDLE_VALUE;
6132
6133	if (!DuplicateHandle(GetCurrentProcess(),
6134	src,
6135	childProcess,
6136	&hChild,
6137	`0`,
6138	TRUE,
6139	DUPLICATE_CLOSE_SOURCE \| DUPLICATE_SAME_ACCESS))
6140	{
6141	ereport(LOG,
6142	(errmsg_internal("could not duplicate handle to be written to backend parameter file: error code %lu",
6143	GetLastError())));
6144	return false;
6145	}
6146
6147	*dest = hChild;
6148	return true;
6149	}
6150
6151	/*
6152	* Duplicate a socket for usage in a child process, and write the resulting
6153	* structure to the parameter file.
6154	* This is required because a number of LSPs (Layered Service Providers) very
6155	* common on Windows (antivirus, firewalls, download managers etc) break
6156	* straight socket inheritance.
6157	*/
6158	static bool
6159	write_inheritable_socket(InheritableSocket *dest, SOCKET src, pid_t childpid)
6160	{
6161	dest->origsocket = src;
6162	if (src != `0` && src != PGINVALID_SOCKET)
6163	{
6164	/ Actual socket /
6165	if (WSADuplicateSocket(src, childpid, &dest->wsainfo) != `0`)
6166	{
6167	ereport(LOG,
6168	(errmsg("could not duplicate socket %d for use in backend: error code %d",
6169	(int) src, WSAGetLastError())));
6170	return false;
6171	}
6172	}
6173	return true;
6174	}
6175
6176	/*
6177	* Read a duplicate socket structure back, and get the socket descriptor.
6178	*/
6179	static void
6180	read_inheritable_socket(SOCKET dest, InheritableSocket src)
6181	{
6182	SOCKET s;
6183
6184	if (src->origsocket == PGINVALID_SOCKET \|\| src->origsocket == `0`)
6185	{
6186	/ Not a real socket! /
6187	*dest = src->origsocket;
6188	}
6189	else
6190	{
6191	/ Actual socket, so create from structure /
6192	s = WSASocket(FROM_PROTOCOL_INFO,
6193	FROM_PROTOCOL_INFO,
6194	FROM_PROTOCOL_INFO,
6195	&src->wsainfo,
6196	`0`,
6197	`0`);
6198	if (s == INVALID_SOCKET)
6199	{
6200	write_stderr("could not create inherited socket: error code %d\n",
6201	WSAGetLastError());
6202	exit(`1`);
6203	}
6204	*dest = s;
6205
6206	/*
6207	* To make sure we don't get two references to the same socket, close
6208	* the original one. (This would happen when inheritance actually
6209	* works..
6210	*/
6211	closesocket(src->origsocket);
6212	}
6213	}
6214	#endif
6215
6216	static void
6217	read_backend_variables(char id, Port port)
6218	{
6219	BackendParameters param;
6220
6221	#ifndef WIN32
6222	/ Non-win32 implementation reads from file /
6223	FILE *fp;
6224
6225	/ Open file /
6226	fp = AllocateFile(id, PG_BINARY_R);
6227	if (!fp)
6228	{
6229	write_stderr("could not open backend variables file \"%s\": %s\n",
6230	id, strerror(errno));
6231	exit(`1`);
6232	}
6233
6234	if (fread(&param, sizeof(param), `1`, fp) != `1`)
6235	{
6236	write_stderr("could not read from backend variables file \"%s\": %s\n",
6237	id, strerror(errno));
6238	exit(`1`);
6239	}
6240
6241	/ Release file /
6242	FreeFile(fp);
6243	if (unlink(id) != `0`)
6244	{
6245	write_stderr("could not remove file \"%s\": %s\n",
6246	id, strerror(errno));
6247	exit(`1`);
6248	}
6249	#else
6250	/ Win32 version uses mapped file /
6251	HANDLE paramHandle;
6252	BackendParameters *paramp;
6253
6254	#ifdef _WIN64
6255	paramHandle = (HANDLE) _atoi64(id);
6256	#else
6257	paramHandle = (HANDLE) atol(id);
6258	#endif
6259	paramp = MapViewOfFile(paramHandle, FILE_MAP_READ, `0`, `0`, `0`);
6260	if (!paramp)
6261	{
6262	write_stderr("could not map view of backend variables: error code %lu\n",
6263	GetLastError());
6264	exit(`1`);
6265	}
6266
6267	memcpy(&param, paramp, sizeof(BackendParameters));
6268
6269	if (!UnmapViewOfFile(paramp))
6270	{
6271	write_stderr("could not unmap view of backend variables: error code %lu\n",
6272	GetLastError());
6273	exit(`1`);
6274	}
6275
6276	if (!CloseHandle(paramHandle))
6277	{
6278	write_stderr("could not close handle to backend parameter variables: error code %lu\n",
6279	GetLastError());
6280	exit(`1`);
6281	}
6282	#endif
6283
6284	restore_backend_variables(&param, port);
6285	}
6286
6287	/ Restore critical backend variables from the BackendParameters struct /
6288	static void
6289	restore_backend_variables(BackendParameters param, Port port)
6290	{
6291	memcpy(port, &param->port, sizeof(Port));
6292	read_inheritable_socket(&port->sock, &param->portsocket);
6293
6294	SetDataDir(param->DataDir);
6295
6296	memcpy(&ListenSocket, &param->ListenSocket, sizeof(ListenSocket));
6297
6298	MyCancelKey = param->MyCancelKey;
6299	MyPMChildSlot = param->MyPMChildSlot;
6300
6301	#ifdef WIN32
6302	ShmemProtectiveRegion = param->ShmemProtectiveRegion;
6303	#endif
6304	UsedShmemSegID = param->UsedShmemSegID;
6305	UsedShmemSegAddr = param->UsedShmemSegAddr;
6306
6307	ShmemLock = param->ShmemLock;
6308	ShmemVariableCache = param->ShmemVariableCache;
6309	ShmemBackendArray = param->ShmemBackendArray;
6310
6311	#ifndef HAVE_SPINLOCKS
6312	SpinlockSemaArray = param->SpinlockSemaArray;
6313	#endif
6314	NamedLWLockTrancheRequests = param->NamedLWLockTrancheRequests;
6315	NamedLWLockTrancheArray = param->NamedLWLockTrancheArray;
6316	MainLWLockArray = param->MainLWLockArray;
6317	ProcStructLock = param->ProcStructLock;
6318	ProcGlobal = param->ProcGlobal;
6319	AuxiliaryProcs = param->AuxiliaryProcs;
6320	PreparedXactProcs = param->PreparedXactProcs;
6321	PMSignalState = param->PMSignalState;
6322	read_inheritable_socket(&pgStatSock, &param->pgStatSock);
6323
6324	PostmasterPid = param->PostmasterPid;
6325	PgStartTime = param->PgStartTime;
6326	PgReloadTime = param->PgReloadTime;
6327	first_syslogger_file_time = param->first_syslogger_file_time;
6328
6329	redirection_done = param->redirection_done;
6330	IsBinaryUpgrade = param->IsBinaryUpgrade;
6331	max_safe_fds = param->max_safe_fds;
6332
6333	MaxBackends = param->MaxBackends;
6334
6335	#ifdef WIN32
6336	PostmasterHandle = param->PostmasterHandle;
6337	pgwin32_initial_signal_pipe = param->initial_signal_pipe;
6338	#else
6339	memcpy(&postmaster_alive_fds, &param->postmaster_alive_fds,
6340	sizeof(postmaster_alive_fds));
6341	#endif
6342
6343	memcpy(&syslogPipe, &param->syslogPipe, sizeof(syslogPipe));
6344
6345	strlcpy(my_exec_path, param->my_exec_path, MAXPGPATH);
6346
6347	strlcpy(pkglib_path, param->pkglib_path, MAXPGPATH);
6348
6349	strlcpy(ExtraOptions, param->ExtraOptions, MAXPGPATH);
6350	}
6351
6352
6353	Size
6354	ShmemBackendArraySize(void)
6355	{
6356	return mul_size(MaxLivePostmasterChildren(), sizeof(Backend));
6357	}
6358
6359	void
6360	ShmemBackendArrayAllocation(void)
6361	{
6362	Size size = ShmemBackendArraySize();
6363
6364	ShmemBackendArray = (Backend *) ShmemAlloc(size);
6365	/ Mark all slots as empty /
6366	memset(ShmemBackendArray, `0`, size);
6367	}
6368
6369	static void
6370	ShmemBackendArrayAdd(Backend *bn)
6371	{
6372	/ The array slot corresponding to my PMChildSlot should be free /
6373	int i = bn->child_slot - `1`;
6374
6375	Assert(ShmemBackendArray[i].pid == `0`);
6376	ShmemBackendArray[i] = *bn;
6377	}
6378
6379	static void
6380	ShmemBackendArrayRemove(Backend *bn)
6381	{
6382	int i = bn->child_slot - `1`;
6383
6384	Assert(ShmemBackendArray[i].pid == bn->pid);
6385	/ Mark the slot as empty /
6386	ShmemBackendArray[i].pid = `0`;
6387	}
6388	#endif /* EXEC_BACKEND */
6389
6390
6391	#ifdef WIN32
6392
6393	/*
6394	* Subset implementation of waitpid() for Windows. We assume pid is -1
6395	* (that is, check all child processes) and options is WNOHANG (don't wait).
6396	*/
6397	static pid_t
6398	waitpid(pid_t pid, int exitstatus, int* options)
6399	{
6400	DWORD dwd;
6401	ULONG_PTR key;
6402	OVERLAPPED *ovl;
6403
6404	/*
6405	* Check if there are any dead children. If there are, return the pid of
6406	* the first one that died.
6407	*/
6408	if (GetQueuedCompletionStatus(win32ChildQueue, &dwd, &key, &ovl, `0`))
6409	{
6410	exitstatus = (int*) key;
6411	return dwd;
6412	}
6413
6414	return -`1`;
6415	}
6416
6417	/*
6418	* Note! Code below executes on a thread pool! All operations must
6419	* be thread safe! Note that elog() and friends must not be used.
6420	*/
6421	static void WINAPI
6422	pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
6423	{
6424	win32_deadchild_waitinfo childinfo = (win32_deadchild_waitinfo ) lpParameter;
6425	DWORD exitcode;
6426
6427	if (TimerOrWaitFired)
6428	return; / timeout. Should never happen, since we use*
6429	* INFINITE as timeout value. */
6430
6431	/*
6432	* Remove handle from wait - required even though it's set to wait only
6433	* once
6434	*/
6435	UnregisterWaitEx(childinfo->waitHandle, NULL);
6436
6437	if (!GetExitCodeProcess(childinfo->procHandle, &exitcode))
6438	{
6439	/*
6440	* Should never happen. Inform user and set a fixed exitcode.
6441	*/
6442	write_stderr("could not read exit code for process\n");
6443	exitcode = `255`;
6444	}
6445
6446	if (!PostQueuedCompletionStatus(win32ChildQueue, childinfo->procId, (ULONG_PTR) exitcode, NULL))
6447	write_stderr("could not post child completion status\n");
6448
6449	/*
6450	* Handle is per-process, so we close it here instead of in the
6451	* originating thread
6452	*/
6453	CloseHandle(childinfo->procHandle);
6454
6455	/*
6456	* Free struct that was allocated before the call to
6457	* RegisterWaitForSingleObject()
6458	*/
6459	free(childinfo);
6460
6461	/ Queue SIGCHLD signal /
6462	pg_queue_signal(SIGCHLD);
6463	}
6464	#endif /* WIN32 */
6465
6466	/*
6467	* Initialize one and only handle for monitoring postmaster death.
6468	*
6469	* Called once in the postmaster, so that child processes can subsequently
6470	* monitor if their parent is dead.
6471	*/
6472	static void
6473	InitPostmasterDeathWatchHandle(void)
6474	{
6475	#ifndef WIN32
6476
6477	/*
6478	* Create a pipe. Postmaster holds the write end of the pipe open
6479	* (POSTMASTER_FD_OWN), and children hold the read end. Children can pass
6480	* the read file descriptor to select() to wake up in case postmaster
6481	* dies, or check for postmaster death with a (read() == 0). Children must
6482	* close the write end as soon as possible after forking, because EOF
6483	* won't be signaled in the read end until all processes have closed the
6484	* write fd. That is taken care of in ClosePostmasterPorts().
6485	*/
6486	Assert(MyProcPid == PostmasterPid);
6487	if (pipe(postmaster_alive_fds) < `0`)
6488	ereport(FATAL,
6489	(errcode_for_file_access(),
6490	errmsg_internal("could not create pipe to monitor postmaster death: %m")));
6491
6492	/*
6493	* Set O_NONBLOCK to allow testing for the fd's presence with a read()
6494	* call.
6495	*/
6496	if (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFL, O_NONBLOCK) == -`1`)
6497	ereport(FATAL,
6498	(errcode_for_socket_access(),
6499	errmsg_internal("could not set postmaster death monitoring pipe to nonblocking mode: %m")));
6500	#else
6501
6502	/*
6503	* On Windows, we use a process handle for the same purpose.
6504	*/
6505	if (DuplicateHandle(GetCurrentProcess(),
6506	GetCurrentProcess(),
6507	GetCurrentProcess(),
6508	&PostmasterHandle,
6509	`0`,
6510	TRUE,
6511	DUPLICATE_SAME_ACCESS) == `0`)
6512	ereport(FATAL,
6513	(errmsg_internal("could not duplicate postmaster handle: error code %lu",
6514	GetLastError())));
6515	#endif /* WIN32 */
6516	}
6517

Browse the source code of PostgreSQL/src/backend/postmaster/postmaster.c