1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * postmaster.c |
4 | * This program acts as a clearing house for requests to the |
5 | * POSTGRES system. Frontend programs send a startup message |
6 | * to the Postmaster and the postmaster uses the info in the |
7 | * message to setup a backend process. |
8 | * |
9 | * The postmaster also manages system-wide operations such as |
10 | * startup and shutdown. The postmaster itself doesn't do those |
11 | * operations, mind you --- it just forks off a subprocess to do them |
12 | * at the right times. It also takes care of resetting the system |
13 | * if a backend crashes. |
14 | * |
15 | * The postmaster process creates the shared memory and semaphore |
16 | * pools during startup, but as a rule does not touch them itself. |
17 | * In particular, it is not a member of the PGPROC array of backends |
18 | * and so it cannot participate in lock-manager operations. Keeping |
19 | * the postmaster away from shared memory operations makes it simpler |
20 | * and more reliable. The postmaster is almost always able to recover |
21 | * from crashes of individual backends by resetting shared memory; |
22 | * if it did much with shared memory then it would be prone to crashing |
23 | * along with the backends. |
24 | * |
25 | * When a request message is received, we now fork() immediately. |
26 | * The child process performs authentication of the request, and |
27 | * then becomes a backend if successful. This allows the auth code |
28 | * to be written in a simple single-threaded style (as opposed to the |
29 | * crufty "poor man's multitasking" code that used to be needed). |
30 | * More importantly, it ensures that blockages in non-multithreaded |
31 | * libraries like SSL or PAM cannot cause denial of service to other |
32 | * clients. |
33 | * |
34 | * |
35 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
36 | * Portions Copyright (c) 1994, Regents of the University of California |
37 | * |
38 | * |
39 | * IDENTIFICATION |
40 | * src/backend/postmaster/postmaster.c |
41 | * |
42 | * NOTES |
43 | * |
44 | * Initialization: |
45 | * The Postmaster sets up shared memory data structures |
46 | * for the backends. |
47 | * |
48 | * Synchronization: |
49 | * The Postmaster shares memory with the backends but should avoid |
50 | * touching shared memory, so as not to become stuck if a crashing |
51 | * backend screws up locks or shared memory. Likewise, the Postmaster |
52 | * should never block on messages from frontend clients. |
53 | * |
54 | * Garbage Collection: |
55 | * The Postmaster cleans up after backends if they have an emergency |
56 | * exit and/or core dump. |
57 | * |
58 | * Error Reporting: |
59 | * Use write_stderr() only for reporting "interactive" errors |
60 | * (essentially, bogus arguments on the command line). Once the |
61 | * postmaster is launched, use ereport(). |
62 | * |
63 | *------------------------------------------------------------------------- |
64 | */ |
65 | |
66 | #include "postgres.h" |
67 | |
68 | #include <unistd.h> |
69 | #include <signal.h> |
70 | #include <time.h> |
71 | #include <sys/wait.h> |
72 | #include <ctype.h> |
73 | #include <sys/stat.h> |
74 | #include <sys/socket.h> |
75 | #include <fcntl.h> |
76 | #include <sys/param.h> |
77 | #include <netdb.h> |
78 | #include <limits.h> |
79 | |
80 | #ifdef HAVE_SYS_SELECT_H |
81 | #include <sys/select.h> |
82 | #endif |
83 | |
84 | #ifdef USE_BONJOUR |
85 | #include <dns_sd.h> |
86 | #endif |
87 | |
88 | #ifdef USE_SYSTEMD |
89 | #include <systemd/sd-daemon.h> |
90 | #endif |
91 | |
92 | #ifdef HAVE_PTHREAD_IS_THREADED_NP |
93 | #include <pthread.h> |
94 | #endif |
95 | |
96 | #include "access/transam.h" |
97 | #include "access/xlog.h" |
98 | #include "bootstrap/bootstrap.h" |
99 | #include "catalog/pg_control.h" |
100 | #include "common/file_perm.h" |
101 | #include "common/ip.h" |
102 | #include "common/string.h" |
103 | #include "lib/ilist.h" |
104 | #include "libpq/auth.h" |
105 | #include "libpq/libpq.h" |
106 | #include "libpq/pqformat.h" |
107 | #include "libpq/pqsignal.h" |
108 | #include "miscadmin.h" |
109 | #include "pg_getopt.h" |
110 | #include "pgstat.h" |
111 | #include "port/pg_bswap.h" |
112 | #include "postmaster/autovacuum.h" |
113 | #include "postmaster/bgworker_internals.h" |
114 | #include "postmaster/fork_process.h" |
115 | #include "postmaster/pgarch.h" |
116 | #include "postmaster/postmaster.h" |
117 | #include "postmaster/syslogger.h" |
118 | #include "replication/logicallauncher.h" |
119 | #include "replication/walsender.h" |
120 | #include "storage/fd.h" |
121 | #include "storage/ipc.h" |
122 | #include "storage/pg_shmem.h" |
123 | #include "storage/pmsignal.h" |
124 | #include "storage/proc.h" |
125 | #include "tcop/tcopprot.h" |
126 | #include "utils/builtins.h" |
127 | #include "utils/datetime.h" |
128 | #include "utils/memutils.h" |
129 | #include "utils/pidfile.h" |
130 | #include "utils/ps_status.h" |
131 | #include "utils/timeout.h" |
132 | #include "utils/timestamp.h" |
133 | #include "utils/varlena.h" |
134 | |
135 | #ifdef EXEC_BACKEND |
136 | #include "storage/spin.h" |
137 | #endif |
138 | |
139 | |
140 | /* |
141 | * Possible types of a backend. Beyond being the possible bkend_type values in |
142 | * struct bkend, these are OR-able request flag bits for SignalSomeChildren() |
143 | * and CountChildren(). |
144 | */ |
/* Individual backend flavors.  Each one owns a distinct bit. */
#define BACKEND_TYPE_NORMAL		0x0001	/* normal backend */
#define BACKEND_TYPE_AUTOVAC	0x0002	/* autovacuum worker process */
#define BACKEND_TYPE_WALSND		0x0004	/* walsender process */
#define BACKEND_TYPE_BGWORKER	0x0008	/* bgworker process */

/* Mask covering every flavor listed above (evaluates to 0x000F). */
#define BACKEND_TYPE_ALL \
	(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC | \
	 BACKEND_TYPE_WALSND | BACKEND_TYPE_BGWORKER)

/* Mask covering the two "worker" flavors: autovacuum workers and bgworkers. */
#define BACKEND_TYPE_WORKER \
	(BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER)
152 | |
153 | /* |
154 | * List of active backends (or child processes anyway; we don't actually |
155 | * know whether a given child has become a backend or is still in the |
156 | * authorization phase). This is used mainly to keep track of how many |
157 | * children we have and send them appropriate signals when necessary. |
158 | * |
159 | * "Special" children such as the startup, bgwriter and autovacuum launcher |
160 | * tasks are not in this list. Autovacuum worker and walsender are in it. |
161 | * Also, "dead_end" children are in it: these are children launched just for |
162 | * the purpose of sending a friendly rejection message to a would-be client. |
163 | * We must track them because they are attached to shared memory, but we know |
164 | * they will never become live backends. dead_end children are not assigned a |
165 | * PMChildSlot. |
166 | * |
167 | * Background workers are in this list, too. |
168 | */ |
169 | typedef struct bkend |
170 | { |
171 | pid_t pid; /* process id of backend */ |
172 | int32 cancel_key; /* cancel key for cancels for this backend */ |
173 | int child_slot; /* PMChildSlot for this backend, if any */ |
174 | |
175 | /* |
176 | * Flavor of backend or auxiliary process. Note that BACKEND_TYPE_WALSND |
177 | * backends initially announce themselves as BACKEND_TYPE_NORMAL, so if |
178 | * bkend_type is normal, you should check for a recent transition. |
179 | */ |
180 | int bkend_type; |
181 | bool dead_end; /* is it going to send an error and quit? */ |
182 | bool bgworker_notify; /* gets bgworker start/stop notifications */ |
183 | dlist_node elem; /* list link in BackendList */ |
184 | } Backend; |
185 | |
186 | static dlist_head BackendList = DLIST_STATIC_INIT(BackendList); |
187 | |
188 | #ifdef EXEC_BACKEND |
189 | static Backend *ShmemBackendArray; |
190 | #endif |
191 | |
192 | BackgroundWorker *MyBgworkerEntry = NULL; |
193 | |
194 | |
195 | |
196 | /* The socket number we are listening for connections on */ |
197 | int PostPortNumber; |
198 | |
199 | /* The directory names for Unix socket(s) */ |
200 | char *Unix_socket_directories; |
201 | |
202 | /* The TCP listen address(es) */ |
203 | char *ListenAddresses; |
204 | |
205 | /* |
206 | * ReservedBackends is the number of backends reserved for superuser use. |
207 | * This number is taken out of the pool size given by MaxConnections so |
208 | * number of backend slots available to non-superusers is |
209 | * (MaxConnections - ReservedBackends). Note what this really means is |
210 | * "if there are <= ReservedBackends connections available, only superusers |
211 | * can make new connections" --- pre-existing superuser connections don't |
212 | * count against the limit. |
213 | */ |
214 | int ReservedBackends; |
215 | |
216 | /* The socket(s) we're listening to. */ |
217 | #define MAXLISTEN 64 |
218 | static pgsocket ListenSocket[MAXLISTEN]; |
219 | |
220 | /* |
221 | * Set by the -o option |
222 | */ |
223 | static char [MAXPGPATH]; |
224 | |
/*
 * These globals control the behavior of the postmaster in case some
 * backend dumps core.  Normally, it kills all peers of the dead backend
 * and reinitializes shared memory.  By specifying -T or -n, we can have
 * the postmaster stop (rather than kill) peers and not reinitialize
 * shared data structures.  (Reinit is currently dead code, though.)
 *
 * Reinit is cleared by the -n switch; SendStop is set by the -T switch.
 * Both are plain true/false flags.
 */
static bool Reinit = true;
static bool SendStop = false;
234 | |
/* still more option variables */
bool		EnableSSL = false;

int			PreAuthDelay = 0;
int			AuthenticationTimeout = 60;

/* for ps display and logging */
bool		log_hostname;
bool		Log_connections = false;
bool		Db_user_namespace = false;

bool		enable_bonjour = false;
char	   *bonjour_name;
bool		restart_after_crash = true;

/*
 * PIDs of special child processes.  Each is 0 while the corresponding
 * process is not running.
 */
static pid_t StartupPID = 0;
static pid_t BgWriterPID = 0;
static pid_t CheckpointerPID = 0;
static pid_t WalWriterPID = 0;
static pid_t WalReceiverPID = 0;
static pid_t AutoVacPID = 0;
static pid_t PgArchPID = 0;
static pid_t PgStatPID = 0;
static pid_t SysLoggerPID = 0;
259 | |
/* Startup process's status */
typedef enum
{
	STARTUP_NOT_RUNNING,
	STARTUP_RUNNING,
	/* we sent it a SIGQUIT or SIGKILL */
	STARTUP_SIGNALED,
	STARTUP_CRASHED
} StartupStatusEnum;

static StartupStatusEnum StartupStatus = STARTUP_NOT_RUNNING;

/*
 * Startup/shutdown state.  The values held by "Shutdown" below; the
 * initial value is NoShutdown.
 */
#define NoShutdown			0
#define SmartShutdown		1
#define FastShutdown		2
#define ImmediateShutdown	3

static int	Shutdown = NoShutdown;

/* T if recovering from backend crash */
static bool FatalError = false;
280 | |
281 | /* |
282 | * We use a simple state machine to control startup, shutdown, and |
283 | * crash recovery (which is rather like shutdown followed by startup). |
284 | * |
285 | * After doing all the postmaster initialization work, we enter PM_STARTUP |
286 | * state and the startup process is launched. The startup process begins by |
287 | * reading the control file and other preliminary initialization steps. |
288 | * In a normal startup, or after crash recovery, the startup process exits |
289 | * with exit code 0 and we switch to PM_RUN state. However, archive recovery |
290 | * is handled specially since it takes much longer and we would like to support |
291 | * hot standby during archive recovery. |
292 | * |
293 | * When the startup process is ready to start archive recovery, it signals the |
294 | * postmaster, and we switch to PM_RECOVERY state. The background writer and |
295 | * checkpointer are launched, while the startup process continues applying WAL. |
296 | * If Hot Standby is enabled, then, after reaching a consistent point in WAL |
297 | * redo, startup process signals us again, and we switch to PM_HOT_STANDBY |
298 | * state and begin accepting connections to perform read-only queries. When |
299 | * archive recovery is finished, the startup process exits with exit code 0 |
300 | * and we switch to PM_RUN state. |
301 | * |
302 | * Normal child backends can only be launched when we are in PM_RUN or |
303 | * PM_HOT_STANDBY state. (We also allow launch of normal |
304 | * child backends in PM_WAIT_BACKUP state, but only for superusers.) |
305 | * In other states we handle connection requests by launching "dead_end" |
306 | * child processes, which will simply send the client an error message and |
307 | * quit. (We track these in the BackendList so that we can know when they |
308 | * are all gone; this is important because they're still connected to shared |
309 | * memory, and would interfere with an attempt to destroy the shmem segment, |
310 | * possibly leading to SHMALL failure when we try to make a new one.) |
311 | * In PM_WAIT_DEAD_END state we are waiting for all the dead_end children |
312 | * to drain out of the system, and therefore stop accepting connection |
313 | * requests at all until the last existing child has quit (which hopefully |
314 | * will not be very long). |
315 | * |
 * Notice that this state variable does not distinguish *why* we entered
 * states later than PM_RUN --- Shutdown and FatalError must be consulted
 * to find that out.  FatalError is never true in PM_RECOVERY, PM_HOT_STANDBY,
 * or PM_RUN states, nor in PM_SHUTDOWN states (because we don't enter those
 * states when trying to recover from a crash).  It can be true in PM_STARTUP
 * state, because we don't clear it until we've successfully started WAL redo.
322 | */ |
/*
 * Postmaster states, listed in the order given here; pmState starts out as
 * PM_INIT.
 */
typedef enum
{
	/* postmaster starting */
	PM_INIT,
	/* waiting for startup subprocess */
	PM_STARTUP,
	/* in archive recovery mode */
	PM_RECOVERY,
	/* in hot standby mode */
	PM_HOT_STANDBY,
	/* normal "database is alive" state */
	PM_RUN,
	/* waiting for online backup mode to end */
	PM_WAIT_BACKUP,
	/* waiting for read only backends to exit */
	PM_WAIT_READONLY,
	/* waiting for live backends to exit */
	PM_WAIT_BACKENDS,
	/* waiting for checkpointer to do shutdown ckpt */
	PM_SHUTDOWN,
	/* waiting for archiver and walsenders to finish */
	PM_SHUTDOWN_2,
	/* waiting for dead_end children to exit */
	PM_WAIT_DEAD_END,
	/* all important children have exited */
	PM_NO_CHILDREN
} PMState;

static PMState pmState = PM_INIT;
342 | |
/*
 * Start time of SIGKILL timeout during immediate shutdown or child crash.
 * Zero means the timeout is not running.
 */
static time_t AbortStartTime = 0;

/* Length of said timeout, in seconds */
#define SIGKILL_CHILDREN_AFTER_SECS		5

/* T if we've reached PM_RUN */
static bool ReachedNormalRunning = false;

/* T during new-client authentication */
bool		ClientAuthInProgress = false;

/* stderr redirected for syslogger? */
bool		redirection_done = false;

/* received START_AUTOVAC_LAUNCHER signal */
static volatile sig_atomic_t start_autovac_launcher = false;

/* the launcher needs to be signalled to communicate some condition */
static volatile bool avlauncher_needs_signal = false;

/* received START_WALRECEIVER signal */
static volatile sig_atomic_t WalReceiverRequested = false;

/* set when there's a worker that needs to be started up */
static volatile bool StartWorkerNeeded = true;
static volatile bool HaveCrashedWorker = false;

#if defined(USE_SSL)
/* Set when and if SSL has been initialized properly */
static bool LoadedSSL = false;
#endif

#if defined(USE_BONJOUR)
static DNSServiceRef bonjour_sdref = NULL;
#endif
378 | |
379 | /* |
380 | * postmaster.c - function prototypes |
381 | */ |
382 | static void CloseServerPorts(int status, Datum arg); |
383 | static void unlink_external_pid_file(int status, Datum arg); |
384 | static void getInstallationPaths(const char *argv0); |
385 | static void checkControlFile(void); |
386 | static Port *ConnCreate(int serverFd); |
387 | static void ConnFree(Port *port); |
388 | static void reset_shared(int port); |
389 | static void SIGHUP_handler(SIGNAL_ARGS); |
390 | static void pmdie(SIGNAL_ARGS); |
391 | static void reaper(SIGNAL_ARGS); |
392 | static void sigusr1_handler(SIGNAL_ARGS); |
393 | static void startup_die(SIGNAL_ARGS); |
394 | static void dummy_handler(SIGNAL_ARGS); |
395 | static void StartupPacketTimeoutHandler(void); |
396 | static void CleanupBackend(int pid, int exitstatus); |
397 | static bool CleanupBackgroundWorker(int pid, int exitstatus); |
398 | static void HandleChildCrash(int pid, int exitstatus, const char *procname); |
399 | static void LogChildExit(int lev, const char *procname, |
400 | int pid, int exitstatus); |
401 | static void PostmasterStateMachine(void); |
402 | static void BackendInitialize(Port *port); |
403 | static void BackendRun(Port *port) pg_attribute_noreturn(); |
404 | static void ExitPostmaster(int status) pg_attribute_noreturn(); |
405 | static int ServerLoop(void); |
406 | static int BackendStartup(Port *port); |
407 | static int ProcessStartupPacket(Port *port, bool secure_done); |
408 | static void SendNegotiateProtocolVersion(List *unrecognized_protocol_options); |
409 | static void processCancelRequest(Port *port, void *pkt); |
410 | static int initMasks(fd_set *rmask); |
411 | static void report_fork_failure_to_client(Port *port, int errnum); |
412 | static CAC_state canAcceptConnections(void); |
413 | static bool RandomCancelKey(int32 *cancel_key); |
414 | static void signal_child(pid_t pid, int signal); |
415 | static bool SignalSomeChildren(int signal, int targets); |
416 | static void TerminateChildren(int signal); |
417 | |
418 | #define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL) |
419 | |
420 | static int CountChildren(int target); |
421 | static bool assign_backendlist_entry(RegisteredBgWorker *rw); |
422 | static void maybe_start_bgworkers(void); |
423 | static bool CreateOptsFile(int argc, char *argv[], char *fullprogname); |
424 | static pid_t StartChildProcess(AuxProcType type); |
425 | static void StartAutovacuumWorker(void); |
426 | static void MaybeStartWalReceiver(void); |
427 | static void InitPostmasterDeathWatchHandle(void); |
428 | |
/*
 * May the archiver be started in the current postmaster state?
 *
 * Yes in PM_RUN when XLogArchivingActive() holds.  Also yes in PM_RECOVERY
 * or PM_HOT_STANDBY when XLogArchivingAlways() holds: if WAL archiving is
 * enabled "always", the archiver may be started even during recovery.
 */
#define PgArchStartupAllowed() \
	( \
		(XLogArchivingActive() && pmState == PM_RUN) || \
		(XLogArchivingAlways() && \
		 (pmState == PM_HOT_STANDBY || pmState == PM_RECOVERY)) \
	)
439 | |
440 | #ifdef EXEC_BACKEND |
441 | |
442 | #ifdef WIN32 |
443 | #define WNOHANG 0 /* ignored, so any integer value will do */ |
444 | |
445 | static pid_t waitpid(pid_t pid, int *exitstatus, int options); |
446 | static void WINAPI pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired); |
447 | |
448 | static HANDLE win32ChildQueue; |
449 | |
450 | typedef struct |
451 | { |
452 | HANDLE waitHandle; |
453 | HANDLE procHandle; |
454 | DWORD procId; |
455 | } win32_deadchild_waitinfo; |
456 | #endif /* WIN32 */ |
457 | |
458 | static pid_t backend_forkexec(Port *port); |
459 | static pid_t internal_forkexec(int argc, char *argv[], Port *port); |
460 | |
461 | /* Type for a socket that can be inherited to a client process */ |
462 | #ifdef WIN32 |
463 | typedef struct |
464 | { |
465 | SOCKET origsocket; /* Original socket value, or PGINVALID_SOCKET |
466 | * if not a socket */ |
467 | WSAPROTOCOL_INFO wsainfo; |
468 | } InheritableSocket; |
469 | #else |
470 | typedef int InheritableSocket; |
471 | #endif |
472 | |
473 | /* |
474 | * Structure contains all variables passed to exec:ed backends |
475 | */ |
476 | typedef struct |
477 | { |
478 | Port port; |
479 | InheritableSocket portsocket; |
480 | char DataDir[MAXPGPATH]; |
481 | pgsocket ListenSocket[MAXLISTEN]; |
482 | int32 MyCancelKey; |
483 | int MyPMChildSlot; |
484 | #ifndef WIN32 |
485 | unsigned long UsedShmemSegID; |
486 | #else |
487 | void *ShmemProtectiveRegion; |
488 | HANDLE UsedShmemSegID; |
489 | #endif |
490 | void *UsedShmemSegAddr; |
491 | slock_t *ShmemLock; |
492 | VariableCache ShmemVariableCache; |
493 | Backend *ShmemBackendArray; |
494 | #ifndef HAVE_SPINLOCKS |
495 | PGSemaphore *SpinlockSemaArray; |
496 | #endif |
497 | int NamedLWLockTrancheRequests; |
498 | NamedLWLockTranche *NamedLWLockTrancheArray; |
499 | LWLockPadded *MainLWLockArray; |
500 | slock_t *ProcStructLock; |
501 | PROC_HDR *ProcGlobal; |
502 | PGPROC *AuxiliaryProcs; |
503 | PGPROC *PreparedXactProcs; |
504 | PMSignalData *PMSignalState; |
505 | InheritableSocket pgStatSock; |
506 | pid_t PostmasterPid; |
507 | TimestampTz PgStartTime; |
508 | TimestampTz PgReloadTime; |
509 | pg_time_t first_syslogger_file_time; |
510 | bool redirection_done; |
511 | bool IsBinaryUpgrade; |
512 | int max_safe_fds; |
513 | int MaxBackends; |
514 | #ifdef WIN32 |
515 | HANDLE PostmasterHandle; |
516 | HANDLE initial_signal_pipe; |
517 | HANDLE syslogPipe[2]; |
518 | #else |
519 | int postmaster_alive_fds[2]; |
520 | int syslogPipe[2]; |
521 | #endif |
522 | char my_exec_path[MAXPGPATH]; |
523 | char pkglib_path[MAXPGPATH]; |
524 | char ExtraOptions[MAXPGPATH]; |
525 | } BackendParameters; |
526 | |
527 | static void read_backend_variables(char *id, Port *port); |
528 | static void restore_backend_variables(BackendParameters *param, Port *port); |
529 | |
530 | #ifndef WIN32 |
531 | static bool save_backend_variables(BackendParameters *param, Port *port); |
532 | #else |
533 | static bool save_backend_variables(BackendParameters *param, Port *port, |
534 | HANDLE childProcess, pid_t childPid); |
535 | #endif |
536 | |
537 | static void ShmemBackendArrayAdd(Backend *bn); |
538 | static void ShmemBackendArrayRemove(Backend *bn); |
539 | #endif /* EXEC_BACKEND */ |
540 | |
/* Shorthands for StartChildProcess() with a fixed process type. */
#define StartupDataBase() \
	StartChildProcess(StartupProcess)
#define StartBackgroundWriter() \
	StartChildProcess(BgWriterProcess)
#define StartCheckpointer() \
	StartChildProcess(CheckpointerProcess)
#define StartWalWriter() \
	StartChildProcess(WalWriterProcess)
#define StartWalReceiver() \
	StartChildProcess(WalReceiverProcess)

/*
 * Macros to check exit status of a child process.
 *
 * EXIT_STATUS_0 compares the raw wait status against zero.  The other two
 * require a normal exit (WIFEXITED) with exit code 1 or 3 respectively.
 */
#define EXIT_STATUS_0(st)	((st) == 0)
#define EXIT_STATUS_1(st)	(WIFEXITED(st) && WEXITSTATUS(st) == 1)
#define EXIT_STATUS_3(st)	(WIFEXITED(st) && WEXITSTATUS(st) == 3)
551 | |
#ifdef WIN32
/*
 * Process handle of postmaster, used on Windows to monitor if postmaster is
 * alive.
 */
HANDLE		PostmasterHandle;
#else
/*
 * File descriptors for pipe used to monitor if postmaster is alive.
 * First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
 * Both start out as -1.
 */
int			postmaster_alive_fds[2] = {-1, -1};
#endif
562 | |
563 | /* |
564 | * Postmaster main entry point |
565 | */ |
566 | void |
567 | PostmasterMain(int argc, char *argv[]) |
568 | { |
569 | int opt; |
570 | int status; |
571 | char *userDoption = NULL; |
572 | bool listen_addr_saved = false; |
573 | int i; |
574 | char *output_config_variable = NULL; |
575 | |
576 | InitProcessGlobals(); |
577 | |
578 | PostmasterPid = MyProcPid; |
579 | |
580 | IsPostmasterEnvironment = true; |
581 | |
582 | /* |
583 | * We should not be creating any files or directories before we check the |
584 | * data directory (see checkDataDir()), but just in case set the umask to |
585 | * the most restrictive (owner-only) permissions. |
586 | * |
587 | * checkDataDir() will reset the umask based on the data directory |
588 | * permissions. |
589 | */ |
590 | umask(PG_MODE_MASK_OWNER); |
591 | |
592 | /* |
593 | * By default, palloc() requests in the postmaster will be allocated in |
594 | * the PostmasterContext, which is space that can be recycled by backends. |
595 | * Allocated data that needs to be available to backends should be |
596 | * allocated in TopMemoryContext. |
597 | */ |
598 | PostmasterContext = AllocSetContextCreate(TopMemoryContext, |
599 | "Postmaster" , |
600 | ALLOCSET_DEFAULT_SIZES); |
601 | MemoryContextSwitchTo(PostmasterContext); |
602 | |
603 | /* Initialize paths to installation files */ |
604 | getInstallationPaths(argv[0]); |
605 | |
606 | /* |
607 | * Set up signal handlers for the postmaster process. |
608 | * |
609 | * In the postmaster, we want to install non-ignored handlers *without* |
610 | * SA_RESTART. This is because they'll be blocked at all times except |
611 | * when ServerLoop is waiting for something to happen, and during that |
612 | * window, we want signals to exit the select(2) wait so that ServerLoop |
613 | * can respond if anything interesting happened. On some platforms, |
614 | * signals marked SA_RESTART would not cause the select() wait to end. |
615 | * Child processes will generally want SA_RESTART, but we expect them to |
616 | * set up their own handlers before unblocking signals. |
617 | * |
618 | * CAUTION: when changing this list, check for side-effects on the signal |
619 | * handling setup of child processes. See tcop/postgres.c, |
620 | * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c, |
621 | * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c, |
622 | * postmaster/syslogger.c, postmaster/bgworker.c and |
623 | * postmaster/checkpointer.c. |
624 | */ |
625 | pqinitmask(); |
626 | PG_SETMASK(&BlockSig); |
627 | |
628 | pqsignal_no_restart(SIGHUP, SIGHUP_handler); /* reread config file and |
629 | * have children do same */ |
630 | pqsignal_no_restart(SIGINT, pmdie); /* send SIGTERM and shut down */ |
631 | pqsignal_no_restart(SIGQUIT, pmdie); /* send SIGQUIT and die */ |
632 | pqsignal_no_restart(SIGTERM, pmdie); /* wait for children and shut down */ |
633 | pqsignal(SIGALRM, SIG_IGN); /* ignored */ |
634 | pqsignal(SIGPIPE, SIG_IGN); /* ignored */ |
635 | pqsignal_no_restart(SIGUSR1, sigusr1_handler); /* message from child |
636 | * process */ |
637 | pqsignal_no_restart(SIGUSR2, dummy_handler); /* unused, reserve for |
638 | * children */ |
639 | pqsignal_no_restart(SIGCHLD, reaper); /* handle child termination */ |
640 | |
641 | /* |
642 | * No other place in Postgres should touch SIGTTIN/SIGTTOU handling. We |
643 | * ignore those signals in a postmaster environment, so that there is no |
644 | * risk of a child process freezing up due to writing to stderr. But for |
645 | * a standalone backend, their default handling is reasonable. Hence, all |
646 | * child processes should just allow the inherited settings to stand. |
647 | */ |
648 | #ifdef SIGTTIN |
649 | pqsignal(SIGTTIN, SIG_IGN); /* ignored */ |
650 | #endif |
651 | #ifdef SIGTTOU |
652 | pqsignal(SIGTTOU, SIG_IGN); /* ignored */ |
653 | #endif |
654 | |
655 | /* ignore SIGXFSZ, so that ulimit violations work like disk full */ |
656 | #ifdef SIGXFSZ |
657 | pqsignal(SIGXFSZ, SIG_IGN); /* ignored */ |
658 | #endif |
659 | |
660 | /* |
661 | * Options setup |
662 | */ |
663 | InitializeGUCOptions(); |
664 | |
665 | opterr = 1; |
666 | |
667 | /* |
668 | * Parse command-line options. CAUTION: keep this in sync with |
669 | * tcop/postgres.c (the option sets should not conflict) and with the |
670 | * common help() function in main/main.c. |
671 | */ |
672 | while ((opt = getopt(argc, argv, "B:bc:C:D:d:EeFf:h:ijk:lN:nOo:Pp:r:S:sTt:W:-:" )) != -1) |
673 | { |
674 | switch (opt) |
675 | { |
676 | case 'B': |
677 | SetConfigOption("shared_buffers" , optarg, PGC_POSTMASTER, PGC_S_ARGV); |
678 | break; |
679 | |
680 | case 'b': |
681 | /* Undocumented flag used for binary upgrades */ |
682 | IsBinaryUpgrade = true; |
683 | break; |
684 | |
685 | case 'C': |
686 | output_config_variable = strdup(optarg); |
687 | break; |
688 | |
689 | case 'D': |
690 | userDoption = strdup(optarg); |
691 | break; |
692 | |
693 | case 'd': |
694 | set_debug_options(atoi(optarg), PGC_POSTMASTER, PGC_S_ARGV); |
695 | break; |
696 | |
697 | case 'E': |
698 | SetConfigOption("log_statement" , "all" , PGC_POSTMASTER, PGC_S_ARGV); |
699 | break; |
700 | |
701 | case 'e': |
702 | SetConfigOption("datestyle" , "euro" , PGC_POSTMASTER, PGC_S_ARGV); |
703 | break; |
704 | |
705 | case 'F': |
706 | SetConfigOption("fsync" , "false" , PGC_POSTMASTER, PGC_S_ARGV); |
707 | break; |
708 | |
709 | case 'f': |
710 | if (!set_plan_disabling_options(optarg, PGC_POSTMASTER, PGC_S_ARGV)) |
711 | { |
712 | write_stderr("%s: invalid argument for option -f: \"%s\"\n" , |
713 | progname, optarg); |
714 | ExitPostmaster(1); |
715 | } |
716 | break; |
717 | |
718 | case 'h': |
719 | SetConfigOption("listen_addresses" , optarg, PGC_POSTMASTER, PGC_S_ARGV); |
720 | break; |
721 | |
722 | case 'i': |
723 | SetConfigOption("listen_addresses" , "*" , PGC_POSTMASTER, PGC_S_ARGV); |
724 | break; |
725 | |
726 | case 'j': |
727 | /* only used by interactive backend */ |
728 | break; |
729 | |
730 | case 'k': |
731 | SetConfigOption("unix_socket_directories" , optarg, PGC_POSTMASTER, PGC_S_ARGV); |
732 | break; |
733 | |
734 | case 'l': |
735 | SetConfigOption("ssl" , "true" , PGC_POSTMASTER, PGC_S_ARGV); |
736 | break; |
737 | |
738 | case 'N': |
739 | SetConfigOption("max_connections" , optarg, PGC_POSTMASTER, PGC_S_ARGV); |
740 | break; |
741 | |
742 | case 'n': |
743 | /* Don't reinit shared mem after abnormal exit */ |
744 | Reinit = false; |
745 | break; |
746 | |
747 | case 'O': |
748 | SetConfigOption("allow_system_table_mods" , "true" , PGC_POSTMASTER, PGC_S_ARGV); |
749 | break; |
750 | |
751 | case 'o': |
752 | /* Other options to pass to the backend on the command line */ |
753 | snprintf(ExtraOptions + strlen(ExtraOptions), |
754 | sizeof(ExtraOptions) - strlen(ExtraOptions), |
755 | " %s" , optarg); |
756 | break; |
757 | |
758 | case 'P': |
759 | SetConfigOption("ignore_system_indexes" , "true" , PGC_POSTMASTER, PGC_S_ARGV); |
760 | break; |
761 | |
762 | case 'p': |
763 | SetConfigOption("port" , optarg, PGC_POSTMASTER, PGC_S_ARGV); |
764 | break; |
765 | |
766 | case 'r': |
767 | /* only used by single-user backend */ |
768 | break; |
769 | |
770 | case 'S': |
771 | SetConfigOption("work_mem" , optarg, PGC_POSTMASTER, PGC_S_ARGV); |
772 | break; |
773 | |
774 | case 's': |
775 | SetConfigOption("log_statement_stats" , "true" , PGC_POSTMASTER, PGC_S_ARGV); |
776 | break; |
777 | |
778 | case 'T': |
779 | |
780 | /* |
781 | * In the event that some backend dumps core, send SIGSTOP, |
782 | * rather than SIGQUIT, to all its peers. This lets the wily |
783 | * post_hacker collect core dumps from everyone. |
784 | */ |
785 | SendStop = true; |
786 | break; |
787 | |
788 | case 't': |
789 | { |
790 | const char *tmp = get_stats_option_name(optarg); |
791 | |
792 | if (tmp) |
793 | { |
794 | SetConfigOption(tmp, "true" , PGC_POSTMASTER, PGC_S_ARGV); |
795 | } |
796 | else |
797 | { |
798 | write_stderr("%s: invalid argument for option -t: \"%s\"\n" , |
799 | progname, optarg); |
800 | ExitPostmaster(1); |
801 | } |
802 | break; |
803 | } |
804 | |
805 | case 'W': |
806 | SetConfigOption("post_auth_delay" , optarg, PGC_POSTMASTER, PGC_S_ARGV); |
807 | break; |
808 | |
809 | case 'c': |
810 | case '-': |
811 | { |
812 | char *name, |
813 | *value; |
814 | |
815 | ParseLongOption(optarg, &name, &value); |
816 | if (!value) |
817 | { |
818 | if (opt == '-') |
819 | ereport(ERROR, |
820 | (errcode(ERRCODE_SYNTAX_ERROR), |
821 | errmsg("--%s requires a value" , |
822 | optarg))); |
823 | else |
824 | ereport(ERROR, |
825 | (errcode(ERRCODE_SYNTAX_ERROR), |
826 | errmsg("-c %s requires a value" , |
827 | optarg))); |
828 | } |
829 | |
830 | SetConfigOption(name, value, PGC_POSTMASTER, PGC_S_ARGV); |
831 | free(name); |
832 | if (value) |
833 | free(value); |
834 | break; |
835 | } |
836 | |
837 | default: |
838 | write_stderr("Try \"%s --help\" for more information.\n" , |
839 | progname); |
840 | ExitPostmaster(1); |
841 | } |
842 | } |
843 | |
844 | /* |
845 | * Postmaster accepts no non-option switch arguments. |
846 | */ |
847 | if (optind < argc) |
848 | { |
849 | write_stderr("%s: invalid argument: \"%s\"\n" , |
850 | progname, argv[optind]); |
851 | write_stderr("Try \"%s --help\" for more information.\n" , |
852 | progname); |
853 | ExitPostmaster(1); |
854 | } |
855 | |
856 | /* |
857 | * Locate the proper configuration files and data directory, and read |
858 | * postgresql.conf for the first time. |
859 | */ |
860 | if (!SelectConfigFiles(userDoption, progname)) |
861 | ExitPostmaster(2); |
862 | |
863 | if (output_config_variable != NULL) |
864 | { |
865 | /* |
866 | * "-C guc" was specified, so print GUC's value and exit. No extra |
867 | * permission check is needed because the user is reading inside the |
868 | * data dir. |
869 | */ |
870 | const char *config_val = GetConfigOption(output_config_variable, |
871 | false, false); |
872 | |
873 | puts(config_val ? config_val : "" ); |
874 | ExitPostmaster(0); |
875 | } |
876 | |
877 | /* Verify that DataDir looks reasonable */ |
878 | checkDataDir(); |
879 | |
880 | /* Check that pg_control exists */ |
881 | checkControlFile(); |
882 | |
883 | /* And switch working directory into it */ |
884 | ChangeToDataDir(); |
885 | |
886 | /* |
887 | * Check for invalid combinations of GUC settings. |
888 | */ |
889 | if (ReservedBackends >= MaxConnections) |
890 | { |
891 | write_stderr("%s: superuser_reserved_connections (%d) must be less than max_connections (%d)\n" , |
892 | progname, |
893 | ReservedBackends, MaxConnections); |
894 | ExitPostmaster(1); |
895 | } |
896 | if (XLogArchiveMode > ARCHIVE_MODE_OFF && wal_level == WAL_LEVEL_MINIMAL) |
897 | ereport(ERROR, |
898 | (errmsg("WAL archival cannot be enabled when wal_level is \"minimal\"" ))); |
899 | if (max_wal_senders > 0 && wal_level == WAL_LEVEL_MINIMAL) |
900 | ereport(ERROR, |
901 | (errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"" ))); |
902 | |
903 | /* |
904 | * Other one-time internal sanity checks can go here, if they are fast. |
905 | * (Put any slow processing further down, after postmaster.pid creation.) |
906 | */ |
907 | if (!CheckDateTokenTables()) |
908 | { |
909 | write_stderr("%s: invalid datetoken tables, please fix\n" , progname); |
910 | ExitPostmaster(1); |
911 | } |
912 | |
913 | /* |
914 | * Now that we are done processing the postmaster arguments, reset |
915 | * getopt(3) library so that it will work correctly in subprocesses. |
916 | */ |
917 | optind = 1; |
918 | #ifdef HAVE_INT_OPTRESET |
919 | optreset = 1; /* some systems need this too */ |
920 | #endif |
921 | |
922 | /* For debugging: display postmaster environment */ |
923 | { |
924 | extern char **environ; |
925 | char **p; |
926 | |
927 | ereport(DEBUG3, |
928 | (errmsg_internal("%s: PostmasterMain: initial environment dump:" , |
929 | progname))); |
930 | ereport(DEBUG3, |
931 | (errmsg_internal("-----------------------------------------" ))); |
932 | for (p = environ; *p; ++p) |
933 | ereport(DEBUG3, |
934 | (errmsg_internal("\t%s" , *p))); |
935 | ereport(DEBUG3, |
936 | (errmsg_internal("-----------------------------------------" ))); |
937 | } |
938 | |
939 | /* |
940 | * Create lockfile for data directory. |
941 | * |
942 | * We want to do this before we try to grab the input sockets, because the |
943 | * data directory interlock is more reliable than the socket-file |
944 | * interlock (thanks to whoever decided to put socket files in /tmp :-(). |
945 | * For the same reason, it's best to grab the TCP socket(s) before the |
946 | * Unix socket(s). |
947 | * |
948 | * Also note that this internally sets up the on_proc_exit function that |
949 | * is responsible for removing both data directory and socket lockfiles; |
950 | * so it must happen before opening sockets so that at exit, the socket |
951 | * lockfiles go away after CloseServerPorts runs. |
952 | */ |
953 | CreateDataDirLockFile(true); |
954 | |
955 | /* |
956 | * Read the control file (for error checking and config info). |
957 | * |
958 | * Since we verify the control file's CRC, this has a useful side effect |
959 | * on machines where we need a run-time test for CRC support instructions. |
960 | * The postmaster will do the test once at startup, and then its child |
961 | * processes will inherit the correct function pointer and not need to |
962 | * repeat the test. |
963 | */ |
964 | LocalProcessControlFile(false); |
965 | |
966 | /* |
967 | * Initialize SSL library, if specified. |
968 | */ |
969 | #ifdef USE_SSL |
970 | if (EnableSSL) |
971 | { |
972 | (void) secure_initialize(true); |
973 | LoadedSSL = true; |
974 | } |
975 | #endif |
976 | |
977 | /* |
978 | * Register the apply launcher. Since it registers a background worker, |
979 | * it needs to be called before InitializeMaxBackends(), and it's probably |
980 | * a good idea to call it before any modules had chance to take the |
981 | * background worker slots. |
982 | */ |
983 | ApplyLauncherRegister(); |
984 | |
985 | /* |
986 | * process any libraries that should be preloaded at postmaster start |
987 | */ |
988 | process_shared_preload_libraries(); |
989 | |
990 | /* |
991 | * Now that loadable modules have had their chance to register background |
992 | * workers, calculate MaxBackends. |
993 | */ |
994 | InitializeMaxBackends(); |
995 | |
996 | /* Report server startup in log */ |
997 | ereport(LOG, |
998 | (errmsg("starting %s" , PG_VERSION_STR))); |
999 | |
1000 | /* |
1001 | * Establish input sockets. |
1002 | * |
1003 | * First, mark them all closed, and set up an on_proc_exit function that's |
1004 | * charged with closing the sockets again at postmaster shutdown. |
1005 | */ |
1006 | for (i = 0; i < MAXLISTEN; i++) |
1007 | ListenSocket[i] = PGINVALID_SOCKET; |
1008 | |
1009 | on_proc_exit(CloseServerPorts, 0); |
1010 | |
1011 | if (ListenAddresses) |
1012 | { |
1013 | char *rawstring; |
1014 | List *elemlist; |
1015 | ListCell *l; |
1016 | int success = 0; |
1017 | |
1018 | /* Need a modifiable copy of ListenAddresses */ |
1019 | rawstring = pstrdup(ListenAddresses); |
1020 | |
1021 | /* Parse string into list of hostnames */ |
1022 | if (!SplitIdentifierString(rawstring, ',', &elemlist)) |
1023 | { |
1024 | /* syntax error in list */ |
1025 | ereport(FATAL, |
1026 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
1027 | errmsg("invalid list syntax in parameter \"%s\"" , |
1028 | "listen_addresses" ))); |
1029 | } |
1030 | |
1031 | foreach(l, elemlist) |
1032 | { |
1033 | char *curhost = (char *) lfirst(l); |
1034 | |
1035 | if (strcmp(curhost, "*" ) == 0) |
1036 | status = StreamServerPort(AF_UNSPEC, NULL, |
1037 | (unsigned short) PostPortNumber, |
1038 | NULL, |
1039 | ListenSocket, MAXLISTEN); |
1040 | else |
1041 | status = StreamServerPort(AF_UNSPEC, curhost, |
1042 | (unsigned short) PostPortNumber, |
1043 | NULL, |
1044 | ListenSocket, MAXLISTEN); |
1045 | |
1046 | if (status == STATUS_OK) |
1047 | { |
1048 | success++; |
1049 | /* record the first successful host addr in lockfile */ |
1050 | if (!listen_addr_saved) |
1051 | { |
1052 | AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, curhost); |
1053 | listen_addr_saved = true; |
1054 | } |
1055 | } |
1056 | else |
1057 | ereport(WARNING, |
1058 | (errmsg("could not create listen socket for \"%s\"" , |
1059 | curhost))); |
1060 | } |
1061 | |
1062 | if (!success && elemlist != NIL) |
1063 | ereport(FATAL, |
1064 | (errmsg("could not create any TCP/IP sockets" ))); |
1065 | |
1066 | list_free(elemlist); |
1067 | pfree(rawstring); |
1068 | } |
1069 | |
1070 | #ifdef USE_BONJOUR |
1071 | /* Register for Bonjour only if we opened TCP socket(s) */ |
1072 | if (enable_bonjour && ListenSocket[0] != PGINVALID_SOCKET) |
1073 | { |
1074 | DNSServiceErrorType err; |
1075 | |
1076 | /* |
1077 | * We pass 0 for interface_index, which will result in registering on |
1078 | * all "applicable" interfaces. It's not entirely clear from the |
1079 | * DNS-SD docs whether this would be appropriate if we have bound to |
1080 | * just a subset of the available network interfaces. |
1081 | */ |
1082 | err = DNSServiceRegister(&bonjour_sdref, |
1083 | 0, |
1084 | 0, |
1085 | bonjour_name, |
1086 | "_postgresql._tcp." , |
1087 | NULL, |
1088 | NULL, |
1089 | pg_hton16(PostPortNumber), |
1090 | 0, |
1091 | NULL, |
1092 | NULL, |
1093 | NULL); |
1094 | if (err != kDNSServiceErr_NoError) |
1095 | elog(LOG, "DNSServiceRegister() failed: error code %ld" , |
1096 | (long) err); |
1097 | |
1098 | /* |
1099 | * We don't bother to read the mDNS daemon's reply, and we expect that |
1100 | * it will automatically terminate our registration when the socket is |
1101 | * closed at postmaster termination. So there's nothing more to be |
1102 | * done here. However, the bonjour_sdref is kept around so that |
1103 | * forked children can close their copies of the socket. |
1104 | */ |
1105 | } |
1106 | #endif |
1107 | |
1108 | #ifdef HAVE_UNIX_SOCKETS |
1109 | if (Unix_socket_directories) |
1110 | { |
1111 | char *rawstring; |
1112 | List *elemlist; |
1113 | ListCell *l; |
1114 | int success = 0; |
1115 | |
1116 | /* Need a modifiable copy of Unix_socket_directories */ |
1117 | rawstring = pstrdup(Unix_socket_directories); |
1118 | |
1119 | /* Parse string into list of directories */ |
1120 | if (!SplitDirectoriesString(rawstring, ',', &elemlist)) |
1121 | { |
1122 | /* syntax error in list */ |
1123 | ereport(FATAL, |
1124 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
1125 | errmsg("invalid list syntax in parameter \"%s\"" , |
1126 | "unix_socket_directories" ))); |
1127 | } |
1128 | |
1129 | foreach(l, elemlist) |
1130 | { |
1131 | char *socketdir = (char *) lfirst(l); |
1132 | |
1133 | status = StreamServerPort(AF_UNIX, NULL, |
1134 | (unsigned short) PostPortNumber, |
1135 | socketdir, |
1136 | ListenSocket, MAXLISTEN); |
1137 | |
1138 | if (status == STATUS_OK) |
1139 | { |
1140 | success++; |
1141 | /* record the first successful Unix socket in lockfile */ |
1142 | if (success == 1) |
1143 | AddToDataDirLockFile(LOCK_FILE_LINE_SOCKET_DIR, socketdir); |
1144 | } |
1145 | else |
1146 | ereport(WARNING, |
1147 | (errmsg("could not create Unix-domain socket in directory \"%s\"" , |
1148 | socketdir))); |
1149 | } |
1150 | |
1151 | if (!success && elemlist != NIL) |
1152 | ereport(FATAL, |
1153 | (errmsg("could not create any Unix-domain sockets" ))); |
1154 | |
1155 | list_free_deep(elemlist); |
1156 | pfree(rawstring); |
1157 | } |
1158 | #endif |
1159 | |
1160 | /* |
1161 | * check that we have some socket to listen on |
1162 | */ |
1163 | if (ListenSocket[0] == PGINVALID_SOCKET) |
1164 | ereport(FATAL, |
1165 | (errmsg("no socket created for listening" ))); |
1166 | |
1167 | /* |
1168 | * If no valid TCP ports, write an empty line for listen address, |
1169 | * indicating the Unix socket must be used. Note that this line is not |
1170 | * added to the lock file until there is a socket backing it. |
1171 | */ |
1172 | if (!listen_addr_saved) |
1173 | AddToDataDirLockFile(LOCK_FILE_LINE_LISTEN_ADDR, "" ); |
1174 | |
1175 | /* |
1176 | * Set up shared memory and semaphores. |
1177 | */ |
1178 | reset_shared(PostPortNumber); |
1179 | |
1180 | /* |
1181 | * Estimate number of openable files. This must happen after setting up |
1182 | * semaphores, because on some platforms semaphores count as open files. |
1183 | */ |
1184 | set_max_safe_fds(); |
1185 | |
1186 | /* |
1187 | * Set reference point for stack-depth checking. |
1188 | */ |
1189 | set_stack_base(); |
1190 | |
1191 | /* |
1192 | * Initialize pipe (or process handle on Windows) that allows children to |
1193 | * wake up from sleep on postmaster death. |
1194 | */ |
1195 | InitPostmasterDeathWatchHandle(); |
1196 | |
1197 | #ifdef WIN32 |
1198 | |
1199 | /* |
1200 | * Initialize I/O completion port used to deliver list of dead children. |
1201 | */ |
1202 | win32ChildQueue = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 1); |
1203 | if (win32ChildQueue == NULL) |
1204 | ereport(FATAL, |
1205 | (errmsg("could not create I/O completion port for child queue" ))); |
1206 | #endif |
1207 | |
1208 | /* |
1209 | * Record postmaster options. We delay this till now to avoid recording |
1210 | * bogus options (eg, NBuffers too high for available memory). |
1211 | */ |
1212 | if (!CreateOptsFile(argc, argv, my_exec_path)) |
1213 | ExitPostmaster(1); |
1214 | |
1215 | #ifdef EXEC_BACKEND |
1216 | /* Write out nondefault GUC settings for child processes to use */ |
1217 | write_nondefault_variables(PGC_POSTMASTER); |
1218 | #endif |
1219 | |
1220 | /* |
1221 | * Write the external PID file if requested |
1222 | */ |
1223 | if (external_pid_file) |
1224 | { |
1225 | FILE *fpidfile = fopen(external_pid_file, "w" ); |
1226 | |
1227 | if (fpidfile) |
1228 | { |
1229 | fprintf(fpidfile, "%d\n" , MyProcPid); |
1230 | fclose(fpidfile); |
1231 | |
1232 | /* Make PID file world readable */ |
1233 | if (chmod(external_pid_file, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0) |
1234 | write_stderr("%s: could not change permissions of external PID file \"%s\": %s\n" , |
1235 | progname, external_pid_file, strerror(errno)); |
1236 | } |
1237 | else |
1238 | write_stderr("%s: could not write external PID file \"%s\": %s\n" , |
1239 | progname, external_pid_file, strerror(errno)); |
1240 | |
1241 | on_proc_exit(unlink_external_pid_file, 0); |
1242 | } |
1243 | |
1244 | /* |
1245 | * Remove old temporary files. At this point there can be no other |
1246 | * Postgres processes running in this directory, so this should be safe. |
1247 | */ |
1248 | RemovePgTempFiles(); |
1249 | |
1250 | /* |
1251 | * Forcibly remove the files signaling a standby promotion request. |
1252 | * Otherwise, the existence of those files triggers a promotion too early, |
1253 | * whether a user wants that or not. |
1254 | * |
1255 | * This removal of files is usually unnecessary because they can exist |
1256 | * only during a few moments during a standby promotion. However there is |
1257 | * a race condition: if pg_ctl promote is executed and creates the files |
1258 | * during a promotion, the files can stay around even after the server is |
1259 | * brought up to new master. Then, if new standby starts by using the |
1260 | * backup taken from that master, the files can exist at the server |
1261 | * startup and should be removed in order to avoid an unexpected |
1262 | * promotion. |
1263 | * |
1264 | * Note that promotion signal files need to be removed before the startup |
1265 | * process is invoked. Because, after that, they can be used by |
1266 | * postmaster's SIGUSR1 signal handler. |
1267 | */ |
1268 | RemovePromoteSignalFiles(); |
1269 | |
1270 | /* Do the same for logrotate signal file */ |
1271 | RemoveLogrotateSignalFiles(); |
1272 | |
1273 | /* Remove any outdated file holding the current log filenames. */ |
1274 | if (unlink(LOG_METAINFO_DATAFILE) < 0 && errno != ENOENT) |
1275 | ereport(LOG, |
1276 | (errcode_for_file_access(), |
1277 | errmsg("could not remove file \"%s\": %m" , |
1278 | LOG_METAINFO_DATAFILE))); |
1279 | |
1280 | /* |
1281 | * If enabled, start up syslogger collection subprocess |
1282 | */ |
1283 | SysLoggerPID = SysLogger_Start(); |
1284 | |
1285 | /* |
1286 | * Reset whereToSendOutput from DestDebug (its starting state) to |
1287 | * DestNone. This stops ereport from sending log messages to stderr unless |
1288 | * Log_destination permits. We don't do this until the postmaster is |
1289 | * fully launched, since startup failures may as well be reported to |
1290 | * stderr. |
1291 | * |
1292 | * If we are in fact disabling logging to stderr, first emit a log message |
1293 | * saying so, to provide a breadcrumb trail for users who may not remember |
1294 | * that their logging is configured to go somewhere else. |
1295 | */ |
1296 | if (!(Log_destination & LOG_DESTINATION_STDERR)) |
1297 | ereport(LOG, |
1298 | (errmsg("ending log output to stderr" ), |
1299 | errhint("Future log output will go to log destination \"%s\"." , |
1300 | Log_destination_string))); |
1301 | |
1302 | whereToSendOutput = DestNone; |
1303 | |
1304 | /* |
1305 | * Initialize stats collection subsystem (this does NOT start the |
1306 | * collector process!) |
1307 | */ |
1308 | pgstat_init(); |
1309 | |
1310 | /* |
1311 | * Initialize the autovacuum subsystem (again, no process start yet) |
1312 | */ |
1313 | autovac_init(); |
1314 | |
1315 | /* |
1316 | * Load configuration files for client authentication. |
1317 | */ |
1318 | if (!load_hba()) |
1319 | { |
1320 | /* |
1321 | * It makes no sense to continue if we fail to load the HBA file, |
1322 | * since there is no way to connect to the database in this case. |
1323 | */ |
1324 | ereport(FATAL, |
1325 | (errmsg("could not load pg_hba.conf" ))); |
1326 | } |
1327 | if (!load_ident()) |
1328 | { |
1329 | /* |
1330 | * We can start up without the IDENT file, although it means that you |
1331 | * cannot log in using any of the authentication methods that need a |
1332 | * user name mapping. load_ident() already logged the details of error |
1333 | * to the log. |
1334 | */ |
1335 | } |
1336 | |
1337 | #ifdef HAVE_PTHREAD_IS_THREADED_NP |
1338 | |
1339 | /* |
1340 | * On macOS, libintl replaces setlocale() with a version that calls |
1341 | * CFLocaleCopyCurrent() when its second argument is "" and every relevant |
1342 | * environment variable is unset or empty. CFLocaleCopyCurrent() makes |
1343 | * the process multithreaded. The postmaster calls sigprocmask() and |
1344 | * calls fork() without an immediate exec(), both of which have undefined |
1345 | * behavior in a multithreaded program. A multithreaded postmaster is the |
1346 | * normal case on Windows, which offers neither fork() nor sigprocmask(). |
1347 | */ |
1348 | if (pthread_is_threaded_np() != 0) |
1349 | ereport(FATAL, |
1350 | (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), |
1351 | errmsg("postmaster became multithreaded during startup" ), |
1352 | errhint("Set the LC_ALL environment variable to a valid locale." ))); |
1353 | #endif |
1354 | |
1355 | /* |
1356 | * Remember postmaster startup time |
1357 | */ |
1358 | PgStartTime = GetCurrentTimestamp(); |
1359 | |
1360 | /* |
1361 | * Report postmaster status in the postmaster.pid file, to allow pg_ctl to |
1362 | * see what's happening. |
1363 | */ |
1364 | AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STARTING); |
1365 | |
1366 | /* |
1367 | * We're ready to rock and roll... |
1368 | */ |
1369 | StartupPID = StartupDataBase(); |
1370 | Assert(StartupPID != 0); |
1371 | StartupStatus = STARTUP_RUNNING; |
1372 | pmState = PM_STARTUP; |
1373 | |
1374 | /* Some workers may be scheduled to start now */ |
1375 | maybe_start_bgworkers(); |
1376 | |
1377 | status = ServerLoop(); |
1378 | |
1379 | /* |
1380 | * ServerLoop probably shouldn't ever return, but if it does, close down. |
1381 | */ |
1382 | ExitPostmaster(status != STATUS_OK); |
1383 | |
1384 | abort(); /* not reached */ |
1385 | } |
1386 | |
1387 | |
1388 | /* |
1389 | * on_proc_exit callback to close server's listen sockets |
1390 | */ |
1391 | static void |
1392 | CloseServerPorts(int status, Datum arg) |
1393 | { |
1394 | int i; |
1395 | |
1396 | /* |
1397 | * First, explicitly close all the socket FDs. We used to just let this |
1398 | * happen implicitly at postmaster exit, but it's better to close them |
1399 | * before we remove the postmaster.pid lockfile; otherwise there's a race |
1400 | * condition if a new postmaster wants to re-use the TCP port number. |
1401 | */ |
1402 | for (i = 0; i < MAXLISTEN; i++) |
1403 | { |
1404 | if (ListenSocket[i] != PGINVALID_SOCKET) |
1405 | { |
1406 | StreamClose(ListenSocket[i]); |
1407 | ListenSocket[i] = PGINVALID_SOCKET; |
1408 | } |
1409 | } |
1410 | |
1411 | /* |
1412 | * Next, remove any filesystem entries for Unix sockets. To avoid race |
1413 | * conditions against incoming postmasters, this must happen after closing |
1414 | * the sockets and before removing lock files. |
1415 | */ |
1416 | RemoveSocketFiles(); |
1417 | |
1418 | /* |
1419 | * We don't do anything about socket lock files here; those will be |
1420 | * removed in a later on_proc_exit callback. |
1421 | */ |
1422 | } |
1423 | |
1424 | /* |
1425 | * on_proc_exit callback to delete external_pid_file |
1426 | */ |
1427 | static void |
1428 | unlink_external_pid_file(int status, Datum arg) |
1429 | { |
1430 | if (external_pid_file) |
1431 | unlink(external_pid_file); |
1432 | } |
1433 | |
1434 | |
1435 | /* |
1436 | * Compute and check the directory paths to files that are part of the |
1437 | * installation (as deduced from the postgres executable's own location) |
1438 | */ |
1439 | static void |
1440 | getInstallationPaths(const char *argv0) |
1441 | { |
1442 | DIR *pdir; |
1443 | |
1444 | /* Locate the postgres executable itself */ |
1445 | if (find_my_exec(argv0, my_exec_path) < 0) |
1446 | elog(FATAL, "%s: could not locate my own executable path" , argv0); |
1447 | |
1448 | #ifdef EXEC_BACKEND |
1449 | /* Locate executable backend before we change working directory */ |
1450 | if (find_other_exec(argv0, "postgres" , PG_BACKEND_VERSIONSTR, |
1451 | postgres_exec_path) < 0) |
1452 | ereport(FATAL, |
1453 | (errmsg("%s: could not locate matching postgres executable" , |
1454 | argv0))); |
1455 | #endif |
1456 | |
1457 | /* |
1458 | * Locate the pkglib directory --- this has to be set early in case we try |
1459 | * to load any modules from it in response to postgresql.conf entries. |
1460 | */ |
1461 | get_pkglib_path(my_exec_path, pkglib_path); |
1462 | |
1463 | /* |
1464 | * Verify that there's a readable directory there; otherwise the Postgres |
1465 | * installation is incomplete or corrupt. (A typical cause of this |
1466 | * failure is that the postgres executable has been moved or hardlinked to |
1467 | * some directory that's not a sibling of the installation lib/ |
1468 | * directory.) |
1469 | */ |
1470 | pdir = AllocateDir(pkglib_path); |
1471 | if (pdir == NULL) |
1472 | ereport(ERROR, |
1473 | (errcode_for_file_access(), |
1474 | errmsg("could not open directory \"%s\": %m" , |
1475 | pkglib_path), |
1476 | errhint("This may indicate an incomplete PostgreSQL installation, or that the file \"%s\" has been moved away from its proper location." , |
1477 | my_exec_path))); |
1478 | FreeDir(pdir); |
1479 | |
1480 | /* |
1481 | * XXX is it worth similarly checking the share/ directory? If the lib/ |
1482 | * directory is there, then share/ probably is too. |
1483 | */ |
1484 | } |
1485 | |
1486 | /* |
1487 | * Check that pg_control exists in the correct location in the data directory. |
1488 | * |
1489 | * No attempt is made to validate the contents of pg_control here. This is |
1490 | * just a sanity check to see if we are looking at a real data directory. |
1491 | */ |
1492 | static void |
1493 | checkControlFile(void) |
1494 | { |
1495 | char path[MAXPGPATH]; |
1496 | FILE *fp; |
1497 | |
1498 | snprintf(path, sizeof(path), "%s/global/pg_control" , DataDir); |
1499 | |
1500 | fp = AllocateFile(path, PG_BINARY_R); |
1501 | if (fp == NULL) |
1502 | { |
1503 | write_stderr("%s: could not find the database system\n" |
1504 | "Expected to find it in the directory \"%s\",\n" |
1505 | "but could not open file \"%s\": %s\n" , |
1506 | progname, DataDir, path, strerror(errno)); |
1507 | ExitPostmaster(2); |
1508 | } |
1509 | FreeFile(fp); |
1510 | } |
1511 | |
1512 | /* |
1513 | * Determine how long should we let ServerLoop sleep. |
1514 | * |
1515 | * In normal conditions we wait at most one minute, to ensure that the other |
1516 | * background tasks handled by ServerLoop get done even when no requests are |
1517 | * arriving. However, if there are background workers waiting to be started, |
1518 | * we don't actually sleep so that they are quickly serviced. Other exception |
1519 | * cases are as shown in the code. |
1520 | */ |
1521 | static void |
1522 | DetermineSleepTime(struct timeval *timeout) |
1523 | { |
1524 | TimestampTz next_wakeup = 0; |
1525 | |
1526 | /* |
1527 | * Normal case: either there are no background workers at all, or we're in |
1528 | * a shutdown sequence (during which we ignore bgworkers altogether). |
1529 | */ |
1530 | if (Shutdown > NoShutdown || |
1531 | (!StartWorkerNeeded && !HaveCrashedWorker)) |
1532 | { |
1533 | if (AbortStartTime != 0) |
1534 | { |
1535 | /* time left to abort; clamp to 0 in case it already expired */ |
1536 | timeout->tv_sec = SIGKILL_CHILDREN_AFTER_SECS - |
1537 | (time(NULL) - AbortStartTime); |
1538 | timeout->tv_sec = Max(timeout->tv_sec, 0); |
1539 | timeout->tv_usec = 0; |
1540 | } |
1541 | else |
1542 | { |
1543 | timeout->tv_sec = 60; |
1544 | timeout->tv_usec = 0; |
1545 | } |
1546 | return; |
1547 | } |
1548 | |
1549 | if (StartWorkerNeeded) |
1550 | { |
1551 | timeout->tv_sec = 0; |
1552 | timeout->tv_usec = 0; |
1553 | return; |
1554 | } |
1555 | |
1556 | if (HaveCrashedWorker) |
1557 | { |
1558 | slist_mutable_iter siter; |
1559 | |
1560 | /* |
1561 | * When there are crashed bgworkers, we sleep just long enough that |
1562 | * they are restarted when they request to be. Scan the list to |
1563 | * determine the minimum of all wakeup times according to most recent |
1564 | * crash time and requested restart interval. |
1565 | */ |
1566 | slist_foreach_modify(siter, &BackgroundWorkerList) |
1567 | { |
1568 | RegisteredBgWorker *rw; |
1569 | TimestampTz this_wakeup; |
1570 | |
1571 | rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur); |
1572 | |
1573 | if (rw->rw_crashed_at == 0) |
1574 | continue; |
1575 | |
1576 | if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART |
1577 | || rw->rw_terminate) |
1578 | { |
1579 | ForgetBackgroundWorker(&siter); |
1580 | continue; |
1581 | } |
1582 | |
1583 | this_wakeup = TimestampTzPlusMilliseconds(rw->rw_crashed_at, |
1584 | 1000L * rw->rw_worker.bgw_restart_time); |
1585 | if (next_wakeup == 0 || this_wakeup < next_wakeup) |
1586 | next_wakeup = this_wakeup; |
1587 | } |
1588 | } |
1589 | |
1590 | if (next_wakeup != 0) |
1591 | { |
1592 | long secs; |
1593 | int microsecs; |
1594 | |
1595 | TimestampDifference(GetCurrentTimestamp(), next_wakeup, |
1596 | &secs, µsecs); |
1597 | timeout->tv_sec = secs; |
1598 | timeout->tv_usec = microsecs; |
1599 | |
1600 | /* Ensure we don't exceed one minute */ |
1601 | if (timeout->tv_sec > 60) |
1602 | { |
1603 | timeout->tv_sec = 60; |
1604 | timeout->tv_usec = 0; |
1605 | } |
1606 | } |
1607 | else |
1608 | { |
1609 | timeout->tv_sec = 60; |
1610 | timeout->tv_usec = 0; |
1611 | } |
1612 | } |
1613 | |
1614 | /* |
1615 | * Main idle loop of postmaster |
1616 | * |
1617 | * NB: Needs to be called with signals blocked |
1618 | */ |
1619 | static int |
1620 | ServerLoop(void) |
1621 | { |
1622 | fd_set readmask; |
1623 | int nSockets; |
1624 | time_t last_lockfile_recheck_time, |
1625 | last_touch_time; |
1626 | |
1627 | last_lockfile_recheck_time = last_touch_time = time(NULL); |
1628 | |
1629 | nSockets = initMasks(&readmask); |
1630 | |
1631 | for (;;) |
1632 | { |
1633 | fd_set rmask; |
1634 | int selres; |
1635 | time_t now; |
1636 | |
1637 | /* |
1638 | * Wait for a connection request to arrive. |
1639 | * |
1640 | * We block all signals except while sleeping. That makes it safe for |
1641 | * signal handlers, which again block all signals while executing, to |
1642 | * do nontrivial work. |
1643 | * |
1644 | * If we are in PM_WAIT_DEAD_END state, then we don't want to accept |
1645 | * any new connections, so we don't call select(), and just sleep. |
1646 | */ |
1647 | memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set)); |
1648 | |
1649 | if (pmState == PM_WAIT_DEAD_END) |
1650 | { |
1651 | PG_SETMASK(&UnBlockSig); |
1652 | |
1653 | pg_usleep(100000L); /* 100 msec seems reasonable */ |
1654 | selres = 0; |
1655 | |
1656 | PG_SETMASK(&BlockSig); |
1657 | } |
1658 | else |
1659 | { |
1660 | /* must set timeout each time; some OSes change it! */ |
1661 | struct timeval timeout; |
1662 | |
1663 | /* Needs to run with blocked signals! */ |
1664 | DetermineSleepTime(&timeout); |
1665 | |
1666 | PG_SETMASK(&UnBlockSig); |
1667 | |
1668 | selres = select(nSockets, &rmask, NULL, NULL, &timeout); |
1669 | |
1670 | PG_SETMASK(&BlockSig); |
1671 | } |
1672 | |
1673 | /* Now check the select() result */ |
1674 | if (selres < 0) |
1675 | { |
1676 | if (errno != EINTR && errno != EWOULDBLOCK) |
1677 | { |
1678 | ereport(LOG, |
1679 | (errcode_for_socket_access(), |
1680 | errmsg("select() failed in postmaster: %m" ))); |
1681 | return STATUS_ERROR; |
1682 | } |
1683 | } |
1684 | |
1685 | /* |
1686 | * New connection pending on any of our sockets? If so, fork a child |
1687 | * process to deal with it. |
1688 | */ |
1689 | if (selres > 0) |
1690 | { |
1691 | int i; |
1692 | |
1693 | for (i = 0; i < MAXLISTEN; i++) |
1694 | { |
1695 | if (ListenSocket[i] == PGINVALID_SOCKET) |
1696 | break; |
1697 | if (FD_ISSET(ListenSocket[i], &rmask)) |
1698 | { |
1699 | Port *port; |
1700 | |
1701 | port = ConnCreate(ListenSocket[i]); |
1702 | if (port) |
1703 | { |
1704 | BackendStartup(port); |
1705 | |
1706 | /* |
1707 | * We no longer need the open socket or port structure |
1708 | * in this process |
1709 | */ |
1710 | StreamClose(port->sock); |
1711 | ConnFree(port); |
1712 | } |
1713 | } |
1714 | } |
1715 | } |
1716 | |
1717 | /* If we have lost the log collector, try to start a new one */ |
1718 | if (SysLoggerPID == 0 && Logging_collector) |
1719 | SysLoggerPID = SysLogger_Start(); |
1720 | |
1721 | /* |
1722 | * If no background writer process is running, and we are not in a |
1723 | * state that prevents it, start one. It doesn't matter if this |
1724 | * fails, we'll just try again later. Likewise for the checkpointer. |
1725 | */ |
1726 | if (pmState == PM_RUN || pmState == PM_RECOVERY || |
1727 | pmState == PM_HOT_STANDBY) |
1728 | { |
1729 | if (CheckpointerPID == 0) |
1730 | CheckpointerPID = StartCheckpointer(); |
1731 | if (BgWriterPID == 0) |
1732 | BgWriterPID = StartBackgroundWriter(); |
1733 | } |
1734 | |
1735 | /* |
1736 | * Likewise, if we have lost the walwriter process, try to start a new |
1737 | * one. But this is needed only in normal operation (else we cannot |
1738 | * be writing any new WAL). |
1739 | */ |
1740 | if (WalWriterPID == 0 && pmState == PM_RUN) |
1741 | WalWriterPID = StartWalWriter(); |
1742 | |
1743 | /* |
1744 | * If we have lost the autovacuum launcher, try to start a new one. We |
1745 | * don't want autovacuum to run in binary upgrade mode because |
1746 | * autovacuum might update relfrozenxid for empty tables before the |
1747 | * physical files are put in place. |
1748 | */ |
1749 | if (!IsBinaryUpgrade && AutoVacPID == 0 && |
1750 | (AutoVacuumingActive() || start_autovac_launcher) && |
1751 | pmState == PM_RUN) |
1752 | { |
1753 | AutoVacPID = StartAutoVacLauncher(); |
1754 | if (AutoVacPID != 0) |
1755 | start_autovac_launcher = false; /* signal processed */ |
1756 | } |
1757 | |
1758 | /* If we have lost the stats collector, try to start a new one */ |
1759 | if (PgStatPID == 0 && |
1760 | (pmState == PM_RUN || pmState == PM_HOT_STANDBY)) |
1761 | PgStatPID = pgstat_start(); |
1762 | |
1763 | /* If we have lost the archiver, try to start a new one. */ |
1764 | if (PgArchPID == 0 && PgArchStartupAllowed()) |
1765 | PgArchPID = pgarch_start(); |
1766 | |
1767 | /* If we need to signal the autovacuum launcher, do so now */ |
1768 | if (avlauncher_needs_signal) |
1769 | { |
1770 | avlauncher_needs_signal = false; |
1771 | if (AutoVacPID != 0) |
1772 | kill(AutoVacPID, SIGUSR2); |
1773 | } |
1774 | |
1775 | /* If we need to start a WAL receiver, try to do that now */ |
1776 | if (WalReceiverRequested) |
1777 | MaybeStartWalReceiver(); |
1778 | |
1779 | /* Get other worker processes running, if needed */ |
1780 | if (StartWorkerNeeded || HaveCrashedWorker) |
1781 | maybe_start_bgworkers(); |
1782 | |
1783 | #ifdef HAVE_PTHREAD_IS_THREADED_NP |
1784 | |
1785 | /* |
1786 | * With assertions enabled, check regularly for appearance of |
1787 | * additional threads. All builds check at start and exit. |
1788 | */ |
1789 | Assert(pthread_is_threaded_np() == 0); |
1790 | #endif |
1791 | |
1792 | /* |
1793 | * Lastly, check to see if it's time to do some things that we don't |
1794 | * want to do every single time through the loop, because they're a |
1795 | * bit expensive. Note that there's up to a minute of slop in when |
1796 | * these tasks will be performed, since DetermineSleepTime() will let |
1797 | * us sleep at most that long; except for SIGKILL timeout which has |
1798 | * special-case logic there. |
1799 | */ |
1800 | now = time(NULL); |
1801 | |
1802 | /* |
1803 | * If we already sent SIGQUIT to children and they are slow to shut |
1804 | * down, it's time to send them SIGKILL. This doesn't happen |
1805 | * normally, but under certain conditions backends can get stuck while |
1806 | * shutting down. This is a last measure to get them unwedged. |
1807 | * |
1808 | * Note we also do this during recovery from a process crash. |
1809 | */ |
1810 | if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) && |
1811 | AbortStartTime != 0 && |
1812 | (now - AbortStartTime) >= SIGKILL_CHILDREN_AFTER_SECS) |
1813 | { |
1814 | /* We were gentle with them before. Not anymore */ |
1815 | TerminateChildren(SIGKILL); |
1816 | /* reset flag so we don't SIGKILL again */ |
1817 | AbortStartTime = 0; |
1818 | } |
1819 | |
1820 | /* |
1821 | * Once a minute, verify that postmaster.pid hasn't been removed or |
1822 | * overwritten. If it has, we force a shutdown. This avoids having |
1823 | * postmasters and child processes hanging around after their database |
1824 | * is gone, and maybe causing problems if a new database cluster is |
1825 | * created in the same place. It also provides some protection |
1826 | * against a DBA foolishly removing postmaster.pid and manually |
1827 | * starting a new postmaster. Data corruption is likely to ensue from |
1828 | * that anyway, but we can minimize the damage by aborting ASAP. |
1829 | */ |
1830 | if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE) |
1831 | { |
1832 | if (!RecheckDataDirLockFile()) |
1833 | { |
1834 | ereport(LOG, |
1835 | (errmsg("performing immediate shutdown because data directory lock file is invalid" ))); |
1836 | kill(MyProcPid, SIGQUIT); |
1837 | } |
1838 | last_lockfile_recheck_time = now; |
1839 | } |
1840 | |
1841 | /* |
1842 | * Touch Unix socket and lock files every 58 minutes, to ensure that |
1843 | * they are not removed by overzealous /tmp-cleaning tasks. We assume |
1844 | * no one runs cleaners with cutoff times of less than an hour ... |
1845 | */ |
1846 | if (now - last_touch_time >= 58 * SECS_PER_MINUTE) |
1847 | { |
1848 | TouchSocketFiles(); |
1849 | TouchSocketLockFiles(); |
1850 | last_touch_time = now; |
1851 | } |
1852 | } |
1853 | } |
1854 | |
1855 | /* |
1856 | * Initialise the masks for select() for the ports we are listening on. |
1857 | * Return the number of sockets to listen on. |
1858 | */ |
1859 | static int |
1860 | initMasks(fd_set *rmask) |
1861 | { |
1862 | int maxsock = -1; |
1863 | int i; |
1864 | |
1865 | FD_ZERO(rmask); |
1866 | |
1867 | for (i = 0; i < MAXLISTEN; i++) |
1868 | { |
1869 | int fd = ListenSocket[i]; |
1870 | |
1871 | if (fd == PGINVALID_SOCKET) |
1872 | break; |
1873 | FD_SET(fd, rmask); |
1874 | |
1875 | if (fd > maxsock) |
1876 | maxsock = fd; |
1877 | } |
1878 | |
1879 | return maxsock + 1; |
1880 | } |
1881 | |
1882 | |
1883 | /* |
1884 | * Read a client's startup packet and do something according to it. |
1885 | * |
1886 | * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and |
1887 | * not return at all. |
1888 | * |
1889 | * (Note that ereport(FATAL) stuff is sent to the client, so only use it |
1890 | * if that's what you want. Return STATUS_ERROR if you don't want to |
1891 | * send anything to the client, which would typically be appropriate |
1892 | * if we detect a communications failure.) |
1893 | * |
1894 | * Set secure_done when negotiation of an encrypted layer (currently, TLS or |
1895 | * GSSAPI) is already completed. |
1896 | */ |
1897 | static int |
1898 | ProcessStartupPacket(Port *port, bool secure_done) |
1899 | { |
1900 | int32 len; |
1901 | void *buf; |
1902 | ProtocolVersion proto; |
1903 | MemoryContext oldcontext; |
1904 | |
1905 | pq_startmsgread(); |
1906 | |
1907 | /* |
1908 | * Grab the first byte of the length word separately, so that we can tell |
1909 | * whether we have no data at all or an incomplete packet. (This might |
1910 | * sound inefficient, but it's not really, because of buffering in |
1911 | * pqcomm.c.) |
1912 | */ |
1913 | if (pq_getbytes((char *) &len, 1) == EOF) |
1914 | { |
1915 | /* |
1916 | * If we get no data at all, don't clutter the log with a complaint; |
1917 | * such cases often occur for legitimate reasons. An example is that |
1918 | * we might be here after responding to NEGOTIATE_SSL_CODE, and if the |
1919 | * client didn't like our response, it'll probably just drop the |
1920 | * connection. Service-monitoring software also often just opens and |
1921 | * closes a connection without sending anything. (So do port |
1922 | * scanners, which may be less benign, but it's not really our job to |
1923 | * notice those.) |
1924 | */ |
1925 | return STATUS_ERROR; |
1926 | } |
1927 | |
1928 | if (pq_getbytes(((char *) &len) + 1, 3) == EOF) |
1929 | { |
1930 | /* Got a partial length word, so bleat about that */ |
1931 | if (!secure_done) |
1932 | ereport(COMMERROR, |
1933 | (errcode(ERRCODE_PROTOCOL_VIOLATION), |
1934 | errmsg("incomplete startup packet" ))); |
1935 | return STATUS_ERROR; |
1936 | } |
1937 | |
1938 | len = pg_ntoh32(len); |
1939 | len -= 4; |
1940 | |
1941 | if (len < (int32) sizeof(ProtocolVersion) || |
1942 | len > MAX_STARTUP_PACKET_LENGTH) |
1943 | { |
1944 | ereport(COMMERROR, |
1945 | (errcode(ERRCODE_PROTOCOL_VIOLATION), |
1946 | errmsg("invalid length of startup packet" ))); |
1947 | return STATUS_ERROR; |
1948 | } |
1949 | |
1950 | /* |
1951 | * Allocate at least the size of an old-style startup packet, plus one |
1952 | * extra byte, and make sure all are zeroes. This ensures we will have |
1953 | * null termination of all strings, in both fixed- and variable-length |
1954 | * packet layouts. |
1955 | */ |
1956 | if (len <= (int32) sizeof(StartupPacket)) |
1957 | buf = palloc0(sizeof(StartupPacket) + 1); |
1958 | else |
1959 | buf = palloc0(len + 1); |
1960 | |
1961 | if (pq_getbytes(buf, len) == EOF) |
1962 | { |
1963 | ereport(COMMERROR, |
1964 | (errcode(ERRCODE_PROTOCOL_VIOLATION), |
1965 | errmsg("incomplete startup packet" ))); |
1966 | return STATUS_ERROR; |
1967 | } |
1968 | pq_endmsgread(); |
1969 | |
1970 | /* |
1971 | * The first field is either a protocol version number or a special |
1972 | * request code. |
1973 | */ |
1974 | port->proto = proto = pg_ntoh32(*((ProtocolVersion *) buf)); |
1975 | |
1976 | if (proto == CANCEL_REQUEST_CODE) |
1977 | { |
1978 | processCancelRequest(port, buf); |
1979 | /* Not really an error, but we don't want to proceed further */ |
1980 | return STATUS_ERROR; |
1981 | } |
1982 | |
1983 | if (proto == NEGOTIATE_SSL_CODE && !secure_done) |
1984 | { |
1985 | char SSLok; |
1986 | |
1987 | #ifdef USE_SSL |
1988 | /* No SSL when disabled or on Unix sockets */ |
1989 | if (!LoadedSSL || IS_AF_UNIX(port->laddr.addr.ss_family)) |
1990 | SSLok = 'N'; |
1991 | else |
1992 | SSLok = 'S'; /* Support for SSL */ |
1993 | #else |
1994 | SSLok = 'N'; /* No support for SSL */ |
1995 | #endif |
1996 | |
1997 | retry1: |
1998 | if (send(port->sock, &SSLok, 1, 0) != 1) |
1999 | { |
2000 | if (errno == EINTR) |
2001 | goto retry1; /* if interrupted, just retry */ |
2002 | ereport(COMMERROR, |
2003 | (errcode_for_socket_access(), |
2004 | errmsg("failed to send SSL negotiation response: %m" ))); |
2005 | return STATUS_ERROR; /* close the connection */ |
2006 | } |
2007 | |
2008 | #ifdef USE_SSL |
2009 | if (SSLok == 'S' && secure_open_server(port) == -1) |
2010 | return STATUS_ERROR; |
2011 | #endif |
2012 | /* regular startup packet, cancel, etc packet should follow... */ |
2013 | /* but not another SSL negotiation request */ |
2014 | return ProcessStartupPacket(port, true); |
2015 | } |
2016 | else if (proto == NEGOTIATE_GSS_CODE && !secure_done) |
2017 | { |
2018 | char GSSok = 'N'; |
2019 | #ifdef ENABLE_GSS |
2020 | /* No GSSAPI encryption when on Unix socket */ |
2021 | if (!IS_AF_UNIX(port->laddr.addr.ss_family)) |
2022 | GSSok = 'G'; |
2023 | #endif |
2024 | |
2025 | while (send(port->sock, &GSSok, 1, 0) != 1) |
2026 | { |
2027 | if (errno == EINTR) |
2028 | continue; |
2029 | ereport(COMMERROR, |
2030 | (errcode_for_socket_access(), |
2031 | errmsg("failed to send GSSAPI negotiation response: %m" ))); |
2032 | return STATUS_ERROR; /* close the connection */ |
2033 | } |
2034 | |
2035 | #ifdef ENABLE_GSS |
2036 | if (GSSok == 'G' && secure_open_gssapi(port) == -1) |
2037 | return STATUS_ERROR; |
2038 | #endif |
2039 | /* Won't ever see more than one negotiation request */ |
2040 | return ProcessStartupPacket(port, true); |
2041 | } |
2042 | |
2043 | /* Could add additional special packet types here */ |
2044 | |
2045 | /* |
2046 | * Set FrontendProtocol now so that ereport() knows what format to send if |
2047 | * we fail during startup. |
2048 | */ |
2049 | FrontendProtocol = proto; |
2050 | |
2051 | /* Check that the major protocol version is in range. */ |
2052 | if (PG_PROTOCOL_MAJOR(proto) < PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST) || |
2053 | PG_PROTOCOL_MAJOR(proto) > PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST)) |
2054 | ereport(FATAL, |
2055 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
2056 | errmsg("unsupported frontend protocol %u.%u: server supports %u.0 to %u.%u" , |
2057 | PG_PROTOCOL_MAJOR(proto), PG_PROTOCOL_MINOR(proto), |
2058 | PG_PROTOCOL_MAJOR(PG_PROTOCOL_EARLIEST), |
2059 | PG_PROTOCOL_MAJOR(PG_PROTOCOL_LATEST), |
2060 | PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST)))); |
2061 | |
2062 | /* |
2063 | * Now fetch parameters out of startup packet and save them into the Port |
2064 | * structure. All data structures attached to the Port struct must be |
2065 | * allocated in TopMemoryContext so that they will remain available in a |
2066 | * running backend (even after PostmasterContext is destroyed). We need |
2067 | * not worry about leaking this storage on failure, since we aren't in the |
2068 | * postmaster process anymore. |
2069 | */ |
2070 | oldcontext = MemoryContextSwitchTo(TopMemoryContext); |
2071 | |
2072 | if (PG_PROTOCOL_MAJOR(proto) >= 3) |
2073 | { |
2074 | int32 offset = sizeof(ProtocolVersion); |
2075 | List *unrecognized_protocol_options = NIL; |
2076 | |
2077 | /* |
2078 | * Scan packet body for name/option pairs. We can assume any string |
2079 | * beginning within the packet body is null-terminated, thanks to |
2080 | * zeroing extra byte above. |
2081 | */ |
2082 | port->guc_options = NIL; |
2083 | |
2084 | while (offset < len) |
2085 | { |
2086 | char *nameptr = ((char *) buf) + offset; |
2087 | int32 valoffset; |
2088 | char *valptr; |
2089 | |
2090 | if (*nameptr == '\0') |
2091 | break; /* found packet terminator */ |
2092 | valoffset = offset + strlen(nameptr) + 1; |
2093 | if (valoffset >= len) |
2094 | break; /* missing value, will complain below */ |
2095 | valptr = ((char *) buf) + valoffset; |
2096 | |
2097 | if (strcmp(nameptr, "database" ) == 0) |
2098 | port->database_name = pstrdup(valptr); |
2099 | else if (strcmp(nameptr, "user" ) == 0) |
2100 | port->user_name = pstrdup(valptr); |
2101 | else if (strcmp(nameptr, "options" ) == 0) |
2102 | port->cmdline_options = pstrdup(valptr); |
2103 | else if (strcmp(nameptr, "replication" ) == 0) |
2104 | { |
2105 | /* |
2106 | * Due to backward compatibility concerns the replication |
2107 | * parameter is a hybrid beast which allows the value to be |
2108 | * either boolean or the string 'database'. The latter |
2109 | * connects to a specific database which is e.g. required for |
2110 | * logical decoding while. |
2111 | */ |
2112 | if (strcmp(valptr, "database" ) == 0) |
2113 | { |
2114 | am_walsender = true; |
2115 | am_db_walsender = true; |
2116 | } |
2117 | else if (!parse_bool(valptr, &am_walsender)) |
2118 | ereport(FATAL, |
2119 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
2120 | errmsg("invalid value for parameter \"%s\": \"%s\"" , |
2121 | "replication" , |
2122 | valptr), |
2123 | errhint("Valid values are: \"false\", 0, \"true\", 1, \"database\"." ))); |
2124 | } |
2125 | else if (strncmp(nameptr, "_pq_." , 5) == 0) |
2126 | { |
2127 | /* |
2128 | * Any option beginning with _pq_. is reserved for use as a |
2129 | * protocol-level option, but at present no such options are |
2130 | * defined. |
2131 | */ |
2132 | unrecognized_protocol_options = |
2133 | lappend(unrecognized_protocol_options, pstrdup(nameptr)); |
2134 | } |
2135 | else |
2136 | { |
2137 | /* Assume it's a generic GUC option */ |
2138 | port->guc_options = lappend(port->guc_options, |
2139 | pstrdup(nameptr)); |
2140 | port->guc_options = lappend(port->guc_options, |
2141 | pstrdup(valptr)); |
2142 | |
2143 | /* |
2144 | * Copy application_name to port if we come across it. This |
2145 | * is done so we can log the application_name in the |
2146 | * connection authorization message. Note that the GUC would |
2147 | * be used but we haven't gone through GUC setup yet. |
2148 | */ |
2149 | if (strcmp(nameptr, "application_name" ) == 0) |
2150 | { |
2151 | char *tmp_app_name = pstrdup(valptr); |
2152 | |
2153 | pg_clean_ascii(tmp_app_name); |
2154 | |
2155 | port->application_name = tmp_app_name; |
2156 | } |
2157 | } |
2158 | offset = valoffset + strlen(valptr) + 1; |
2159 | } |
2160 | |
2161 | /* |
2162 | * If we didn't find a packet terminator exactly at the end of the |
2163 | * given packet length, complain. |
2164 | */ |
2165 | if (offset != len - 1) |
2166 | ereport(FATAL, |
2167 | (errcode(ERRCODE_PROTOCOL_VIOLATION), |
2168 | errmsg("invalid startup packet layout: expected terminator as last byte" ))); |
2169 | |
2170 | /* |
2171 | * If the client requested a newer protocol version or if the client |
2172 | * requested any protocol options we didn't recognize, let them know |
2173 | * the newest minor protocol version we do support and the names of |
2174 | * any unrecognized options. |
2175 | */ |
2176 | if (PG_PROTOCOL_MINOR(proto) > PG_PROTOCOL_MINOR(PG_PROTOCOL_LATEST) || |
2177 | unrecognized_protocol_options != NIL) |
2178 | SendNegotiateProtocolVersion(unrecognized_protocol_options); |
2179 | } |
2180 | else |
2181 | { |
2182 | /* |
2183 | * Get the parameters from the old-style, fixed-width-fields startup |
2184 | * packet as C strings. The packet destination was cleared first so a |
2185 | * short packet has zeros silently added. We have to be prepared to |
2186 | * truncate the pstrdup result for oversize fields, though. |
2187 | */ |
2188 | StartupPacket *packet = (StartupPacket *) buf; |
2189 | |
2190 | port->database_name = pstrdup(packet->database); |
2191 | if (strlen(port->database_name) > sizeof(packet->database)) |
2192 | port->database_name[sizeof(packet->database)] = '\0'; |
2193 | port->user_name = pstrdup(packet->user); |
2194 | if (strlen(port->user_name) > sizeof(packet->user)) |
2195 | port->user_name[sizeof(packet->user)] = '\0'; |
2196 | port->cmdline_options = pstrdup(packet->options); |
2197 | if (strlen(port->cmdline_options) > sizeof(packet->options)) |
2198 | port->cmdline_options[sizeof(packet->options)] = '\0'; |
2199 | port->guc_options = NIL; |
2200 | } |
2201 | |
2202 | /* Check a user name was given. */ |
2203 | if (port->user_name == NULL || port->user_name[0] == '\0') |
2204 | ereport(FATAL, |
2205 | (errcode(ERRCODE_INVALID_AUTHORIZATION_SPECIFICATION), |
2206 | errmsg("no PostgreSQL user name specified in startup packet" ))); |
2207 | |
2208 | /* The database defaults to the user name. */ |
2209 | if (port->database_name == NULL || port->database_name[0] == '\0') |
2210 | port->database_name = pstrdup(port->user_name); |
2211 | |
2212 | if (Db_user_namespace) |
2213 | { |
2214 | /* |
2215 | * If user@, it is a global user, remove '@'. We only want to do this |
2216 | * if there is an '@' at the end and no earlier in the user string or |
2217 | * they may fake as a local user of another database attaching to this |
2218 | * database. |
2219 | */ |
2220 | if (strchr(port->user_name, '@') == |
2221 | port->user_name + strlen(port->user_name) - 1) |
2222 | *strchr(port->user_name, '@') = '\0'; |
2223 | else |
2224 | { |
2225 | /* Append '@' and dbname */ |
2226 | port->user_name = psprintf("%s@%s" , port->user_name, port->database_name); |
2227 | } |
2228 | } |
2229 | |
2230 | /* |
2231 | * Truncate given database and user names to length of a Postgres name. |
2232 | * This avoids lookup failures when overlength names are given. |
2233 | */ |
2234 | if (strlen(port->database_name) >= NAMEDATALEN) |
2235 | port->database_name[NAMEDATALEN - 1] = '\0'; |
2236 | if (strlen(port->user_name) >= NAMEDATALEN) |
2237 | port->user_name[NAMEDATALEN - 1] = '\0'; |
2238 | |
2239 | /* |
2240 | * Normal walsender backends, e.g. for streaming replication, are not |
2241 | * connected to a particular database. But walsenders used for logical |
2242 | * replication need to connect to a specific database. We allow streaming |
2243 | * replication commands to be issued even if connected to a database as it |
2244 | * can make sense to first make a basebackup and then stream changes |
2245 | * starting from that. |
2246 | */ |
2247 | if (am_walsender && !am_db_walsender) |
2248 | port->database_name[0] = '\0'; |
2249 | |
2250 | /* |
2251 | * Done putting stuff in TopMemoryContext. |
2252 | */ |
2253 | MemoryContextSwitchTo(oldcontext); |
2254 | |
2255 | /* |
2256 | * If we're going to reject the connection due to database state, say so |
2257 | * now instead of wasting cycles on an authentication exchange. (This also |
2258 | * allows a pg_ping utility to be written.) |
2259 | */ |
2260 | switch (port->canAcceptConnections) |
2261 | { |
2262 | case CAC_STARTUP: |
2263 | ereport(FATAL, |
2264 | (errcode(ERRCODE_CANNOT_CONNECT_NOW), |
2265 | errmsg("the database system is starting up" ))); |
2266 | break; |
2267 | case CAC_SHUTDOWN: |
2268 | ereport(FATAL, |
2269 | (errcode(ERRCODE_CANNOT_CONNECT_NOW), |
2270 | errmsg("the database system is shutting down" ))); |
2271 | break; |
2272 | case CAC_RECOVERY: |
2273 | ereport(FATAL, |
2274 | (errcode(ERRCODE_CANNOT_CONNECT_NOW), |
2275 | errmsg("the database system is in recovery mode" ))); |
2276 | break; |
2277 | case CAC_TOOMANY: |
2278 | ereport(FATAL, |
2279 | (errcode(ERRCODE_TOO_MANY_CONNECTIONS), |
2280 | errmsg("sorry, too many clients already" ))); |
2281 | break; |
2282 | case CAC_WAITBACKUP: |
2283 | /* OK for now, will check in InitPostgres */ |
2284 | break; |
2285 | case CAC_OK: |
2286 | break; |
2287 | } |
2288 | |
2289 | return STATUS_OK; |
2290 | } |
2291 | |
2292 | /* |
2293 | * Send a NegotiateProtocolVersion to the client. This lets the client know |
2294 | * that they have requested a newer minor protocol version than we are able |
2295 | * to speak. We'll speak the highest version we know about; the client can, |
2296 | * of course, abandon the connection if that's a problem. |
2297 | * |
2298 | * We also include in the response a list of protocol options we didn't |
2299 | * understand. This allows clients to include optional parameters that might |
2300 | * be present either in newer protocol versions or third-party protocol |
2301 | * extensions without fear of having to reconnect if those options are not |
2302 | * understood, while at the same time making certain that the client is aware |
2303 | * of which options were actually accepted. |
2304 | */ |
2305 | static void |
2306 | SendNegotiateProtocolVersion(List *unrecognized_protocol_options) |
2307 | { |
2308 | StringInfoData buf; |
2309 | ListCell *lc; |
2310 | |
2311 | pq_beginmessage(&buf, 'v'); /* NegotiateProtocolVersion */ |
2312 | pq_sendint32(&buf, PG_PROTOCOL_LATEST); |
2313 | pq_sendint32(&buf, list_length(unrecognized_protocol_options)); |
2314 | foreach(lc, unrecognized_protocol_options) |
2315 | pq_sendstring(&buf, lfirst(lc)); |
2316 | pq_endmessage(&buf); |
2317 | |
2318 | /* no need to flush, some other message will follow */ |
2319 | } |
2320 | |
2321 | /* |
2322 | * The client has sent a cancel request packet, not a normal |
2323 | * start-a-new-connection packet. Perform the necessary processing. |
2324 | * Nothing is sent back to the client. |
2325 | */ |
2326 | static void |
2327 | processCancelRequest(Port *port, void *pkt) |
2328 | { |
2329 | CancelRequestPacket *canc = (CancelRequestPacket *) pkt; |
2330 | int backendPID; |
2331 | int32 cancelAuthCode; |
2332 | Backend *bp; |
2333 | |
2334 | #ifndef EXEC_BACKEND |
2335 | dlist_iter iter; |
2336 | #else |
2337 | int i; |
2338 | #endif |
2339 | |
2340 | backendPID = (int) pg_ntoh32(canc->backendPID); |
2341 | cancelAuthCode = (int32) pg_ntoh32(canc->cancelAuthCode); |
2342 | |
2343 | /* |
2344 | * See if we have a matching backend. In the EXEC_BACKEND case, we can no |
2345 | * longer access the postmaster's own backend list, and must rely on the |
2346 | * duplicate array in shared memory. |
2347 | */ |
2348 | #ifndef EXEC_BACKEND |
2349 | dlist_foreach(iter, &BackendList) |
2350 | { |
2351 | bp = dlist_container(Backend, elem, iter.cur); |
2352 | #else |
2353 | for (i = MaxLivePostmasterChildren() - 1; i >= 0; i--) |
2354 | { |
2355 | bp = (Backend *) &ShmemBackendArray[i]; |
2356 | #endif |
2357 | if (bp->pid == backendPID) |
2358 | { |
2359 | if (bp->cancel_key == cancelAuthCode) |
2360 | { |
2361 | /* Found a match; signal that backend to cancel current op */ |
2362 | ereport(DEBUG2, |
2363 | (errmsg_internal("processing cancel request: sending SIGINT to process %d" , |
2364 | backendPID))); |
2365 | signal_child(bp->pid, SIGINT); |
2366 | } |
2367 | else |
2368 | /* Right PID, wrong key: no way, Jose */ |
2369 | ereport(LOG, |
2370 | (errmsg("wrong key in cancel request for process %d" , |
2371 | backendPID))); |
2372 | return; |
2373 | } |
2374 | #ifndef EXEC_BACKEND /* make GNU Emacs 26.1 see brace balance */ |
2375 | } |
2376 | #else |
2377 | } |
2378 | #endif |
2379 | |
2380 | /* No matching backend */ |
2381 | ereport(LOG, |
2382 | (errmsg("PID %d in cancel request did not match any process" , |
2383 | backendPID))); |
2384 | } |
2385 | |
2386 | /* |
2387 | * canAcceptConnections --- check to see if database state allows connections. |
2388 | */ |
2389 | static CAC_state |
2390 | canAcceptConnections(void) |
2391 | { |
2392 | CAC_state result = CAC_OK; |
2393 | |
2394 | /* |
2395 | * Can't start backends when in startup/shutdown/inconsistent recovery |
2396 | * state. |
2397 | * |
2398 | * In state PM_WAIT_BACKUP only superusers can connect (this must be |
2399 | * allowed so that a superuser can end online backup mode); we return |
2400 | * CAC_WAITBACKUP code to indicate that this must be checked later. Note |
2401 | * that neither CAC_OK nor CAC_WAITBACKUP can safely be returned until we |
2402 | * have checked for too many children. |
2403 | */ |
2404 | if (pmState != PM_RUN) |
2405 | { |
2406 | if (pmState == PM_WAIT_BACKUP) |
2407 | result = CAC_WAITBACKUP; /* allow superusers only */ |
2408 | else if (Shutdown > NoShutdown) |
2409 | return CAC_SHUTDOWN; /* shutdown is pending */ |
2410 | else if (!FatalError && |
2411 | (pmState == PM_STARTUP || |
2412 | pmState == PM_RECOVERY)) |
2413 | return CAC_STARTUP; /* normal startup */ |
2414 | else if (!FatalError && |
2415 | pmState == PM_HOT_STANDBY) |
2416 | result = CAC_OK; /* connection OK during hot standby */ |
2417 | else |
2418 | return CAC_RECOVERY; /* else must be crash recovery */ |
2419 | } |
2420 | |
2421 | /* |
2422 | * Don't start too many children. |
2423 | * |
2424 | * We allow more connections than we can have backends here because some |
2425 | * might still be authenticating; they might fail auth, or some existing |
2426 | * backend might exit before the auth cycle is completed. The exact |
2427 | * MaxBackends limit is enforced when a new backend tries to join the |
2428 | * shared-inval backend array. |
2429 | * |
2430 | * The limit here must match the sizes of the per-child-process arrays; |
2431 | * see comments for MaxLivePostmasterChildren(). |
2432 | */ |
2433 | if (CountChildren(BACKEND_TYPE_ALL) >= MaxLivePostmasterChildren()) |
2434 | result = CAC_TOOMANY; |
2435 | |
2436 | return result; |
2437 | } |
2438 | |
2439 | |
2440 | /* |
2441 | * ConnCreate -- create a local connection data structure |
2442 | * |
2443 | * Returns NULL on failure, other than out-of-memory which is fatal. |
2444 | */ |
2445 | static Port * |
2446 | ConnCreate(int serverFd) |
2447 | { |
2448 | Port *port; |
2449 | |
2450 | if (!(port = (Port *) calloc(1, sizeof(Port)))) |
2451 | { |
2452 | ereport(LOG, |
2453 | (errcode(ERRCODE_OUT_OF_MEMORY), |
2454 | errmsg("out of memory" ))); |
2455 | ExitPostmaster(1); |
2456 | } |
2457 | |
2458 | if (StreamConnection(serverFd, port) != STATUS_OK) |
2459 | { |
2460 | if (port->sock != PGINVALID_SOCKET) |
2461 | StreamClose(port->sock); |
2462 | ConnFree(port); |
2463 | return NULL; |
2464 | } |
2465 | |
2466 | /* |
2467 | * Allocate GSSAPI specific state struct |
2468 | */ |
2469 | #ifndef EXEC_BACKEND |
2470 | #if defined(ENABLE_GSS) || defined(ENABLE_SSPI) |
2471 | port->gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo)); |
2472 | if (!port->gss) |
2473 | { |
2474 | ereport(LOG, |
2475 | (errcode(ERRCODE_OUT_OF_MEMORY), |
2476 | errmsg("out of memory" ))); |
2477 | ExitPostmaster(1); |
2478 | } |
2479 | #endif |
2480 | #endif |
2481 | |
2482 | return port; |
2483 | } |
2484 | |
2485 | |
2486 | /* |
2487 | * ConnFree -- free a local connection data structure |
2488 | */ |
2489 | static void |
2490 | ConnFree(Port *conn) |
2491 | { |
2492 | #ifdef USE_SSL |
2493 | secure_close(conn); |
2494 | #endif |
2495 | if (conn->gss) |
2496 | free(conn->gss); |
2497 | free(conn); |
2498 | } |
2499 | |
2500 | |
2501 | /* |
2502 | * ClosePostmasterPorts -- close all the postmaster's open sockets |
2503 | * |
2504 | * This is called during child process startup to release file descriptors |
2505 | * that are not needed by that child process. The postmaster still has |
2506 | * them open, of course. |
2507 | * |
2508 | * Note: we pass am_syslogger as a boolean because we don't want to set |
2509 | * the global variable yet when this is called. |
2510 | */ |
2511 | void |
2512 | ClosePostmasterPorts(bool am_syslogger) |
2513 | { |
2514 | int i; |
2515 | |
2516 | #ifndef WIN32 |
2517 | |
2518 | /* |
2519 | * Close the write end of postmaster death watch pipe. It's important to |
2520 | * do this as early as possible, so that if postmaster dies, others won't |
2521 | * think that it's still running because we're holding the pipe open. |
2522 | */ |
2523 | if (close(postmaster_alive_fds[POSTMASTER_FD_OWN])) |
2524 | ereport(FATAL, |
2525 | (errcode_for_file_access(), |
2526 | errmsg_internal("could not close postmaster death monitoring pipe in child process: %m" ))); |
2527 | postmaster_alive_fds[POSTMASTER_FD_OWN] = -1; |
2528 | #endif |
2529 | |
2530 | /* Close the listen sockets */ |
2531 | for (i = 0; i < MAXLISTEN; i++) |
2532 | { |
2533 | if (ListenSocket[i] != PGINVALID_SOCKET) |
2534 | { |
2535 | StreamClose(ListenSocket[i]); |
2536 | ListenSocket[i] = PGINVALID_SOCKET; |
2537 | } |
2538 | } |
2539 | |
2540 | /* If using syslogger, close the read side of the pipe */ |
2541 | if (!am_syslogger) |
2542 | { |
2543 | #ifndef WIN32 |
2544 | if (syslogPipe[0] >= 0) |
2545 | close(syslogPipe[0]); |
2546 | syslogPipe[0] = -1; |
2547 | #else |
2548 | if (syslogPipe[0]) |
2549 | CloseHandle(syslogPipe[0]); |
2550 | syslogPipe[0] = 0; |
2551 | #endif |
2552 | } |
2553 | |
2554 | #ifdef USE_BONJOUR |
2555 | /* If using Bonjour, close the connection to the mDNS daemon */ |
2556 | if (bonjour_sdref) |
2557 | close(DNSServiceRefSockFD(bonjour_sdref)); |
2558 | #endif |
2559 | } |
2560 | |
2561 | |
2562 | /* |
2563 | * InitProcessGlobals -- set MyProcPid, MyStartTime[stamp], random seeds |
2564 | * |
2565 | * Called early in the postmaster and every backend. |
2566 | */ |
2567 | void |
2568 | InitProcessGlobals(void) |
2569 | { |
2570 | unsigned int rseed; |
2571 | |
2572 | MyProcPid = getpid(); |
2573 | MyStartTimestamp = GetCurrentTimestamp(); |
2574 | MyStartTime = timestamptz_to_time_t(MyStartTimestamp); |
2575 | |
2576 | /* |
2577 | * Set a different seed for random() in every process. We want something |
2578 | * unpredictable, so if possible, use high-quality random bits for the |
2579 | * seed. Otherwise, fall back to a seed based on timestamp and PID. |
2580 | */ |
2581 | if (!pg_strong_random(&rseed, sizeof(rseed))) |
2582 | { |
2583 | /* |
2584 | * Since PIDs and timestamps tend to change more frequently in their |
2585 | * least significant bits, shift the timestamp left to allow a larger |
2586 | * total number of seeds in a given time period. Since that would |
2587 | * leave only 20 bits of the timestamp that cycle every ~1 second, |
2588 | * also mix in some higher bits. |
2589 | */ |
2590 | rseed = ((uint64) MyProcPid) ^ |
2591 | ((uint64) MyStartTimestamp << 12) ^ |
2592 | ((uint64) MyStartTimestamp >> 20); |
2593 | } |
2594 | srandom(rseed); |
2595 | } |
2596 | |
2597 | |
/*
 * reset_shared -- create or re-create shared memory and semaphores
 *
 * port is passed straight through to CreateSharedMemoryAndSemaphores().
 *
 * Note: in each "cycle of life" we will normally assign the same IPC keys
 * (if using SysV shmem and/or semas), since the port number is used to
 * determine IPC keys.  This helps ensure that we will clean up dead IPC
 * objects if the postmaster crashes and is restarted.
 */
static void
reset_shared(int port)
{
	CreateSharedMemoryAndSemaphores(port);
}
2614 | |
2615 | |
2616 | /* |
2617 | * SIGHUP -- reread config files, and tell children to do same |
2618 | */ |
2619 | static void |
2620 | SIGHUP_handler(SIGNAL_ARGS) |
2621 | { |
2622 | int save_errno = errno; |
2623 | |
2624 | PG_SETMASK(&BlockSig); |
2625 | |
2626 | if (Shutdown <= SmartShutdown) |
2627 | { |
2628 | ereport(LOG, |
2629 | (errmsg("received SIGHUP, reloading configuration files" ))); |
2630 | ProcessConfigFile(PGC_SIGHUP); |
2631 | SignalChildren(SIGHUP); |
2632 | if (StartupPID != 0) |
2633 | signal_child(StartupPID, SIGHUP); |
2634 | if (BgWriterPID != 0) |
2635 | signal_child(BgWriterPID, SIGHUP); |
2636 | if (CheckpointerPID != 0) |
2637 | signal_child(CheckpointerPID, SIGHUP); |
2638 | if (WalWriterPID != 0) |
2639 | signal_child(WalWriterPID, SIGHUP); |
2640 | if (WalReceiverPID != 0) |
2641 | signal_child(WalReceiverPID, SIGHUP); |
2642 | if (AutoVacPID != 0) |
2643 | signal_child(AutoVacPID, SIGHUP); |
2644 | if (PgArchPID != 0) |
2645 | signal_child(PgArchPID, SIGHUP); |
2646 | if (SysLoggerPID != 0) |
2647 | signal_child(SysLoggerPID, SIGHUP); |
2648 | if (PgStatPID != 0) |
2649 | signal_child(PgStatPID, SIGHUP); |
2650 | |
2651 | /* Reload authentication config files too */ |
2652 | if (!load_hba()) |
2653 | ereport(LOG, |
2654 | /* translator: %s is a configuration file */ |
2655 | (errmsg("%s was not reloaded" , "pg_hba.conf" ))); |
2656 | |
2657 | if (!load_ident()) |
2658 | ereport(LOG, |
2659 | (errmsg("%s was not reloaded" , "pg_ident.conf" ))); |
2660 | |
2661 | #ifdef USE_SSL |
2662 | /* Reload SSL configuration as well */ |
2663 | if (EnableSSL) |
2664 | { |
2665 | if (secure_initialize(false) == 0) |
2666 | LoadedSSL = true; |
2667 | else |
2668 | ereport(LOG, |
2669 | (errmsg("SSL configuration was not reloaded" ))); |
2670 | } |
2671 | else |
2672 | { |
2673 | secure_destroy(); |
2674 | LoadedSSL = false; |
2675 | } |
2676 | #endif |
2677 | |
2678 | #ifdef EXEC_BACKEND |
2679 | /* Update the starting-point file for future children */ |
2680 | write_nondefault_variables(PGC_SIGHUP); |
2681 | #endif |
2682 | } |
2683 | |
2684 | PG_SETMASK(&UnBlockSig); |
2685 | |
2686 | errno = save_errno; |
2687 | } |
2688 | |
2689 | |
2690 | /* |
2691 | * pmdie -- signal handler for processing various postmaster signals. |
2692 | */ |
2693 | static void |
2694 | pmdie(SIGNAL_ARGS) |
2695 | { |
2696 | int save_errno = errno; |
2697 | |
2698 | PG_SETMASK(&BlockSig); |
2699 | |
2700 | ereport(DEBUG2, |
2701 | (errmsg_internal("postmaster received signal %d" , |
2702 | postgres_signal_arg))); |
2703 | |
2704 | switch (postgres_signal_arg) |
2705 | { |
2706 | case SIGTERM: |
2707 | |
2708 | /* |
2709 | * Smart Shutdown: |
2710 | * |
2711 | * Wait for children to end their work, then shut down. |
2712 | */ |
2713 | if (Shutdown >= SmartShutdown) |
2714 | break; |
2715 | Shutdown = SmartShutdown; |
2716 | ereport(LOG, |
2717 | (errmsg("received smart shutdown request" ))); |
2718 | |
2719 | /* Report status */ |
2720 | AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING); |
2721 | #ifdef USE_SYSTEMD |
2722 | sd_notify(0, "STOPPING=1" ); |
2723 | #endif |
2724 | |
2725 | if (pmState == PM_RUN || pmState == PM_RECOVERY || |
2726 | pmState == PM_HOT_STANDBY || pmState == PM_STARTUP) |
2727 | { |
2728 | /* autovac workers are told to shut down immediately */ |
2729 | /* and bgworkers too; does this need tweaking? */ |
2730 | SignalSomeChildren(SIGTERM, |
2731 | BACKEND_TYPE_AUTOVAC | BACKEND_TYPE_BGWORKER); |
2732 | /* and the autovac launcher too */ |
2733 | if (AutoVacPID != 0) |
2734 | signal_child(AutoVacPID, SIGTERM); |
2735 | /* and the bgwriter too */ |
2736 | if (BgWriterPID != 0) |
2737 | signal_child(BgWriterPID, SIGTERM); |
2738 | /* and the walwriter too */ |
2739 | if (WalWriterPID != 0) |
2740 | signal_child(WalWriterPID, SIGTERM); |
2741 | |
2742 | /* |
2743 | * If we're in recovery, we can't kill the startup process |
2744 | * right away, because at present doing so does not release |
2745 | * its locks. We might want to change this in a future |
2746 | * release. For the time being, the PM_WAIT_READONLY state |
2747 | * indicates that we're waiting for the regular (read only) |
2748 | * backends to die off; once they do, we'll kill the startup |
2749 | * and walreceiver processes. |
2750 | */ |
2751 | pmState = (pmState == PM_RUN) ? |
2752 | PM_WAIT_BACKUP : PM_WAIT_READONLY; |
2753 | } |
2754 | |
2755 | /* |
2756 | * Now wait for online backup mode to end and backends to exit. If |
2757 | * that is already the case, PostmasterStateMachine will take the |
2758 | * next step. |
2759 | */ |
2760 | PostmasterStateMachine(); |
2761 | break; |
2762 | |
2763 | case SIGINT: |
2764 | |
2765 | /* |
2766 | * Fast Shutdown: |
2767 | * |
2768 | * Abort all children with SIGTERM (rollback active transactions |
2769 | * and exit) and shut down when they are gone. |
2770 | */ |
2771 | if (Shutdown >= FastShutdown) |
2772 | break; |
2773 | Shutdown = FastShutdown; |
2774 | ereport(LOG, |
2775 | (errmsg("received fast shutdown request" ))); |
2776 | |
2777 | /* Report status */ |
2778 | AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING); |
2779 | #ifdef USE_SYSTEMD |
2780 | sd_notify(0, "STOPPING=1" ); |
2781 | #endif |
2782 | |
2783 | if (StartupPID != 0) |
2784 | signal_child(StartupPID, SIGTERM); |
2785 | if (BgWriterPID != 0) |
2786 | signal_child(BgWriterPID, SIGTERM); |
2787 | if (WalReceiverPID != 0) |
2788 | signal_child(WalReceiverPID, SIGTERM); |
2789 | if (pmState == PM_STARTUP || pmState == PM_RECOVERY) |
2790 | { |
2791 | SignalSomeChildren(SIGTERM, BACKEND_TYPE_BGWORKER); |
2792 | |
2793 | /* |
2794 | * Only startup, bgwriter, walreceiver, possibly bgworkers, |
2795 | * and/or checkpointer should be active in this state; we just |
2796 | * signaled the first four, and we don't want to kill |
2797 | * checkpointer yet. |
2798 | */ |
2799 | pmState = PM_WAIT_BACKENDS; |
2800 | } |
2801 | else if (pmState == PM_RUN || |
2802 | pmState == PM_WAIT_BACKUP || |
2803 | pmState == PM_WAIT_READONLY || |
2804 | pmState == PM_WAIT_BACKENDS || |
2805 | pmState == PM_HOT_STANDBY) |
2806 | { |
2807 | ereport(LOG, |
2808 | (errmsg("aborting any active transactions" ))); |
2809 | /* shut down all backends and workers */ |
2810 | SignalSomeChildren(SIGTERM, |
2811 | BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC | |
2812 | BACKEND_TYPE_BGWORKER); |
2813 | /* and the autovac launcher too */ |
2814 | if (AutoVacPID != 0) |
2815 | signal_child(AutoVacPID, SIGTERM); |
2816 | /* and the walwriter too */ |
2817 | if (WalWriterPID != 0) |
2818 | signal_child(WalWriterPID, SIGTERM); |
2819 | pmState = PM_WAIT_BACKENDS; |
2820 | } |
2821 | |
2822 | /* |
2823 | * Now wait for backends to exit. If there are none, |
2824 | * PostmasterStateMachine will take the next step. |
2825 | */ |
2826 | PostmasterStateMachine(); |
2827 | break; |
2828 | |
2829 | case SIGQUIT: |
2830 | |
2831 | /* |
2832 | * Immediate Shutdown: |
2833 | * |
2834 | * abort all children with SIGQUIT, wait for them to exit, |
2835 | * terminate remaining ones with SIGKILL, then exit without |
2836 | * attempt to properly shut down the data base system. |
2837 | */ |
2838 | if (Shutdown >= ImmediateShutdown) |
2839 | break; |
2840 | Shutdown = ImmediateShutdown; |
2841 | ereport(LOG, |
2842 | (errmsg("received immediate shutdown request" ))); |
2843 | |
2844 | /* Report status */ |
2845 | AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STOPPING); |
2846 | #ifdef USE_SYSTEMD |
2847 | sd_notify(0, "STOPPING=1" ); |
2848 | #endif |
2849 | |
2850 | TerminateChildren(SIGQUIT); |
2851 | pmState = PM_WAIT_BACKENDS; |
2852 | |
2853 | /* set stopwatch for them to die */ |
2854 | AbortStartTime = time(NULL); |
2855 | |
2856 | /* |
2857 | * Now wait for backends to exit. If there are none, |
2858 | * PostmasterStateMachine will take the next step. |
2859 | */ |
2860 | PostmasterStateMachine(); |
2861 | break; |
2862 | } |
2863 | |
2864 | PG_SETMASK(&UnBlockSig); |
2865 | |
2866 | errno = save_errno; |
2867 | } |
2868 | |
/*
 * Reaper -- signal handler to cleanup after a child process dies.
 *
 * Collects every pending child exit report with a non-blocking waitpid()
 * loop.  For each dead child, the PID is compared against the PIDs of the
 * special children (startup, bgwriter, checkpointer, WAL writer, WAL
 * receiver, autovacuum launcher, archiver, stats collector, syslogger), then
 * against the background workers, and finally it is treated as an ordinary
 * backend.  Each branch clears the matching PID variable and then either
 * ignores the exit, restarts the child, or hands the case to
 * HandleChildCrash().
 *
 * Once no more reports are pending, PostmasterStateMachine() is called so
 * that any state transition enabled by these exits can happen.
 *
 * errno is saved on entry and restored on exit; the BlockSig mask is
 * installed for the duration of the handler and UnBlockSig afterwards.
 */
static void
reaper(SIGNAL_ARGS)
{
	int			save_errno = errno;
	int			pid;			/* process id of dead child process */
	int			exitstatus;		/* its exit status */

	PG_SETMASK(&BlockSig);

	ereport(DEBUG4,
			(errmsg_internal("reaping dead processes")));

	/* WNOHANG: only collect children that have already exited */
	while ((pid = waitpid(-1, &exitstatus, WNOHANG)) > 0)
	{
		/*
		 * Check if this child was a startup process.
		 */
		if (pid == StartupPID)
		{
			StartupPID = 0;

			/*
			 * Startup process exited in response to a shutdown request (or it
			 * completed normally regardless of the shutdown request).
			 */
			if (Shutdown > NoShutdown &&
				(EXIT_STATUS_0(exitstatus) || EXIT_STATUS_1(exitstatus)))
			{
				StartupStatus = STARTUP_NOT_RUNNING;
				pmState = PM_WAIT_BACKENDS;
				/* PostmasterStateMachine logic does the rest */
				continue;
			}

			/*
			 * Exit status 3 is reported as "shutdown at recovery target": it
			 * is turned into a smart shutdown, with SIGTERM sent to the
			 * children.
			 */
			if (EXIT_STATUS_3(exitstatus))
			{
				ereport(LOG,
						(errmsg("shutdown at recovery target")));
				StartupStatus = STARTUP_NOT_RUNNING;
				Shutdown = SmartShutdown;
				TerminateChildren(SIGTERM);
				pmState = PM_WAIT_BACKENDS;
				/* PostmasterStateMachine logic does the rest */
				continue;
			}

			/*
			 * Unexpected exit of startup process (including FATAL exit)
			 * during PM_STARTUP is treated as catastrophic. There are no
			 * other processes running yet, so we can just exit.
			 */
			if (pmState == PM_STARTUP && !EXIT_STATUS_0(exitstatus))
			{
				LogChildExit(LOG, _("startup process"),
							 pid, exitstatus);
				ereport(LOG,
						(errmsg("aborting startup due to startup process failure")));
				/* presumably does not return -- confirm in ExitPostmaster */
				ExitPostmaster(1);
			}

			/*
			 * After PM_STARTUP, any unexpected exit (including FATAL exit) of
			 * the startup process is catastrophic, so kill other children,
			 * and set StartupStatus so we don't try to reinitialize after
			 * they're gone.  Exception: if StartupStatus is STARTUP_SIGNALED,
			 * then we previously sent the startup process a SIGQUIT; so
			 * that's probably the reason it died, and we do want to try to
			 * restart in that case.
			 */
			if (!EXIT_STATUS_0(exitstatus))
			{
				if (StartupStatus == STARTUP_SIGNALED)
					StartupStatus = STARTUP_NOT_RUNNING;
				else
					StartupStatus = STARTUP_CRASHED;
				HandleChildCrash(pid, exitstatus,
								 _("startup process"));
				continue;
			}

			/*
			 * Startup succeeded, commence normal operations
			 */
			StartupStatus = STARTUP_NOT_RUNNING;
			FatalError = false;
			/* no crash-kill stopwatch is expected to be running here */
			Assert(AbortStartTime == 0);
			ReachedNormalRunning = true;
			pmState = PM_RUN;

			/*
			 * Crank up the background tasks, if we didn't do that already
			 * when we entered consistent recovery state.  It doesn't matter
			 * if this fails, we'll just try again later.
			 */
			if (CheckpointerPID == 0)
				CheckpointerPID = StartCheckpointer();
			if (BgWriterPID == 0)
				BgWriterPID = StartBackgroundWriter();
			if (WalWriterPID == 0)
				WalWriterPID = StartWalWriter();

			/*
			 * Likewise, start other special children as needed.  In a restart
			 * situation, some of them may be alive already.
			 */
			if (!IsBinaryUpgrade && AutoVacuumingActive() && AutoVacPID == 0)
				AutoVacPID = StartAutoVacLauncher();
			if (PgArchStartupAllowed() && PgArchPID == 0)
				PgArchPID = pgarch_start();
			if (PgStatPID == 0)
				PgStatPID = pgstat_start();

			/* workers may be scheduled to start now */
			maybe_start_bgworkers();

			/* at this point we are really open for business */
			ereport(LOG,
					(errmsg("database system is ready to accept connections")));

			/* Report status */
			AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY);
#ifdef USE_SYSTEMD
			sd_notify(0, "READY=1");
#endif

			continue;
		}

		/*
		 * Was it the bgwriter?  Normal exit can be ignored; we'll start a new
		 * one at the next iteration of the postmaster's main loop, if
		 * necessary.  Any other exit condition is treated as a crash.
		 */
		if (pid == BgWriterPID)
		{
			BgWriterPID = 0;
			if (!EXIT_STATUS_0(exitstatus))
				HandleChildCrash(pid, exitstatus,
								 _("background writer process"));
			continue;
		}

		/*
		 * Was it the checkpointer?
		 */
		if (pid == CheckpointerPID)
		{
			CheckpointerPID = 0;
			if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
			{
				/*
				 * OK, we saw normal exit of the checkpointer after it's been
				 * told to shut down.  We expect that it wrote a shutdown
				 * checkpoint.  (If for some reason it didn't, recovery will
				 * occur on next postmaster start.)
				 *
				 * At this point we should have no normal backend children
				 * left (else we'd not be in PM_SHUTDOWN state) but we might
				 * have dead_end children to wait for.
				 *
				 * If we have an archiver subprocess, tell it to do a last
				 * archive cycle and quit. Likewise, if we have walsender
				 * processes, tell them to send any remaining WAL and quit.
				 */
				Assert(Shutdown > NoShutdown);

				/* Waken archiver for the last time */
				if (PgArchPID != 0)
					signal_child(PgArchPID, SIGUSR2);

				/*
				 * Waken walsenders for the last time. No regular backends
				 * should be around anymore.
				 */
				SignalChildren(SIGUSR2);

				pmState = PM_SHUTDOWN_2;

				/*
				 * We can also shut down the stats collector now; there's
				 * nothing left for it to do.
				 */
				if (PgStatPID != 0)
					signal_child(PgStatPID, SIGQUIT);
			}
			else
			{
				/*
				 * Any unexpected exit of the checkpointer (including FATAL
				 * exit) is treated as a crash.
				 */
				HandleChildCrash(pid, exitstatus,
								 _("checkpointer process"));
			}

			continue;
		}

		/*
		 * Was it the wal writer?  Normal exit can be ignored; we'll start a
		 * new one at the next iteration of the postmaster's main loop, if
		 * necessary.  Any other exit condition is treated as a crash.
		 */
		if (pid == WalWriterPID)
		{
			WalWriterPID = 0;
			if (!EXIT_STATUS_0(exitstatus))
				HandleChildCrash(pid, exitstatus,
								 _("WAL writer process"));
			continue;
		}

		/*
		 * Was it the wal receiver?  If exit status is zero (normal) or one
		 * (FATAL exit), we assume everything is all right just like normal
		 * backends.  (If we need a new wal receiver, we'll start one at the
		 * next iteration of the postmaster's main loop.)
		 */
		if (pid == WalReceiverPID)
		{
			WalReceiverPID = 0;
			if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
				HandleChildCrash(pid, exitstatus,
								 _("WAL receiver process"));
			continue;
		}

		/*
		 * Was it the autovacuum launcher?  Normal exit can be ignored; we'll
		 * start a new one at the next iteration of the postmaster's main
		 * loop, if necessary.  Any other exit condition is treated as a
		 * crash.
		 */
		if (pid == AutoVacPID)
		{
			AutoVacPID = 0;
			if (!EXIT_STATUS_0(exitstatus))
				HandleChildCrash(pid, exitstatus,
								 _("autovacuum launcher process"));
			continue;
		}

		/*
		 * Was it the archiver?  If so, just try to start a new one; no need
		 * to force reset of the rest of the system.  (If fail, we'll try
		 * again in future cycles of the main loop.).  Unless we were waiting
		 * for it to shut down; don't restart it in that case, and
		 * PostmasterStateMachine() will advance to the next shutdown step.
		 */
		if (pid == PgArchPID)
		{
			PgArchPID = 0;
			/* an abnormal exit is only logged, never treated as a crash */
			if (!EXIT_STATUS_0(exitstatus))
				LogChildExit(LOG, _("archiver process"),
							 pid, exitstatus);
			if (PgArchStartupAllowed())
				PgArchPID = pgarch_start();
			continue;
		}

		/*
		 * Was it the statistics collector?  If so, just try to start a new
		 * one; no need to force reset of the rest of the system.  (If fail,
		 * we'll try again in future cycles of the main loop.)
		 */
		if (pid == PgStatPID)
		{
			PgStatPID = 0;
			/* an abnormal exit is only logged, never treated as a crash */
			if (!EXIT_STATUS_0(exitstatus))
				LogChildExit(LOG, _("statistics collector process"),
							 pid, exitstatus);
			/* restart right away only in PM_RUN or PM_HOT_STANDBY */
			if (pmState == PM_RUN || pmState == PM_HOT_STANDBY)
				PgStatPID = pgstat_start();
			continue;
		}

		/* Was it the system logger?  If so, try to start a new one */
		if (pid == SysLoggerPID)
		{
			SysLoggerPID = 0;
			/* for safety's sake, launch new logger *first* */
			SysLoggerPID = SysLogger_Start();
			/* the exit of the old logger is reported only after that */
			if (!EXIT_STATUS_0(exitstatus))
				LogChildExit(LOG, _("system logger process"),
							 pid, exitstatus);
			continue;
		}

		/* Was it one of our background workers? */
		if (CleanupBackgroundWorker(pid, exitstatus))
		{
			/* have it be restarted */
			HaveCrashedWorker = true;
			continue;
		}

		/*
		 * Else do standard backend child cleanup.
		 */
		CleanupBackend(pid, exitstatus);
	}							/* loop over pending child-death reports */

	/*
	 * After cleaning out the SIGCHLD queue, see if we have any state changes
	 * or actions to make.
	 */
	PostmasterStateMachine();

	/* Done with signal handler */
	PG_SETMASK(&UnBlockSig);

	errno = save_errno;
}
3185 | |
3186 | /* |
3187 | * Scan the bgworkers list and see if the given PID (which has just stopped |
3188 | * or crashed) is in it. Handle its shutdown if so, and return true. If not a |
3189 | * bgworker, return false. |
3190 | * |
3191 | * This is heavily based on CleanupBackend. One important difference is that |
3192 | * we don't know yet that the dying process is a bgworker, so we must be silent |
3193 | * until we're sure it is. |
3194 | */ |
3195 | static bool |
3196 | CleanupBackgroundWorker(int pid, |
3197 | int exitstatus) /* child's exit status */ |
3198 | { |
3199 | char namebuf[MAXPGPATH]; |
3200 | slist_mutable_iter iter; |
3201 | |
3202 | slist_foreach_modify(iter, &BackgroundWorkerList) |
3203 | { |
3204 | RegisteredBgWorker *rw; |
3205 | |
3206 | rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur); |
3207 | |
3208 | if (rw->rw_pid != pid) |
3209 | continue; |
3210 | |
3211 | #ifdef WIN32 |
3212 | /* see CleanupBackend */ |
3213 | if (exitstatus == ERROR_WAIT_NO_CHILDREN) |
3214 | exitstatus = 0; |
3215 | #endif |
3216 | |
3217 | snprintf(namebuf, MAXPGPATH, _("background worker \"%s\"" ), |
3218 | rw->rw_worker.bgw_type); |
3219 | |
3220 | |
3221 | if (!EXIT_STATUS_0(exitstatus)) |
3222 | { |
3223 | /* Record timestamp, so we know when to restart the worker. */ |
3224 | rw->rw_crashed_at = GetCurrentTimestamp(); |
3225 | } |
3226 | else |
3227 | { |
3228 | /* Zero exit status means terminate */ |
3229 | rw->rw_crashed_at = 0; |
3230 | rw->rw_terminate = true; |
3231 | } |
3232 | |
3233 | /* |
3234 | * Additionally, for shared-memory-connected workers, just like a |
3235 | * backend, any exit status other than 0 or 1 is considered a crash |
3236 | * and causes a system-wide restart. |
3237 | */ |
3238 | if ((rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0) |
3239 | { |
3240 | if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) |
3241 | { |
3242 | HandleChildCrash(pid, exitstatus, namebuf); |
3243 | return true; |
3244 | } |
3245 | } |
3246 | |
3247 | /* |
3248 | * We must release the postmaster child slot whether this worker is |
3249 | * connected to shared memory or not, but we only treat it as a crash |
3250 | * if it is in fact connected. |
3251 | */ |
3252 | if (!ReleasePostmasterChildSlot(rw->rw_child_slot) && |
3253 | (rw->rw_worker.bgw_flags & BGWORKER_SHMEM_ACCESS) != 0) |
3254 | { |
3255 | HandleChildCrash(pid, exitstatus, namebuf); |
3256 | return true; |
3257 | } |
3258 | |
3259 | /* Get it out of the BackendList and clear out remaining data */ |
3260 | dlist_delete(&rw->rw_backend->elem); |
3261 | #ifdef EXEC_BACKEND |
3262 | ShmemBackendArrayRemove(rw->rw_backend); |
3263 | #endif |
3264 | |
3265 | /* |
3266 | * It's possible that this background worker started some OTHER |
3267 | * background worker and asked to be notified when that worker started |
3268 | * or stopped. If so, cancel any notifications destined for the |
3269 | * now-dead backend. |
3270 | */ |
3271 | if (rw->rw_backend->bgworker_notify) |
3272 | BackgroundWorkerStopNotifications(rw->rw_pid); |
3273 | free(rw->rw_backend); |
3274 | rw->rw_backend = NULL; |
3275 | rw->rw_pid = 0; |
3276 | rw->rw_child_slot = 0; |
3277 | ReportBackgroundWorkerExit(&iter); /* report child death */ |
3278 | |
3279 | LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG, |
3280 | namebuf, pid, exitstatus); |
3281 | |
3282 | return true; |
3283 | } |
3284 | |
3285 | return false; |
3286 | } |
3287 | |
3288 | /* |
3289 | * CleanupBackend -- cleanup after terminated backend. |
3290 | * |
3291 | * Remove all local state associated with backend. |
3292 | * |
3293 | * If you change this, see also CleanupBackgroundWorker. |
3294 | */ |
3295 | static void |
3296 | CleanupBackend(int pid, |
3297 | int exitstatus) /* child's exit status. */ |
3298 | { |
3299 | dlist_mutable_iter iter; |
3300 | |
3301 | LogChildExit(DEBUG2, _("server process" ), pid, exitstatus); |
3302 | |
3303 | /* |
3304 | * If a backend dies in an ugly way then we must signal all other backends |
3305 | * to quickdie. If exit status is zero (normal) or one (FATAL exit), we |
3306 | * assume everything is all right and proceed to remove the backend from |
3307 | * the active backend list. |
3308 | */ |
3309 | |
3310 | #ifdef WIN32 |
3311 | |
3312 | /* |
3313 | * On win32, also treat ERROR_WAIT_NO_CHILDREN (128) as nonfatal case, |
3314 | * since that sometimes happens under load when the process fails to start |
3315 | * properly (long before it starts using shared memory). Microsoft reports |
3316 | * it is related to mutex failure: |
3317 | * http://archives.postgresql.org/pgsql-hackers/2010-09/msg00790.php |
3318 | */ |
3319 | if (exitstatus == ERROR_WAIT_NO_CHILDREN) |
3320 | { |
3321 | LogChildExit(LOG, _("server process" ), pid, exitstatus); |
3322 | exitstatus = 0; |
3323 | } |
3324 | #endif |
3325 | |
3326 | if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus)) |
3327 | { |
3328 | HandleChildCrash(pid, exitstatus, _("server process" )); |
3329 | return; |
3330 | } |
3331 | |
3332 | dlist_foreach_modify(iter, &BackendList) |
3333 | { |
3334 | Backend *bp = dlist_container(Backend, elem, iter.cur); |
3335 | |
3336 | if (bp->pid == pid) |
3337 | { |
3338 | if (!bp->dead_end) |
3339 | { |
3340 | if (!ReleasePostmasterChildSlot(bp->child_slot)) |
3341 | { |
3342 | /* |
3343 | * Uh-oh, the child failed to clean itself up. Treat as a |
3344 | * crash after all. |
3345 | */ |
3346 | HandleChildCrash(pid, exitstatus, _("server process" )); |
3347 | return; |
3348 | } |
3349 | #ifdef EXEC_BACKEND |
3350 | ShmemBackendArrayRemove(bp); |
3351 | #endif |
3352 | } |
3353 | if (bp->bgworker_notify) |
3354 | { |
3355 | /* |
3356 | * This backend may have been slated to receive SIGUSR1 when |
3357 | * some background worker started or stopped. Cancel those |
3358 | * notifications, as we don't want to signal PIDs that are not |
3359 | * PostgreSQL backends. This gets skipped in the (probably |
3360 | * very common) case where the backend has never requested any |
3361 | * such notifications. |
3362 | */ |
3363 | BackgroundWorkerStopNotifications(bp->pid); |
3364 | } |
3365 | dlist_delete(iter.cur); |
3366 | free(bp); |
3367 | break; |
3368 | } |
3369 | } |
3370 | } |
3371 | |
3372 | /* |
3373 | * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer, |
3374 | * walwriter, autovacuum, or background worker. |
3375 | * |
3376 | * The objectives here are to clean up our local state about the child |
3377 | * process, and to signal all other remaining children to quickdie. |
3378 | */ |
3379 | static void |
3380 | HandleChildCrash(int pid, int exitstatus, const char *procname) |
3381 | { |
3382 | dlist_mutable_iter iter; |
3383 | slist_iter siter; |
3384 | Backend *bp; |
3385 | bool take_action; |
3386 | |
3387 | /* |
3388 | * We only log messages and send signals if this is the first process |
3389 | * crash and we're not doing an immediate shutdown; otherwise, we're only |
3390 | * here to update postmaster's idea of live processes. If we have already |
3391 | * signalled children, nonzero exit status is to be expected, so don't |
3392 | * clutter log. |
3393 | */ |
3394 | take_action = !FatalError && Shutdown != ImmediateShutdown; |
3395 | |
3396 | if (take_action) |
3397 | { |
3398 | LogChildExit(LOG, procname, pid, exitstatus); |
3399 | ereport(LOG, |
3400 | (errmsg("terminating any other active server processes" ))); |
3401 | } |
3402 | |
3403 | /* Process background workers. */ |
3404 | slist_foreach(siter, &BackgroundWorkerList) |
3405 | { |
3406 | RegisteredBgWorker *rw; |
3407 | |
3408 | rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur); |
3409 | if (rw->rw_pid == 0) |
3410 | continue; /* not running */ |
3411 | if (rw->rw_pid == pid) |
3412 | { |
3413 | /* |
3414 | * Found entry for freshly-dead worker, so remove it. |
3415 | */ |
3416 | (void) ReleasePostmasterChildSlot(rw->rw_child_slot); |
3417 | dlist_delete(&rw->rw_backend->elem); |
3418 | #ifdef EXEC_BACKEND |
3419 | ShmemBackendArrayRemove(rw->rw_backend); |
3420 | #endif |
3421 | free(rw->rw_backend); |
3422 | rw->rw_backend = NULL; |
3423 | rw->rw_pid = 0; |
3424 | rw->rw_child_slot = 0; |
3425 | /* don't reset crashed_at */ |
3426 | /* don't report child stop, either */ |
3427 | /* Keep looping so we can signal remaining workers */ |
3428 | } |
3429 | else |
3430 | { |
3431 | /* |
3432 | * This worker is still alive. Unless we did so already, tell it |
3433 | * to commit hara-kiri. |
3434 | * |
3435 | * SIGQUIT is the special signal that says exit without proc_exit |
3436 | * and let the user know what's going on. But if SendStop is set |
3437 | * (-s on command line), then we send SIGSTOP instead, so that we |
3438 | * can get core dumps from all backends by hand. |
3439 | */ |
3440 | if (take_action) |
3441 | { |
3442 | ereport(DEBUG2, |
3443 | (errmsg_internal("sending %s to process %d" , |
3444 | (SendStop ? "SIGSTOP" : "SIGQUIT" ), |
3445 | (int) rw->rw_pid))); |
3446 | signal_child(rw->rw_pid, (SendStop ? SIGSTOP : SIGQUIT)); |
3447 | } |
3448 | } |
3449 | } |
3450 | |
3451 | /* Process regular backends */ |
3452 | dlist_foreach_modify(iter, &BackendList) |
3453 | { |
3454 | bp = dlist_container(Backend, elem, iter.cur); |
3455 | |
3456 | if (bp->pid == pid) |
3457 | { |
3458 | /* |
3459 | * Found entry for freshly-dead backend, so remove it. |
3460 | */ |
3461 | if (!bp->dead_end) |
3462 | { |
3463 | (void) ReleasePostmasterChildSlot(bp->child_slot); |
3464 | #ifdef EXEC_BACKEND |
3465 | ShmemBackendArrayRemove(bp); |
3466 | #endif |
3467 | } |
3468 | dlist_delete(iter.cur); |
3469 | free(bp); |
3470 | /* Keep looping so we can signal remaining backends */ |
3471 | } |
3472 | else |
3473 | { |
3474 | /* |
3475 | * This backend is still alive. Unless we did so already, tell it |
3476 | * to commit hara-kiri. |
3477 | * |
3478 | * SIGQUIT is the special signal that says exit without proc_exit |
3479 | * and let the user know what's going on. But if SendStop is set |
3480 | * (-s on command line), then we send SIGSTOP instead, so that we |
3481 | * can get core dumps from all backends by hand. |
3482 | * |
3483 | * We could exclude dead_end children here, but at least in the |
3484 | * SIGSTOP case it seems better to include them. |
3485 | * |
3486 | * Background workers were already processed above; ignore them |
3487 | * here. |
3488 | */ |
3489 | if (bp->bkend_type == BACKEND_TYPE_BGWORKER) |
3490 | continue; |
3491 | |
3492 | if (take_action) |
3493 | { |
3494 | ereport(DEBUG2, |
3495 | (errmsg_internal("sending %s to process %d" , |
3496 | (SendStop ? "SIGSTOP" : "SIGQUIT" ), |
3497 | (int) bp->pid))); |
3498 | signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT)); |
3499 | } |
3500 | } |
3501 | } |
3502 | |
3503 | /* Take care of the startup process too */ |
3504 | if (pid == StartupPID) |
3505 | { |
3506 | StartupPID = 0; |
3507 | StartupStatus = STARTUP_CRASHED; |
3508 | } |
3509 | else if (StartupPID != 0 && take_action) |
3510 | { |
3511 | ereport(DEBUG2, |
3512 | (errmsg_internal("sending %s to process %d" , |
3513 | (SendStop ? "SIGSTOP" : "SIGQUIT" ), |
3514 | (int) StartupPID))); |
3515 | signal_child(StartupPID, (SendStop ? SIGSTOP : SIGQUIT)); |
3516 | StartupStatus = STARTUP_SIGNALED; |
3517 | } |
3518 | |
3519 | /* Take care of the bgwriter too */ |
3520 | if (pid == BgWriterPID) |
3521 | BgWriterPID = 0; |
3522 | else if (BgWriterPID != 0 && take_action) |
3523 | { |
3524 | ereport(DEBUG2, |
3525 | (errmsg_internal("sending %s to process %d" , |
3526 | (SendStop ? "SIGSTOP" : "SIGQUIT" ), |
3527 | (int) BgWriterPID))); |
3528 | signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT)); |
3529 | } |
3530 | |
3531 | /* Take care of the checkpointer too */ |
3532 | if (pid == CheckpointerPID) |
3533 | CheckpointerPID = 0; |
3534 | else if (CheckpointerPID != 0 && take_action) |
3535 | { |
3536 | ereport(DEBUG2, |
3537 | (errmsg_internal("sending %s to process %d" , |
3538 | (SendStop ? "SIGSTOP" : "SIGQUIT" ), |
3539 | (int) CheckpointerPID))); |
3540 | signal_child(CheckpointerPID, (SendStop ? SIGSTOP : SIGQUIT)); |
3541 | } |
3542 | |
3543 | /* Take care of the walwriter too */ |
3544 | if (pid == WalWriterPID) |
3545 | WalWriterPID = 0; |
3546 | else if (WalWriterPID != 0 && take_action) |
3547 | { |
3548 | ereport(DEBUG2, |
3549 | (errmsg_internal("sending %s to process %d" , |
3550 | (SendStop ? "SIGSTOP" : "SIGQUIT" ), |
3551 | (int) WalWriterPID))); |
3552 | signal_child(WalWriterPID, (SendStop ? SIGSTOP : SIGQUIT)); |
3553 | } |
3554 | |
3555 | /* Take care of the walreceiver too */ |
3556 | if (pid == WalReceiverPID) |
3557 | WalReceiverPID = 0; |
3558 | else if (WalReceiverPID != 0 && take_action) |
3559 | { |
3560 | ereport(DEBUG2, |
3561 | (errmsg_internal("sending %s to process %d" , |
3562 | (SendStop ? "SIGSTOP" : "SIGQUIT" ), |
3563 | (int) WalReceiverPID))); |
3564 | signal_child(WalReceiverPID, (SendStop ? SIGSTOP : SIGQUIT)); |
3565 | } |
3566 | |
3567 | /* Take care of the autovacuum launcher too */ |
3568 | if (pid == AutoVacPID) |
3569 | AutoVacPID = 0; |
3570 | else if (AutoVacPID != 0 && take_action) |
3571 | { |
3572 | ereport(DEBUG2, |
3573 | (errmsg_internal("sending %s to process %d" , |
3574 | (SendStop ? "SIGSTOP" : "SIGQUIT" ), |
3575 | (int) AutoVacPID))); |
3576 | signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT)); |
3577 | } |
3578 | |
3579 | /* |
3580 | * Force a power-cycle of the pgarch process too. (This isn't absolutely |
3581 | * necessary, but it seems like a good idea for robustness, and it |
3582 | * simplifies the state-machine logic in the case where a shutdown request |
3583 | * arrives during crash processing.) |
3584 | */ |
3585 | if (PgArchPID != 0 && take_action) |
3586 | { |
3587 | ereport(DEBUG2, |
3588 | (errmsg_internal("sending %s to process %d" , |
3589 | "SIGQUIT" , |
3590 | (int) PgArchPID))); |
3591 | signal_child(PgArchPID, SIGQUIT); |
3592 | } |
3593 | |
3594 | /* |
3595 | * Force a power-cycle of the pgstat process too. (This isn't absolutely |
3596 | * necessary, but it seems like a good idea for robustness, and it |
3597 | * simplifies the state-machine logic in the case where a shutdown request |
3598 | * arrives during crash processing.) |
3599 | */ |
3600 | if (PgStatPID != 0 && take_action) |
3601 | { |
3602 | ereport(DEBUG2, |
3603 | (errmsg_internal("sending %s to process %d" , |
3604 | "SIGQUIT" , |
3605 | (int) PgStatPID))); |
3606 | signal_child(PgStatPID, SIGQUIT); |
3607 | allow_immediate_pgstat_restart(); |
3608 | } |
3609 | |
3610 | /* We do NOT restart the syslogger */ |
3611 | |
3612 | if (Shutdown != ImmediateShutdown) |
3613 | FatalError = true; |
3614 | |
3615 | /* We now transit into a state of waiting for children to die */ |
3616 | if (pmState == PM_RECOVERY || |
3617 | pmState == PM_HOT_STANDBY || |
3618 | pmState == PM_RUN || |
3619 | pmState == PM_WAIT_BACKUP || |
3620 | pmState == PM_WAIT_READONLY || |
3621 | pmState == PM_SHUTDOWN) |
3622 | pmState = PM_WAIT_BACKENDS; |
3623 | |
3624 | /* |
3625 | * .. and if this doesn't happen quickly enough, now the clock is ticking |
3626 | * for us to kill them without mercy. |
3627 | */ |
3628 | if (AbortStartTime == 0) |
3629 | AbortStartTime = time(NULL); |
3630 | } |
3631 | |
3632 | /* |
3633 | * Log the death of a child process. |
3634 | */ |
3635 | static void |
3636 | LogChildExit(int lev, const char *procname, int pid, int exitstatus) |
3637 | { |
3638 | /* |
3639 | * size of activity_buffer is arbitrary, but set equal to default |
3640 | * track_activity_query_size |
3641 | */ |
3642 | char activity_buffer[1024]; |
3643 | const char *activity = NULL; |
3644 | |
3645 | if (!EXIT_STATUS_0(exitstatus)) |
3646 | activity = pgstat_get_crashed_backend_activity(pid, |
3647 | activity_buffer, |
3648 | sizeof(activity_buffer)); |
3649 | |
3650 | if (WIFEXITED(exitstatus)) |
3651 | ereport(lev, |
3652 | |
3653 | /*------ |
3654 | translator: %s is a noun phrase describing a child process, such as |
3655 | "server process" */ |
3656 | (errmsg("%s (PID %d) exited with exit code %d" , |
3657 | procname, pid, WEXITSTATUS(exitstatus)), |
3658 | activity ? errdetail("Failed process was running: %s" , activity) : 0)); |
3659 | else if (WIFSIGNALED(exitstatus)) |
3660 | { |
3661 | #if defined(WIN32) |
3662 | ereport(lev, |
3663 | |
3664 | /*------ |
3665 | translator: %s is a noun phrase describing a child process, such as |
3666 | "server process" */ |
3667 | (errmsg("%s (PID %d) was terminated by exception 0x%X" , |
3668 | procname, pid, WTERMSIG(exitstatus)), |
3669 | errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value." ), |
3670 | activity ? errdetail("Failed process was running: %s" , activity) : 0)); |
3671 | #else |
3672 | ereport(lev, |
3673 | |
3674 | /*------ |
3675 | translator: %s is a noun phrase describing a child process, such as |
3676 | "server process" */ |
3677 | (errmsg("%s (PID %d) was terminated by signal %d: %s" , |
3678 | procname, pid, WTERMSIG(exitstatus), |
3679 | pg_strsignal(WTERMSIG(exitstatus))), |
3680 | activity ? errdetail("Failed process was running: %s" , activity) : 0)); |
3681 | #endif |
3682 | } |
3683 | else |
3684 | ereport(lev, |
3685 | |
3686 | /*------ |
3687 | translator: %s is a noun phrase describing a child process, such as |
3688 | "server process" */ |
3689 | (errmsg("%s (PID %d) exited with unrecognized status %d" , |
3690 | procname, pid, exitstatus), |
3691 | activity ? errdetail("Failed process was running: %s" , activity) : 0)); |
3692 | } |
3693 | |
3694 | /* |
3695 | * Advance the postmaster's state machine and take actions as appropriate |
3696 | * |
3697 | * This is common code for pmdie(), reaper() and sigusr1_handler(), which |
3698 | * receive the signals that might mean we need to change state. |
3699 | */ |
3700 | static void |
3701 | PostmasterStateMachine(void) |
3702 | { |
3703 | if (pmState == PM_WAIT_BACKUP) |
3704 | { |
3705 | /* |
3706 | * PM_WAIT_BACKUP state ends when online backup mode is not active. |
3707 | */ |
3708 | if (!BackupInProgress()) |
3709 | pmState = PM_WAIT_BACKENDS; |
3710 | } |
3711 | |
3712 | if (pmState == PM_WAIT_READONLY) |
3713 | { |
3714 | /* |
3715 | * PM_WAIT_READONLY state ends when we have no regular backends that |
3716 | * have been started during recovery. We kill the startup and |
3717 | * walreceiver processes and transition to PM_WAIT_BACKENDS. Ideally, |
3718 | * we might like to kill these processes first and then wait for |
3719 | * backends to die off, but that doesn't work at present because |
3720 | * killing the startup process doesn't release its locks. |
3721 | */ |
3722 | if (CountChildren(BACKEND_TYPE_NORMAL) == 0) |
3723 | { |
3724 | if (StartupPID != 0) |
3725 | signal_child(StartupPID, SIGTERM); |
3726 | if (WalReceiverPID != 0) |
3727 | signal_child(WalReceiverPID, SIGTERM); |
3728 | pmState = PM_WAIT_BACKENDS; |
3729 | } |
3730 | } |
3731 | |
3732 | /* |
3733 | * If we are in a state-machine state that implies waiting for backends to |
3734 | * exit, see if they're all gone, and change state if so. |
3735 | */ |
3736 | if (pmState == PM_WAIT_BACKENDS) |
3737 | { |
3738 | /* |
3739 | * PM_WAIT_BACKENDS state ends when we have no regular backends |
3740 | * (including autovac workers), no bgworkers (including unconnected |
3741 | * ones), and no walwriter, autovac launcher or bgwriter. If we are |
3742 | * doing crash recovery or an immediate shutdown then we expect the |
3743 | * checkpointer to exit as well, otherwise not. The archiver, stats, |
3744 | * and syslogger processes are disregarded since they are not |
3745 | * connected to shared memory; we also disregard dead_end children |
3746 | * here. Walsenders are also disregarded, they will be terminated |
3747 | * later after writing the checkpoint record, like the archiver |
3748 | * process. |
3749 | */ |
3750 | if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_WORKER) == 0 && |
3751 | StartupPID == 0 && |
3752 | WalReceiverPID == 0 && |
3753 | BgWriterPID == 0 && |
3754 | (CheckpointerPID == 0 || |
3755 | (!FatalError && Shutdown < ImmediateShutdown)) && |
3756 | WalWriterPID == 0 && |
3757 | AutoVacPID == 0) |
3758 | { |
3759 | if (Shutdown >= ImmediateShutdown || FatalError) |
3760 | { |
3761 | /* |
3762 | * Start waiting for dead_end children to die. This state |
3763 | * change causes ServerLoop to stop creating new ones. |
3764 | */ |
3765 | pmState = PM_WAIT_DEAD_END; |
3766 | |
3767 | /* |
3768 | * We already SIGQUIT'd the archiver and stats processes, if |
3769 | * any, when we started immediate shutdown or entered |
3770 | * FatalError state. |
3771 | */ |
3772 | } |
3773 | else |
3774 | { |
3775 | /* |
3776 | * If we get here, we are proceeding with normal shutdown. All |
3777 | * the regular children are gone, and it's time to tell the |
3778 | * checkpointer to do a shutdown checkpoint. |
3779 | */ |
3780 | Assert(Shutdown > NoShutdown); |
3781 | /* Start the checkpointer if not running */ |
3782 | if (CheckpointerPID == 0) |
3783 | CheckpointerPID = StartCheckpointer(); |
3784 | /* And tell it to shut down */ |
3785 | if (CheckpointerPID != 0) |
3786 | { |
3787 | signal_child(CheckpointerPID, SIGUSR2); |
3788 | pmState = PM_SHUTDOWN; |
3789 | } |
3790 | else |
3791 | { |
3792 | /* |
3793 | * If we failed to fork a checkpointer, just shut down. |
3794 | * Any required cleanup will happen at next restart. We |
3795 | * set FatalError so that an "abnormal shutdown" message |
3796 | * gets logged when we exit. |
3797 | */ |
3798 | FatalError = true; |
3799 | pmState = PM_WAIT_DEAD_END; |
3800 | |
3801 | /* Kill the walsenders, archiver and stats collector too */ |
3802 | SignalChildren(SIGQUIT); |
3803 | if (PgArchPID != 0) |
3804 | signal_child(PgArchPID, SIGQUIT); |
3805 | if (PgStatPID != 0) |
3806 | signal_child(PgStatPID, SIGQUIT); |
3807 | } |
3808 | } |
3809 | } |
3810 | } |
3811 | |
3812 | if (pmState == PM_SHUTDOWN_2) |
3813 | { |
3814 | /* |
3815 | * PM_SHUTDOWN_2 state ends when there's no other children than |
3816 | * dead_end children left. There shouldn't be any regular backends |
3817 | * left by now anyway; what we're really waiting for is walsenders and |
3818 | * archiver. |
3819 | */ |
3820 | if (PgArchPID == 0 && CountChildren(BACKEND_TYPE_ALL) == 0) |
3821 | { |
3822 | pmState = PM_WAIT_DEAD_END; |
3823 | } |
3824 | } |
3825 | |
3826 | if (pmState == PM_WAIT_DEAD_END) |
3827 | { |
3828 | /* |
3829 | * PM_WAIT_DEAD_END state ends when the BackendList is entirely empty |
3830 | * (ie, no dead_end children remain), and the archiver and stats |
3831 | * collector are gone too. |
3832 | * |
3833 | * The reason we wait for those two is to protect them against a new |
3834 | * postmaster starting conflicting subprocesses; this isn't an |
3835 | * ironclad protection, but it at least helps in the |
3836 | * shutdown-and-immediately-restart scenario. Note that they have |
3837 | * already been sent appropriate shutdown signals, either during a |
3838 | * normal state transition leading up to PM_WAIT_DEAD_END, or during |
3839 | * FatalError processing. |
3840 | */ |
3841 | if (dlist_is_empty(&BackendList) && |
3842 | PgArchPID == 0 && PgStatPID == 0) |
3843 | { |
3844 | /* These other guys should be dead already */ |
3845 | Assert(StartupPID == 0); |
3846 | Assert(WalReceiverPID == 0); |
3847 | Assert(BgWriterPID == 0); |
3848 | Assert(CheckpointerPID == 0); |
3849 | Assert(WalWriterPID == 0); |
3850 | Assert(AutoVacPID == 0); |
3851 | /* syslogger is not considered here */ |
3852 | pmState = PM_NO_CHILDREN; |
3853 | } |
3854 | } |
3855 | |
3856 | /* |
3857 | * If we've been told to shut down, we exit as soon as there are no |
3858 | * remaining children. If there was a crash, cleanup will occur at the |
3859 | * next startup. (Before PostgreSQL 8.3, we tried to recover from the |
3860 | * crash before exiting, but that seems unwise if we are quitting because |
3861 | * we got SIGTERM from init --- there may well not be time for recovery |
3862 | * before init decides to SIGKILL us.) |
3863 | * |
3864 | * Note that the syslogger continues to run. It will exit when it sees |
3865 | * EOF on its input pipe, which happens when there are no more upstream |
3866 | * processes. |
3867 | */ |
3868 | if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN) |
3869 | { |
3870 | if (FatalError) |
3871 | { |
3872 | ereport(LOG, (errmsg("abnormal database system shutdown" ))); |
3873 | ExitPostmaster(1); |
3874 | } |
3875 | else |
3876 | { |
3877 | /* |
3878 | * Terminate exclusive backup mode to avoid recovery after a clean |
3879 | * fast shutdown. Since an exclusive backup can only be taken |
3880 | * during normal running (and not, for example, while running |
3881 | * under Hot Standby) it only makes sense to do this if we reached |
3882 | * normal running. If we're still in recovery, the backup file is |
3883 | * one we're recovering *from*, and we must keep it around so that |
3884 | * recovery restarts from the right place. |
3885 | */ |
3886 | if (ReachedNormalRunning) |
3887 | CancelBackup(); |
3888 | |
3889 | /* Normal exit from the postmaster is here */ |
3890 | ExitPostmaster(0); |
3891 | } |
3892 | } |
3893 | |
3894 | /* |
3895 | * If the startup process failed, or the user does not want an automatic |
3896 | * restart after backend crashes, wait for all non-syslogger children to |
3897 | * exit, and then exit postmaster. We don't try to reinitialize when the |
3898 | * startup process fails, because more than likely it will just fail again |
3899 | * and we will keep trying forever. |
3900 | */ |
3901 | if (pmState == PM_NO_CHILDREN && |
3902 | (StartupStatus == STARTUP_CRASHED || !restart_after_crash)) |
3903 | ExitPostmaster(1); |
3904 | |
3905 | /* |
3906 | * If we need to recover from a crash, wait for all non-syslogger children |
3907 | * to exit, then reset shmem and StartupDataBase. |
3908 | */ |
3909 | if (FatalError && pmState == PM_NO_CHILDREN) |
3910 | { |
3911 | ereport(LOG, |
3912 | (errmsg("all server processes terminated; reinitializing" ))); |
3913 | |
3914 | /* allow background workers to immediately restart */ |
3915 | ResetBackgroundWorkerCrashTimes(); |
3916 | |
3917 | shmem_exit(1); |
3918 | |
3919 | /* re-read control file into local memory */ |
3920 | LocalProcessControlFile(true); |
3921 | |
3922 | reset_shared(PostPortNumber); |
3923 | |
3924 | StartupPID = StartupDataBase(); |
3925 | Assert(StartupPID != 0); |
3926 | StartupStatus = STARTUP_RUNNING; |
3927 | pmState = PM_STARTUP; |
3928 | /* crash recovery started, reset SIGKILL flag */ |
3929 | AbortStartTime = 0; |
3930 | } |
3931 | } |
3932 | |
3933 | |
3934 | /* |
3935 | * Send a signal to a postmaster child process |
3936 | * |
3937 | * On systems that have setsid(), each child process sets itself up as a |
3938 | * process group leader. For signals that are generally interpreted in the |
3939 | * appropriate fashion, we signal the entire process group not just the |
3940 | * direct child process. This allows us to, for example, SIGQUIT a blocked |
3941 | * archive_recovery script, or SIGINT a script being run by a backend via |
3942 | * system(). |
3943 | * |
3944 | * There is a race condition for recently-forked children: they might not |
3945 | * have executed setsid() yet. So we signal the child directly as well as |
3946 | * the group. We assume such a child will handle the signal before trying |
3947 | * to spawn any grandchild processes. We also assume that signaling the |
3948 | * child twice will not cause any problems. |
3949 | */ |
3950 | static void |
3951 | signal_child(pid_t pid, int signal) |
3952 | { |
3953 | if (kill(pid, signal) < 0) |
3954 | elog(DEBUG3, "kill(%ld,%d) failed: %m" , (long) pid, signal); |
3955 | #ifdef HAVE_SETSID |
3956 | switch (signal) |
3957 | { |
3958 | case SIGINT: |
3959 | case SIGTERM: |
3960 | case SIGQUIT: |
3961 | case SIGSTOP: |
3962 | case SIGKILL: |
3963 | if (kill(-pid, signal) < 0) |
3964 | elog(DEBUG3, "kill(%ld,%d) failed: %m" , (long) (-pid), signal); |
3965 | break; |
3966 | default: |
3967 | break; |
3968 | } |
3969 | #endif |
3970 | } |
3971 | |
3972 | /* |
3973 | * Send a signal to the targeted children (but NOT special children; |
3974 | * dead_end children are never signaled, either). |
3975 | */ |
3976 | static bool |
3977 | SignalSomeChildren(int signal, int target) |
3978 | { |
3979 | dlist_iter iter; |
3980 | bool signaled = false; |
3981 | |
3982 | dlist_foreach(iter, &BackendList) |
3983 | { |
3984 | Backend *bp = dlist_container(Backend, elem, iter.cur); |
3985 | |
3986 | if (bp->dead_end) |
3987 | continue; |
3988 | |
3989 | /* |
3990 | * Since target == BACKEND_TYPE_ALL is the most common case, we test |
3991 | * it first and avoid touching shared memory for every child. |
3992 | */ |
3993 | if (target != BACKEND_TYPE_ALL) |
3994 | { |
3995 | /* |
3996 | * Assign bkend_type for any recently announced WAL Sender |
3997 | * processes. |
3998 | */ |
3999 | if (bp->bkend_type == BACKEND_TYPE_NORMAL && |
4000 | IsPostmasterChildWalSender(bp->child_slot)) |
4001 | bp->bkend_type = BACKEND_TYPE_WALSND; |
4002 | |
4003 | if (!(target & bp->bkend_type)) |
4004 | continue; |
4005 | } |
4006 | |
4007 | ereport(DEBUG4, |
4008 | (errmsg_internal("sending signal %d to process %d" , |
4009 | signal, (int) bp->pid))); |
4010 | signal_child(bp->pid, signal); |
4011 | signaled = true; |
4012 | } |
4013 | return signaled; |
4014 | } |
4015 | |
4016 | /* |
4017 | * Send a termination signal to children. This considers all of our children |
4018 | * processes, except syslogger and dead_end backends. |
4019 | */ |
4020 | static void |
4021 | TerminateChildren(int signal) |
4022 | { |
4023 | SignalChildren(signal); |
4024 | if (StartupPID != 0) |
4025 | { |
4026 | signal_child(StartupPID, signal); |
4027 | if (signal == SIGQUIT || signal == SIGKILL) |
4028 | StartupStatus = STARTUP_SIGNALED; |
4029 | } |
4030 | if (BgWriterPID != 0) |
4031 | signal_child(BgWriterPID, signal); |
4032 | if (CheckpointerPID != 0) |
4033 | signal_child(CheckpointerPID, signal); |
4034 | if (WalWriterPID != 0) |
4035 | signal_child(WalWriterPID, signal); |
4036 | if (WalReceiverPID != 0) |
4037 | signal_child(WalReceiverPID, signal); |
4038 | if (AutoVacPID != 0) |
4039 | signal_child(AutoVacPID, signal); |
4040 | if (PgArchPID != 0) |
4041 | signal_child(PgArchPID, signal); |
4042 | if (PgStatPID != 0) |
4043 | signal_child(PgStatPID, signal); |
4044 | } |
4045 | |
4046 | /* |
4047 | * BackendStartup -- start backend process |
4048 | * |
4049 | * returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise. |
4050 | * |
4051 | * Note: if you change this code, also consider StartAutovacuumWorker. |
4052 | */ |
4053 | static int |
4054 | BackendStartup(Port *port) |
4055 | { |
4056 | Backend *bn; /* for backend cleanup */ |
4057 | pid_t pid; |
4058 | |
4059 | /* |
4060 | * Create backend data structure. Better before the fork() so we can |
4061 | * handle failure cleanly. |
4062 | */ |
4063 | bn = (Backend *) malloc(sizeof(Backend)); |
4064 | if (!bn) |
4065 | { |
4066 | ereport(LOG, |
4067 | (errcode(ERRCODE_OUT_OF_MEMORY), |
4068 | errmsg("out of memory" ))); |
4069 | return STATUS_ERROR; |
4070 | } |
4071 | |
4072 | /* |
4073 | * Compute the cancel key that will be assigned to this backend. The |
4074 | * backend will have its own copy in the forked-off process' value of |
4075 | * MyCancelKey, so that it can transmit the key to the frontend. |
4076 | */ |
4077 | if (!RandomCancelKey(&MyCancelKey)) |
4078 | { |
4079 | free(bn); |
4080 | ereport(LOG, |
4081 | (errcode(ERRCODE_INTERNAL_ERROR), |
4082 | errmsg("could not generate random cancel key" ))); |
4083 | return STATUS_ERROR; |
4084 | } |
4085 | |
4086 | bn->cancel_key = MyCancelKey; |
4087 | |
4088 | /* Pass down canAcceptConnections state */ |
4089 | port->canAcceptConnections = canAcceptConnections(); |
4090 | bn->dead_end = (port->canAcceptConnections != CAC_OK && |
4091 | port->canAcceptConnections != CAC_WAITBACKUP); |
4092 | |
4093 | /* |
4094 | * Unless it's a dead_end child, assign it a child slot number |
4095 | */ |
4096 | if (!bn->dead_end) |
4097 | bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot(); |
4098 | else |
4099 | bn->child_slot = 0; |
4100 | |
4101 | /* Hasn't asked to be notified about any bgworkers yet */ |
4102 | bn->bgworker_notify = false; |
4103 | |
4104 | #ifdef EXEC_BACKEND |
4105 | pid = backend_forkexec(port); |
4106 | #else /* !EXEC_BACKEND */ |
4107 | pid = fork_process(); |
4108 | if (pid == 0) /* child */ |
4109 | { |
4110 | free(bn); |
4111 | |
4112 | /* Detangle from postmaster */ |
4113 | InitPostmasterChild(); |
4114 | |
4115 | /* Close the postmaster's sockets */ |
4116 | ClosePostmasterPorts(false); |
4117 | |
4118 | /* Perform additional initialization and collect startup packet */ |
4119 | BackendInitialize(port); |
4120 | |
4121 | /* And run the backend */ |
4122 | BackendRun(port); |
4123 | } |
4124 | #endif /* EXEC_BACKEND */ |
4125 | |
4126 | if (pid < 0) |
4127 | { |
4128 | /* in parent, fork failed */ |
4129 | int save_errno = errno; |
4130 | |
4131 | if (!bn->dead_end) |
4132 | (void) ReleasePostmasterChildSlot(bn->child_slot); |
4133 | free(bn); |
4134 | errno = save_errno; |
4135 | ereport(LOG, |
4136 | (errmsg("could not fork new process for connection: %m" ))); |
4137 | report_fork_failure_to_client(port, save_errno); |
4138 | return STATUS_ERROR; |
4139 | } |
4140 | |
4141 | /* in parent, successful fork */ |
4142 | ereport(DEBUG2, |
4143 | (errmsg_internal("forked new backend, pid=%d socket=%d" , |
4144 | (int) pid, (int) port->sock))); |
4145 | |
4146 | /* |
4147 | * Everything's been successful, it's safe to add this backend to our list |
4148 | * of backends. |
4149 | */ |
4150 | bn->pid = pid; |
4151 | bn->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */ |
4152 | dlist_push_head(&BackendList, &bn->elem); |
4153 | |
4154 | #ifdef EXEC_BACKEND |
4155 | if (!bn->dead_end) |
4156 | ShmemBackendArrayAdd(bn); |
4157 | #endif |
4158 | |
4159 | return STATUS_OK; |
4160 | } |
4161 | |
4162 | /* |
4163 | * Try to report backend fork() failure to client before we close the |
4164 | * connection. Since we do not care to risk blocking the postmaster on |
4165 | * this connection, we set the connection to non-blocking and try only once. |
4166 | * |
4167 | * This is grungy special-purpose code; we cannot use backend libpq since |
4168 | * it's not up and running. |
4169 | */ |
4170 | static void |
4171 | report_fork_failure_to_client(Port *port, int errnum) |
4172 | { |
4173 | char buffer[1000]; |
4174 | int rc; |
4175 | |
4176 | /* Format the error message packet (always V2 protocol) */ |
4177 | snprintf(buffer, sizeof(buffer), "E%s%s\n" , |
4178 | _("could not fork new process for connection: " ), |
4179 | strerror(errnum)); |
4180 | |
4181 | /* Set port to non-blocking. Don't do send() if this fails */ |
4182 | if (!pg_set_noblock(port->sock)) |
4183 | return; |
4184 | |
4185 | /* We'll retry after EINTR, but ignore all other failures */ |
4186 | do |
4187 | { |
4188 | rc = send(port->sock, buffer, strlen(buffer) + 1, 0); |
4189 | } while (rc < 0 && errno == EINTR); |
4190 | } |
4191 | |
4192 | |
4193 | /* |
4194 | * BackendInitialize -- initialize an interactive (postmaster-child) |
4195 | * backend process, and collect the client's startup packet. |
4196 | * |
4197 | * returns: nothing. Will not return at all if there's any failure. |
4198 | * |
4199 | * Note: this code does not depend on having any access to shared memory. |
4200 | * In the EXEC_BACKEND case, we are physically attached to shared memory |
4201 | * but have not yet set up most of our local pointers to shmem structures. |
4202 | */ |
4203 | static void |
4204 | BackendInitialize(Port *port) |
4205 | { |
4206 | int status; |
4207 | int ret; |
4208 | char remote_host[NI_MAXHOST]; |
4209 | char remote_port[NI_MAXSERV]; |
4210 | char remote_ps_data[NI_MAXHOST]; |
4211 | |
4212 | /* Save port etc. for ps status */ |
4213 | MyProcPort = port; |
4214 | |
4215 | /* |
4216 | * PreAuthDelay is a debugging aid for investigating problems in the |
4217 | * authentication cycle: it can be set in postgresql.conf to allow time to |
4218 | * attach to the newly-forked backend with a debugger. (See also |
4219 | * PostAuthDelay, which we allow clients to pass through PGOPTIONS, but it |
4220 | * is not honored until after authentication.) |
4221 | */ |
4222 | if (PreAuthDelay > 0) |
4223 | pg_usleep(PreAuthDelay * 1000000L); |
4224 | |
4225 | /* This flag will remain set until InitPostgres finishes authentication */ |
4226 | ClientAuthInProgress = true; /* limit visibility of log messages */ |
4227 | |
4228 | /* set these to empty in case they are needed before we set them up */ |
4229 | port->remote_host = "" ; |
4230 | port->remote_port = "" ; |
4231 | |
4232 | /* |
4233 | * Initialize libpq and enable reporting of ereport errors to the client. |
4234 | * Must do this now because authentication uses libpq to send messages. |
4235 | */ |
4236 | pq_init(); /* initialize libpq to talk to client */ |
4237 | whereToSendOutput = DestRemote; /* now safe to ereport to client */ |
4238 | |
4239 | /* |
4240 | * We arrange for a simple exit(1) if we receive SIGTERM or SIGQUIT or |
4241 | * timeout while trying to collect the startup packet. Otherwise the |
4242 | * postmaster cannot shutdown the database FAST or IMMED cleanly if a |
4243 | * buggy client fails to send the packet promptly. XXX it follows that |
4244 | * the remainder of this function must tolerate losing control at any |
4245 | * instant. Likewise, any pg_on_exit_callback registered before or during |
4246 | * this function must be prepared to execute at any instant between here |
4247 | * and the end of this function. Furthermore, affected callbacks execute |
4248 | * partially or not at all when a second exit-inducing signal arrives |
4249 | * after proc_exit_prepare() decrements on_proc_exit_index. (Thanks to |
4250 | * that mechanic, callbacks need not anticipate more than one call.) This |
4251 | * is fragile; it ought to instead follow the norm of handling interrupts |
4252 | * at selected, safe opportunities. |
4253 | */ |
4254 | pqsignal(SIGTERM, startup_die); |
4255 | pqsignal(SIGQUIT, startup_die); |
4256 | InitializeTimeouts(); /* establishes SIGALRM handler */ |
4257 | PG_SETMASK(&StartupBlockSig); |
4258 | |
4259 | /* |
4260 | * Get the remote host name and port for logging and status display. |
4261 | */ |
4262 | remote_host[0] = '\0'; |
4263 | remote_port[0] = '\0'; |
4264 | if ((ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen, |
4265 | remote_host, sizeof(remote_host), |
4266 | remote_port, sizeof(remote_port), |
4267 | (log_hostname ? 0 : NI_NUMERICHOST) | NI_NUMERICSERV)) != 0) |
4268 | ereport(WARNING, |
4269 | (errmsg_internal("pg_getnameinfo_all() failed: %s" , |
4270 | gai_strerror(ret)))); |
4271 | if (remote_port[0] == '\0') |
4272 | snprintf(remote_ps_data, sizeof(remote_ps_data), "%s" , remote_host); |
4273 | else |
4274 | snprintf(remote_ps_data, sizeof(remote_ps_data), "%s(%s)" , remote_host, remote_port); |
4275 | |
4276 | /* |
4277 | * Save remote_host and remote_port in port structure (after this, they |
4278 | * will appear in log_line_prefix data for log messages). |
4279 | */ |
4280 | port->remote_host = strdup(remote_host); |
4281 | port->remote_port = strdup(remote_port); |
4282 | |
4283 | /* And now we can issue the Log_connections message, if wanted */ |
4284 | if (Log_connections) |
4285 | { |
4286 | if (remote_port[0]) |
4287 | ereport(LOG, |
4288 | (errmsg("connection received: host=%s port=%s" , |
4289 | remote_host, |
4290 | remote_port))); |
4291 | else |
4292 | ereport(LOG, |
4293 | (errmsg("connection received: host=%s" , |
4294 | remote_host))); |
4295 | } |
4296 | |
4297 | /* |
4298 | * If we did a reverse lookup to name, we might as well save the results |
4299 | * rather than possibly repeating the lookup during authentication. |
4300 | * |
4301 | * Note that we don't want to specify NI_NAMEREQD above, because then we'd |
4302 | * get nothing useful for a client without an rDNS entry. Therefore, we |
4303 | * must check whether we got a numeric IPv4 or IPv6 address, and not save |
4304 | * it into remote_hostname if so. (This test is conservative and might |
4305 | * sometimes classify a hostname as numeric, but an error in that |
4306 | * direction is safe; it only results in a possible extra lookup.) |
4307 | */ |
4308 | if (log_hostname && |
4309 | ret == 0 && |
4310 | strspn(remote_host, "0123456789." ) < strlen(remote_host) && |
4311 | strspn(remote_host, "0123456789ABCDEFabcdef:" ) < strlen(remote_host)) |
4312 | port->remote_hostname = strdup(remote_host); |
4313 | |
4314 | /* |
4315 | * Ready to begin client interaction. We will give up and exit(1) after a |
4316 | * time delay, so that a broken client can't hog a connection |
4317 | * indefinitely. PreAuthDelay and any DNS interactions above don't count |
4318 | * against the time limit. |
4319 | * |
4320 | * Note: AuthenticationTimeout is applied here while waiting for the |
4321 | * startup packet, and then again in InitPostgres for the duration of any |
4322 | * authentication operations. So a hostile client could tie up the |
4323 | * process for nearly twice AuthenticationTimeout before we kick him off. |
4324 | * |
4325 | * Note: because PostgresMain will call InitializeTimeouts again, the |
4326 | * registration of STARTUP_PACKET_TIMEOUT will be lost. This is okay |
4327 | * since we never use it again after this function. |
4328 | */ |
4329 | RegisterTimeout(STARTUP_PACKET_TIMEOUT, StartupPacketTimeoutHandler); |
4330 | enable_timeout_after(STARTUP_PACKET_TIMEOUT, AuthenticationTimeout * 1000); |
4331 | |
4332 | /* |
4333 | * Receive the startup packet (which might turn out to be a cancel request |
4334 | * packet). |
4335 | */ |
4336 | status = ProcessStartupPacket(port, false); |
4337 | |
4338 | /* |
4339 | * Stop here if it was bad or a cancel packet. ProcessStartupPacket |
4340 | * already did any appropriate error reporting. |
4341 | */ |
4342 | if (status != STATUS_OK) |
4343 | proc_exit(0); |
4344 | |
4345 | /* |
4346 | * Now that we have the user and database name, we can set the process |
4347 | * title for ps. It's good to do this as early as possible in startup. |
4348 | * |
4349 | * For a walsender, the ps display is set in the following form: |
4350 | * |
4351 | * postgres: walsender <user> <host> <activity> |
4352 | * |
4353 | * To achieve that, we pass "walsender" as username and username as dbname |
4354 | * to init_ps_display(). XXX: should add a new variant of |
4355 | * init_ps_display() to avoid abusing the parameters like this. |
4356 | */ |
4357 | if (am_walsender) |
4358 | init_ps_display(pgstat_get_backend_desc(B_WAL_SENDER), port->user_name, remote_ps_data, |
4359 | update_process_title ? "authentication" : "" ); |
4360 | else |
4361 | init_ps_display(port->user_name, port->database_name, remote_ps_data, |
4362 | update_process_title ? "authentication" : "" ); |
4363 | |
4364 | /* |
4365 | * Disable the timeout, and prevent SIGTERM/SIGQUIT again. |
4366 | */ |
4367 | disable_timeout(STARTUP_PACKET_TIMEOUT, false); |
4368 | PG_SETMASK(&BlockSig); |
4369 | } |
4370 | |
4371 | |
4372 | /* |
4373 | * BackendRun -- set up the backend's argument list and invoke PostgresMain() |
4374 | * |
4375 | * returns: |
4376 | * Shouldn't return at all. |
4377 | * If PostgresMain() fails, return status. |
4378 | */ |
4379 | static void |
4380 | BackendRun(Port *port) |
4381 | { |
4382 | char **av; |
4383 | int maxac; |
4384 | int ac; |
4385 | int i; |
4386 | |
4387 | /* |
4388 | * Now, build the argv vector that will be given to PostgresMain. |
4389 | * |
4390 | * The maximum possible number of commandline arguments that could come |
4391 | * from ExtraOptions is (strlen(ExtraOptions) + 1) / 2; see |
4392 | * pg_split_opts(). |
4393 | */ |
4394 | maxac = 2; /* for fixed args supplied below */ |
4395 | maxac += (strlen(ExtraOptions) + 1) / 2; |
4396 | |
4397 | av = (char **) MemoryContextAlloc(TopMemoryContext, |
4398 | maxac * sizeof(char *)); |
4399 | ac = 0; |
4400 | |
4401 | av[ac++] = "postgres" ; |
4402 | |
4403 | /* |
4404 | * Pass any backend switches specified with -o on the postmaster's own |
4405 | * command line. We assume these are secure. |
4406 | */ |
4407 | pg_split_opts(av, &ac, ExtraOptions); |
4408 | |
4409 | av[ac] = NULL; |
4410 | |
4411 | Assert(ac < maxac); |
4412 | |
4413 | /* |
4414 | * Debug: print arguments being passed to backend |
4415 | */ |
4416 | ereport(DEBUG3, |
4417 | (errmsg_internal("%s child[%d]: starting with (" , |
4418 | progname, (int) getpid()))); |
4419 | for (i = 0; i < ac; ++i) |
4420 | ereport(DEBUG3, |
4421 | (errmsg_internal("\t%s" , av[i]))); |
4422 | ereport(DEBUG3, |
4423 | (errmsg_internal(")" ))); |
4424 | |
4425 | /* |
4426 | * Make sure we aren't in PostmasterContext anymore. (We can't delete it |
4427 | * just yet, though, because InitPostgres will need the HBA data.) |
4428 | */ |
4429 | MemoryContextSwitchTo(TopMemoryContext); |
4430 | |
4431 | PostgresMain(ac, av, port->database_name, port->user_name); |
4432 | } |
4433 | |
4434 | |
4435 | #ifdef EXEC_BACKEND |
4436 | |
4437 | /* |
4438 | * postmaster_forkexec -- fork and exec a postmaster subprocess |
4439 | * |
4440 | * The caller must have set up the argv array already, except for argv[2] |
4441 | * which will be filled with the name of the temp variable file. |
4442 | * |
4443 | * Returns the child process PID, or -1 on fork failure (a suitable error |
4444 | * message has been logged on failure). |
4445 | * |
4446 | * All uses of this routine will dispatch to SubPostmasterMain in the |
4447 | * child process. |
4448 | */ |
4449 | pid_t |
4450 | postmaster_forkexec(int argc, char *argv[]) |
4451 | { |
4452 | Port port; |
4453 | |
4454 | /* This entry point passes dummy values for the Port variables */ |
4455 | memset(&port, 0, sizeof(port)); |
4456 | return internal_forkexec(argc, argv, &port); |
4457 | } |
4458 | |
4459 | /* |
4460 | * backend_forkexec -- fork/exec off a backend process |
4461 | * |
4462 | * Some operating systems (WIN32) don't have fork() so we have to simulate |
4463 | * it by storing parameters that need to be passed to the child and |
4464 | * then create a new child process. |
4465 | * |
4466 | * returns the pid of the fork/exec'd process, or -1 on failure |
4467 | */ |
4468 | static pid_t |
4469 | backend_forkexec(Port *port) |
4470 | { |
4471 | char *av[4]; |
4472 | int ac = 0; |
4473 | |
4474 | av[ac++] = "postgres" ; |
4475 | av[ac++] = "--forkbackend" ; |
4476 | av[ac++] = NULL; /* filled in by internal_forkexec */ |
4477 | |
4478 | av[ac] = NULL; |
4479 | Assert(ac < lengthof(av)); |
4480 | |
4481 | return internal_forkexec(ac, av, port); |
4482 | } |
4483 | |
4484 | #ifndef WIN32 |
4485 | |
4486 | /* |
4487 | * internal_forkexec non-win32 implementation |
4488 | * |
4489 | * - writes out backend variables to the parameter file |
4490 | * - fork():s, and then exec():s the child process |
4491 | */ |
4492 | static pid_t |
4493 | internal_forkexec(int argc, char *argv[], Port *port) |
4494 | { |
4495 | static unsigned long tmpBackendFileNum = 0; |
4496 | pid_t pid; |
4497 | char tmpfilename[MAXPGPATH]; |
4498 | BackendParameters param; |
4499 | FILE *fp; |
4500 | |
4501 | if (!save_backend_variables(¶m, port)) |
4502 | return -1; /* log made by save_backend_variables */ |
4503 | |
4504 | /* Calculate name for temp file */ |
4505 | snprintf(tmpfilename, MAXPGPATH, "%s/%s.backend_var.%d.%lu" , |
4506 | PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX, |
4507 | MyProcPid, ++tmpBackendFileNum); |
4508 | |
4509 | /* Open file */ |
4510 | fp = AllocateFile(tmpfilename, PG_BINARY_W); |
4511 | if (!fp) |
4512 | { |
4513 | /* |
4514 | * As in OpenTemporaryFileInTablespace, try to make the temp-file |
4515 | * directory, ignoring errors. |
4516 | */ |
4517 | (void) MakePGDirectory(PG_TEMP_FILES_DIR); |
4518 | |
4519 | fp = AllocateFile(tmpfilename, PG_BINARY_W); |
4520 | if (!fp) |
4521 | { |
4522 | ereport(LOG, |
4523 | (errcode_for_file_access(), |
4524 | errmsg("could not create file \"%s\": %m" , |
4525 | tmpfilename))); |
4526 | return -1; |
4527 | } |
4528 | } |
4529 | |
4530 | if (fwrite(¶m, sizeof(param), 1, fp) != 1) |
4531 | { |
4532 | ereport(LOG, |
4533 | (errcode_for_file_access(), |
4534 | errmsg("could not write to file \"%s\": %m" , tmpfilename))); |
4535 | FreeFile(fp); |
4536 | return -1; |
4537 | } |
4538 | |
4539 | /* Release file */ |
4540 | if (FreeFile(fp)) |
4541 | { |
4542 | ereport(LOG, |
4543 | (errcode_for_file_access(), |
4544 | errmsg("could not write to file \"%s\": %m" , tmpfilename))); |
4545 | return -1; |
4546 | } |
4547 | |
4548 | /* Make sure caller set up argv properly */ |
4549 | Assert(argc >= 3); |
4550 | Assert(argv[argc] == NULL); |
4551 | Assert(strncmp(argv[1], "--fork" , 6) == 0); |
4552 | Assert(argv[2] == NULL); |
4553 | |
4554 | /* Insert temp file name after --fork argument */ |
4555 | argv[2] = tmpfilename; |
4556 | |
4557 | /* Fire off execv in child */ |
4558 | if ((pid = fork_process()) == 0) |
4559 | { |
4560 | if (execv(postgres_exec_path, argv) < 0) |
4561 | { |
4562 | ereport(LOG, |
4563 | (errmsg("could not execute server process \"%s\": %m" , |
4564 | postgres_exec_path))); |
4565 | /* We're already in the child process here, can't return */ |
4566 | exit(1); |
4567 | } |
4568 | } |
4569 | |
4570 | return pid; /* Parent returns pid, or -1 on fork failure */ |
4571 | } |
4572 | #else /* WIN32 */ |
4573 | |
4574 | /* |
4575 | * internal_forkexec win32 implementation |
4576 | * |
4577 | * - starts backend using CreateProcess(), in suspended state |
4578 | * - writes out backend variables to the parameter file |
4579 | * - during this, duplicates handles and sockets required for |
4580 | * inheritance into the new process |
4581 | * - resumes execution of the new process once the backend parameter |
4582 | * file is complete. |
4583 | */ |
4584 | static pid_t |
4585 | internal_forkexec(int argc, char *argv[], Port *port) |
4586 | { |
4587 | int retry_count = 0; |
4588 | STARTUPINFO si; |
4589 | PROCESS_INFORMATION pi; |
4590 | int i; |
4591 | int j; |
4592 | char cmdLine[MAXPGPATH * 2]; |
4593 | HANDLE paramHandle; |
4594 | BackendParameters *param; |
4595 | SECURITY_ATTRIBUTES sa; |
4596 | char paramHandleStr[32]; |
4597 | win32_deadchild_waitinfo *childinfo; |
4598 | |
4599 | /* Make sure caller set up argv properly */ |
4600 | Assert(argc >= 3); |
4601 | Assert(argv[argc] == NULL); |
4602 | Assert(strncmp(argv[1], "--fork" , 6) == 0); |
4603 | Assert(argv[2] == NULL); |
4604 | |
4605 | /* Resume here if we need to retry */ |
4606 | retry: |
4607 | |
4608 | /* Set up shared memory for parameter passing */ |
4609 | ZeroMemory(&sa, sizeof(sa)); |
4610 | sa.nLength = sizeof(sa); |
4611 | sa.bInheritHandle = TRUE; |
4612 | paramHandle = CreateFileMapping(INVALID_HANDLE_VALUE, |
4613 | &sa, |
4614 | PAGE_READWRITE, |
4615 | 0, |
4616 | sizeof(BackendParameters), |
4617 | NULL); |
4618 | if (paramHandle == INVALID_HANDLE_VALUE) |
4619 | { |
4620 | elog(LOG, "could not create backend parameter file mapping: error code %lu" , |
4621 | GetLastError()); |
4622 | return -1; |
4623 | } |
4624 | |
4625 | param = MapViewOfFile(paramHandle, FILE_MAP_WRITE, 0, 0, sizeof(BackendParameters)); |
4626 | if (!param) |
4627 | { |
4628 | elog(LOG, "could not map backend parameter memory: error code %lu" , |
4629 | GetLastError()); |
4630 | CloseHandle(paramHandle); |
4631 | return -1; |
4632 | } |
4633 | |
4634 | /* Insert temp file name after --fork argument */ |
4635 | #ifdef _WIN64 |
4636 | sprintf(paramHandleStr, "%llu" , (LONG_PTR) paramHandle); |
4637 | #else |
4638 | sprintf(paramHandleStr, "%lu" , (DWORD) paramHandle); |
4639 | #endif |
4640 | argv[2] = paramHandleStr; |
4641 | |
4642 | /* Format the cmd line */ |
4643 | cmdLine[sizeof(cmdLine) - 1] = '\0'; |
4644 | cmdLine[sizeof(cmdLine) - 2] = '\0'; |
4645 | snprintf(cmdLine, sizeof(cmdLine) - 1, "\"%s\"" , postgres_exec_path); |
4646 | i = 0; |
4647 | while (argv[++i] != NULL) |
4648 | { |
4649 | j = strlen(cmdLine); |
4650 | snprintf(cmdLine + j, sizeof(cmdLine) - 1 - j, " \"%s\"" , argv[i]); |
4651 | } |
4652 | if (cmdLine[sizeof(cmdLine) - 2] != '\0') |
4653 | { |
4654 | elog(LOG, "subprocess command line too long" ); |
4655 | return -1; |
4656 | } |
4657 | |
4658 | memset(&pi, 0, sizeof(pi)); |
4659 | memset(&si, 0, sizeof(si)); |
4660 | si.cb = sizeof(si); |
4661 | |
4662 | /* |
4663 | * Create the subprocess in a suspended state. This will be resumed later, |
4664 | * once we have written out the parameter file. |
4665 | */ |
4666 | if (!CreateProcess(NULL, cmdLine, NULL, NULL, TRUE, CREATE_SUSPENDED, |
4667 | NULL, NULL, &si, &pi)) |
4668 | { |
4669 | elog(LOG, "CreateProcess call failed: %m (error code %lu)" , |
4670 | GetLastError()); |
4671 | return -1; |
4672 | } |
4673 | |
4674 | if (!save_backend_variables(param, port, pi.hProcess, pi.dwProcessId)) |
4675 | { |
4676 | /* |
4677 | * log made by save_backend_variables, but we have to clean up the |
4678 | * mess with the half-started process |
4679 | */ |
4680 | if (!TerminateProcess(pi.hProcess, 255)) |
4681 | ereport(LOG, |
4682 | (errmsg_internal("could not terminate unstarted process: error code %lu" , |
4683 | GetLastError()))); |
4684 | CloseHandle(pi.hProcess); |
4685 | CloseHandle(pi.hThread); |
4686 | return -1; /* log made by save_backend_variables */ |
4687 | } |
4688 | |
4689 | /* Drop the parameter shared memory that is now inherited to the backend */ |
4690 | if (!UnmapViewOfFile(param)) |
4691 | elog(LOG, "could not unmap view of backend parameter file: error code %lu" , |
4692 | GetLastError()); |
4693 | if (!CloseHandle(paramHandle)) |
4694 | elog(LOG, "could not close handle to backend parameter file: error code %lu" , |
4695 | GetLastError()); |
4696 | |
4697 | /* |
4698 | * Reserve the memory region used by our main shared memory segment before |
4699 | * we resume the child process. Normally this should succeed, but if ASLR |
4700 | * is active then it might sometimes fail due to the stack or heap having |
4701 | * gotten mapped into that range. In that case, just terminate the |
4702 | * process and retry. |
4703 | */ |
4704 | if (!pgwin32_ReserveSharedMemoryRegion(pi.hProcess)) |
4705 | { |
4706 | /* pgwin32_ReserveSharedMemoryRegion already made a log entry */ |
4707 | if (!TerminateProcess(pi.hProcess, 255)) |
4708 | ereport(LOG, |
4709 | (errmsg_internal("could not terminate process that failed to reserve memory: error code %lu" , |
4710 | GetLastError()))); |
4711 | CloseHandle(pi.hProcess); |
4712 | CloseHandle(pi.hThread); |
4713 | if (++retry_count < 100) |
4714 | goto retry; |
4715 | ereport(LOG, |
4716 | (errmsg("giving up after too many tries to reserve shared memory" ), |
4717 | errhint("This might be caused by ASLR or antivirus software." ))); |
4718 | return -1; |
4719 | } |
4720 | |
4721 | /* |
4722 | * Now that the backend variables are written out, we start the child |
4723 | * thread so it can start initializing while we set up the rest of the |
4724 | * parent state. |
4725 | */ |
4726 | if (ResumeThread(pi.hThread) == -1) |
4727 | { |
4728 | if (!TerminateProcess(pi.hProcess, 255)) |
4729 | { |
4730 | ereport(LOG, |
4731 | (errmsg_internal("could not terminate unstartable process: error code %lu" , |
4732 | GetLastError()))); |
4733 | CloseHandle(pi.hProcess); |
4734 | CloseHandle(pi.hThread); |
4735 | return -1; |
4736 | } |
4737 | CloseHandle(pi.hProcess); |
4738 | CloseHandle(pi.hThread); |
4739 | ereport(LOG, |
4740 | (errmsg_internal("could not resume thread of unstarted process: error code %lu" , |
4741 | GetLastError()))); |
4742 | return -1; |
4743 | } |
4744 | |
4745 | /* |
4746 | * Queue a waiter to signal when this child dies. The wait will be handled |
4747 | * automatically by an operating system thread pool. |
4748 | * |
4749 | * Note: use malloc instead of palloc, since it needs to be thread-safe. |
4750 | * Struct will be free():d from the callback function that runs on a |
4751 | * different thread. |
4752 | */ |
4753 | childinfo = malloc(sizeof(win32_deadchild_waitinfo)); |
4754 | if (!childinfo) |
4755 | ereport(FATAL, |
4756 | (errcode(ERRCODE_OUT_OF_MEMORY), |
4757 | errmsg("out of memory" ))); |
4758 | |
4759 | childinfo->procHandle = pi.hProcess; |
4760 | childinfo->procId = pi.dwProcessId; |
4761 | |
4762 | if (!RegisterWaitForSingleObject(&childinfo->waitHandle, |
4763 | pi.hProcess, |
4764 | pgwin32_deadchild_callback, |
4765 | childinfo, |
4766 | INFINITE, |
4767 | WT_EXECUTEONLYONCE | WT_EXECUTEINWAITTHREAD)) |
4768 | ereport(FATAL, |
4769 | (errmsg_internal("could not register process for wait: error code %lu" , |
4770 | GetLastError()))); |
4771 | |
4772 | /* Don't close pi.hProcess here - the wait thread needs access to it */ |
4773 | |
4774 | CloseHandle(pi.hThread); |
4775 | |
4776 | return pi.dwProcessId; |
4777 | } |
4778 | #endif /* WIN32 */ |
4779 | |
4780 | |
/*
 * SubPostmasterMain -- Get the fork/exec'd process into a state equivalent
 *			to what it would be if we'd simply forked on Unix, and then
 *			dispatch to the appropriate place.
 *
 * The first two command line arguments are expected to be "--forkFOO"
 * (where FOO indicates which postmaster child we are to become), and
 * the name of a variables file that we can read to load data that would
 * have been inherited by fork() on Unix.  (In the WIN32 build, argv[2]
 * instead carries the textual value of a handle to a parameter mapping;
 * see internal_forkexec.)  Remaining arguments go to the
 * subprocess FooMain() routine.
 *
 * argv[1] is examined several times below: to decide whether to re-attach
 * to shared memory, to set the autovacuum identity flags, and finally to
 * pick the routine to run.  If no known "--forkFOO" value matches, we end
 * up in abort().
 */
void
SubPostmasterMain(int argc, char *argv[])
{
	Port		port;

	/* In EXEC_BACKEND case we will not have inherited these settings */
	IsPostmasterEnvironment = true;
	whereToSendOutput = DestNone;

	/* Setup as postmaster child */
	InitPostmasterChild();

	/* Setup essential subsystems (to ensure elog() behaves sanely) */
	InitializeGUCOptions();

	/* Check we got appropriate args */
	if (argc < 3)
		elog(FATAL, "invalid subpostmaster invocation" );

	/* Read in the variables file */
	memset(&port, 0, sizeof(Port));
	read_backend_variables(argv[2], &port);

	/*
	 * Close the postmaster's sockets (as soon as we know them).  The
	 * argument is true only when this child is the one started with
	 * "--forklog".
	 */
	ClosePostmasterPorts(strcmp(argv[1], "--forklog" ) == 0);

	/*
	 * Set reference point for stack-depth checking
	 */
	set_stack_base();

	/*
	 * Set up memory area for GSS information. Mirrors the code in ConnCreate
	 * for the non-exec case.
	 */
#if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
	port.gss = (pg_gssinfo *) calloc(1, sizeof(pg_gssinfo));
	if (!port.gss)
		ereport(FATAL,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory" )));
#endif

	/*
	 * If appropriate, physically re-attach to shared memory segment. We want
	 * to do this before going any further to ensure that we can attach at the
	 * same address the postmaster used.  On the other hand, if we choose not
	 * to re-attach, we may have other cleanup to do.
	 *
	 * Backends, the autovacuum launcher and workers, "--forkboot" children
	 * and background workers re-attach; every other child type takes the
	 * no-reattach path.
	 *
	 * If testing EXEC_BACKEND on Linux, you should run this as root before
	 * starting the postmaster:
	 *
	 * echo 0 >/proc/sys/kernel/randomize_va_space
	 *
	 * This prevents using randomized stack and code addresses that cause the
	 * child process's memory map to be different from the parent's, making it
	 * sometimes impossible to attach to shared memory at the desired address.
	 * Return the setting to its old value (usually '1' or '2') when finished.
	 */
	if (strcmp(argv[1], "--forkbackend" ) == 0 ||
		strcmp(argv[1], "--forkavlauncher" ) == 0 ||
		strcmp(argv[1], "--forkavworker" ) == 0 ||
		strcmp(argv[1], "--forkboot" ) == 0 ||
		strncmp(argv[1], "--forkbgworker=" , 15) == 0)
		PGSharedMemoryReAttach();
	else
		PGSharedMemoryNoReAttach();

	/* autovacuum needs this set before calling InitProcess */
	if (strcmp(argv[1], "--forkavlauncher" ) == 0)
		AutovacuumLauncherIAm();
	if (strcmp(argv[1], "--forkavworker" ) == 0)
		AutovacuumWorkerIAm();

	/*
	 * Start our win32 signal implementation. This has to be done after we
	 * read the backend variables, because we need to pick up the signal pipe
	 * from the parent process.
	 */
#ifdef WIN32
	pgwin32_signal_initialize();
#endif

	/* In EXEC_BACKEND case we will not have inherited these settings */
	pqinitmask();
	PG_SETMASK(&BlockSig);

	/* Read in remaining GUC variables */
	read_nondefault_variables();

	/*
	 * Check that the data directory looks valid, which will also check the
	 * privileges on the data directory and update our umask and file/group
	 * variables for creating files later.  Note: this should really be done
	 * before we create any files or directories.
	 */
	checkDataDir();

	/*
	 * (re-)read control file, as it contains config. The postmaster will
	 * already have read this, but this process doesn't know about that.
	 */
	LocalProcessControlFile(false);

	/*
	 * Reload any libraries that were preloaded by the postmaster.  Since we
	 * exec'd this process, those libraries didn't come along with us; but we
	 * should load them into all child processes to be consistent with the
	 * non-EXEC_BACKEND behavior.
	 */
	process_shared_preload_libraries();

	/*
	 * Run backend or appropriate child.  The branches below are tested one
	 * after another rather than as an else-if chain; that works because
	 * each branch ends in a call that is not expected to return.
	 */
	if (strcmp(argv[1], "--forkbackend" ) == 0)
	{
		Assert(argc == 3);		/* shouldn't be any more args */

		/*
		 * Need to reinitialize the SSL library in the backend, since the
		 * context structures contain function pointers and cannot be passed
		 * through the parameter file.
		 *
		 * If for some reason reload fails (maybe the user installed broken
		 * key files), soldier on without SSL; that's better than all
		 * connections becoming impossible.
		 *
		 * XXX should we do this in all child processes?  For the moment it's
		 * enough to do it in backend children.
		 */
#ifdef USE_SSL
		if (EnableSSL)
		{
			if (secure_initialize(false) == 0)
				LoadedSSL = true;
			else
				ereport(LOG,
						(errmsg("SSL configuration could not be loaded in child process" )));
		}
#endif

		/*
		 * Perform additional initialization and collect startup packet.
		 *
		 * We want to do this before InitProcess() for a couple of reasons: 1.
		 * so that we aren't eating up a PGPROC slot while waiting on the
		 * client. 2. so that if InitProcess() fails due to being out of
		 * PGPROC slots, we have already initialized libpq and are able to
		 * report the error to the client.
		 */
		BackendInitialize(&port);

		/* Restore basic shared memory pointers */
		InitShmemAccess(UsedShmemSegAddr);

		/* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
		InitProcess();

		/* Attach process to shared data structures */
		CreateSharedMemoryAndSemaphores(0);

		/* And run the backend */
		BackendRun(&port);		/* does not return */
	}
	if (strcmp(argv[1], "--forkboot" ) == 0)
	{
		/* Restore basic shared memory pointers */
		InitShmemAccess(UsedShmemSegAddr);

		/* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
		InitAuxiliaryProcess();

		/* Attach process to shared data structures */
		CreateSharedMemoryAndSemaphores(0);

		/* Drop the first two arguments, so old argv[2] becomes argv[0] */
		AuxiliaryProcessMain(argc - 2, argv + 2);	/* does not return */
	}
	if (strcmp(argv[1], "--forkavlauncher" ) == 0)
	{
		/* Restore basic shared memory pointers */
		InitShmemAccess(UsedShmemSegAddr);

		/* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
		InitProcess();

		/* Attach process to shared data structures */
		CreateSharedMemoryAndSemaphores(0);

		AutoVacLauncherMain(argc - 2, argv + 2);	/* does not return */
	}
	if (strcmp(argv[1], "--forkavworker" ) == 0)
	{
		/* Restore basic shared memory pointers */
		InitShmemAccess(UsedShmemSegAddr);

		/* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
		InitProcess();

		/* Attach process to shared data structures */
		CreateSharedMemoryAndSemaphores(0);

		AutoVacWorkerMain(argc - 2, argv + 2);	/* does not return */
	}
	if (strncmp(argv[1], "--forkbgworker=" , 15) == 0)
	{
		int			shmem_slot;

		/* do this as early as possible; in particular, before InitProcess() */
		IsBackgroundWorker = true;

		/* Restore basic shared memory pointers */
		InitShmemAccess(UsedShmemSegAddr);

		/* Need a PGPROC to run CreateSharedMemoryAndSemaphores */
		InitProcess();

		/* Attach process to shared data structures */
		CreateSharedMemoryAndSemaphores(0);

		/*
		 * Fetch MyBgworkerEntry from shared memory.  The slot number is the
		 * text following the 15-character "--forkbgworker=" prefix.
		 */
		shmem_slot = atoi(argv[1] + 15);
		MyBgworkerEntry = BackgroundWorkerEntry(shmem_slot);

		/* NOTE(review): presumably does not return, like the others -- confirm */
		StartBackgroundWorker();
	}
	if (strcmp(argv[1], "--forkarch" ) == 0)
	{
		/* Do not want to attach to shared memory */

		PgArchiverMain(argc, argv); /* does not return */
	}
	if (strcmp(argv[1], "--forkcol" ) == 0)
	{
		/* Do not want to attach to shared memory */

		PgstatCollectorMain(argc, argv);	/* does not return */
	}
	if (strcmp(argv[1], "--forklog" ) == 0)
	{
		/* Do not want to attach to shared memory */

		SysLoggerMain(argc, argv);	/* does not return */
	}

	abort();					/* shouldn't get here */
}
5037 | #endif /* EXEC_BACKEND */ |
5038 | |
5039 | |
/*
 * ExitPostmaster -- cleanup
 *
 * The single exit path for the postmaster: performs a last sanity check
 * and then hands the given exit status to proc_exit().
 *
 * Do NOT call exit() directly --- always go through here!
 */
static void
ExitPostmaster(int status)
{
#ifdef HAVE_PTHREAD_IS_THREADED_NP

	/*
	 * Nothing is known that would turn a postmaster multithreaded once it
	 * has started up, but recheck anyway in case some unknown cause exists.
	 * LOG level is used for the report, because an unclean shutdown at this
	 * point would usually not look much different from a clean one.
	 */
	if (pthread_is_threaded_np())
		ereport(LOG,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg_internal("postmaster became multithreaded"),
				 errdetail("Please report this to <pgsql-bugs@lists.postgresql.org>.")));
#endif

	/*
	 * Historical notes kept from the original code:
	 *
	 * "should cleanup shared memory and kill all backends"
	 *
	 * "Not sure of the semantics here.  When the Postmaster dies, should the
	 * backends all be killed? probably not."
	 *
	 * "MUST -- vadim 05-10-1999"
	 */

	proc_exit(status);
}
5074 | |
/*
 * sigusr1_handler - handle signal conditions from child processes
 *
 * Each kind of request is checked for separately, using
 * CheckPostmasterSignal() for PMSIGNAL_* reasons plus
 * CheckLogrotateSignal() and CheckPromoteSignal() for the remaining two,
 * so a single invocation can service several pending requests.  The order
 * of the checks matters; see the comments below.
 *
 * The BlockSig mask is installed for the duration of the handler and
 * UnBlockSig is installed again just before returning.  errno is saved on
 * entry and restored on exit so that the interrupted code does not see it
 * change.
 */
static void
sigusr1_handler(SIGNAL_ARGS)
{
	int			save_errno = errno;

	PG_SETMASK(&BlockSig);

	/* Process background worker state change. */
	if (CheckPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE))
	{
		BackgroundWorkerStateChange();
		StartWorkerNeeded = true;
	}

	/*
	 * RECOVERY_STARTED and BEGIN_HOT_STANDBY signals are ignored in
	 * unexpected states. If the startup process quickly starts up, completes
	 * recovery, exits, we might process the death of the startup process
	 * first. We don't want to go back to recovery in that case.
	 */
	if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_STARTED) &&
		pmState == PM_STARTUP && Shutdown == NoShutdown)
	{
		/* WAL redo has started. We're out of reinitialization. */
		FatalError = false;
		Assert(AbortStartTime == 0);

		/*
		 * Crank up the background tasks.  It doesn't matter if this fails,
		 * we'll just try again later.
		 */
		Assert(CheckpointerPID == 0);
		CheckpointerPID = StartCheckpointer();
		Assert(BgWriterPID == 0);
		BgWriterPID = StartBackgroundWriter();

		/*
		 * Start the archiver if we're responsible for (re-)archiving received
		 * files.
		 */
		Assert(PgArchPID == 0);
		if (XLogArchivingAlways())
			PgArchPID = pgarch_start();

		/*
		 * If we aren't planning to enter hot standby mode later, treat
		 * RECOVERY_STARTED as meaning we're out of startup, and report status
		 * accordingly.
		 */
		if (!EnableHotStandby)
		{
			AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STANDBY);
#ifdef USE_SYSTEMD
			sd_notify(0, "READY=1" );
#endif
		}

		pmState = PM_RECOVERY;
	}

	/*
	 * This check deliberately follows the one above: if both signals are
	 * pending, pmState has just been advanced to PM_RECOVERY, so both
	 * transitions can happen within this single invocation.
	 */
	if (CheckPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY) &&
		pmState == PM_RECOVERY && Shutdown == NoShutdown)
	{
		/*
		 * Likewise, start other special children as needed.
		 */
		Assert(PgStatPID == 0);
		PgStatPID = pgstat_start();

		ereport(LOG,
				(errmsg("database system is ready to accept read only connections" )));

		/* Report status */
		AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_READY);
#ifdef USE_SYSTEMD
		sd_notify(0, "READY=1" );
#endif

		pmState = PM_HOT_STANDBY;
		/* Some workers may be scheduled to start now */
		StartWorkerNeeded = true;
	}

	/* Either flag may have been set above or before this handler ran */
	if (StartWorkerNeeded || HaveCrashedWorker)
		maybe_start_bgworkers();

	if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER) &&
		PgArchPID != 0)
	{
		/*
		 * Send SIGUSR1 to archiver process, to wake it up and begin archiving
		 * next WAL file.
		 */
		signal_child(PgArchPID, SIGUSR1);
	}

	/*
	 * Tell syslogger to rotate logfile if requested.  A request found by
	 * CheckLogrotateSignal() takes precedence; in that case the
	 * PMSIGNAL_ROTATE_LOGFILE check is skipped in this invocation.
	 */
	if (SysLoggerPID != 0)
	{
		if (CheckLogrotateSignal())
		{
			signal_child(SysLoggerPID, SIGUSR1);
			RemoveLogrotateSignalFiles();
		}
		else if (CheckPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE))
		{
			signal_child(SysLoggerPID, SIGUSR1);
		}
	}

	if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER) &&
		Shutdown == NoShutdown)
	{
		/*
		 * Start one iteration of the autovacuum daemon, even if autovacuuming
		 * is nominally not enabled.  This is so we can have an active defense
		 * against transaction ID wraparound.  We set a flag for the main loop
		 * to do it rather than trying to do it here --- this is because the
		 * autovac process itself may send the signal, and we want to handle
		 * that by launching another iteration as soon as the current one
		 * completes.
		 */
		start_autovac_launcher = true;
	}

	if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC_WORKER) &&
		Shutdown == NoShutdown)
	{
		/* The autovacuum launcher wants us to start a worker process. */
		StartAutovacuumWorker();
	}

	if (CheckPostmasterSignal(PMSIGNAL_START_WALRECEIVER))
	{
		/* Startup Process wants us to start the walreceiver process. */
		/* Start immediately if possible, else remember request for later. */
		WalReceiverRequested = true;
		MaybeStartWalReceiver();
	}

	/*
	 * Try to advance postmaster's state machine, if a child requests it.
	 *
	 * Be careful about the order of this action relative to sigusr1_handler's
	 * other actions.  Generally, this should be after other actions, in case
	 * they have effects PostmasterStateMachine would need to know about.
	 * However, we should do it before the CheckPromoteSignal step, which
	 * cannot have any (immediate) effect on the state machine, but does
	 * depend on what state we're in now.
	 */
	if (CheckPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE))
	{
		PostmasterStateMachine();
	}

	/*
	 * A promotion request is only forwarded while a startup process exists
	 * and the postmaster is in one of the listed states.
	 */
	if (StartupPID != 0 &&
		(pmState == PM_STARTUP || pmState == PM_RECOVERY ||
		 pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY) &&
		CheckPromoteSignal())
	{
		/* Tell startup process to finish recovery */
		signal_child(StartupPID, SIGUSR2);
	}

	PG_SETMASK(&UnBlockSig);

	errno = save_errno;
}
5245 | |
/*
 * SIGTERM or SIGQUIT while processing startup packet.
 *
 * This function runs in signal-handler context and may have interrupted
 * arbitrary code (memory allocation, SSL or authentication library calls,
 * and so on).  Going through proc_exit() from here would perform normal
 * exit-time cleanup, which is not async-signal-safe and could be re-entered
 * in the middle of whatever was interrupted.  So just leave at once with
 * _exit(1), letting the operating system reclaim the process's resources.
 * NOTE(review): this relies on the process not yet owning anything that
 * needs orderly release while the startup packet is being handled --
 * confirm against where this handler is installed.
 *
 * One might want to send the client a message saying why we are
 * disconnecting, but doing that from a signal handler would be unsafe for
 * the same reasons.  In practice, if the client has wedged here, it probably
 * couldn't do anything with the message anyway.
 */
static void
startup_die(SIGNAL_ARGS)
{
	_exit(1);
}
5260 | |
/*
 * Dummy signal handler
 *
 * Installed for signals that the postmaster itself has no use for but that
 * backends do act upon.  Setting such a signal to SIG_IGN in the postmaster
 * would risk a freshly started backend losing a signal that arrives before
 * it has had a chance to set up its own signal handling.  (See notes in
 * tcop/postgres.c.)
 */
static void
dummy_handler(SIGNAL_ARGS)
{
	/* deliberately nothing to do */
}
5274 | |
/*
 * Timeout while processing startup packet.
 *
 * As for startup_die(), this is reached asynchronously: a timeout can fire
 * while arbitrary code is in progress.  Performing normal exit-time cleanup
 * via proc_exit() from here would not be safe, so leave immediately with
 * _exit(1) and let the operating system reclaim the process's resources.
 * NOTE(review): presumably invoked from the timeout signal handler --
 * confirm against the timeout registration.
 */
static void
StartupPacketTimeoutHandler(void)
{
	_exit(1);
}
5284 | |
5285 | |
5286 | /* |
5287 | * Generate a random cancel key. |
5288 | */ |
5289 | static bool |
5290 | RandomCancelKey(int32 *cancel_key) |
5291 | { |
5292 | return pg_strong_random(cancel_key, sizeof(int32)); |
5293 | } |
5294 | |
5295 | /* |
5296 | * Count up number of child processes of specified types (dead_end children |
5297 | * are always excluded). |
5298 | */ |
5299 | static int |
5300 | CountChildren(int target) |
5301 | { |
5302 | dlist_iter iter; |
5303 | int cnt = 0; |
5304 | |
5305 | dlist_foreach(iter, &BackendList) |
5306 | { |
5307 | Backend *bp = dlist_container(Backend, elem, iter.cur); |
5308 | |
5309 | if (bp->dead_end) |
5310 | continue; |
5311 | |
5312 | /* |
5313 | * Since target == BACKEND_TYPE_ALL is the most common case, we test |
5314 | * it first and avoid touching shared memory for every child. |
5315 | */ |
5316 | if (target != BACKEND_TYPE_ALL) |
5317 | { |
5318 | /* |
5319 | * Assign bkend_type for any recently announced WAL Sender |
5320 | * processes. |
5321 | */ |
5322 | if (bp->bkend_type == BACKEND_TYPE_NORMAL && |
5323 | IsPostmasterChildWalSender(bp->child_slot)) |
5324 | bp->bkend_type = BACKEND_TYPE_WALSND; |
5325 | |
5326 | if (!(target & bp->bkend_type)) |
5327 | continue; |
5328 | } |
5329 | |
5330 | cnt++; |
5331 | } |
5332 | return cnt; |
5333 | } |
5334 | |
5335 | |
5336 | /* |
5337 | * StartChildProcess -- start an auxiliary process for the postmaster |
5338 | * |
5339 | * "type" determines what kind of child will be started. All child types |
5340 | * initially go to AuxiliaryProcessMain, which will handle common setup. |
5341 | * |
5342 | * Return value of StartChildProcess is subprocess' PID, or 0 if failed |
5343 | * to start subprocess. |
5344 | */ |
5345 | static pid_t |
5346 | StartChildProcess(AuxProcType type) |
5347 | { |
5348 | pid_t pid; |
5349 | char *av[10]; |
5350 | int ac = 0; |
5351 | char typebuf[32]; |
5352 | |
5353 | /* |
5354 | * Set up command-line arguments for subprocess |
5355 | */ |
5356 | av[ac++] = "postgres" ; |
5357 | |
5358 | #ifdef EXEC_BACKEND |
5359 | av[ac++] = "--forkboot" ; |
5360 | av[ac++] = NULL; /* filled in by postmaster_forkexec */ |
5361 | #endif |
5362 | |
5363 | snprintf(typebuf, sizeof(typebuf), "-x%d" , type); |
5364 | av[ac++] = typebuf; |
5365 | |
5366 | av[ac] = NULL; |
5367 | Assert(ac < lengthof(av)); |
5368 | |
5369 | #ifdef EXEC_BACKEND |
5370 | pid = postmaster_forkexec(ac, av); |
5371 | #else /* !EXEC_BACKEND */ |
5372 | pid = fork_process(); |
5373 | |
5374 | if (pid == 0) /* child */ |
5375 | { |
5376 | InitPostmasterChild(); |
5377 | |
5378 | /* Close the postmaster's sockets */ |
5379 | ClosePostmasterPorts(false); |
5380 | |
5381 | /* Release postmaster's working memory context */ |
5382 | MemoryContextSwitchTo(TopMemoryContext); |
5383 | MemoryContextDelete(PostmasterContext); |
5384 | PostmasterContext = NULL; |
5385 | |
5386 | AuxiliaryProcessMain(ac, av); |
5387 | ExitPostmaster(0); |
5388 | } |
5389 | #endif /* EXEC_BACKEND */ |
5390 | |
5391 | if (pid < 0) |
5392 | { |
5393 | /* in parent, fork failed */ |
5394 | int save_errno = errno; |
5395 | |
5396 | errno = save_errno; |
5397 | switch (type) |
5398 | { |
5399 | case StartupProcess: |
5400 | ereport(LOG, |
5401 | (errmsg("could not fork startup process: %m" ))); |
5402 | break; |
5403 | case BgWriterProcess: |
5404 | ereport(LOG, |
5405 | (errmsg("could not fork background writer process: %m" ))); |
5406 | break; |
5407 | case CheckpointerProcess: |
5408 | ereport(LOG, |
5409 | (errmsg("could not fork checkpointer process: %m" ))); |
5410 | break; |
5411 | case WalWriterProcess: |
5412 | ereport(LOG, |
5413 | (errmsg("could not fork WAL writer process: %m" ))); |
5414 | break; |
5415 | case WalReceiverProcess: |
5416 | ereport(LOG, |
5417 | (errmsg("could not fork WAL receiver process: %m" ))); |
5418 | break; |
5419 | default: |
5420 | ereport(LOG, |
5421 | (errmsg("could not fork process: %m" ))); |
5422 | break; |
5423 | } |
5424 | |
5425 | /* |
5426 | * fork failure is fatal during startup, but there's no need to choke |
5427 | * immediately if starting other child types fails. |
5428 | */ |
5429 | if (type == StartupProcess) |
5430 | ExitPostmaster(1); |
5431 | return 0; |
5432 | } |
5433 | |
5434 | /* |
5435 | * in parent, successful fork |
5436 | */ |
5437 | return pid; |
5438 | } |
5439 | |
5440 | /* |
5441 | * StartAutovacuumWorker |
5442 | * Start an autovac worker process. |
5443 | * |
5444 | * This function is here because it enters the resulting PID into the |
5445 | * postmaster's private backends list. |
5446 | * |
5447 | * NB -- this code very roughly matches BackendStartup. |
5448 | */ |
5449 | static void |
5450 | StartAutovacuumWorker(void) |
5451 | { |
5452 | Backend *bn; |
5453 | |
5454 | /* |
5455 | * If not in condition to run a process, don't try, but handle it like a |
5456 | * fork failure. This does not normally happen, since the signal is only |
5457 | * supposed to be sent by autovacuum launcher when it's OK to do it, but |
5458 | * we have to check to avoid race-condition problems during DB state |
5459 | * changes. |
5460 | */ |
5461 | if (canAcceptConnections() == CAC_OK) |
5462 | { |
5463 | /* |
5464 | * Compute the cancel key that will be assigned to this session. We |
5465 | * probably don't need cancel keys for autovac workers, but we'd |
5466 | * better have something random in the field to prevent unfriendly |
5467 | * people from sending cancels to them. |
5468 | */ |
5469 | if (!RandomCancelKey(&MyCancelKey)) |
5470 | { |
5471 | ereport(LOG, |
5472 | (errcode(ERRCODE_INTERNAL_ERROR), |
5473 | errmsg("could not generate random cancel key" ))); |
5474 | return; |
5475 | } |
5476 | |
5477 | bn = (Backend *) malloc(sizeof(Backend)); |
5478 | if (bn) |
5479 | { |
5480 | bn->cancel_key = MyCancelKey; |
5481 | |
5482 | /* Autovac workers are not dead_end and need a child slot */ |
5483 | bn->dead_end = false; |
5484 | bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot(); |
5485 | bn->bgworker_notify = false; |
5486 | |
5487 | bn->pid = StartAutoVacWorker(); |
5488 | if (bn->pid > 0) |
5489 | { |
5490 | bn->bkend_type = BACKEND_TYPE_AUTOVAC; |
5491 | dlist_push_head(&BackendList, &bn->elem); |
5492 | #ifdef EXEC_BACKEND |
5493 | ShmemBackendArrayAdd(bn); |
5494 | #endif |
5495 | /* all OK */ |
5496 | return; |
5497 | } |
5498 | |
5499 | /* |
5500 | * fork failed, fall through to report -- actual error message was |
5501 | * logged by StartAutoVacWorker |
5502 | */ |
5503 | (void) ReleasePostmasterChildSlot(bn->child_slot); |
5504 | free(bn); |
5505 | } |
5506 | else |
5507 | ereport(LOG, |
5508 | (errcode(ERRCODE_OUT_OF_MEMORY), |
5509 | errmsg("out of memory" ))); |
5510 | } |
5511 | |
5512 | /* |
5513 | * Report the failure to the launcher, if it's running. (If it's not, we |
5514 | * might not even be connected to shared memory, so don't try to call |
5515 | * AutoVacWorkerFailed.) Note that we also need to signal it so that it |
5516 | * responds to the condition, but we don't do that here, instead waiting |
5517 | * for ServerLoop to do it. This way we avoid a ping-pong signalling in |
5518 | * quick succession between the autovac launcher and postmaster in case |
5519 | * things get ugly. |
5520 | */ |
5521 | if (AutoVacPID != 0) |
5522 | { |
5523 | AutoVacWorkerFailed(); |
5524 | avlauncher_needs_signal = true; |
5525 | } |
5526 | } |
5527 | |
5528 | /* |
5529 | * MaybeStartWalReceiver |
5530 | * Start the WAL receiver process, if not running and our state allows. |
5531 | * |
5532 | * Note: if WalReceiverPID is already nonzero, it might seem that we should |
5533 | * clear WalReceiverRequested. However, there's a race condition if the |
5534 | * walreceiver terminates and the startup process immediately requests a new |
5535 | * one: it's quite possible to get the signal for the request before reaping |
5536 | * the dead walreceiver process. Better to risk launching an extra |
5537 | * walreceiver than to miss launching one we need. (The walreceiver code |
5538 | * has logic to recognize that it should go away if not needed.) |
5539 | */ |
5540 | static void |
5541 | MaybeStartWalReceiver(void) |
5542 | { |
5543 | if (WalReceiverPID == 0 && |
5544 | (pmState == PM_STARTUP || pmState == PM_RECOVERY || |
5545 | pmState == PM_HOT_STANDBY || pmState == PM_WAIT_READONLY) && |
5546 | Shutdown == NoShutdown) |
5547 | { |
5548 | WalReceiverPID = StartWalReceiver(); |
5549 | if (WalReceiverPID != 0) |
5550 | WalReceiverRequested = false; |
5551 | /* else leave the flag set, so we'll try again later */ |
5552 | } |
5553 | } |
5554 | |
5555 | |
5556 | /* |
5557 | * Create the opts file |
5558 | */ |
5559 | static bool |
5560 | CreateOptsFile(int argc, char *argv[], char *fullprogname) |
5561 | { |
5562 | FILE *fp; |
5563 | int i; |
5564 | |
5565 | #define OPTS_FILE "postmaster.opts" |
5566 | |
5567 | if ((fp = fopen(OPTS_FILE, "w" )) == NULL) |
5568 | { |
5569 | elog(LOG, "could not create file \"%s\": %m" , OPTS_FILE); |
5570 | return false; |
5571 | } |
5572 | |
5573 | fprintf(fp, "%s" , fullprogname); |
5574 | for (i = 1; i < argc; i++) |
5575 | fprintf(fp, " \"%s\"" , argv[i]); |
5576 | fputs("\n" , fp); |
5577 | |
5578 | if (fclose(fp)) |
5579 | { |
5580 | elog(LOG, "could not write file \"%s\": %m" , OPTS_FILE); |
5581 | return false; |
5582 | } |
5583 | |
5584 | return true; |
5585 | } |
5586 | |
5587 | |
5588 | /* |
5589 | * MaxLivePostmasterChildren |
5590 | * |
5591 | * This reports the number of entries needed in per-child-process arrays |
5592 | * (the PMChildFlags array, and if EXEC_BACKEND the ShmemBackendArray). |
5593 | * These arrays include regular backends, autovac workers, walsenders |
5594 | * and background workers, but not special children nor dead_end children. |
5595 | * This allows the arrays to have a fixed maximum size, to wit the same |
5596 | * too-many-children limit enforced by canAcceptConnections(). The exact value |
5597 | * isn't too critical as long as it's more than MaxBackends. |
5598 | */ |
5599 | int |
5600 | MaxLivePostmasterChildren(void) |
5601 | { |
5602 | return 2 * (MaxConnections + autovacuum_max_workers + 1 + |
5603 | max_wal_senders + max_worker_processes); |
5604 | } |
5605 | |
5606 | /* |
5607 | * Connect background worker to a database. |
5608 | */ |
5609 | void |
5610 | BackgroundWorkerInitializeConnection(const char *dbname, const char *username, uint32 flags) |
5611 | { |
5612 | BackgroundWorker *worker = MyBgworkerEntry; |
5613 | |
5614 | /* XXX is this the right errcode? */ |
5615 | if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)) |
5616 | ereport(FATAL, |
5617 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
5618 | errmsg("database connection requirement not indicated during registration" ))); |
5619 | |
5620 | InitPostgres(dbname, InvalidOid, username, InvalidOid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0); |
5621 | |
5622 | /* it had better not gotten out of "init" mode yet */ |
5623 | if (!IsInitProcessingMode()) |
5624 | ereport(ERROR, |
5625 | (errmsg("invalid processing mode in background worker" ))); |
5626 | SetProcessingMode(NormalProcessing); |
5627 | } |
5628 | |
5629 | /* |
5630 | * Connect background worker to a database using OIDs. |
5631 | */ |
5632 | void |
5633 | BackgroundWorkerInitializeConnectionByOid(Oid dboid, Oid useroid, uint32 flags) |
5634 | { |
5635 | BackgroundWorker *worker = MyBgworkerEntry; |
5636 | |
5637 | /* XXX is this the right errcode? */ |
5638 | if (!(worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)) |
5639 | ereport(FATAL, |
5640 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
5641 | errmsg("database connection requirement not indicated during registration" ))); |
5642 | |
5643 | InitPostgres(NULL, dboid, NULL, useroid, NULL, (flags & BGWORKER_BYPASS_ALLOWCONN) != 0); |
5644 | |
5645 | /* it had better not gotten out of "init" mode yet */ |
5646 | if (!IsInitProcessingMode()) |
5647 | ereport(ERROR, |
5648 | (errmsg("invalid processing mode in background worker" ))); |
5649 | SetProcessingMode(NormalProcessing); |
5650 | } |
5651 | |
/*
 * BackgroundWorkerBlockSignals
 *		Block signals in a background worker.
 *
 * Installs the BlockSig signal mask through PG_SETMASK.  The matching call
 * to undo this is BackgroundWorkerUnblockSignals.
 */
void
BackgroundWorkerBlockSignals(void)
{
	PG_SETMASK(&BlockSig);
}
5660 | |
/*
 * BackgroundWorkerUnblockSignals
 *		Unblock signals in a background worker.
 *
 * Installs the UnBlockSig signal mask through PG_SETMASK.
 */
void
BackgroundWorkerUnblockSignals(void)
{
	PG_SETMASK(&UnBlockSig);
}
5666 | |
#ifdef EXEC_BACKEND
/*
 * bgworker_forkexec
 *		EXEC_BACKEND-only launcher for a background worker process.
 *
 * Builds the argument vector
 *		"postgres", "--forkbgworker=<shmem_slot>", <placeholder>
 * and passes it to postmaster_forkexec(), whose return value is returned
 * unchanged.
 */
static pid_t
bgworker_forkexec(int shmem_slot)
{
	char		slot_arg[MAXPGPATH];
	char	   *child_argv[10];
	int			child_argc;

	snprintf(slot_arg, sizeof(slot_arg), "--forkbgworker=%d", shmem_slot);

	child_argv[0] = "postgres";
	child_argv[1] = slot_arg;
	child_argv[2] = NULL;		/* filled in by postmaster_forkexec */
	child_argc = 3;

	/* terminate the vector and verify it fits in the array */
	child_argv[child_argc] = NULL;
	Assert(child_argc < lengthof(child_argv));

	return postmaster_forkexec(child_argc, child_argv);
}
#endif
5687 | |
5688 | /* |
5689 | * Start a new bgworker. |
5690 | * Starting time conditions must have been checked already. |
5691 | * |
5692 | * Returns true on success, false on failure. |
5693 | * In either case, update the RegisteredBgWorker's state appropriately. |
5694 | * |
5695 | * This code is heavily based on autovacuum.c, q.v. |
5696 | */ |
5697 | static bool |
5698 | do_start_bgworker(RegisteredBgWorker *rw) |
5699 | { |
5700 | pid_t worker_pid; |
5701 | |
5702 | Assert(rw->rw_pid == 0); |
5703 | |
5704 | /* |
5705 | * Allocate and assign the Backend element. Note we must do this before |
5706 | * forking, so that we can handle out of memory properly. |
5707 | * |
5708 | * Treat failure as though the worker had crashed. That way, the |
5709 | * postmaster will wait a bit before attempting to start it again; if it |
5710 | * tried again right away, most likely it'd find itself repeating the |
5711 | * out-of-memory or fork failure condition. |
5712 | */ |
5713 | if (!assign_backendlist_entry(rw)) |
5714 | { |
5715 | rw->rw_crashed_at = GetCurrentTimestamp(); |
5716 | return false; |
5717 | } |
5718 | |
5719 | ereport(DEBUG1, |
5720 | (errmsg("starting background worker process \"%s\"" , |
5721 | rw->rw_worker.bgw_name))); |
5722 | |
5723 | #ifdef EXEC_BACKEND |
5724 | switch ((worker_pid = bgworker_forkexec(rw->rw_shmem_slot))) |
5725 | #else |
5726 | switch ((worker_pid = fork_process())) |
5727 | #endif |
5728 | { |
5729 | case -1: |
5730 | /* in postmaster, fork failed ... */ |
5731 | ereport(LOG, |
5732 | (errmsg("could not fork worker process: %m" ))); |
5733 | /* undo what assign_backendlist_entry did */ |
5734 | ReleasePostmasterChildSlot(rw->rw_child_slot); |
5735 | rw->rw_child_slot = 0; |
5736 | free(rw->rw_backend); |
5737 | rw->rw_backend = NULL; |
5738 | /* mark entry as crashed, so we'll try again later */ |
5739 | rw->rw_crashed_at = GetCurrentTimestamp(); |
5740 | break; |
5741 | |
5742 | #ifndef EXEC_BACKEND |
5743 | case 0: |
5744 | /* in postmaster child ... */ |
5745 | InitPostmasterChild(); |
5746 | |
5747 | /* Close the postmaster's sockets */ |
5748 | ClosePostmasterPorts(false); |
5749 | |
5750 | /* |
5751 | * Before blowing away PostmasterContext, save this bgworker's |
5752 | * data where it can find it. |
5753 | */ |
5754 | MyBgworkerEntry = (BackgroundWorker *) |
5755 | MemoryContextAlloc(TopMemoryContext, sizeof(BackgroundWorker)); |
5756 | memcpy(MyBgworkerEntry, &rw->rw_worker, sizeof(BackgroundWorker)); |
5757 | |
5758 | /* Release postmaster's working memory context */ |
5759 | MemoryContextSwitchTo(TopMemoryContext); |
5760 | MemoryContextDelete(PostmasterContext); |
5761 | PostmasterContext = NULL; |
5762 | |
5763 | StartBackgroundWorker(); |
5764 | |
5765 | exit(1); /* should not get here */ |
5766 | break; |
5767 | #endif |
5768 | default: |
5769 | /* in postmaster, fork successful ... */ |
5770 | rw->rw_pid = worker_pid; |
5771 | rw->rw_backend->pid = rw->rw_pid; |
5772 | ReportBackgroundWorkerPID(rw); |
5773 | /* add new worker to lists of backends */ |
5774 | dlist_push_head(&BackendList, &rw->rw_backend->elem); |
5775 | #ifdef EXEC_BACKEND |
5776 | ShmemBackendArrayAdd(rw->rw_backend); |
5777 | #endif |
5778 | return true; |
5779 | } |
5780 | |
5781 | return false; |
5782 | } |
5783 | |
5784 | /* |
5785 | * Does the current postmaster state require starting a worker with the |
5786 | * specified start_time? |
5787 | */ |
5788 | static bool |
5789 | bgworker_should_start_now(BgWorkerStartTime start_time) |
5790 | { |
5791 | switch (pmState) |
5792 | { |
5793 | case PM_NO_CHILDREN: |
5794 | case PM_WAIT_DEAD_END: |
5795 | case PM_SHUTDOWN_2: |
5796 | case PM_SHUTDOWN: |
5797 | case PM_WAIT_BACKENDS: |
5798 | case PM_WAIT_READONLY: |
5799 | case PM_WAIT_BACKUP: |
5800 | break; |
5801 | |
5802 | case PM_RUN: |
5803 | if (start_time == BgWorkerStart_RecoveryFinished) |
5804 | return true; |
5805 | /* fall through */ |
5806 | |
5807 | case PM_HOT_STANDBY: |
5808 | if (start_time == BgWorkerStart_ConsistentState) |
5809 | return true; |
5810 | /* fall through */ |
5811 | |
5812 | case PM_RECOVERY: |
5813 | case PM_STARTUP: |
5814 | case PM_INIT: |
5815 | if (start_time == BgWorkerStart_PostmasterStart) |
5816 | return true; |
5817 | /* fall through */ |
5818 | |
5819 | } |
5820 | |
5821 | return false; |
5822 | } |
5823 | |
5824 | /* |
5825 | * Allocate the Backend struct for a connected background worker, but don't |
5826 | * add it to the list of backends just yet. |
5827 | * |
5828 | * On failure, return false without changing any worker state. |
5829 | * |
5830 | * Some info from the Backend is copied into the passed rw. |
5831 | */ |
5832 | static bool |
5833 | assign_backendlist_entry(RegisteredBgWorker *rw) |
5834 | { |
5835 | Backend *bn; |
5836 | |
5837 | /* |
5838 | * Compute the cancel key that will be assigned to this session. We |
5839 | * probably don't need cancel keys for background workers, but we'd better |
5840 | * have something random in the field to prevent unfriendly people from |
5841 | * sending cancels to them. |
5842 | */ |
5843 | if (!RandomCancelKey(&MyCancelKey)) |
5844 | { |
5845 | ereport(LOG, |
5846 | (errcode(ERRCODE_INTERNAL_ERROR), |
5847 | errmsg("could not generate random cancel key" ))); |
5848 | return false; |
5849 | } |
5850 | |
5851 | bn = malloc(sizeof(Backend)); |
5852 | if (bn == NULL) |
5853 | { |
5854 | ereport(LOG, |
5855 | (errcode(ERRCODE_OUT_OF_MEMORY), |
5856 | errmsg("out of memory" ))); |
5857 | return false; |
5858 | } |
5859 | |
5860 | bn->cancel_key = MyCancelKey; |
5861 | bn->child_slot = MyPMChildSlot = AssignPostmasterChildSlot(); |
5862 | bn->bkend_type = BACKEND_TYPE_BGWORKER; |
5863 | bn->dead_end = false; |
5864 | bn->bgworker_notify = false; |
5865 | |
5866 | rw->rw_backend = bn; |
5867 | rw->rw_child_slot = bn->child_slot; |
5868 | |
5869 | return true; |
5870 | } |
5871 | |
5872 | /* |
5873 | * If the time is right, start background worker(s). |
5874 | * |
5875 | * As a side effect, the bgworker control variables are set or reset |
5876 | * depending on whether more workers may need to be started. |
5877 | * |
5878 | * We limit the number of workers started per call, to avoid consuming the |
5879 | * postmaster's attention for too long when many such requests are pending. |
5880 | * As long as StartWorkerNeeded is true, ServerLoop will not block and will |
5881 | * call this function again after dealing with any other issues. |
5882 | */ |
5883 | static void |
5884 | maybe_start_bgworkers(void) |
5885 | { |
5886 | #define MAX_BGWORKERS_TO_LAUNCH 100 |
5887 | int num_launched = 0; |
5888 | TimestampTz now = 0; |
5889 | slist_mutable_iter iter; |
5890 | |
5891 | /* |
5892 | * During crash recovery, we have no need to be called until the state |
5893 | * transition out of recovery. |
5894 | */ |
5895 | if (FatalError) |
5896 | { |
5897 | StartWorkerNeeded = false; |
5898 | HaveCrashedWorker = false; |
5899 | return; |
5900 | } |
5901 | |
5902 | /* Don't need to be called again unless we find a reason for it below */ |
5903 | StartWorkerNeeded = false; |
5904 | HaveCrashedWorker = false; |
5905 | |
5906 | slist_foreach_modify(iter, &BackgroundWorkerList) |
5907 | { |
5908 | RegisteredBgWorker *rw; |
5909 | |
5910 | rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur); |
5911 | |
5912 | /* ignore if already running */ |
5913 | if (rw->rw_pid != 0) |
5914 | continue; |
5915 | |
5916 | /* if marked for death, clean up and remove from list */ |
5917 | if (rw->rw_terminate) |
5918 | { |
5919 | ForgetBackgroundWorker(&iter); |
5920 | continue; |
5921 | } |
5922 | |
5923 | /* |
5924 | * If this worker has crashed previously, maybe it needs to be |
5925 | * restarted (unless on registration it specified it doesn't want to |
5926 | * be restarted at all). Check how long ago did a crash last happen. |
5927 | * If the last crash is too recent, don't start it right away; let it |
5928 | * be restarted once enough time has passed. |
5929 | */ |
5930 | if (rw->rw_crashed_at != 0) |
5931 | { |
5932 | if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART) |
5933 | { |
5934 | int notify_pid; |
5935 | |
5936 | notify_pid = rw->rw_worker.bgw_notify_pid; |
5937 | |
5938 | ForgetBackgroundWorker(&iter); |
5939 | |
5940 | /* Report worker is gone now. */ |
5941 | if (notify_pid != 0) |
5942 | kill(notify_pid, SIGUSR1); |
5943 | |
5944 | continue; |
5945 | } |
5946 | |
5947 | /* read system time only when needed */ |
5948 | if (now == 0) |
5949 | now = GetCurrentTimestamp(); |
5950 | |
5951 | if (!TimestampDifferenceExceeds(rw->rw_crashed_at, now, |
5952 | rw->rw_worker.bgw_restart_time * 1000)) |
5953 | { |
5954 | /* Set flag to remember that we have workers to start later */ |
5955 | HaveCrashedWorker = true; |
5956 | continue; |
5957 | } |
5958 | } |
5959 | |
5960 | if (bgworker_should_start_now(rw->rw_worker.bgw_start_time)) |
5961 | { |
5962 | /* reset crash time before trying to start worker */ |
5963 | rw->rw_crashed_at = 0; |
5964 | |
5965 | /* |
5966 | * Try to start the worker. |
5967 | * |
5968 | * On failure, give up processing workers for now, but set |
5969 | * StartWorkerNeeded so we'll come back here on the next iteration |
5970 | * of ServerLoop to try again. (We don't want to wait, because |
5971 | * there might be additional ready-to-run workers.) We could set |
5972 | * HaveCrashedWorker as well, since this worker is now marked |
5973 | * crashed, but there's no need because the next run of this |
5974 | * function will do that. |
5975 | */ |
5976 | if (!do_start_bgworker(rw)) |
5977 | { |
5978 | StartWorkerNeeded = true; |
5979 | return; |
5980 | } |
5981 | |
5982 | /* |
5983 | * If we've launched as many workers as allowed, quit, but have |
5984 | * ServerLoop call us again to look for additional ready-to-run |
5985 | * workers. There might not be any, but we'll find out the next |
5986 | * time we run. |
5987 | */ |
5988 | if (++num_launched >= MAX_BGWORKERS_TO_LAUNCH) |
5989 | { |
5990 | StartWorkerNeeded = true; |
5991 | return; |
5992 | } |
5993 | } |
5994 | } |
5995 | } |
5996 | |
5997 | /* |
5998 | * When a backend asks to be notified about worker state changes, we |
5999 | * set a flag in its backend entry. The background worker machinery needs |
6000 | * to know when such backends exit. |
6001 | */ |
6002 | bool |
6003 | PostmasterMarkPIDForWorkerNotify(int pid) |
6004 | { |
6005 | dlist_iter iter; |
6006 | Backend *bp; |
6007 | |
6008 | dlist_foreach(iter, &BackendList) |
6009 | { |
6010 | bp = dlist_container(Backend, elem, iter.cur); |
6011 | if (bp->pid == pid) |
6012 | { |
6013 | bp->bgworker_notify = true; |
6014 | return true; |
6015 | } |
6016 | } |
6017 | return false; |
6018 | } |
6019 | |
6020 | #ifdef EXEC_BACKEND |
6021 | |
6022 | /* |
6023 | * The following need to be available to the save/restore_backend_variables |
6024 | * functions. They are marked NON_EXEC_STATIC in their home modules. |
6025 | */ |
6026 | extern slock_t *ShmemLock; |
6027 | extern slock_t *ProcStructLock; |
6028 | extern PGPROC *AuxiliaryProcs; |
6029 | extern PMSignalData *PMSignalState; |
6030 | extern pgsocket pgStatSock; |
6031 | extern pg_time_t first_syslogger_file_time; |
6032 | |
6033 | #ifndef WIN32 |
6034 | #define write_inheritable_socket(dest, src, childpid) ((*(dest) = (src)), true) |
6035 | #define read_inheritable_socket(dest, src) (*(dest) = *(src)) |
6036 | #else |
6037 | static bool write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE child); |
6038 | static bool write_inheritable_socket(InheritableSocket *dest, SOCKET src, |
6039 | pid_t childPid); |
6040 | static void read_inheritable_socket(SOCKET *dest, InheritableSocket *src); |
6041 | #endif |
6042 | |
6043 | |
6044 | /* Save critical backend variables into the BackendParameters struct */ |
6045 | #ifndef WIN32 |
6046 | static bool |
6047 | save_backend_variables(BackendParameters *param, Port *port) |
6048 | #else |
6049 | static bool |
6050 | save_backend_variables(BackendParameters *param, Port *port, |
6051 | HANDLE childProcess, pid_t childPid) |
6052 | #endif |
6053 | { |
6054 | memcpy(¶m->port, port, sizeof(Port)); |
6055 | if (!write_inheritable_socket(¶m->portsocket, port->sock, childPid)) |
6056 | return false; |
6057 | |
6058 | strlcpy(param->DataDir, DataDir, MAXPGPATH); |
6059 | |
6060 | memcpy(¶m->ListenSocket, &ListenSocket, sizeof(ListenSocket)); |
6061 | |
6062 | param->MyCancelKey = MyCancelKey; |
6063 | param->MyPMChildSlot = MyPMChildSlot; |
6064 | |
6065 | #ifdef WIN32 |
6066 | param->ShmemProtectiveRegion = ShmemProtectiveRegion; |
6067 | #endif |
6068 | param->UsedShmemSegID = UsedShmemSegID; |
6069 | param->UsedShmemSegAddr = UsedShmemSegAddr; |
6070 | |
6071 | param->ShmemLock = ShmemLock; |
6072 | param->ShmemVariableCache = ShmemVariableCache; |
6073 | param->ShmemBackendArray = ShmemBackendArray; |
6074 | |
6075 | #ifndef HAVE_SPINLOCKS |
6076 | param->SpinlockSemaArray = SpinlockSemaArray; |
6077 | #endif |
6078 | param->NamedLWLockTrancheRequests = NamedLWLockTrancheRequests; |
6079 | param->NamedLWLockTrancheArray = NamedLWLockTrancheArray; |
6080 | param->MainLWLockArray = MainLWLockArray; |
6081 | param->ProcStructLock = ProcStructLock; |
6082 | param->ProcGlobal = ProcGlobal; |
6083 | param->AuxiliaryProcs = AuxiliaryProcs; |
6084 | param->PreparedXactProcs = PreparedXactProcs; |
6085 | param->PMSignalState = PMSignalState; |
6086 | if (!write_inheritable_socket(¶m->pgStatSock, pgStatSock, childPid)) |
6087 | return false; |
6088 | |
6089 | param->PostmasterPid = PostmasterPid; |
6090 | param->PgStartTime = PgStartTime; |
6091 | param->PgReloadTime = PgReloadTime; |
6092 | param->first_syslogger_file_time = first_syslogger_file_time; |
6093 | |
6094 | param->redirection_done = redirection_done; |
6095 | param->IsBinaryUpgrade = IsBinaryUpgrade; |
6096 | param->max_safe_fds = max_safe_fds; |
6097 | |
6098 | param->MaxBackends = MaxBackends; |
6099 | |
6100 | #ifdef WIN32 |
6101 | param->PostmasterHandle = PostmasterHandle; |
6102 | if (!write_duplicated_handle(¶m->initial_signal_pipe, |
6103 | pgwin32_create_signal_listener(childPid), |
6104 | childProcess)) |
6105 | return false; |
6106 | #else |
6107 | memcpy(¶m->postmaster_alive_fds, &postmaster_alive_fds, |
6108 | sizeof(postmaster_alive_fds)); |
6109 | #endif |
6110 | |
6111 | memcpy(¶m->syslogPipe, &syslogPipe, sizeof(syslogPipe)); |
6112 | |
6113 | strlcpy(param->my_exec_path, my_exec_path, MAXPGPATH); |
6114 | |
6115 | strlcpy(param->pkglib_path, pkglib_path, MAXPGPATH); |
6116 | |
6117 | strlcpy(param->ExtraOptions, ExtraOptions, MAXPGPATH); |
6118 | |
6119 | return true; |
6120 | } |
6121 | |
6122 | |
6123 | #ifdef WIN32 |
6124 | /* |
6125 | * Duplicate a handle for usage in a child process, and write the child |
6126 | * process instance of the handle to the parameter file. |
6127 | */ |
6128 | static bool |
6129 | write_duplicated_handle(HANDLE *dest, HANDLE src, HANDLE childProcess) |
6130 | { |
6131 | HANDLE hChild = INVALID_HANDLE_VALUE; |
6132 | |
6133 | if (!DuplicateHandle(GetCurrentProcess(), |
6134 | src, |
6135 | childProcess, |
6136 | &hChild, |
6137 | 0, |
6138 | TRUE, |
6139 | DUPLICATE_CLOSE_SOURCE | DUPLICATE_SAME_ACCESS)) |
6140 | { |
6141 | ereport(LOG, |
6142 | (errmsg_internal("could not duplicate handle to be written to backend parameter file: error code %lu" , |
6143 | GetLastError()))); |
6144 | return false; |
6145 | } |
6146 | |
6147 | *dest = hChild; |
6148 | return true; |
6149 | } |
6150 | |
6151 | /* |
6152 | * Duplicate a socket for usage in a child process, and write the resulting |
6153 | * structure to the parameter file. |
6154 | * This is required because a number of LSPs (Layered Service Providers) very |
6155 | * common on Windows (antivirus, firewalls, download managers etc) break |
6156 | * straight socket inheritance. |
6157 | */ |
6158 | static bool |
6159 | write_inheritable_socket(InheritableSocket *dest, SOCKET src, pid_t childpid) |
6160 | { |
6161 | dest->origsocket = src; |
6162 | if (src != 0 && src != PGINVALID_SOCKET) |
6163 | { |
6164 | /* Actual socket */ |
6165 | if (WSADuplicateSocket(src, childpid, &dest->wsainfo) != 0) |
6166 | { |
6167 | ereport(LOG, |
6168 | (errmsg("could not duplicate socket %d for use in backend: error code %d" , |
6169 | (int) src, WSAGetLastError()))); |
6170 | return false; |
6171 | } |
6172 | } |
6173 | return true; |
6174 | } |
6175 | |
6176 | /* |
6177 | * Read a duplicate socket structure back, and get the socket descriptor. |
6178 | */ |
6179 | static void |
6180 | read_inheritable_socket(SOCKET *dest, InheritableSocket *src) |
6181 | { |
6182 | SOCKET s; |
6183 | |
6184 | if (src->origsocket == PGINVALID_SOCKET || src->origsocket == 0) |
6185 | { |
6186 | /* Not a real socket! */ |
6187 | *dest = src->origsocket; |
6188 | } |
6189 | else |
6190 | { |
6191 | /* Actual socket, so create from structure */ |
6192 | s = WSASocket(FROM_PROTOCOL_INFO, |
6193 | FROM_PROTOCOL_INFO, |
6194 | FROM_PROTOCOL_INFO, |
6195 | &src->wsainfo, |
6196 | 0, |
6197 | 0); |
6198 | if (s == INVALID_SOCKET) |
6199 | { |
6200 | write_stderr("could not create inherited socket: error code %d\n" , |
6201 | WSAGetLastError()); |
6202 | exit(1); |
6203 | } |
6204 | *dest = s; |
6205 | |
6206 | /* |
6207 | * To make sure we don't get two references to the same socket, close |
6208 | * the original one. (This would happen when inheritance actually |
6209 | * works.. |
6210 | */ |
6211 | closesocket(src->origsocket); |
6212 | } |
6213 | } |
6214 | #endif |
6215 | |
6216 | static void |
6217 | read_backend_variables(char *id, Port *port) |
6218 | { |
6219 | BackendParameters param; |
6220 | |
6221 | #ifndef WIN32 |
6222 | /* Non-win32 implementation reads from file */ |
6223 | FILE *fp; |
6224 | |
6225 | /* Open file */ |
6226 | fp = AllocateFile(id, PG_BINARY_R); |
6227 | if (!fp) |
6228 | { |
6229 | write_stderr("could not open backend variables file \"%s\": %s\n" , |
6230 | id, strerror(errno)); |
6231 | exit(1); |
6232 | } |
6233 | |
6234 | if (fread(¶m, sizeof(param), 1, fp) != 1) |
6235 | { |
6236 | write_stderr("could not read from backend variables file \"%s\": %s\n" , |
6237 | id, strerror(errno)); |
6238 | exit(1); |
6239 | } |
6240 | |
6241 | /* Release file */ |
6242 | FreeFile(fp); |
6243 | if (unlink(id) != 0) |
6244 | { |
6245 | write_stderr("could not remove file \"%s\": %s\n" , |
6246 | id, strerror(errno)); |
6247 | exit(1); |
6248 | } |
6249 | #else |
6250 | /* Win32 version uses mapped file */ |
6251 | HANDLE paramHandle; |
6252 | BackendParameters *paramp; |
6253 | |
6254 | #ifdef _WIN64 |
6255 | paramHandle = (HANDLE) _atoi64(id); |
6256 | #else |
6257 | paramHandle = (HANDLE) atol(id); |
6258 | #endif |
6259 | paramp = MapViewOfFile(paramHandle, FILE_MAP_READ, 0, 0, 0); |
6260 | if (!paramp) |
6261 | { |
6262 | write_stderr("could not map view of backend variables: error code %lu\n" , |
6263 | GetLastError()); |
6264 | exit(1); |
6265 | } |
6266 | |
6267 | memcpy(¶m, paramp, sizeof(BackendParameters)); |
6268 | |
6269 | if (!UnmapViewOfFile(paramp)) |
6270 | { |
6271 | write_stderr("could not unmap view of backend variables: error code %lu\n" , |
6272 | GetLastError()); |
6273 | exit(1); |
6274 | } |
6275 | |
6276 | if (!CloseHandle(paramHandle)) |
6277 | { |
6278 | write_stderr("could not close handle to backend parameter variables: error code %lu\n" , |
6279 | GetLastError()); |
6280 | exit(1); |
6281 | } |
6282 | #endif |
6283 | |
6284 | restore_backend_variables(¶m, port); |
6285 | } |
6286 | |
6287 | /* Restore critical backend variables from the BackendParameters struct */ |
6288 | static void |
6289 | restore_backend_variables(BackendParameters *param, Port *port) |
6290 | { |
6291 | memcpy(port, ¶m->port, sizeof(Port)); |
6292 | read_inheritable_socket(&port->sock, ¶m->portsocket); |
6293 | |
6294 | SetDataDir(param->DataDir); |
6295 | |
6296 | memcpy(&ListenSocket, ¶m->ListenSocket, sizeof(ListenSocket)); |
6297 | |
6298 | MyCancelKey = param->MyCancelKey; |
6299 | MyPMChildSlot = param->MyPMChildSlot; |
6300 | |
6301 | #ifdef WIN32 |
6302 | ShmemProtectiveRegion = param->ShmemProtectiveRegion; |
6303 | #endif |
6304 | UsedShmemSegID = param->UsedShmemSegID; |
6305 | UsedShmemSegAddr = param->UsedShmemSegAddr; |
6306 | |
6307 | ShmemLock = param->ShmemLock; |
6308 | ShmemVariableCache = param->ShmemVariableCache; |
6309 | ShmemBackendArray = param->ShmemBackendArray; |
6310 | |
6311 | #ifndef HAVE_SPINLOCKS |
6312 | SpinlockSemaArray = param->SpinlockSemaArray; |
6313 | #endif |
6314 | NamedLWLockTrancheRequests = param->NamedLWLockTrancheRequests; |
6315 | NamedLWLockTrancheArray = param->NamedLWLockTrancheArray; |
6316 | MainLWLockArray = param->MainLWLockArray; |
6317 | ProcStructLock = param->ProcStructLock; |
6318 | ProcGlobal = param->ProcGlobal; |
6319 | AuxiliaryProcs = param->AuxiliaryProcs; |
6320 | PreparedXactProcs = param->PreparedXactProcs; |
6321 | PMSignalState = param->PMSignalState; |
6322 | read_inheritable_socket(&pgStatSock, ¶m->pgStatSock); |
6323 | |
6324 | PostmasterPid = param->PostmasterPid; |
6325 | PgStartTime = param->PgStartTime; |
6326 | PgReloadTime = param->PgReloadTime; |
6327 | first_syslogger_file_time = param->first_syslogger_file_time; |
6328 | |
6329 | redirection_done = param->redirection_done; |
6330 | IsBinaryUpgrade = param->IsBinaryUpgrade; |
6331 | max_safe_fds = param->max_safe_fds; |
6332 | |
6333 | MaxBackends = param->MaxBackends; |
6334 | |
6335 | #ifdef WIN32 |
6336 | PostmasterHandle = param->PostmasterHandle; |
6337 | pgwin32_initial_signal_pipe = param->initial_signal_pipe; |
6338 | #else |
6339 | memcpy(&postmaster_alive_fds, ¶m->postmaster_alive_fds, |
6340 | sizeof(postmaster_alive_fds)); |
6341 | #endif |
6342 | |
6343 | memcpy(&syslogPipe, ¶m->syslogPipe, sizeof(syslogPipe)); |
6344 | |
6345 | strlcpy(my_exec_path, param->my_exec_path, MAXPGPATH); |
6346 | |
6347 | strlcpy(pkglib_path, param->pkglib_path, MAXPGPATH); |
6348 | |
6349 | strlcpy(ExtraOptions, param->ExtraOptions, MAXPGPATH); |
6350 | } |
6351 | |
6352 | |
6353 | Size |
6354 | ShmemBackendArraySize(void) |
6355 | { |
6356 | return mul_size(MaxLivePostmasterChildren(), sizeof(Backend)); |
6357 | } |
6358 | |
6359 | void |
6360 | ShmemBackendArrayAllocation(void) |
6361 | { |
6362 | Size size = ShmemBackendArraySize(); |
6363 | |
6364 | ShmemBackendArray = (Backend *) ShmemAlloc(size); |
6365 | /* Mark all slots as empty */ |
6366 | memset(ShmemBackendArray, 0, size); |
6367 | } |
6368 | |
6369 | static void |
6370 | ShmemBackendArrayAdd(Backend *bn) |
6371 | { |
6372 | /* The array slot corresponding to my PMChildSlot should be free */ |
6373 | int i = bn->child_slot - 1; |
6374 | |
6375 | Assert(ShmemBackendArray[i].pid == 0); |
6376 | ShmemBackendArray[i] = *bn; |
6377 | } |
6378 | |
6379 | static void |
6380 | ShmemBackendArrayRemove(Backend *bn) |
6381 | { |
6382 | int i = bn->child_slot - 1; |
6383 | |
6384 | Assert(ShmemBackendArray[i].pid == bn->pid); |
6385 | /* Mark the slot as empty */ |
6386 | ShmemBackendArray[i].pid = 0; |
6387 | } |
6388 | #endif /* EXEC_BACKEND */ |
6389 | |
6390 | |
6391 | #ifdef WIN32 |
6392 | |
6393 | /* |
6394 | * Subset implementation of waitpid() for Windows. We assume pid is -1 |
6395 | * (that is, check all child processes) and options is WNOHANG (don't wait). |
6396 | */ |
6397 | static pid_t |
6398 | waitpid(pid_t pid, int *exitstatus, int options) |
6399 | { |
6400 | DWORD dwd; |
6401 | ULONG_PTR key; |
6402 | OVERLAPPED *ovl; |
6403 | |
6404 | /* |
6405 | * Check if there are any dead children. If there are, return the pid of |
6406 | * the first one that died. |
6407 | */ |
6408 | if (GetQueuedCompletionStatus(win32ChildQueue, &dwd, &key, &ovl, 0)) |
6409 | { |
6410 | *exitstatus = (int) key; |
6411 | return dwd; |
6412 | } |
6413 | |
6414 | return -1; |
6415 | } |
6416 | |
6417 | /* |
6418 | * Note! Code below executes on a thread pool! All operations must |
6419 | * be thread safe! Note that elog() and friends must *not* be used. |
6420 | */ |
6421 | static void WINAPI |
6422 | pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired) |
6423 | { |
6424 | win32_deadchild_waitinfo *childinfo = (win32_deadchild_waitinfo *) lpParameter; |
6425 | DWORD exitcode; |
6426 | |
6427 | if (TimerOrWaitFired) |
6428 | return; /* timeout. Should never happen, since we use |
6429 | * INFINITE as timeout value. */ |
6430 | |
6431 | /* |
6432 | * Remove handle from wait - required even though it's set to wait only |
6433 | * once |
6434 | */ |
6435 | UnregisterWaitEx(childinfo->waitHandle, NULL); |
6436 | |
6437 | if (!GetExitCodeProcess(childinfo->procHandle, &exitcode)) |
6438 | { |
6439 | /* |
6440 | * Should never happen. Inform user and set a fixed exitcode. |
6441 | */ |
6442 | write_stderr("could not read exit code for process\n" ); |
6443 | exitcode = 255; |
6444 | } |
6445 | |
6446 | if (!PostQueuedCompletionStatus(win32ChildQueue, childinfo->procId, (ULONG_PTR) exitcode, NULL)) |
6447 | write_stderr("could not post child completion status\n" ); |
6448 | |
6449 | /* |
6450 | * Handle is per-process, so we close it here instead of in the |
6451 | * originating thread |
6452 | */ |
6453 | CloseHandle(childinfo->procHandle); |
6454 | |
6455 | /* |
6456 | * Free struct that was allocated before the call to |
6457 | * RegisterWaitForSingleObject() |
6458 | */ |
6459 | free(childinfo); |
6460 | |
6461 | /* Queue SIGCHLD signal */ |
6462 | pg_queue_signal(SIGCHLD); |
6463 | } |
6464 | #endif /* WIN32 */ |
6465 | |
6466 | /* |
6467 | * Initialize one and only handle for monitoring postmaster death. |
6468 | * |
6469 | * Called once in the postmaster, so that child processes can subsequently |
6470 | * monitor if their parent is dead. |
6471 | */ |
6472 | static void |
6473 | InitPostmasterDeathWatchHandle(void) |
6474 | { |
6475 | #ifndef WIN32 |
6476 | |
6477 | /* |
6478 | * Create a pipe. Postmaster holds the write end of the pipe open |
6479 | * (POSTMASTER_FD_OWN), and children hold the read end. Children can pass |
6480 | * the read file descriptor to select() to wake up in case postmaster |
6481 | * dies, or check for postmaster death with a (read() == 0). Children must |
6482 | * close the write end as soon as possible after forking, because EOF |
6483 | * won't be signaled in the read end until all processes have closed the |
6484 | * write fd. That is taken care of in ClosePostmasterPorts(). |
6485 | */ |
6486 | Assert(MyProcPid == PostmasterPid); |
6487 | if (pipe(postmaster_alive_fds) < 0) |
6488 | ereport(FATAL, |
6489 | (errcode_for_file_access(), |
6490 | errmsg_internal("could not create pipe to monitor postmaster death: %m" ))); |
6491 | |
6492 | /* |
6493 | * Set O_NONBLOCK to allow testing for the fd's presence with a read() |
6494 | * call. |
6495 | */ |
6496 | if (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFL, O_NONBLOCK) == -1) |
6497 | ereport(FATAL, |
6498 | (errcode_for_socket_access(), |
6499 | errmsg_internal("could not set postmaster death monitoring pipe to nonblocking mode: %m" ))); |
6500 | #else |
6501 | |
6502 | /* |
6503 | * On Windows, we use a process handle for the same purpose. |
6504 | */ |
6505 | if (DuplicateHandle(GetCurrentProcess(), |
6506 | GetCurrentProcess(), |
6507 | GetCurrentProcess(), |
6508 | &PostmasterHandle, |
6509 | 0, |
6510 | TRUE, |
6511 | DUPLICATE_SAME_ACCESS) == 0) |
6512 | ereport(FATAL, |
6513 | (errmsg_internal("could not duplicate postmaster handle: error code %lu" , |
6514 | GetLastError()))); |
6515 | #endif /* WIN32 */ |
6516 | } |
6517 | |