1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * pmsignal.c |
4 | * routines for signaling the postmaster from its child processes |
5 | * |
6 | * |
7 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
8 | * Portions Copyright (c) 1994, Regents of the University of California |
9 | * |
10 | * IDENTIFICATION |
11 | * src/backend/storage/ipc/pmsignal.c |
12 | * |
13 | *------------------------------------------------------------------------- |
14 | */ |
15 | #include "postgres.h" |
16 | |
17 | #include <signal.h> |
18 | #include <unistd.h> |
19 | |
20 | #ifdef HAVE_SYS_PRCTL_H |
21 | #include <sys/prctl.h> |
22 | #endif |
23 | |
24 | #include "miscadmin.h" |
25 | #include "postmaster/postmaster.h" |
26 | #include "replication/walsender.h" |
27 | #include "storage/pmsignal.h" |
28 | #include "storage/shmem.h" |
29 | |
30 | |
/*
 * The postmaster is signaled by its children by sending SIGUSR1.  The
 * specific reason is communicated via flags in shared memory.  We keep
 * a boolean flag for each possible "reason", so that different reasons
 * can be signaled by different backends at the same time.  (However,
 * if the same reason is signaled more than once simultaneously, the
 * postmaster will observe it only once.)
 *
 * The flags are actually declared as "volatile sig_atomic_t" for maximum
 * portability.  This should ensure that loads and stores of the flag
 * values are atomic, allowing us to dispense with any explicit locking.
 *
 * In addition to the per-reason flags, we store a set of per-child-process
 * flags that are currently used only for detecting whether a backend has
 * exited without performing proper shutdown.  The per-child-process flags
 * have three possible states: UNUSED, ASSIGNED, ACTIVE.  An UNUSED slot is
 * available for assignment.  An ASSIGNED slot is associated with a postmaster
 * child process, but either the process has not touched shared memory yet,
 * or it has successfully cleaned up after itself.  An ACTIVE slot means the
 * process is actively using shared memory.  The slots are assigned to
 * child processes at random, and postmaster.c is responsible for tracking
 * which one goes with which PID.
 *
 * Actually there is a fourth state, WALSENDER.  This is just like ACTIVE,
 * but carries the extra information that the child is a WAL sender.
 * WAL senders too start in ACTIVE state, but switch to WALSENDER once they
 * start streaming the WAL (and they never go back to ACTIVE after that).
 */
59 | |
/*
 * Per-child slot states.  Each value is stored in a sig_atomic_t element
 * of PMChildFlags[], so every one of them must fit in sig_atomic_t.
 */
#define PM_CHILD_UNUSED		0	/* slot is free for assignment */
#define PM_CHILD_ASSIGNED	1	/* slot has an owner, not using shmem */
#define PM_CHILD_ACTIVE		2	/* owner is actively using shmem */
#define PM_CHILD_WALSENDER	3	/* like ACTIVE, and owner is a WAL sender */
64 | |
65 | /* "typedef struct PMSignalData PMSignalData" appears in pmsignal.h */ |
66 | struct PMSignalData |
67 | { |
68 | /* per-reason flags */ |
69 | sig_atomic_t PMSignalFlags[NUM_PMSIGNALS]; |
70 | /* per-child-process flags */ |
71 | int num_child_flags; /* # of entries in PMChildFlags[] */ |
72 | int next_child_flag; /* next slot to try to assign */ |
73 | sig_atomic_t PMChildFlags[FLEXIBLE_ARRAY_MEMBER]; |
74 | }; |
75 | |
76 | NON_EXEC_STATIC volatile PMSignalData *PMSignalState = NULL; |
77 | |
78 | /* |
79 | * Signal handler to be notified if postmaster dies. |
80 | */ |
81 | #ifdef USE_POSTMASTER_DEATH_SIGNAL |
/*
 * Raised by the handler below; reset and re-evaluated by
 * PostmasterIsAliveInternal().
 */
volatile sig_atomic_t postmaster_possibly_dead = false;

/*
 * postmaster_death_handler - signal handler installed by
 * PostmasterDeathSignalInit().
 *
 * It only records that the postmaster may have died; the signal number
 * is not examined.
 */
static void
postmaster_death_handler(int signo)
{
	postmaster_possibly_dead = true;
}
89 | |
/*
 * Pick the signal used to report postmaster death.
 *
 * Which signals exist depends on the OS.  SIGUSR1 and SIGUSR2 already
 * have other uses, so a different one is needed: SIGINFO is preferred,
 * with SIGPWR as the fallback.
 *
 * We currently assume that one of them is always available, which seems
 * reasonable for any platform modern enough to offer a parent-death
 * signaling mechanism; if neither exists the build fails.
 */
#if !defined(SIGINFO) && !defined(SIGPWR)
#error "cannot find a signal to use for postmaster death"
#elif defined(SIGINFO)
#define POSTMASTER_DEATH_SIGNAL SIGINFO
#else
#define POSTMASTER_DEATH_SIGNAL SIGPWR
#endif
105 | |
106 | #endif /* USE_POSTMASTER_DEATH_SIGNAL */ |
107 | |
108 | /* |
109 | * PMSignalShmemSize |
110 | * Compute space needed for pmsignal.c's shared memory |
111 | */ |
112 | Size |
113 | PMSignalShmemSize(void) |
114 | { |
115 | Size size; |
116 | |
117 | size = offsetof(PMSignalData, PMChildFlags); |
118 | size = add_size(size, mul_size(MaxLivePostmasterChildren(), |
119 | sizeof(sig_atomic_t))); |
120 | |
121 | return size; |
122 | } |
123 | |
124 | /* |
125 | * PMSignalShmemInit - initialize during shared-memory creation |
126 | */ |
127 | void |
128 | PMSignalShmemInit(void) |
129 | { |
130 | bool found; |
131 | |
132 | PMSignalState = (PMSignalData *) |
133 | ShmemInitStruct("PMSignalState" , PMSignalShmemSize(), &found); |
134 | |
135 | if (!found) |
136 | { |
137 | MemSet(unvolatize(PMSignalData *, PMSignalState), 0, PMSignalShmemSize()); |
138 | PMSignalState->num_child_flags = MaxLivePostmasterChildren(); |
139 | } |
140 | } |
141 | |
142 | /* |
143 | * SendPostmasterSignal - signal the postmaster from a child process |
144 | */ |
145 | void |
146 | SendPostmasterSignal(PMSignalReason reason) |
147 | { |
148 | /* If called in a standalone backend, do nothing */ |
149 | if (!IsUnderPostmaster) |
150 | return; |
151 | /* Atomically set the proper flag */ |
152 | PMSignalState->PMSignalFlags[reason] = true; |
153 | /* Send signal to postmaster */ |
154 | kill(PostmasterPid, SIGUSR1); |
155 | } |
156 | |
157 | /* |
158 | * CheckPostmasterSignal - check to see if a particular reason has been |
159 | * signaled, and clear the signal flag. Should be called by postmaster |
160 | * after receiving SIGUSR1. |
161 | */ |
162 | bool |
163 | CheckPostmasterSignal(PMSignalReason reason) |
164 | { |
165 | /* Careful here --- don't clear flag if we haven't seen it set */ |
166 | if (PMSignalState->PMSignalFlags[reason]) |
167 | { |
168 | PMSignalState->PMSignalFlags[reason] = false; |
169 | return true; |
170 | } |
171 | return false; |
172 | } |
173 | |
174 | |
175 | /* |
176 | * AssignPostmasterChildSlot - select an unused slot for a new postmaster |
177 | * child process, and set its state to ASSIGNED. Returns a slot number |
178 | * (one to N). |
179 | * |
180 | * Only the postmaster is allowed to execute this routine, so we need no |
181 | * special locking. |
182 | */ |
183 | int |
184 | AssignPostmasterChildSlot(void) |
185 | { |
186 | int slot = PMSignalState->next_child_flag; |
187 | int n; |
188 | |
189 | /* |
190 | * Scan for a free slot. We track the last slot assigned so as not to |
191 | * waste time repeatedly rescanning low-numbered slots. |
192 | */ |
193 | for (n = PMSignalState->num_child_flags; n > 0; n--) |
194 | { |
195 | if (--slot < 0) |
196 | slot = PMSignalState->num_child_flags - 1; |
197 | if (PMSignalState->PMChildFlags[slot] == PM_CHILD_UNUSED) |
198 | { |
199 | PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED; |
200 | PMSignalState->next_child_flag = slot; |
201 | return slot + 1; |
202 | } |
203 | } |
204 | |
205 | /* Out of slots ... should never happen, else postmaster.c messed up */ |
206 | elog(FATAL, "no free slots in PMChildFlags array" ); |
207 | return 0; /* keep compiler quiet */ |
208 | } |
209 | |
210 | /* |
211 | * ReleasePostmasterChildSlot - release a slot after death of a postmaster |
212 | * child process. This must be called in the postmaster process. |
213 | * |
214 | * Returns true if the slot had been in ASSIGNED state (the expected case), |
215 | * false otherwise (implying that the child failed to clean itself up). |
216 | */ |
217 | bool |
218 | ReleasePostmasterChildSlot(int slot) |
219 | { |
220 | bool result; |
221 | |
222 | Assert(slot > 0 && slot <= PMSignalState->num_child_flags); |
223 | slot--; |
224 | |
225 | /* |
226 | * Note: the slot state might already be unused, because the logic in |
227 | * postmaster.c is such that this might get called twice when a child |
228 | * crashes. So we don't try to Assert anything about the state. |
229 | */ |
230 | result = (PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED); |
231 | PMSignalState->PMChildFlags[slot] = PM_CHILD_UNUSED; |
232 | return result; |
233 | } |
234 | |
235 | /* |
236 | * IsPostmasterChildWalSender - check if given slot is in use by a |
237 | * walsender process. |
238 | */ |
239 | bool |
240 | IsPostmasterChildWalSender(int slot) |
241 | { |
242 | Assert(slot > 0 && slot <= PMSignalState->num_child_flags); |
243 | slot--; |
244 | |
245 | if (PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER) |
246 | return true; |
247 | else |
248 | return false; |
249 | } |
250 | |
251 | /* |
252 | * MarkPostmasterChildActive - mark a postmaster child as about to begin |
253 | * actively using shared memory. This is called in the child process. |
254 | */ |
255 | void |
256 | MarkPostmasterChildActive(void) |
257 | { |
258 | int slot = MyPMChildSlot; |
259 | |
260 | Assert(slot > 0 && slot <= PMSignalState->num_child_flags); |
261 | slot--; |
262 | Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ASSIGNED); |
263 | PMSignalState->PMChildFlags[slot] = PM_CHILD_ACTIVE; |
264 | } |
265 | |
266 | /* |
267 | * MarkPostmasterChildWalSender - mark a postmaster child as a WAL sender |
268 | * process. This is called in the child process, sometime after marking the |
269 | * child as active. |
270 | */ |
271 | void |
272 | MarkPostmasterChildWalSender(void) |
273 | { |
274 | int slot = MyPMChildSlot; |
275 | |
276 | Assert(am_walsender); |
277 | |
278 | Assert(slot > 0 && slot <= PMSignalState->num_child_flags); |
279 | slot--; |
280 | Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ACTIVE); |
281 | PMSignalState->PMChildFlags[slot] = PM_CHILD_WALSENDER; |
282 | } |
283 | |
284 | /* |
285 | * MarkPostmasterChildInactive - mark a postmaster child as done using |
286 | * shared memory. This is called in the child process. |
287 | */ |
288 | void |
289 | MarkPostmasterChildInactive(void) |
290 | { |
291 | int slot = MyPMChildSlot; |
292 | |
293 | Assert(slot > 0 && slot <= PMSignalState->num_child_flags); |
294 | slot--; |
295 | Assert(PMSignalState->PMChildFlags[slot] == PM_CHILD_ACTIVE || |
296 | PMSignalState->PMChildFlags[slot] == PM_CHILD_WALSENDER); |
297 | PMSignalState->PMChildFlags[slot] = PM_CHILD_ASSIGNED; |
298 | } |
299 | |
300 | |
301 | /* |
302 | * PostmasterIsAliveInternal - check whether postmaster process is still alive |
303 | * |
304 | * This is the slow path of PostmasterIsAlive(), where the caller has already |
305 | * checked 'postmaster_possibly_dead'. (On platforms that don't support |
306 | * a signal for parent death, PostmasterIsAlive() is just an alias for this.) |
307 | */ |
308 | bool |
309 | PostmasterIsAliveInternal(void) |
310 | { |
311 | #ifdef USE_POSTMASTER_DEATH_SIGNAL |
312 | /* |
313 | * Reset the flag before checking, so that we don't miss a signal if |
314 | * postmaster dies right after the check. If postmaster was indeed dead, |
315 | * we'll re-arm it before returning to caller. |
316 | */ |
317 | postmaster_possibly_dead = false; |
318 | #endif |
319 | |
320 | #ifndef WIN32 |
321 | { |
322 | char c; |
323 | ssize_t rc; |
324 | |
325 | rc = read(postmaster_alive_fds[POSTMASTER_FD_WATCH], &c, 1); |
326 | |
327 | /* |
328 | * In the usual case, the postmaster is still alive, and there is no |
329 | * data in the pipe. |
330 | */ |
331 | if (rc < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) |
332 | return true; |
333 | else |
334 | { |
335 | /* |
336 | * Postmaster is dead, or something went wrong with the read() |
337 | * call. |
338 | */ |
339 | |
340 | #ifdef USE_POSTMASTER_DEATH_SIGNAL |
341 | postmaster_possibly_dead = true; |
342 | #endif |
343 | |
344 | if (rc < 0) |
345 | elog(FATAL, "read on postmaster death monitoring pipe failed: %m" ); |
346 | else if (rc > 0) |
347 | elog(FATAL, "unexpected data in postmaster death monitoring pipe" ); |
348 | |
349 | return false; |
350 | } |
351 | } |
352 | |
353 | #else /* WIN32 */ |
354 | if (WaitForSingleObject(PostmasterHandle, 0) == WAIT_TIMEOUT) |
355 | return true; |
356 | else |
357 | { |
358 | #ifdef USE_POSTMASTER_DEATH_SIGNAL |
359 | postmaster_possibly_dead = true; |
360 | #endif |
361 | return false; |
362 | } |
363 | #endif /* WIN32 */ |
364 | } |
365 | |
/*
 * PostmasterDeathSignalInit - ask for a signal when the postmaster dies,
 * on platforms where that is possible
 *
 * Does nothing unless USE_POSTMASTER_DEATH_SIGNAL is defined.
 */
void
PostmasterDeathSignalInit(void)
{
#ifdef USE_POSTMASTER_DEATH_SIGNAL
	int			death_signal = POSTMASTER_DEATH_SIGNAL;

	/* Install the handler before asking for the signal */
	pqsignal(death_signal, postmaster_death_handler);

	/* Ask to be sent that signal when our parent exits */
#if defined(PR_SET_PDEATHSIG)
	if (prctl(PR_SET_PDEATHSIG, death_signal) < 0)
		elog(ERROR, "could not request parent death signal: %m");
#elif defined(PROC_PDEATHSIG_CTL)
	if (procctl(P_PID, 0, PROC_PDEATHSIG_CTL, &death_signal) < 0)
		elog(ERROR, "could not request parent death signal: %m");
#else
#error "USE_POSTMASTER_DEATH_SIGNAL set, but there is no mechanism to request the signal"
#endif

	/*
	 * The parent might have vanished before the request took effect, in
	 * which case no signal will come.  Force the first liveness check to
	 * take the slow path.
	 */
	postmaster_possibly_dead = true;
#endif							/* USE_POSTMASTER_DEATH_SIGNAL */
}
396 | |