/*-------------------------------------------------------------------------
 * bgworker.c
 *    POSTGRES pluggable background workers implementation
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *    src/backend/postmaster/bgworker.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <unistd.h>

#include "access/parallel.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "postmaster/bgworker_internals.h"
#include "postmaster/postmaster.h"
#include "replication/logicallauncher.h"
#include "replication/logicalworker.h"
#include "storage/dsm.h"
#include "storage/ipc.h"
#include "storage/latch.h"
#include "storage/lwlock.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "storage/procsignal.h"
#include "storage/shmem.h"
#include "tcop/tcopprot.h"
#include "utils/ascii.h"
#include "utils/ps_status.h"
#include "utils/timeout.h"

/*
 * The postmaster's list of registered background workers, in private memory.
 */
slist_head BackgroundWorkerList = SLIST_STATIC_INIT(BackgroundWorkerList);

/*
 * BackgroundWorkerSlots exist in shared memory and can be accessed (via
 * the BackgroundWorkerArray) by both the postmaster and by regular backends.
 * However, the postmaster cannot take locks, even spinlocks, because this
 * might allow it to crash or become wedged if shared memory gets corrupted.
 * Such an outcome is intolerable.  Therefore, we need a lockless protocol
 * for coordinating access to this data.
 *
 * The 'in_use' flag is used to hand off responsibility for the slot between
 * the postmaster and the rest of the system.  When 'in_use' is false,
 * the postmaster will ignore the slot entirely, except for the 'in_use' flag
 * itself, which it may read.  In this state, regular backends may modify the
 * slot.  Once a backend sets 'in_use' to true, the slot becomes the
 * responsibility of the postmaster.  Regular backends may no longer modify it,
 * but the postmaster may examine it.  Thus, a backend initializing a slot
 * must fully initialize the slot - and insert a write memory barrier - before
 * marking it as in use.
 *
 * As an exception, however, even when the slot is in use, regular backends
 * may set the 'terminate' flag for a slot, telling the postmaster not
 * to restart it.  Once the background worker is no longer running, the slot
 * will be released for reuse.
 *
 * In addition to coordinating with the postmaster, backends modifying this
 * data structure must coordinate with each other.  Since they can take locks,
 * this is straightforward: any backend wishing to manipulate a slot must
 * take BackgroundWorkerLock in exclusive mode.  Backends wishing to read
 * data that might get concurrently modified by other backends should take
 * this lock in shared mode.  No matter what, backends reading this data
 * structure must be able to tolerate concurrent modifications by the
 * postmaster.
 */
typedef struct BackgroundWorkerSlot
{
    bool        in_use;
    bool        terminate;
    pid_t       pid;            /* InvalidPid = not started yet; 0 = dead */
    uint64      generation;     /* incremented when slot is recycled */
    BackgroundWorker worker;
} BackgroundWorkerSlot;
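
/*
 * Illustrative sketch (not compiled): the ordering the handoff protocol above
 * relies on, as implemented by the functions later in this file.  A backend
 * claiming a slot fills it in completely, issues a write barrier, and only
 * then sets in_use; the postmaster reads in_use first, issues a read barrier,
 * and only then trusts the rest of the slot.
 *
 *      Backend (see RegisterDynamicBackgroundWorker):
 *          memcpy(&slot->worker, worker, sizeof(BackgroundWorker));
 *          slot->pid = InvalidPid;
 *          slot->generation++;
 *          pg_write_barrier();
 *          slot->in_use = true;
 *
 *      Postmaster (see BackgroundWorkerStateChange):
 *          if (!slot->in_use)
 *              continue;
 *          pg_read_barrier();
 *          ... now safe to examine slot->worker, slot->terminate, etc. ...
 */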

/*
 * In order to limit the total number of parallel workers (according to
 * max_parallel_workers GUC), we maintain the number of active parallel
 * workers.  Since the postmaster cannot take locks, two variables are used for
 * this purpose: the number of registered parallel workers (modified by the
 * backends, protected by BackgroundWorkerLock) and the number of terminated
 * parallel workers (modified only by the postmaster, lockless).  The active
 * number of parallel workers is the number of registered workers minus the
 * terminated ones.  These counters can of course overflow, but it's not
 * important here since the subtraction will still give the right number.
 */
typedef struct BackgroundWorkerArray
{
    int         total_slots;
    uint32      parallel_register_count;
    uint32      parallel_terminate_count;
    BackgroundWorkerSlot slot[FLEXIBLE_ARRAY_MEMBER];
} BackgroundWorkerArray;
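
/*
 * A quick worked example of the overflow claim above (a sketch with
 * hypothetical counter values): if parallel_register_count has wrapped
 * around to 2 while parallel_terminate_count is still UINT32_MAX - 1, the
 * uint32 subtraction 2 - (UINT32_MAX - 1) still yields 4, the true number of
 * active parallel workers, because unsigned arithmetic is modulo 2^32.
 */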

struct BackgroundWorkerHandle
{
    int         slot;
    uint64      generation;
};

static BackgroundWorkerArray *BackgroundWorkerData;

/*
 * List of internal background worker entry points.  We need this for
 * reasons explained in LookupBackgroundWorkerFunction(), below.
 */
static const struct
{
    const char *fn_name;
    bgworker_main_type fn_addr;
} InternalBGWorkers[] =
{
    {
        "ParallelWorkerMain", ParallelWorkerMain
    },
    {
        "ApplyLauncherMain", ApplyLauncherMain
    },
    {
        "ApplyWorkerMain", ApplyWorkerMain
    }
};

/* Private functions. */
static bgworker_main_type LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname);


/*
 * Calculate shared memory needed.
 */
Size
BackgroundWorkerShmemSize(void)
{
    Size        size;

    /* Array of workers is variably sized. */
    size = offsetof(BackgroundWorkerArray, slot);
    size = add_size(size, mul_size(max_worker_processes,
                                   sizeof(BackgroundWorkerSlot)));

    return size;
}

/*
 * Initialize shared memory.
 */
void
BackgroundWorkerShmemInit(void)
{
    bool        found;

    BackgroundWorkerData = ShmemInitStruct("Background Worker Data",
                                           BackgroundWorkerShmemSize(),
                                           &found);
    if (!IsUnderPostmaster)
    {
        slist_iter  siter;
        int         slotno = 0;

        BackgroundWorkerData->total_slots = max_worker_processes;
        BackgroundWorkerData->parallel_register_count = 0;
        BackgroundWorkerData->parallel_terminate_count = 0;

        /*
         * Copy contents of worker list into shared memory.  Record the shared
         * memory slot assigned to each worker.  This ensures a 1-to-1
         * correspondence between the postmaster's private list and the array
         * in shared memory.
         */
        slist_foreach(siter, &BackgroundWorkerList)
        {
            BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
            RegisteredBgWorker *rw;

            rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
            Assert(slotno < max_worker_processes);
            slot->in_use = true;
            slot->terminate = false;
            slot->pid = InvalidPid;
            slot->generation = 0;
            rw->rw_shmem_slot = slotno;
            rw->rw_worker.bgw_notify_pid = 0;   /* might be reinit after crash */
            memcpy(&slot->worker, &rw->rw_worker, sizeof(BackgroundWorker));
            ++slotno;
        }

        /*
         * Mark any remaining slots as not in use.
         */
        while (slotno < max_worker_processes)
        {
            BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];

            slot->in_use = false;
            ++slotno;
        }
    }
    else
        Assert(found);
}

/*
 * Search the postmaster's backend-private list of RegisteredBgWorker objects
 * for the one that maps to the given slot number.
 */
static RegisteredBgWorker *
FindRegisteredWorkerBySlotNumber(int slotno)
{
    slist_iter  siter;

    slist_foreach(siter, &BackgroundWorkerList)
    {
        RegisteredBgWorker *rw;

        rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
        if (rw->rw_shmem_slot == slotno)
            return rw;
    }

    return NULL;
}

/*
 * Notice changes to shared memory made by other backends.  This code
 * runs in the postmaster, so we must be very careful not to assume that
 * shared memory contents are sane.  Otherwise, a rogue backend could take
 * out the postmaster.
 */
void
BackgroundWorkerStateChange(void)
{
    int         slotno;

    /*
     * The total number of slots stored in shared memory should match our
     * notion of max_worker_processes.  If it does not, something is very
     * wrong.  Further down, we always refer to this value as
     * max_worker_processes, in case shared memory gets corrupted while we're
     * looping.
     */
    if (max_worker_processes != BackgroundWorkerData->total_slots)
    {
        elog(LOG,
             "inconsistent background worker state (max_worker_processes=%d, total_slots=%d)",
             max_worker_processes,
             BackgroundWorkerData->total_slots);
        return;
    }

    /*
     * Iterate through slots, looking for newly-registered workers or workers
     * who must die.
     */
    for (slotno = 0; slotno < max_worker_processes; ++slotno)
    {
        BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
        RegisteredBgWorker *rw;

        if (!slot->in_use)
            continue;

        /*
         * Make sure we don't see the in_use flag before the updated slot
         * contents.
         */
        pg_read_barrier();

        /* See whether we already know about this worker. */
        rw = FindRegisteredWorkerBySlotNumber(slotno);
        if (rw != NULL)
        {
            /*
             * In general, the worker data can't change after it's initially
             * registered.  However, someone can set the terminate flag.
             */
            if (slot->terminate && !rw->rw_terminate)
            {
                rw->rw_terminate = true;
                if (rw->rw_pid != 0)
                    kill(rw->rw_pid, SIGTERM);
                else
                {
                    /* Report never-started, now-terminated worker as dead. */
                    ReportBackgroundWorkerPID(rw);
                }
            }
            continue;
        }

        /*
         * If the worker is marked for termination, we don't need to add it to
         * the registered workers list; we can just free the slot.  However,
         * if bgw_notify_pid is set, the process that registered the worker
         * may need to know that we've processed the terminate request, so be
         * sure to signal it.
         */
        if (slot->terminate)
        {
            int         notify_pid;

            /*
             * We need a memory barrier here to make sure that the load of
             * bgw_notify_pid and the update of parallel_terminate_count
             * complete before the store to in_use.
             */
            notify_pid = slot->worker.bgw_notify_pid;
            if ((slot->worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
                BackgroundWorkerData->parallel_terminate_count++;
            pg_memory_barrier();
            slot->pid = 0;
            slot->in_use = false;
            if (notify_pid != 0)
                kill(notify_pid, SIGUSR1);

            continue;
        }

        /*
         * Copy the registration data into the registered workers list.
         */
        rw = malloc(sizeof(RegisteredBgWorker));
        if (rw == NULL)
        {
            ereport(LOG,
                    (errcode(ERRCODE_OUT_OF_MEMORY),
                     errmsg("out of memory")));
            return;
        }

        /*
         * Copy strings in a paranoid way.  If shared memory is corrupted, the
         * source data might not even be NUL-terminated.
         */
        ascii_safe_strlcpy(rw->rw_worker.bgw_name,
                           slot->worker.bgw_name, BGW_MAXLEN);
        ascii_safe_strlcpy(rw->rw_worker.bgw_type,
                           slot->worker.bgw_type, BGW_MAXLEN);
        ascii_safe_strlcpy(rw->rw_worker.bgw_library_name,
                           slot->worker.bgw_library_name, BGW_MAXLEN);
        ascii_safe_strlcpy(rw->rw_worker.bgw_function_name,
                           slot->worker.bgw_function_name, BGW_MAXLEN);

        /*
         * Copy various fixed-size fields.
         *
         * flags, start_time, and restart_time are examined by the postmaster,
         * but nothing too bad will happen if they are corrupted.  The
         * remaining fields will only be examined by the child process.  It
         * might crash, but we won't.
         */
        rw->rw_worker.bgw_flags = slot->worker.bgw_flags;
        rw->rw_worker.bgw_start_time = slot->worker.bgw_start_time;
        rw->rw_worker.bgw_restart_time = slot->worker.bgw_restart_time;
        rw->rw_worker.bgw_main_arg = slot->worker.bgw_main_arg;
        memcpy(rw->rw_worker.bgw_extra, slot->worker.bgw_extra, BGW_EXTRALEN);

        /*
         * Copy the PID to be notified about state changes, but only if the
         * postmaster knows about a backend with that PID.  It isn't an error
         * if the postmaster doesn't know about the PID, because the backend
         * that requested the worker could have died (or been killed) just
         * after doing so.  Nonetheless, at least until we get some experience
         * with how this plays out in the wild, log a message at a relatively
         * high debug level.
         */
        rw->rw_worker.bgw_notify_pid = slot->worker.bgw_notify_pid;
        if (!PostmasterMarkPIDForWorkerNotify(rw->rw_worker.bgw_notify_pid))
        {
            elog(DEBUG1, "worker notification PID %ld is not valid",
                 (long) rw->rw_worker.bgw_notify_pid);
            rw->rw_worker.bgw_notify_pid = 0;
        }

        /* Initialize postmaster bookkeeping. */
        rw->rw_backend = NULL;
        rw->rw_pid = 0;
        rw->rw_child_slot = 0;
        rw->rw_crashed_at = 0;
        rw->rw_shmem_slot = slotno;
        rw->rw_terminate = false;

        /* Log it! */
        ereport(DEBUG1,
                (errmsg("registering background worker \"%s\"",
                        rw->rw_worker.bgw_name)));

        slist_push_head(&BackgroundWorkerList, &rw->rw_lnode);
    }
}

/*
 * Forget about a background worker that's no longer needed.
 *
 * The worker must be identified by passing an slist_mutable_iter that
 * points to it.  This convention allows deletion of workers during
 * searches of the worker list, and saves having to search the list again.
 *
 * This function must be invoked only in the postmaster.
 */
void
ForgetBackgroundWorker(slist_mutable_iter *cur)
{
    RegisteredBgWorker *rw;
    BackgroundWorkerSlot *slot;

    rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur);

    Assert(rw->rw_shmem_slot < max_worker_processes);
    slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
    if ((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
        BackgroundWorkerData->parallel_terminate_count++;

    slot->in_use = false;

    ereport(DEBUG1,
            (errmsg("unregistering background worker \"%s\"",
                    rw->rw_worker.bgw_name)));

    slist_delete_current(cur);
    free(rw);
}

/*
 * Report the PID of a newly-launched background worker in shared memory.
 *
 * This function should only be called from the postmaster.
 */
void
ReportBackgroundWorkerPID(RegisteredBgWorker *rw)
{
    BackgroundWorkerSlot *slot;

    Assert(rw->rw_shmem_slot < max_worker_processes);
    slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
    slot->pid = rw->rw_pid;

    if (rw->rw_worker.bgw_notify_pid != 0)
        kill(rw->rw_worker.bgw_notify_pid, SIGUSR1);
}

/*
 * Report that the PID of a background worker is now zero because a
 * previously-running background worker has exited.
 *
 * This function should only be called from the postmaster.
 */
void
ReportBackgroundWorkerExit(slist_mutable_iter *cur)
{
    RegisteredBgWorker *rw;
    BackgroundWorkerSlot *slot;
    int         notify_pid;

    rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur);

    Assert(rw->rw_shmem_slot < max_worker_processes);
    slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
    slot->pid = rw->rw_pid;
    notify_pid = rw->rw_worker.bgw_notify_pid;

    /*
     * If this worker is slated for deregistration, do that before notifying
     * the process which started it.  Otherwise, if that process tries to
     * reuse the slot immediately, it might not be available yet.  In theory
     * that could happen anyway if the process checks slot->pid at just the
     * wrong moment, but this makes the window narrower.
     */
    if (rw->rw_terminate ||
        rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
        ForgetBackgroundWorker(cur);

    if (notify_pid != 0)
        kill(notify_pid, SIGUSR1);
}

/*
 * Cancel SIGUSR1 notifications for a PID belonging to an exiting backend.
 *
 * This function should only be called from the postmaster.
 */
void
BackgroundWorkerStopNotifications(pid_t pid)
{
    slist_iter  siter;

    slist_foreach(siter, &BackgroundWorkerList)
    {
        RegisteredBgWorker *rw;

        rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
        if (rw->rw_worker.bgw_notify_pid == pid)
            rw->rw_worker.bgw_notify_pid = 0;
    }
}

/*
 * Reset background worker crash state.
 *
 * We assume that, after a crash-and-restart cycle, background workers without
 * the never-restart flag should be restarted immediately, instead of waiting
 * for bgw_restart_time to elapse.
 */
void
ResetBackgroundWorkerCrashTimes(void)
{
    slist_mutable_iter iter;

    slist_foreach_modify(iter, &BackgroundWorkerList)
    {
        RegisteredBgWorker *rw;

        rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);

        if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
        {
            /*
             * Workers marked BGW_NEVER_RESTART shouldn't get relaunched after
             * the crash, so forget about them.  (If we wait until after the
             * crash to forget about them, and they are parallel workers,
             * parallel_terminate_count will get incremented after we've
             * already zeroed parallel_register_count, which would be bad.)
             */
            ForgetBackgroundWorker(&iter);
        }
        else
        {
            /*
             * The accounting which we do via parallel_register_count and
             * parallel_terminate_count would get messed up if a worker marked
             * parallel could survive a crash and restart cycle.  All such
             * workers should be marked BGW_NEVER_RESTART, and thus control
             * should never reach this branch.
             */
            Assert((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) == 0);

            /*
             * Allow this worker to be restarted immediately after we finish
             * resetting.
             */
            rw->rw_crashed_at = 0;
        }
    }
}

#ifdef EXEC_BACKEND
/*
 * In EXEC_BACKEND mode, workers use this to retrieve their details from
 * shared memory.
 */
BackgroundWorker *
BackgroundWorkerEntry(int slotno)
{
    static BackgroundWorker myEntry;
    BackgroundWorkerSlot *slot;

    Assert(slotno < BackgroundWorkerData->total_slots);
    slot = &BackgroundWorkerData->slot[slotno];
    Assert(slot->in_use);

    /* must copy this in case we don't intend to retain shmem access */
    memcpy(&myEntry, &slot->worker, sizeof myEntry);
    return &myEntry;
}
#endif

/*
 * Complain about the BackgroundWorker definition using error level elevel.
 * Return true if it looks ok, false if not (unless elevel >= ERROR, in
 * which case we won't return at all in the not-OK case).
 */
static bool
SanityCheckBackgroundWorker(BackgroundWorker *worker, int elevel)
{
    /* sanity check for flags */
    if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)
    {
        if (!(worker->bgw_flags & BGWORKER_SHMEM_ACCESS))
        {
            ereport(elevel,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("background worker \"%s\": must attach to shared memory in order to request a database connection",
                            worker->bgw_name)));
            return false;
        }

        if (worker->bgw_start_time == BgWorkerStart_PostmasterStart)
        {
            ereport(elevel,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("background worker \"%s\": cannot request database access if starting at postmaster start",
                            worker->bgw_name)));
            return false;
        }

        /* XXX other checks? */
    }

    if ((worker->bgw_restart_time < 0 &&
         worker->bgw_restart_time != BGW_NEVER_RESTART) ||
        (worker->bgw_restart_time > USECS_PER_DAY / 1000))
    {
        ereport(elevel,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("background worker \"%s\": invalid restart interval",
                        worker->bgw_name)));
        return false;
    }

    /*
     * Parallel workers may not be configured for restart, because the
     * parallel_register_count/parallel_terminate_count accounting can't
     * handle parallel workers lasting through a crash-and-restart cycle.
     */
    if (worker->bgw_restart_time != BGW_NEVER_RESTART &&
        (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
    {
        ereport(elevel,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("background worker \"%s\": parallel workers may not be configured for restart",
                        worker->bgw_name)));
        return false;
    }

    /*
     * If bgw_type is not filled in, use bgw_name.
     */
    if (strcmp(worker->bgw_type, "") == 0)
        strcpy(worker->bgw_type, worker->bgw_name);

    return true;
}

static void
bgworker_quickdie(SIGNAL_ARGS)
{
    /*
     * We DO NOT want to run proc_exit() or atexit() callbacks -- we're here
     * because shared memory may be corrupted, so we don't want to try to
     * clean up our transaction.  Just nail the windows shut and get out of
     * town.  The callbacks wouldn't be safe to run from a signal handler,
     * anyway.
     *
     * Note we do _exit(2) not _exit(0).  This is to force the postmaster into
     * a system reset cycle if someone sends a manual SIGQUIT to a random
     * backend.  This is necessary precisely because we don't clean up our
     * shared memory state.  (The "dead man switch" mechanism in pmsignal.c
     * should ensure the postmaster sees this as a crash, too, but no harm in
     * being doubly sure.)
     */
    _exit(2);
}

/*
 * Standard SIGTERM handler for background workers
 */
static void
bgworker_die(SIGNAL_ARGS)
{
    PG_SETMASK(&BlockSig);

    ereport(FATAL,
            (errcode(ERRCODE_ADMIN_SHUTDOWN),
             errmsg("terminating background worker \"%s\" due to administrator command",
                    MyBgworkerEntry->bgw_type)));
}

/*
 * Standard SIGUSR1 handler for unconnected workers
 *
 * Here, we want to make sure an unconnected worker will at least heed
 * latch activity.
 */
static void
bgworker_sigusr1_handler(SIGNAL_ARGS)
{
    int         save_errno = errno;

    latch_sigusr1_handler();

    errno = save_errno;
}

/*
 * Start a new background worker
 *
 * This is the main entry point for a background worker, to be called from
 * the postmaster.
 */
void
StartBackgroundWorker(void)
{
    sigjmp_buf  local_sigjmp_buf;
    BackgroundWorker *worker = MyBgworkerEntry;
    bgworker_main_type entrypt;

    if (worker == NULL)
        elog(FATAL, "unable to find bgworker entry");

    IsBackgroundWorker = true;

    /* Identify myself via ps */
    init_ps_display(worker->bgw_name, "", "", "");

    /*
     * If we're not supposed to have shared memory access, then detach from
     * shared memory.  If we didn't request shared memory access, the
     * postmaster won't force a cluster-wide restart if we exit unexpectedly,
     * so we'd better make sure that we don't mess anything up that would
     * require that sort of cleanup.
     */
    if ((worker->bgw_flags & BGWORKER_SHMEM_ACCESS) == 0)
    {
        dsm_detach_all();
        PGSharedMemoryDetach();
    }

    SetProcessingMode(InitProcessing);

    /* Apply PostAuthDelay */
    if (PostAuthDelay > 0)
        pg_usleep(PostAuthDelay * 1000000L);

    /*
     * Set up signal handlers.
     */
    if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)
    {
        /*
         * SIGINT is used to signal canceling the current action
         */
        pqsignal(SIGINT, StatementCancelHandler);
        pqsignal(SIGUSR1, procsignal_sigusr1_handler);
        pqsignal(SIGFPE, FloatExceptionHandler);

        /* XXX Any other handlers needed here? */
    }
    else
    {
        pqsignal(SIGINT, SIG_IGN);
        pqsignal(SIGUSR1, bgworker_sigusr1_handler);
        pqsignal(SIGFPE, SIG_IGN);
    }
    pqsignal(SIGTERM, bgworker_die);
    pqsignal(SIGHUP, SIG_IGN);

    pqsignal(SIGQUIT, bgworker_quickdie);
    InitializeTimeouts();       /* establishes SIGALRM handler */

    pqsignal(SIGPIPE, SIG_IGN);
    pqsignal(SIGUSR2, SIG_IGN);
    pqsignal(SIGCHLD, SIG_DFL);

    /*
     * If an exception is encountered, processing resumes here.
     *
     * See notes in postgres.c about the design of this coding.
     */
    if (sigsetjmp(local_sigjmp_buf, 1) != 0)
    {
        /* Since not using PG_TRY, must reset error stack by hand */
        error_context_stack = NULL;

        /* Prevent interrupts while cleaning up */
        HOLD_INTERRUPTS();

        /* Report the error to the server log */
        EmitErrorReport();

        /*
         * Do we need more cleanup here?  For shmem-connected bgworkers, we
         * will call InitProcess below, which will install ProcKill as exit
         * callback.  That will take care of releasing locks, etc.
         */

        /* and go away */
        proc_exit(1);
    }

    /* We can now handle ereport(ERROR) */
    PG_exception_stack = &local_sigjmp_buf;

    /*
     * If the background worker requests shared memory access, set that up
     * now; else, detach all shared memory segments.
     */
    if (worker->bgw_flags & BGWORKER_SHMEM_ACCESS)
    {
        /*
         * Early initialization.  Some of this could be useful even for
         * background workers that aren't using shared memory, but they can
         * call the individual startup routines for those subsystems if
         * needed.
         */
        BaseInit();

        /*
         * Create a per-backend PGPROC struct in shared memory, except in the
         * EXEC_BACKEND case where this was done in SubPostmasterMain.  We
         * must do this before we can use LWLocks (and in the EXEC_BACKEND
         * case we already had to do some stuff with LWLocks).
         */
#ifndef EXEC_BACKEND
        InitProcess();
#endif
    }

    /*
     * Look up the entry point function, loading its library if necessary.
     */
    entrypt = LookupBackgroundWorkerFunction(worker->bgw_library_name,
                                             worker->bgw_function_name);

    /*
     * Note that in normal processes, we would call InitPostgres here.  For a
     * worker, however, we don't know what database to connect to, yet; so we
     * need to wait until the user code does it via
     * BackgroundWorkerInitializeConnection().
     */

    /*
     * Now invoke the user-defined worker code
     */
    entrypt(worker->bgw_main_arg);

    /* ... and if it returns, we're done */
    proc_exit(0);
}

/*
 * Register a new static background worker.
 *
 * This can only be called directly from the postmaster or in the _PG_init
 * function of a module library that's loaded by shared_preload_libraries;
 * otherwise it will have no effect.
 */
void
RegisterBackgroundWorker(BackgroundWorker *worker)
{
    RegisteredBgWorker *rw;
    static int  numworkers = 0;

    if (!IsUnderPostmaster)
        ereport(DEBUG1,
                (errmsg("registering background worker \"%s\"", worker->bgw_name)));

    if (!process_shared_preload_libraries_in_progress &&
        strcmp(worker->bgw_library_name, "postgres") != 0)
    {
        if (!IsUnderPostmaster)
            ereport(LOG,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("background worker \"%s\": must be registered in shared_preload_libraries",
                            worker->bgw_name)));
        return;
    }

    if (!SanityCheckBackgroundWorker(worker, LOG))
        return;

    if (worker->bgw_notify_pid != 0)
    {
        ereport(LOG,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("background worker \"%s\": only dynamic background workers can request notification",
                        worker->bgw_name)));
        return;
    }

    /*
     * Enforce maximum number of workers.  Note this is overly restrictive: we
     * could allow more non-shmem-connected workers, because these don't count
     * towards the MAX_BACKENDS limit elsewhere.  For now, it doesn't seem
     * important to relax this restriction.
     */
    if (++numworkers > max_worker_processes)
    {
        ereport(LOG,
                (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
                 errmsg("too many background workers"),
                 errdetail_plural("Up to %d background worker can be registered with the current settings.",
                                  "Up to %d background workers can be registered with the current settings.",
                                  max_worker_processes,
                                  max_worker_processes),
                 errhint("Consider increasing the configuration parameter \"max_worker_processes\".")));
        return;
    }

    /*
     * Copy the registration data into the registered workers list.
     */
    rw = malloc(sizeof(RegisteredBgWorker));
    if (rw == NULL)
    {
        ereport(LOG,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                 errmsg("out of memory")));
        return;
    }

    rw->rw_worker = *worker;
    rw->rw_backend = NULL;
    rw->rw_pid = 0;
    rw->rw_child_slot = 0;
    rw->rw_crashed_at = 0;
    rw->rw_terminate = false;

    slist_push_head(&BackgroundWorkerList, &rw->rw_lnode);
}
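
/*
 * A minimal usage sketch (illustrative only, not part of this file): how an
 * extension would typically call RegisterBackgroundWorker() from its
 * _PG_init() while being loaded via shared_preload_libraries.  The extension
 * and function names ("my_extension", "my_worker_main") are hypothetical.
 *
 *      void
 *      _PG_init(void)
 *      {
 *          BackgroundWorker worker;
 *
 *          memset(&worker, 0, sizeof(worker));
 *          worker.bgw_flags = BGWORKER_SHMEM_ACCESS |
 *              BGWORKER_BACKEND_DATABASE_CONNECTION;
 *          worker.bgw_start_time = BgWorkerStart_RecoveryFinished;
 *          worker.bgw_restart_time = 60;   // seconds, or BGW_NEVER_RESTART
 *          snprintf(worker.bgw_name, BGW_MAXLEN, "my_extension worker");
 *          snprintf(worker.bgw_type, BGW_MAXLEN, "my_extension worker");
 *          snprintf(worker.bgw_library_name, BGW_MAXLEN, "my_extension");
 *          snprintf(worker.bgw_function_name, BGW_MAXLEN, "my_worker_main");
 *          worker.bgw_notify_pid = 0;      // static workers may not request notification
 *          worker.bgw_main_arg = Int32GetDatum(0);
 *
 *          RegisterBackgroundWorker(&worker);
 *      }
 */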

/*
 * Register a new background worker from a regular backend.
 *
 * Returns true on success and false on failure.  Failure typically indicates
 * that no background worker slots are currently available.
 *
 * If handle != NULL, we'll set *handle to a pointer that can subsequently
 * be used as an argument to GetBackgroundWorkerPid().  The caller can
 * free this pointer using pfree(), if desired.
 */
bool
RegisterDynamicBackgroundWorker(BackgroundWorker *worker,
                                BackgroundWorkerHandle **handle)
{
    int         slotno;
    bool        success = false;
    bool        parallel;
    uint64      generation = 0;

    /*
     * We can't register dynamic background workers from the postmaster.  If
     * this is a standalone backend, we're the only process and can't start
     * any more.  In a multi-process environment, it might be theoretically
     * possible, but we don't currently support it due to locking
     * considerations; see comments on the BackgroundWorkerSlot data
     * structure.
     */
    if (!IsUnderPostmaster)
        return false;

    if (!SanityCheckBackgroundWorker(worker, ERROR))
        return false;

    parallel = (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0;

    LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);

    /*
     * If this is a parallel worker, check whether there are already too many
     * parallel workers; if so, don't register another one.  Our view of
     * parallel_terminate_count may be slightly stale, but that doesn't really
     * matter: we would have gotten the same result if we'd arrived here
     * slightly earlier anyway.  There's no help for it, either, since the
     * postmaster must not take locks; a memory barrier wouldn't guarantee
     * anything useful.
     */
    if (parallel && (BackgroundWorkerData->parallel_register_count -
                     BackgroundWorkerData->parallel_terminate_count) >=
        max_parallel_workers)
    {
        Assert(BackgroundWorkerData->parallel_register_count -
               BackgroundWorkerData->parallel_terminate_count <=
               MAX_PARALLEL_WORKER_LIMIT);
        LWLockRelease(BackgroundWorkerLock);
        return false;
    }

    /*
     * Look for an unused slot.  If we find one, grab it.
     */
    for (slotno = 0; slotno < BackgroundWorkerData->total_slots; ++slotno)
    {
        BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];

        if (!slot->in_use)
        {
            memcpy(&slot->worker, worker, sizeof(BackgroundWorker));
            slot->pid = InvalidPid; /* indicates not started yet */
            slot->generation++;
            slot->terminate = false;
            generation = slot->generation;
            if (parallel)
                BackgroundWorkerData->parallel_register_count++;

            /*
             * Make sure postmaster doesn't see the slot as in use before it
             * sees the new contents.
             */
            pg_write_barrier();

            slot->in_use = true;
            success = true;
            break;
        }
    }

    LWLockRelease(BackgroundWorkerLock);

    /* If we found a slot, tell the postmaster to notice the change. */
    if (success)
        SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE);

    /*
     * If we found a slot and the user has provided a handle, initialize it.
     */
    if (success && handle)
    {
        *handle = palloc(sizeof(BackgroundWorkerHandle));
        (*handle)->slot = slotno;
        (*handle)->generation = generation;
    }

    return success;
}
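
/*
 * A minimal usage sketch (illustrative only, not part of this file): a
 * regular backend registering a dynamic worker and keeping the handle so it
 * can later query or stop the worker.  The library and function names are
 * hypothetical; error handling is reduced to a single ereport().
 *
 *      BackgroundWorker worker;
 *      BackgroundWorkerHandle *handle;
 *
 *      memset(&worker, 0, sizeof(worker));
 *      worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
 *      worker.bgw_start_time = BgWorkerStart_ConsistentState;
 *      worker.bgw_restart_time = BGW_NEVER_RESTART;
 *      snprintf(worker.bgw_name, BGW_MAXLEN, "my dynamic worker");
 *      snprintf(worker.bgw_type, BGW_MAXLEN, "my dynamic worker");
 *      snprintf(worker.bgw_library_name, BGW_MAXLEN, "my_extension");
 *      snprintf(worker.bgw_function_name, BGW_MAXLEN, "my_worker_main");
 *      worker.bgw_main_arg = Int32GetDatum(42);
 *      worker.bgw_notify_pid = MyProcPid;  // so the postmaster signals us
 *
 *      if (!RegisterDynamicBackgroundWorker(&worker, &handle))
 *          ereport(ERROR,
 *                  (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
 *                   errmsg("could not register background worker")));
 */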

/*
 * Get the PID of a dynamically-registered background worker.
 *
 * If the worker is determined to be running, the return value will be
 * BGWH_STARTED and *pidp will get the PID of the worker process.  If the
 * postmaster has not yet attempted to start the worker, the return value will
 * be BGWH_NOT_YET_STARTED.  Otherwise, the return value is BGWH_STOPPED.
 *
 * BGWH_STOPPED can indicate either that the worker is temporarily stopped
 * (because it is configured for automatic restart and exited non-zero),
 * or that the worker is permanently stopped (because it exited with exit
 * code 0, or was not configured for automatic restart), or even that the
 * worker was unregistered without ever starting (either because startup
 * failed and the worker is not configured for automatic restart, or because
 * TerminateBackgroundWorker was used before the worker was successfully
 * started).
 */
BgwHandleStatus
GetBackgroundWorkerPid(BackgroundWorkerHandle *handle, pid_t *pidp)
{
    BackgroundWorkerSlot *slot;
    pid_t       pid;

    Assert(handle->slot < max_worker_processes);
    slot = &BackgroundWorkerData->slot[handle->slot];

    /*
     * We could probably arrange to synchronize access to data using memory
     * barriers only, but for now, let's just keep it simple and grab the
     * lock.  It seems unlikely that there will be enough traffic here to
     * result in meaningful contention.
     */
    LWLockAcquire(BackgroundWorkerLock, LW_SHARED);

    /*
     * The generation number can't be concurrently changed while we hold the
     * lock.  The pid, which is updated by the postmaster, can change at any
     * time, but we assume such changes are atomic.  So the value we read
     * won't be garbage, but it might be out of date by the time the caller
     * examines it (but that's unavoidable anyway).
     *
     * The in_use flag could be in the process of changing from true to false,
     * but if it is already false then it can't change further.
     */
    if (handle->generation != slot->generation || !slot->in_use)
        pid = 0;
    else
        pid = slot->pid;

    /* All done. */
    LWLockRelease(BackgroundWorkerLock);

    if (pid == 0)
        return BGWH_STOPPED;
    else if (pid == InvalidPid)
        return BGWH_NOT_YET_STARTED;
    *pidp = pid;
    return BGWH_STARTED;
}

/*
 * Wait for a background worker to start up.
 *
 * This is like GetBackgroundWorkerPid(), except that if the worker has not
 * yet started, we wait for it to do so; thus, BGWH_NOT_YET_STARTED is never
 * returned.  However, if the postmaster has died, we give up and return
 * BGWH_POSTMASTER_DIED, since in that case we know that startup will not
 * take place.
 */
BgwHandleStatus
WaitForBackgroundWorkerStartup(BackgroundWorkerHandle *handle, pid_t *pidp)
{
    BgwHandleStatus status;
    int         rc;

    for (;;)
    {
        pid_t       pid;

        CHECK_FOR_INTERRUPTS();

        status = GetBackgroundWorkerPid(handle, &pid);
        if (status == BGWH_STARTED)
            *pidp = pid;
        if (status != BGWH_NOT_YET_STARTED)
            break;

        rc = WaitLatch(MyLatch,
                       WL_LATCH_SET | WL_POSTMASTER_DEATH, 0,
                       WAIT_EVENT_BGWORKER_STARTUP);

        if (rc & WL_POSTMASTER_DEATH)
        {
            status = BGWH_POSTMASTER_DIED;
            break;
        }

        ResetLatch(MyLatch);
    }

    return status;
}
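
/*
 * A minimal caller-side sketch (illustrative only): after registering a
 * dynamic worker, wait for it to start and handle each possible status.
 * The handle is assumed to come from RegisterDynamicBackgroundWorker().
 *
 *      BgwHandleStatus status;
 *      pid_t       pid;
 *
 *      status = WaitForBackgroundWorkerStartup(handle, &pid);
 *      if (status == BGWH_STOPPED)
 *          ereport(ERROR,
 *                  (errmsg("background worker exited before it could be used")));
 *      if (status == BGWH_POSTMASTER_DIED)
 *          ereport(ERROR,
 *                  (errmsg("cannot start background worker without postmaster")));
 *      Assert(status == BGWH_STARTED);
 *      elog(DEBUG1, "background worker started with PID %d", (int) pid);
 */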

/*
 * Wait for a background worker to stop.
 *
 * If the worker hasn't yet started, or is running, we wait for it to stop
 * and then return BGWH_STOPPED.  However, if the postmaster has died, we give
 * up and return BGWH_POSTMASTER_DIED, because it's the postmaster that
 * notifies us when a worker's state changes.
 */
BgwHandleStatus
WaitForBackgroundWorkerShutdown(BackgroundWorkerHandle *handle)
{
    BgwHandleStatus status;
    int         rc;

    for (;;)
    {
        pid_t       pid;

        CHECK_FOR_INTERRUPTS();

        status = GetBackgroundWorkerPid(handle, &pid);
        if (status == BGWH_STOPPED)
            break;

        rc = WaitLatch(MyLatch,
                       WL_LATCH_SET | WL_POSTMASTER_DEATH, 0,
                       WAIT_EVENT_BGWORKER_SHUTDOWN);

        if (rc & WL_POSTMASTER_DEATH)
        {
            status = BGWH_POSTMASTER_DIED;
            break;
        }

        ResetLatch(MyLatch);
    }

    return status;
}

/*
 * Instruct the postmaster to terminate a background worker.
 *
 * Note that it's safe to do this without regard to whether the worker is
 * still running, or even if the worker may already have exited and been
 * unregistered.
 */
void
TerminateBackgroundWorker(BackgroundWorkerHandle *handle)
{
    BackgroundWorkerSlot *slot;
    bool        signal_postmaster = false;

    Assert(handle->slot < max_worker_processes);
    slot = &BackgroundWorkerData->slot[handle->slot];

    /* Set terminate flag in shared memory, unless slot has been reused. */
    LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
    if (handle->generation == slot->generation)
    {
        slot->terminate = true;
        signal_postmaster = true;
    }
    LWLockRelease(BackgroundWorkerLock);

    /* Make sure the postmaster notices the change to shared memory. */
    if (signal_postmaster)
        SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE);
}
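
/*
 * A brief caller-side sketch (illustrative only): asking the postmaster to
 * stop a dynamic worker and then waiting until it is actually gone, using
 * the handle obtained at registration time.
 *
 *      TerminateBackgroundWorker(handle);
 *      if (WaitForBackgroundWorkerShutdown(handle) == BGWH_POSTMASTER_DIED)
 *          ereport(ERROR,
 *                  (errmsg("postmaster exited while waiting for background worker shutdown")));
 *      pfree(handle);
 */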

/*
 * Look up (and possibly load) a bgworker entry point function.
 *
 * For functions contained in the core code, we use library name "postgres"
 * and consult the InternalBGWorkers array.  External functions are
 * looked up, and loaded if necessary, using load_external_function().
 *
 * The point of this is to pass function names as strings across process
 * boundaries.  We can't pass actual function addresses because of the
 * possibility that the function has been loaded at a different address
 * in a different process.  This is obviously a hazard for functions in
 * loadable libraries, but it can happen even for functions in the core code
 * on platforms using EXEC_BACKEND (e.g., Windows).
 *
 * At some point it might be worthwhile to get rid of InternalBGWorkers[]
 * in favor of applying load_external_function() for core functions too;
 * but that raises portability issues that are not worth addressing now.
 */
static bgworker_main_type
LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname)
{
    /*
     * If the function is to be loaded from postgres itself, search the
     * InternalBGWorkers array.
     */
    if (strcmp(libraryname, "postgres") == 0)
    {
        int         i;

        for (i = 0; i < lengthof(InternalBGWorkers); i++)
        {
            if (strcmp(InternalBGWorkers[i].fn_name, funcname) == 0)
                return InternalBGWorkers[i].fn_addr;
        }

        /* We can only reach this by programming error. */
        elog(ERROR, "internal function \"%s\" not found", funcname);
    }

    /* Otherwise load from external library. */
    return (bgworker_main_type)
        load_external_function(libraryname, funcname, true, NULL);
}

/*
 * Given a PID, get the bgw_type of the background worker.  Returns NULL if
 * not a valid background worker.
 *
 * The return value is in static memory belonging to this function, so it has
 * to be used before calling this function again.  This is so that the caller
 * doesn't have to worry about the background worker locking protocol.
 */
const char *
GetBackgroundWorkerTypeByPid(pid_t pid)
{
    int         slotno;
    bool        found = false;
    static char result[BGW_MAXLEN];

    LWLockAcquire(BackgroundWorkerLock, LW_SHARED);

    for (slotno = 0; slotno < BackgroundWorkerData->total_slots; slotno++)
    {
        BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];

        if (slot->pid > 0 && slot->pid == pid)
        {
            strcpy(result, slot->worker.bgw_type);
            found = true;
            break;
        }
    }

    LWLockRelease(BackgroundWorkerLock);

    if (!found)
        return NULL;

    return result;
}