bgwriter.c source code [PostgreSQL/src/backend/postmaster/bgwriter.c]

/*-------------------------------------------------------------------------
 *
 * bgwriter.c
 *
 * The background writer (bgwriter) is new as of Postgres 8.0.  It attempts
 * to keep regular backends from having to write out dirty shared buffers
 * (which they would only do when needing to free a shared buffer to read in
 * another page).  In the best scenario all writes from shared buffers will
 * be issued by the background writer process.  However, regular backends are
 * still empowered to issue writes if the bgwriter fails to maintain enough
 * clean shared buffers.
 *
 * As of Postgres 9.2 the bgwriter no longer handles checkpoints.
 *
 * The bgwriter is started by the postmaster as soon as the startup subprocess
 * finishes, or as soon as recovery begins if we are doing archive recovery.
 * It remains alive until the postmaster commands it to terminate.
 * Normal termination is by SIGTERM, which instructs the bgwriter to exit(0).
 * Emergency termination is by SIGQUIT; like any backend, the bgwriter will
 * simply abort and exit on SIGQUIT.
 *
 * If the bgwriter exits unexpectedly, the postmaster treats that the same
 * as a backend crash: shared memory may be corrupted, so remaining backends
 * should be killed by SIGQUIT and then a recovery cycle started.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/postmaster/bgwriter.c
 *
 *-------------------------------------------------------------------------
 */
35	#include "postgres.h"
36
37	#include <signal.h>
38	#include <sys/time.h>
39	#include <unistd.h>
40
41	#include "access/xlog.h"
42	#include "access/xlog_internal.h"
43	#include "libpq/pqsignal.h"
44	#include "miscadmin.h"
45	#include "pgstat.h"
46	#include "postmaster/bgwriter.h"
47	#include "storage/bufmgr.h"
48	#include "storage/buf_internals.h"
49	#include "storage/condition_variable.h"
50	#include "storage/fd.h"
51	#include "storage/ipc.h"
52	#include "storage/lwlock.h"
53	#include "storage/proc.h"
54	#include "storage/shmem.h"
55	#include "storage/smgr.h"
56	#include "storage/spin.h"
57	#include "storage/standby.h"
58	#include "utils/guc.h"
59	#include "utils/memutils.h"
60	#include "utils/resowner.h"
61	#include "utils/timestamp.h"
62
63
/*
 * GUC parameters
 *
 * BgWriterDelay is the pause between two rounds of background writing, in
 * milliseconds; it is passed as the timeout of the main-loop WaitLatch().
 */
int			BgWriterDelay = 200;

/*
 * Multiplier to apply to BgWriterDelay when we decide to hibernate.
 * (Perhaps this needs to be configurable?)
 */
#define HIBERNATE_FACTOR			50

/*
 * Interval in which standby snapshots are logged into the WAL stream, in
 * milliseconds.
 */
#define LOG_SNAPSHOT_INTERVAL_MS 15000
80
81	/*
82	* LSN and timestamp at which we last issued a LogStandbySnapshot(), to avoid
83	* doing so too often or repeatedly if there has been no other write activity
84	* in the system.
85	*/
86	static TimestampTz last_snapshot_ts;
87	static XLogRecPtr last_snapshot_lsn = InvalidXLogRecPtr;
88
89	/*
90	* Flags set by interrupt handlers for later service in the main loop.
91	*/
92	static volatile sig_atomic_t got_SIGHUP = false;
93	static volatile sig_atomic_t shutdown_requested = false;
94
/* Signal handlers */

static void bg_quickdie(SIGNAL_ARGS);
static void BgSigHupHandler(SIGNAL_ARGS);
static void ReqShutdownHandler(SIGNAL_ARGS);
static void bgwriter_sigusr1_handler(SIGNAL_ARGS);
101
102
103	/*
104	* Main entry point for bgwriter process
105	*
106	* This is invoked from AuxiliaryProcessMain, which has already created the
107	* basic execution environment, but not enabled signals yet.
108	*/
109	void
110	BackgroundWriterMain(void)
111	{
112	sigjmp_buf local_sigjmp_buf;
113	MemoryContext bgwriter_context;
114	bool prev_hibernate;
115	WritebackContext wb_context;
116
117	/*
118	* Properly accept or ignore signals the postmaster might send us.
119	*
120	* bgwriter doesn't participate in ProcSignal signalling, but a SIGUSR1
121	* handler is still needed for latch wakeups.
122	*/
123	pqsignal(SIGHUP, BgSigHupHandler); / set flag to read config file /
124	pqsignal(SIGINT, SIG_IGN);
125	pqsignal(SIGTERM, ReqShutdownHandler); / shutdown /
126	pqsignal(SIGQUIT, bg_quickdie); / hard crash time /
127	pqsignal(SIGALRM, SIG_IGN);
128	pqsignal(SIGPIPE, SIG_IGN);
129	pqsignal(SIGUSR1, bgwriter_sigusr1_handler);
130	pqsignal(SIGUSR2, SIG_IGN);
131
132	/*
133	* Reset some signals that are accepted by postmaster but not here
134	*/
135	pqsignal(SIGCHLD, SIG_DFL);
136
137	/ We allow SIGQUIT (quickdie) at all times /
138	sigdelset(&BlockSig, SIGQUIT);
139
140	/*
141	* We just started, assume there has been either a shutdown or
142	* end-of-recovery snapshot.
143	*/
144	last_snapshot_ts = GetCurrentTimestamp();
145
146	/*
147	* Create a memory context that we will do all our work in. We do this so
148	* that we can reset the context during error recovery and thereby avoid
149	* possible memory leaks. Formerly this code just ran in
150	* TopMemoryContext, but resetting that would be a really bad idea.
151	*/
152	bgwriter_context = AllocSetContextCreate(TopMemoryContext,
153	"Background Writer",
154	ALLOCSET_DEFAULT_SIZES);
155	MemoryContextSwitchTo(bgwriter_context);
156
157	WritebackContextInit(&wb_context, &bgwriter_flush_after);
158
159	/*
160	* If an exception is encountered, processing resumes here.
161	*
162	* See notes in postgres.c about the design of this coding.
163	*/
164	if (sigsetjmp(local_sigjmp_buf, `1`) != `0`)
165	{
166	/ Since not using PG_TRY, must reset error stack by hand /
167	error_context_stack = NULL;
168
169	/ Prevent interrupts while cleaning up /
170	HOLD_INTERRUPTS();
171
172	/ Report the error to the server log /
173	EmitErrorReport();
174
175	/*
176	* These operations are really just a minimal subset of
177	* AbortTransaction(). We don't have very many resources to worry
178	* about in bgwriter, but we do have LWLocks, buffers, and temp files.
179	*/
180	LWLockReleaseAll();
181	ConditionVariableCancelSleep();
182	AbortBufferIO();
183	UnlockBuffers();
184	ReleaseAuxProcessResources(false);
185	AtEOXact_Buffers(false);
186	AtEOXact_SMgr();
187	AtEOXact_Files(false);
188	AtEOXact_HashTables(false);
189
190	/*
191	* Now return to normal top-level context and clear ErrorContext for
192	* next time.
193	*/
194	MemoryContextSwitchTo(bgwriter_context);
195	FlushErrorState();
196
197	/ Flush any leaked data in the top-level context /
198	MemoryContextResetAndDeleteChildren(bgwriter_context);
199
200	/ re-initialize to avoid repeated errors causing problems /
201	WritebackContextInit(&wb_context, &bgwriter_flush_after);
202
203	/ Now we can allow interrupts again /
204	RESUME_INTERRUPTS();
205
206	/*
207	* Sleep at least 1 second after any error. A write error is likely
208	* to be repeated, and we don't want to be filling the error logs as
209	* fast as we can.
210	*/
211	pg_usleep(`1000000L`);
212
213	/*
214	* Close all open files after any error. This is helpful on Windows,
215	* where holding deleted files open causes various strange errors.
216	* It's not clear we need it elsewhere, but shouldn't hurt.
217	*/
218	smgrcloseall();
219
220	/ Report wait end here, when there is no further possibility of wait /
221	pgstat_report_wait_end();
222	}
223
224	/ We can now handle ereport(ERROR) /
225	PG_exception_stack = &local_sigjmp_buf;
226
227	/*
228	* Unblock signals (they were blocked when the postmaster forked us)
229	*/
230	PG_SETMASK(&UnBlockSig);
231
232	/*
233	* Reset hibernation state after any error.
234	*/
235	prev_hibernate = false;
236
237	/*
238	* Loop forever
239	*/
240	for (;;)
241	{
242	bool can_hibernate;
243	int rc;
244
245	/ Clear any already-pending wakeups /
246	ResetLatch(MyLatch);
247
248	if (got_SIGHUP)
249	{
250	got_SIGHUP = false;
251	ProcessConfigFile(PGC_SIGHUP);
252	}
253	if (shutdown_requested)
254	{
255	/*
256	* From here on, elog(ERROR) should end with exit(1), not send
257	* control back to the sigsetjmp block above
258	*/
259	ExitOnAnyError = true;
260	/ Normal exit from the bgwriter is here /
261	proc_exit(`0`); / done /
262	}
263
264	/*
265	* Do one cycle of dirty-buffer writing.
266	*/
267	can_hibernate = BgBufferSync(&wb_context);
268
269	/*
270	* Send off activity statistics to the stats collector
271	*/
272	pgstat_send_bgwriter();
273
274	if (FirstCallSinceLastCheckpoint())
275	{
276	/*
277	* After any checkpoint, close all smgr files. This is so we
278	* won't hang onto smgr references to deleted files indefinitely.
279	*/
280	smgrcloseall();
281	}
282
283	/*
284	* Log a new xl_running_xacts every now and then so replication can
285	* get into a consistent state faster (think of suboverflowed
286	* snapshots) and clean up resources (locks, KnownXids*) more
287	* frequently. The costs of this are relatively low, so doing it 4
288	* times (LOG_SNAPSHOT_INTERVAL_MS) a minute seems fine.
289	*
290	* We assume the interval for writing xl_running_xacts is
291	* significantly bigger than BgWriterDelay, so we don't complicate the
292	* overall timeout handling but just assume we're going to get called
293	* often enough even if hibernation mode is active. It's not that
294	* important that log_snap_interval_ms is met strictly. To make sure
295	* we're not waking the disk up unnecessarily on an idle system we
296	* check whether there has been any WAL inserted since the last time
297	* we've logged a running xacts.
298	*
299	* We do this logging in the bgwriter as it is the only process that
300	* is run regularly and returns to its mainloop all the time. E.g.
301	* Checkpointer, when active, is barely ever in its mainloop and thus
302	* makes it hard to log regularly.
303	*/
304	if (XLogStandbyInfoActive() && !RecoveryInProgress())
305	{
306	TimestampTz timeout = `0`;
307	TimestampTz now = GetCurrentTimestamp();
308
309	timeout = TimestampTzPlusMilliseconds(last_snapshot_ts,
310	LOG_SNAPSHOT_INTERVAL_MS);
311
312	/*
313	* Only log if enough time has passed and interesting records have
314	* been inserted since the last snapshot. Have to compare with <=
315	* instead of < because GetLastImportantRecPtr() points at the
316	* start of a record, whereas last_snapshot_lsn points just past
317	* the end of the record.
318	*/
319	if (now >= timeout &&
320	last_snapshot_lsn <= GetLastImportantRecPtr())
321	{
322	last_snapshot_lsn = LogStandbySnapshot();
323	last_snapshot_ts = now;
324	}
325	}
326
327	/*
328	* Sleep until we are signaled or BgWriterDelay has elapsed.
329	*
330	* Note: the feedback control loop in BgBufferSync() expects that we
331	* will call it every BgWriterDelay msec. While it's not critical for
332	* correctness that that be exact, the feedback loop might misbehave
333	* if we stray too far from that. Hence, avoid loading this process
334	* down with latch events that are likely to happen frequently during
335	* normal operation.
336	*/
337	rc = WaitLatch(MyLatch,
338	WL_LATCH_SET \| WL_TIMEOUT \| WL_EXIT_ON_PM_DEATH,
339	BgWriterDelay / ms / , WAIT_EVENT_BGWRITER_MAIN);
340
341	/*
342	* If no latch event and BgBufferSync says nothing's happening, extend
343	* the sleep in "hibernation" mode, where we sleep for much longer
344	* than bgwriter_delay says. Fewer wakeups save electricity. When a
345	* backend starts using buffers again, it will wake us up by setting
346	* our latch. Because the extra sleep will persist only as long as no
347	* buffer allocations happen, this should not distort the behavior of
348	* BgBufferSync's control loop too badly; essentially, it will think
349	* that the system-wide idle interval didn't exist.
350	*
351	* There is a race condition here, in that a backend might allocate a
352	* buffer between the time BgBufferSync saw the alloc count as zero
353	* and the time we call StrategyNotifyBgWriter. While it's not
354	* critical that we not hibernate anyway, we try to reduce the odds of
355	* that by only hibernating when BgBufferSync says nothing's happening
356	* for two consecutive cycles. Also, we mitigate any possible
357	* consequences of a missed wakeup by not hibernating forever.
358	*/
359	if (rc == WL_TIMEOUT && can_hibernate && prev_hibernate)
360	{
361	/ Ask for notification at next buffer allocation /
362	StrategyNotifyBgWriter(MyProc->pgprocno);
363	/ Sleep ... /
364	(void) WaitLatch(MyLatch,
365	WL_LATCH_SET \| WL_TIMEOUT \| WL_EXIT_ON_PM_DEATH,
366	BgWriterDelay * HIBERNATE_FACTOR,
367	WAIT_EVENT_BGWRITER_HIBERNATE);
368	/ Reset the notification request in case we timed out /
369	StrategyNotifyBgWriter(-`1`);
370	}
371
372	prev_hibernate = can_hibernate;
373	}
374	}
375
376
/* --------------------------------
 *		signal handler routines
 * --------------------------------
 */
381
/*
 * bg_quickdie() occurs when signalled SIGQUIT by the postmaster.
 *
 * Some backend has bought the farm,
 * so we need to stop what we're doing and exit.
 */
static void
bg_quickdie(SIGNAL_ARGS)
{
	/*
	 * We DO NOT want to run proc_exit() or atexit() callbacks -- we're here
	 * because shared memory may be corrupted, so we don't want to try to
	 * clean up our transaction.  Just nail the windows shut and get out of
	 * town.  The callbacks wouldn't be safe to run from a signal handler,
	 * anyway.
	 *
	 * Note we do _exit(2) not _exit(0).  This is to force the postmaster into
	 * a system reset cycle if someone sends a manual SIGQUIT to a random
	 * backend.  This is necessary precisely because we don't clean up our
	 * shared memory state.  (The "dead man switch" mechanism in pmsignal.c
	 * should ensure the postmaster sees this as a crash, too, but no harm in
	 * being doubly sure.)
	 */
	_exit(2);
}
407
408	/ SIGHUP: set flag to re-read config file at next convenient time /
409	static void
410	BgSigHupHandler(SIGNAL_ARGS)
411	{
412	int save_errno = errno;
413
414	got_SIGHUP = true;
415	SetLatch(MyLatch);
416
417	errno = save_errno;
418	}
419
420	/ SIGTERM: set flag to shutdown and exit /
421	static void
422	ReqShutdownHandler(SIGNAL_ARGS)
423	{
424	int save_errno = errno;
425
426	shutdown_requested = true;
427	SetLatch(MyLatch);
428
429	errno = save_errno;
430	}
431
/*
 * SIGUSR1: used for latch wakeups
 *
 * Simply forwards to latch_sigusr1_handler(), preserving errno across the
 * call.
 */
static void
bgwriter_sigusr1_handler(SIGNAL_ARGS)
{
	int			save_errno = errno;

	latch_sigusr1_handler();

	errno = save_errno;
}
442

Browse the source code of PostgreSQL/src/backend/postmaster/bgwriter.c