receivelog.c source code [PostgreSQL/src/bin/pg_basebackup/receivelog.c]

1	/-------------------------------------------------------------------------*
2	*
3	* receivelog.c - receive WAL files using the streaming
4	* replication protocol.
5	*
6	* Author: Magnus Hagander <magnus@hagander.net>
7	*
8	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
9	*
10	* IDENTIFICATION
11	* src/bin/pg_basebackup/receivelog.c
12	*-------------------------------------------------------------------------
13	*/
14
15	#include "postgres_fe.h"
16
17	#include <sys/stat.h>
18	#include <unistd.h>
19	#ifdef HAVE_SYS_SELECT_H
20	#include <sys/select.h>
21	#endif
22
23	/ local includes /
24	#include "receivelog.h"
25	#include "streamutil.h"
26
27	#include "libpq-fe.h"
28	#include "access/xlog_internal.h"
29	#include "common/file_utils.h"
30	#include "common/logging.h"
31
32
33	/ fd and filename for currently open WAL file /
34	static Walfile *walfile = NULL;
35	static char current_walfile_name[MAXPGPATH] = "";
36	static bool reportFlushPosition = false;
37	static XLogRecPtr lastFlushPosition = InvalidXLogRecPtr;
38
39	static bool still_sending = true; / feedback still needs to be sent? /
40
41	static PGresult HandleCopyStream(PGconn conn, StreamCtl *stream,
42	XLogRecPtr *stoppos);
43	static int CopyStreamPoll(PGconn conn, long* timeout_ms, pgsocket stop_socket);
44	static int CopyStreamReceive(PGconn conn, long* timeout, pgsocket stop_socket,
45	char **buffer);
46	static bool ProcessKeepaliveMsg(PGconn conn, StreamCtl stream, char *copybuf,
47	int len, XLogRecPtr blockpos, TimestampTz *last_status);
48	static bool ProcessXLogDataMsg(PGconn conn, StreamCtl stream, char copybuf, int* len,
49	XLogRecPtr *blockpos);
50	static PGresult HandleEndOfCopyStream(PGconn conn, StreamCtl stream, char* *copybuf,
51	XLogRecPtr blockpos, XLogRecPtr *stoppos);
52	static bool CheckCopyStreamStop(PGconn conn, StreamCtl stream, XLogRecPtr blockpos,
53	XLogRecPtr *stoppos);
54	static long CalculateCopyStreamSleeptime(TimestampTz now, int standby_message_timeout,
55	TimestampTz last_status);
56
57	static bool ReadEndOfStreamingResult(PGresult res, XLogRecPtr startpos,
58	uint32 *timeline);
59
60	static bool
61	mark_file_as_archived(StreamCtl stream, const* char *fname)
62	{
63	Walfile *f;
64	static char tmppath[MAXPGPATH];
65
66	snprintf(tmppath, sizeof(tmppath), "archive_status/%s.done",
67	fname);
68
69	f = stream->walmethod->open_for_write(tmppath, NULL, `0`);
70	if (f == NULL)
71	{
72	pg_log_error("could not create archive status file \"%s\": %s",
73	tmppath, stream->walmethod->getlasterror());
74	return false;
75	}
76
77	stream->walmethod->close(f, CLOSE_NORMAL);
78
79	return true;
80	}
81
82	/*
83	* Open a new WAL file in the specified directory.
84	*
85	* Returns true if OK; on failure, returns false after printing an error msg.
86	* On success, 'walfile' is set to the FD for the file, and the base filename
87	* (without partial_suffix) is stored in 'current_walfile_name'.
88	*
89	* The file will be padded to 16Mb with zeroes.
90	*/
91	static bool
92	open_walfile(StreamCtl *stream, XLogRecPtr startpoint)
93	{
94	Walfile *f;
95	char fn[MAXPGPATH];
96	ssize_t size;
97	XLogSegNo segno;
98
99	XLByteToSeg(startpoint, segno, WalSegSz);
100	XLogFileName(current_walfile_name, stream->timeline, segno, WalSegSz);
101
102	snprintf(fn, sizeof(fn), "%s%s", current_walfile_name,
103	stream->partial_suffix ? stream->partial_suffix : "");
104
105	/*
106	* When streaming to files, if an existing file exists we verify that it's
107	* either empty (just created), or a complete WalSegSz segment (in which
108	* case it has been created and padded). Anything else indicates a corrupt
109	* file.
110	*
111	* When streaming to tar, no file with this name will exist before, so we
112	* never have to verify a size.
113	*/
114	if (stream->walmethod->existsfile(fn))
115	{
116	size = stream->walmethod->get_file_size(fn);
117	if (size < `0`)
118	{
119	pg_log_error("could not get size of write-ahead log file \"%s\": %s",
120	fn, stream->walmethod->getlasterror());
121	return false;
122	}
123	if (size == WalSegSz)
124	{
125	/ Already padded file. Open it for use /
126	f = stream->walmethod->open_for_write(current_walfile_name, stream->partial_suffix, `0`);
127	if (f == NULL)
128	{
129	pg_log_error("could not open existing write-ahead log file \"%s\": %s",
130	fn, stream->walmethod->getlasterror());
131	return false;
132	}
133
134	/ fsync file in case of a previous crash /
135	if (stream->walmethod->sync(f) != `0`)
136	{
137	pg_log_error("could not fsync existing write-ahead log file \"%s\": %s",
138	fn, stream->walmethod->getlasterror());
139	stream->walmethod->close(f, CLOSE_UNLINK);
140	return false;
141	}
142
143	walfile = f;
144	return true;
145	}
146	if (size != `0`)
147	{
148	/ if write didn't set errno, assume problem is no disk space /
149	if (errno == `0`)
150	errno = ENOSPC;
151	pg_log_error(ngettext("write-ahead log file \"%s\" has %d byte, should be 0 or %d",
152	"write-ahead log file \"%s\" has %d bytes, should be 0 or %d",
153	size),
154	fn, (int) size, WalSegSz);
155	return false;
156	}
157	/ File existed and was empty, so fall through and open /
158	}
159
160	/ No file existed, so create one /
161
162	f = stream->walmethod->open_for_write(current_walfile_name,
163	stream->partial_suffix, WalSegSz);
164	if (f == NULL)
165	{
166	pg_log_error("could not open write-ahead log file \"%s\": %s",
167	fn, stream->walmethod->getlasterror());
168	return false;
169	}
170
171	walfile = f;
172	return true;
173	}
174
175	/*
176	* Close the current WAL file (if open), and rename it to the correct
177	* filename if it's complete. On failure, prints an error message to stderr
178	* and returns false, otherwise returns true.
179	*/
180	static bool
181	close_walfile(StreamCtl *stream, XLogRecPtr pos)
182	{
183	off_t currpos;
184	int r;
185
186	if (walfile == NULL)
187	return true;
188
189	currpos = stream->walmethod->get_current_pos(walfile);
190	if (currpos == -`1`)
191	{
192	pg_log_error("could not determine seek position in file \"%s\": %s",
193	current_walfile_name, stream->walmethod->getlasterror());
194	stream->walmethod->close(walfile, CLOSE_UNLINK);
195	walfile = NULL;
196
197	return false;
198	}
199
200	if (stream->partial_suffix)
201	{
202	if (currpos == WalSegSz)
203	r = stream->walmethod->close(walfile, CLOSE_NORMAL);
204	else
205	{
206	pg_log_info("not renaming \"%s%s\", segment is not complete",
207	current_walfile_name, stream->partial_suffix);
208	r = stream->walmethod->close(walfile, CLOSE_NO_RENAME);
209	}
210	}
211	else
212	r = stream->walmethod->close(walfile, CLOSE_NORMAL);
213
214	walfile = NULL;
215
216	if (r != `0`)
217	{
218	pg_log_error("could not close file \"%s\": %s",
219	current_walfile_name, stream->walmethod->getlasterror());
220	return false;
221	}
222
223	/*
224	* Mark file as archived if requested by the caller - pg_basebackup needs
225	* to do so as files can otherwise get archived again after promotion of a
226	* new node. This is in line with walreceiver.c always doing a
227	* XLogArchiveForceDone() after a complete segment.
228	*/
229	if (currpos == WalSegSz && stream->mark_done)
230	{
231	/ writes error message if failed /
232	if (!mark_file_as_archived(stream, current_walfile_name))
233	return false;
234	}
235
236	lastFlushPosition = pos;
237	return true;
238	}
239
240
241	/*
242	* Check if a timeline history file exists.
243	*/
244	static bool
245	existsTimeLineHistoryFile(StreamCtl *stream)
246	{
247	char histfname[MAXFNAMELEN];
248
249	/*
250	* Timeline 1 never has a history file. We treat that as if it existed,
251	* since we never need to stream it.
252	*/
253	if (stream->timeline == `1`)
254	return true;
255
256	TLHistoryFileName(histfname, stream->timeline);
257
258	return stream->walmethod->existsfile(histfname);
259	}
260
261	static bool
262	writeTimeLineHistoryFile(StreamCtl stream, char* filename, char* *content)
263	{
264	int size = strlen(content);
265	char histfname[MAXFNAMELEN];
266	Walfile *f;
267
268	/*
269	* Check that the server's idea of how timeline history files should be
270	* named matches ours.
271	*/
272	TLHistoryFileName(histfname, stream->timeline);
273	if (strcmp(histfname, filename) != `0`)
274	{
275	pg_log_error("server reported unexpected history file name for timeline %u: %s",
276	stream->timeline, filename);
277	return false;
278	}
279
280	f = stream->walmethod->open_for_write(histfname, ".tmp", `0`);
281	if (f == NULL)
282	{
283	pg_log_error("could not create timeline history file \"%s\": %s",
284	histfname, stream->walmethod->getlasterror());
285	return false;
286	}
287
288	if ((int) stream->walmethod->write(f, content, size) != size)
289	{
290	pg_log_error("could not write timeline history file \"%s\": %s",
291	histfname, stream->walmethod->getlasterror());
292
293	/*
294	* If we fail to make the file, delete it to release disk space
295	*/
296	stream->walmethod->close(f, CLOSE_UNLINK);
297
298	return false;
299	}
300
301	if (stream->walmethod->close(f, CLOSE_NORMAL) != `0`)
302	{
303	pg_log_error("could not close file \"%s\": %s",
304	histfname, stream->walmethod->getlasterror());
305	return false;
306	}
307
308	/ Maintain archive_status, check close_walfile() for details. /
309	if (stream->mark_done)
310	{
311	/ writes error message if failed /
312	if (!mark_file_as_archived(stream, histfname))
313	return false;
314	}
315
316	return true;
317	}
318
319	/*
320	* Send a Standby Status Update message to server.
321	*/
322	static bool
323	sendFeedback(PGconn *conn, XLogRecPtr blockpos, TimestampTz now, bool replyRequested)
324	{
325	char replybuf[`1` + `8` + `8` + `8` + `8` + `1`];
326	int len = `0`;
327
328	replybuf[len] = `'r'`;
329	len += `1`;
330	fe_sendint64(blockpos, &replybuf[len]); / write /
331	len += `8`;
332	if (reportFlushPosition)
333	fe_sendint64(lastFlushPosition, &replybuf[len]); / flush /
334	else
335	fe_sendint64(InvalidXLogRecPtr, &replybuf[len]); / flush /
336	len += `8`;
337	fe_sendint64(InvalidXLogRecPtr, &replybuf[len]); / apply /
338	len += `8`;
339	fe_sendint64(now, &replybuf[len]); / sendTime /
340	len += `8`;
341	replybuf[len] = replyRequested ? `1` : `0`; / replyRequested /
342	len += `1`;
343
344	if (PQputCopyData(conn, replybuf, len) <= `0` \|\| PQflush(conn))
345	{
346	pg_log_error("could not send feedback packet: %s",
347	PQerrorMessage(conn));
348	return false;
349	}
350
351	return true;
352	}
353
354	/*
355	* Check that the server version we're connected to is supported by
356	* ReceiveXlogStream().
357	*
358	* If it's not, an error message is printed to stderr, and false is returned.
359	*/
360	bool
361	CheckServerVersionForStreaming(PGconn *conn)
362	{
363	int minServerMajor,
364	maxServerMajor;
365	int serverMajor;
366
367	/*
368	* The message format used in streaming replication changed in 9.3, so we
369	* cannot stream from older servers. And we don't support servers newer
370	* than the client; it might work, but we don't know, so err on the safe
371	* side.
372	*/
373	minServerMajor = `903`;
374	maxServerMajor = PG_VERSION_NUM / `100`;
375	serverMajor = PQserverVersion(conn) / `100`;
376	if (serverMajor < minServerMajor)
377	{
378	const char *serverver = PQparameterStatus(conn, "server_version");
379
380	pg_log_error("incompatible server version %s; client does not support streaming from server versions older than %s",
381	serverver ? serverver : "'unknown'",
382	"9.3");
383	return false;
384	}
385	else if (serverMajor > maxServerMajor)
386	{
387	const char *serverver = PQparameterStatus(conn, "server_version");
388
389	pg_log_error("incompatible server version %s; client does not support streaming from server versions newer than %s",
390	serverver ? serverver : "'unknown'",
391	PG_VERSION);
392	return false;
393	}
394	return true;
395	}
396
397	/*
398	* Receive a log stream starting at the specified position.
399	*
400	* Individual parameters are passed through the StreamCtl structure.
401	*
402	* If sysidentifier is specified, validate that both the system
403	* identifier and the timeline matches the specified ones
404	* (by sending an extra IDENTIFY_SYSTEM command)
405	*
406	* All received segments will be written to the directory
407	* specified by basedir. This will also fetch any missing timeline history
408	* files.
409	*
410	* The stream_stop callback will be called every time data
411	* is received, and whenever a segment is completed. If it returns
412	* true, the streaming will stop and the function
413	* return. As long as it returns false, streaming will continue
414	* indefinitely.
415	*
416	* If stream_stop() checks for external input, stop_socket should be set to
417	* the FD it checks. This will allow such input to be detected promptly
418	* rather than after standby_message_timeout (which might be indefinite).
419	* Note that signals will interrupt waits for input as well, but that is
420	* race-y since a signal received while busy won't interrupt the wait.
421	*
422	* standby_message_timeout controls how often we send a message
423	* back to the master letting it know our progress, in milliseconds.
424	* Zero means no messages are sent.
425	* This message will only contain the write location, and never
426	* flush or replay.
427	*
428	* If 'partial_suffix' is not NULL, files are initially created with the
429	* given suffix, and the suffix is removed once the file is finished. That
430	* allows you to tell the difference between partial and completed files,
431	* so that you can continue later where you left.
432	*
433	* If 'synchronous' is true, the received WAL is flushed as soon as written,
434	* otherwise only when the WAL file is closed.
435	*
436	* Note: The WAL location must be at a log segment start!
437	*/
438	bool
439	ReceiveXlogStream(PGconn conn, StreamCtl stream)
440	{
441	char query[`128`];
442	char slotcmd[`128`];
443	PGresult *res;
444	XLogRecPtr stoppos;
445
446	/*
447	* The caller should've checked the server version already, but doesn't do
448	* any harm to check it here too.
449	*/
450	if (!CheckServerVersionForStreaming(conn))
451	return false;
452
453	/*
454	* Decide whether we want to report the flush position. If we report the
455	* flush position, the primary will know what WAL we'll possibly
456	* re-request, and it can then remove older WAL safely. We must always do
457	* that when we are using slots.
458	*
459	* Reporting the flush position makes one eligible as a synchronous
460	* replica. People shouldn't include generic names in
461	* synchronous_standby_names, but we've protected them against it so far,
462	* so let's continue to do so unless specifically requested.
463	*/
464	if (stream->replication_slot != NULL)
465	{
466	reportFlushPosition = true;
467	sprintf(slotcmd, "SLOT \"%s\" ", stream->replication_slot);
468	}
469	else
470	{
471	if (stream->synchronous)
472	reportFlushPosition = true;
473	else
474	reportFlushPosition = false;
475	slotcmd[`0`] = `0`;
476	}
477
478	if (stream->sysidentifier != NULL)
479	{
480	/ Validate system identifier hasn't changed /
481	res = PQexec(conn, "IDENTIFY_SYSTEM");
482	if (PQresultStatus(res) != PGRES_TUPLES_OK)
483	{
484	pg_log_error("could not send replication command \"%s\": %s",
485	"IDENTIFY_SYSTEM", PQerrorMessage(conn));
486	PQclear(res);
487	return false;
488	}
489	if (PQntuples(res) != `1` \|\| PQnfields(res) < `3`)
490	{
491	pg_log_error("could not identify system: got %d rows and %d fields, expected %d rows and %d or more fields",
492	PQntuples(res), PQnfields(res), `1`, `3`);
493	PQclear(res);
494	return false;
495	}
496	if (strcmp(stream->sysidentifier, PQgetvalue(res, `0`, `0`)) != `0`)
497	{
498	pg_log_error("system identifier does not match between base backup and streaming connection");
499	PQclear(res);
500	return false;
501	}
502	if (stream->timeline > atoi(PQgetvalue(res, `0`, `1`)))
503	{
504	pg_log_error("starting timeline %u is not present in the server",
505	stream->timeline);
506	PQclear(res);
507	return false;
508	}
509	PQclear(res);
510	}
511
512	/*
513	* initialize flush position to starting point, it's the caller's
514	* responsibility that that's sane.
515	*/
516	lastFlushPosition = stream->startpos;
517
518	while (`1`)
519	{
520	/*
521	* Fetch the timeline history file for this timeline, if we don't have
522	* it already. When streaming log to tar, this will always return
523	* false, as we are never streaming into an existing file and
524	* therefore there can be no pre-existing timeline history file.
525	*/
526	if (!existsTimeLineHistoryFile(stream))
527	{
528	snprintf(query, sizeof(query), "TIMELINE_HISTORY %u", stream->timeline);
529	res = PQexec(conn, query);
530	if (PQresultStatus(res) != PGRES_TUPLES_OK)
531	{
532	/ FIXME: we might send it ok, but get an error /
533	pg_log_error("could not send replication command \"%s\": %s",
534	"TIMELINE_HISTORY", PQresultErrorMessage(res));
535	PQclear(res);
536	return false;
537	}
538
539	/*
540	* The response to TIMELINE_HISTORY is a single row result set
541	* with two fields: filename and content
542	*/
543	if (PQnfields(res) != `2` \|\| PQntuples(res) != `1`)
544	{
545	pg_log_warning("unexpected response to TIMELINE_HISTORY command: got %d rows and %d fields, expected %d rows and %d fields",
546	PQntuples(res), PQnfields(res), `1`, `2`);
547	}
548
549	/ Write the history file to disk /
550	writeTimeLineHistoryFile(stream,
551	PQgetvalue(res, `0`, `0`),
552	PQgetvalue(res, `0`, `1`));
553
554	PQclear(res);
555	}
556
557	/*
558	* Before we start streaming from the requested location, check if the
559	* callback tells us to stop here.
560	*/
561	if (stream->stream_stop(stream->startpos, stream->timeline, false))
562	return true;
563
564	/ Initiate the replication stream at specified location /
565	snprintf(query, sizeof(query), "START_REPLICATION %s%X/%X TIMELINE %u",
566	slotcmd,
567	(uint32) (stream->startpos >> `32`), (uint32) stream->startpos,
568	stream->timeline);
569	res = PQexec(conn, query);
570	if (PQresultStatus(res) != PGRES_COPY_BOTH)
571	{
572	pg_log_error("could not send replication command \"%s\": %s",
573	"START_REPLICATION", PQresultErrorMessage(res));
574	PQclear(res);
575	return false;
576	}
577	PQclear(res);
578
579	/ Stream the WAL /
580	res = HandleCopyStream(conn, stream, &stoppos);
581	if (res == NULL)
582	goto error;
583
584	/*
585	* Streaming finished.
586	*
587	* There are two possible reasons for that: a controlled shutdown, or
588	* we reached the end of the current timeline. In case of
589	* end-of-timeline, the server sends a result set after Copy has
590	* finished, containing information about the next timeline. Read
591	* that, and restart streaming from the next timeline. In case of
592	* controlled shutdown, stop here.
593	*/
594	if (PQresultStatus(res) == PGRES_TUPLES_OK)
595	{
596	/*
597	* End-of-timeline. Read the next timeline's ID and starting
598	* position. Usually, the starting position will match the end of
599	* the previous timeline, but there are corner cases like if the
600	* server had sent us half of a WAL record, when it was promoted.
601	* The new timeline will begin at the end of the last complete
602	* record in that case, overlapping the partial WAL record on the
603	* old timeline.
604	*/
605	uint32 newtimeline;
606	bool parsed;
607
608	parsed = ReadEndOfStreamingResult(res, &stream->startpos, &newtimeline);
609	PQclear(res);
610	if (!parsed)
611	goto error;
612
613	/ Sanity check the values the server gave us /
614	if (newtimeline <= stream->timeline)
615	{
616	pg_log_error("server reported unexpected next timeline %u, following timeline %u",
617	newtimeline, stream->timeline);
618	goto error;
619	}
620	if (stream->startpos > stoppos)
621	{
622	pg_log_error("server stopped streaming timeline %u at %X/%X, but reported next timeline %u to begin at %X/%X",
623	stream->timeline, (uint32) (stoppos >> `32`), (uint32) stoppos,
624	newtimeline, (uint32) (stream->startpos >> `32`), (uint32) stream->startpos);
625	goto error;
626	}
627
628	/ Read the final result, which should be CommandComplete. /
629	res = PQgetResult(conn);
630	if (PQresultStatus(res) != PGRES_COMMAND_OK)
631	{
632	pg_log_error("unexpected termination of replication stream: %s",
633	PQresultErrorMessage(res));
634	PQclear(res);
635	goto error;
636	}
637	PQclear(res);
638
639	/*
640	* Loop back to start streaming from the new timeline. Always
641	* start streaming at the beginning of a segment.
642	*/
643	stream->timeline = newtimeline;
644	stream->startpos = stream->startpos -
645	XLogSegmentOffset(stream->startpos, WalSegSz);
646	continue;
647	}
648	else if (PQresultStatus(res) == PGRES_COMMAND_OK)
649	{
650	PQclear(res);
651
652	/*
653	* End of replication (ie. controlled shut down of the server).
654	*
655	* Check if the callback thinks it's OK to stop here. If not,
656	* complain.
657	*/
658	if (stream->stream_stop(stoppos, stream->timeline, false))
659	return true;
660	else
661	{
662	pg_log_error("replication stream was terminated before stop point");
663	goto error;
664	}
665	}
666	else
667	{
668	/ Server returned an error. /
669	pg_log_error("unexpected termination of replication stream: %s",
670	PQresultErrorMessage(res));
671	PQclear(res);
672	goto error;
673	}
674	}
675
676	error:
677	if (walfile != NULL && stream->walmethod->close(walfile, CLOSE_NO_RENAME) != `0`)
678	pg_log_error("could not close file \"%s\": %s",
679	current_walfile_name, stream->walmethod->getlasterror());
680	walfile = NULL;
681	return false;
682	}
683
684	/*
685	* Helper function to parse the result set returned by server after streaming
686	* has finished. On failure, prints an error to stderr and returns false.
687	*/
688	static bool
689	ReadEndOfStreamingResult(PGresult res, XLogRecPtr startpos, uint32 *timeline)
690	{
691	uint32 startpos_xlogid,
692	startpos_xrecoff;
693
694	/----------*
695	* The result set consists of one row and two columns, e.g:
696	*
697	* next_tli \| next_tli_startpos
698	* ----------+-------------------
699	* 4 \| 0/9949AE0
700	*
701	* next_tli is the timeline ID of the next timeline after the one that
702	* just finished streaming. next_tli_startpos is the WAL location where
703	* the server switched to it.
704	*----------
705	*/
706	if (PQnfields(res) < `2` \|\| PQntuples(res) != `1`)
707	{
708	pg_log_error("unexpected result set after end-of-timeline: got %d rows and %d fields, expected %d rows and %d fields",
709	PQntuples(res), PQnfields(res), `1`, `2`);
710	return false;
711	}
712
713	*timeline = atoi(PQgetvalue(res, `0`, `0`));
714	if (sscanf(PQgetvalue(res, `0`, `1`), "%X/%X", &startpos_xlogid,
715	&startpos_xrecoff) != `2`)
716	{
717	pg_log_error("could not parse next timeline's starting point \"%s\"",
718	PQgetvalue(res, `0`, `1`));
719	return false;
720	}
721	*startpos = ((uint64) startpos_xlogid << `32`) \| startpos_xrecoff;
722
723	return true;
724	}
725
726	/*
727	* The main loop of ReceiveXlogStream. Handles the COPY stream after
728	* initiating streaming with the START_REPLICATION command.
729	*
730	* If the COPY ends (not necessarily successfully) due a message from the
731	* server, returns a PGresult and sets *stoppos to the last byte written.
732	* On any other sort of error, returns NULL.
733	*/
734	static PGresult *
735	HandleCopyStream(PGconn conn, StreamCtl stream,
736	XLogRecPtr *stoppos)
737	{
738	char *copybuf = NULL;
739	TimestampTz last_status = -`1`;
740	XLogRecPtr blockpos = stream->startpos;
741
742	still_sending = true;
743
744	while (`1`)
745	{
746	int r;
747	TimestampTz now;
748	long sleeptime;
749
750	/*
751	* Check if we should continue streaming, or abort at this point.
752	*/
753	if (!CheckCopyStreamStop(conn, stream, blockpos, stoppos))
754	goto error;
755
756	now = feGetCurrentTimestamp();
757
758	/*
759	* If synchronous option is true, issue sync command as soon as there
760	* are WAL data which has not been flushed yet.
761	*/
762	if (stream->synchronous && lastFlushPosition < blockpos && walfile != NULL)
763	{
764	if (stream->walmethod->sync(walfile) != `0`)
765	{
766	pg_log_error("could not fsync file \"%s\": %s",
767	current_walfile_name, stream->walmethod->getlasterror());
768	goto error;
769	}
770	lastFlushPosition = blockpos;
771
772	/*
773	* Send feedback so that the server sees the latest WAL locations
774	* immediately.
775	*/
776	if (!sendFeedback(conn, blockpos, now, false))
777	goto error;
778	last_status = now;
779	}
780
781	/*
782	* Potentially send a status message to the master
783	*/
784	if (still_sending && stream->standby_message_timeout > `0` &&
785	feTimestampDifferenceExceeds(last_status, now,
786	stream->standby_message_timeout))
787	{
788	/ Time to send feedback! /
789	if (!sendFeedback(conn, blockpos, now, false))
790	goto error;
791	last_status = now;
792	}
793
794	/*
795	* Calculate how long send/receive loops should sleep
796	*/
797	sleeptime = CalculateCopyStreamSleeptime(now, stream->standby_message_timeout,
798	last_status);
799
800	r = CopyStreamReceive(conn, sleeptime, stream->stop_socket, &copybuf);
801	while (r != `0`)
802	{
803	if (r == -`1`)
804	goto error;
805	if (r == -`2`)
806	{
807	PGresult *res = HandleEndOfCopyStream(conn, stream, copybuf, blockpos, stoppos);
808
809	if (res == NULL)
810	goto error;
811	else
812	return res;
813	}
814
815	/ Check the message type. /
816	if (copybuf[`0`] == `'k'`)
817	{
818	if (!ProcessKeepaliveMsg(conn, stream, copybuf, r, blockpos,
819	&last_status))
820	goto error;
821	}
822	else if (copybuf[`0`] == `'w'`)
823	{
824	if (!ProcessXLogDataMsg(conn, stream, copybuf, r, &blockpos))
825	goto error;
826
827	/*
828	* Check if we should continue streaming, or abort at this
829	* point.
830	*/
831	if (!CheckCopyStreamStop(conn, stream, blockpos, stoppos))
832	goto error;
833	}
834	else
835	{
836	pg_log_error("unrecognized streaming header: \"%c\"",
837	copybuf[`0`]);
838	goto error;
839	}
840
841	/*
842	* Process the received data, and any subsequent data we can read
843	* without blocking.
844	*/
845	r = CopyStreamReceive(conn, `0`, stream->stop_socket, &copybuf);
846	}
847	}
848
849	error:
850	if (copybuf != NULL)
851	PQfreemem(copybuf);
852	return NULL;
853	}
854
855	/*
856	* Wait until we can read a CopyData message,
857	* or timeout, or occurrence of a signal or input on the stop_socket.
858	* (timeout_ms < 0 means wait indefinitely; 0 means don't wait.)
859	*
860	* Returns 1 if data has become available for reading, 0 if timed out
861	* or interrupted by signal or stop_socket input, and -1 on an error.
862	*/
863	static int
864	CopyStreamPoll(PGconn conn, long* timeout_ms, pgsocket stop_socket)
865	{
866	int ret;
867	fd_set input_mask;
868	int connsocket;
869	int maxfd;
870	struct timeval timeout;
871	struct timeval *timeoutptr;
872
873	connsocket = PQsocket(conn);
874	if (connsocket < `0`)
875	{
876	pg_log_error("invalid socket: %s", PQerrorMessage(conn));
877	return -`1`;
878	}
879
880	FD_ZERO(&input_mask);
881	FD_SET(connsocket, &input_mask);
882	maxfd = connsocket;
883	if (stop_socket != PGINVALID_SOCKET)
884	{
885	FD_SET(stop_socket, &input_mask);
886	maxfd = Max(maxfd, stop_socket);
887	}
888
889	if (timeout_ms < `0`)
890	timeoutptr = NULL;
891	else
892	{
893	timeout.tv_sec = timeout_ms / `1000L`;
894	timeout.tv_usec = (timeout_ms % `1000L`) * `1000L`;
895	timeoutptr = &timeout;
896	}
897
898	ret = select(maxfd + `1`, &input_mask, NULL, NULL, timeoutptr);
899
900	if (ret < `0`)
901	{
902	if (errno == EINTR)
903	return `0`; / Got a signal, so not an error /
904	pg_log_error("select() failed: %m");
905	return -`1`;
906	}
907	if (ret > `0` && FD_ISSET(connsocket, &input_mask))
908	return `1`; / Got input on connection socket /
909
910	return `0`; / Got timeout or input on stop_socket /
911	}
912
913	/*
914	* Receive CopyData message available from XLOG stream, blocking for
915	* maximum of 'timeout' ms.
916	*
917	* If data was received, returns the length of the data. *buffer is set to
918	* point to a buffer holding the received message. The buffer is only valid
919	* until the next CopyStreamReceive call.
920	*
921	* Returns 0 if no data was available within timeout, or if wait was
922	* interrupted by signal or stop_socket input.
923	* -1 on error. -2 if the server ended the COPY.
924	*/
925	static int
926	CopyStreamReceive(PGconn conn, long* timeout, pgsocket stop_socket,
927	char **buffer)
928	{
929	char *copybuf = NULL;
930	int rawlen;
931
932	if (*buffer != NULL)
933	PQfreemem(*buffer);
934	*buffer = NULL;
935
936	/ Try to receive a CopyData message /
937	rawlen = PQgetCopyData(conn, &copybuf, `1`);
938	if (rawlen == `0`)
939	{
940	int ret;
941
942	/*
943	* No data available. Wait for some to appear, but not longer than
944	* the specified timeout, so that we can ping the server. Also stop
945	* waiting if input appears on stop_socket.
946	*/
947	ret = CopyStreamPoll(conn, timeout, stop_socket);
948	if (ret <= `0`)
949	return ret;
950
951	/ Now there is actually data on the socket /
952	if (PQconsumeInput(conn) == `0`)
953	{
954	pg_log_error("could not receive data from WAL stream: %s",
955	PQerrorMessage(conn));
956	return -`1`;
957	}
958
959	/ Now that we've consumed some input, try again /
960	rawlen = PQgetCopyData(conn, &copybuf, `1`);
961	if (rawlen == `0`)
962	return `0`;
963	}
964	if (rawlen == -`1`) / end-of-streaming or error /
965	return -`2`;
966	if (rawlen == -`2`)
967	{
968	pg_log_error("could not read COPY data: %s", PQerrorMessage(conn));
969	return -`1`;
970	}
971
972	/ Return received messages to caller /
973	*buffer = copybuf;
974	return rawlen;
975	}
976
977	/*
978	* Process the keepalive message.
979	*/
980	static bool
981	ProcessKeepaliveMsg(PGconn conn, StreamCtl stream, char copybuf, int* len,
982	XLogRecPtr blockpos, TimestampTz *last_status)
983	{
984	int pos;
985	bool replyRequested;
986	TimestampTz now;
987
988	/*
989	* Parse the keepalive message, enclosed in the CopyData message. We just
990	* check if the server requested a reply, and ignore the rest.
991	*/
992	pos = `1`; / skip msgtype 'k' /
993	pos += `8`; / skip walEnd /
994	pos += `8`; / skip sendTime /
995
996	if (len < pos + `1`)
997	{
998	pg_log_error("streaming header too small: %d", len);
999	return false;
1000	}
1001	replyRequested = copybuf[pos];
1002
1003	/ If the server requested an immediate reply, send one. /
1004	if (replyRequested && still_sending)
1005	{
1006	if (reportFlushPosition && lastFlushPosition < blockpos &&
1007	walfile != NULL)
1008	{
1009	/*
1010	* If a valid flush location needs to be reported, flush the
1011	* current WAL file so that the latest flush location is sent back
1012	* to the server. This is necessary to see whether the last WAL
1013	* data has been successfully replicated or not, at the normal
1014	* shutdown of the server.
1015	*/
1016	if (stream->walmethod->sync(walfile) != `0`)
1017	{
1018	pg_log_error("could not fsync file \"%s\": %s",
1019	current_walfile_name, stream->walmethod->getlasterror());
1020	return false;
1021	}
1022	lastFlushPosition = blockpos;
1023	}
1024
1025	now = feGetCurrentTimestamp();
1026	if (!sendFeedback(conn, blockpos, now, false))
1027	return false;
1028	*last_status = now;
1029	}
1030
1031	return true;
1032	}
1033
1034	/*
1035	* Process XLogData message.
1036	*/
1037	static bool
1038	ProcessXLogDataMsg(PGconn conn, StreamCtl stream, char copybuf, int* len,
1039	XLogRecPtr *blockpos)
1040	{
1041	int xlogoff;
1042	int bytes_left;
1043	int bytes_written;
1044	int hdr_len;
1045
1046	/*
1047	* Once we've decided we don't want to receive any more, just ignore any
1048	* subsequent XLogData messages.
1049	*/
1050	if (!(still_sending))
1051	return true;
1052
1053	/*
1054	* Read the header of the XLogData message, enclosed in the CopyData
1055	* message. We only need the WAL location field (dataStart), the rest of
1056	* the header is ignored.
1057	*/
1058	hdr_len = `1`; / msgtype 'w' /
1059	hdr_len += `8`; / dataStart /
1060	hdr_len += `8`; / walEnd /
1061	hdr_len += `8`; / sendTime /
1062	if (len < hdr_len)
1063	{
1064	pg_log_error("streaming header too small: %d", len);
1065	return false;
1066	}
1067	*blockpos = fe_recvint64(&copybuf[`1`]);
1068
1069	/ Extract WAL location for this block /
1070	xlogoff = XLogSegmentOffset(*blockpos, WalSegSz);
1071
1072	/*
1073	* Verify that the initial location in the stream matches where we think
1074	* we are.
1075	*/
1076	if (walfile == NULL)
1077	{
1078	/ No file open yet /
1079	if (xlogoff != `0`)
1080	{
1081	pg_log_error("received write-ahead log record for offset %u with no file open",
1082	xlogoff);
1083	return false;
1084	}
1085	}
1086	else
1087	{
1088	/ More data in existing segment /
1089	if (stream->walmethod->get_current_pos(walfile) != xlogoff)
1090	{
1091	pg_log_error("got WAL data offset %08x, expected %08x",
1092	xlogoff, (int) stream->walmethod->get_current_pos(walfile));
1093	return false;
1094	}
1095	}
1096
1097	bytes_left = len - hdr_len;
1098	bytes_written = `0`;
1099
1100	while (bytes_left)
1101	{
1102	int bytes_to_write;
1103
1104	/*
1105	* If crossing a WAL boundary, only write up until we reach wal
1106	* segment size.
1107	*/
1108	if (xlogoff + bytes_left > WalSegSz)
1109	bytes_to_write = WalSegSz - xlogoff;
1110	else
1111	bytes_to_write = bytes_left;
1112
1113	if (walfile == NULL)
1114	{
1115	if (!open_walfile(stream, *blockpos))
1116	{
1117	/ Error logged by open_walfile /
1118	return false;
1119	}
1120	}
1121
1122	if (stream->walmethod->write(walfile, copybuf + hdr_len + bytes_written,
1123	bytes_to_write) != bytes_to_write)
1124	{
1125	pg_log_error("could not write %u bytes to WAL file \"%s\": %s",
1126	bytes_to_write, current_walfile_name,
1127	stream->walmethod->getlasterror());
1128	return false;
1129	}
1130
1131	/ Write was successful, advance our position /
1132	bytes_written += bytes_to_write;
1133	bytes_left -= bytes_to_write;
1134	*blockpos += bytes_to_write;
1135	xlogoff += bytes_to_write;
1136
1137	/ Did we reach the end of a WAL segment? /
1138	if (XLogSegmentOffset(*blockpos, WalSegSz) == `0`)
1139	{
1140	if (!close_walfile(stream, *blockpos))
1141	/ Error message written in close_walfile() /
1142	return false;
1143
1144	xlogoff = `0`;
1145
1146	if (still_sending && stream->stream_stop(*blockpos, stream->timeline, true))
1147	{
1148	if (PQputCopyEnd(conn, NULL) <= `0` \|\| PQflush(conn))
1149	{
1150	pg_log_error("could not send copy-end packet: %s",
1151	PQerrorMessage(conn));
1152	return false;
1153	}
1154	still_sending = false;
1155	return true; / ignore the rest of this XLogData packet /
1156	}
1157	}
1158	}
1159	/ No more data left to write, receive next copy packet /
1160
1161	return true;
1162	}
1163
1164	/*
1165	* Handle end of the copy stream.
1166	*/
1167	static PGresult *
1168	HandleEndOfCopyStream(PGconn conn, StreamCtl stream, char *copybuf,
1169	XLogRecPtr blockpos, XLogRecPtr *stoppos)
1170	{
1171	PGresult *res = PQgetResult(conn);
1172
1173	/*
1174	* The server closed its end of the copy stream. If we haven't closed
1175	* ours already, we need to do so now, unless the server threw an error,
1176	* in which case we don't.
1177	*/
1178	if (still_sending)
1179	{
1180	if (!close_walfile(stream, blockpos))
1181	{
1182	/ Error message written in close_walfile() /
1183	PQclear(res);
1184	return NULL;
1185	}
1186	if (PQresultStatus(res) == PGRES_COPY_IN)
1187	{
1188	if (PQputCopyEnd(conn, NULL) <= `0` \|\| PQflush(conn))
1189	{
1190	pg_log_error("could not send copy-end packet: %s",
1191	PQerrorMessage(conn));
1192	PQclear(res);
1193	return NULL;
1194	}
1195	res = PQgetResult(conn);
1196	}
1197	still_sending = false;
1198	}
1199	if (copybuf != NULL)
1200	PQfreemem(copybuf);
1201	*stoppos = blockpos;
1202	return res;
1203	}
1204
1205	/*
1206	* Check if we should continue streaming, or abort at this point.
1207	*/
1208	static bool
1209	CheckCopyStreamStop(PGconn conn, StreamCtl stream, XLogRecPtr blockpos,
1210	XLogRecPtr *stoppos)
1211	{
1212	if (still_sending && stream->stream_stop(blockpos, stream->timeline, false))
1213	{
1214	if (!close_walfile(stream, blockpos))
1215	{
1216	/ Potential error message is written by close_walfile /
1217	return false;
1218	}
1219	if (PQputCopyEnd(conn, NULL) <= `0` \|\| PQflush(conn))
1220	{
1221	pg_log_error("could not send copy-end packet: %s",
1222	PQerrorMessage(conn));
1223	return false;
1224	}
1225	still_sending = false;
1226	}
1227
1228	return true;
1229	}
1230
1231	/*
1232	* Calculate how long send/receive loops should sleep
1233	*/
1234	static long
1235	CalculateCopyStreamSleeptime(TimestampTz now, int standby_message_timeout,
1236	TimestampTz last_status)
1237	{
1238	TimestampTz status_targettime = `0`;
1239	long sleeptime;
1240
1241	if (standby_message_timeout && still_sending)
1242	status_targettime = last_status +
1243	(standby_message_timeout - `1`) * ((int64) `1000`);
1244
1245	if (status_targettime > `0`)
1246	{
1247	long secs;
1248	int usecs;
1249
1250	feTimestampDifference(now,
1251	status_targettime,
1252	&secs,
1253	&usecs);
1254	/ Always sleep at least 1 sec /
1255	if (secs <= `0`)
1256	{
1257	secs = `1`;
1258	usecs = `0`;
1259	}
1260
1261	sleeptime = secs * `1000` + usecs / `1000`;
1262	}
1263	else
1264	sleeptime = -`1`;
1265
1266	return sleeptime;
1267	}
1268

Browse the source code of PostgreSQL/src/bin/pg_basebackup/receivelog.c