1/*-------------------------------------------------------------------------
2 *
3 * basebackup.c
4 * code for taking a base backup and streaming it to a standby
5 *
6 * Portions Copyright (c) 2010-2019, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/replication/basebackup.c
10 *
11 *-------------------------------------------------------------------------
12 */
13#include "postgres.h"
14
15#include <sys/stat.h>
16#include <unistd.h>
17#include <time.h>
18
19#include "access/xlog_internal.h" /* for pg_start/stop_backup */
20#include "catalog/pg_type.h"
21#include "common/file_perm.h"
22#include "lib/stringinfo.h"
23#include "libpq/libpq.h"
24#include "libpq/pqformat.h"
25#include "miscadmin.h"
26#include "nodes/pg_list.h"
27#include "pgtar.h"
28#include "pgstat.h"
29#include "port.h"
30#include "postmaster/syslogger.h"
31#include "replication/basebackup.h"
32#include "replication/walsender.h"
33#include "replication/walsender_private.h"
34#include "storage/bufpage.h"
35#include "storage/checksum.h"
36#include "storage/dsm_impl.h"
37#include "storage/fd.h"
38#include "storage/ipc.h"
39#include "storage/reinit.h"
40#include "utils/builtins.h"
41#include "utils/ps_status.h"
42#include "utils/relcache.h"
43#include "utils/timestamp.h"
44
45
46typedef struct
47{
48 const char *label;
49 bool progress;
50 bool fastcheckpoint;
51 bool nowait;
52 bool includewal;
53 uint32 maxrate;
54 bool sendtblspcmapfile;
55} basebackup_options;
56
57
58static int64 sendDir(const char *path, int basepathlen, bool sizeonly,
59 List *tablespaces, bool sendtblspclinks);
60static bool sendFile(const char *readfilename, const char *tarfilename,
61 struct stat *statbuf, bool missing_ok, Oid dboid);
62static void sendFileWithContent(const char *filename, const char *content);
63static int64 _tarWriteHeader(const char *filename, const char *linktarget,
64 struct stat *statbuf, bool sizeonly);
65static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
66 bool sizeonly);
67static void send_int8_string(StringInfoData *buf, int64 intval);
68static void SendBackupHeader(List *tablespaces);
69static void base_backup_cleanup(int code, Datum arg);
70static void perform_base_backup(basebackup_options *opt);
71static void parse_basebackup_options(List *options, basebackup_options *opt);
72static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
73static int compareWalFileNames(const void *a, const void *b);
74static void throttle(size_t increment);
75static bool is_checksummed_file(const char *fullpath, const char *filename);
76
77/* Was the backup currently in-progress initiated in recovery mode? */
78static bool backup_started_in_recovery = false;
79
80/* Relative path of temporary statistics directory */
81static char *statrelpath = NULL;
82
83/*
84 * Size of each block sent into the tar stream for larger files.
85 */
86#define TAR_SEND_SIZE 32768
87
88/*
89 * How frequently to throttle, as a fraction of the specified rate-second.
90 */
91#define THROTTLING_FREQUENCY 8
92
93/*
94 * Checks whether we encountered any error in fread(). fread() doesn't give
95 * any clue what has happened, so we check with ferror(). Also, neither
96 * fread() nor ferror() set errno, so we just throw a generic error.
97 */
98#define CHECK_FREAD_ERROR(fp, filename) \
99do { \
100 if (ferror(fp)) \
101 ereport(ERROR, \
102 (errmsg("could not read from file \"%s\"", filename))); \
103} while (0)
104
105/* The actual number of bytes, transfer of which may cause sleep. */
106static uint64 throttling_sample;
107
108/* Amount of data already transferred but not yet throttled. */
109static int64 throttling_counter;
110
111/* The minimum time required to transfer throttling_sample bytes. */
112static TimeOffset elapsed_min_unit;
113
114/* The last check of the transfer rate. */
115static TimestampTz throttled_last;
116
117/* The starting XLOG position of the base backup. */
118static XLogRecPtr startptr;
119
120/* Total number of checksum failures during base backup. */
121static int64 total_checksum_failures;
122
123/* Do not verify checksums. */
124static bool noverify_checksums = false;
125
126/*
127 * The contents of these directories are removed or recreated during server
128 * start so they are not included in backups. The directories themselves are
129 * kept and included as empty to preserve access permissions.
130 *
131 * Note: this list should be kept in sync with the filter lists in pg_rewind's
132 * filemap.c.
133 */
134static const char *excludeDirContents[] =
135{
136 /*
137 * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped even
138 * when stats_temp_directory is set because PGSS_TEXT_FILE is always
139 * created there.
140 */
141 PG_STAT_TMP_DIR,
142
143 /*
144 * It is generally not useful to backup the contents of this directory
145 * even if the intention is to restore to another master. See backup.sgml
146 * for a more detailed description.
147 */
148 "pg_replslot",
149
150 /* Contents removed on startup, see dsm_cleanup_for_mmap(). */
151 PG_DYNSHMEM_DIR,
152
153 /* Contents removed on startup, see AsyncShmemInit(). */
154 "pg_notify",
155
156 /*
157 * Old contents are loaded for possible debugging but are not required for
158 * normal operation, see OldSerXidInit().
159 */
160 "pg_serial",
161
162 /* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
163 "pg_snapshots",
164
165 /* Contents zeroed on startup, see StartupSUBTRANS(). */
166 "pg_subtrans",
167
168 /* end of list */
169 NULL
170};
171
172/*
173 * List of files excluded from backups.
174 */
175static const char *excludeFiles[] =
176{
177 /* Skip auto conf temporary file. */
178 PG_AUTOCONF_FILENAME ".tmp",
179
180 /* Skip current log file temporary file */
181 LOG_METAINFO_DATAFILE_TMP,
182
183 /* Skip relation cache because it is rebuilt on startup */
184 RELCACHE_INIT_FILENAME,
185
186 /*
187 * If there's a backup_label or tablespace_map file, it belongs to a
188 * backup started by the user with pg_start_backup(). It is *not* correct
189 * for this backup. Our backup_label/tablespace_map is injected into the
190 * tar separately.
191 */
192 BACKUP_LABEL_FILE,
193 TABLESPACE_MAP,
194
195 "postmaster.pid",
196 "postmaster.opts",
197
198 /* end of list */
199 NULL
200};
201
/*
 * Names of files that are skipped by checksum validation.  The list is
 * terminated by a NULL entry.
 *
 * Note: keep this in sync with the list used by pg_checksums.c.
 */
static const char *const noChecksumFiles[] =
{
	"pg_control",
	"pg_filenode.map",
	"pg_internal.init",
	"PG_VERSION",

	/* These two exist only in EXEC_BACKEND builds */
#ifdef EXEC_BACKEND
	"config_exec_params",
	"config_exec_params.new",
#endif

	/* end of list */
	NULL,
};
219
220
221/*
222 * Called when ERROR or FATAL happens in perform_base_backup() after
223 * we have started the backup - make sure we end it!
224 */
225static void
226base_backup_cleanup(int code, Datum arg)
227{
228 do_pg_abort_backup();
229}
230
231/*
232 * Actually do a base backup for the specified tablespaces.
233 *
234 * This is split out mainly to avoid complaints about "variable might be
235 * clobbered by longjmp" from stupider versions of gcc.
236 */
237static void
238perform_base_backup(basebackup_options *opt)
239{
240 TimeLineID starttli;
241 XLogRecPtr endptr;
242 TimeLineID endtli;
243 StringInfo labelfile;
244 StringInfo tblspc_map_file = NULL;
245 int datadirpathlen;
246 List *tablespaces = NIL;
247
248 datadirpathlen = strlen(DataDir);
249
250 backup_started_in_recovery = RecoveryInProgress();
251
252 labelfile = makeStringInfo();
253 tblspc_map_file = makeStringInfo();
254
255 total_checksum_failures = 0;
256
257 startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
258 labelfile, &tablespaces,
259 tblspc_map_file,
260 opt->progress, opt->sendtblspcmapfile);
261
262 /*
263 * Once do_pg_start_backup has been called, ensure that any failure causes
264 * us to abort the backup so we don't "leak" a backup counter. For this
265 * reason, *all* functionality between do_pg_start_backup() and the end of
266 * do_pg_stop_backup() should be inside the error cleanup block!
267 */
268
269 PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
270 {
271 ListCell *lc;
272 tablespaceinfo *ti;
273
274 SendXlogRecPtrResult(startptr, starttli);
275
276 /*
277 * Calculate the relative path of temporary statistics directory in
278 * order to skip the files which are located in that directory later.
279 */
280 if (is_absolute_path(pgstat_stat_directory) &&
281 strncmp(pgstat_stat_directory, DataDir, datadirpathlen) == 0)
282 statrelpath = psprintf("./%s", pgstat_stat_directory + datadirpathlen + 1);
283 else if (strncmp(pgstat_stat_directory, "./", 2) != 0)
284 statrelpath = psprintf("./%s", pgstat_stat_directory);
285 else
286 statrelpath = pgstat_stat_directory;
287
288 /* Add a node for the base directory at the end */
289 ti = palloc0(sizeof(tablespaceinfo));
290 ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
291 tablespaces = lappend(tablespaces, ti);
292
293 /* Send tablespace header */
294 SendBackupHeader(tablespaces);
295
296 /* Setup and activate network throttling, if client requested it */
297 if (opt->maxrate > 0)
298 {
299 throttling_sample =
300 (int64) opt->maxrate * (int64) 1024 / THROTTLING_FREQUENCY;
301
302 /*
303 * The minimum amount of time for throttling_sample bytes to be
304 * transferred.
305 */
306 elapsed_min_unit = USECS_PER_SEC / THROTTLING_FREQUENCY;
307
308 /* Enable throttling. */
309 throttling_counter = 0;
310
311 /* The 'real data' starts now (header was ignored). */
312 throttled_last = GetCurrentTimestamp();
313 }
314 else
315 {
316 /* Disable throttling. */
317 throttling_counter = -1;
318 }
319
320 /* Send off our tablespaces one by one */
321 foreach(lc, tablespaces)
322 {
323 tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
324 StringInfoData buf;
325
326 /* Send CopyOutResponse message */
327 pq_beginmessage(&buf, 'H');
328 pq_sendbyte(&buf, 0); /* overall format */
329 pq_sendint16(&buf, 0); /* natts */
330 pq_endmessage(&buf);
331
332 if (ti->path == NULL)
333 {
334 struct stat statbuf;
335
336 /* In the main tar, include the backup_label first... */
337 sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data);
338
339 /*
340 * Send tablespace_map file if required and then the bulk of
341 * the files.
342 */
343 if (tblspc_map_file && opt->sendtblspcmapfile)
344 {
345 sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data);
346 sendDir(".", 1, false, tablespaces, false);
347 }
348 else
349 sendDir(".", 1, false, tablespaces, true);
350
351 /* ... and pg_control after everything else. */
352 if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
353 ereport(ERROR,
354 (errcode_for_file_access(),
355 errmsg("could not stat file \"%s\": %m",
356 XLOG_CONTROL_FILE)));
357 sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, false, InvalidOid);
358 }
359 else
360 sendTablespace(ti->path, false);
361
362 /*
363 * If we're including WAL, and this is the main data directory we
364 * don't terminate the tar stream here. Instead, we will append
365 * the xlog files below and terminate it then. This is safe since
366 * the main data directory is always sent *last*.
367 */
368 if (opt->includewal && ti->path == NULL)
369 {
370 Assert(lnext(lc) == NULL);
371 }
372 else
373 pq_putemptymessage('c'); /* CopyDone */
374 }
375
376 endptr = do_pg_stop_backup(labelfile->data, !opt->nowait, &endtli);
377 }
378 PG_END_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
379
380
381 if (opt->includewal)
382 {
383 /*
384 * We've left the last tar file "open", so we can now append the
385 * required WAL files to it.
386 */
387 char pathbuf[MAXPGPATH];
388 XLogSegNo segno;
389 XLogSegNo startsegno;
390 XLogSegNo endsegno;
391 struct stat statbuf;
392 List *historyFileList = NIL;
393 List *walFileList = NIL;
394 char **walFiles;
395 int nWalFiles;
396 char firstoff[MAXFNAMELEN];
397 char lastoff[MAXFNAMELEN];
398 DIR *dir;
399 struct dirent *de;
400 int i;
401 ListCell *lc;
402 TimeLineID tli;
403
404 /*
405 * I'd rather not worry about timelines here, so scan pg_wal and
406 * include all WAL files in the range between 'startptr' and 'endptr',
407 * regardless of the timeline the file is stamped with. If there are
408 * some spurious WAL files belonging to timelines that don't belong in
409 * this server's history, they will be included too. Normally there
410 * shouldn't be such files, but if there are, there's little harm in
411 * including them.
412 */
413 XLByteToSeg(startptr, startsegno, wal_segment_size);
414 XLogFileName(firstoff, ThisTimeLineID, startsegno, wal_segment_size);
415 XLByteToPrevSeg(endptr, endsegno, wal_segment_size);
416 XLogFileName(lastoff, ThisTimeLineID, endsegno, wal_segment_size);
417
418 dir = AllocateDir("pg_wal");
419 while ((de = ReadDir(dir, "pg_wal")) != NULL)
420 {
421 /* Does it look like a WAL segment, and is it in the range? */
422 if (IsXLogFileName(de->d_name) &&
423 strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
424 strcmp(de->d_name + 8, lastoff + 8) <= 0)
425 {
426 walFileList = lappend(walFileList, pstrdup(de->d_name));
427 }
428 /* Does it look like a timeline history file? */
429 else if (IsTLHistoryFileName(de->d_name))
430 {
431 historyFileList = lappend(historyFileList, pstrdup(de->d_name));
432 }
433 }
434 FreeDir(dir);
435
436 /*
437 * Before we go any further, check that none of the WAL segments we
438 * need were removed.
439 */
440 CheckXLogRemoved(startsegno, ThisTimeLineID);
441
442 /*
443 * Put the WAL filenames into an array, and sort. We send the files in
444 * order from oldest to newest, to reduce the chance that a file is
445 * recycled before we get a chance to send it over.
446 */
447 nWalFiles = list_length(walFileList);
448 walFiles = palloc(nWalFiles * sizeof(char *));
449 i = 0;
450 foreach(lc, walFileList)
451 {
452 walFiles[i++] = lfirst(lc);
453 }
454 qsort(walFiles, nWalFiles, sizeof(char *), compareWalFileNames);
455
456 /*
457 * There must be at least one xlog file in the pg_wal directory, since
458 * we are doing backup-including-xlog.
459 */
460 if (nWalFiles < 1)
461 ereport(ERROR,
462 (errmsg("could not find any WAL files")));
463
464 /*
465 * Sanity check: the first and last segment should cover startptr and
466 * endptr, with no gaps in between.
467 */
468 XLogFromFileName(walFiles[0], &tli, &segno, wal_segment_size);
469 if (segno != startsegno)
470 {
471 char startfname[MAXFNAMELEN];
472
473 XLogFileName(startfname, ThisTimeLineID, startsegno,
474 wal_segment_size);
475 ereport(ERROR,
476 (errmsg("could not find WAL file \"%s\"", startfname)));
477 }
478 for (i = 0; i < nWalFiles; i++)
479 {
480 XLogSegNo currsegno = segno;
481 XLogSegNo nextsegno = segno + 1;
482
483 XLogFromFileName(walFiles[i], &tli, &segno, wal_segment_size);
484 if (!(nextsegno == segno || currsegno == segno))
485 {
486 char nextfname[MAXFNAMELEN];
487
488 XLogFileName(nextfname, ThisTimeLineID, nextsegno,
489 wal_segment_size);
490 ereport(ERROR,
491 (errmsg("could not find WAL file \"%s\"", nextfname)));
492 }
493 }
494 if (segno != endsegno)
495 {
496 char endfname[MAXFNAMELEN];
497
498 XLogFileName(endfname, ThisTimeLineID, endsegno, wal_segment_size);
499 ereport(ERROR,
500 (errmsg("could not find WAL file \"%s\"", endfname)));
501 }
502
503 /* Ok, we have everything we need. Send the WAL files. */
504 for (i = 0; i < nWalFiles; i++)
505 {
506 FILE *fp;
507 char buf[TAR_SEND_SIZE];
508 size_t cnt;
509 pgoff_t len = 0;
510
511 snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFiles[i]);
512 XLogFromFileName(walFiles[i], &tli, &segno, wal_segment_size);
513
514 fp = AllocateFile(pathbuf, "rb");
515 if (fp == NULL)
516 {
517 int save_errno = errno;
518
519 /*
520 * Most likely reason for this is that the file was already
521 * removed by a checkpoint, so check for that to get a better
522 * error message.
523 */
524 CheckXLogRemoved(segno, tli);
525
526 errno = save_errno;
527 ereport(ERROR,
528 (errcode_for_file_access(),
529 errmsg("could not open file \"%s\": %m", pathbuf)));
530 }
531
532 if (fstat(fileno(fp), &statbuf) != 0)
533 ereport(ERROR,
534 (errcode_for_file_access(),
535 errmsg("could not stat file \"%s\": %m",
536 pathbuf)));
537 if (statbuf.st_size != wal_segment_size)
538 {
539 CheckXLogRemoved(segno, tli);
540 ereport(ERROR,
541 (errcode_for_file_access(),
542 errmsg("unexpected WAL file size \"%s\"", walFiles[i])));
543 }
544
545 /* send the WAL file itself */
546 _tarWriteHeader(pathbuf, NULL, &statbuf, false);
547
548 while ((cnt = fread(buf, 1,
549 Min(sizeof(buf), wal_segment_size - len),
550 fp)) > 0)
551 {
552 CheckXLogRemoved(segno, tli);
553 /* Send the chunk as a CopyData message */
554 if (pq_putmessage('d', buf, cnt))
555 ereport(ERROR,
556 (errmsg("base backup could not send data, aborting backup")));
557
558 len += cnt;
559 throttle(cnt);
560
561 if (len == wal_segment_size)
562 break;
563 }
564
565 CHECK_FREAD_ERROR(fp, pathbuf);
566
567 if (len != wal_segment_size)
568 {
569 CheckXLogRemoved(segno, tli);
570 ereport(ERROR,
571 (errcode_for_file_access(),
572 errmsg("unexpected WAL file size \"%s\"", walFiles[i])));
573 }
574
575 /* wal_segment_size is a multiple of 512, so no need for padding */
576
577 FreeFile(fp);
578
579 /*
580 * Mark file as archived, otherwise files can get archived again
581 * after promotion of a new node. This is in line with
582 * walreceiver.c always doing an XLogArchiveForceDone() after a
583 * complete segment.
584 */
585 StatusFilePath(pathbuf, walFiles[i], ".done");
586 sendFileWithContent(pathbuf, "");
587 }
588
589 /*
590 * Send timeline history files too. Only the latest timeline history
591 * file is required for recovery, and even that only if there happens
592 * to be a timeline switch in the first WAL segment that contains the
593 * checkpoint record, or if we're taking a base backup from a standby
594 * server and the target timeline changes while the backup is taken.
595 * But they are small and highly useful for debugging purposes, so
596 * better include them all, always.
597 */
598 foreach(lc, historyFileList)
599 {
600 char *fname = lfirst(lc);
601
602 snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", fname);
603
604 if (lstat(pathbuf, &statbuf) != 0)
605 ereport(ERROR,
606 (errcode_for_file_access(),
607 errmsg("could not stat file \"%s\": %m", pathbuf)));
608
609 sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid);
610
611 /* unconditionally mark file as archived */
612 StatusFilePath(pathbuf, fname, ".done");
613 sendFileWithContent(pathbuf, "");
614 }
615
616 /* Send CopyDone message for the last tar file */
617 pq_putemptymessage('c');
618 }
619 SendXlogRecPtrResult(endptr, endtli);
620
621 if (total_checksum_failures)
622 {
623 if (total_checksum_failures > 1)
624 {
625 char buf[64];
626
627 snprintf(buf, sizeof(buf), INT64_FORMAT, total_checksum_failures);
628
629 ereport(WARNING,
630 (errmsg("%s total checksum verification failures", buf)));
631 }
632 ereport(ERROR,
633 (errcode(ERRCODE_DATA_CORRUPTED),
634 errmsg("checksum verification failure during base backup")));
635 }
636
637}
638
/*
 * qsort comparison function, to compare log/seg portion of WAL segment
 * filenames, ignoring the timeline portion.
 *
 * 'a' and 'b' each point at an array element of type "char *".  The first 8
 * characters of every name (the timeline part) are skipped, so each name
 * must be at least that long.  Returns the strcmp() result for the remainder.
 */
static int
compareWalFileNames(const void *a, const void *b)
{
	/* Keep the const qualification of the qsort arguments intact */
	const char *fna = *(const char *const *) a;
	const char *fnb = *(const char *const *) b;

	return strcmp(fna + 8, fnb + 8);
}
651
652/*
653 * Parse the base backup options passed down by the parser
654 */
655static void
656parse_basebackup_options(List *options, basebackup_options *opt)
657{
658 ListCell *lopt;
659 bool o_label = false;
660 bool o_progress = false;
661 bool o_fast = false;
662 bool o_nowait = false;
663 bool o_wal = false;
664 bool o_maxrate = false;
665 bool o_tablespace_map = false;
666 bool o_noverify_checksums = false;
667
668 MemSet(opt, 0, sizeof(*opt));
669 foreach(lopt, options)
670 {
671 DefElem *defel = (DefElem *) lfirst(lopt);
672
673 if (strcmp(defel->defname, "label") == 0)
674 {
675 if (o_label)
676 ereport(ERROR,
677 (errcode(ERRCODE_SYNTAX_ERROR),
678 errmsg("duplicate option \"%s\"", defel->defname)));
679 opt->label = strVal(defel->arg);
680 o_label = true;
681 }
682 else if (strcmp(defel->defname, "progress") == 0)
683 {
684 if (o_progress)
685 ereport(ERROR,
686 (errcode(ERRCODE_SYNTAX_ERROR),
687 errmsg("duplicate option \"%s\"", defel->defname)));
688 opt->progress = true;
689 o_progress = true;
690 }
691 else if (strcmp(defel->defname, "fast") == 0)
692 {
693 if (o_fast)
694 ereport(ERROR,
695 (errcode(ERRCODE_SYNTAX_ERROR),
696 errmsg("duplicate option \"%s\"", defel->defname)));
697 opt->fastcheckpoint = true;
698 o_fast = true;
699 }
700 else if (strcmp(defel->defname, "nowait") == 0)
701 {
702 if (o_nowait)
703 ereport(ERROR,
704 (errcode(ERRCODE_SYNTAX_ERROR),
705 errmsg("duplicate option \"%s\"", defel->defname)));
706 opt->nowait = true;
707 o_nowait = true;
708 }
709 else if (strcmp(defel->defname, "wal") == 0)
710 {
711 if (o_wal)
712 ereport(ERROR,
713 (errcode(ERRCODE_SYNTAX_ERROR),
714 errmsg("duplicate option \"%s\"", defel->defname)));
715 opt->includewal = true;
716 o_wal = true;
717 }
718 else if (strcmp(defel->defname, "max_rate") == 0)
719 {
720 long maxrate;
721
722 if (o_maxrate)
723 ereport(ERROR,
724 (errcode(ERRCODE_SYNTAX_ERROR),
725 errmsg("duplicate option \"%s\"", defel->defname)));
726
727 maxrate = intVal(defel->arg);
728 if (maxrate < MAX_RATE_LOWER || maxrate > MAX_RATE_UPPER)
729 ereport(ERROR,
730 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
731 errmsg("%d is outside the valid range for parameter \"%s\" (%d .. %d)",
732 (int) maxrate, "MAX_RATE", MAX_RATE_LOWER, MAX_RATE_UPPER)));
733
734 opt->maxrate = (uint32) maxrate;
735 o_maxrate = true;
736 }
737 else if (strcmp(defel->defname, "tablespace_map") == 0)
738 {
739 if (o_tablespace_map)
740 ereport(ERROR,
741 (errcode(ERRCODE_SYNTAX_ERROR),
742 errmsg("duplicate option \"%s\"", defel->defname)));
743 opt->sendtblspcmapfile = true;
744 o_tablespace_map = true;
745 }
746 else if (strcmp(defel->defname, "noverify_checksums") == 0)
747 {
748 if (o_noverify_checksums)
749 ereport(ERROR,
750 (errcode(ERRCODE_SYNTAX_ERROR),
751 errmsg("duplicate option \"%s\"", defel->defname)));
752 noverify_checksums = true;
753 o_noverify_checksums = true;
754 }
755 else
756 elog(ERROR, "option \"%s\" not recognized",
757 defel->defname);
758 }
759 if (opt->label == NULL)
760 opt->label = "base backup";
761}
762
763
764/*
765 * SendBaseBackup() - send a complete base backup.
766 *
767 * The function will put the system into backup mode like pg_start_backup()
768 * does, so that the backup is consistent even though we read directly from
769 * the filesystem, bypassing the buffer cache.
770 */
771void
772SendBaseBackup(BaseBackupCmd *cmd)
773{
774 basebackup_options opt;
775
776 parse_basebackup_options(cmd->options, &opt);
777
778 WalSndSetState(WALSNDSTATE_BACKUP);
779
780 if (update_process_title)
781 {
782 char activitymsg[50];
783
784 snprintf(activitymsg, sizeof(activitymsg), "sending backup \"%s\"",
785 opt.label);
786 set_ps_display(activitymsg, false);
787 }
788
789 perform_base_backup(&opt);
790}
791
792static void
793send_int8_string(StringInfoData *buf, int64 intval)
794{
795 char is[32];
796
797 sprintf(is, INT64_FORMAT, intval);
798 pq_sendint32(buf, strlen(is));
799 pq_sendbytes(buf, is, strlen(is));
800}
801
802static void
803SendBackupHeader(List *tablespaces)
804{
805 StringInfoData buf;
806 ListCell *lc;
807
808 /* Construct and send the directory information */
809 pq_beginmessage(&buf, 'T'); /* RowDescription */
810 pq_sendint16(&buf, 3); /* 3 fields */
811
812 /* First field - spcoid */
813 pq_sendstring(&buf, "spcoid");
814 pq_sendint32(&buf, 0); /* table oid */
815 pq_sendint16(&buf, 0); /* attnum */
816 pq_sendint32(&buf, OIDOID); /* type oid */
817 pq_sendint16(&buf, 4); /* typlen */
818 pq_sendint32(&buf, 0); /* typmod */
819 pq_sendint16(&buf, 0); /* format code */
820
821 /* Second field - spcpath */
822 pq_sendstring(&buf, "spclocation");
823 pq_sendint32(&buf, 0);
824 pq_sendint16(&buf, 0);
825 pq_sendint32(&buf, TEXTOID);
826 pq_sendint16(&buf, -1);
827 pq_sendint32(&buf, 0);
828 pq_sendint16(&buf, 0);
829
830 /* Third field - size */
831 pq_sendstring(&buf, "size");
832 pq_sendint32(&buf, 0);
833 pq_sendint16(&buf, 0);
834 pq_sendint32(&buf, INT8OID);
835 pq_sendint16(&buf, 8);
836 pq_sendint32(&buf, 0);
837 pq_sendint16(&buf, 0);
838 pq_endmessage(&buf);
839
840 foreach(lc, tablespaces)
841 {
842 tablespaceinfo *ti = lfirst(lc);
843
844 /* Send one datarow message */
845 pq_beginmessage(&buf, 'D');
846 pq_sendint16(&buf, 3); /* number of columns */
847 if (ti->path == NULL)
848 {
849 pq_sendint32(&buf, -1); /* Length = -1 ==> NULL */
850 pq_sendint32(&buf, -1);
851 }
852 else
853 {
854 Size len;
855
856 len = strlen(ti->oid);
857 pq_sendint32(&buf, len);
858 pq_sendbytes(&buf, ti->oid, len);
859
860 len = strlen(ti->path);
861 pq_sendint32(&buf, len);
862 pq_sendbytes(&buf, ti->path, len);
863 }
864 if (ti->size >= 0)
865 send_int8_string(&buf, ti->size / 1024);
866 else
867 pq_sendint32(&buf, -1); /* NULL */
868
869 pq_endmessage(&buf);
870 }
871
872 /* Send a CommandComplete message */
873 pq_puttextmessage('C', "SELECT");
874}
875
876/*
877 * Send a single resultset containing just a single
878 * XLogRecPtr record (in text format)
879 */
880static void
881SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
882{
883 StringInfoData buf;
884 char str[MAXFNAMELEN];
885 Size len;
886
887 pq_beginmessage(&buf, 'T'); /* RowDescription */
888 pq_sendint16(&buf, 2); /* 2 fields */
889
890 /* Field headers */
891 pq_sendstring(&buf, "recptr");
892 pq_sendint32(&buf, 0); /* table oid */
893 pq_sendint16(&buf, 0); /* attnum */
894 pq_sendint32(&buf, TEXTOID); /* type oid */
895 pq_sendint16(&buf, -1);
896 pq_sendint32(&buf, 0);
897 pq_sendint16(&buf, 0);
898
899 pq_sendstring(&buf, "tli");
900 pq_sendint32(&buf, 0); /* table oid */
901 pq_sendint16(&buf, 0); /* attnum */
902
903 /*
904 * int8 may seem like a surprising data type for this, but in theory int4
905 * would not be wide enough for this, as TimeLineID is unsigned.
906 */
907 pq_sendint32(&buf, INT8OID); /* type oid */
908 pq_sendint16(&buf, -1);
909 pq_sendint32(&buf, 0);
910 pq_sendint16(&buf, 0);
911 pq_endmessage(&buf);
912
913 /* Data row */
914 pq_beginmessage(&buf, 'D');
915 pq_sendint16(&buf, 2); /* number of columns */
916
917 len = snprintf(str, sizeof(str),
918 "%X/%X", (uint32) (ptr >> 32), (uint32) ptr);
919 pq_sendint32(&buf, len);
920 pq_sendbytes(&buf, str, len);
921
922 len = snprintf(str, sizeof(str), "%u", tli);
923 pq_sendint32(&buf, len);
924 pq_sendbytes(&buf, str, len);
925
926 pq_endmessage(&buf);
927
928 /* Send a CommandComplete message */
929 pq_puttextmessage('C', "SELECT");
930}
931
932/*
933 * Inject a file with given name and content in the output tar stream.
934 */
935static void
936sendFileWithContent(const char *filename, const char *content)
937{
938 struct stat statbuf;
939 int pad,
940 len;
941
942 len = strlen(content);
943
944 /*
945 * Construct a stat struct for the backup_label file we're injecting in
946 * the tar.
947 */
948 /* Windows doesn't have the concept of uid and gid */
949#ifdef WIN32
950 statbuf.st_uid = 0;
951 statbuf.st_gid = 0;
952#else
953 statbuf.st_uid = geteuid();
954 statbuf.st_gid = getegid();
955#endif
956 statbuf.st_mtime = time(NULL);
957 statbuf.st_mode = pg_file_create_mode;
958 statbuf.st_size = len;
959
960 _tarWriteHeader(filename, NULL, &statbuf, false);
961 /* Send the contents as a CopyData message */
962 pq_putmessage('d', content, len);
963
964 /* Pad to 512 byte boundary, per tar format requirements */
965 pad = ((len + 511) & ~511) - len;
966 if (pad > 0)
967 {
968 char buf[512];
969
970 MemSet(buf, 0, pad);
971 pq_putmessage('d', buf, pad);
972 }
973}
974
975/*
976 * Include the tablespace directory pointed to by 'path' in the output tar
977 * stream. If 'sizeonly' is true, we just calculate a total length and return
978 * it, without actually sending anything.
979 *
980 * Only used to send auxiliary tablespaces, not PGDATA.
981 */
982int64
983sendTablespace(char *path, bool sizeonly)
984{
985 int64 size;
986 char pathbuf[MAXPGPATH];
987 struct stat statbuf;
988
989 /*
990 * 'path' points to the tablespace location, but we only want to include
991 * the version directory in it that belongs to us.
992 */
993 snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path,
994 TABLESPACE_VERSION_DIRECTORY);
995
996 /*
997 * Store a directory entry in the tar file so we get the permissions
998 * right.
999 */
1000 if (lstat(pathbuf, &statbuf) != 0)
1001 {
1002 if (errno != ENOENT)
1003 ereport(ERROR,
1004 (errcode_for_file_access(),
1005 errmsg("could not stat file or directory \"%s\": %m",
1006 pathbuf)));
1007
1008 /* If the tablespace went away while scanning, it's no error. */
1009 return 0;
1010 }
1011
1012 size = _tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf,
1013 sizeonly);
1014
1015 /* Send all the files in the tablespace version directory */
1016 size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true);
1017
1018 return size;
1019}
1020
1021/*
1022 * Include all files from the given directory in the output tar stream. If
1023 * 'sizeonly' is true, we just calculate a total length and return it, without
1024 * actually sending anything.
1025 *
1026 * Omit any directory in the tablespaces list, to avoid backing up
1027 * tablespaces twice when they were created inside PGDATA.
1028 *
1029 * If sendtblspclinks is true, we need to include symlink
1030 * information in the tar file. If not, we can skip that
1031 * as it will be sent separately in the tablespace_map file.
1032 */
1033static int64
1034sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces,
1035 bool sendtblspclinks)
1036{
1037 DIR *dir;
1038 struct dirent *de;
1039 char pathbuf[MAXPGPATH * 2];
1040 struct stat statbuf;
1041 int64 size = 0;
1042 const char *lastDir; /* Split last dir from parent path. */
1043 bool isDbDir = false; /* Does this directory contain relations? */
1044
1045 /*
1046 * Determine if the current path is a database directory that can contain
1047 * relations.
1048 *
1049 * Start by finding the location of the delimiter between the parent path
1050 * and the current path.
1051 */
1052 lastDir = last_dir_separator(path);
1053
1054 /* Does this path look like a database path (i.e. all digits)? */
1055 if (lastDir != NULL &&
1056 strspn(lastDir + 1, "0123456789") == strlen(lastDir + 1))
1057 {
1058 /* Part of path that contains the parent directory. */
1059 int parentPathLen = lastDir - path;
1060
1061 /*
1062 * Mark path as a database directory if the parent path is either
1063 * $PGDATA/base or a tablespace version path.
1064 */
1065 if (strncmp(path, "./base", parentPathLen) == 0 ||
1066 (parentPathLen >= (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) &&
1067 strncmp(lastDir - (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1),
1068 TABLESPACE_VERSION_DIRECTORY,
1069 sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) == 0))
1070 isDbDir = true;
1071 }
1072
1073 dir = AllocateDir(path);
1074 while ((de = ReadDir(dir, path)) != NULL)
1075 {
1076 int excludeIdx;
1077 bool excludeFound;
1078 ForkNumber relForkNum; /* Type of fork if file is a relation */
1079 int relOidChars; /* Chars in filename that are the rel oid */
1080
1081 /* Skip special stuff */
1082 if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
1083 continue;
1084
1085 /* Skip temporary files */
1086 if (strncmp(de->d_name,
1087 PG_TEMP_FILE_PREFIX,
1088 strlen(PG_TEMP_FILE_PREFIX)) == 0)
1089 continue;
1090
1091 /*
1092 * Check if the postmaster has signaled us to exit, and abort with an
1093 * error in that case. The error handler further up will call
1094 * do_pg_abort_backup() for us. Also check that if the backup was
1095 * started while still in recovery, the server wasn't promoted.
		 * do_pg_stop_backup() will check that too, but it's better to stop
1097 * the backup early than continue to the end and fail there.
1098 */
1099 CHECK_FOR_INTERRUPTS();
1100 if (RecoveryInProgress() != backup_started_in_recovery)
1101 ereport(ERROR,
1102 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1103 errmsg("the standby was promoted during online backup"),
1104 errhint("This means that the backup being taken is corrupt "
1105 "and should not be used. "
1106 "Try taking another online backup.")));
1107
1108 /* Scan for files that should be excluded */
1109 excludeFound = false;
1110 for (excludeIdx = 0; excludeFiles[excludeIdx] != NULL; excludeIdx++)
1111 {
1112 if (strcmp(de->d_name, excludeFiles[excludeIdx]) == 0)
1113 {
1114 elog(DEBUG1, "file \"%s\" excluded from backup", de->d_name);
1115 excludeFound = true;
1116 break;
1117 }
1118 }
1119
1120 if (excludeFound)
1121 continue;
1122
1123 /* Exclude all forks for unlogged tables except the init fork */
1124 if (isDbDir &&
1125 parse_filename_for_nontemp_relation(de->d_name, &relOidChars,
1126 &relForkNum))
1127 {
1128 /* Never exclude init forks */
1129 if (relForkNum != INIT_FORKNUM)
1130 {
1131 char initForkFile[MAXPGPATH];
1132 char relOid[OIDCHARS + 1];
1133
1134 /*
1135 * If any other type of fork, check if there is an init fork
1136 * with the same OID. If so, the file can be excluded.
1137 */
1138 memcpy(relOid, de->d_name, relOidChars);
1139 relOid[relOidChars] = '\0';
1140 snprintf(initForkFile, sizeof(initForkFile), "%s/%s_init",
1141 path, relOid);
1142
1143 if (lstat(initForkFile, &statbuf) == 0)
1144 {
1145 elog(DEBUG2,
1146 "unlogged relation file \"%s\" excluded from backup",
1147 de->d_name);
1148
1149 continue;
1150 }
1151 }
1152 }
1153
1154 /* Exclude temporary relations */
1155 if (isDbDir && looks_like_temp_rel_name(de->d_name))
1156 {
1157 elog(DEBUG2,
1158 "temporary relation file \"%s\" excluded from backup",
1159 de->d_name);
1160
1161 continue;
1162 }
1163
1164 snprintf(pathbuf, sizeof(pathbuf), "%s/%s", path, de->d_name);
1165
1166 /* Skip pg_control here to back up it last */
1167 if (strcmp(pathbuf, "./global/pg_control") == 0)
1168 continue;
1169
1170 if (lstat(pathbuf, &statbuf) != 0)
1171 {
1172 if (errno != ENOENT)
1173 ereport(ERROR,
1174 (errcode_for_file_access(),
1175 errmsg("could not stat file or directory \"%s\": %m",
1176 pathbuf)));
1177
1178 /* If the file went away while scanning, it's not an error. */
1179 continue;
1180 }
1181
1182 /* Scan for directories whose contents should be excluded */
1183 excludeFound = false;
1184 for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++)
1185 {
1186 if (strcmp(de->d_name, excludeDirContents[excludeIdx]) == 0)
1187 {
1188 elog(DEBUG1, "contents of directory \"%s\" excluded from backup", de->d_name);
1189 size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
1190 excludeFound = true;
1191 break;
1192 }
1193 }
1194
1195 if (excludeFound)
1196 continue;
1197
1198 /*
1199 * Exclude contents of directory specified by statrelpath if not set
1200 * to the default (pg_stat_tmp) which is caught in the loop above.
1201 */
1202 if (statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0)
1203 {
1204 elog(DEBUG1, "contents of directory \"%s\" excluded from backup", statrelpath);
1205 size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
1206 continue;
1207 }
1208
1209 /*
1210 * We can skip pg_wal, the WAL segments need to be fetched from the
1211 * WAL archive anyway. But include it as an empty directory anyway, so
1212 * we get permissions right.
1213 */
1214 if (strcmp(pathbuf, "./pg_wal") == 0)
1215 {
1216 /* If pg_wal is a symlink, write it as a directory anyway */
1217 size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly);
1218
1219 /*
1220 * Also send archive_status directory (by hackishly reusing
1221 * statbuf from above ...).
1222 */
1223 size += _tarWriteHeader("./pg_wal/archive_status", NULL, &statbuf,
1224 sizeonly);
1225
1226 continue; /* don't recurse into pg_wal */
1227 }
1228
1229 /* Allow symbolic links in pg_tblspc only */
1230 if (strcmp(path, "./pg_tblspc") == 0 &&
1231#ifndef WIN32
1232 S_ISLNK(statbuf.st_mode)
1233#else
1234 pgwin32_is_junction(pathbuf)
1235#endif
1236 )
1237 {
1238#if defined(HAVE_READLINK) || defined(WIN32)
1239 char linkpath[MAXPGPATH];
1240 int rllen;
1241
1242 rllen = readlink(pathbuf, linkpath, sizeof(linkpath));
1243 if (rllen < 0)
1244 ereport(ERROR,
1245 (errcode_for_file_access(),
1246 errmsg("could not read symbolic link \"%s\": %m",
1247 pathbuf)));
1248 if (rllen >= sizeof(linkpath))
1249 ereport(ERROR,
1250 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1251 errmsg("symbolic link \"%s\" target is too long",
1252 pathbuf)));
1253 linkpath[rllen] = '\0';
1254
1255 size += _tarWriteHeader(pathbuf + basepathlen + 1, linkpath,
1256 &statbuf, sizeonly);
1257#else
1258
1259 /*
1260 * If the platform does not have symbolic links, it should not be
1261 * possible to have tablespaces - clearly somebody else created
1262 * them. Warn about it and ignore.
1263 */
1264 ereport(WARNING,
1265 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1266 errmsg("tablespaces are not supported on this platform")));
1267 continue;
1268#endif /* HAVE_READLINK */
1269 }
1270 else if (S_ISDIR(statbuf.st_mode))
1271 {
1272 bool skip_this_dir = false;
1273 ListCell *lc;
1274
1275 /*
1276 * Store a directory entry in the tar file so we can get the
1277 * permissions right.
1278 */
1279 size += _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf,
1280 sizeonly);
1281
1282 /*
1283 * Call ourselves recursively for a directory, unless it happens
1284 * to be a separate tablespace located within PGDATA.
1285 */
1286 foreach(lc, tablespaces)
1287 {
1288 tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
1289
1290 /*
1291 * ti->rpath is the tablespace relative path within PGDATA, or
1292 * NULL if the tablespace has been properly located somewhere
1293 * else.
1294 *
1295 * Skip past the leading "./" in pathbuf when comparing.
1296 */
1297 if (ti->rpath && strcmp(ti->rpath, pathbuf + 2) == 0)
1298 {
1299 skip_this_dir = true;
1300 break;
1301 }
1302 }
1303
1304 /*
1305 * skip sending directories inside pg_tblspc, if not required.
1306 */
1307 if (strcmp(pathbuf, "./pg_tblspc") == 0 && !sendtblspclinks)
1308 skip_this_dir = true;
1309
1310 if (!skip_this_dir)
1311 size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks);
1312 }
1313 else if (S_ISREG(statbuf.st_mode))
1314 {
1315 bool sent = false;
1316
1317 if (!sizeonly)
1318 sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf,
1319 true, isDbDir ? pg_atoi(lastDir + 1, sizeof(Oid), 0) : InvalidOid);
1320
1321 if (sent || sizeonly)
1322 {
1323 /* Add size, rounded up to 512byte block */
1324 size += ((statbuf.st_size + 511) & ~511);
1325 size += 512; /* Size of the header of the file */
1326 }
1327 }
1328 else
1329 ereport(WARNING,
1330 (errmsg("skipping special file \"%s\"", pathbuf)));
1331 }
1332 FreeDir(dir);
1333 return size;
1334}
1335
1336/*
1337 * Check if a file should have its checksum validated.
1338 * We validate checksums on files in regular tablespaces
1339 * (including global and default) only, and in those there
1340 * are some files that are explicitly excluded.
1341 */
1342static bool
1343is_checksummed_file(const char *fullpath, const char *filename)
1344{
1345 const char *const *f;
1346
1347 /* Check that the file is in a tablespace */
1348 if (strncmp(fullpath, "./global/", 9) == 0 ||
1349 strncmp(fullpath, "./base/", 7) == 0 ||
1350 strncmp(fullpath, "/", 1) == 0)
1351 {
1352 /* Compare file against noChecksumFiles skiplist */
1353 for (f = noChecksumFiles; *f; f++)
1354 if (strcmp(*f, filename) == 0)
1355 return false;
1356
1357 return true;
1358 }
1359 else
1360 return false;
1361}
1362
1363/*****
1364 * Functions for handling tar file format
1365 *
1366 * Copied from pg_dump, but modified to work with libpq for sending
1367 */
1368
1369
1370/*
1371 * Given the member, write the TAR header & send the file.
1372 *
1373 * If 'missing_ok' is true, will not throw an error if the file is not found.
1374 *
1375 * If dboid is anything other than InvalidOid then any checksum failures detected
1376 * will get reported to the stats collector.
1377 *
1378 * Returns true if the file was successfully sent, false if 'missing_ok',
1379 * and the file did not exist.
1380 */
1381static bool
1382sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf,
1383 bool missing_ok, Oid dboid)
1384{
1385 FILE *fp;
1386 BlockNumber blkno = 0;
1387 bool block_retry = false;
1388 char buf[TAR_SEND_SIZE];
1389 uint16 checksum;
1390 int checksum_failures = 0;
1391 off_t cnt;
1392 int i;
1393 pgoff_t len = 0;
1394 char *page;
1395 size_t pad;
1396 PageHeader phdr;
1397 int segmentno = 0;
1398 char *segmentpath;
1399 bool verify_checksum = false;
1400
1401 fp = AllocateFile(readfilename, "rb");
1402 if (fp == NULL)
1403 {
1404 if (errno == ENOENT && missing_ok)
1405 return false;
1406 ereport(ERROR,
1407 (errcode_for_file_access(),
1408 errmsg("could not open file \"%s\": %m", readfilename)));
1409 }
1410
1411 _tarWriteHeader(tarfilename, NULL, statbuf, false);
1412
1413 if (!noverify_checksums && DataChecksumsEnabled())
1414 {
1415 char *filename;
1416
1417 /*
1418 * Get the filename (excluding path). As last_dir_separator()
1419 * includes the last directory separator, we chop that off by
1420 * incrementing the pointer.
1421 */
1422 filename = last_dir_separator(readfilename) + 1;
1423
1424 if (is_checksummed_file(readfilename, filename))
1425 {
1426 verify_checksum = true;
1427
1428 /*
1429 * Cut off at the segment boundary (".") to get the segment number
1430 * in order to mix it into the checksum.
1431 */
1432 segmentpath = strstr(filename, ".");
1433 if (segmentpath != NULL)
1434 {
1435 segmentno = atoi(segmentpath + 1);
1436 if (segmentno == 0)
1437 ereport(ERROR,
1438 (errmsg("invalid segment number %d in file \"%s\"",
1439 segmentno, filename)));
1440 }
1441 }
1442 }
1443
1444 while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0)
1445 {
1446 /*
1447 * The checksums are verified at block level, so we iterate over the
1448 * buffer in chunks of BLCKSZ, after making sure that
1449 * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of
1450 * BLCKSZ bytes.
1451 */
1452 Assert(TAR_SEND_SIZE % BLCKSZ == 0);
1453
1454 if (verify_checksum && (cnt % BLCKSZ != 0))
1455 {
1456 ereport(WARNING,
1457 (errmsg("could not verify checksum in file \"%s\", block "
1458 "%d: read buffer size %d and page size %d "
1459 "differ",
1460 readfilename, blkno, (int) cnt, BLCKSZ)));
1461 verify_checksum = false;
1462 }
1463
1464 if (verify_checksum)
1465 {
1466 for (i = 0; i < cnt / BLCKSZ; i++)
1467 {
1468 page = buf + BLCKSZ * i;
1469
1470 /*
1471 * Only check pages which have not been modified since the
1472 * start of the base backup. Otherwise, they might have been
1473 * written only halfway and the checksum would not be valid.
1474 * However, replaying WAL would reinstate the correct page in
1475 * this case. We also skip completely new pages, since they
1476 * don't have a checksum yet.
1477 */
1478 if (!PageIsNew(page) && PageGetLSN(page) < startptr)
1479 {
1480 checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE);
1481 phdr = (PageHeader) page;
1482 if (phdr->pd_checksum != checksum)
1483 {
1484 /*
1485 * Retry the block on the first failure. It's
1486 * possible that we read the first 4K page of the
1487 * block just before postgres updated the entire block
1488 * so it ends up looking torn to us. We only need to
1489 * retry once because the LSN should be updated to
1490 * something we can ignore on the next pass. If the
1491 * error happens again then it is a true validation
1492 * failure.
1493 */
1494 if (block_retry == false)
1495 {
1496 /* Reread the failed block */
1497 if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1)
1498 {
1499 ereport(ERROR,
1500 (errcode_for_file_access(),
1501 errmsg("could not fseek in file \"%s\": %m",
1502 readfilename)));
1503 }
1504
1505 if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ)
1506 {
1507 /*
1508 * If we hit end-of-file, a concurrent
1509 * truncation must have occurred, so break out
1510 * of this loop just as if the initial fread()
1511 * returned 0. We'll drop through to the same
1512 * code that handles that case. (We must fix
1513 * up cnt first, though.)
1514 */
1515 if (feof(fp))
1516 {
1517 cnt = BLCKSZ * i;
1518 break;
1519 }
1520
1521 ereport(ERROR,
1522 (errcode_for_file_access(),
1523 errmsg("could not reread block %d of file \"%s\": %m",
1524 blkno, readfilename)));
1525 }
1526
1527 if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1)
1528 {
1529 ereport(ERROR,
1530 (errcode_for_file_access(),
1531 errmsg("could not fseek in file \"%s\": %m",
1532 readfilename)));
1533 }
1534
1535 /* Set flag so we know a retry was attempted */
1536 block_retry = true;
1537
1538 /* Reset loop to validate the block again */
1539 i--;
1540 continue;
1541 }
1542
1543 checksum_failures++;
1544
1545 if (checksum_failures <= 5)
1546 ereport(WARNING,
1547 (errmsg("checksum verification failed in "
1548 "file \"%s\", block %d: calculated "
1549 "%X but expected %X",
1550 readfilename, blkno, checksum,
1551 phdr->pd_checksum)));
1552 if (checksum_failures == 5)
1553 ereport(WARNING,
1554 (errmsg("further checksum verification "
1555 "failures in file \"%s\" will not "
1556 "be reported", readfilename)));
1557 }
1558 }
1559 block_retry = false;
1560 blkno++;
1561 }
1562 }
1563
1564 /* Send the chunk as a CopyData message */
1565 if (pq_putmessage('d', buf, cnt))
1566 ereport(ERROR,
1567 (errmsg("base backup could not send data, aborting backup")));
1568
1569 len += cnt;
1570 throttle(cnt);
1571
1572 if (feof(fp) || len >= statbuf->st_size)
1573 {
1574 /*
1575 * Reached end of file. The file could be longer, if it was
1576 * extended while we were sending it, but for a base backup we can
1577 * ignore such extended data. It will be restored from WAL.
1578 */
1579 break;
1580 }
1581 }
1582
1583 CHECK_FREAD_ERROR(fp, readfilename);
1584
1585 /* If the file was truncated while we were sending it, pad it with zeros */
1586 if (len < statbuf->st_size)
1587 {
1588 MemSet(buf, 0, sizeof(buf));
1589 while (len < statbuf->st_size)
1590 {
1591 cnt = Min(sizeof(buf), statbuf->st_size - len);
1592 pq_putmessage('d', buf, cnt);
1593 len += cnt;
1594 throttle(cnt);
1595 }
1596 }
1597
1598 /*
1599 * Pad to 512 byte boundary, per tar format requirements. (This small
1600 * piece of data is probably not worth throttling.)
1601 */
1602 pad = ((len + 511) & ~511) - len;
1603 if (pad > 0)
1604 {
1605 MemSet(buf, 0, pad);
1606 pq_putmessage('d', buf, pad);
1607 }
1608
1609 FreeFile(fp);
1610
1611 if (checksum_failures > 1)
1612 {
1613 ereport(WARNING,
1614 (errmsg_plural("file \"%s\" has a total of %d checksum verification failure",
1615 "file \"%s\" has a total of %d checksum verification failures",
1616 checksum_failures,
1617 readfilename, checksum_failures)));
1618
1619 pgstat_report_checksum_failures_in_db(dboid, checksum_failures);
1620 }
1621
1622 total_checksum_failures += checksum_failures;
1623
1624 return true;
1625}
1626
1627
1628static int64
1629_tarWriteHeader(const char *filename, const char *linktarget,
1630 struct stat *statbuf, bool sizeonly)
1631{
1632 char h[512];
1633 enum tarError rc;
1634
1635 if (!sizeonly)
1636 {
1637 rc = tarCreateHeader(h, filename, linktarget, statbuf->st_size,
1638 statbuf->st_mode, statbuf->st_uid, statbuf->st_gid,
1639 statbuf->st_mtime);
1640
1641 switch (rc)
1642 {
1643 case TAR_OK:
1644 break;
1645 case TAR_NAME_TOO_LONG:
1646 ereport(ERROR,
1647 (errmsg("file name too long for tar format: \"%s\"",
1648 filename)));
1649 break;
1650 case TAR_SYMLINK_TOO_LONG:
1651 ereport(ERROR,
1652 (errmsg("symbolic link target too long for tar format: "
1653 "file name \"%s\", target \"%s\"",
1654 filename, linktarget)));
1655 break;
1656 default:
1657 elog(ERROR, "unrecognized tar error: %d", rc);
1658 }
1659
1660 pq_putmessage('d', h, sizeof(h));
1661 }
1662
1663 return sizeof(h);
1664}
1665
1666/*
1667 * Write tar header for a directory. If the entry in statbuf is a link then
1668 * write it as a directory anyway.
1669 */
1670static int64
1671_tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
1672 bool sizeonly)
1673{
1674 /* If symlink, write it as a directory anyway */
1675#ifndef WIN32
1676 if (S_ISLNK(statbuf->st_mode))
1677#else
1678 if (pgwin32_is_junction(pathbuf))
1679#endif
1680 statbuf->st_mode = S_IFDIR | pg_dir_create_mode;
1681
1682 return _tarWriteHeader(pathbuf + basepathlen + 1, NULL, statbuf, sizeonly);
1683}
1684
1685/*
1686 * Increment the network transfer counter by the given number of bytes,
1687 * and sleep if necessary to comply with the requested network transfer
1688 * rate.
1689 */
1690static void
1691throttle(size_t increment)
1692{
1693 TimeOffset elapsed_min;
1694
1695 if (throttling_counter < 0)
1696 return;
1697
1698 throttling_counter += increment;
1699 if (throttling_counter < throttling_sample)
1700 return;
1701
1702 /* How much time should have elapsed at minimum? */
1703 elapsed_min = elapsed_min_unit *
1704 (throttling_counter / throttling_sample);
1705
1706 /*
1707 * Since the latch could be set repeatedly because of concurrently WAL
1708 * activity, sleep in a loop to ensure enough time has passed.
1709 */
1710 for (;;)
1711 {
1712 TimeOffset elapsed,
1713 sleep;
1714 int wait_result;
1715
1716 /* Time elapsed since the last measurement (and possible wake up). */
1717 elapsed = GetCurrentTimestamp() - throttled_last;
1718
1719 /* sleep if the transfer is faster than it should be */
1720 sleep = elapsed_min - elapsed;
1721 if (sleep <= 0)
1722 break;
1723
1724 ResetLatch(MyLatch);
1725
1726 /* We're eating a potentially set latch, so check for interrupts */
1727 CHECK_FOR_INTERRUPTS();
1728
1729 /*
1730 * (TAR_SEND_SIZE / throttling_sample * elapsed_min_unit) should be
1731 * the maximum time to sleep. Thus the cast to long is safe.
1732 */
1733 wait_result = WaitLatch(MyLatch,
1734 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
1735 (long) (sleep / 1000),
1736 WAIT_EVENT_BASE_BACKUP_THROTTLE);
1737
1738 if (wait_result & WL_LATCH_SET)
1739 CHECK_FOR_INTERRUPTS();
1740
1741 /* Done waiting? */
1742 if (wait_result & WL_TIMEOUT)
1743 break;
1744 }
1745
1746 /*
1747 * As we work with integers, only whole multiple of throttling_sample was
1748 * processed. The rest will be done during the next call of this function.
1749 */
1750 throttling_counter %= throttling_sample;
1751
1752 /*
1753 * Time interval for the remaining amount and possible next increments
1754 * starts now.
1755 */
1756 throttled_last = GetCurrentTimestamp();
1757}
1758