1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * basebackup.c |
4 | * code for taking a base backup and streaming it to a standby |
5 | * |
6 | * Portions Copyright (c) 2010-2019, PostgreSQL Global Development Group |
7 | * |
8 | * IDENTIFICATION |
9 | * src/backend/replication/basebackup.c |
10 | * |
11 | *------------------------------------------------------------------------- |
12 | */ |
13 | #include "postgres.h" |
14 | |
15 | #include <sys/stat.h> |
16 | #include <unistd.h> |
17 | #include <time.h> |
18 | |
19 | #include "access/xlog_internal.h" /* for pg_start/stop_backup */ |
20 | #include "catalog/pg_type.h" |
21 | #include "common/file_perm.h" |
22 | #include "lib/stringinfo.h" |
23 | #include "libpq/libpq.h" |
24 | #include "libpq/pqformat.h" |
25 | #include "miscadmin.h" |
26 | #include "nodes/pg_list.h" |
27 | #include "pgtar.h" |
28 | #include "pgstat.h" |
29 | #include "port.h" |
30 | #include "postmaster/syslogger.h" |
31 | #include "replication/basebackup.h" |
32 | #include "replication/walsender.h" |
33 | #include "replication/walsender_private.h" |
34 | #include "storage/bufpage.h" |
35 | #include "storage/checksum.h" |
36 | #include "storage/dsm_impl.h" |
37 | #include "storage/fd.h" |
38 | #include "storage/ipc.h" |
39 | #include "storage/reinit.h" |
40 | #include "utils/builtins.h" |
41 | #include "utils/ps_status.h" |
42 | #include "utils/relcache.h" |
43 | #include "utils/timestamp.h" |
44 | |
45 | |
/*
 * Options for a single base backup request.  parse_basebackup_options()
 * zeroes the struct and then fills it in from the option list.
 */
typedef struct
{
	const char *label;			/* backup label, passed to
								 * do_pg_start_backup() */
	bool		progress;		/* if true, tablespace sizes are computed up
								 * front for the backup header */
	bool		fastcheckpoint; /* passed through to do_pg_start_backup() */
	bool		nowait;			/* do_pg_stop_backup() is called with !nowait */
	bool		includewal;		/* append the required WAL files to the main
								 * tar stream */
	uint32		maxrate;		/* transfer rate limit; multiplied by 1024 to
								 * get bytes per second, 0 disables throttling */
	bool		sendtblspcmapfile;	/* inject a tablespace_map file into the
									 * main tar stream */
} basebackup_options;
56 | |
57 | |
/*
 * Prototypes for the helpers that are private to this file.
 */

/* Building the tar stream */
static int64 sendDir(const char *path, int basepathlen, bool sizeonly,
					 List *tablespaces, bool sendtblspclinks);
static bool sendFile(const char *readfilename, const char *tarfilename,
					 struct stat *statbuf, bool missing_ok, Oid dboid);
static void sendFileWithContent(const char *filename, const char *content);
static int64 _tarWriteHeader(const char *filename, const char *linktarget,
							 struct stat *statbuf, bool sizeonly);
static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf,
						  bool sizeonly);

/* Result sets and overall control flow */
static void send_int8_string(StringInfoData *buf, int64 intval);
static void SendBackupHeader(List *tablespaces);
static void base_backup_cleanup(int code, Datum arg);
static void perform_base_backup(basebackup_options *opt);
static void parse_basebackup_options(List *options, basebackup_options *opt);
static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli);
static int	compareWalFileNames(const void *a, const void *b);
static void throttle(size_t increment);
static bool is_checksummed_file(const char *fullpath, const char *filename);
76 | |
/*
 * Was the backup currently in-progress initiated in recovery mode?
 * Set from RecoveryInProgress() at the start of perform_base_backup().
 */
static bool backup_started_in_recovery = false;

/*
 * Relative path of temporary statistics directory.  Computed in
 * perform_base_backup() from pgstat_stat_directory.
 */
static char *statrelpath = NULL;

/*
 * Size of each block sent into the tar stream for larger files.
 */
#define TAR_SEND_SIZE 32768

/*
 * How frequently to throttle, as a fraction of the specified rate-second.
 */
#define THROTTLING_FREQUENCY	8

/*
 * Checks whether we encountered any error in fread().  fread() doesn't give
 * any clue what has happened, so we check with ferror().  Also, neither
 * fread() nor ferror() set errno, so we just throw a generic error.
 */
#define CHECK_FREAD_ERROR(fp, filename) \
do { \
	if (ferror(fp)) \
		ereport(ERROR, \
				(errmsg("could not read from file \"%s\"", filename))); \
} while (0)

/* The actual number of bytes, transfer of which may cause sleep. */
static uint64 throttling_sample;

/*
 * Amount of data already transferred but not yet throttled.
 * perform_base_backup() sets this to -1 when throttling is disabled and to 0
 * when it is enabled.
 */
static int64 throttling_counter;

/* The minimum time required to transfer throttling_sample bytes. */
static TimeOffset elapsed_min_unit;

/* The last check of the transfer rate. */
static TimestampTz throttled_last;

/* The starting XLOG position of the base backup. */
static XLogRecPtr startptr;

/*
 * Total number of checksum failures during base backup.  Reset to zero at
 * the start of perform_base_backup().
 */
static int64 total_checksum_failures;

/*
 * Do not verify checksums.  Set by parse_basebackup_options() when the
 * "noverify_checksums" option is given.
 */
static bool noverify_checksums = false;
125 | |
/*
 * The contents of these directories are removed or recreated during server
 * start so they are not included in backups.  The directories themselves are
 * kept and included as empty to preserve access permissions.
 *
 * The array is terminated by a NULL entry.
 *
 * Note: this list should be kept in sync with the filter lists in pg_rewind's
 * filemap.c.
 */
static const char *excludeDirContents[] =
{
	/*
	 * Skip temporary statistics files. PG_STAT_TMP_DIR must be skipped even
	 * when stats_temp_directory is set because PGSS_TEXT_FILE is always
	 * created there.
	 */
	PG_STAT_TMP_DIR,

	/*
	 * It is generally not useful to backup the contents of this directory
	 * even if the intention is to restore to another master. See backup.sgml
	 * for a more detailed description.
	 */
	"pg_replslot",

	/* Contents removed on startup, see dsm_cleanup_for_mmap(). */
	PG_DYNSHMEM_DIR,

	/* Contents removed on startup, see AsyncShmemInit(). */
	"pg_notify",

	/*
	 * Old contents are loaded for possible debugging but are not required for
	 * normal operation, see OldSerXidInit().
	 */
	"pg_serial",

	/* Contents removed on startup, see DeleteAllExportedSnapshotFiles(). */
	"pg_snapshots",

	/* Contents zeroed on startup, see StartupSUBTRANS(). */
	"pg_subtrans",

	/* end of list */
	NULL
};
171 | |
/*
 * List of files excluded from backups.
 *
 * The array is terminated by a NULL entry.
 */
static const char *excludeFiles[] =
{
	/* Skip auto conf temporary file. */
	PG_AUTOCONF_FILENAME ".tmp",

	/* Skip current log file temporary file */
	LOG_METAINFO_DATAFILE_TMP,

	/* Skip relation cache because it is rebuilt on startup */
	RELCACHE_INIT_FILENAME,

	/*
	 * If there's a backup_label or tablespace_map file, it belongs to a
	 * backup started by the user with pg_start_backup().  It is *not* correct
	 * for this backup.  Our backup_label/tablespace_map is injected into the
	 * tar separately.
	 */
	BACKUP_LABEL_FILE,
	TABLESPACE_MAP,

	/* Postmaster runtime files; NOTE(review): presumably specific to the running instance */
	"postmaster.pid",
	"postmaster.opts",

	/* end of list */
	NULL
};
201 | |
/*
 * List of files excluded from checksum validation.
 *
 * The array is terminated by a NULL entry.  The two config_exec_params
 * entries are only compiled in for EXEC_BACKEND builds.
 *
 * Note: this list should be kept in sync with what pg_checksums.c
 * includes.
 */
static const char *const noChecksumFiles[] = {
	"pg_control",
	"pg_filenode.map",
	"pg_internal.init",
	"PG_VERSION",
#ifdef EXEC_BACKEND
	"config_exec_params",
	"config_exec_params.new",
#endif
	NULL,
};
219 | |
220 | |
/*
 * Called when ERROR or FATAL happens in perform_base_backup() after
 * we have started the backup - make sure we end it!
 *
 * Registered through PG_ENSURE_ERROR_CLEANUP in perform_base_backup().
 * Neither 'code' nor 'arg' is used; the backup is simply aborted.
 */
static void
base_backup_cleanup(int code, Datum arg)
{
	do_pg_abort_backup();
}
230 | |
/*
 * Actually do a base backup for the specified tablespaces.
 *
 * This is split out mainly to avoid complaints about "variable might be
 * clobbered by longjmp" from stupider versions of gcc.
 *
 * Overall sequence, as implemented below:
 *	1. Start the backup and send the start WAL position as a result set.
 *	2. Send the tablespace header (one row per tablespace, main data
 *	   directory last).
 *	3. Send one CopyOut stream (a tar archive) per tablespace.
 *	4. Stop the backup.
 *	5. If opt->includewal, append the needed WAL segments and all timeline
 *	   history files to the still-open tar of the main data directory.
 *	6. Send the end WAL position as a result set, then raise an error if any
 *	   checksum failures were counted.
 */
static void
perform_base_backup(basebackup_options *opt)
{
	TimeLineID	starttli;
	XLogRecPtr	endptr;
	TimeLineID	endtli;
	StringInfo	labelfile;
	StringInfo	tblspc_map_file = NULL;
	int			datadirpathlen;
	List	   *tablespaces = NIL;

	datadirpathlen = strlen(DataDir);

	backup_started_in_recovery = RecoveryInProgress();

	labelfile = makeStringInfo();
	tblspc_map_file = makeStringInfo();

	/* Checksum failures are counted per backup, so start from zero. */
	total_checksum_failures = 0;

	startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
								  labelfile, &tablespaces,
								  tblspc_map_file,
								  opt->progress, opt->sendtblspcmapfile);

	/*
	 * Once do_pg_start_backup has been called, ensure that any failure causes
	 * us to abort the backup so we don't "leak" a backup counter. For this
	 * reason, *all* functionality between do_pg_start_backup() and the end of
	 * do_pg_stop_backup() should be inside the error cleanup block!
	 */

	PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
	{
		ListCell   *lc;
		tablespaceinfo *ti;

		SendXlogRecPtrResult(startptr, starttli);

		/*
		 * Calculate the relative path of temporary statistics directory in
		 * order to skip the files which are located in that directory later.
		 *
		 * The result always starts with "./": an absolute path that begins
		 * with DataDir has that prefix (plus one more character) stripped, a
		 * path not already starting with "./" gets it prepended, and
		 * anything else is used as is.
		 */
		if (is_absolute_path(pgstat_stat_directory) &&
			strncmp(pgstat_stat_directory, DataDir, datadirpathlen) == 0)
			statrelpath = psprintf("./%s", pgstat_stat_directory + datadirpathlen + 1);
		else if (strncmp(pgstat_stat_directory, "./", 2) != 0)
			statrelpath = psprintf("./%s", pgstat_stat_directory);
		else
			statrelpath = pgstat_stat_directory;

		/*
		 * Add a node for the base directory at the end.  Its path stays NULL
		 * (palloc0), which is how the code below recognizes the main data
		 * directory.  The size is only computed when progress reporting was
		 * requested; -1 means "unknown".
		 */
		ti = palloc0(sizeof(tablespaceinfo));
		ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1;
		tablespaces = lappend(tablespaces, ti);

		/* Send tablespace header */
		SendBackupHeader(tablespaces);

		/* Setup and activate network throttling, if client requested it */
		if (opt->maxrate > 0)
		{
			/* Bytes that may be sent per throttling interval. */
			throttling_sample =
				(int64) opt->maxrate * (int64) 1024 / THROTTLING_FREQUENCY;

			/*
			 * The minimum amount of time for throttling_sample bytes to be
			 * transferred.
			 */
			elapsed_min_unit = USECS_PER_SEC / THROTTLING_FREQUENCY;

			/* Enable throttling. */
			throttling_counter = 0;

			/* The 'real data' starts now (header was ignored). */
			throttled_last = GetCurrentTimestamp();
		}
		else
		{
			/* Disable throttling. */
			throttling_counter = -1;
		}

		/* Send off our tablespaces one by one */
		foreach(lc, tablespaces)
		{
			/* Note: intentionally shadows the outer 'ti' declared above. */
			tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
			StringInfoData buf;

			/* Send CopyOutResponse message */
			pq_beginmessage(&buf, 'H');
			pq_sendbyte(&buf, 0);	/* overall format */
			pq_sendint16(&buf, 0);	/* natts */
			pq_endmessage(&buf);

			if (ti->path == NULL)
			{
				struct stat statbuf;

				/* In the main tar, include the backup_label first... */
				sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data);

				/*
				 * Send tablespace_map file if required and then the bulk of
				 * the files.  When the map file is sent, sendDir() is told
				 * not to include tablespace symlink information.
				 */
				if (tblspc_map_file && opt->sendtblspcmapfile)
				{
					sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data);
					sendDir(".", 1, false, tablespaces, false);
				}
				else
					sendDir(".", 1, false, tablespaces, true);

				/* ... and pg_control after everything else. */
				if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not stat file \"%s\": %m",
									XLOG_CONTROL_FILE)));
				sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, false, InvalidOid);
			}
			else
				sendTablespace(ti->path, false);

			/*
			 * If we're including WAL, and this is the main data directory we
			 * don't terminate the tar stream here. Instead, we will append
			 * the xlog files below and terminate it then. This is safe since
			 * the main data directory is always sent *last*.
			 */
			if (opt->includewal && ti->path == NULL)
			{
				Assert(lnext(lc) == NULL);
			}
			else
				pq_putemptymessage('c');	/* CopyDone */
		}

		endptr = do_pg_stop_backup(labelfile->data, !opt->nowait, &endtli);
	}
	PG_END_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);


	if (opt->includewal)
	{
		/*
		 * We've left the last tar file "open", so we can now append the
		 * required WAL files to it.
		 */
		char		pathbuf[MAXPGPATH];
		XLogSegNo	segno;
		XLogSegNo	startsegno;
		XLogSegNo	endsegno;
		struct stat statbuf;
		List	   *historyFileList = NIL;
		List	   *walFileList = NIL;
		char	  **walFiles;
		int			nWalFiles;
		char		firstoff[MAXFNAMELEN];
		char		lastoff[MAXFNAMELEN];
		DIR		   *dir;
		struct dirent *de;
		int			i;
		ListCell   *lc;
		TimeLineID	tli;

		/*
		 * I'd rather not worry about timelines here, so scan pg_wal and
		 * include all WAL files in the range between 'startptr' and 'endptr',
		 * regardless of the timeline the file is stamped with. If there are
		 * some spurious WAL files belonging to timelines that don't belong in
		 * this server's history, they will be included too. Normally there
		 * shouldn't be such files, but if there are, there's little harm in
		 * including them.
		 */
		XLByteToSeg(startptr, startsegno, wal_segment_size);
		XLogFileName(firstoff, ThisTimeLineID, startsegno, wal_segment_size);
		XLByteToPrevSeg(endptr, endsegno, wal_segment_size);
		XLogFileName(lastoff, ThisTimeLineID, endsegno, wal_segment_size);

		dir = AllocateDir("pg_wal");
		while ((de = ReadDir(dir, "pg_wal")) != NULL)
		{
			/*
			 * Does it look like a WAL segment, and is it in the range?  The
			 * comparisons skip the first 8 characters of each name, i.e. the
			 * timeline portion.
			 */
			if (IsXLogFileName(de->d_name) &&
				strcmp(de->d_name + 8, firstoff + 8) >= 0 &&
				strcmp(de->d_name + 8, lastoff + 8) <= 0)
			{
				walFileList = lappend(walFileList, pstrdup(de->d_name));
			}
			/* Does it look like a timeline history file? */
			else if (IsTLHistoryFileName(de->d_name))
			{
				historyFileList = lappend(historyFileList, pstrdup(de->d_name));
			}
		}
		FreeDir(dir);

		/*
		 * Before we go any further, check that none of the WAL segments we
		 * need were removed.
		 */
		CheckXLogRemoved(startsegno, ThisTimeLineID);

		/*
		 * Put the WAL filenames into an array, and sort. We send the files in
		 * order from oldest to newest, to reduce the chance that a file is
		 * recycled before we get a chance to send it over.
		 */
		nWalFiles = list_length(walFileList);
		walFiles = palloc(nWalFiles * sizeof(char *));
		i = 0;
		foreach(lc, walFileList)
		{
			walFiles[i++] = lfirst(lc);
		}
		qsort(walFiles, nWalFiles, sizeof(char *), compareWalFileNames);

		/*
		 * There must be at least one xlog file in the pg_wal directory, since
		 * we are doing backup-including-xlog.
		 */
		if (nWalFiles < 1)
			ereport(ERROR,
					(errmsg("could not find any WAL files")));

		/*
		 * Sanity check: the first and last segment should cover startptr and
		 * endptr, with no gaps in between.
		 */
		XLogFromFileName(walFiles[0], &tli, &segno, wal_segment_size);
		if (segno != startsegno)
		{
			char		startfname[MAXFNAMELEN];

			XLogFileName(startfname, ThisTimeLineID, startsegno,
						 wal_segment_size);
			ereport(ERROR,
					(errmsg("could not find WAL file \"%s\"", startfname)));
		}
		for (i = 0; i < nWalFiles; i++)
		{
			XLogSegNo	currsegno = segno;
			XLogSegNo	nextsegno = segno + 1;

			/*
			 * Each file must carry either the same segment number as the
			 * previous one (same segment on a different timeline) or the
			 * next one; anything else means a segment is missing.
			 */
			XLogFromFileName(walFiles[i], &tli, &segno, wal_segment_size);
			if (!(nextsegno == segno || currsegno == segno))
			{
				char		nextfname[MAXFNAMELEN];

				XLogFileName(nextfname, ThisTimeLineID, nextsegno,
							 wal_segment_size);
				ereport(ERROR,
						(errmsg("could not find WAL file \"%s\"", nextfname)));
			}
		}
		if (segno != endsegno)
		{
			char		endfname[MAXFNAMELEN];

			XLogFileName(endfname, ThisTimeLineID, endsegno, wal_segment_size);
			ereport(ERROR,
					(errmsg("could not find WAL file \"%s\"", endfname)));
		}

		/* Ok, we have everything we need. Send the WAL files. */
		for (i = 0; i < nWalFiles; i++)
		{
			FILE	   *fp;
			char		buf[TAR_SEND_SIZE];
			size_t		cnt;
			pgoff_t		len = 0;

			snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", walFiles[i]);
			XLogFromFileName(walFiles[i], &tli, &segno, wal_segment_size);

			fp = AllocateFile(pathbuf, "rb");
			if (fp == NULL)
			{
				int			save_errno = errno;

				/*
				 * Most likely reason for this is that the file was already
				 * removed by a checkpoint, so check for that to get a better
				 * error message.
				 */
				CheckXLogRemoved(segno, tli);

				/* Restore errno so %m reports the AllocateFile() failure. */
				errno = save_errno;
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not open file \"%s\": %m", pathbuf)));
			}

			if (fstat(fileno(fp), &statbuf) != 0)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not stat file \"%s\": %m",
								pathbuf)));
			if (statbuf.st_size != wal_segment_size)
			{
				CheckXLogRemoved(segno, tli);
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("unexpected WAL file size \"%s\"", walFiles[i])));
			}

			/* send the WAL file itself */
			_tarWriteHeader(pathbuf, NULL, &statbuf, false);

			/* Never read more than what is left of one full segment. */
			while ((cnt = fread(buf, 1,
								Min(sizeof(buf), wal_segment_size - len),
								fp)) > 0)
			{
				CheckXLogRemoved(segno, tli);
				/* Send the chunk as a CopyData message */
				if (pq_putmessage('d', buf, cnt))
					ereport(ERROR,
							(errmsg("base backup could not send data, aborting backup")));

				len += cnt;
				throttle(cnt);

				if (len == wal_segment_size)
					break;
			}

			CHECK_FREAD_ERROR(fp, pathbuf);

			/* A short read means the file changed size underneath us. */
			if (len != wal_segment_size)
			{
				CheckXLogRemoved(segno, tli);
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("unexpected WAL file size \"%s\"", walFiles[i])));
			}

			/* wal_segment_size is a multiple of 512, so no need for padding */

			FreeFile(fp);

			/*
			 * Mark file as archived, otherwise files can get archived again
			 * after promotion of a new node. This is in line with
			 * walreceiver.c always doing an XLogArchiveForceDone() after a
			 * complete segment.
			 */
			StatusFilePath(pathbuf, walFiles[i], ".done");
			sendFileWithContent(pathbuf, "");
		}

		/*
		 * Send timeline history files too. Only the latest timeline history
		 * file is required for recovery, and even that only if there happens
		 * to be a timeline switch in the first WAL segment that contains the
		 * checkpoint record, or if we're taking a base backup from a standby
		 * server and the target timeline changes while the backup is taken.
		 * But they are small and highly useful for debugging purposes, so
		 * better include them all, always.
		 */
		foreach(lc, historyFileList)
		{
			char	   *fname = lfirst(lc);

			snprintf(pathbuf, MAXPGPATH, XLOGDIR "/%s", fname);

			if (lstat(pathbuf, &statbuf) != 0)
				ereport(ERROR,
						(errcode_for_file_access(),
						 errmsg("could not stat file \"%s\": %m", pathbuf)));

			sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid);

			/* unconditionally mark file as archived */
			StatusFilePath(pathbuf, fname, ".done");
			sendFileWithContent(pathbuf, "");
		}

		/* Send CopyDone message for the last tar file */
		pq_putemptymessage('c');
	}
	SendXlogRecPtrResult(endptr, endtli);

	/*
	 * Report checksum failures only now, after the whole backup has been
	 * sent.  More than one failure additionally gets a WARNING with the
	 * total count before the ERROR.
	 */
	if (total_checksum_failures)
	{
		if (total_checksum_failures > 1)
		{
			char		buf[64];

			snprintf(buf, sizeof(buf), INT64_FORMAT, total_checksum_failures);

			ereport(WARNING,
					(errmsg("%s total checksum verification failures", buf)));
		}
		ereport(ERROR,
				(errcode(ERRCODE_DATA_CORRUPTED),
				 errmsg("checksum verification failure during base backup")));
	}

}
638 | |
/*
 * qsort comparison function, to compare log/seg portion of WAL segment
 * filenames, ignoring the timeline portion.
 *
 * 'a' and 'b' each point to an array element, i.e. to a pointer to a
 * NUL-terminated file name.  The first 8 characters of each name (the
 * timeline ID) are skipped, so both names must be longer than that.
 *
 * Returns a value <0, 0 or >0 as strcmp() does.
 */
static int
compareWalFileNames(const void *a, const void *b)
{
	/* Keep the const qualifier instead of casting it away. */
	const char *fna = *(const char *const *) a;
	const char *fnb = *(const char *const *) b;

	return strcmp(fna + 8, fnb + 8);
}
651 | |
652 | /* |
653 | * Parse the base backup options passed down by the parser |
654 | */ |
655 | static void |
656 | parse_basebackup_options(List *options, basebackup_options *opt) |
657 | { |
658 | ListCell *lopt; |
659 | bool o_label = false; |
660 | bool o_progress = false; |
661 | bool o_fast = false; |
662 | bool o_nowait = false; |
663 | bool o_wal = false; |
664 | bool o_maxrate = false; |
665 | bool o_tablespace_map = false; |
666 | bool o_noverify_checksums = false; |
667 | |
668 | MemSet(opt, 0, sizeof(*opt)); |
669 | foreach(lopt, options) |
670 | { |
671 | DefElem *defel = (DefElem *) lfirst(lopt); |
672 | |
673 | if (strcmp(defel->defname, "label" ) == 0) |
674 | { |
675 | if (o_label) |
676 | ereport(ERROR, |
677 | (errcode(ERRCODE_SYNTAX_ERROR), |
678 | errmsg("duplicate option \"%s\"" , defel->defname))); |
679 | opt->label = strVal(defel->arg); |
680 | o_label = true; |
681 | } |
682 | else if (strcmp(defel->defname, "progress" ) == 0) |
683 | { |
684 | if (o_progress) |
685 | ereport(ERROR, |
686 | (errcode(ERRCODE_SYNTAX_ERROR), |
687 | errmsg("duplicate option \"%s\"" , defel->defname))); |
688 | opt->progress = true; |
689 | o_progress = true; |
690 | } |
691 | else if (strcmp(defel->defname, "fast" ) == 0) |
692 | { |
693 | if (o_fast) |
694 | ereport(ERROR, |
695 | (errcode(ERRCODE_SYNTAX_ERROR), |
696 | errmsg("duplicate option \"%s\"" , defel->defname))); |
697 | opt->fastcheckpoint = true; |
698 | o_fast = true; |
699 | } |
700 | else if (strcmp(defel->defname, "nowait" ) == 0) |
701 | { |
702 | if (o_nowait) |
703 | ereport(ERROR, |
704 | (errcode(ERRCODE_SYNTAX_ERROR), |
705 | errmsg("duplicate option \"%s\"" , defel->defname))); |
706 | opt->nowait = true; |
707 | o_nowait = true; |
708 | } |
709 | else if (strcmp(defel->defname, "wal" ) == 0) |
710 | { |
711 | if (o_wal) |
712 | ereport(ERROR, |
713 | (errcode(ERRCODE_SYNTAX_ERROR), |
714 | errmsg("duplicate option \"%s\"" , defel->defname))); |
715 | opt->includewal = true; |
716 | o_wal = true; |
717 | } |
718 | else if (strcmp(defel->defname, "max_rate" ) == 0) |
719 | { |
720 | long maxrate; |
721 | |
722 | if (o_maxrate) |
723 | ereport(ERROR, |
724 | (errcode(ERRCODE_SYNTAX_ERROR), |
725 | errmsg("duplicate option \"%s\"" , defel->defname))); |
726 | |
727 | maxrate = intVal(defel->arg); |
728 | if (maxrate < MAX_RATE_LOWER || maxrate > MAX_RATE_UPPER) |
729 | ereport(ERROR, |
730 | (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), |
731 | errmsg("%d is outside the valid range for parameter \"%s\" (%d .. %d)" , |
732 | (int) maxrate, "MAX_RATE" , MAX_RATE_LOWER, MAX_RATE_UPPER))); |
733 | |
734 | opt->maxrate = (uint32) maxrate; |
735 | o_maxrate = true; |
736 | } |
737 | else if (strcmp(defel->defname, "tablespace_map" ) == 0) |
738 | { |
739 | if (o_tablespace_map) |
740 | ereport(ERROR, |
741 | (errcode(ERRCODE_SYNTAX_ERROR), |
742 | errmsg("duplicate option \"%s\"" , defel->defname))); |
743 | opt->sendtblspcmapfile = true; |
744 | o_tablespace_map = true; |
745 | } |
746 | else if (strcmp(defel->defname, "noverify_checksums" ) == 0) |
747 | { |
748 | if (o_noverify_checksums) |
749 | ereport(ERROR, |
750 | (errcode(ERRCODE_SYNTAX_ERROR), |
751 | errmsg("duplicate option \"%s\"" , defel->defname))); |
752 | noverify_checksums = true; |
753 | o_noverify_checksums = true; |
754 | } |
755 | else |
756 | elog(ERROR, "option \"%s\" not recognized" , |
757 | defel->defname); |
758 | } |
759 | if (opt->label == NULL) |
760 | opt->label = "base backup" ; |
761 | } |
762 | |
763 | |
764 | /* |
765 | * SendBaseBackup() - send a complete base backup. |
766 | * |
767 | * The function will put the system into backup mode like pg_start_backup() |
768 | * does, so that the backup is consistent even though we read directly from |
769 | * the filesystem, bypassing the buffer cache. |
770 | */ |
771 | void |
772 | SendBaseBackup(BaseBackupCmd *cmd) |
773 | { |
774 | basebackup_options opt; |
775 | |
776 | parse_basebackup_options(cmd->options, &opt); |
777 | |
778 | WalSndSetState(WALSNDSTATE_BACKUP); |
779 | |
780 | if (update_process_title) |
781 | { |
782 | char activitymsg[50]; |
783 | |
784 | snprintf(activitymsg, sizeof(activitymsg), "sending backup \"%s\"" , |
785 | opt.label); |
786 | set_ps_display(activitymsg, false); |
787 | } |
788 | |
789 | perform_base_backup(&opt); |
790 | } |
791 | |
792 | static void |
793 | send_int8_string(StringInfoData *buf, int64 intval) |
794 | { |
795 | char is[32]; |
796 | |
797 | sprintf(is, INT64_FORMAT, intval); |
798 | pq_sendint32(buf, strlen(is)); |
799 | pq_sendbytes(buf, is, strlen(is)); |
800 | } |
801 | |
802 | static void |
803 | (List *tablespaces) |
804 | { |
805 | StringInfoData buf; |
806 | ListCell *lc; |
807 | |
808 | /* Construct and send the directory information */ |
809 | pq_beginmessage(&buf, 'T'); /* RowDescription */ |
810 | pq_sendint16(&buf, 3); /* 3 fields */ |
811 | |
812 | /* First field - spcoid */ |
813 | pq_sendstring(&buf, "spcoid" ); |
814 | pq_sendint32(&buf, 0); /* table oid */ |
815 | pq_sendint16(&buf, 0); /* attnum */ |
816 | pq_sendint32(&buf, OIDOID); /* type oid */ |
817 | pq_sendint16(&buf, 4); /* typlen */ |
818 | pq_sendint32(&buf, 0); /* typmod */ |
819 | pq_sendint16(&buf, 0); /* format code */ |
820 | |
821 | /* Second field - spcpath */ |
822 | pq_sendstring(&buf, "spclocation" ); |
823 | pq_sendint32(&buf, 0); |
824 | pq_sendint16(&buf, 0); |
825 | pq_sendint32(&buf, TEXTOID); |
826 | pq_sendint16(&buf, -1); |
827 | pq_sendint32(&buf, 0); |
828 | pq_sendint16(&buf, 0); |
829 | |
830 | /* Third field - size */ |
831 | pq_sendstring(&buf, "size" ); |
832 | pq_sendint32(&buf, 0); |
833 | pq_sendint16(&buf, 0); |
834 | pq_sendint32(&buf, INT8OID); |
835 | pq_sendint16(&buf, 8); |
836 | pq_sendint32(&buf, 0); |
837 | pq_sendint16(&buf, 0); |
838 | pq_endmessage(&buf); |
839 | |
840 | foreach(lc, tablespaces) |
841 | { |
842 | tablespaceinfo *ti = lfirst(lc); |
843 | |
844 | /* Send one datarow message */ |
845 | pq_beginmessage(&buf, 'D'); |
846 | pq_sendint16(&buf, 3); /* number of columns */ |
847 | if (ti->path == NULL) |
848 | { |
849 | pq_sendint32(&buf, -1); /* Length = -1 ==> NULL */ |
850 | pq_sendint32(&buf, -1); |
851 | } |
852 | else |
853 | { |
854 | Size len; |
855 | |
856 | len = strlen(ti->oid); |
857 | pq_sendint32(&buf, len); |
858 | pq_sendbytes(&buf, ti->oid, len); |
859 | |
860 | len = strlen(ti->path); |
861 | pq_sendint32(&buf, len); |
862 | pq_sendbytes(&buf, ti->path, len); |
863 | } |
864 | if (ti->size >= 0) |
865 | send_int8_string(&buf, ti->size / 1024); |
866 | else |
867 | pq_sendint32(&buf, -1); /* NULL */ |
868 | |
869 | pq_endmessage(&buf); |
870 | } |
871 | |
872 | /* Send a CommandComplete message */ |
873 | pq_puttextmessage('C', "SELECT" ); |
874 | } |
875 | |
876 | /* |
877 | * Send a single resultset containing just a single |
878 | * XLogRecPtr record (in text format) |
879 | */ |
880 | static void |
881 | SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli) |
882 | { |
883 | StringInfoData buf; |
884 | char str[MAXFNAMELEN]; |
885 | Size len; |
886 | |
887 | pq_beginmessage(&buf, 'T'); /* RowDescription */ |
888 | pq_sendint16(&buf, 2); /* 2 fields */ |
889 | |
890 | /* Field headers */ |
891 | pq_sendstring(&buf, "recptr" ); |
892 | pq_sendint32(&buf, 0); /* table oid */ |
893 | pq_sendint16(&buf, 0); /* attnum */ |
894 | pq_sendint32(&buf, TEXTOID); /* type oid */ |
895 | pq_sendint16(&buf, -1); |
896 | pq_sendint32(&buf, 0); |
897 | pq_sendint16(&buf, 0); |
898 | |
899 | pq_sendstring(&buf, "tli" ); |
900 | pq_sendint32(&buf, 0); /* table oid */ |
901 | pq_sendint16(&buf, 0); /* attnum */ |
902 | |
903 | /* |
904 | * int8 may seem like a surprising data type for this, but in theory int4 |
905 | * would not be wide enough for this, as TimeLineID is unsigned. |
906 | */ |
907 | pq_sendint32(&buf, INT8OID); /* type oid */ |
908 | pq_sendint16(&buf, -1); |
909 | pq_sendint32(&buf, 0); |
910 | pq_sendint16(&buf, 0); |
911 | pq_endmessage(&buf); |
912 | |
913 | /* Data row */ |
914 | pq_beginmessage(&buf, 'D'); |
915 | pq_sendint16(&buf, 2); /* number of columns */ |
916 | |
917 | len = snprintf(str, sizeof(str), |
918 | "%X/%X" , (uint32) (ptr >> 32), (uint32) ptr); |
919 | pq_sendint32(&buf, len); |
920 | pq_sendbytes(&buf, str, len); |
921 | |
922 | len = snprintf(str, sizeof(str), "%u" , tli); |
923 | pq_sendint32(&buf, len); |
924 | pq_sendbytes(&buf, str, len); |
925 | |
926 | pq_endmessage(&buf); |
927 | |
928 | /* Send a CommandComplete message */ |
929 | pq_puttextmessage('C', "SELECT" ); |
930 | } |
931 | |
932 | /* |
933 | * Inject a file with given name and content in the output tar stream. |
934 | */ |
935 | static void |
936 | sendFileWithContent(const char *filename, const char *content) |
937 | { |
938 | struct stat statbuf; |
939 | int pad, |
940 | len; |
941 | |
942 | len = strlen(content); |
943 | |
944 | /* |
945 | * Construct a stat struct for the backup_label file we're injecting in |
946 | * the tar. |
947 | */ |
948 | /* Windows doesn't have the concept of uid and gid */ |
949 | #ifdef WIN32 |
950 | statbuf.st_uid = 0; |
951 | statbuf.st_gid = 0; |
952 | #else |
953 | statbuf.st_uid = geteuid(); |
954 | statbuf.st_gid = getegid(); |
955 | #endif |
956 | statbuf.st_mtime = time(NULL); |
957 | statbuf.st_mode = pg_file_create_mode; |
958 | statbuf.st_size = len; |
959 | |
960 | _tarWriteHeader(filename, NULL, &statbuf, false); |
961 | /* Send the contents as a CopyData message */ |
962 | pq_putmessage('d', content, len); |
963 | |
964 | /* Pad to 512 byte boundary, per tar format requirements */ |
965 | pad = ((len + 511) & ~511) - len; |
966 | if (pad > 0) |
967 | { |
968 | char buf[512]; |
969 | |
970 | MemSet(buf, 0, pad); |
971 | pq_putmessage('d', buf, pad); |
972 | } |
973 | } |
974 | |
975 | /* |
976 | * Include the tablespace directory pointed to by 'path' in the output tar |
977 | * stream. If 'sizeonly' is true, we just calculate a total length and return |
978 | * it, without actually sending anything. |
979 | * |
980 | * Only used to send auxiliary tablespaces, not PGDATA. |
981 | */ |
982 | int64 |
983 | sendTablespace(char *path, bool sizeonly) |
984 | { |
985 | int64 size; |
986 | char pathbuf[MAXPGPATH]; |
987 | struct stat statbuf; |
988 | |
989 | /* |
990 | * 'path' points to the tablespace location, but we only want to include |
991 | * the version directory in it that belongs to us. |
992 | */ |
993 | snprintf(pathbuf, sizeof(pathbuf), "%s/%s" , path, |
994 | TABLESPACE_VERSION_DIRECTORY); |
995 | |
996 | /* |
997 | * Store a directory entry in the tar file so we get the permissions |
998 | * right. |
999 | */ |
1000 | if (lstat(pathbuf, &statbuf) != 0) |
1001 | { |
1002 | if (errno != ENOENT) |
1003 | ereport(ERROR, |
1004 | (errcode_for_file_access(), |
1005 | errmsg("could not stat file or directory \"%s\": %m" , |
1006 | pathbuf))); |
1007 | |
1008 | /* If the tablespace went away while scanning, it's no error. */ |
1009 | return 0; |
1010 | } |
1011 | |
1012 | size = _tarWriteHeader(TABLESPACE_VERSION_DIRECTORY, NULL, &statbuf, |
1013 | sizeonly); |
1014 | |
1015 | /* Send all the files in the tablespace version directory */ |
1016 | size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true); |
1017 | |
1018 | return size; |
1019 | } |
1020 | |
1021 | /* |
1022 | * Include all files from the given directory in the output tar stream. If |
1023 | * 'sizeonly' is true, we just calculate a total length and return it, without |
1024 | * actually sending anything. |
1025 | * |
1026 | * Omit any directory in the tablespaces list, to avoid backing up |
1027 | * tablespaces twice when they were created inside PGDATA. |
1028 | * |
1029 | * If sendtblspclinks is true, we need to include symlink |
1030 | * information in the tar file. If not, we can skip that |
1031 | * as it will be sent separately in the tablespace_map file. |
1032 | */ |
1033 | static int64 |
1034 | sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, |
1035 | bool sendtblspclinks) |
1036 | { |
1037 | DIR *dir; |
1038 | struct dirent *de; |
1039 | char pathbuf[MAXPGPATH * 2]; |
1040 | struct stat statbuf; |
1041 | int64 size = 0; |
1042 | const char *lastDir; /* Split last dir from parent path. */ |
1043 | bool isDbDir = false; /* Does this directory contain relations? */ |
1044 | |
1045 | /* |
1046 | * Determine if the current path is a database directory that can contain |
1047 | * relations. |
1048 | * |
1049 | * Start by finding the location of the delimiter between the parent path |
1050 | * and the current path. |
1051 | */ |
1052 | lastDir = last_dir_separator(path); |
1053 | |
1054 | /* Does this path look like a database path (i.e. all digits)? */ |
1055 | if (lastDir != NULL && |
1056 | strspn(lastDir + 1, "0123456789" ) == strlen(lastDir + 1)) |
1057 | { |
1058 | /* Part of path that contains the parent directory. */ |
1059 | int parentPathLen = lastDir - path; |
1060 | |
1061 | /* |
1062 | * Mark path as a database directory if the parent path is either |
1063 | * $PGDATA/base or a tablespace version path. |
1064 | */ |
1065 | if (strncmp(path, "./base" , parentPathLen) == 0 || |
1066 | (parentPathLen >= (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) && |
1067 | strncmp(lastDir - (sizeof(TABLESPACE_VERSION_DIRECTORY) - 1), |
1068 | TABLESPACE_VERSION_DIRECTORY, |
1069 | sizeof(TABLESPACE_VERSION_DIRECTORY) - 1) == 0)) |
1070 | isDbDir = true; |
1071 | } |
1072 | |
1073 | dir = AllocateDir(path); |
1074 | while ((de = ReadDir(dir, path)) != NULL) |
1075 | { |
1076 | int excludeIdx; |
1077 | bool excludeFound; |
1078 | ForkNumber relForkNum; /* Type of fork if file is a relation */ |
1079 | int relOidChars; /* Chars in filename that are the rel oid */ |
1080 | |
1081 | /* Skip special stuff */ |
1082 | if (strcmp(de->d_name, "." ) == 0 || strcmp(de->d_name, ".." ) == 0) |
1083 | continue; |
1084 | |
1085 | /* Skip temporary files */ |
1086 | if (strncmp(de->d_name, |
1087 | PG_TEMP_FILE_PREFIX, |
1088 | strlen(PG_TEMP_FILE_PREFIX)) == 0) |
1089 | continue; |
1090 | |
1091 | /* |
1092 | * Check if the postmaster has signaled us to exit, and abort with an |
1093 | * error in that case. The error handler further up will call |
1094 | * do_pg_abort_backup() for us. Also check that if the backup was |
1095 | * started while still in recovery, the server wasn't promoted. |
1096 | * dp_pg_stop_backup() will check that too, but it's better to stop |
1097 | * the backup early than continue to the end and fail there. |
1098 | */ |
1099 | CHECK_FOR_INTERRUPTS(); |
1100 | if (RecoveryInProgress() != backup_started_in_recovery) |
1101 | ereport(ERROR, |
1102 | (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), |
1103 | errmsg("the standby was promoted during online backup" ), |
1104 | errhint("This means that the backup being taken is corrupt " |
1105 | "and should not be used. " |
1106 | "Try taking another online backup." ))); |
1107 | |
1108 | /* Scan for files that should be excluded */ |
1109 | excludeFound = false; |
1110 | for (excludeIdx = 0; excludeFiles[excludeIdx] != NULL; excludeIdx++) |
1111 | { |
1112 | if (strcmp(de->d_name, excludeFiles[excludeIdx]) == 0) |
1113 | { |
1114 | elog(DEBUG1, "file \"%s\" excluded from backup" , de->d_name); |
1115 | excludeFound = true; |
1116 | break; |
1117 | } |
1118 | } |
1119 | |
1120 | if (excludeFound) |
1121 | continue; |
1122 | |
1123 | /* Exclude all forks for unlogged tables except the init fork */ |
1124 | if (isDbDir && |
1125 | parse_filename_for_nontemp_relation(de->d_name, &relOidChars, |
1126 | &relForkNum)) |
1127 | { |
1128 | /* Never exclude init forks */ |
1129 | if (relForkNum != INIT_FORKNUM) |
1130 | { |
1131 | char initForkFile[MAXPGPATH]; |
1132 | char relOid[OIDCHARS + 1]; |
1133 | |
1134 | /* |
1135 | * If any other type of fork, check if there is an init fork |
1136 | * with the same OID. If so, the file can be excluded. |
1137 | */ |
1138 | memcpy(relOid, de->d_name, relOidChars); |
1139 | relOid[relOidChars] = '\0'; |
1140 | snprintf(initForkFile, sizeof(initForkFile), "%s/%s_init" , |
1141 | path, relOid); |
1142 | |
1143 | if (lstat(initForkFile, &statbuf) == 0) |
1144 | { |
1145 | elog(DEBUG2, |
1146 | "unlogged relation file \"%s\" excluded from backup" , |
1147 | de->d_name); |
1148 | |
1149 | continue; |
1150 | } |
1151 | } |
1152 | } |
1153 | |
1154 | /* Exclude temporary relations */ |
1155 | if (isDbDir && looks_like_temp_rel_name(de->d_name)) |
1156 | { |
1157 | elog(DEBUG2, |
1158 | "temporary relation file \"%s\" excluded from backup" , |
1159 | de->d_name); |
1160 | |
1161 | continue; |
1162 | } |
1163 | |
1164 | snprintf(pathbuf, sizeof(pathbuf), "%s/%s" , path, de->d_name); |
1165 | |
1166 | /* Skip pg_control here to back up it last */ |
1167 | if (strcmp(pathbuf, "./global/pg_control" ) == 0) |
1168 | continue; |
1169 | |
1170 | if (lstat(pathbuf, &statbuf) != 0) |
1171 | { |
1172 | if (errno != ENOENT) |
1173 | ereport(ERROR, |
1174 | (errcode_for_file_access(), |
1175 | errmsg("could not stat file or directory \"%s\": %m" , |
1176 | pathbuf))); |
1177 | |
1178 | /* If the file went away while scanning, it's not an error. */ |
1179 | continue; |
1180 | } |
1181 | |
1182 | /* Scan for directories whose contents should be excluded */ |
1183 | excludeFound = false; |
1184 | for (excludeIdx = 0; excludeDirContents[excludeIdx] != NULL; excludeIdx++) |
1185 | { |
1186 | if (strcmp(de->d_name, excludeDirContents[excludeIdx]) == 0) |
1187 | { |
1188 | elog(DEBUG1, "contents of directory \"%s\" excluded from backup" , de->d_name); |
1189 | size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly); |
1190 | excludeFound = true; |
1191 | break; |
1192 | } |
1193 | } |
1194 | |
1195 | if (excludeFound) |
1196 | continue; |
1197 | |
1198 | /* |
1199 | * Exclude contents of directory specified by statrelpath if not set |
1200 | * to the default (pg_stat_tmp) which is caught in the loop above. |
1201 | */ |
1202 | if (statrelpath != NULL && strcmp(pathbuf, statrelpath) == 0) |
1203 | { |
1204 | elog(DEBUG1, "contents of directory \"%s\" excluded from backup" , statrelpath); |
1205 | size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly); |
1206 | continue; |
1207 | } |
1208 | |
1209 | /* |
1210 | * We can skip pg_wal, the WAL segments need to be fetched from the |
1211 | * WAL archive anyway. But include it as an empty directory anyway, so |
1212 | * we get permissions right. |
1213 | */ |
1214 | if (strcmp(pathbuf, "./pg_wal" ) == 0) |
1215 | { |
1216 | /* If pg_wal is a symlink, write it as a directory anyway */ |
1217 | size += _tarWriteDir(pathbuf, basepathlen, &statbuf, sizeonly); |
1218 | |
1219 | /* |
1220 | * Also send archive_status directory (by hackishly reusing |
1221 | * statbuf from above ...). |
1222 | */ |
1223 | size += _tarWriteHeader("./pg_wal/archive_status" , NULL, &statbuf, |
1224 | sizeonly); |
1225 | |
1226 | continue; /* don't recurse into pg_wal */ |
1227 | } |
1228 | |
1229 | /* Allow symbolic links in pg_tblspc only */ |
1230 | if (strcmp(path, "./pg_tblspc" ) == 0 && |
1231 | #ifndef WIN32 |
1232 | S_ISLNK(statbuf.st_mode) |
1233 | #else |
1234 | pgwin32_is_junction(pathbuf) |
1235 | #endif |
1236 | ) |
1237 | { |
1238 | #if defined(HAVE_READLINK) || defined(WIN32) |
1239 | char linkpath[MAXPGPATH]; |
1240 | int rllen; |
1241 | |
1242 | rllen = readlink(pathbuf, linkpath, sizeof(linkpath)); |
1243 | if (rllen < 0) |
1244 | ereport(ERROR, |
1245 | (errcode_for_file_access(), |
1246 | errmsg("could not read symbolic link \"%s\": %m" , |
1247 | pathbuf))); |
1248 | if (rllen >= sizeof(linkpath)) |
1249 | ereport(ERROR, |
1250 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
1251 | errmsg("symbolic link \"%s\" target is too long" , |
1252 | pathbuf))); |
1253 | linkpath[rllen] = '\0'; |
1254 | |
1255 | size += _tarWriteHeader(pathbuf + basepathlen + 1, linkpath, |
1256 | &statbuf, sizeonly); |
1257 | #else |
1258 | |
1259 | /* |
1260 | * If the platform does not have symbolic links, it should not be |
1261 | * possible to have tablespaces - clearly somebody else created |
1262 | * them. Warn about it and ignore. |
1263 | */ |
1264 | ereport(WARNING, |
1265 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
1266 | errmsg("tablespaces are not supported on this platform" ))); |
1267 | continue; |
1268 | #endif /* HAVE_READLINK */ |
1269 | } |
1270 | else if (S_ISDIR(statbuf.st_mode)) |
1271 | { |
1272 | bool skip_this_dir = false; |
1273 | ListCell *lc; |
1274 | |
1275 | /* |
1276 | * Store a directory entry in the tar file so we can get the |
1277 | * permissions right. |
1278 | */ |
1279 | size += _tarWriteHeader(pathbuf + basepathlen + 1, NULL, &statbuf, |
1280 | sizeonly); |
1281 | |
1282 | /* |
1283 | * Call ourselves recursively for a directory, unless it happens |
1284 | * to be a separate tablespace located within PGDATA. |
1285 | */ |
1286 | foreach(lc, tablespaces) |
1287 | { |
1288 | tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc); |
1289 | |
1290 | /* |
1291 | * ti->rpath is the tablespace relative path within PGDATA, or |
1292 | * NULL if the tablespace has been properly located somewhere |
1293 | * else. |
1294 | * |
1295 | * Skip past the leading "./" in pathbuf when comparing. |
1296 | */ |
1297 | if (ti->rpath && strcmp(ti->rpath, pathbuf + 2) == 0) |
1298 | { |
1299 | skip_this_dir = true; |
1300 | break; |
1301 | } |
1302 | } |
1303 | |
1304 | /* |
1305 | * skip sending directories inside pg_tblspc, if not required. |
1306 | */ |
1307 | if (strcmp(pathbuf, "./pg_tblspc" ) == 0 && !sendtblspclinks) |
1308 | skip_this_dir = true; |
1309 | |
1310 | if (!skip_this_dir) |
1311 | size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks); |
1312 | } |
1313 | else if (S_ISREG(statbuf.st_mode)) |
1314 | { |
1315 | bool sent = false; |
1316 | |
1317 | if (!sizeonly) |
1318 | sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf, |
1319 | true, isDbDir ? pg_atoi(lastDir + 1, sizeof(Oid), 0) : InvalidOid); |
1320 | |
1321 | if (sent || sizeonly) |
1322 | { |
1323 | /* Add size, rounded up to 512byte block */ |
1324 | size += ((statbuf.st_size + 511) & ~511); |
1325 | size += 512; /* Size of the header of the file */ |
1326 | } |
1327 | } |
1328 | else |
1329 | ereport(WARNING, |
1330 | (errmsg("skipping special file \"%s\"" , pathbuf))); |
1331 | } |
1332 | FreeDir(dir); |
1333 | return size; |
1334 | } |
1335 | |
1336 | /* |
1337 | * Check if a file should have its checksum validated. |
1338 | * We validate checksums on files in regular tablespaces |
1339 | * (including global and default) only, and in those there |
1340 | * are some files that are explicitly excluded. |
1341 | */ |
1342 | static bool |
1343 | is_checksummed_file(const char *fullpath, const char *filename) |
1344 | { |
1345 | const char *const *f; |
1346 | |
1347 | /* Check that the file is in a tablespace */ |
1348 | if (strncmp(fullpath, "./global/" , 9) == 0 || |
1349 | strncmp(fullpath, "./base/" , 7) == 0 || |
1350 | strncmp(fullpath, "/" , 1) == 0) |
1351 | { |
1352 | /* Compare file against noChecksumFiles skiplist */ |
1353 | for (f = noChecksumFiles; *f; f++) |
1354 | if (strcmp(*f, filename) == 0) |
1355 | return false; |
1356 | |
1357 | return true; |
1358 | } |
1359 | else |
1360 | return false; |
1361 | } |
1362 | |
1363 | /***** |
1364 | * Functions for handling tar file format |
1365 | * |
1366 | * Copied from pg_dump, but modified to work with libpq for sending |
1367 | */ |
1368 | |
1369 | |
1370 | /* |
1371 | * Given the member, write the TAR header & send the file. |
1372 | * |
1373 | * If 'missing_ok' is true, will not throw an error if the file is not found. |
1374 | * |
1375 | * If dboid is anything other than InvalidOid then any checksum failures detected |
1376 | * will get reported to the stats collector. |
1377 | * |
1378 | * Returns true if the file was successfully sent, false if 'missing_ok', |
1379 | * and the file did not exist. |
1380 | */ |
1381 | static bool |
1382 | sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf, |
1383 | bool missing_ok, Oid dboid) |
1384 | { |
1385 | FILE *fp; |
1386 | BlockNumber blkno = 0; |
1387 | bool block_retry = false; |
1388 | char buf[TAR_SEND_SIZE]; |
1389 | uint16 checksum; |
1390 | int checksum_failures = 0; |
1391 | off_t cnt; |
1392 | int i; |
1393 | pgoff_t len = 0; |
1394 | char *page; |
1395 | size_t pad; |
1396 | PageHeader phdr; |
1397 | int segmentno = 0; |
1398 | char *segmentpath; |
1399 | bool verify_checksum = false; |
1400 | |
1401 | fp = AllocateFile(readfilename, "rb" ); |
1402 | if (fp == NULL) |
1403 | { |
1404 | if (errno == ENOENT && missing_ok) |
1405 | return false; |
1406 | ereport(ERROR, |
1407 | (errcode_for_file_access(), |
1408 | errmsg("could not open file \"%s\": %m" , readfilename))); |
1409 | } |
1410 | |
1411 | _tarWriteHeader(tarfilename, NULL, statbuf, false); |
1412 | |
1413 | if (!noverify_checksums && DataChecksumsEnabled()) |
1414 | { |
1415 | char *filename; |
1416 | |
1417 | /* |
1418 | * Get the filename (excluding path). As last_dir_separator() |
1419 | * includes the last directory separator, we chop that off by |
1420 | * incrementing the pointer. |
1421 | */ |
1422 | filename = last_dir_separator(readfilename) + 1; |
1423 | |
1424 | if (is_checksummed_file(readfilename, filename)) |
1425 | { |
1426 | verify_checksum = true; |
1427 | |
1428 | /* |
1429 | * Cut off at the segment boundary (".") to get the segment number |
1430 | * in order to mix it into the checksum. |
1431 | */ |
1432 | segmentpath = strstr(filename, "." ); |
1433 | if (segmentpath != NULL) |
1434 | { |
1435 | segmentno = atoi(segmentpath + 1); |
1436 | if (segmentno == 0) |
1437 | ereport(ERROR, |
1438 | (errmsg("invalid segment number %d in file \"%s\"" , |
1439 | segmentno, filename))); |
1440 | } |
1441 | } |
1442 | } |
1443 | |
1444 | while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0) |
1445 | { |
1446 | /* |
1447 | * The checksums are verified at block level, so we iterate over the |
1448 | * buffer in chunks of BLCKSZ, after making sure that |
1449 | * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of |
1450 | * BLCKSZ bytes. |
1451 | */ |
1452 | Assert(TAR_SEND_SIZE % BLCKSZ == 0); |
1453 | |
1454 | if (verify_checksum && (cnt % BLCKSZ != 0)) |
1455 | { |
1456 | ereport(WARNING, |
1457 | (errmsg("could not verify checksum in file \"%s\", block " |
1458 | "%d: read buffer size %d and page size %d " |
1459 | "differ" , |
1460 | readfilename, blkno, (int) cnt, BLCKSZ))); |
1461 | verify_checksum = false; |
1462 | } |
1463 | |
1464 | if (verify_checksum) |
1465 | { |
1466 | for (i = 0; i < cnt / BLCKSZ; i++) |
1467 | { |
1468 | page = buf + BLCKSZ * i; |
1469 | |
1470 | /* |
1471 | * Only check pages which have not been modified since the |
1472 | * start of the base backup. Otherwise, they might have been |
1473 | * written only halfway and the checksum would not be valid. |
1474 | * However, replaying WAL would reinstate the correct page in |
1475 | * this case. We also skip completely new pages, since they |
1476 | * don't have a checksum yet. |
1477 | */ |
1478 | if (!PageIsNew(page) && PageGetLSN(page) < startptr) |
1479 | { |
1480 | checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE); |
1481 | phdr = (PageHeader) page; |
1482 | if (phdr->pd_checksum != checksum) |
1483 | { |
1484 | /* |
1485 | * Retry the block on the first failure. It's |
1486 | * possible that we read the first 4K page of the |
1487 | * block just before postgres updated the entire block |
1488 | * so it ends up looking torn to us. We only need to |
1489 | * retry once because the LSN should be updated to |
1490 | * something we can ignore on the next pass. If the |
1491 | * error happens again then it is a true validation |
1492 | * failure. |
1493 | */ |
1494 | if (block_retry == false) |
1495 | { |
1496 | /* Reread the failed block */ |
1497 | if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1) |
1498 | { |
1499 | ereport(ERROR, |
1500 | (errcode_for_file_access(), |
1501 | errmsg("could not fseek in file \"%s\": %m" , |
1502 | readfilename))); |
1503 | } |
1504 | |
1505 | if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ) |
1506 | { |
1507 | /* |
1508 | * If we hit end-of-file, a concurrent |
1509 | * truncation must have occurred, so break out |
1510 | * of this loop just as if the initial fread() |
1511 | * returned 0. We'll drop through to the same |
1512 | * code that handles that case. (We must fix |
1513 | * up cnt first, though.) |
1514 | */ |
1515 | if (feof(fp)) |
1516 | { |
1517 | cnt = BLCKSZ * i; |
1518 | break; |
1519 | } |
1520 | |
1521 | ereport(ERROR, |
1522 | (errcode_for_file_access(), |
1523 | errmsg("could not reread block %d of file \"%s\": %m" , |
1524 | blkno, readfilename))); |
1525 | } |
1526 | |
1527 | if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1) |
1528 | { |
1529 | ereport(ERROR, |
1530 | (errcode_for_file_access(), |
1531 | errmsg("could not fseek in file \"%s\": %m" , |
1532 | readfilename))); |
1533 | } |
1534 | |
1535 | /* Set flag so we know a retry was attempted */ |
1536 | block_retry = true; |
1537 | |
1538 | /* Reset loop to validate the block again */ |
1539 | i--; |
1540 | continue; |
1541 | } |
1542 | |
1543 | checksum_failures++; |
1544 | |
1545 | if (checksum_failures <= 5) |
1546 | ereport(WARNING, |
1547 | (errmsg("checksum verification failed in " |
1548 | "file \"%s\", block %d: calculated " |
1549 | "%X but expected %X" , |
1550 | readfilename, blkno, checksum, |
1551 | phdr->pd_checksum))); |
1552 | if (checksum_failures == 5) |
1553 | ereport(WARNING, |
1554 | (errmsg("further checksum verification " |
1555 | "failures in file \"%s\" will not " |
1556 | "be reported" , readfilename))); |
1557 | } |
1558 | } |
1559 | block_retry = false; |
1560 | blkno++; |
1561 | } |
1562 | } |
1563 | |
1564 | /* Send the chunk as a CopyData message */ |
1565 | if (pq_putmessage('d', buf, cnt)) |
1566 | ereport(ERROR, |
1567 | (errmsg("base backup could not send data, aborting backup" ))); |
1568 | |
1569 | len += cnt; |
1570 | throttle(cnt); |
1571 | |
1572 | if (feof(fp) || len >= statbuf->st_size) |
1573 | { |
1574 | /* |
1575 | * Reached end of file. The file could be longer, if it was |
1576 | * extended while we were sending it, but for a base backup we can |
1577 | * ignore such extended data. It will be restored from WAL. |
1578 | */ |
1579 | break; |
1580 | } |
1581 | } |
1582 | |
1583 | CHECK_FREAD_ERROR(fp, readfilename); |
1584 | |
1585 | /* If the file was truncated while we were sending it, pad it with zeros */ |
1586 | if (len < statbuf->st_size) |
1587 | { |
1588 | MemSet(buf, 0, sizeof(buf)); |
1589 | while (len < statbuf->st_size) |
1590 | { |
1591 | cnt = Min(sizeof(buf), statbuf->st_size - len); |
1592 | pq_putmessage('d', buf, cnt); |
1593 | len += cnt; |
1594 | throttle(cnt); |
1595 | } |
1596 | } |
1597 | |
1598 | /* |
1599 | * Pad to 512 byte boundary, per tar format requirements. (This small |
1600 | * piece of data is probably not worth throttling.) |
1601 | */ |
1602 | pad = ((len + 511) & ~511) - len; |
1603 | if (pad > 0) |
1604 | { |
1605 | MemSet(buf, 0, pad); |
1606 | pq_putmessage('d', buf, pad); |
1607 | } |
1608 | |
1609 | FreeFile(fp); |
1610 | |
1611 | if (checksum_failures > 1) |
1612 | { |
1613 | ereport(WARNING, |
1614 | (errmsg_plural("file \"%s\" has a total of %d checksum verification failure" , |
1615 | "file \"%s\" has a total of %d checksum verification failures" , |
1616 | checksum_failures, |
1617 | readfilename, checksum_failures))); |
1618 | |
1619 | pgstat_report_checksum_failures_in_db(dboid, checksum_failures); |
1620 | } |
1621 | |
1622 | total_checksum_failures += checksum_failures; |
1623 | |
1624 | return true; |
1625 | } |
1626 | |
1627 | |
1628 | static int64 |
1629 | (const char *filename, const char *linktarget, |
1630 | struct stat *statbuf, bool sizeonly) |
1631 | { |
1632 | char h[512]; |
1633 | enum tarError rc; |
1634 | |
1635 | if (!sizeonly) |
1636 | { |
1637 | rc = tarCreateHeader(h, filename, linktarget, statbuf->st_size, |
1638 | statbuf->st_mode, statbuf->st_uid, statbuf->st_gid, |
1639 | statbuf->st_mtime); |
1640 | |
1641 | switch (rc) |
1642 | { |
1643 | case TAR_OK: |
1644 | break; |
1645 | case TAR_NAME_TOO_LONG: |
1646 | ereport(ERROR, |
1647 | (errmsg("file name too long for tar format: \"%s\"" , |
1648 | filename))); |
1649 | break; |
1650 | case TAR_SYMLINK_TOO_LONG: |
1651 | ereport(ERROR, |
1652 | (errmsg("symbolic link target too long for tar format: " |
1653 | "file name \"%s\", target \"%s\"" , |
1654 | filename, linktarget))); |
1655 | break; |
1656 | default: |
1657 | elog(ERROR, "unrecognized tar error: %d" , rc); |
1658 | } |
1659 | |
1660 | pq_putmessage('d', h, sizeof(h)); |
1661 | } |
1662 | |
1663 | return sizeof(h); |
1664 | } |
1665 | |
1666 | /* |
1667 | * Write tar header for a directory. If the entry in statbuf is a link then |
1668 | * write it as a directory anyway. |
1669 | */ |
1670 | static int64 |
1671 | _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf, |
1672 | bool sizeonly) |
1673 | { |
1674 | /* If symlink, write it as a directory anyway */ |
1675 | #ifndef WIN32 |
1676 | if (S_ISLNK(statbuf->st_mode)) |
1677 | #else |
1678 | if (pgwin32_is_junction(pathbuf)) |
1679 | #endif |
1680 | statbuf->st_mode = S_IFDIR | pg_dir_create_mode; |
1681 | |
1682 | return _tarWriteHeader(pathbuf + basepathlen + 1, NULL, statbuf, sizeonly); |
1683 | } |
1684 | |
1685 | /* |
1686 | * Increment the network transfer counter by the given number of bytes, |
1687 | * and sleep if necessary to comply with the requested network transfer |
1688 | * rate. |
1689 | */ |
1690 | static void |
1691 | throttle(size_t increment) |
1692 | { |
1693 | TimeOffset elapsed_min; |
1694 | |
1695 | if (throttling_counter < 0) |
1696 | return; |
1697 | |
1698 | throttling_counter += increment; |
1699 | if (throttling_counter < throttling_sample) |
1700 | return; |
1701 | |
1702 | /* How much time should have elapsed at minimum? */ |
1703 | elapsed_min = elapsed_min_unit * |
1704 | (throttling_counter / throttling_sample); |
1705 | |
1706 | /* |
1707 | * Since the latch could be set repeatedly because of concurrently WAL |
1708 | * activity, sleep in a loop to ensure enough time has passed. |
1709 | */ |
1710 | for (;;) |
1711 | { |
1712 | TimeOffset elapsed, |
1713 | sleep; |
1714 | int wait_result; |
1715 | |
1716 | /* Time elapsed since the last measurement (and possible wake up). */ |
1717 | elapsed = GetCurrentTimestamp() - throttled_last; |
1718 | |
1719 | /* sleep if the transfer is faster than it should be */ |
1720 | sleep = elapsed_min - elapsed; |
1721 | if (sleep <= 0) |
1722 | break; |
1723 | |
1724 | ResetLatch(MyLatch); |
1725 | |
1726 | /* We're eating a potentially set latch, so check for interrupts */ |
1727 | CHECK_FOR_INTERRUPTS(); |
1728 | |
1729 | /* |
1730 | * (TAR_SEND_SIZE / throttling_sample * elapsed_min_unit) should be |
1731 | * the maximum time to sleep. Thus the cast to long is safe. |
1732 | */ |
1733 | wait_result = WaitLatch(MyLatch, |
1734 | WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, |
1735 | (long) (sleep / 1000), |
1736 | WAIT_EVENT_BASE_BACKUP_THROTTLE); |
1737 | |
1738 | if (wait_result & WL_LATCH_SET) |
1739 | CHECK_FOR_INTERRUPTS(); |
1740 | |
1741 | /* Done waiting? */ |
1742 | if (wait_result & WL_TIMEOUT) |
1743 | break; |
1744 | } |
1745 | |
1746 | /* |
1747 | * As we work with integers, only whole multiple of throttling_sample was |
1748 | * processed. The rest will be done during the next call of this function. |
1749 | */ |
1750 | throttling_counter %= throttling_sample; |
1751 | |
1752 | /* |
1753 | * Time interval for the remaining amount and possible next increments |
1754 | * starts now. |
1755 | */ |
1756 | throttled_last = GetCurrentTimestamp(); |
1757 | } |
1758 | |