1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * pg_rewind.c |
4 | * Synchronizes a PostgreSQL data directory to a new timeline |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * |
8 | *------------------------------------------------------------------------- |
9 | */ |
10 | #include "postgres_fe.h" |
11 | |
12 | #include <sys/stat.h> |
13 | #include <fcntl.h> |
14 | #include <time.h> |
15 | #include <unistd.h> |
16 | |
17 | #include "pg_rewind.h" |
18 | #include "fetch.h" |
19 | #include "file_ops.h" |
20 | #include "filemap.h" |
21 | |
22 | #include "access/timeline.h" |
23 | #include "access/xlog_internal.h" |
24 | #include "catalog/catversion.h" |
25 | #include "catalog/pg_control.h" |
26 | #include "common/controldata_utils.h" |
27 | #include "common/file_perm.h" |
28 | #include "common/file_utils.h" |
29 | #include "common/restricted_token.h" |
30 | #include "getopt_long.h" |
31 | #include "storage/bufpage.h" |
32 | |
/* Forward declarations of helpers defined later in this file. */
static void usage(const char *progname);

static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli,
							  XLogRecPtr checkpointloc);

static void digestControlFile(ControlFileData *ControlFile, char *source,
							  size_t size);
static void syncTargetDirectory(void);
static void sanityChecks(void);
static void findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex);

/*
 * Copies of the two clusters' control files.  main() fills both of them in
 * through digestControlFile() before anything else looks at them.
 */
static ControlFileData ControlFile_target;
static ControlFileData ControlFile_source;

/* Program name, set in main() from get_progname(argv[0]). */
const char *progname;
/* WAL segment size; assigned by digestControlFile() from xlog_seg_size. */
int			WalSegSz;

/*
 * Configuration options.  Exactly one of datadir_source / connstr_source is
 * non-NULL once main() has validated the command line.
 */
char	   *datadir_target = NULL;	/* -D / --target-pgdata */
char	   *datadir_source = NULL;	/* --source-pgdata */
char	   *connstr_source = NULL;	/* --source-server */

static bool debug = false;		/* --debug */
bool		showprogress = false;	/* -P / --progress */
bool		dry_run = false;	/* -n / --dry-run */
bool		do_sync = true;		/* cleared by -N / --no-sync */

/*
 * Target history: assigned by findCommonAncestorTimeline() from
 * getTimelineHistory(&ControlFile_target, ...).
 */
TimeLineHistoryEntry *targetHistory;
int			targetNentries;

/*
 * Progress counters read by progress_report().  When progress reporting is
 * enabled, main() loads fetch_size from filemap->fetch_size and resets
 * fetch_done to 0.  NOTE(review): fetch_done is presumably advanced by the
 * copy code outside this file -- confirm.
 */
uint64		fetch_size;
uint64		fetch_done;
67 | |
68 | |
/*
 * Print the command-line help text to stdout.
 *
 * progname is substituted into the summary and "Usage:" lines; every message
 * goes through _() so that it can be translated.
 */
static void
usage(const char *progname)
{
	/* One-line summary and invocation synopsis. */
	printf(_("%s resynchronizes a PostgreSQL cluster with another copy of the cluster.\n\n"),
		   progname);
	printf(_("Usage:\n %s [OPTION]...\n\n"),
		   progname);

	/* Option list. */
	printf(_("Options:\n"));
	printf(_(" -D, --target-pgdata=DIRECTORY existing data directory to modify\n"));
	printf(_(" --source-pgdata=DIRECTORY source data directory to synchronize with\n"));
	printf(_(" --source-server=CONNSTR source server to synchronize with\n"));
	printf(_(" -n, --dry-run stop before modifying anything\n"));
	printf(_(" -N, --no-sync do not wait for changes to be written\n"
			 " safely to disk\n"));
	printf(_(" -P, --progress write progress messages\n"));
	printf(_(" --debug write a lot of debug messages\n"));
	printf(_(" -V, --version output version information, then exit\n"));
	printf(_(" -?, --help show this help, then exit\n"));

	/* Where to send bug reports. */
	printf(_("\nReport bugs to <pgsql-bugs@lists.postgresql.org>.\n"));
}
87 | |
88 | |
89 | int |
90 | main(int argc, char **argv) |
91 | { |
92 | static struct option long_options[] = { |
93 | {"help" , no_argument, NULL, '?'}, |
94 | {"target-pgdata" , required_argument, NULL, 'D'}, |
95 | {"source-pgdata" , required_argument, NULL, 1}, |
96 | {"source-server" , required_argument, NULL, 2}, |
97 | {"version" , no_argument, NULL, 'V'}, |
98 | {"dry-run" , no_argument, NULL, 'n'}, |
99 | {"no-sync" , no_argument, NULL, 'N'}, |
100 | {"progress" , no_argument, NULL, 'P'}, |
101 | {"debug" , no_argument, NULL, 3}, |
102 | {NULL, 0, NULL, 0} |
103 | }; |
104 | int option_index; |
105 | int c; |
106 | XLogRecPtr divergerec; |
107 | int lastcommontliIndex; |
108 | XLogRecPtr chkptrec; |
109 | TimeLineID chkpttli; |
110 | XLogRecPtr chkptredo; |
111 | size_t size; |
112 | char *buffer; |
113 | bool rewind_needed; |
114 | XLogRecPtr endrec; |
115 | TimeLineID endtli; |
116 | ControlFileData ControlFile_new; |
117 | |
118 | pg_logging_init(argv[0]); |
119 | set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_rewind" )); |
120 | progname = get_progname(argv[0]); |
121 | |
122 | /* Process command-line arguments */ |
123 | if (argc > 1) |
124 | { |
125 | if (strcmp(argv[1], "--help" ) == 0 || strcmp(argv[1], "-?" ) == 0) |
126 | { |
127 | usage(progname); |
128 | exit(0); |
129 | } |
130 | if (strcmp(argv[1], "--version" ) == 0 || strcmp(argv[1], "-V" ) == 0) |
131 | { |
132 | puts("pg_rewind (PostgreSQL) " PG_VERSION); |
133 | exit(0); |
134 | } |
135 | } |
136 | |
137 | while ((c = getopt_long(argc, argv, "D:nNP" , long_options, &option_index)) != -1) |
138 | { |
139 | switch (c) |
140 | { |
141 | case '?': |
142 | fprintf(stderr, _("Try \"%s --help\" for more information.\n" ), progname); |
143 | exit(1); |
144 | |
145 | case 'P': |
146 | showprogress = true; |
147 | break; |
148 | |
149 | case 'n': |
150 | dry_run = true; |
151 | break; |
152 | |
153 | case 'N': |
154 | do_sync = false; |
155 | break; |
156 | |
157 | case 3: |
158 | debug = true; |
159 | pg_logging_set_level(PG_LOG_DEBUG); |
160 | break; |
161 | |
162 | case 'D': /* -D or --target-pgdata */ |
163 | datadir_target = pg_strdup(optarg); |
164 | break; |
165 | |
166 | case 1: /* --source-pgdata */ |
167 | datadir_source = pg_strdup(optarg); |
168 | break; |
169 | case 2: /* --source-server */ |
170 | connstr_source = pg_strdup(optarg); |
171 | break; |
172 | } |
173 | } |
174 | |
175 | if (datadir_source == NULL && connstr_source == NULL) |
176 | { |
177 | pg_log_error("no source specified (--source-pgdata or --source-server)" ); |
178 | fprintf(stderr, _("Try \"%s --help\" for more information.\n" ), progname); |
179 | exit(1); |
180 | } |
181 | |
182 | if (datadir_source != NULL && connstr_source != NULL) |
183 | { |
184 | pg_log_error("only one of --source-pgdata or --source-server can be specified" ); |
185 | fprintf(stderr, _("Try \"%s --help\" for more information.\n" ), progname); |
186 | exit(1); |
187 | } |
188 | |
189 | if (datadir_target == NULL) |
190 | { |
191 | pg_log_error("no target data directory specified (--target-pgdata)" ); |
192 | fprintf(stderr, _("Try \"%s --help\" for more information.\n" ), progname); |
193 | exit(1); |
194 | } |
195 | |
196 | if (optind < argc) |
197 | { |
198 | pg_log_error("too many command-line arguments (first is \"%s\")" , |
199 | argv[optind]); |
200 | fprintf(stderr, _("Try \"%s --help\" for more information.\n" ), progname); |
201 | exit(1); |
202 | } |
203 | |
204 | /* |
205 | * Don't allow pg_rewind to be run as root, to avoid overwriting the |
206 | * ownership of files in the data directory. We need only check for root |
207 | * -- any other user won't have sufficient permissions to modify files in |
208 | * the data directory. |
209 | */ |
210 | #ifndef WIN32 |
211 | if (geteuid() == 0) |
212 | { |
213 | pg_log_error("cannot be executed by \"root\"" ); |
214 | fprintf(stderr, _("You must run %s as the PostgreSQL superuser.\n" ), |
215 | progname); |
216 | exit(1); |
217 | } |
218 | #endif |
219 | |
220 | get_restricted_token(); |
221 | |
222 | /* Set mask based on PGDATA permissions */ |
223 | if (!GetDataDirectoryCreatePerm(datadir_target)) |
224 | { |
225 | pg_log_error("could not read permissions of directory \"%s\": %m" , |
226 | datadir_target); |
227 | exit(1); |
228 | } |
229 | |
230 | umask(pg_mode_mask); |
231 | |
232 | /* Connect to remote server */ |
233 | if (connstr_source) |
234 | libpqConnect(connstr_source); |
235 | |
236 | /* |
237 | * Ok, we have all the options and we're ready to start. Read in all the |
238 | * information we need from both clusters. |
239 | */ |
240 | buffer = slurpFile(datadir_target, "global/pg_control" , &size); |
241 | digestControlFile(&ControlFile_target, buffer, size); |
242 | pg_free(buffer); |
243 | |
244 | buffer = fetchFile("global/pg_control" , &size); |
245 | digestControlFile(&ControlFile_source, buffer, size); |
246 | pg_free(buffer); |
247 | |
248 | sanityChecks(); |
249 | |
250 | /* |
251 | * If both clusters are already on the same timeline, there's nothing to |
252 | * do. |
253 | */ |
254 | if (ControlFile_target.checkPointCopy.ThisTimeLineID == ControlFile_source.checkPointCopy.ThisTimeLineID) |
255 | { |
256 | pg_log_info("source and target cluster are on the same timeline" ); |
257 | rewind_needed = false; |
258 | } |
259 | else |
260 | { |
261 | findCommonAncestorTimeline(&divergerec, &lastcommontliIndex); |
262 | pg_log_info("servers diverged at WAL location %X/%X on timeline %u" , |
263 | (uint32) (divergerec >> 32), (uint32) divergerec, |
264 | targetHistory[lastcommontliIndex].tli); |
265 | |
266 | /* |
267 | * Check for the possibility that the target is in fact a direct |
268 | * ancestor of the source. In that case, there is no divergent history |
269 | * in the target that needs rewinding. |
270 | */ |
271 | if (ControlFile_target.checkPoint >= divergerec) |
272 | { |
273 | rewind_needed = true; |
274 | } |
275 | else |
276 | { |
277 | XLogRecPtr chkptendrec; |
278 | |
279 | /* Read the checkpoint record on the target to see where it ends. */ |
280 | chkptendrec = readOneRecord(datadir_target, |
281 | ControlFile_target.checkPoint, |
282 | targetNentries - 1); |
283 | |
284 | /* |
285 | * If the histories diverged exactly at the end of the shutdown |
286 | * checkpoint record on the target, there are no WAL records in |
287 | * the target that don't belong in the source's history, and no |
288 | * rewind is needed. |
289 | */ |
290 | if (chkptendrec == divergerec) |
291 | rewind_needed = false; |
292 | else |
293 | rewind_needed = true; |
294 | } |
295 | } |
296 | |
297 | if (!rewind_needed) |
298 | { |
299 | pg_log_info("no rewind required" ); |
300 | exit(0); |
301 | } |
302 | |
303 | findLastCheckpoint(datadir_target, divergerec, |
304 | lastcommontliIndex, |
305 | &chkptrec, &chkpttli, &chkptredo); |
306 | pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u" , |
307 | (uint32) (chkptrec >> 32), (uint32) chkptrec, |
308 | chkpttli); |
309 | |
310 | /* |
311 | * Build the filemap, by comparing the source and target data directories. |
312 | */ |
313 | filemap_create(); |
314 | if (showprogress) |
315 | pg_log_info("reading source file list" ); |
316 | fetchSourceFileList(); |
317 | if (showprogress) |
318 | pg_log_info("reading target file list" ); |
319 | traverse_datadir(datadir_target, &process_target_file); |
320 | |
321 | /* |
322 | * Read the target WAL from last checkpoint before the point of fork, to |
323 | * extract all the pages that were modified on the target cluster after |
324 | * the fork. We can stop reading after reaching the final shutdown record. |
325 | * XXX: If we supported rewinding a server that was not shut down cleanly, |
326 | * we would need to replay until the end of WAL here. |
327 | */ |
328 | if (showprogress) |
329 | pg_log_info("reading WAL in target" ); |
330 | extractPageMap(datadir_target, chkptrec, lastcommontliIndex, |
331 | ControlFile_target.checkPoint); |
332 | filemap_finalize(); |
333 | |
334 | if (showprogress) |
335 | calculate_totals(); |
336 | |
337 | /* this is too verbose even for verbose mode */ |
338 | if (debug) |
339 | print_filemap(); |
340 | |
341 | /* |
342 | * Ok, we're ready to start copying things over. |
343 | */ |
344 | if (showprogress) |
345 | { |
346 | pg_log_info("need to copy %lu MB (total source directory size is %lu MB)" , |
347 | (unsigned long) (filemap->fetch_size / (1024 * 1024)), |
348 | (unsigned long) (filemap->total_size / (1024 * 1024))); |
349 | |
350 | fetch_size = filemap->fetch_size; |
351 | fetch_done = 0; |
352 | } |
353 | |
354 | /* |
355 | * This is the point of no return. Once we start copying things, we have |
356 | * modified the target directory and there is no turning back! |
357 | */ |
358 | |
359 | executeFileMap(); |
360 | |
361 | progress_report(true); |
362 | printf("\n" ); |
363 | |
364 | if (showprogress) |
365 | pg_log_info("creating backup label and updating control file" ); |
366 | createBackupLabel(chkptredo, chkpttli, chkptrec); |
367 | |
368 | /* |
369 | * Update control file of target. Make it ready to perform archive |
370 | * recovery when restarting. |
371 | * |
372 | * minRecoveryPoint is set to the current WAL insert location in the |
373 | * source server. Like in an online backup, it's important that we recover |
374 | * all the WAL that was generated while we copied the files over. |
375 | */ |
376 | memcpy(&ControlFile_new, &ControlFile_source, sizeof(ControlFileData)); |
377 | |
378 | if (connstr_source) |
379 | { |
380 | endrec = libpqGetCurrentXlogInsertLocation(); |
381 | endtli = ControlFile_source.checkPointCopy.ThisTimeLineID; |
382 | } |
383 | else |
384 | { |
385 | endrec = ControlFile_source.checkPoint; |
386 | endtli = ControlFile_source.checkPointCopy.ThisTimeLineID; |
387 | } |
388 | ControlFile_new.minRecoveryPoint = endrec; |
389 | ControlFile_new.minRecoveryPointTLI = endtli; |
390 | ControlFile_new.state = DB_IN_ARCHIVE_RECOVERY; |
391 | update_controlfile(datadir_target, &ControlFile_new, do_sync); |
392 | |
393 | if (showprogress) |
394 | pg_log_info("syncing target data directory" ); |
395 | syncTargetDirectory(); |
396 | |
397 | pg_log_info("Done!" ); |
398 | |
399 | return 0; |
400 | } |
401 | |
402 | static void |
403 | sanityChecks(void) |
404 | { |
405 | /* TODO Check that there's no backup_label in either cluster */ |
406 | |
407 | /* Check system_id match */ |
408 | if (ControlFile_target.system_identifier != ControlFile_source.system_identifier) |
409 | pg_fatal("source and target clusters are from different systems" ); |
410 | |
411 | /* check version */ |
412 | if (ControlFile_target.pg_control_version != PG_CONTROL_VERSION || |
413 | ControlFile_source.pg_control_version != PG_CONTROL_VERSION || |
414 | ControlFile_target.catalog_version_no != CATALOG_VERSION_NO || |
415 | ControlFile_source.catalog_version_no != CATALOG_VERSION_NO) |
416 | { |
417 | pg_fatal("clusters are not compatible with this version of pg_rewind" ); |
418 | } |
419 | |
420 | /* |
421 | * Target cluster need to use checksums or hint bit wal-logging, this to |
422 | * prevent from data corruption that could occur because of hint bits. |
423 | */ |
424 | if (ControlFile_target.data_checksum_version != PG_DATA_CHECKSUM_VERSION && |
425 | !ControlFile_target.wal_log_hints) |
426 | { |
427 | pg_fatal("target server needs to use either data checksums or \"wal_log_hints = on\"" ); |
428 | } |
429 | |
430 | /* |
431 | * Target cluster better not be running. This doesn't guard against |
432 | * someone starting the cluster concurrently. Also, this is probably more |
433 | * strict than necessary; it's OK if the target node was not shut down |
434 | * cleanly, as long as it isn't running at the moment. |
435 | */ |
436 | if (ControlFile_target.state != DB_SHUTDOWNED && |
437 | ControlFile_target.state != DB_SHUTDOWNED_IN_RECOVERY) |
438 | pg_fatal("target server must be shut down cleanly" ); |
439 | |
440 | /* |
441 | * When the source is a data directory, also require that the source |
442 | * server is shut down. There isn't any very strong reason for this |
443 | * limitation, but better safe than sorry. |
444 | */ |
445 | if (datadir_source && |
446 | ControlFile_source.state != DB_SHUTDOWNED && |
447 | ControlFile_source.state != DB_SHUTDOWNED_IN_RECOVERY) |
448 | pg_fatal("source data directory must be shut down cleanly" ); |
449 | } |
450 | |
451 | /* |
452 | * Print a progress report based on the fetch_size and fetch_done variables. |
453 | * |
454 | * Progress report is written at maximum once per second, unless the |
455 | * force parameter is set to true. |
456 | */ |
457 | void |
458 | progress_report(bool force) |
459 | { |
460 | static pg_time_t last_progress_report = 0; |
461 | int percent; |
462 | char fetch_done_str[32]; |
463 | char fetch_size_str[32]; |
464 | pg_time_t now; |
465 | |
466 | if (!showprogress) |
467 | return; |
468 | |
469 | now = time(NULL); |
470 | if (now == last_progress_report && !force) |
471 | return; /* Max once per second */ |
472 | |
473 | last_progress_report = now; |
474 | percent = fetch_size ? (int) ((fetch_done) * 100 / fetch_size) : 0; |
475 | |
476 | /* |
477 | * Avoid overflowing past 100% or the full size. This may make the total |
478 | * size number change as we approach the end of the backup (the estimate |
479 | * will always be wrong if WAL is included), but that's better than having |
480 | * the done column be bigger than the total. |
481 | */ |
482 | if (percent > 100) |
483 | percent = 100; |
484 | if (fetch_done > fetch_size) |
485 | fetch_size = fetch_done; |
486 | |
487 | /* |
488 | * Separate step to keep platform-dependent format code out of |
489 | * translatable strings. And we only test for INT64_FORMAT availability |
490 | * in snprintf, not fprintf. |
491 | */ |
492 | snprintf(fetch_done_str, sizeof(fetch_done_str), INT64_FORMAT, |
493 | fetch_done / 1024); |
494 | snprintf(fetch_size_str, sizeof(fetch_size_str), INT64_FORMAT, |
495 | fetch_size / 1024); |
496 | |
497 | fprintf(stderr, _("%*s/%s kB (%d%%) copied" ), |
498 | (int) strlen(fetch_size_str), fetch_done_str, fetch_size_str, |
499 | percent); |
500 | if (isatty(fileno(stderr))) |
501 | fprintf(stderr, "\r" ); |
502 | else |
503 | fprintf(stderr, "\n" ); |
504 | } |
505 | |
506 | /* |
507 | * Find minimum from two WAL locations assuming InvalidXLogRecPtr means |
508 | * infinity as src/include/access/timeline.h states. This routine should |
509 | * be used only when comparing WAL locations related to history files. |
510 | */ |
511 | static XLogRecPtr |
512 | MinXLogRecPtr(XLogRecPtr a, XLogRecPtr b) |
513 | { |
514 | if (XLogRecPtrIsInvalid(a)) |
515 | return b; |
516 | else if (XLogRecPtrIsInvalid(b)) |
517 | return a; |
518 | else |
519 | return Min(a, b); |
520 | } |
521 | |
522 | /* |
523 | * Retrieve timeline history for given control file which should behold |
524 | * either source or target. |
525 | */ |
526 | static TimeLineHistoryEntry * |
527 | getTimelineHistory(ControlFileData *controlFile, int *nentries) |
528 | { |
529 | TimeLineHistoryEntry *history; |
530 | TimeLineID tli; |
531 | |
532 | tli = controlFile->checkPointCopy.ThisTimeLineID; |
533 | |
534 | /* |
535 | * Timeline 1 does not have a history file, so there is no need to check |
536 | * and fake an entry with infinite start and end positions. |
537 | */ |
538 | if (tli == 1) |
539 | { |
540 | history = (TimeLineHistoryEntry *) pg_malloc(sizeof(TimeLineHistoryEntry)); |
541 | history->tli = tli; |
542 | history->begin = history->end = InvalidXLogRecPtr; |
543 | *nentries = 1; |
544 | } |
545 | else |
546 | { |
547 | char path[MAXPGPATH]; |
548 | char *histfile; |
549 | |
550 | TLHistoryFilePath(path, tli); |
551 | |
552 | /* Get history file from appropriate source */ |
553 | if (controlFile == &ControlFile_source) |
554 | histfile = fetchFile(path, NULL); |
555 | else if (controlFile == &ControlFile_target) |
556 | histfile = slurpFile(datadir_target, path, NULL); |
557 | else |
558 | pg_fatal("invalid control file" ); |
559 | |
560 | history = rewind_parseTimeLineHistory(histfile, tli, nentries); |
561 | pg_free(histfile); |
562 | } |
563 | |
564 | if (debug) |
565 | { |
566 | int i; |
567 | |
568 | if (controlFile == &ControlFile_source) |
569 | pg_log_debug("Source timeline history:" ); |
570 | else if (controlFile == &ControlFile_target) |
571 | pg_log_debug("Target timeline history:" ); |
572 | else |
573 | Assert(false); |
574 | |
575 | /* |
576 | * Print the target timeline history. |
577 | */ |
578 | for (i = 0; i < targetNentries; i++) |
579 | { |
580 | TimeLineHistoryEntry *entry; |
581 | |
582 | entry = &history[i]; |
583 | pg_log_debug("%d: %X/%X - %X/%X" , entry->tli, |
584 | (uint32) (entry->begin >> 32), (uint32) (entry->begin), |
585 | (uint32) (entry->end >> 32), (uint32) (entry->end)); |
586 | } |
587 | } |
588 | |
589 | return history; |
590 | } |
591 | |
592 | /* |
593 | * Determine the TLI of the last common timeline in the timeline history of the |
594 | * two clusters. targetHistory is filled with target timeline history and |
595 | * targetNentries is number of items in targetHistory. *tliIndex is set to the |
596 | * index of last common timeline in targetHistory array, and *recptr is set to |
597 | * the position where the timeline history diverged (ie. the first WAL record |
598 | * that's not the same in both clusters). |
599 | * |
600 | * Control files of both clusters must be read into ControlFile_target/source |
601 | * before calling this routine. |
602 | */ |
603 | static void |
604 | findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex) |
605 | { |
606 | TimeLineHistoryEntry *sourceHistory; |
607 | int sourceNentries; |
608 | int i, |
609 | n; |
610 | |
611 | /* Retrieve timelines for both source and target */ |
612 | sourceHistory = getTimelineHistory(&ControlFile_source, &sourceNentries); |
613 | targetHistory = getTimelineHistory(&ControlFile_target, &targetNentries); |
614 | |
615 | /* |
616 | * Trace the history forward, until we hit the timeline diverge. It may |
617 | * still be possible that the source and target nodes used the same |
618 | * timeline number in their history but with different start position |
619 | * depending on the history files that each node has fetched in previous |
620 | * recovery processes. Hence check the start position of the new timeline |
621 | * as well and move down by one extra timeline entry if they do not match. |
622 | */ |
623 | n = Min(sourceNentries, targetNentries); |
624 | for (i = 0; i < n; i++) |
625 | { |
626 | if (sourceHistory[i].tli != targetHistory[i].tli || |
627 | sourceHistory[i].begin != targetHistory[i].begin) |
628 | break; |
629 | } |
630 | |
631 | if (i > 0) |
632 | { |
633 | i--; |
634 | *recptr = MinXLogRecPtr(sourceHistory[i].end, targetHistory[i].end); |
635 | *tliIndex = i; |
636 | |
637 | pg_free(sourceHistory); |
638 | return; |
639 | } |
640 | else |
641 | { |
642 | pg_fatal("could not find common ancestor of the source and target cluster's timelines" ); |
643 | } |
644 | } |
645 | |
646 | |
647 | /* |
648 | * Create a backup_label file that forces recovery to begin at the last common |
649 | * checkpoint. |
650 | */ |
651 | static void |
652 | createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpointloc) |
653 | { |
654 | XLogSegNo startsegno; |
655 | time_t stamp_time; |
656 | char strfbuf[128]; |
657 | char xlogfilename[MAXFNAMELEN]; |
658 | struct tm *tmp; |
659 | char buf[1000]; |
660 | int len; |
661 | |
662 | XLByteToSeg(startpoint, startsegno, WalSegSz); |
663 | XLogFileName(xlogfilename, starttli, startsegno, WalSegSz); |
664 | |
665 | /* |
666 | * Construct backup label file |
667 | */ |
668 | stamp_time = time(NULL); |
669 | tmp = localtime(&stamp_time); |
670 | strftime(strfbuf, sizeof(strfbuf), "%Y-%m-%d %H:%M:%S %Z" , tmp); |
671 | |
672 | len = snprintf(buf, sizeof(buf), |
673 | "START WAL LOCATION: %X/%X (file %s)\n" |
674 | "CHECKPOINT LOCATION: %X/%X\n" |
675 | "BACKUP METHOD: pg_rewind\n" |
676 | "BACKUP FROM: standby\n" |
677 | "START TIME: %s\n" , |
678 | /* omit LABEL: line */ |
679 | (uint32) (startpoint >> 32), (uint32) startpoint, xlogfilename, |
680 | (uint32) (checkpointloc >> 32), (uint32) checkpointloc, |
681 | strfbuf); |
682 | if (len >= sizeof(buf)) |
683 | pg_fatal("backup label buffer too small" ); /* shouldn't happen */ |
684 | |
685 | /* TODO: move old file out of the way, if any. */ |
686 | open_target_file("backup_label" , true); /* BACKUP_LABEL_FILE */ |
687 | write_target_range(buf, 0, len); |
688 | close_target_file(); |
689 | } |
690 | |
691 | /* |
692 | * Check CRC of control file |
693 | */ |
694 | static void |
695 | checkControlFile(ControlFileData *ControlFile) |
696 | { |
697 | pg_crc32c crc; |
698 | |
699 | /* Calculate CRC */ |
700 | INIT_CRC32C(crc); |
701 | COMP_CRC32C(crc, (char *) ControlFile, offsetof(ControlFileData, crc)); |
702 | FIN_CRC32C(crc); |
703 | |
704 | /* And simply compare it */ |
705 | if (!EQ_CRC32C(crc, ControlFile->crc)) |
706 | pg_fatal("unexpected control file CRC" ); |
707 | } |
708 | |
709 | /* |
710 | * Verify control file contents in the buffer src, and copy it to *ControlFile. |
711 | */ |
712 | static void |
713 | digestControlFile(ControlFileData *ControlFile, char *src, size_t size) |
714 | { |
715 | if (size != PG_CONTROL_FILE_SIZE) |
716 | pg_fatal("unexpected control file size %d, expected %d" , |
717 | (int) size, PG_CONTROL_FILE_SIZE); |
718 | |
719 | memcpy(ControlFile, src, sizeof(ControlFileData)); |
720 | |
721 | /* set and validate WalSegSz */ |
722 | WalSegSz = ControlFile->xlog_seg_size; |
723 | |
724 | if (!IsValidWalSegSize(WalSegSz)) |
725 | pg_fatal(ngettext("WAL segment size must be a power of two between 1 MB and 1 GB, but the control file specifies %d byte" , |
726 | "WAL segment size must be a power of two between 1 MB and 1 GB, but the control file specifies %d bytes" , |
727 | WalSegSz), |
728 | WalSegSz); |
729 | |
730 | /* Additional checks on control file */ |
731 | checkControlFile(ControlFile); |
732 | } |
733 | |
734 | /* |
735 | * Sync target data directory to ensure that modifications are safely on disk. |
736 | * |
737 | * We do this once, for the whole data directory, for performance reasons. At |
738 | * the end of pg_rewind's run, the kernel is likely to already have flushed |
739 | * most dirty buffers to disk. Additionally fsync_pgdata uses a two-pass |
740 | * approach (only initiating writeback in the first pass), which often reduces |
741 | * the overall amount of IO noticeably. |
742 | */ |
743 | static void |
744 | syncTargetDirectory(void) |
745 | { |
746 | if (!do_sync || dry_run) |
747 | return; |
748 | |
749 | fsync_pgdata(datadir_target, PG_VERSION_NUM); |
750 | } |
751 | |