1/*-------------------------------------------------------------------------
2 *
3 * pg_resetwal.c
4 * A utility to "zero out" the xlog when it's corrupt beyond recovery.
5 * Can also rebuild pg_control if needed.
6 *
7 * The theory of operation is fairly simple:
8 * 1. Read the existing pg_control (which will include the last
9 * checkpoint record). If it is an old format then update to
10 * current format.
11 * 2. If pg_control is corrupt, attempt to intuit reasonable values,
12 * by scanning the old xlog if necessary.
13 * 3. Modify pg_control to reflect a "shutdown" state with a checkpoint
14 * record at the start of xlog.
15 * 4. Flush the existing xlog files and write a new segment with
16 * just a checkpoint record in it. The new segment is positioned
17 * just past the end of the old xlog, so that existing LSNs in
18 * data pages will appear to be "in the past".
19 * This is all pretty straightforward except for the intuition part of
20 * step 2 ...
21 *
22 *
23 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
24 * Portions Copyright (c) 1994, Regents of the University of California
25 *
26 * src/bin/pg_resetwal/pg_resetwal.c
27 *
28 *-------------------------------------------------------------------------
29 */
30
31/*
32 * We have to use postgres.h not postgres_fe.h here, because there's so much
33 * backend-only stuff in the XLOG include files we need. But we need a
34 * frontend-ish environment otherwise. Hence this ugly hack.
35 */
36#define FRONTEND 1
37
38#include "postgres.h"
39
40#include <dirent.h>
41#include <fcntl.h>
42#include <sys/stat.h>
43#include <sys/time.h>
44#include <time.h>
45#include <unistd.h>
46
47#include "access/transam.h"
48#include "access/tuptoaster.h"
49#include "access/multixact.h"
50#include "access/xlog.h"
51#include "access/xlog_internal.h"
52#include "common/controldata_utils.h"
53#include "common/fe_memutils.h"
54#include "common/file_perm.h"
55#include "common/logging.h"
56#include "common/restricted_token.h"
57#include "storage/large_object.h"
58#include "pg_getopt.h"
59#include "getopt_long.h"
60
61
/*
 * Program-wide state.
 *
 * ControlFile holds the pg_control contents being worked on; it is filled
 * either by ReadControlFile() or by GuessControlValues().  "guessed" is
 * raised whenever any of those contents could not be trusted.
 */
static ControlFileData ControlFile; /* pg_control values */
static XLogSegNo newXlogSegNo; /* new XLOG segment # */
static bool guessed = false; /* T if we had to guess at any values */
static const char *progname;

/*
 * Values requested on the command line.  Each one has a "not set" sentinel
 * that main() tests before applying it to ControlFile: (uint32) -1 for the
 * epoch and the multixact offset, 0 for everything else.
 */
static uint32 set_xid_epoch = (uint32) -1;
static TransactionId set_xid = 0;
static TransactionId set_oldest_commit_ts_xid = 0;
static TransactionId set_newest_commit_ts_xid = 0;
static Oid set_oid = 0;
static MultiXactId set_mxid = 0;
static MultiXactOffset set_mxoff = (MultiXactOffset) -1;

/* Lower bounds taken from the -l file name; 0 when -l was not given */
static uint32 minXlogTli = 0;
static XLogSegNo minXlogSegNo = 0;

/*
 * WalSegSz is the WAL segment size (bytes) used for the new WAL; main() sets
 * it from set_wal_segsize when --wal-segsize was given, else from the
 * control file.  set_wal_segsize stays 0 when the option was not given.
 */
static int WalSegSz;
static int set_wal_segsize;

/* Forward declarations of the helpers defined below main() */
static void CheckDataVersion(void);
static bool ReadControlFile(void);
static void GuessControlValues(void);
static void PrintControlValues(bool guessed);
static void PrintNewControlValues(void);
static void RewriteControlFile(void);
static void FindEndOfXLOG(void);
static void KillExistingXLOG(void);
static void KillExistingArchiveStatus(void);
static void WriteEmptyXLOG(void);
static void usage(void);
89
90
91int
92main(int argc, char *argv[])
93{
94 static struct option long_options[] = {
95 {"commit-timestamp-ids", required_argument, NULL, 'c'},
96 {"pgdata", required_argument, NULL, 'D'},
97 {"epoch", required_argument, NULL, 'e'},
98 {"force", no_argument, NULL, 'f'},
99 {"next-wal-file", required_argument, NULL, 'l'},
100 {"multixact-ids", required_argument, NULL, 'm'},
101 {"dry-run", no_argument, NULL, 'n'},
102 {"next-oid", required_argument, NULL, 'o'},
103 {"multixact-offset", required_argument, NULL, 'O'},
104 {"next-transaction-id", required_argument, NULL, 'x'},
105 {"wal-segsize", required_argument, NULL, 1},
106 {NULL, 0, NULL, 0}
107 };
108
109 int c;
110 bool force = false;
111 bool noupdate = false;
112 MultiXactId set_oldestmxid = 0;
113 char *endptr;
114 char *endptr2;
115 char *DataDir = NULL;
116 char *log_fname = NULL;
117 int fd;
118
119 pg_logging_init(argv[0]);
120 set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_resetwal"));
121 progname = get_progname(argv[0]);
122
123 if (argc > 1)
124 {
125 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
126 {
127 usage();
128 exit(0);
129 }
130 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
131 {
132 puts("pg_resetwal (PostgreSQL) " PG_VERSION);
133 exit(0);
134 }
135 }
136
137
138 while ((c = getopt_long(argc, argv, "c:D:e:fl:m:no:O:x:", long_options, NULL)) != -1)
139 {
140 switch (c)
141 {
142 case 'D':
143 DataDir = optarg;
144 break;
145
146 case 'f':
147 force = true;
148 break;
149
150 case 'n':
151 noupdate = true;
152 break;
153
154 case 'e':
155 set_xid_epoch = strtoul(optarg, &endptr, 0);
156 if (endptr == optarg || *endptr != '\0')
157 {
158 /*------
159 translator: the second %s is a command line argument (-e, etc) */
160 pg_log_error("invalid argument for option %s", "-e");
161 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
162 exit(1);
163 }
164 if (set_xid_epoch == -1)
165 {
166 pg_log_error("transaction ID epoch (-e) must not be -1");
167 exit(1);
168 }
169 break;
170
171 case 'x':
172 set_xid = strtoul(optarg, &endptr, 0);
173 if (endptr == optarg || *endptr != '\0')
174 {
175 pg_log_error("invalid argument for option %s", "-x");
176 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
177 exit(1);
178 }
179 if (set_xid == 0)
180 {
181 pg_log_error("transaction ID (-x) must not be 0");
182 exit(1);
183 }
184 break;
185
186 case 'c':
187 set_oldest_commit_ts_xid = strtoul(optarg, &endptr, 0);
188 if (endptr == optarg || *endptr != ',')
189 {
190 pg_log_error("invalid argument for option %s", "-c");
191 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
192 exit(1);
193 }
194 set_newest_commit_ts_xid = strtoul(endptr + 1, &endptr2, 0);
195 if (endptr2 == endptr + 1 || *endptr2 != '\0')
196 {
197 pg_log_error("invalid argument for option %s", "-c");
198 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
199 exit(1);
200 }
201
202 if (set_oldest_commit_ts_xid < 2 &&
203 set_oldest_commit_ts_xid != 0)
204 {
205 pg_log_error("transaction ID (-c) must be either 0 or greater than or equal to 2");
206 exit(1);
207 }
208
209 if (set_newest_commit_ts_xid < 2 &&
210 set_newest_commit_ts_xid != 0)
211 {
212 pg_log_error("transaction ID (-c) must be either 0 or greater than or equal to 2");
213 exit(1);
214 }
215 break;
216
217 case 'o':
218 set_oid = strtoul(optarg, &endptr, 0);
219 if (endptr == optarg || *endptr != '\0')
220 {
221 pg_log_error("invalid argument for option %s", "-o");
222 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
223 exit(1);
224 }
225 if (set_oid == 0)
226 {
227 pg_log_error("OID (-o) must not be 0");
228 exit(1);
229 }
230 break;
231
232 case 'm':
233 set_mxid = strtoul(optarg, &endptr, 0);
234 if (endptr == optarg || *endptr != ',')
235 {
236 pg_log_error("invalid argument for option %s", "-m");
237 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
238 exit(1);
239 }
240
241 set_oldestmxid = strtoul(endptr + 1, &endptr2, 0);
242 if (endptr2 == endptr + 1 || *endptr2 != '\0')
243 {
244 pg_log_error("invalid argument for option %s", "-m");
245 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
246 exit(1);
247 }
248 if (set_mxid == 0)
249 {
250 pg_log_error("multitransaction ID (-m) must not be 0");
251 exit(1);
252 }
253
254 /*
255 * XXX It'd be nice to have more sanity checks here, e.g. so
256 * that oldest is not wrapped around w.r.t. nextMulti.
257 */
258 if (set_oldestmxid == 0)
259 {
260 pg_log_error("oldest multitransaction ID (-m) must not be 0");
261 exit(1);
262 }
263 break;
264
265 case 'O':
266 set_mxoff = strtoul(optarg, &endptr, 0);
267 if (endptr == optarg || *endptr != '\0')
268 {
269 pg_log_error("invalid argument for option %s", "-O");
270 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
271 exit(1);
272 }
273 if (set_mxoff == -1)
274 {
275 pg_log_error("multitransaction offset (-O) must not be -1");
276 exit(1);
277 }
278 break;
279
280 case 'l':
281 if (strspn(optarg, "01234567890ABCDEFabcdef") != XLOG_FNAME_LEN)
282 {
283 pg_log_error("invalid argument for option %s", "-l");
284 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
285 exit(1);
286 }
287
288 /*
289 * XLogFromFileName requires wal segment size which is not yet
290 * set. Hence wal details are set later on.
291 */
292 log_fname = pg_strdup(optarg);
293 break;
294
295 case 1:
296 set_wal_segsize = strtol(optarg, &endptr, 10) * 1024 * 1024;
297 if (endptr == optarg || *endptr != '\0')
298 {
299 pg_log_error("argument of --wal-segsize must be a number");
300 exit(1);
301 }
302 if (!IsValidWalSegSize(set_wal_segsize))
303 {
304 pg_log_error("argument of --wal-segsize must be a power of 2 between 1 and 1024");
305 exit(1);
306 }
307 break;
308
309 default:
310 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
311 exit(1);
312 }
313 }
314
315 if (DataDir == NULL && optind < argc)
316 DataDir = argv[optind++];
317
318 /* Complain if any arguments remain */
319 if (optind < argc)
320 {
321 pg_log_error("too many command-line arguments (first is \"%s\")",
322 argv[optind]);
323 fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
324 progname);
325 exit(1);
326 }
327
328 if (DataDir == NULL)
329 {
330 pg_log_error("no data directory specified");
331 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
332 exit(1);
333 }
334
335 /*
336 * Don't allow pg_resetwal to be run as root, to avoid overwriting the
337 * ownership of files in the data directory. We need only check for root
338 * -- any other user won't have sufficient permissions to modify files in
339 * the data directory.
340 */
341#ifndef WIN32
342 if (geteuid() == 0)
343 {
344 pg_log_error("cannot be executed by \"root\"");
345 pg_log_info("You must run %s as the PostgreSQL superuser.",
346 progname);
347 exit(1);
348 }
349#endif
350
351 get_restricted_token();
352
353 /* Set mask based on PGDATA permissions */
354 if (!GetDataDirectoryCreatePerm(DataDir))
355 {
356 pg_log_error("could not read permissions of directory \"%s\": %m",
357 DataDir);
358 exit(1);
359 }
360
361 umask(pg_mode_mask);
362
363 if (chdir(DataDir) < 0)
364 {
365 pg_log_error("could not change directory to \"%s\": %m",
366 DataDir);
367 exit(1);
368 }
369
370 /* Check that data directory matches our server version */
371 CheckDataVersion();
372
373 /*
374 * Check for a postmaster lock file --- if there is one, refuse to
375 * proceed, on grounds we might be interfering with a live installation.
376 */
377 if ((fd = open("postmaster.pid", O_RDONLY, 0)) < 0)
378 {
379 if (errno != ENOENT)
380 {
381 pg_log_error("could not open file \"%s\" for reading: %m",
382 "postmaster.pid");
383 exit(1);
384 }
385 }
386 else
387 {
388 pg_log_error("lock file \"%s\" exists", "postmaster.pid");
389 pg_log_info("Is a server running? If not, delete the lock file and try again.");
390 exit(1);
391 }
392
393 /*
394 * Attempt to read the existing pg_control file
395 */
396 if (!ReadControlFile())
397 GuessControlValues();
398
399 /*
400 * If no new WAL segment size was specified, use the control file value.
401 */
402 if (set_wal_segsize != 0)
403 WalSegSz = set_wal_segsize;
404 else
405 WalSegSz = ControlFile.xlog_seg_size;
406
407 if (log_fname != NULL)
408 XLogFromFileName(log_fname, &minXlogTli, &minXlogSegNo, WalSegSz);
409
410 /*
411 * Also look at existing segment files to set up newXlogSegNo
412 */
413 FindEndOfXLOG();
414
415 /*
416 * If we're not going to proceed with the reset, print the current control
417 * file parameters.
418 */
419 if ((guessed && !force) || noupdate)
420 PrintControlValues(guessed);
421
422 /*
423 * Adjust fields if required by switches. (Do this now so that printout,
424 * if any, includes these values.)
425 */
426 if (set_xid_epoch != -1)
427 ControlFile.checkPointCopy.nextFullXid =
428 FullTransactionIdFromEpochAndXid(set_xid_epoch,
429 XidFromFullTransactionId(ControlFile.checkPointCopy.nextFullXid));
430
431 if (set_xid != 0)
432 {
433 ControlFile.checkPointCopy.nextFullXid =
434 FullTransactionIdFromEpochAndXid(EpochFromFullTransactionId(ControlFile.checkPointCopy.nextFullXid),
435 set_xid);
436
437 /*
438 * For the moment, just set oldestXid to a value that will force
439 * immediate autovacuum-for-wraparound. It's not clear whether adding
440 * user control of this is useful, so let's just do something that's
441 * reasonably safe. The magic constant here corresponds to the
442 * maximum allowed value of autovacuum_freeze_max_age.
443 */
444 ControlFile.checkPointCopy.oldestXid = set_xid - 2000000000;
445 if (ControlFile.checkPointCopy.oldestXid < FirstNormalTransactionId)
446 ControlFile.checkPointCopy.oldestXid += FirstNormalTransactionId;
447 ControlFile.checkPointCopy.oldestXidDB = InvalidOid;
448 }
449
450 if (set_oldest_commit_ts_xid != 0)
451 ControlFile.checkPointCopy.oldestCommitTsXid = set_oldest_commit_ts_xid;
452 if (set_newest_commit_ts_xid != 0)
453 ControlFile.checkPointCopy.newestCommitTsXid = set_newest_commit_ts_xid;
454
455 if (set_oid != 0)
456 ControlFile.checkPointCopy.nextOid = set_oid;
457
458 if (set_mxid != 0)
459 {
460 ControlFile.checkPointCopy.nextMulti = set_mxid;
461
462 ControlFile.checkPointCopy.oldestMulti = set_oldestmxid;
463 if (ControlFile.checkPointCopy.oldestMulti < FirstMultiXactId)
464 ControlFile.checkPointCopy.oldestMulti += FirstMultiXactId;
465 ControlFile.checkPointCopy.oldestMultiDB = InvalidOid;
466 }
467
468 if (set_mxoff != -1)
469 ControlFile.checkPointCopy.nextMultiOffset = set_mxoff;
470
471 if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
472 {
473 ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
474 ControlFile.checkPointCopy.PrevTimeLineID = minXlogTli;
475 }
476
477 if (set_wal_segsize != 0)
478 ControlFile.xlog_seg_size = WalSegSz;
479
480 if (minXlogSegNo > newXlogSegNo)
481 newXlogSegNo = minXlogSegNo;
482
483 /*
484 * If we had to guess anything, and -f was not given, just print the
485 * guessed values and exit. Also print if -n is given.
486 */
487 if ((guessed && !force) || noupdate)
488 {
489 PrintNewControlValues();
490 if (!noupdate)
491 {
492 printf(_("\nIf these values seem acceptable, use -f to force reset.\n"));
493 exit(1);
494 }
495 else
496 exit(0);
497 }
498
499 /*
500 * Don't reset from a dirty pg_control without -f, either.
501 */
502 if (ControlFile.state != DB_SHUTDOWNED && !force)
503 {
504 printf(_("The database server was not shut down cleanly.\n"
505 "Resetting the write-ahead log might cause data to be lost.\n"
506 "If you want to proceed anyway, use -f to force reset.\n"));
507 exit(1);
508 }
509
510 /*
511 * Else, do the dirty deed.
512 */
513 RewriteControlFile();
514 KillExistingXLOG();
515 KillExistingArchiveStatus();
516 WriteEmptyXLOG();
517
518 printf(_("Write-ahead log reset\n"));
519 return 0;
520}
521
522
523/*
524 * Look at the version string stored in PG_VERSION and decide if this utility
525 * can be run safely or not.
526 *
527 * We don't want to inject pg_control and WAL files that are for a different
528 * major version; that can't do anything good. Note that we don't treat
529 * mismatching version info in pg_control as a reason to bail out, because
530 * recovering from a corrupted pg_control is one of the main reasons for this
531 * program to exist at all. However, PG_VERSION is unlikely to get corrupted,
532 * and if it were it would be easy to fix by hand. So let's make this check
533 * to prevent simple user errors.
534 */
535static void
536CheckDataVersion(void)
537{
538 const char *ver_file = "PG_VERSION";
539 FILE *ver_fd;
540 char rawline[64];
541 int len;
542
543 if ((ver_fd = fopen(ver_file, "r")) == NULL)
544 {
545 pg_log_error("could not open file \"%s\" for reading: %m",
546 ver_file);
547 exit(1);
548 }
549
550 /* version number has to be the first line read */
551 if (!fgets(rawline, sizeof(rawline), ver_fd))
552 {
553 if (!ferror(ver_fd))
554 pg_log_error("unexpected empty file \"%s\"", ver_file);
555 else
556 pg_log_error("could not read file \"%s\": %m", ver_file);
557 exit(1);
558 }
559
560 /* remove trailing newline, handling Windows newlines as well */
561 len = strlen(rawline);
562 if (len > 0 && rawline[len - 1] == '\n')
563 {
564 rawline[--len] = '\0';
565 if (len > 0 && rawline[len - 1] == '\r')
566 rawline[--len] = '\0';
567 }
568
569 if (strcmp(rawline, PG_MAJORVERSION) != 0)
570 {
571 pg_log_error("data directory is of wrong version");
572 pg_log_info("File \"%s\" contains \"%s\", which is not compatible with this program's version \"%s\".",
573 ver_file, rawline, PG_MAJORVERSION);
574 exit(1);
575 }
576
577 fclose(ver_fd);
578}
579
580
581/*
582 * Try to read the existing pg_control file.
583 *
584 * This routine is also responsible for updating old pg_control versions
585 * to the current format. (Currently we don't do anything of the sort.)
586 */
587static bool
588ReadControlFile(void)
589{
590 int fd;
591 int len;
592 char *buffer;
593 pg_crc32c crc;
594
595 if ((fd = open(XLOG_CONTROL_FILE, O_RDONLY | PG_BINARY, 0)) < 0)
596 {
597 /*
598 * If pg_control is not there at all, or we can't read it, the odds
599 * are we've been handed a bad DataDir path, so give up. User can do
600 * "touch pg_control" to force us to proceed.
601 */
602 pg_log_error("could not open file \"%s\" for reading: %m",
603 XLOG_CONTROL_FILE);
604 if (errno == ENOENT)
605 pg_log_info("If you are sure the data directory path is correct, execute\n"
606 " touch %s\n"
607 "and try again.",
608 XLOG_CONTROL_FILE);
609 exit(1);
610 }
611
612 /* Use malloc to ensure we have a maxaligned buffer */
613 buffer = (char *) pg_malloc(PG_CONTROL_FILE_SIZE);
614
615 len = read(fd, buffer, PG_CONTROL_FILE_SIZE);
616 if (len < 0)
617 {
618 pg_log_error("could not read file \"%s\": %m", XLOG_CONTROL_FILE);
619 exit(1);
620 }
621 close(fd);
622
623 if (len >= sizeof(ControlFileData) &&
624 ((ControlFileData *) buffer)->pg_control_version == PG_CONTROL_VERSION)
625 {
626 /* Check the CRC. */
627 INIT_CRC32C(crc);
628 COMP_CRC32C(crc,
629 buffer,
630 offsetof(ControlFileData, crc));
631 FIN_CRC32C(crc);
632
633 if (!EQ_CRC32C(crc, ((ControlFileData *) buffer)->crc))
634 {
635 /* We will use the data but treat it as guessed. */
636 pg_log_warning("pg_control exists but has invalid CRC; proceed with caution");
637 guessed = true;
638 }
639
640 memcpy(&ControlFile, buffer, sizeof(ControlFile));
641
642 /* return false if WAL segment size is not valid */
643 if (!IsValidWalSegSize(ControlFile.xlog_seg_size))
644 {
645 pg_log_warning(ngettext("pg_control specifies invalid WAL segment size (%d byte); proceed with caution",
646 "pg_control specifies invalid WAL segment size (%d bytes); proceed with caution",
647 ControlFile.xlog_seg_size),
648 ControlFile.xlog_seg_size);
649 return false;
650 }
651
652 return true;
653 }
654
655 /* Looks like it's a mess. */
656 pg_log_warning("pg_control exists but is broken or wrong version; ignoring it");
657 return false;
658}
659
660
661/*
662 * Guess at pg_control values when we can't read the old ones.
663 */
664static void
665GuessControlValues(void)
666{
667 uint64 sysidentifier;
668 struct timeval tv;
669
670 /*
671 * Set up a completely default set of pg_control values.
672 */
673 guessed = true;
674 memset(&ControlFile, 0, sizeof(ControlFile));
675
676 ControlFile.pg_control_version = PG_CONTROL_VERSION;
677 ControlFile.catalog_version_no = CATALOG_VERSION_NO;
678
679 /*
680 * Create a new unique installation identifier, since we can no longer use
681 * any old XLOG records. See notes in xlog.c about the algorithm.
682 */
683 gettimeofday(&tv, NULL);
684 sysidentifier = ((uint64) tv.tv_sec) << 32;
685 sysidentifier |= ((uint64) tv.tv_usec) << 12;
686 sysidentifier |= getpid() & 0xFFF;
687
688 ControlFile.system_identifier = sysidentifier;
689
690 ControlFile.checkPointCopy.redo = SizeOfXLogLongPHD;
691 ControlFile.checkPointCopy.ThisTimeLineID = 1;
692 ControlFile.checkPointCopy.PrevTimeLineID = 1;
693 ControlFile.checkPointCopy.fullPageWrites = false;
694 ControlFile.checkPointCopy.nextFullXid =
695 FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
696 ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId;
697 ControlFile.checkPointCopy.nextMulti = FirstMultiXactId;
698 ControlFile.checkPointCopy.nextMultiOffset = 0;
699 ControlFile.checkPointCopy.oldestXid = FirstNormalTransactionId;
700 ControlFile.checkPointCopy.oldestXidDB = InvalidOid;
701 ControlFile.checkPointCopy.oldestMulti = FirstMultiXactId;
702 ControlFile.checkPointCopy.oldestMultiDB = InvalidOid;
703 ControlFile.checkPointCopy.time = (pg_time_t) time(NULL);
704 ControlFile.checkPointCopy.oldestActiveXid = InvalidTransactionId;
705
706 ControlFile.state = DB_SHUTDOWNED;
707 ControlFile.time = (pg_time_t) time(NULL);
708 ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
709 ControlFile.unloggedLSN = 1;
710
711 /* minRecoveryPoint, backupStartPoint and backupEndPoint can be left zero */
712
713 ControlFile.wal_level = WAL_LEVEL_MINIMAL;
714 ControlFile.wal_log_hints = false;
715 ControlFile.track_commit_timestamp = false;
716 ControlFile.MaxConnections = 100;
717 ControlFile.max_wal_senders = 10;
718 ControlFile.max_worker_processes = 8;
719 ControlFile.max_prepared_xacts = 0;
720 ControlFile.max_locks_per_xact = 64;
721
722 ControlFile.maxAlign = MAXIMUM_ALIGNOF;
723 ControlFile.floatFormat = FLOATFORMAT_VALUE;
724 ControlFile.blcksz = BLCKSZ;
725 ControlFile.relseg_size = RELSEG_SIZE;
726 ControlFile.xlog_blcksz = XLOG_BLCKSZ;
727 ControlFile.xlog_seg_size = DEFAULT_XLOG_SEG_SIZE;
728 ControlFile.nameDataLen = NAMEDATALEN;
729 ControlFile.indexMaxKeys = INDEX_MAX_KEYS;
730 ControlFile.toast_max_chunk_size = TOAST_MAX_CHUNK_SIZE;
731 ControlFile.loblksize = LOBLKSIZE;
732 ControlFile.float4ByVal = FLOAT4PASSBYVAL;
733 ControlFile.float8ByVal = FLOAT8PASSBYVAL;
734
735 /*
736 * XXX eventually, should try to grovel through old XLOG to develop more
737 * accurate values for TimeLineID, nextXID, etc.
738 */
739}
740
741
/*
 * Print the pg_control values held in ControlFile.
 *
 * guessed: selects the heading -- "Guessed pg_control values" when true,
 * "Current pg_control values" when false.  Note that this parameter
 * shadows the file-level variable of the same name.
 *
 * NB: this display should be just those fields that will not be
 * reset by RewriteControlFile().
 */
static void
PrintControlValues(bool guessed)
{
	char sysident_str[32];

	if (guessed)
		printf(_("Guessed pg_control values:\n\n"));
	else
		printf(_("Current pg_control values:\n\n"));

	/*
	 * Format system_identifier separately to keep platform-dependent format
	 * code out of the translatable message string.
	 */
	snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
			 ControlFile.system_identifier);

	/* Version and identity */
	printf(_("pg_control version number: %u\n"),
		   ControlFile.pg_control_version);
	printf(_("Catalog version number: %u\n"),
		   ControlFile.catalog_version_no);
	printf(_("Database system identifier: %s\n"),
		   sysident_str);

	/* Fields of the latest checkpoint copy */
	printf(_("Latest checkpoint's TimeLineID: %u\n"),
		   ControlFile.checkPointCopy.ThisTimeLineID);
	printf(_("Latest checkpoint's full_page_writes: %s\n"),
		   ControlFile.checkPointCopy.fullPageWrites ? _("on") : _("off"));
	/* NextXID is shown as epoch:xid */
	printf(_("Latest checkpoint's NextXID: %u:%u\n"),
		   EpochFromFullTransactionId(ControlFile.checkPointCopy.nextFullXid),
		   XidFromFullTransactionId(ControlFile.checkPointCopy.nextFullXid));
	printf(_("Latest checkpoint's NextOID: %u\n"),
		   ControlFile.checkPointCopy.nextOid);
	printf(_("Latest checkpoint's NextMultiXactId: %u\n"),
		   ControlFile.checkPointCopy.nextMulti);
	printf(_("Latest checkpoint's NextMultiOffset: %u\n"),
		   ControlFile.checkPointCopy.nextMultiOffset);
	printf(_("Latest checkpoint's oldestXID: %u\n"),
		   ControlFile.checkPointCopy.oldestXid);
	printf(_("Latest checkpoint's oldestXID's DB: %u\n"),
		   ControlFile.checkPointCopy.oldestXidDB);
	printf(_("Latest checkpoint's oldestActiveXID: %u\n"),
		   ControlFile.checkPointCopy.oldestActiveXid);
	printf(_("Latest checkpoint's oldestMultiXid: %u\n"),
		   ControlFile.checkPointCopy.oldestMulti);
	printf(_("Latest checkpoint's oldestMulti's DB: %u\n"),
		   ControlFile.checkPointCopy.oldestMultiDB);
	printf(_("Latest checkpoint's oldestCommitTsXid:%u\n"),
		   ControlFile.checkPointCopy.oldestCommitTsXid);
	printf(_("Latest checkpoint's newestCommitTsXid:%u\n"),
		   ControlFile.checkPointCopy.newestCommitTsXid);

	/* Layout and compatibility fields */
	printf(_("Maximum data alignment: %u\n"),
		   ControlFile.maxAlign);
	/* we don't print floatFormat since can't say much useful about it */
	printf(_("Database block size: %u\n"),
		   ControlFile.blcksz);
	printf(_("Blocks per segment of large relation: %u\n"),
		   ControlFile.relseg_size);
	printf(_("WAL block size: %u\n"),
		   ControlFile.xlog_blcksz);
	printf(_("Bytes per WAL segment: %u\n"),
		   ControlFile.xlog_seg_size);
	printf(_("Maximum length of identifiers: %u\n"),
		   ControlFile.nameDataLen);
	printf(_("Maximum columns in an index: %u\n"),
		   ControlFile.indexMaxKeys);
	printf(_("Maximum size of a TOAST chunk: %u\n"),
		   ControlFile.toast_max_chunk_size);
	printf(_("Size of a large-object chunk: %u\n"),
		   ControlFile.loblksize);
	/* This is no longer configurable, but users may still expect to see it: */
	printf(_("Date/time type storage: %s\n"),
		   _("64-bit integers"));
	printf(_("Float4 argument passing: %s\n"),
		   (ControlFile.float4ByVal ? _("by value") : _("by reference")));
	printf(_("Float8 argument passing: %s\n"),
		   (ControlFile.float8ByVal ? _("by value") : _("by reference")));
	printf(_("Data page checksum version: %u\n"),
		   ControlFile.data_checksum_version);
}
827
828
829/*
830 * Print the values to be changed.
831 */
832static void
833PrintNewControlValues(void)
834{
835 char fname[MAXFNAMELEN];
836
837 /* This will be always printed in order to keep format same. */
838 printf(_("\n\nValues to be changed:\n\n"));
839
840 XLogFileName(fname, ControlFile.checkPointCopy.ThisTimeLineID,
841 newXlogSegNo, WalSegSz);
842 printf(_("First log segment after reset: %s\n"), fname);
843
844 if (set_mxid != 0)
845 {
846 printf(_("NextMultiXactId: %u\n"),
847 ControlFile.checkPointCopy.nextMulti);
848 printf(_("OldestMultiXid: %u\n"),
849 ControlFile.checkPointCopy.oldestMulti);
850 printf(_("OldestMulti's DB: %u\n"),
851 ControlFile.checkPointCopy.oldestMultiDB);
852 }
853
854 if (set_mxoff != -1)
855 {
856 printf(_("NextMultiOffset: %u\n"),
857 ControlFile.checkPointCopy.nextMultiOffset);
858 }
859
860 if (set_oid != 0)
861 {
862 printf(_("NextOID: %u\n"),
863 ControlFile.checkPointCopy.nextOid);
864 }
865
866 if (set_xid != 0)
867 {
868 printf(_("NextXID: %u\n"),
869 XidFromFullTransactionId(ControlFile.checkPointCopy.nextFullXid));
870 printf(_("OldestXID: %u\n"),
871 ControlFile.checkPointCopy.oldestXid);
872 printf(_("OldestXID's DB: %u\n"),
873 ControlFile.checkPointCopy.oldestXidDB);
874 }
875
876 if (set_xid_epoch != -1)
877 {
878 printf(_("NextXID epoch: %u\n"),
879 EpochFromFullTransactionId(ControlFile.checkPointCopy.nextFullXid));
880 }
881
882 if (set_oldest_commit_ts_xid != 0)
883 {
884 printf(_("oldestCommitTsXid: %u\n"),
885 ControlFile.checkPointCopy.oldestCommitTsXid);
886 }
887 if (set_newest_commit_ts_xid != 0)
888 {
889 printf(_("newestCommitTsXid: %u\n"),
890 ControlFile.checkPointCopy.newestCommitTsXid);
891 }
892
893 if (set_wal_segsize != 0)
894 {
895 printf(_("Bytes per WAL segment: %u\n"),
896 ControlFile.xlog_seg_size);
897 }
898}
899
900
901/*
902 * Write out the new pg_control file.
903 */
904static void
905RewriteControlFile(void)
906{
907 /*
908 * Adjust fields as needed to force an empty XLOG starting at
909 * newXlogSegNo.
910 */
911 XLogSegNoOffsetToRecPtr(newXlogSegNo, SizeOfXLogLongPHD, WalSegSz,
912 ControlFile.checkPointCopy.redo);
913 ControlFile.checkPointCopy.time = (pg_time_t) time(NULL);
914
915 ControlFile.state = DB_SHUTDOWNED;
916 ControlFile.time = (pg_time_t) time(NULL);
917 ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
918 ControlFile.minRecoveryPoint = 0;
919 ControlFile.minRecoveryPointTLI = 0;
920 ControlFile.backupStartPoint = 0;
921 ControlFile.backupEndPoint = 0;
922 ControlFile.backupEndRequired = false;
923
924 /*
925 * Force the defaults for max_* settings. The values don't really matter
926 * as long as wal_level='minimal'; the postmaster will reset these fields
927 * anyway at startup.
928 */
929 ControlFile.wal_level = WAL_LEVEL_MINIMAL;
930 ControlFile.wal_log_hints = false;
931 ControlFile.track_commit_timestamp = false;
932 ControlFile.MaxConnections = 100;
933 ControlFile.max_wal_senders = 10;
934 ControlFile.max_worker_processes = 8;
935 ControlFile.max_prepared_xacts = 0;
936 ControlFile.max_locks_per_xact = 64;
937
938 /* The control file gets flushed here. */
939 update_controlfile(".", &ControlFile, true);
940}
941
942
943/*
944 * Scan existing XLOG files and determine the highest existing WAL address
945 *
946 * On entry, ControlFile.checkPointCopy.redo and ControlFile.xlog_seg_size
947 * are assumed valid (note that we allow the old xlog seg size to differ
948 * from what we're using). On exit, newXlogId and newXlogSeg are set to
949 * suitable values for the beginning of replacement WAL (in our seg size).
950 */
951static void
952FindEndOfXLOG(void)
953{
954 DIR *xldir;
955 struct dirent *xlde;
956 uint64 segs_per_xlogid;
957 uint64 xlogbytepos;
958
959 /*
960 * Initialize the max() computation using the last checkpoint address from
961 * old pg_control. Note that for the moment we are working with segment
962 * numbering according to the old xlog seg size.
963 */
964 segs_per_xlogid = (UINT64CONST(0x0000000100000000) / ControlFile.xlog_seg_size);
965 newXlogSegNo = ControlFile.checkPointCopy.redo / ControlFile.xlog_seg_size;
966
967 /*
968 * Scan the pg_wal directory to find existing WAL segment files. We assume
969 * any present have been used; in most scenarios this should be
970 * conservative, because of xlog.c's attempts to pre-create files.
971 */
972 xldir = opendir(XLOGDIR);
973 if (xldir == NULL)
974 {
975 pg_log_error("could not open directory \"%s\": %m", XLOGDIR);
976 exit(1);
977 }
978
979 while (errno = 0, (xlde = readdir(xldir)) != NULL)
980 {
981 if (IsXLogFileName(xlde->d_name) ||
982 IsPartialXLogFileName(xlde->d_name))
983 {
984 unsigned int tli,
985 log,
986 seg;
987 XLogSegNo segno;
988
989 /*
990 * Note: We don't use XLogFromFileName here, because we want to
991 * use the segment size from the control file, not the size the
992 * pg_resetwal binary was compiled with
993 */
994 sscanf(xlde->d_name, "%08X%08X%08X", &tli, &log, &seg);
995 segno = ((uint64) log) * segs_per_xlogid + seg;
996
997 /*
998 * Note: we take the max of all files found, regardless of their
999 * timelines. Another possibility would be to ignore files of
1000 * timelines other than the target TLI, but this seems safer.
1001 * Better too large a result than too small...
1002 */
1003 if (segno > newXlogSegNo)
1004 newXlogSegNo = segno;
1005 }
1006 }
1007
1008 if (errno)
1009 {
1010 pg_log_error("could not read directory \"%s\": %m", XLOGDIR);
1011 exit(1);
1012 }
1013
1014 if (closedir(xldir))
1015 {
1016 pg_log_error("could not close directory \"%s\": %m", XLOGDIR);
1017 exit(1);
1018 }
1019
1020 /*
1021 * Finally, convert to new xlog seg size, and advance by one to ensure we
1022 * are in virgin territory.
1023 */
1024 xlogbytepos = newXlogSegNo * ControlFile.xlog_seg_size;
1025 newXlogSegNo = (xlogbytepos + ControlFile.xlog_seg_size - 1) / WalSegSz;
1026 newXlogSegNo++;
1027}
1028
1029
1030/*
1031 * Remove existing XLOG files
1032 */
1033static void
1034KillExistingXLOG(void)
1035{
1036 DIR *xldir;
1037 struct dirent *xlde;
1038 char path[MAXPGPATH + sizeof(XLOGDIR)];
1039
1040 xldir = opendir(XLOGDIR);
1041 if (xldir == NULL)
1042 {
1043 pg_log_error("could not open directory \"%s\": %m", XLOGDIR);
1044 exit(1);
1045 }
1046
1047 while (errno = 0, (xlde = readdir(xldir)) != NULL)
1048 {
1049 if (IsXLogFileName(xlde->d_name) ||
1050 IsPartialXLogFileName(xlde->d_name))
1051 {
1052 snprintf(path, sizeof(path), "%s/%s", XLOGDIR, xlde->d_name);
1053 if (unlink(path) < 0)
1054 {
1055 pg_log_error("could not delete file \"%s\": %m", path);
1056 exit(1);
1057 }
1058 }
1059 }
1060
1061 if (errno)
1062 {
1063 pg_log_error("could not read directory \"%s\": %m", XLOGDIR);
1064 exit(1);
1065 }
1066
1067 if (closedir(xldir))
1068 {
1069 pg_log_error("could not close directory \"%s\": %m", XLOGDIR);
1070 exit(1);
1071 }
1072}
1073
1074
1075/*
1076 * Remove existing archive status files
1077 */
1078static void
1079KillExistingArchiveStatus(void)
1080{
1081#define ARCHSTATDIR XLOGDIR "/archive_status"
1082
1083 DIR *xldir;
1084 struct dirent *xlde;
1085 char path[MAXPGPATH + sizeof(ARCHSTATDIR)];
1086
1087 xldir = opendir(ARCHSTATDIR);
1088 if (xldir == NULL)
1089 {
1090 pg_log_error("could not open directory \"%s\": %m", ARCHSTATDIR);
1091 exit(1);
1092 }
1093
1094 while (errno = 0, (xlde = readdir(xldir)) != NULL)
1095 {
1096 if (strspn(xlde->d_name, "0123456789ABCDEF") == XLOG_FNAME_LEN &&
1097 (strcmp(xlde->d_name + XLOG_FNAME_LEN, ".ready") == 0 ||
1098 strcmp(xlde->d_name + XLOG_FNAME_LEN, ".done") == 0 ||
1099 strcmp(xlde->d_name + XLOG_FNAME_LEN, ".partial.ready") == 0 ||
1100 strcmp(xlde->d_name + XLOG_FNAME_LEN, ".partial.done") == 0))
1101 {
1102 snprintf(path, sizeof(path), "%s/%s", ARCHSTATDIR, xlde->d_name);
1103 if (unlink(path) < 0)
1104 {
1105 pg_log_error("could not delete file \"%s\": %m", path);
1106 exit(1);
1107 }
1108 }
1109 }
1110
1111 if (errno)
1112 {
1113 pg_log_error("could not read directory \"%s\": %m", ARCHSTATDIR);
1114 exit(1);
1115 }
1116
1117 if (closedir(xldir))
1118 {
1119 pg_log_error("could not close directory \"%s\": %m", ARCHSTATDIR);
1120 exit(1);
1121 }
1122}
1123
1124
1125/*
1126 * Write an empty XLOG file, containing only the checkpoint record
1127 * already set up in ControlFile.
1128 */
1129static void
1130WriteEmptyXLOG(void)
1131{
1132 PGAlignedXLogBlock buffer;
1133 XLogPageHeader page;
1134 XLogLongPageHeader longpage;
1135 XLogRecord *record;
1136 pg_crc32c crc;
1137 char path[MAXPGPATH];
1138 int fd;
1139 int nbytes;
1140 char *recptr;
1141
1142 memset(buffer.data, 0, XLOG_BLCKSZ);
1143
1144 /* Set up the XLOG page header */
1145 page = (XLogPageHeader) buffer.data;
1146 page->xlp_magic = XLOG_PAGE_MAGIC;
1147 page->xlp_info = XLP_LONG_HEADER;
1148 page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID;
1149 page->xlp_pageaddr = ControlFile.checkPointCopy.redo - SizeOfXLogLongPHD;
1150 longpage = (XLogLongPageHeader) page;
1151 longpage->xlp_sysid = ControlFile.system_identifier;
1152 longpage->xlp_seg_size = WalSegSz;
1153 longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
1154
1155 /* Insert the initial checkpoint record */
1156 recptr = (char *) page + SizeOfXLogLongPHD;
1157 record = (XLogRecord *) recptr;
1158 record->xl_prev = 0;
1159 record->xl_xid = InvalidTransactionId;
1160 record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint);
1161 record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
1162 record->xl_rmid = RM_XLOG_ID;
1163
1164 recptr += SizeOfXLogRecord;
1165 *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
1166 *(recptr++) = sizeof(CheckPoint);
1167 memcpy(recptr, &ControlFile.checkPointCopy,
1168 sizeof(CheckPoint));
1169
1170 INIT_CRC32C(crc);
1171 COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
1172 COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
1173 FIN_CRC32C(crc);
1174 record->xl_crc = crc;
1175
1176 /* Write the first page */
1177 XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID,
1178 newXlogSegNo, WalSegSz);
1179
1180 unlink(path);
1181
1182 fd = open(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
1183 pg_file_create_mode);
1184 if (fd < 0)
1185 {
1186 pg_log_error("could not open file \"%s\": %m", path);
1187 exit(1);
1188 }
1189
1190 errno = 0;
1191 if (write(fd, buffer.data, XLOG_BLCKSZ) != XLOG_BLCKSZ)
1192 {
1193 /* if write didn't set errno, assume problem is no disk space */
1194 if (errno == 0)
1195 errno = ENOSPC;
1196 pg_log_error("could not write file \"%s\": %m", path);
1197 exit(1);
1198 }
1199
1200 /* Fill the rest of the file with zeroes */
1201 memset(buffer.data, 0, XLOG_BLCKSZ);
1202 for (nbytes = XLOG_BLCKSZ; nbytes < WalSegSz; nbytes += XLOG_BLCKSZ)
1203 {
1204 errno = 0;
1205 if (write(fd, buffer.data, XLOG_BLCKSZ) != XLOG_BLCKSZ)
1206 {
1207 if (errno == 0)
1208 errno = ENOSPC;
1209 pg_log_error("could not write file \"%s\": %m", path);
1210 exit(1);
1211 }
1212 }
1213
1214 if (fsync(fd) != 0)
1215 {
1216 pg_log_error("fsync error: %m");
1217 exit(1);
1218 }
1219
1220 close(fd);
1221}
1222
1223
/*
 * Print the command-line help text to standard output.
 *
 * Emits, in order: a one-line description of the program, the invocation
 * syntax, the list of supported options, and the bug-report address.
 * progname is substituted into the first two messages.
 *
 * NOTE(review): every message is passed through _(), presumably the
 * message-translation macro (defined outside this chunk -- confirm).  If so,
 * the literal strings double as lookup keys, so splitting or merging them
 * would change which translations are found.
 */
static void
usage(void)
{
	printf(_("%s resets the PostgreSQL write-ahead log.\n\n"), progname);
	printf(_("Usage:\n %s [OPTION]... DATADIR\n\n"), progname);
	printf(_("Options:\n"));
	/* the -c entry is one message spanning three output lines */
	printf(_(" -c, --commit-timestamp-ids=XID,XID\n"
			 " set oldest and newest transactions bearing\n"
			 " commit timestamp (zero means no change)\n"));
	printf(_(" [-D, --pgdata=]DATADIR data directory\n"));
	printf(_(" -e, --epoch=XIDEPOCH set next transaction ID epoch\n"));
	printf(_(" -f, --force force update to be done\n"));
	printf(_(" -l, --next-wal-file=WALFILE set minimum starting location for new WAL\n"));
	printf(_(" -m, --multixact-ids=MXID,MXID set next and oldest multitransaction ID\n"));
	printf(_(" -n, --dry-run no update, just show what would be done\n"));
	printf(_(" -o, --next-oid=OID set next OID\n"));
	printf(_(" -O, --multixact-offset=OFFSET set next multitransaction offset\n"));
	printf(_(" -V, --version output version information, then exit\n"));
	printf(_(" -x, --next-transaction-id=XID set next transaction ID\n"));
	printf(_(" --wal-segsize=SIZE size of WAL segments, in megabytes\n"));
	printf(_(" -?, --help show this help, then exit\n"));
	printf(_("\nReport bugs to <pgsql-bugs@lists.postgresql.org>.\n"));
}
1247