1/*-------------------------------------------------------------------------
2 *
3 * pg_checksums.c
4 * Checks, enables or disables page level checksums for an offline
5 * cluster
6 *
7 * Copyright (c) 2010-2019, PostgreSQL Global Development Group
8 *
9 * IDENTIFICATION
10 * src/bin/pg_checksums/pg_checksums.c
11 *
12 *-------------------------------------------------------------------------
13 */
14
#include "postgres_fe.h"

#include <dirent.h>
#include <errno.h>
#include <time.h>
#include <sys/stat.h>
#include <unistd.h>

#include "access/xlog_internal.h"
#include "common/controldata_utils.h"
#include "common/file_perm.h"
#include "common/file_utils.h"
#include "common/logging.h"
#include "getopt_long.h"
#include "pg_getopt.h"
#include "storage/bufpage.h"
#include "storage/checksum.h"
#include "storage/checksum_impl.h"
32
33
/* Running totals accumulated while relation files are scanned */
static int64 files = 0;			/* files opened by scan_file() */
static int64 blocks = 0;		/* complete blocks read by scan_file() */
static int64 badblocks = 0;		/* checksum mismatches found in check mode */
static ControlFileData *ControlFile;	/* control file data, loaded in main() */

/* Settings taken from the command line in main() */
static char *only_filenode = NULL;	/* -f: only scan files of this filenode */
static bool do_sync = true;		/* cleared by -N */
static bool verbose = false;	/* set by -v */
static bool showprogress = false;	/* set by -P */
43
/*
 * Operation requested on the command line: -c selects PG_MODE_CHECK,
 * -d selects PG_MODE_DISABLE and -e selects PG_MODE_ENABLE.
 */
typedef enum
{
	PG_MODE_CHECK,
	PG_MODE_DISABLE,
	PG_MODE_ENABLE
} PgChecksumMode;
50
51/*
52 * Filename components.
53 *
 * XXX: fd.h is not included here, as frontend-side code is not able to
 * interact with the backend-side definitions for the various fsync
 * wrappers.
57 */
/* Directory entries whose name starts with this are skipped as temporary directories */
#define PG_TEMP_FILES_DIR "pgsql_tmp"
/* Directory entries whose name starts with this are skipped as temporary files */
#define PG_TEMP_FILE_PREFIX "pgsql_tmp"

/* Operation to perform; checking unless main() sees -d or -e */
static PgChecksumMode mode = PG_MODE_CHECK;

/* Program name derived from argv[0] in main(), used in help and hint messages */
static const char *progname;
64
65/*
66 * Progress status information.
67 */
68int64 total_size = 0;
69int64 current_size = 0;
70static pg_time_t last_progress_report = 0;
71
/*
 * Print the command-line help text to stdout.
 *
 * Every message goes through _() so that it can be translated; progname
 * is substituted where the program's own name appears.
 */
static void
usage(void)
{
	printf(_("%s enables, disables, or verifies data checksums in a PostgreSQL database cluster.\n\n"), progname);
	printf(_("Usage:\n"));
	printf(_(" %s [OPTION]... [DATADIR]\n"), progname);
	printf(_("\nOptions:\n"));
	printf(_(" [-D, --pgdata=]DATADIR data directory\n"));
	printf(_(" -c, --check check data checksums (default)\n"));
	printf(_(" -d, --disable disable data checksums\n"));
	printf(_(" -e, --enable enable data checksums\n"));
	printf(_(" -f, --filenode=FILENODE check only relation with specified filenode\n"));
	printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n"));
	printf(_(" -P, --progress show progress information\n"));
	printf(_(" -v, --verbose output verbose messages\n"));
	printf(_(" -V, --version output version information, then exit\n"));
	printf(_(" -?, --help show this help, then exit\n"));
	printf(_("\nIf no data directory (DATADIR) is specified, "
			 "the environment variable PGDATA\nis used.\n\n"));
	printf(_("Report bugs to <pgsql-bugs@lists.postgresql.org>.\n"));
}
93
/*
 * List of files excluded from checksum validation.
 *
 * The array is NULL-terminated.  skipfile() compares each entry against a
 * bare file name (no directory part) and requires an exact match.
 *
 * Note: this list should be kept in sync with what basebackup.c includes.
 */
static const char *const skip[] = {
	"pg_control",
	"pg_filenode.map",
	"pg_internal.init",
	"PG_VERSION",
#ifdef EXEC_BACKEND
	/* only listed in EXEC_BACKEND builds */
	"config_exec_params",
	"config_exec_params.new",
#endif
	NULL,
};
110
111/*
112 * Report current progress status. Parts borrowed from
113 * src/bin/pg_basebackup/pg_basebackup.c.
114 */
115static void
116progress_report(bool force)
117{
118 int percent;
119 char total_size_str[32];
120 char current_size_str[32];
121 pg_time_t now;
122
123 Assert(showprogress);
124
125 now = time(NULL);
126 if (now == last_progress_report && !force)
127 return; /* Max once per second */
128
129 /* Save current time */
130 last_progress_report = now;
131
132 /* Adjust total size if current_size is larger */
133 if (current_size > total_size)
134 total_size = current_size;
135
136 /* Calculate current percentage of size done */
137 percent = total_size ? (int) ((current_size) * 100 / total_size) : 0;
138
139 /*
140 * Separate step to keep platform-dependent format code out of
141 * translatable strings. And we only test for INT64_FORMAT availability
142 * in snprintf, not fprintf.
143 */
144 snprintf(total_size_str, sizeof(total_size_str), INT64_FORMAT,
145 total_size / (1024 * 1024));
146 snprintf(current_size_str, sizeof(current_size_str), INT64_FORMAT,
147 current_size / (1024 * 1024));
148
149 fprintf(stderr, _("%*s/%s MB (%d%%) computed"),
150 (int) strlen(current_size_str), current_size_str, total_size_str,
151 percent);
152
153 /* Stay on the same line if reporting to a terminal */
154 fprintf(stderr, isatty(fileno(stderr)) ? "\r" : "\n");
155}
156
157static bool
158skipfile(const char *fn)
159{
160 const char *const *f;
161
162 for (f = skip; *f; f++)
163 if (strcmp(*f, fn) == 0)
164 return true;
165
166 return false;
167}
168
169static void
170scan_file(const char *fn, BlockNumber segmentno)
171{
172 PGAlignedBlock buf;
173 PageHeader header = (PageHeader) buf.data;
174 int f;
175 BlockNumber blockno;
176 int flags;
177
178 Assert(mode == PG_MODE_ENABLE ||
179 mode == PG_MODE_CHECK);
180
181 flags = (mode == PG_MODE_ENABLE) ? O_RDWR : O_RDONLY;
182 f = open(fn, PG_BINARY | flags, 0);
183
184 if (f < 0)
185 {
186 pg_log_error("could not open file \"%s\": %m", fn);
187 exit(1);
188 }
189
190 files++;
191
192 for (blockno = 0;; blockno++)
193 {
194 uint16 csum;
195 int r = read(f, buf.data, BLCKSZ);
196
197 if (r == 0)
198 break;
199 if (r != BLCKSZ)
200 {
201 if (r < 0)
202 pg_log_error("could not read block %u in file \"%s\": %m",
203 blockno, fn);
204 else
205 pg_log_error("could not read block %u in file \"%s\": read %d of %d",
206 blockno, fn, r, BLCKSZ);
207 exit(1);
208 }
209 blocks++;
210
211 /* New pages have no checksum yet */
212 if (PageIsNew(header))
213 continue;
214
215 csum = pg_checksum_page(buf.data, blockno + segmentno * RELSEG_SIZE);
216 current_size += r;
217 if (mode == PG_MODE_CHECK)
218 {
219 if (csum != header->pd_checksum)
220 {
221 if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION)
222 pg_log_error("checksum verification failed in file \"%s\", block %u: calculated checksum %X but block contains %X",
223 fn, blockno, csum, header->pd_checksum);
224 badblocks++;
225 }
226 }
227 else if (mode == PG_MODE_ENABLE)
228 {
229 int w;
230
231 /* Set checksum in page header */
232 header->pd_checksum = csum;
233
234 /* Seek back to beginning of block */
235 if (lseek(f, -BLCKSZ, SEEK_CUR) < 0)
236 {
237 pg_log_error("seek failed for block %u in file \"%s\": %m", blockno, fn);
238 exit(1);
239 }
240
241 /* Write block with checksum */
242 w = write(f, buf.data, BLCKSZ);
243 if (w != BLCKSZ)
244 {
245 if (w < 0)
246 pg_log_error("could not write block %u in file \"%s\": %m",
247 blockno, fn);
248 else
249 pg_log_error("could not write block %u in file \"%s\": wrote %d of %d",
250 blockno, fn, w, BLCKSZ);
251 exit(1);
252 }
253 }
254
255 if (showprogress)
256 progress_report(false);
257 }
258
259 if (verbose)
260 {
261 if (mode == PG_MODE_CHECK)
262 pg_log_info("checksums verified in file \"%s\"", fn);
263 if (mode == PG_MODE_ENABLE)
264 pg_log_info("checksums enabled in file \"%s\"", fn);
265 }
266
267 close(f);
268}
269
270/*
271 * Scan the given directory for items which can be checksummed and
272 * operate on each one of them. If "sizeonly" is true, the size of
273 * all the items which have checksums is computed and returned back
274 * to the caller without operating on the files. This is used to compile
275 * the total size of the data directory for progress reports.
276 */
277static int64
278scan_directory(const char *basedir, const char *subdir, bool sizeonly)
279{
280 int64 dirsize = 0;
281 char path[MAXPGPATH];
282 DIR *dir;
283 struct dirent *de;
284
285 snprintf(path, sizeof(path), "%s/%s", basedir, subdir);
286 dir = opendir(path);
287 if (!dir)
288 {
289 pg_log_error("could not open directory \"%s\": %m", path);
290 exit(1);
291 }
292 while ((de = readdir(dir)) != NULL)
293 {
294 char fn[MAXPGPATH];
295 struct stat st;
296
297 if (strcmp(de->d_name, ".") == 0 ||
298 strcmp(de->d_name, "..") == 0)
299 continue;
300
301 /* Skip temporary files */
302 if (strncmp(de->d_name,
303 PG_TEMP_FILE_PREFIX,
304 strlen(PG_TEMP_FILE_PREFIX)) == 0)
305 continue;
306
307 /* Skip temporary folders */
308 if (strncmp(de->d_name,
309 PG_TEMP_FILES_DIR,
310 strlen(PG_TEMP_FILES_DIR)) == 0)
311 continue;
312
313 snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name);
314 if (lstat(fn, &st) < 0)
315 {
316 pg_log_error("could not stat file \"%s\": %m", fn);
317 exit(1);
318 }
319 if (S_ISREG(st.st_mode))
320 {
321 char fnonly[MAXPGPATH];
322 char *forkpath,
323 *segmentpath;
324 BlockNumber segmentno = 0;
325
326 if (skipfile(de->d_name))
327 continue;
328
329 /*
330 * Cut off at the segment boundary (".") to get the segment number
331 * in order to mix it into the checksum. Then also cut off at the
332 * fork boundary, to get the filenode the file belongs to for
333 * filtering.
334 */
335 strlcpy(fnonly, de->d_name, sizeof(fnonly));
336 segmentpath = strchr(fnonly, '.');
337 if (segmentpath != NULL)
338 {
339 *segmentpath++ = '\0';
340 segmentno = atoi(segmentpath);
341 if (segmentno == 0)
342 {
343 pg_log_error("invalid segment number %d in file name \"%s\"",
344 segmentno, fn);
345 exit(1);
346 }
347 }
348
349 forkpath = strchr(fnonly, '_');
350 if (forkpath != NULL)
351 *forkpath++ = '\0';
352
353 if (only_filenode && strcmp(only_filenode, fnonly) != 0)
354 /* filenode not to be included */
355 continue;
356
357 dirsize += st.st_size;
358
359 /*
360 * No need to work on the file when calculating only the size of
361 * the items in the data folder.
362 */
363 if (!sizeonly)
364 scan_file(fn, segmentno);
365 }
366#ifndef WIN32
367 else if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
368#else
369 else if (S_ISDIR(st.st_mode) || pgwin32_is_junction(fn))
370#endif
371 dirsize += scan_directory(path, de->d_name, sizeonly);
372 }
373 closedir(dir);
374 return dirsize;
375}
376
377int
378main(int argc, char *argv[])
379{
380 static struct option long_options[] = {
381 {"check", no_argument, NULL, 'c'},
382 {"pgdata", required_argument, NULL, 'D'},
383 {"disable", no_argument, NULL, 'd'},
384 {"enable", no_argument, NULL, 'e'},
385 {"filenode", required_argument, NULL, 'f'},
386 {"no-sync", no_argument, NULL, 'N'},
387 {"progress", no_argument, NULL, 'P'},
388 {"verbose", no_argument, NULL, 'v'},
389 {NULL, 0, NULL, 0}
390 };
391
392 char *DataDir = NULL;
393 int c;
394 int option_index;
395 bool crc_ok;
396
397 pg_logging_init(argv[0]);
398 set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_checksums"));
399 progname = get_progname(argv[0]);
400
401 if (argc > 1)
402 {
403 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
404 {
405 usage();
406 exit(0);
407 }
408 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
409 {
410 puts("pg_checksums (PostgreSQL) " PG_VERSION);
411 exit(0);
412 }
413 }
414
415 while ((c = getopt_long(argc, argv, "cD:deNPf:v", long_options, &option_index)) != -1)
416 {
417 switch (c)
418 {
419 case 'c':
420 mode = PG_MODE_CHECK;
421 break;
422 case 'd':
423 mode = PG_MODE_DISABLE;
424 break;
425 case 'e':
426 mode = PG_MODE_ENABLE;
427 break;
428 case 'f':
429 if (atoi(optarg) == 0)
430 {
431 pg_log_error("invalid filenode specification, must be numeric: %s", optarg);
432 exit(1);
433 }
434 only_filenode = pstrdup(optarg);
435 break;
436 case 'N':
437 do_sync = false;
438 break;
439 case 'v':
440 verbose = true;
441 break;
442 case 'D':
443 DataDir = optarg;
444 break;
445 case 'P':
446 showprogress = true;
447 break;
448 default:
449 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
450 exit(1);
451 }
452 }
453
454 if (DataDir == NULL)
455 {
456 if (optind < argc)
457 DataDir = argv[optind++];
458 else
459 DataDir = getenv("PGDATA");
460
461 /* If no DataDir was specified, and none could be found, error out */
462 if (DataDir == NULL)
463 {
464 pg_log_error("no data directory specified");
465 fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
466 exit(1);
467 }
468 }
469
470 /* Complain if any arguments remain */
471 if (optind < argc)
472 {
473 pg_log_error("too many command-line arguments (first is \"%s\")",
474 argv[optind]);
475 fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
476 progname);
477 exit(1);
478 }
479
480 /* filenode checking only works in --check mode */
481 if (mode != PG_MODE_CHECK && only_filenode)
482 {
483 pg_log_error("option -f/--filenode can only be used with --check");
484 fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
485 progname);
486 exit(1);
487 }
488
489 /* Read the control file and check compatibility */
490 ControlFile = get_controlfile(DataDir, &crc_ok);
491 if (!crc_ok)
492 {
493 pg_log_error("pg_control CRC value is incorrect");
494 exit(1);
495 }
496
497 if (ControlFile->pg_control_version != PG_CONTROL_VERSION)
498 {
499 pg_log_error("cluster is not compatible with this version of pg_checksums");
500 exit(1);
501 }
502
503 if (ControlFile->blcksz != BLCKSZ)
504 {
505 pg_log_error("database cluster is not compatible");
506 fprintf(stderr, _("The database cluster was initialized with block size %u, but pg_checksums was compiled with block size %u.\n"),
507 ControlFile->blcksz, BLCKSZ);
508 exit(1);
509 }
510
511 /*
512 * Check if cluster is running. A clean shutdown is required to avoid
513 * random checksum failures caused by torn pages. Note that this doesn't
514 * guard against someone starting the cluster concurrently.
515 */
516 if (ControlFile->state != DB_SHUTDOWNED &&
517 ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY)
518 {
519 pg_log_error("cluster must be shut down");
520 exit(1);
521 }
522
523 if (ControlFile->data_checksum_version == 0 &&
524 mode == PG_MODE_CHECK)
525 {
526 pg_log_error("data checksums are not enabled in cluster");
527 exit(1);
528 }
529
530 if (ControlFile->data_checksum_version == 0 &&
531 mode == PG_MODE_DISABLE)
532 {
533 pg_log_error("data checksums are already disabled in cluster");
534 exit(1);
535 }
536
537 if (ControlFile->data_checksum_version > 0 &&
538 mode == PG_MODE_ENABLE)
539 {
540 pg_log_error("data checksums are already enabled in cluster");
541 exit(1);
542 }
543
544 /* Operate on all files if checking or enabling checksums */
545 if (mode == PG_MODE_CHECK || mode == PG_MODE_ENABLE)
546 {
547 /*
548 * If progress status information is requested, we need to scan the
549 * directory tree twice: once to know how much total data needs to be
550 * processed and once to do the real work.
551 */
552 if (showprogress)
553 {
554 total_size = scan_directory(DataDir, "global", true);
555 total_size += scan_directory(DataDir, "base", true);
556 total_size += scan_directory(DataDir, "pg_tblspc", true);
557 }
558
559 (void) scan_directory(DataDir, "global", false);
560 (void) scan_directory(DataDir, "base", false);
561 (void) scan_directory(DataDir, "pg_tblspc", false);
562
563 if (showprogress)
564 {
565 progress_report(true);
566 fprintf(stderr, "\n"); /* Need to move to next line */
567 }
568
569 printf(_("Checksum operation completed\n"));
570 printf(_("Files scanned: %s\n"), psprintf(INT64_FORMAT, files));
571 printf(_("Blocks scanned: %s\n"), psprintf(INT64_FORMAT, blocks));
572 if (mode == PG_MODE_CHECK)
573 {
574 printf(_("Bad checksums: %s\n"), psprintf(INT64_FORMAT, badblocks));
575 printf(_("Data checksum version: %d\n"), ControlFile->data_checksum_version);
576
577 if (badblocks > 0)
578 exit(1);
579 }
580 }
581
582 /*
583 * Finally make the data durable on disk if enabling or disabling
584 * checksums. Flush first the data directory for safety, and then update
585 * the control file to keep the switch consistent.
586 */
587 if (mode == PG_MODE_ENABLE || mode == PG_MODE_DISABLE)
588 {
589 ControlFile->data_checksum_version =
590 (mode == PG_MODE_ENABLE) ? PG_DATA_CHECKSUM_VERSION : 0;
591
592 if (do_sync)
593 {
594 pg_log_info("syncing data directory");
595 fsync_pgdata(DataDir, PG_VERSION_NUM);
596 }
597
598 pg_log_info("updating control file");
599 update_controlfile(DataDir, ControlFile, do_sync);
600
601 if (verbose)
602 printf(_("Data checksum version: %d\n"), ControlFile->data_checksum_version);
603 if (mode == PG_MODE_ENABLE)
604 printf(_("Checksums enabled in cluster\n"));
605 else
606 printf(_("Checksums disabled in cluster\n"));
607 }
608
609 return 0;
610}
611