1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * pg_checksums.c |
4 | * Checks, enables or disables page level checksums for an offline |
5 | * cluster |
6 | * |
7 | * Copyright (c) 2010-2019, PostgreSQL Global Development Group |
8 | * |
9 | * IDENTIFICATION |
10 | * src/bin/pg_checksums/pg_checksums.c |
11 | * |
12 | *------------------------------------------------------------------------- |
13 | */ |
14 | |
15 | #include "postgres_fe.h" |
16 | |
17 | #include <dirent.h> |
18 | #include <time.h> |
19 | #include <sys/stat.h> |
20 | #include <unistd.h> |
21 | |
22 | #include "access/xlog_internal.h" |
23 | #include "common/controldata_utils.h" |
24 | #include "common/file_perm.h" |
25 | #include "common/file_utils.h" |
26 | #include "common/logging.h" |
27 | #include "getopt_long.h" |
28 | #include "pg_getopt.h" |
29 | #include "storage/bufpage.h" |
30 | #include "storage/checksum.h" |
31 | #include "storage/checksum_impl.h" |
32 | |
33 | |
34 | static int64 files = 0; |
35 | static int64 blocks = 0; |
36 | static int64 badblocks = 0; |
37 | static ControlFileData *ControlFile; |
38 | |
39 | static char *only_filenode = NULL; |
40 | static bool do_sync = true; |
41 | static bool verbose = false; |
42 | static bool showprogress = false; |
43 | |
/*
 * Operation requested on the command line.  main() selects one of these
 * from the -c, -d and -e options.
 */
typedef enum
{
	PG_MODE_CHECK = 0,			/* -c / --check */
	PG_MODE_DISABLE = 1,		/* -d / --disable */
	PG_MODE_ENABLE = 2			/* -e / --enable */
} PgChecksumMode;
50 | |
/*
 * Name prefixes of temporary directories and temporary files; directory
 * entries starting with either prefix are ignored by scan_directory().
 *
 * XXX: fd.h is not included here, as frontend code cannot interact with
 * the backend-side definitions for the various fsync wrappers.  The
 * values are therefore spelled out locally.
 */
#define PG_TEMP_FILES_DIR		"pgsql_tmp"
#define PG_TEMP_FILE_PREFIX		"pgsql_tmp"
60 | |
61 | static PgChecksumMode mode = PG_MODE_CHECK; |
62 | |
63 | static const char *progname; |
64 | |
65 | /* |
66 | * Progress status information. |
67 | */ |
68 | int64 total_size = 0; |
69 | int64 current_size = 0; |
70 | static pg_time_t last_progress_report = 0; |
71 | |
72 | static void |
73 | usage(void) |
74 | { |
75 | printf(_("%s enables, disables, or verifies data checksums in a PostgreSQL database cluster.\n\n" ), progname); |
76 | printf(_("Usage:\n" )); |
77 | printf(_(" %s [OPTION]... [DATADIR]\n" ), progname); |
78 | printf(_("\nOptions:\n" )); |
79 | printf(_(" [-D, --pgdata=]DATADIR data directory\n" )); |
80 | printf(_(" -c, --check check data checksums (default)\n" )); |
81 | printf(_(" -d, --disable disable data checksums\n" )); |
82 | printf(_(" -e, --enable enable data checksums\n" )); |
83 | printf(_(" -f, --filenode=FILENODE check only relation with specified filenode\n" )); |
84 | printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n" )); |
85 | printf(_(" -P, --progress show progress information\n" )); |
86 | printf(_(" -v, --verbose output verbose messages\n" )); |
87 | printf(_(" -V, --version output version information, then exit\n" )); |
88 | printf(_(" -?, --help show this help, then exit\n" )); |
89 | printf(_("\nIf no data directory (DATADIR) is specified, " |
90 | "the environment variable PGDATA\nis used.\n\n" )); |
91 | printf(_("Report bugs to <pgsql-bugs@lists.postgresql.org>.\n" )); |
92 | } |
93 | |
/*
 * File names that are never checksummed.  skipfile() compares directory
 * entry names against this table with an exact match; the table ends at the
 * NULL entry.
 *
 * Note: this list should be kept in sync with what basebackup.c includes.
 */
static const char *const skip[] = {
	"pg_control",
	"pg_filenode.map",
	"pg_internal.init",
	"PG_VERSION",
#ifdef EXEC_BACKEND
	/* only present in EXEC_BACKEND builds */
	"config_exec_params",
	"config_exec_params.new",
#endif
	NULL						/* end-of-list marker */
};
110 | |
111 | /* |
112 | * Report current progress status. Parts borrowed from |
113 | * src/bin/pg_basebackup/pg_basebackup.c. |
114 | */ |
115 | static void |
116 | progress_report(bool force) |
117 | { |
118 | int percent; |
119 | char total_size_str[32]; |
120 | char current_size_str[32]; |
121 | pg_time_t now; |
122 | |
123 | Assert(showprogress); |
124 | |
125 | now = time(NULL); |
126 | if (now == last_progress_report && !force) |
127 | return; /* Max once per second */ |
128 | |
129 | /* Save current time */ |
130 | last_progress_report = now; |
131 | |
132 | /* Adjust total size if current_size is larger */ |
133 | if (current_size > total_size) |
134 | total_size = current_size; |
135 | |
136 | /* Calculate current percentage of size done */ |
137 | percent = total_size ? (int) ((current_size) * 100 / total_size) : 0; |
138 | |
139 | /* |
140 | * Separate step to keep platform-dependent format code out of |
141 | * translatable strings. And we only test for INT64_FORMAT availability |
142 | * in snprintf, not fprintf. |
143 | */ |
144 | snprintf(total_size_str, sizeof(total_size_str), INT64_FORMAT, |
145 | total_size / (1024 * 1024)); |
146 | snprintf(current_size_str, sizeof(current_size_str), INT64_FORMAT, |
147 | current_size / (1024 * 1024)); |
148 | |
149 | fprintf(stderr, _("%*s/%s MB (%d%%) computed" ), |
150 | (int) strlen(current_size_str), current_size_str, total_size_str, |
151 | percent); |
152 | |
153 | /* Stay on the same line if reporting to a terminal */ |
154 | fprintf(stderr, isatty(fileno(stderr)) ? "\r" : "\n" ); |
155 | } |
156 | |
157 | static bool |
158 | skipfile(const char *fn) |
159 | { |
160 | const char *const *f; |
161 | |
162 | for (f = skip; *f; f++) |
163 | if (strcmp(*f, fn) == 0) |
164 | return true; |
165 | |
166 | return false; |
167 | } |
168 | |
169 | static void |
170 | scan_file(const char *fn, BlockNumber segmentno) |
171 | { |
172 | PGAlignedBlock buf; |
173 | PageHeader = (PageHeader) buf.data; |
174 | int f; |
175 | BlockNumber blockno; |
176 | int flags; |
177 | |
178 | Assert(mode == PG_MODE_ENABLE || |
179 | mode == PG_MODE_CHECK); |
180 | |
181 | flags = (mode == PG_MODE_ENABLE) ? O_RDWR : O_RDONLY; |
182 | f = open(fn, PG_BINARY | flags, 0); |
183 | |
184 | if (f < 0) |
185 | { |
186 | pg_log_error("could not open file \"%s\": %m" , fn); |
187 | exit(1); |
188 | } |
189 | |
190 | files++; |
191 | |
192 | for (blockno = 0;; blockno++) |
193 | { |
194 | uint16 csum; |
195 | int r = read(f, buf.data, BLCKSZ); |
196 | |
197 | if (r == 0) |
198 | break; |
199 | if (r != BLCKSZ) |
200 | { |
201 | if (r < 0) |
202 | pg_log_error("could not read block %u in file \"%s\": %m" , |
203 | blockno, fn); |
204 | else |
205 | pg_log_error("could not read block %u in file \"%s\": read %d of %d" , |
206 | blockno, fn, r, BLCKSZ); |
207 | exit(1); |
208 | } |
209 | blocks++; |
210 | |
211 | /* New pages have no checksum yet */ |
212 | if (PageIsNew(header)) |
213 | continue; |
214 | |
215 | csum = pg_checksum_page(buf.data, blockno + segmentno * RELSEG_SIZE); |
216 | current_size += r; |
217 | if (mode == PG_MODE_CHECK) |
218 | { |
219 | if (csum != header->pd_checksum) |
220 | { |
221 | if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION) |
222 | pg_log_error("checksum verification failed in file \"%s\", block %u: calculated checksum %X but block contains %X" , |
223 | fn, blockno, csum, header->pd_checksum); |
224 | badblocks++; |
225 | } |
226 | } |
227 | else if (mode == PG_MODE_ENABLE) |
228 | { |
229 | int w; |
230 | |
231 | /* Set checksum in page header */ |
232 | header->pd_checksum = csum; |
233 | |
234 | /* Seek back to beginning of block */ |
235 | if (lseek(f, -BLCKSZ, SEEK_CUR) < 0) |
236 | { |
237 | pg_log_error("seek failed for block %u in file \"%s\": %m" , blockno, fn); |
238 | exit(1); |
239 | } |
240 | |
241 | /* Write block with checksum */ |
242 | w = write(f, buf.data, BLCKSZ); |
243 | if (w != BLCKSZ) |
244 | { |
245 | if (w < 0) |
246 | pg_log_error("could not write block %u in file \"%s\": %m" , |
247 | blockno, fn); |
248 | else |
249 | pg_log_error("could not write block %u in file \"%s\": wrote %d of %d" , |
250 | blockno, fn, w, BLCKSZ); |
251 | exit(1); |
252 | } |
253 | } |
254 | |
255 | if (showprogress) |
256 | progress_report(false); |
257 | } |
258 | |
259 | if (verbose) |
260 | { |
261 | if (mode == PG_MODE_CHECK) |
262 | pg_log_info("checksums verified in file \"%s\"" , fn); |
263 | if (mode == PG_MODE_ENABLE) |
264 | pg_log_info("checksums enabled in file \"%s\"" , fn); |
265 | } |
266 | |
267 | close(f); |
268 | } |
269 | |
270 | /* |
271 | * Scan the given directory for items which can be checksummed and |
272 | * operate on each one of them. If "sizeonly" is true, the size of |
273 | * all the items which have checksums is computed and returned back |
274 | * to the caller without operating on the files. This is used to compile |
275 | * the total size of the data directory for progress reports. |
276 | */ |
277 | static int64 |
278 | scan_directory(const char *basedir, const char *subdir, bool sizeonly) |
279 | { |
280 | int64 dirsize = 0; |
281 | char path[MAXPGPATH]; |
282 | DIR *dir; |
283 | struct dirent *de; |
284 | |
285 | snprintf(path, sizeof(path), "%s/%s" , basedir, subdir); |
286 | dir = opendir(path); |
287 | if (!dir) |
288 | { |
289 | pg_log_error("could not open directory \"%s\": %m" , path); |
290 | exit(1); |
291 | } |
292 | while ((de = readdir(dir)) != NULL) |
293 | { |
294 | char fn[MAXPGPATH]; |
295 | struct stat st; |
296 | |
297 | if (strcmp(de->d_name, "." ) == 0 || |
298 | strcmp(de->d_name, ".." ) == 0) |
299 | continue; |
300 | |
301 | /* Skip temporary files */ |
302 | if (strncmp(de->d_name, |
303 | PG_TEMP_FILE_PREFIX, |
304 | strlen(PG_TEMP_FILE_PREFIX)) == 0) |
305 | continue; |
306 | |
307 | /* Skip temporary folders */ |
308 | if (strncmp(de->d_name, |
309 | PG_TEMP_FILES_DIR, |
310 | strlen(PG_TEMP_FILES_DIR)) == 0) |
311 | continue; |
312 | |
313 | snprintf(fn, sizeof(fn), "%s/%s" , path, de->d_name); |
314 | if (lstat(fn, &st) < 0) |
315 | { |
316 | pg_log_error("could not stat file \"%s\": %m" , fn); |
317 | exit(1); |
318 | } |
319 | if (S_ISREG(st.st_mode)) |
320 | { |
321 | char fnonly[MAXPGPATH]; |
322 | char *forkpath, |
323 | *segmentpath; |
324 | BlockNumber segmentno = 0; |
325 | |
326 | if (skipfile(de->d_name)) |
327 | continue; |
328 | |
329 | /* |
330 | * Cut off at the segment boundary (".") to get the segment number |
331 | * in order to mix it into the checksum. Then also cut off at the |
332 | * fork boundary, to get the filenode the file belongs to for |
333 | * filtering. |
334 | */ |
335 | strlcpy(fnonly, de->d_name, sizeof(fnonly)); |
336 | segmentpath = strchr(fnonly, '.'); |
337 | if (segmentpath != NULL) |
338 | { |
339 | *segmentpath++ = '\0'; |
340 | segmentno = atoi(segmentpath); |
341 | if (segmentno == 0) |
342 | { |
343 | pg_log_error("invalid segment number %d in file name \"%s\"" , |
344 | segmentno, fn); |
345 | exit(1); |
346 | } |
347 | } |
348 | |
349 | forkpath = strchr(fnonly, '_'); |
350 | if (forkpath != NULL) |
351 | *forkpath++ = '\0'; |
352 | |
353 | if (only_filenode && strcmp(only_filenode, fnonly) != 0) |
354 | /* filenode not to be included */ |
355 | continue; |
356 | |
357 | dirsize += st.st_size; |
358 | |
359 | /* |
360 | * No need to work on the file when calculating only the size of |
361 | * the items in the data folder. |
362 | */ |
363 | if (!sizeonly) |
364 | scan_file(fn, segmentno); |
365 | } |
366 | #ifndef WIN32 |
367 | else if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode)) |
368 | #else |
369 | else if (S_ISDIR(st.st_mode) || pgwin32_is_junction(fn)) |
370 | #endif |
371 | dirsize += scan_directory(path, de->d_name, sizeonly); |
372 | } |
373 | closedir(dir); |
374 | return dirsize; |
375 | } |
376 | |
377 | int |
378 | main(int argc, char *argv[]) |
379 | { |
380 | static struct option long_options[] = { |
381 | {"check" , no_argument, NULL, 'c'}, |
382 | {"pgdata" , required_argument, NULL, 'D'}, |
383 | {"disable" , no_argument, NULL, 'd'}, |
384 | {"enable" , no_argument, NULL, 'e'}, |
385 | {"filenode" , required_argument, NULL, 'f'}, |
386 | {"no-sync" , no_argument, NULL, 'N'}, |
387 | {"progress" , no_argument, NULL, 'P'}, |
388 | {"verbose" , no_argument, NULL, 'v'}, |
389 | {NULL, 0, NULL, 0} |
390 | }; |
391 | |
392 | char *DataDir = NULL; |
393 | int c; |
394 | int option_index; |
395 | bool crc_ok; |
396 | |
397 | pg_logging_init(argv[0]); |
398 | set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_checksums" )); |
399 | progname = get_progname(argv[0]); |
400 | |
401 | if (argc > 1) |
402 | { |
403 | if (strcmp(argv[1], "--help" ) == 0 || strcmp(argv[1], "-?" ) == 0) |
404 | { |
405 | usage(); |
406 | exit(0); |
407 | } |
408 | if (strcmp(argv[1], "--version" ) == 0 || strcmp(argv[1], "-V" ) == 0) |
409 | { |
410 | puts("pg_checksums (PostgreSQL) " PG_VERSION); |
411 | exit(0); |
412 | } |
413 | } |
414 | |
415 | while ((c = getopt_long(argc, argv, "cD:deNPf:v" , long_options, &option_index)) != -1) |
416 | { |
417 | switch (c) |
418 | { |
419 | case 'c': |
420 | mode = PG_MODE_CHECK; |
421 | break; |
422 | case 'd': |
423 | mode = PG_MODE_DISABLE; |
424 | break; |
425 | case 'e': |
426 | mode = PG_MODE_ENABLE; |
427 | break; |
428 | case 'f': |
429 | if (atoi(optarg) == 0) |
430 | { |
431 | pg_log_error("invalid filenode specification, must be numeric: %s" , optarg); |
432 | exit(1); |
433 | } |
434 | only_filenode = pstrdup(optarg); |
435 | break; |
436 | case 'N': |
437 | do_sync = false; |
438 | break; |
439 | case 'v': |
440 | verbose = true; |
441 | break; |
442 | case 'D': |
443 | DataDir = optarg; |
444 | break; |
445 | case 'P': |
446 | showprogress = true; |
447 | break; |
448 | default: |
449 | fprintf(stderr, _("Try \"%s --help\" for more information.\n" ), progname); |
450 | exit(1); |
451 | } |
452 | } |
453 | |
454 | if (DataDir == NULL) |
455 | { |
456 | if (optind < argc) |
457 | DataDir = argv[optind++]; |
458 | else |
459 | DataDir = getenv("PGDATA" ); |
460 | |
461 | /* If no DataDir was specified, and none could be found, error out */ |
462 | if (DataDir == NULL) |
463 | { |
464 | pg_log_error("no data directory specified" ); |
465 | fprintf(stderr, _("Try \"%s --help\" for more information.\n" ), progname); |
466 | exit(1); |
467 | } |
468 | } |
469 | |
470 | /* Complain if any arguments remain */ |
471 | if (optind < argc) |
472 | { |
473 | pg_log_error("too many command-line arguments (first is \"%s\")" , |
474 | argv[optind]); |
475 | fprintf(stderr, _("Try \"%s --help\" for more information.\n" ), |
476 | progname); |
477 | exit(1); |
478 | } |
479 | |
480 | /* filenode checking only works in --check mode */ |
481 | if (mode != PG_MODE_CHECK && only_filenode) |
482 | { |
483 | pg_log_error("option -f/--filenode can only be used with --check" ); |
484 | fprintf(stderr, _("Try \"%s --help\" for more information.\n" ), |
485 | progname); |
486 | exit(1); |
487 | } |
488 | |
489 | /* Read the control file and check compatibility */ |
490 | ControlFile = get_controlfile(DataDir, &crc_ok); |
491 | if (!crc_ok) |
492 | { |
493 | pg_log_error("pg_control CRC value is incorrect" ); |
494 | exit(1); |
495 | } |
496 | |
497 | if (ControlFile->pg_control_version != PG_CONTROL_VERSION) |
498 | { |
499 | pg_log_error("cluster is not compatible with this version of pg_checksums" ); |
500 | exit(1); |
501 | } |
502 | |
503 | if (ControlFile->blcksz != BLCKSZ) |
504 | { |
505 | pg_log_error("database cluster is not compatible" ); |
506 | fprintf(stderr, _("The database cluster was initialized with block size %u, but pg_checksums was compiled with block size %u.\n" ), |
507 | ControlFile->blcksz, BLCKSZ); |
508 | exit(1); |
509 | } |
510 | |
511 | /* |
512 | * Check if cluster is running. A clean shutdown is required to avoid |
513 | * random checksum failures caused by torn pages. Note that this doesn't |
514 | * guard against someone starting the cluster concurrently. |
515 | */ |
516 | if (ControlFile->state != DB_SHUTDOWNED && |
517 | ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY) |
518 | { |
519 | pg_log_error("cluster must be shut down" ); |
520 | exit(1); |
521 | } |
522 | |
523 | if (ControlFile->data_checksum_version == 0 && |
524 | mode == PG_MODE_CHECK) |
525 | { |
526 | pg_log_error("data checksums are not enabled in cluster" ); |
527 | exit(1); |
528 | } |
529 | |
530 | if (ControlFile->data_checksum_version == 0 && |
531 | mode == PG_MODE_DISABLE) |
532 | { |
533 | pg_log_error("data checksums are already disabled in cluster" ); |
534 | exit(1); |
535 | } |
536 | |
537 | if (ControlFile->data_checksum_version > 0 && |
538 | mode == PG_MODE_ENABLE) |
539 | { |
540 | pg_log_error("data checksums are already enabled in cluster" ); |
541 | exit(1); |
542 | } |
543 | |
544 | /* Operate on all files if checking or enabling checksums */ |
545 | if (mode == PG_MODE_CHECK || mode == PG_MODE_ENABLE) |
546 | { |
547 | /* |
548 | * If progress status information is requested, we need to scan the |
549 | * directory tree twice: once to know how much total data needs to be |
550 | * processed and once to do the real work. |
551 | */ |
552 | if (showprogress) |
553 | { |
554 | total_size = scan_directory(DataDir, "global" , true); |
555 | total_size += scan_directory(DataDir, "base" , true); |
556 | total_size += scan_directory(DataDir, "pg_tblspc" , true); |
557 | } |
558 | |
559 | (void) scan_directory(DataDir, "global" , false); |
560 | (void) scan_directory(DataDir, "base" , false); |
561 | (void) scan_directory(DataDir, "pg_tblspc" , false); |
562 | |
563 | if (showprogress) |
564 | { |
565 | progress_report(true); |
566 | fprintf(stderr, "\n" ); /* Need to move to next line */ |
567 | } |
568 | |
569 | printf(_("Checksum operation completed\n" )); |
570 | printf(_("Files scanned: %s\n" ), psprintf(INT64_FORMAT, files)); |
571 | printf(_("Blocks scanned: %s\n" ), psprintf(INT64_FORMAT, blocks)); |
572 | if (mode == PG_MODE_CHECK) |
573 | { |
574 | printf(_("Bad checksums: %s\n" ), psprintf(INT64_FORMAT, badblocks)); |
575 | printf(_("Data checksum version: %d\n" ), ControlFile->data_checksum_version); |
576 | |
577 | if (badblocks > 0) |
578 | exit(1); |
579 | } |
580 | } |
581 | |
582 | /* |
583 | * Finally make the data durable on disk if enabling or disabling |
584 | * checksums. Flush first the data directory for safety, and then update |
585 | * the control file to keep the switch consistent. |
586 | */ |
587 | if (mode == PG_MODE_ENABLE || mode == PG_MODE_DISABLE) |
588 | { |
589 | ControlFile->data_checksum_version = |
590 | (mode == PG_MODE_ENABLE) ? PG_DATA_CHECKSUM_VERSION : 0; |
591 | |
592 | if (do_sync) |
593 | { |
594 | pg_log_info("syncing data directory" ); |
595 | fsync_pgdata(DataDir, PG_VERSION_NUM); |
596 | } |
597 | |
598 | pg_log_info("updating control file" ); |
599 | update_controlfile(DataDir, ControlFile, do_sync); |
600 | |
601 | if (verbose) |
602 | printf(_("Data checksum version: %d\n" ), ControlFile->data_checksum_version); |
603 | if (mode == PG_MODE_ENABLE) |
604 | printf(_("Checksums enabled in cluster\n" )); |
605 | else |
606 | printf(_("Checksums disabled in cluster\n" )); |
607 | } |
608 | |
609 | return 0; |
610 | } |
611 | |