| 1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
| 2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
| 3 | #ident "$Id$" |
| 4 | /*====== |
| 5 | This file is part of PerconaFT. |
| 6 | |
| 7 | |
| 8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
| 9 | |
| 10 | PerconaFT is free software: you can redistribute it and/or modify |
| 11 | it under the terms of the GNU General Public License, version 2, |
| 12 | as published by the Free Software Foundation. |
| 13 | |
| 14 | PerconaFT is distributed in the hope that it will be useful, |
| 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 17 | GNU General Public License for more details. |
| 18 | |
| 19 | You should have received a copy of the GNU General Public License |
| 20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 21 | |
| 22 | ---------------------------------------- |
| 23 | |
| 24 | PerconaFT is free software: you can redistribute it and/or modify |
| 25 | it under the terms of the GNU Affero General Public License, version 3, |
| 26 | as published by the Free Software Foundation. |
| 27 | |
| 28 | PerconaFT is distributed in the hope that it will be useful, |
| 29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 31 | GNU Affero General Public License for more details. |
| 32 | |
| 33 | You should have received a copy of the GNU Affero General Public License |
| 34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 35 | ======= */ |
| 36 | |
| 37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
| 38 | |
| 39 | //////////////////////////////////////////////////////////////////// |
| 40 | // ftverify - Command line tool that checks the validity of a given |
| 41 | // fractal tree file, one block at a time. |
| 42 | //////////////////////////////////////////////////////////////////// |
| 43 | |
| 44 | #include "portability/toku_assert.h" |
| 45 | #include "portability/toku_list.h" |
| 46 | #include "portability/toku_portability.h" |
| 47 | |
| 48 | #include "ft/serialize/block_allocator.h" |
| 49 | #include "ft/ft-internal.h" |
| 50 | #include "ft/serialize/ft-serialize.h" |
| 51 | #include "ft/serialize/ft_layout_version.h" |
| 52 | #include "ft/serialize/ft_node-serialize.h" |
| 53 | #include "ft/node.h" |
| 54 | #include "ft/serialize/rbuf.h" |
| 55 | #include "ft/serialize/sub_block.h" |
| 56 | #include "util/threadpool.h" |
| 57 | |
| 58 | #include <fcntl.h> |
| 59 | #include <math.h> |
| 60 | #include <stdio.h> |
| 61 | #include <stdlib.h> |
| 62 | #include <sys/stat.h> |
| 63 | #include <sys/types.h> |
| 64 | #include <sysexits.h> |
| 65 | #include <unistd.h> |
| 66 | |
// Processor count obtained in main(); used there as the size argument when
// creating ft_pool.
static int num_cores = 0;

// Thread pool created and destroyed in main().
static struct toku_thread_pool *ft_pool = NULL;

// Stream opened in main() on the <logfile> command line argument.
static FILE *outf;

// Progress granularity, in percent of the total block count, used by
// report(). main() overrides the default from the optional third argument.
static double pct = 0.5;
| 71 | |
| 72 | // Struct for reporting sub block stats. |
| 73 | struct { |
| 74 | BLOCKNUM ; |
| 75 | int ; |
| 76 | uint32_t ; |
| 77 | uint32_t ; |
| 78 | uint32_t ; |
| 79 | bool ; |
| 80 | bool ; |
| 81 | struct sub_block_info *; |
| 82 | }; |
| 83 | |
| 84 | // Initialization function for the sub block stats. |
| 85 | static void |
| 86 | (BLOCKNUM b, struct verify_block_extra *e) |
| 87 | { |
| 88 | static const struct verify_block_extra default_vbe = |
| 89 | { |
| 90 | .b = { 0 }, |
| 91 | .n_sub_blocks = 0, |
| 92 | .header_length = 0, |
| 93 | .calc_xsum = 0, |
| 94 | .stored_xsum = 0, |
| 95 | .header_valid = true, |
| 96 | .sub_blocks_valid = true, |
| 97 | .sub_block_results = NULL |
| 98 | }; |
| 99 | *e = default_vbe; |
| 100 | e->b = b; |
| 101 | } |
| 102 | |
| 103 | // Reports percentage of completed blocks. |
| 104 | static void |
| 105 | report(int64_t blocks_done, int64_t blocks_failed, int64_t total_blocks) |
| 106 | { |
| 107 | int64_t blocks_per_report = llrint(pct * total_blocks / 100.0); |
| 108 | if (blocks_per_report < 1) { |
| 109 | blocks_per_report = 1; |
| 110 | } |
| 111 | if (blocks_done % blocks_per_report == 0) { |
| 112 | double pct_actually_done = (100.0 * blocks_done) / total_blocks; |
| 113 | printf("% 3.3lf%% | %" PRId64 " blocks checked, %" PRId64 " bad block(s) detected\n" , |
| 114 | pct_actually_done, blocks_done, blocks_failed); |
| 115 | fflush(stdout); |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | // Helper function to deserialize one of the two headers for the ft |
| 120 | // we are checking. |
| 121 | static void |
| 122 | (int fd, struct ft **h1p, struct ft **h2p) |
| 123 | { |
| 124 | struct rbuf rb_0; |
| 125 | struct rbuf rb_1; |
| 126 | uint64_t checkpoint_count_0; |
| 127 | uint64_t checkpoint_count_1; |
| 128 | LSN checkpoint_lsn_0; |
| 129 | LSN checkpoint_lsn_1; |
| 130 | uint32_t version_0, version_1; |
| 131 | bool h0_acceptable = false; |
| 132 | bool h1_acceptable = false; |
| 133 | int r0, r1; |
| 134 | int r; |
| 135 | |
| 136 | { |
| 137 | toku_off_t = 0; |
| 138 | r0 = deserialize_ft_from_fd_into_rbuf( |
| 139 | fd, |
| 140 | header_0_off, |
| 141 | &rb_0, |
| 142 | &checkpoint_count_0, |
| 143 | &checkpoint_lsn_0, |
| 144 | &version_0 |
| 145 | ); |
| 146 | if ((r0==0) && (checkpoint_lsn_0.lsn <= MAX_LSN.lsn)) { |
| 147 | h0_acceptable = true; |
| 148 | } |
| 149 | } |
| 150 | { |
| 151 | toku_off_t = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; |
| 152 | r1 = deserialize_ft_from_fd_into_rbuf( |
| 153 | fd, |
| 154 | header_1_off, |
| 155 | &rb_1, |
| 156 | &checkpoint_count_1, |
| 157 | &checkpoint_lsn_1, |
| 158 | &version_1 |
| 159 | ); |
| 160 | if ((r1==0) && (checkpoint_lsn_1.lsn <= MAX_LSN.lsn)) { |
| 161 | h1_acceptable = true; |
| 162 | } |
| 163 | } |
| 164 | |
| 165 | // If either header is too new, the dictionary is unreadable |
| 166 | if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) { |
| 167 | fprintf(stderr, "This dictionary was created with a version of PerconaFT that is too new. Aborting.\n" ); |
| 168 | abort(); |
| 169 | } |
| 170 | if (h0_acceptable) { |
| 171 | printf("Found dictionary header 1 with LSN %" PRIu64 "\n" , checkpoint_lsn_0.lsn); |
| 172 | r = deserialize_ft_versioned(fd, &rb_0, h1p, version_0); |
| 173 | |
| 174 | if (r != 0) { |
| 175 | printf("---Header Error----\n" ); |
| 176 | } |
| 177 | |
| 178 | } else { |
| 179 | *h1p = NULL; |
| 180 | } |
| 181 | if (h1_acceptable) { |
| 182 | printf("Found dictionary header 2 with LSN %" PRIu64 "\n" , checkpoint_lsn_1.lsn); |
| 183 | r = deserialize_ft_versioned(fd, &rb_1, h2p, version_1); |
| 184 | if (r != 0) { |
| 185 | printf("---Header Error----\n" ); |
| 186 | } |
| 187 | } else { |
| 188 | *h2p = NULL; |
| 189 | } |
| 190 | |
| 191 | if (rb_0.buf) toku_free(rb_0.buf); |
| 192 | if (rb_1.buf) toku_free(rb_1.buf); |
| 193 | } |
| 194 | |
// Helper struct for tracking block checking progress. One instance is
// handed to check_block() through the block table iterator's void* argument.
struct check_block_table_extra {
    int fd;                 // descriptor of the dictionary file being checked
    int64_t blocks_done,    // blocks visited so far
            blocks_failed,  // visited blocks with at least one failed check
            total_blocks;   // number of blocks the pass is expected to visit
    struct ft *h;           // ft whose block table is being walked
};
| 201 | |
| 202 | // Check non-upgraded (legacy) node. |
| 203 | // NOTE: These nodes have less checksumming than more |
| 204 | // recent nodes. This effectively means that we are |
| 205 | // skipping over these nodes. |
| 206 | static int |
| 207 | check_old_node(FTNODE node, struct rbuf *rb, int version) |
| 208 | { |
| 209 | int r = 0; |
| 210 | read_legacy_node_info(node, rb, version); |
| 211 | // For version 14 nodes, advance the buffer to the end |
| 212 | // and verify the checksum. |
| 213 | if (version == FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM) { |
| 214 | // Advance the buffer to the end. |
| 215 | rb->ndone = rb->size - 4; |
| 216 | r = check_legacy_end_checksum(rb); |
| 217 | } |
| 218 | |
| 219 | return r; |
| 220 | } |
| 221 | |
| 222 | // Read, decompress, and check the given block. |
| 223 | static int |
| 224 | check_block(BLOCKNUM blocknum, int64_t UU(blocksize), int64_t UU(address), void *) |
| 225 | { |
| 226 | int r = 0; |
| 227 | int failure = 0; |
| 228 | struct check_block_table_extra *CAST_FROM_VOIDP(cbte, extra); |
| 229 | int fd = cbte->fd; |
| 230 | FT ft = cbte->h; |
| 231 | |
| 232 | struct verify_block_extra be; |
| 233 | init_verify_block_extra(blocknum, &be); |
| 234 | |
| 235 | // Let's read the block off of disk and fill a buffer with that |
| 236 | // block. |
| 237 | struct rbuf rb = RBUF_INITIALIZER; |
| 238 | read_block_from_fd_into_rbuf(fd, blocknum, ft, &rb); |
| 239 | |
| 240 | // Allocate the node. |
| 241 | FTNODE XMALLOC(node); |
| 242 | |
| 243 | initialize_ftnode(node, blocknum); |
| 244 | |
| 245 | r = read_and_check_magic(&rb); |
| 246 | if (r == DB_BADFORMAT) { |
| 247 | printf(" Magic failed.\n" ); |
| 248 | failure++; |
| 249 | } |
| 250 | |
| 251 | r = read_and_check_version(node, &rb); |
| 252 | if (r != 0) { |
| 253 | printf(" Version check failed.\n" ); |
| 254 | failure++; |
| 255 | } |
| 256 | |
| 257 | int version = node->layout_version_read_from_disk; |
| 258 | |
| 259 | //////////////////////////// |
| 260 | // UPGRADE FORK GOES HERE // |
| 261 | //////////////////////////// |
| 262 | |
| 263 | // Check nodes before major layout changes in version 15. |
| 264 | // All newer versions should follow the same layout, for now. |
| 265 | // This predicate would need to be changed if the layout |
| 266 | // of the nodes on disk does indeed change in the future. |
| 267 | if (version < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) |
| 268 | { |
| 269 | struct rbuf nrb; |
| 270 | // Use old decompression method for legacy nodes. |
| 271 | r = decompress_from_raw_block_into_rbuf(rb.buf, rb.size, &nrb, blocknum); |
| 272 | if (r != 0) { |
| 273 | failure++; |
| 274 | goto cleanup; |
| 275 | } |
| 276 | |
| 277 | // Check the end-to-end checksum. |
| 278 | r = check_old_node(node, &nrb, version); |
| 279 | if (r != 0) { |
| 280 | failure++; |
| 281 | } |
| 282 | goto cleanup; |
| 283 | } |
| 284 | |
| 285 | read_node_info(node, &rb, version); |
| 286 | |
| 287 | FTNODE_DISK_DATA ndd; |
| 288 | allocate_and_read_partition_offsets(node, &rb, &ndd); |
| 289 | |
| 290 | r = check_node_info_checksum(&rb); |
| 291 | if (r == TOKUDB_BAD_CHECKSUM) { |
| 292 | printf(" Node info checksum failed.\n" ); |
| 293 | failure++; |
| 294 | } |
| 295 | |
| 296 | // Get the partition info sub block. |
| 297 | struct sub_block sb; |
| 298 | sub_block_init(&sb); |
| 299 | r = read_compressed_sub_block(&rb, &sb); |
| 300 | if (r != 0) { |
| 301 | printf(" Partition info checksum failed.\n" ); |
| 302 | failure++; |
| 303 | } |
| 304 | |
| 305 | just_decompress_sub_block(&sb); |
| 306 | |
| 307 | // If we want to inspect the data inside the partitions, we need |
| 308 | // to call setup_ftnode_partitions(node, bfe, true) |
| 309 | |
| 310 | // <CER> TODO: Create function for this. |
| 311 | // Using the node info, decompress all the keys and pivots to |
| 312 | // detect any corruptions. |
| 313 | for (int i = 0; i < node->n_children; ++i) { |
| 314 | uint32_t curr_offset = BP_START(ndd,i); |
| 315 | uint32_t curr_size = BP_SIZE(ndd,i); |
| 316 | struct rbuf curr_rbuf = {.buf = NULL, .size = 0, .ndone = 0}; |
| 317 | rbuf_init(&curr_rbuf, rb.buf + curr_offset, curr_size); |
| 318 | struct sub_block curr_sb; |
| 319 | sub_block_init(&curr_sb); |
| 320 | |
| 321 | r = read_compressed_sub_block(&rb, &sb); |
| 322 | if (r != 0) { |
| 323 | printf(" Compressed child partition %d checksum failed.\n" , i); |
| 324 | failure++; |
| 325 | } |
| 326 | just_decompress_sub_block(&sb); |
| 327 | |
| 328 | r = verify_ftnode_sub_block(&sb, nullptr, blocknum); |
| 329 | if (r != 0) { |
| 330 | printf(" Uncompressed child partition %d checksum failed.\n" , i); |
| 331 | failure++; |
| 332 | } |
| 333 | |
| 334 | // <CER> If needed, we can print row and/or pivot info at this |
| 335 | // point. |
| 336 | } |
| 337 | |
| 338 | cleanup: |
| 339 | // Cleanup and error incrementing. |
| 340 | if (failure) { |
| 341 | cbte->blocks_failed++; |
| 342 | } |
| 343 | |
| 344 | cbte->blocks_done++; |
| 345 | |
| 346 | if (node) { |
| 347 | toku_free(node); |
| 348 | } |
| 349 | |
| 350 | // Print the status of this block to the console. |
| 351 | report(cbte->blocks_done, cbte->blocks_failed, cbte->total_blocks); |
| 352 | // We need to ALWAYS return 0 if we want to continue iterating |
| 353 | // through the nodes in the file. |
| 354 | r = 0; |
| 355 | return r; |
| 356 | } |
| 357 | |
| 358 | // This calls toku_blocktable_iterate on the given block table. |
| 359 | // Passes our check_block() function to be called as we iterate over |
| 360 | // the block table. This will print any interesting failures and |
| 361 | // update us on our progress. |
| 362 | static void check_block_table(int fd, block_table *bt, struct ft *h) { |
| 363 | int64_t num_blocks = bt->get_blocks_in_use_unlocked(); |
| 364 | printf("Starting verification of checkpoint containing" ); |
| 365 | printf(" %" PRId64 " blocks.\n" , num_blocks); |
| 366 | fflush(stdout); |
| 367 | |
| 368 | struct check_block_table_extra = { .fd = fd, |
| 369 | .blocks_done = 0, |
| 370 | .blocks_failed = 0, |
| 371 | .total_blocks = num_blocks, |
| 372 | .h = h }; |
| 373 | int r = bt->iterate(block_table::TRANSLATION_CURRENT, |
| 374 | check_block, |
| 375 | &extra, |
| 376 | true, |
| 377 | true); |
| 378 | if (r != 0) { |
| 379 | // We can print more information here if necessary. |
| 380 | } |
| 381 | |
| 382 | assert(extra.blocks_done == extra.total_blocks); |
| 383 | printf("Finished verification. " ); |
| 384 | printf(" %" PRId64 " blocks checked," , extra.blocks_done); |
| 385 | printf(" %" PRId64 " bad block(s) detected\n" , extra.blocks_failed); |
| 386 | fflush(stdout); |
| 387 | } |
| 388 | |
| 389 | int |
| 390 | main(int argc, char const * const argv[]) |
| 391 | { |
| 392 | // open the file |
| 393 | int r = 0; |
| 394 | int dictfd; |
| 395 | const char *dictfname, *outfname; |
| 396 | if (argc < 3 || argc > 4) { |
| 397 | fprintf(stderr, "%s: Invalid arguments.\n" , argv[0]); |
| 398 | fprintf(stderr, "Usage: %s <dictionary> <logfile> [report%%]\n" , argv[0]); |
| 399 | r = EX_USAGE; |
| 400 | goto exit; |
| 401 | } |
| 402 | |
| 403 | assert(argc == 3 || argc == 4); |
| 404 | dictfname = argv[1]; |
| 405 | outfname = argv[2]; |
| 406 | if (argc == 4) { |
| 407 | set_errno(0); |
| 408 | pct = strtod(argv[3], NULL); |
| 409 | assert_zero(get_maybe_error_errno()); |
| 410 | assert(pct > 0.0 && pct <= 100.0); |
| 411 | } |
| 412 | |
| 413 | // Open the file as read-only. |
| 414 | dictfd = open(dictfname, O_RDONLY | O_BINARY, S_IRWXU | S_IRWXG | S_IRWXO); |
| 415 | if (dictfd < 0) { |
| 416 | perror(dictfname); |
| 417 | fflush(stderr); |
| 418 | abort(); |
| 419 | } |
| 420 | outf = fopen(outfname, "w" ); |
| 421 | if (!outf) { |
| 422 | perror(outfname); |
| 423 | fflush(stderr); |
| 424 | abort(); |
| 425 | } |
| 426 | |
| 427 | // body of toku_ft_serialize_init(); |
| 428 | num_cores = toku_os_get_number_active_processors(); |
| 429 | r = toku_thread_pool_create(&ft_pool, num_cores); lazy_assert_zero(r); |
| 430 | assert_zero(r); |
| 431 | |
| 432 | // deserialize the header(s) |
| 433 | struct ft *h1, *h2; |
| 434 | deserialize_headers(dictfd, &h1, &h2); |
| 435 | |
| 436 | // walk over the block table and check blocks |
| 437 | if (h1) { |
| 438 | printf("Checking dictionary from header 1.\n" ); |
| 439 | check_block_table(dictfd, &h1->blocktable, h1); |
| 440 | } |
| 441 | if (h2) { |
| 442 | printf("Checking dictionary from header 2.\n" ); |
| 443 | check_block_table(dictfd, &h2->blocktable, h2); |
| 444 | } |
| 445 | if (h1 == NULL && h2 == NULL) { |
| 446 | printf("Both headers have a corruption and could not be used.\n" ); |
| 447 | } |
| 448 | |
| 449 | toku_thread_pool_destroy(&ft_pool); |
| 450 | exit: |
| 451 | return r; |
| 452 | } |
| 453 | |