1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
3 | #ident "$Id$" |
4 | /*====== |
5 | This file is part of PerconaFT. |
6 | |
7 | |
8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
9 | |
10 | PerconaFT is free software: you can redistribute it and/or modify |
11 | it under the terms of the GNU General Public License, version 2, |
12 | as published by the Free Software Foundation. |
13 | |
14 | PerconaFT is distributed in the hope that it will be useful, |
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | GNU General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU General Public License |
20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
21 | |
22 | ---------------------------------------- |
23 | |
24 | PerconaFT is free software: you can redistribute it and/or modify |
25 | it under the terms of the GNU Affero General Public License, version 3, |
26 | as published by the Free Software Foundation. |
27 | |
28 | PerconaFT is distributed in the hope that it will be useful, |
29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
31 | GNU Affero General Public License for more details. |
32 | |
33 | You should have received a copy of the GNU Affero General Public License |
34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
35 | ======= */ |
36 | |
37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
38 | |
39 | //////////////////////////////////////////////////////////////////// |
40 | // ftverify - Command line tool that checks the validity of a given |
41 | // fractal tree file, one block at a time. |
42 | //////////////////////////////////////////////////////////////////// |
43 | |
44 | #include "portability/toku_assert.h" |
45 | #include "portability/toku_list.h" |
46 | #include "portability/toku_portability.h" |
47 | |
48 | #include "ft/serialize/block_allocator.h" |
49 | #include "ft/ft-internal.h" |
50 | #include "ft/serialize/ft-serialize.h" |
51 | #include "ft/serialize/ft_layout_version.h" |
52 | #include "ft/serialize/ft_node-serialize.h" |
53 | #include "ft/node.h" |
54 | #include "ft/serialize/rbuf.h" |
55 | #include "ft/serialize/sub_block.h" |
56 | #include "util/threadpool.h" |
57 | |
58 | #include <fcntl.h> |
59 | #include <math.h> |
60 | #include <stdio.h> |
61 | #include <stdlib.h> |
62 | #include <sys/stat.h> |
63 | #include <sys/types.h> |
64 | #include <sysexits.h> |
65 | #include <unistd.h> |
66 | |
67 | static int num_cores = 0; // cache the number of cores for the parallelization |
68 | static struct toku_thread_pool *ft_pool = NULL; |
69 | static FILE *outf; |
70 | static double pct = 0.5; |
71 | |
72 | // Struct for reporting sub block stats. |
73 | struct { |
74 | BLOCKNUM ; |
75 | int ; |
76 | uint32_t ; |
77 | uint32_t ; |
78 | uint32_t ; |
79 | bool ; |
80 | bool ; |
81 | struct sub_block_info *; |
82 | }; |
83 | |
84 | // Initialization function for the sub block stats. |
85 | static void |
86 | (BLOCKNUM b, struct verify_block_extra *e) |
87 | { |
88 | static const struct verify_block_extra default_vbe = |
89 | { |
90 | .b = { 0 }, |
91 | .n_sub_blocks = 0, |
92 | .header_length = 0, |
93 | .calc_xsum = 0, |
94 | .stored_xsum = 0, |
95 | .header_valid = true, |
96 | .sub_blocks_valid = true, |
97 | .sub_block_results = NULL |
98 | }; |
99 | *e = default_vbe; |
100 | e->b = b; |
101 | } |
102 | |
103 | // Reports percentage of completed blocks. |
104 | static void |
105 | report(int64_t blocks_done, int64_t blocks_failed, int64_t total_blocks) |
106 | { |
107 | int64_t blocks_per_report = llrint(pct * total_blocks / 100.0); |
108 | if (blocks_per_report < 1) { |
109 | blocks_per_report = 1; |
110 | } |
111 | if (blocks_done % blocks_per_report == 0) { |
112 | double pct_actually_done = (100.0 * blocks_done) / total_blocks; |
113 | printf("% 3.3lf%% | %" PRId64 " blocks checked, %" PRId64 " bad block(s) detected\n" , |
114 | pct_actually_done, blocks_done, blocks_failed); |
115 | fflush(stdout); |
116 | } |
117 | } |
118 | |
119 | // Helper function to deserialize one of the two headers for the ft |
120 | // we are checking. |
121 | static void |
122 | (int fd, struct ft **h1p, struct ft **h2p) |
123 | { |
124 | struct rbuf rb_0; |
125 | struct rbuf rb_1; |
126 | uint64_t checkpoint_count_0; |
127 | uint64_t checkpoint_count_1; |
128 | LSN checkpoint_lsn_0; |
129 | LSN checkpoint_lsn_1; |
130 | uint32_t version_0, version_1; |
131 | bool h0_acceptable = false; |
132 | bool h1_acceptable = false; |
133 | int r0, r1; |
134 | int r; |
135 | |
136 | { |
137 | toku_off_t = 0; |
138 | r0 = deserialize_ft_from_fd_into_rbuf( |
139 | fd, |
140 | header_0_off, |
141 | &rb_0, |
142 | &checkpoint_count_0, |
143 | &checkpoint_lsn_0, |
144 | &version_0 |
145 | ); |
146 | if ((r0==0) && (checkpoint_lsn_0.lsn <= MAX_LSN.lsn)) { |
147 | h0_acceptable = true; |
148 | } |
149 | } |
150 | { |
151 | toku_off_t = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; |
152 | r1 = deserialize_ft_from_fd_into_rbuf( |
153 | fd, |
154 | header_1_off, |
155 | &rb_1, |
156 | &checkpoint_count_1, |
157 | &checkpoint_lsn_1, |
158 | &version_1 |
159 | ); |
160 | if ((r1==0) && (checkpoint_lsn_1.lsn <= MAX_LSN.lsn)) { |
161 | h1_acceptable = true; |
162 | } |
163 | } |
164 | |
165 | // If either header is too new, the dictionary is unreadable |
166 | if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) { |
167 | fprintf(stderr, "This dictionary was created with a version of PerconaFT that is too new. Aborting.\n" ); |
168 | abort(); |
169 | } |
170 | if (h0_acceptable) { |
171 | printf("Found dictionary header 1 with LSN %" PRIu64 "\n" , checkpoint_lsn_0.lsn); |
172 | r = deserialize_ft_versioned(fd, &rb_0, h1p, version_0); |
173 | |
174 | if (r != 0) { |
175 | printf("---Header Error----\n" ); |
176 | } |
177 | |
178 | } else { |
179 | *h1p = NULL; |
180 | } |
181 | if (h1_acceptable) { |
182 | printf("Found dictionary header 2 with LSN %" PRIu64 "\n" , checkpoint_lsn_1.lsn); |
183 | r = deserialize_ft_versioned(fd, &rb_1, h2p, version_1); |
184 | if (r != 0) { |
185 | printf("---Header Error----\n" ); |
186 | } |
187 | } else { |
188 | *h2p = NULL; |
189 | } |
190 | |
191 | if (rb_0.buf) toku_free(rb_0.buf); |
192 | if (rb_1.buf) toku_free(rb_1.buf); |
193 | } |
194 | |
// Helper struct for tracking block checking progress.  One instance is
// created per block-table walk and handed to check_block() through the
// iterator's void* argument.
//
// Member order matters: check_block_table() fills this with a
// designated initializer, which in C++ must follow declaration order.
struct check_block_table_extra {
    int fd;                 // descriptor of the dictionary file being verified
    int64_t blocks_done,    // blocks visited so far
            blocks_failed,  // visited blocks with at least one failed check
            total_blocks;   // blocks the walk is expected to visit
    struct ft *h;           // header whose block table is being walked
};
201 | |
202 | // Check non-upgraded (legacy) node. |
203 | // NOTE: These nodes have less checksumming than more |
204 | // recent nodes. This effectively means that we are |
205 | // skipping over these nodes. |
206 | static int |
207 | check_old_node(FTNODE node, struct rbuf *rb, int version) |
208 | { |
209 | int r = 0; |
210 | read_legacy_node_info(node, rb, version); |
211 | // For version 14 nodes, advance the buffer to the end |
212 | // and verify the checksum. |
213 | if (version == FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM) { |
214 | // Advance the buffer to the end. |
215 | rb->ndone = rb->size - 4; |
216 | r = check_legacy_end_checksum(rb); |
217 | } |
218 | |
219 | return r; |
220 | } |
221 | |
222 | // Read, decompress, and check the given block. |
223 | static int |
224 | check_block(BLOCKNUM blocknum, int64_t UU(blocksize), int64_t UU(address), void *) |
225 | { |
226 | int r = 0; |
227 | int failure = 0; |
228 | struct check_block_table_extra *CAST_FROM_VOIDP(cbte, extra); |
229 | int fd = cbte->fd; |
230 | FT ft = cbte->h; |
231 | |
232 | struct verify_block_extra be; |
233 | init_verify_block_extra(blocknum, &be); |
234 | |
235 | // Let's read the block off of disk and fill a buffer with that |
236 | // block. |
237 | struct rbuf rb = RBUF_INITIALIZER; |
238 | read_block_from_fd_into_rbuf(fd, blocknum, ft, &rb); |
239 | |
240 | // Allocate the node. |
241 | FTNODE XMALLOC(node); |
242 | |
243 | initialize_ftnode(node, blocknum); |
244 | |
245 | r = read_and_check_magic(&rb); |
246 | if (r == DB_BADFORMAT) { |
247 | printf(" Magic failed.\n" ); |
248 | failure++; |
249 | } |
250 | |
251 | r = read_and_check_version(node, &rb); |
252 | if (r != 0) { |
253 | printf(" Version check failed.\n" ); |
254 | failure++; |
255 | } |
256 | |
257 | int version = node->layout_version_read_from_disk; |
258 | |
259 | //////////////////////////// |
260 | // UPGRADE FORK GOES HERE // |
261 | //////////////////////////// |
262 | |
263 | // Check nodes before major layout changes in version 15. |
264 | // All newer versions should follow the same layout, for now. |
265 | // This predicate would need to be changed if the layout |
266 | // of the nodes on disk does indeed change in the future. |
267 | if (version < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) |
268 | { |
269 | struct rbuf nrb; |
270 | // Use old decompression method for legacy nodes. |
271 | r = decompress_from_raw_block_into_rbuf(rb.buf, rb.size, &nrb, blocknum); |
272 | if (r != 0) { |
273 | failure++; |
274 | goto cleanup; |
275 | } |
276 | |
277 | // Check the end-to-end checksum. |
278 | r = check_old_node(node, &nrb, version); |
279 | if (r != 0) { |
280 | failure++; |
281 | } |
282 | goto cleanup; |
283 | } |
284 | |
285 | read_node_info(node, &rb, version); |
286 | |
287 | FTNODE_DISK_DATA ndd; |
288 | allocate_and_read_partition_offsets(node, &rb, &ndd); |
289 | |
290 | r = check_node_info_checksum(&rb); |
291 | if (r == TOKUDB_BAD_CHECKSUM) { |
292 | printf(" Node info checksum failed.\n" ); |
293 | failure++; |
294 | } |
295 | |
296 | // Get the partition info sub block. |
297 | struct sub_block sb; |
298 | sub_block_init(&sb); |
299 | r = read_compressed_sub_block(&rb, &sb); |
300 | if (r != 0) { |
301 | printf(" Partition info checksum failed.\n" ); |
302 | failure++; |
303 | } |
304 | |
305 | just_decompress_sub_block(&sb); |
306 | |
307 | // If we want to inspect the data inside the partitions, we need |
308 | // to call setup_ftnode_partitions(node, bfe, true) |
309 | |
310 | // <CER> TODO: Create function for this. |
311 | // Using the node info, decompress all the keys and pivots to |
312 | // detect any corruptions. |
313 | for (int i = 0; i < node->n_children; ++i) { |
314 | uint32_t curr_offset = BP_START(ndd,i); |
315 | uint32_t curr_size = BP_SIZE(ndd,i); |
316 | struct rbuf curr_rbuf = {.buf = NULL, .size = 0, .ndone = 0}; |
317 | rbuf_init(&curr_rbuf, rb.buf + curr_offset, curr_size); |
318 | struct sub_block curr_sb; |
319 | sub_block_init(&curr_sb); |
320 | |
321 | r = read_compressed_sub_block(&rb, &sb); |
322 | if (r != 0) { |
323 | printf(" Compressed child partition %d checksum failed.\n" , i); |
324 | failure++; |
325 | } |
326 | just_decompress_sub_block(&sb); |
327 | |
328 | r = verify_ftnode_sub_block(&sb, nullptr, blocknum); |
329 | if (r != 0) { |
330 | printf(" Uncompressed child partition %d checksum failed.\n" , i); |
331 | failure++; |
332 | } |
333 | |
334 | // <CER> If needed, we can print row and/or pivot info at this |
335 | // point. |
336 | } |
337 | |
338 | cleanup: |
339 | // Cleanup and error incrementing. |
340 | if (failure) { |
341 | cbte->blocks_failed++; |
342 | } |
343 | |
344 | cbte->blocks_done++; |
345 | |
346 | if (node) { |
347 | toku_free(node); |
348 | } |
349 | |
350 | // Print the status of this block to the console. |
351 | report(cbte->blocks_done, cbte->blocks_failed, cbte->total_blocks); |
352 | // We need to ALWAYS return 0 if we want to continue iterating |
353 | // through the nodes in the file. |
354 | r = 0; |
355 | return r; |
356 | } |
357 | |
358 | // This calls toku_blocktable_iterate on the given block table. |
359 | // Passes our check_block() function to be called as we iterate over |
360 | // the block table. This will print any interesting failures and |
361 | // update us on our progress. |
362 | static void check_block_table(int fd, block_table *bt, struct ft *h) { |
363 | int64_t num_blocks = bt->get_blocks_in_use_unlocked(); |
364 | printf("Starting verification of checkpoint containing" ); |
365 | printf(" %" PRId64 " blocks.\n" , num_blocks); |
366 | fflush(stdout); |
367 | |
368 | struct check_block_table_extra = { .fd = fd, |
369 | .blocks_done = 0, |
370 | .blocks_failed = 0, |
371 | .total_blocks = num_blocks, |
372 | .h = h }; |
373 | int r = bt->iterate(block_table::TRANSLATION_CURRENT, |
374 | check_block, |
375 | &extra, |
376 | true, |
377 | true); |
378 | if (r != 0) { |
379 | // We can print more information here if necessary. |
380 | } |
381 | |
382 | assert(extra.blocks_done == extra.total_blocks); |
383 | printf("Finished verification. " ); |
384 | printf(" %" PRId64 " blocks checked," , extra.blocks_done); |
385 | printf(" %" PRId64 " bad block(s) detected\n" , extra.blocks_failed); |
386 | fflush(stdout); |
387 | } |
388 | |
389 | int |
390 | main(int argc, char const * const argv[]) |
391 | { |
392 | // open the file |
393 | int r = 0; |
394 | int dictfd; |
395 | const char *dictfname, *outfname; |
396 | if (argc < 3 || argc > 4) { |
397 | fprintf(stderr, "%s: Invalid arguments.\n" , argv[0]); |
398 | fprintf(stderr, "Usage: %s <dictionary> <logfile> [report%%]\n" , argv[0]); |
399 | r = EX_USAGE; |
400 | goto exit; |
401 | } |
402 | |
403 | assert(argc == 3 || argc == 4); |
404 | dictfname = argv[1]; |
405 | outfname = argv[2]; |
406 | if (argc == 4) { |
407 | set_errno(0); |
408 | pct = strtod(argv[3], NULL); |
409 | assert_zero(get_maybe_error_errno()); |
410 | assert(pct > 0.0 && pct <= 100.0); |
411 | } |
412 | |
413 | // Open the file as read-only. |
414 | dictfd = open(dictfname, O_RDONLY | O_BINARY, S_IRWXU | S_IRWXG | S_IRWXO); |
415 | if (dictfd < 0) { |
416 | perror(dictfname); |
417 | fflush(stderr); |
418 | abort(); |
419 | } |
420 | outf = fopen(outfname, "w" ); |
421 | if (!outf) { |
422 | perror(outfname); |
423 | fflush(stderr); |
424 | abort(); |
425 | } |
426 | |
427 | // body of toku_ft_serialize_init(); |
428 | num_cores = toku_os_get_number_active_processors(); |
429 | r = toku_thread_pool_create(&ft_pool, num_cores); lazy_assert_zero(r); |
430 | assert_zero(r); |
431 | |
432 | // deserialize the header(s) |
433 | struct ft *h1, *h2; |
434 | deserialize_headers(dictfd, &h1, &h2); |
435 | |
436 | // walk over the block table and check blocks |
437 | if (h1) { |
438 | printf("Checking dictionary from header 1.\n" ); |
439 | check_block_table(dictfd, &h1->blocktable, h1); |
440 | } |
441 | if (h2) { |
442 | printf("Checking dictionary from header 2.\n" ); |
443 | check_block_table(dictfd, &h2->blocktable, h2); |
444 | } |
445 | if (h1 == NULL && h2 == NULL) { |
446 | printf("Both headers have a corruption and could not be used.\n" ); |
447 | } |
448 | |
449 | toku_thread_pool_destroy(&ft_pool); |
450 | exit: |
451 | return r; |
452 | } |
453 | |