1/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3#ident "$Id$"
4/*======
5This file is part of PerconaFT.
6
7
8Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
10 PerconaFT is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License, version 2,
12 as published by the Free Software Foundation.
13
14 PerconaFT is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
21
22----------------------------------------
23
24 PerconaFT is free software: you can redistribute it and/or modify
25 it under the terms of the GNU Affero General Public License, version 3,
26 as published by the Free Software Foundation.
27
28 PerconaFT is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU Affero General Public License for more details.
32
33 You should have received a copy of the GNU Affero General Public License
34 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
35======= */
36
37#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38
39////////////////////////////////////////////////////////////////////
40// ftverify - Command line tool that checks the validity of a given
41// fractal tree file, one block at a time.
42////////////////////////////////////////////////////////////////////
43
44#include "portability/toku_assert.h"
45#include "portability/toku_list.h"
46#include "portability/toku_portability.h"
47
48#include "ft/serialize/block_allocator.h"
49#include "ft/ft-internal.h"
50#include "ft/serialize/ft-serialize.h"
51#include "ft/serialize/ft_layout_version.h"
52#include "ft/serialize/ft_node-serialize.h"
53#include "ft/node.h"
54#include "ft/serialize/rbuf.h"
55#include "ft/serialize/sub_block.h"
56#include "util/threadpool.h"
57
58#include <fcntl.h>
59#include <math.h>
60#include <stdio.h>
61#include <stdlib.h>
62#include <sys/stat.h>
63#include <sys/types.h>
64#include <sysexits.h>
65#include <unistd.h>
66
// Stream opened by main() on the <logfile> command line argument.
static FILE *outf = NULL;

// Progress reporting granularity, expressed as a percentage of the total
// number of blocks.  main() overrides it from the optional third argument.
static double pct = 0.5;

// Number of active processors, cached by main() before it creates ft_pool.
static int num_cores = 0;

// Thread pool created by main() from num_cores and destroyed before exit.
static struct toku_thread_pool *ft_pool = NULL;
71
72// Struct for reporting sub block stats.
73struct verify_block_extra {
74 BLOCKNUM b;
75 int n_sub_blocks;
76 uint32_t header_length;
77 uint32_t calc_xsum;
78 uint32_t stored_xsum;
79 bool header_valid;
80 bool sub_blocks_valid;
81 struct sub_block_info *sub_block_results;
82};
83
84// Initialization function for the sub block stats.
85static void
86init_verify_block_extra(BLOCKNUM b, struct verify_block_extra *e)
87{
88 static const struct verify_block_extra default_vbe =
89 {
90 .b = { 0 },
91 .n_sub_blocks = 0,
92 .header_length = 0,
93 .calc_xsum = 0,
94 .stored_xsum = 0,
95 .header_valid = true,
96 .sub_blocks_valid = true,
97 .sub_block_results = NULL
98 };
99 *e = default_vbe;
100 e->b = b;
101}
102
103// Reports percentage of completed blocks.
104static void
105report(int64_t blocks_done, int64_t blocks_failed, int64_t total_blocks)
106{
107 int64_t blocks_per_report = llrint(pct * total_blocks / 100.0);
108 if (blocks_per_report < 1) {
109 blocks_per_report = 1;
110 }
111 if (blocks_done % blocks_per_report == 0) {
112 double pct_actually_done = (100.0 * blocks_done) / total_blocks;
113 printf("% 3.3lf%% | %" PRId64 " blocks checked, %" PRId64 " bad block(s) detected\n",
114 pct_actually_done, blocks_done, blocks_failed);
115 fflush(stdout);
116 }
117}
118
119// Helper function to deserialize one of the two headers for the ft
120// we are checking.
121static void
122deserialize_headers(int fd, struct ft **h1p, struct ft **h2p)
123{
124 struct rbuf rb_0;
125 struct rbuf rb_1;
126 uint64_t checkpoint_count_0;
127 uint64_t checkpoint_count_1;
128 LSN checkpoint_lsn_0;
129 LSN checkpoint_lsn_1;
130 uint32_t version_0, version_1;
131 bool h0_acceptable = false;
132 bool h1_acceptable = false;
133 int r0, r1;
134 int r;
135
136 {
137 toku_off_t header_0_off = 0;
138 r0 = deserialize_ft_from_fd_into_rbuf(
139 fd,
140 header_0_off,
141 &rb_0,
142 &checkpoint_count_0,
143 &checkpoint_lsn_0,
144 &version_0
145 );
146 if ((r0==0) && (checkpoint_lsn_0.lsn <= MAX_LSN.lsn)) {
147 h0_acceptable = true;
148 }
149 }
150 {
151 toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
152 r1 = deserialize_ft_from_fd_into_rbuf(
153 fd,
154 header_1_off,
155 &rb_1,
156 &checkpoint_count_1,
157 &checkpoint_lsn_1,
158 &version_1
159 );
160 if ((r1==0) && (checkpoint_lsn_1.lsn <= MAX_LSN.lsn)) {
161 h1_acceptable = true;
162 }
163 }
164
165 // If either header is too new, the dictionary is unreadable
166 if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) {
167 fprintf(stderr, "This dictionary was created with a version of PerconaFT that is too new. Aborting.\n");
168 abort();
169 }
170 if (h0_acceptable) {
171 printf("Found dictionary header 1 with LSN %" PRIu64 "\n", checkpoint_lsn_0.lsn);
172 r = deserialize_ft_versioned(fd, &rb_0, h1p, version_0);
173
174 if (r != 0) {
175 printf("---Header Error----\n");
176 }
177
178 } else {
179 *h1p = NULL;
180 }
181 if (h1_acceptable) {
182 printf("Found dictionary header 2 with LSN %" PRIu64 "\n", checkpoint_lsn_1.lsn);
183 r = deserialize_ft_versioned(fd, &rb_1, h2p, version_1);
184 if (r != 0) {
185 printf("---Header Error----\n");
186 }
187 } else {
188 *h2p = NULL;
189 }
190
191 if (rb_0.buf) toku_free(rb_0.buf);
192 if (rb_1.buf) toku_free(rb_1.buf);
193}
194
// State shared between check_block_table() and the check_block() callback
// while a block table is being walked.
struct check_block_table_extra {
    // Descriptor of the dictionary file the blocks are read from.
    int fd;
    // Number of blocks check_block() has finished with so far.
    int64_t blocks_done;
    // Number of finished blocks that had at least one failed check.
    int64_t blocks_failed;
    // Number of blocks the walk is expected to visit.
    int64_t total_blocks;
    // Header whose block table is being walked.
    struct ft *h;
};
201
202// Check non-upgraded (legacy) node.
203// NOTE: These nodes have less checksumming than more
204// recent nodes. This effectively means that we are
205// skipping over these nodes.
206static int
207check_old_node(FTNODE node, struct rbuf *rb, int version)
208{
209 int r = 0;
210 read_legacy_node_info(node, rb, version);
211 // For version 14 nodes, advance the buffer to the end
212 // and verify the checksum.
213 if (version == FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM) {
214 // Advance the buffer to the end.
215 rb->ndone = rb->size - 4;
216 r = check_legacy_end_checksum(rb);
217 }
218
219 return r;
220}
221
222// Read, decompress, and check the given block.
223static int
224check_block(BLOCKNUM blocknum, int64_t UU(blocksize), int64_t UU(address), void *extra)
225{
226 int r = 0;
227 int failure = 0;
228 struct check_block_table_extra *CAST_FROM_VOIDP(cbte, extra);
229 int fd = cbte->fd;
230 FT ft = cbte->h;
231
232 struct verify_block_extra be;
233 init_verify_block_extra(blocknum, &be);
234
235 // Let's read the block off of disk and fill a buffer with that
236 // block.
237 struct rbuf rb = RBUF_INITIALIZER;
238 read_block_from_fd_into_rbuf(fd, blocknum, ft, &rb);
239
240 // Allocate the node.
241 FTNODE XMALLOC(node);
242
243 initialize_ftnode(node, blocknum);
244
245 r = read_and_check_magic(&rb);
246 if (r == DB_BADFORMAT) {
247 printf(" Magic failed.\n");
248 failure++;
249 }
250
251 r = read_and_check_version(node, &rb);
252 if (r != 0) {
253 printf(" Version check failed.\n");
254 failure++;
255 }
256
257 int version = node->layout_version_read_from_disk;
258
259 ////////////////////////////
260 // UPGRADE FORK GOES HERE //
261 ////////////////////////////
262
263 // Check nodes before major layout changes in version 15.
264 // All newer versions should follow the same layout, for now.
265 // This predicate would need to be changed if the layout
266 // of the nodes on disk does indeed change in the future.
267 if (version < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES)
268 {
269 struct rbuf nrb;
270 // Use old decompression method for legacy nodes.
271 r = decompress_from_raw_block_into_rbuf(rb.buf, rb.size, &nrb, blocknum);
272 if (r != 0) {
273 failure++;
274 goto cleanup;
275 }
276
277 // Check the end-to-end checksum.
278 r = check_old_node(node, &nrb, version);
279 if (r != 0) {
280 failure++;
281 }
282 goto cleanup;
283 }
284
285 read_node_info(node, &rb, version);
286
287 FTNODE_DISK_DATA ndd;
288 allocate_and_read_partition_offsets(node, &rb, &ndd);
289
290 r = check_node_info_checksum(&rb);
291 if (r == TOKUDB_BAD_CHECKSUM) {
292 printf(" Node info checksum failed.\n");
293 failure++;
294 }
295
296 // Get the partition info sub block.
297 struct sub_block sb;
298 sub_block_init(&sb);
299 r = read_compressed_sub_block(&rb, &sb);
300 if (r != 0) {
301 printf(" Partition info checksum failed.\n");
302 failure++;
303 }
304
305 just_decompress_sub_block(&sb);
306
307 // If we want to inspect the data inside the partitions, we need
308 // to call setup_ftnode_partitions(node, bfe, true)
309
310 // <CER> TODO: Create function for this.
311 // Using the node info, decompress all the keys and pivots to
312 // detect any corruptions.
313 for (int i = 0; i < node->n_children; ++i) {
314 uint32_t curr_offset = BP_START(ndd,i);
315 uint32_t curr_size = BP_SIZE(ndd,i);
316 struct rbuf curr_rbuf = {.buf = NULL, .size = 0, .ndone = 0};
317 rbuf_init(&curr_rbuf, rb.buf + curr_offset, curr_size);
318 struct sub_block curr_sb;
319 sub_block_init(&curr_sb);
320
321 r = read_compressed_sub_block(&rb, &sb);
322 if (r != 0) {
323 printf(" Compressed child partition %d checksum failed.\n", i);
324 failure++;
325 }
326 just_decompress_sub_block(&sb);
327
328 r = verify_ftnode_sub_block(&sb, nullptr, blocknum);
329 if (r != 0) {
330 printf(" Uncompressed child partition %d checksum failed.\n", i);
331 failure++;
332 }
333
334 // <CER> If needed, we can print row and/or pivot info at this
335 // point.
336 }
337
338cleanup:
339 // Cleanup and error incrementing.
340 if (failure) {
341 cbte->blocks_failed++;
342 }
343
344 cbte->blocks_done++;
345
346 if (node) {
347 toku_free(node);
348 }
349
350 // Print the status of this block to the console.
351 report(cbte->blocks_done, cbte->blocks_failed, cbte->total_blocks);
352 // We need to ALWAYS return 0 if we want to continue iterating
353 // through the nodes in the file.
354 r = 0;
355 return r;
356}
357
358// This calls toku_blocktable_iterate on the given block table.
359// Passes our check_block() function to be called as we iterate over
360// the block table. This will print any interesting failures and
361// update us on our progress.
362static void check_block_table(int fd, block_table *bt, struct ft *h) {
363 int64_t num_blocks = bt->get_blocks_in_use_unlocked();
364 printf("Starting verification of checkpoint containing");
365 printf(" %" PRId64 " blocks.\n", num_blocks);
366 fflush(stdout);
367
368 struct check_block_table_extra extra = { .fd = fd,
369 .blocks_done = 0,
370 .blocks_failed = 0,
371 .total_blocks = num_blocks,
372 .h = h };
373 int r = bt->iterate(block_table::TRANSLATION_CURRENT,
374 check_block,
375 &extra,
376 true,
377 true);
378 if (r != 0) {
379 // We can print more information here if necessary.
380 }
381
382 assert(extra.blocks_done == extra.total_blocks);
383 printf("Finished verification. ");
384 printf(" %" PRId64 " blocks checked,", extra.blocks_done);
385 printf(" %" PRId64 " bad block(s) detected\n", extra.blocks_failed);
386 fflush(stdout);
387}
388
389int
390main(int argc, char const * const argv[])
391{
392 // open the file
393 int r = 0;
394 int dictfd;
395 const char *dictfname, *outfname;
396 if (argc < 3 || argc > 4) {
397 fprintf(stderr, "%s: Invalid arguments.\n", argv[0]);
398 fprintf(stderr, "Usage: %s <dictionary> <logfile> [report%%]\n", argv[0]);
399 r = EX_USAGE;
400 goto exit;
401 }
402
403 assert(argc == 3 || argc == 4);
404 dictfname = argv[1];
405 outfname = argv[2];
406 if (argc == 4) {
407 set_errno(0);
408 pct = strtod(argv[3], NULL);
409 assert_zero(get_maybe_error_errno());
410 assert(pct > 0.0 && pct <= 100.0);
411 }
412
413 // Open the file as read-only.
414 dictfd = open(dictfname, O_RDONLY | O_BINARY, S_IRWXU | S_IRWXG | S_IRWXO);
415 if (dictfd < 0) {
416 perror(dictfname);
417 fflush(stderr);
418 abort();
419 }
420 outf = fopen(outfname, "w");
421 if (!outf) {
422 perror(outfname);
423 fflush(stderr);
424 abort();
425 }
426
427 // body of toku_ft_serialize_init();
428 num_cores = toku_os_get_number_active_processors();
429 r = toku_thread_pool_create(&ft_pool, num_cores); lazy_assert_zero(r);
430 assert_zero(r);
431
432 // deserialize the header(s)
433 struct ft *h1, *h2;
434 deserialize_headers(dictfd, &h1, &h2);
435
436 // walk over the block table and check blocks
437 if (h1) {
438 printf("Checking dictionary from header 1.\n");
439 check_block_table(dictfd, &h1->blocktable, h1);
440 }
441 if (h2) {
442 printf("Checking dictionary from header 2.\n");
443 check_block_table(dictfd, &h2->blocktable, h2);
444 }
445 if (h1 == NULL && h2 == NULL) {
446 printf("Both headers have a corruption and could not be used.\n");
447 }
448
449 toku_thread_pool_destroy(&ft_pool);
450exit:
451 return r;
452}
453