ftverify.cc source code [MariaDB/storage/tokudb/PerconaFT/tools/ftverify.cc]

1	/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2	// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3	#ident "$Id$"
4	/*======
5	This file is part of PerconaFT.
6
7
8	Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
10	PerconaFT is free software: you can redistribute it and/or modify
11	it under the terms of the GNU General Public License, version 2,
12	as published by the Free Software Foundation.
13
14	PerconaFT is distributed in the hope that it will be useful,
15	but WITHOUT ANY WARRANTY; without even the implied warranty of
16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17	GNU General Public License for more details.
18
19	You should have received a copy of the GNU General Public License
20	along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
21
22	----------------------------------------
23
24	PerconaFT is free software: you can redistribute it and/or modify
25	it under the terms of the GNU Affero General Public License, version 3,
26	as published by the Free Software Foundation.
27
28	PerconaFT is distributed in the hope that it will be useful,
29	but WITHOUT ANY WARRANTY; without even the implied warranty of
30	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31	GNU Affero General Public License for more details.
32
33	You should have received a copy of the GNU Affero General Public License
34	along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
35	======= */
36
37	#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38
39	////////////////////////////////////////////////////////////////////
40	// ftverify - Command line tool that checks the validity of a given
41	// fractal tree file, one block at a time.
42	////////////////////////////////////////////////////////////////////
43
44	#include "portability/toku_assert.h"
45	#include "portability/toku_list.h"
46	#include "portability/toku_portability.h"
47
48	#include "ft/serialize/block_allocator.h"
49	#include "ft/ft-internal.h"
50	#include "ft/serialize/ft-serialize.h"
51	#include "ft/serialize/ft_layout_version.h"
52	#include "ft/serialize/ft_node-serialize.h"
53	#include "ft/node.h"
54	#include "ft/serialize/rbuf.h"
55	#include "ft/serialize/sub_block.h"
56	#include "util/threadpool.h"
57
58	#include <fcntl.h>
59	#include <math.h>
60	#include <stdio.h>
61	#include <stdlib.h>
62	#include <sys/stat.h>
63	#include <sys/types.h>
64	#include <sysexits.h>
65	#include <unistd.h>
66
// Cache the number of cores for the parallelization; set once in main()
// and used to size ft_pool.
static int num_cores = 0;
// Thread pool created in main() and destroyed before main() returns.
static struct toku_thread_pool *ft_pool = NULL;
// Log file opened by main() from the <logfile> command line argument.
static FILE *outf;
// Progress-report interval, expressed as a percentage of the total number
// of blocks. Overridden by the optional [report%] command line argument.
static double pct = 0.5;
71
72	// Struct for reporting sub block stats.
73	struct verify_block_extra {
74	BLOCKNUM b;
75	int n_sub_blocks;
76	uint32_t header_length;
77	uint32_t calc_xsum;
78	uint32_t stored_xsum;
79	bool header_valid;
80	bool sub_blocks_valid;
81	struct sub_block_info *sub_block_results;
82	};
83
84	// Initialization function for the sub block stats.
85	static void
86	init_verify_block_extra(BLOCKNUM b, struct verify_block_extra *e)
87	{
88	static const struct verify_block_extra default_vbe =
89	{
90	.b = { `0` },
91	.n_sub_blocks = `0`,
92	.header_length = `0`,
93	.calc_xsum = `0`,
94	.stored_xsum = `0`,
95	.header_valid = true,
96	.sub_blocks_valid = true,
97	.sub_block_results = NULL
98	};
99	*e = default_vbe;
100	e->b = b;
101	}
102
103	// Reports percentage of completed blocks.
104	static void
105	report(int64_t blocks_done, int64_t blocks_failed, int64_t total_blocks)
106	{
107	int64_t blocks_per_report = llrint(pct * total_blocks / `100.0`);
108	if (blocks_per_report < `1`) {
109	blocks_per_report = `1`;
110	}
111	if (blocks_done % blocks_per_report == `0`) {
112	double pct_actually_done = (`100.0` * blocks_done) / total_blocks;
113	printf("% 3.3lf%% \| %" PRId64 " blocks checked, %" PRId64 " bad block(s) detected\n",
114	pct_actually_done, blocks_done, blocks_failed);
115	fflush(stdout);
116	}
117	}
118
119	// Helper function to deserialize one of the two headers for the ft
120	// we are checking.
121	static void
122	deserialize_headers(int fd, struct ft h1p, struct ft h2p)
123	{
124	struct rbuf rb_0;
125	struct rbuf rb_1;
126	uint64_t checkpoint_count_0;
127	uint64_t checkpoint_count_1;
128	LSN checkpoint_lsn_0;
129	LSN checkpoint_lsn_1;
130	uint32_t version_0, version_1;
131	bool h0_acceptable = false;
132	bool h1_acceptable = false;
133	int r0, r1;
134	int r;
135
136	{
137	toku_off_t header_0_off = `0`;
138	r0 = deserialize_ft_from_fd_into_rbuf(
139	fd,
140	header_0_off,
141	&rb_0,
142	&checkpoint_count_0,
143	&checkpoint_lsn_0,
144	&version_0
145	);
146	if ((r0==`0`) && (checkpoint_lsn_0.lsn <= MAX_LSN.lsn)) {
147	h0_acceptable = true;
148	}
149	}
150	{
151	toku_off_t header_1_off = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
152	r1 = deserialize_ft_from_fd_into_rbuf(
153	fd,
154	header_1_off,
155	&rb_1,
156	&checkpoint_count_1,
157	&checkpoint_lsn_1,
158	&version_1
159	);
160	if ((r1==`0`) && (checkpoint_lsn_1.lsn <= MAX_LSN.lsn)) {
161	h1_acceptable = true;
162	}
163	}
164
165	// If either header is too new, the dictionary is unreadable
166	if (r0 == TOKUDB_DICTIONARY_TOO_NEW \|\| r1 == TOKUDB_DICTIONARY_TOO_NEW) {
167	fprintf(stderr, "This dictionary was created with a version of PerconaFT that is too new. Aborting.\n");
168	abort();
169	}
170	if (h0_acceptable) {
171	printf("Found dictionary header 1 with LSN %" PRIu64 "\n", checkpoint_lsn_0.lsn);
172	r = deserialize_ft_versioned(fd, &rb_0, h1p, version_0);
173
174	if (r != `0`) {
175	printf("---Header Error----\n");
176	}
177
178	} else {
179	*h1p = NULL;
180	}
181	if (h1_acceptable) {
182	printf("Found dictionary header 2 with LSN %" PRIu64 "\n", checkpoint_lsn_1.lsn);
183	r = deserialize_ft_versioned(fd, &rb_1, h2p, version_1);
184	if (r != `0`) {
185	printf("---Header Error----\n");
186	}
187	} else {
188	*h2p = NULL;
189	}
190
191	if (rb_0.buf) toku_free(rb_0.buf);
192	if (rb_1.buf) toku_free(rb_1.buf);
193	}
194
// Helper struct for tracking block checking progress.
// One instance is passed (as the opaque `extra` pointer) to every
// check_block() call made while iterating over a block table.
struct check_block_table_extra {
    int fd;                 // file descriptor of the dictionary being verified
    int64_t blocks_done;    // blocks checked so far
    int64_t blocks_failed;  // blocks in which at least one check failed
    int64_t total_blocks;   // blocks in use when the iteration started
    struct ft *h;           // header whose block table is being walked
};
201
202	// Check non-upgraded (legacy) node.
203	// NOTE: These nodes have less checksumming than more
204	// recent nodes. This effectively means that we are
205	// skipping over these nodes.
206	static int
207	check_old_node(FTNODE node, struct rbuf rb, int* version)
208	{
209	int r = `0`;
210	read_legacy_node_info(node, rb, version);
211	// For version 14 nodes, advance the buffer to the end
212	// and verify the checksum.
213	if (version == FT_FIRST_LAYOUT_VERSION_WITH_END_TO_END_CHECKSUM) {
214	// Advance the buffer to the end.
215	rb->ndone = rb->size - `4`;
216	r = check_legacy_end_checksum(rb);
217	}
218
219	return r;
220	}
221
222	// Read, decompress, and check the given block.
223	static int
224	check_block(BLOCKNUM blocknum, int64_t UU(blocksize), int64_t UU(address), void *extra)
225	{
226	int r = `0`;
227	int failure = `0`;
228	struct check_block_table_extra *CAST_FROM_VOIDP(cbte, extra);
229	int fd = cbte->fd;
230	FT ft = cbte->h;
231
232	struct verify_block_extra be;
233	init_verify_block_extra(blocknum, &be);
234
235	// Let's read the block off of disk and fill a buffer with that
236	// block.
237	struct rbuf rb = RBUF_INITIALIZER;
238	read_block_from_fd_into_rbuf(fd, blocknum, ft, &rb);
239
240	// Allocate the node.
241	FTNODE XMALLOC(node);
242
243	initialize_ftnode(node, blocknum);
244
245	r = read_and_check_magic(&rb);
246	if (r == DB_BADFORMAT) {
247	printf(" Magic failed.\n");
248	failure++;
249	}
250
251	r = read_and_check_version(node, &rb);
252	if (r != `0`) {
253	printf(" Version check failed.\n");
254	failure++;
255	}
256
257	int version = node->layout_version_read_from_disk;
258
259	////////////////////////////
260	// UPGRADE FORK GOES HERE //
261	////////////////////////////
262
263	// Check nodes before major layout changes in version 15.
264	// All newer versions should follow the same layout, for now.
265	// This predicate would need to be changed if the layout
266	// of the nodes on disk does indeed change in the future.
267	if (version < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES)
268	{
269	struct rbuf nrb;
270	// Use old decompression method for legacy nodes.
271	r = decompress_from_raw_block_into_rbuf(rb.buf, rb.size, &nrb, blocknum);
272	if (r != `0`) {
273	failure++;
274	goto cleanup;
275	}
276
277	// Check the end-to-end checksum.
278	r = check_old_node(node, &nrb, version);
279	if (r != `0`) {
280	failure++;
281	}
282	goto cleanup;
283	}
284
285	read_node_info(node, &rb, version);
286
287	FTNODE_DISK_DATA ndd;
288	allocate_and_read_partition_offsets(node, &rb, &ndd);
289
290	r = check_node_info_checksum(&rb);
291	if (r == TOKUDB_BAD_CHECKSUM) {
292	printf(" Node info checksum failed.\n");
293	failure++;
294	}
295
296	// Get the partition info sub block.
297	struct sub_block sb;
298	sub_block_init(&sb);
299	r = read_compressed_sub_block(&rb, &sb);
300	if (r != `0`) {
301	printf(" Partition info checksum failed.\n");
302	failure++;
303	}
304
305	just_decompress_sub_block(&sb);
306
307	// If we want to inspect the data inside the partitions, we need
308	// to call setup_ftnode_partitions(node, bfe, true)
309
310	// <CER> TODO: Create function for this.
311	// Using the node info, decompress all the keys and pivots to
312	// detect any corruptions.
313	for (int i = `0`; i < node->n_children; ++i) {
314	uint32_t curr_offset = BP_START(ndd,i);
315	uint32_t curr_size = BP_SIZE(ndd,i);
316	struct rbuf curr_rbuf = {.buf = NULL, .size = `0`, .ndone = `0`};
317	rbuf_init(&curr_rbuf, rb.buf + curr_offset, curr_size);
318	struct sub_block curr_sb;
319	sub_block_init(&curr_sb);
320
321	r = read_compressed_sub_block(&rb, &sb);
322	if (r != `0`) {
323	printf(" Compressed child partition %d checksum failed.\n", i);
324	failure++;
325	}
326	just_decompress_sub_block(&sb);
327
328	r = verify_ftnode_sub_block(&sb, nullptr, blocknum);
329	if (r != `0`) {
330	printf(" Uncompressed child partition %d checksum failed.\n", i);
331	failure++;
332	}
333
334	// <CER> If needed, we can print row and/or pivot info at this
335	// point.
336	}
337
338	cleanup:
339	// Cleanup and error incrementing.
340	if (failure) {
341	cbte->blocks_failed++;
342	}
343
344	cbte->blocks_done++;
345
346	if (node) {
347	toku_free(node);
348	}
349
350	// Print the status of this block to the console.
351	report(cbte->blocks_done, cbte->blocks_failed, cbte->total_blocks);
352	// We need to ALWAYS return 0 if we want to continue iterating
353	// through the nodes in the file.
354	r = `0`;
355	return r;
356	}
357
358	// This calls toku_blocktable_iterate on the given block table.
359	// Passes our check_block() function to be called as we iterate over
360	// the block table. This will print any interesting failures and
361	// update us on our progress.
362	static void check_block_table(int fd, block_table bt, struct* ft *h) {
363	int64_t num_blocks = bt->get_blocks_in_use_unlocked();
364	printf("Starting verification of checkpoint containing");
365	printf(" %" PRId64 " blocks.\n", num_blocks);
366	fflush(stdout);
367
368	struct check_block_table_extra extra = { .fd = fd,
369	.blocks_done = `0`,
370	.blocks_failed = `0`,
371	.total_blocks = num_blocks,
372	.h = h };
373	int r = bt->iterate(block_table::TRANSLATION_CURRENT,
374	check_block,
375	&extra,
376	true,
377	true);
378	if (r != `0`) {
379	// We can print more information here if necessary.
380	}
381
382	assert(extra.blocks_done == extra.total_blocks);
383	printf("Finished verification. ");
384	printf(" %" PRId64 " blocks checked,", extra.blocks_done);
385	printf(" %" PRId64 " bad block(s) detected\n", extra.blocks_failed);
386	fflush(stdout);
387	}
388
389	int
390	main(int argc, char const * const argv[])
391	{
392	// open the file
393	int r = `0`;
394	int dictfd;
395	const char dictfname, outfname;
396	if (argc < `3` \|\| argc > `4`) {
397	fprintf(stderr, "%s: Invalid arguments.\n", argv[`0`]);
398	fprintf(stderr, "Usage: %s <dictionary> <logfile> [report%%]\n", argv[`0`]);
399	r = EX_USAGE;
400	goto exit;
401	}
402
403	assert(argc == `3` \|\| argc == `4`);
404	dictfname = argv[`1`];
405	outfname = argv[`2`];
406	if (argc == `4`) {
407	set_errno(`0`);
408	pct = strtod(argv[`3`], NULL);
409	assert_zero(get_maybe_error_errno());
410	assert(pct > `0.0` && pct <= `100.0`);
411	}
412
413	// Open the file as read-only.
414	dictfd = open(dictfname, O_RDONLY \| O_BINARY, S_IRWXU \| S_IRWXG \| S_IRWXO);
415	if (dictfd < `0`) {
416	perror(dictfname);
417	fflush(stderr);
418	abort();
419	}
420	outf = fopen(outfname, "w");
421	if (!outf) {
422	perror(outfname);
423	fflush(stderr);
424	abort();
425	}
426
427	// body of toku_ft_serialize_init();
428	num_cores = toku_os_get_number_active_processors();
429	r = toku_thread_pool_create(&ft_pool, num_cores); lazy_assert_zero(r);
430	assert_zero(r);
431
432	// deserialize the header(s)
433	struct ft h1, h2;
434	deserialize_headers(dictfd, &h1, &h2);
435
436	// walk over the block table and check blocks
437	if (h1) {
438	printf("Checking dictionary from header 1.\n");
439	check_block_table(dictfd, &h1->blocktable, h1);
440	}
441	if (h2) {
442	printf("Checking dictionary from header 2.\n");
443	check_block_table(dictfd, &h2->blocktable, h2);
444	}
445	if (h1 == NULL && h2 == NULL) {
446	printf("Both headers have a corruption and could not be used.\n");
447	}
448
449	toku_thread_pool_destroy(&ft_pool);
450	exit:
451	return r;
452	}
453

Browse the source code of MariaDB/storage/tokudb/PerconaFT/tools/ftverify.cc