1 | #include <iostream> |
2 | |
3 | #include <boost/program_options.hpp> |
4 | #include <boost/algorithm/string/predicate.hpp> |
5 | |
6 | #include <Compression/CompressedWriteBuffer.h> |
7 | #include <Compression/CompressedReadBuffer.h> |
8 | #include <IO/WriteHelpers.h> |
9 | #include <IO/Operators.h> |
10 | #include <IO/ReadBufferFromFile.h> |
11 | #include <IO/ReadHelpers.h> |
12 | #include <IO/WriteBufferFromFileDescriptor.h> |
13 | #include <Compression/CompressedReadBufferFromFile.h> |
14 | |
15 | |
/** This program checks the correctness of a .mrk or .mrk2 (marks) file against the corresponding compressed .bin file.
  */
18 | |
19 | |
/// Forward declaration of the error code used below; only declared here, the definition is not in this file.
namespace DB::ErrorCodes
{
    extern const int TOO_LARGE_SIZE_COMPRESSED;
}
27 | |
28 | |
29 | /// Read and check header of compressed block. Print size of decompressed and compressed data. |
30 | std::pair<UInt32, UInt32> stat(DB::ReadBuffer & in, DB::WriteBuffer & out) |
31 | { |
32 | if (in.eof()) |
33 | return {}; |
34 | |
35 | in.ignore(16); /// checksum |
36 | |
37 | char [COMPRESSED_BLOCK_HEADER_SIZE]; |
38 | in.readStrict(header, COMPRESSED_BLOCK_HEADER_SIZE); |
39 | |
40 | UInt32 size_compressed = unalignedLoad<UInt32>(&header[1]); |
41 | |
42 | if (size_compressed > DBMS_MAX_COMPRESSED_SIZE) |
43 | throw DB::Exception("Too large size_compressed. Most likely corrupted data." , DB::ErrorCodes::TOO_LARGE_SIZE_COMPRESSED); |
44 | |
45 | UInt32 size_decompressed = unalignedLoad<UInt32>(&header[5]); |
46 | |
47 | return {size_compressed, size_decompressed}; |
48 | } |
49 | |
50 | |
51 | void (const std::string & mrk_path, const std::string & bin_path) |
52 | { |
53 | DB::ReadBufferFromFile mrk_in(mrk_path); |
54 | DB::ReadBufferFromFile bin_in(bin_path, 4096); /// Small buffer size just to check header of compressed block. |
55 | |
56 | DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO); |
57 | |
58 | for (size_t mark_num = 0; !mrk_in.eof(); ++mark_num) |
59 | { |
60 | UInt64 offset_in_compressed_file = 0; |
61 | UInt64 offset_in_decompressed_block = 0; |
62 | |
63 | DB::readBinary(offset_in_compressed_file, mrk_in); |
64 | DB::readBinary(offset_in_decompressed_block, mrk_in); |
65 | |
66 | out << "Mark " << mark_num << ", points to " << offset_in_compressed_file << ", " << offset_in_decompressed_block << ". " ; |
67 | |
68 | bin_in.seek(offset_in_compressed_file); |
69 | auto sizes = stat(bin_in, out); |
70 | |
71 | out << "Block sizes: " << sizes.first << ", " << sizes.second << '\n' << DB::flush; |
72 | } |
73 | } |
74 | |
75 | |
76 | void checkByCompressedReadBuffer(const std::string & mrk_path, const std::string & bin_path) |
77 | { |
78 | DB::ReadBufferFromFile mrk_in(mrk_path); |
79 | DB::CompressedReadBufferFromFile bin_in(bin_path, 0, 0); |
80 | |
81 | DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO); |
82 | bool mrk2_format = boost::algorithm::ends_with(mrk_path, ".mrk2" ); |
83 | |
84 | for (size_t mark_num = 0; !mrk_in.eof(); ++mark_num) |
85 | { |
86 | UInt64 offset_in_compressed_file = 0; |
87 | UInt64 offset_in_decompressed_block = 0; |
88 | UInt64 index_granularity_rows = 0; |
89 | |
90 | DB::readBinary(offset_in_compressed_file, mrk_in); |
91 | DB::readBinary(offset_in_decompressed_block, mrk_in); |
92 | |
93 | out << "Mark " << mark_num << ", points to " << offset_in_compressed_file << ", " << offset_in_decompressed_block; |
94 | |
95 | if (mrk2_format) |
96 | { |
97 | DB::readBinary(index_granularity_rows, mrk_in); |
98 | |
99 | out << ", has rows after " << index_granularity_rows; |
100 | } |
101 | |
102 | out << ".\n" << DB::flush; |
103 | |
104 | bin_in.seek(offset_in_compressed_file, offset_in_decompressed_block); |
105 | } |
106 | } |
107 | |
108 | |
109 | int main(int argc, char ** argv) |
110 | { |
111 | boost::program_options::options_description desc("Allowed options" ); |
112 | desc.add_options() |
113 | ("help,h" , "produce help message" ) |
114 | ; |
115 | |
116 | boost::program_options::variables_map options; |
117 | boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); |
118 | |
119 | if (options.count("help" ) || argc != 3) |
120 | { |
121 | std::cout << "Usage: " << argv[0] << " file.mrk file.bin" << std::endl; |
122 | std::cout << desc << std::endl; |
123 | return 1; |
124 | } |
125 | |
126 | try |
127 | { |
128 | /// checkCompressedHeaders(argv[1], argv[2]); |
129 | checkByCompressedReadBuffer(argv[1], argv[2]); |
130 | } |
131 | catch (const DB::Exception & e) |
132 | { |
133 | std::cerr << e.what() << ", " << e.message() << std::endl |
134 | << std::endl |
135 | << "Stack trace:" << std::endl |
136 | << e.getStackTrace().toString() |
137 | << std::endl; |
138 | throw; |
139 | } |
140 | |
141 | return 0; |
142 | } |
143 | |