1#include <iostream>
2
3#include <boost/program_options.hpp>
4#include <boost/algorithm/string/predicate.hpp>
5
6#include <Compression/CompressedWriteBuffer.h>
7#include <Compression/CompressedReadBuffer.h>
8#include <IO/WriteHelpers.h>
9#include <IO/Operators.h>
10#include <IO/ReadBufferFromFile.h>
11#include <IO/ReadHelpers.h>
12#include <IO/WriteBufferFromFileDescriptor.h>
13#include <Compression/CompressedReadBufferFromFile.h>
14
15
/** This program checks the correctness of a .mrk (marks) file against the corresponding compressed .bin file.
  */
18
19
/// Declaration of the error code used by this tool; the definition is not in this file.
namespace DB::ErrorCodes
{
    extern const int TOO_LARGE_SIZE_COMPRESSED;
}
27
28
29/// Read and check header of compressed block. Print size of decompressed and compressed data.
30std::pair<UInt32, UInt32> stat(DB::ReadBuffer & in, DB::WriteBuffer & out)
31{
32 if (in.eof())
33 return {};
34
35 in.ignore(16); /// checksum
36
37 char header[COMPRESSED_BLOCK_HEADER_SIZE];
38 in.readStrict(header, COMPRESSED_BLOCK_HEADER_SIZE);
39
40 UInt32 size_compressed = unalignedLoad<UInt32>(&header[1]);
41
42 if (size_compressed > DBMS_MAX_COMPRESSED_SIZE)
43 throw DB::Exception("Too large size_compressed. Most likely corrupted data.", DB::ErrorCodes::TOO_LARGE_SIZE_COMPRESSED);
44
45 UInt32 size_decompressed = unalignedLoad<UInt32>(&header[5]);
46
47 return {size_compressed, size_decompressed};
48}
49
50
51void checkCompressedHeaders(const std::string & mrk_path, const std::string & bin_path)
52{
53 DB::ReadBufferFromFile mrk_in(mrk_path);
54 DB::ReadBufferFromFile bin_in(bin_path, 4096); /// Small buffer size just to check header of compressed block.
55
56 DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO);
57
58 for (size_t mark_num = 0; !mrk_in.eof(); ++mark_num)
59 {
60 UInt64 offset_in_compressed_file = 0;
61 UInt64 offset_in_decompressed_block = 0;
62
63 DB::readBinary(offset_in_compressed_file, mrk_in);
64 DB::readBinary(offset_in_decompressed_block, mrk_in);
65
66 out << "Mark " << mark_num << ", points to " << offset_in_compressed_file << ", " << offset_in_decompressed_block << ". ";
67
68 bin_in.seek(offset_in_compressed_file);
69 auto sizes = stat(bin_in, out);
70
71 out << "Block sizes: " << sizes.first << ", " << sizes.second << '\n' << DB::flush;
72 }
73}
74
75
76void checkByCompressedReadBuffer(const std::string & mrk_path, const std::string & bin_path)
77{
78 DB::ReadBufferFromFile mrk_in(mrk_path);
79 DB::CompressedReadBufferFromFile bin_in(bin_path, 0, 0);
80
81 DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO);
82 bool mrk2_format = boost::algorithm::ends_with(mrk_path, ".mrk2");
83
84 for (size_t mark_num = 0; !mrk_in.eof(); ++mark_num)
85 {
86 UInt64 offset_in_compressed_file = 0;
87 UInt64 offset_in_decompressed_block = 0;
88 UInt64 index_granularity_rows = 0;
89
90 DB::readBinary(offset_in_compressed_file, mrk_in);
91 DB::readBinary(offset_in_decompressed_block, mrk_in);
92
93 out << "Mark " << mark_num << ", points to " << offset_in_compressed_file << ", " << offset_in_decompressed_block;
94
95 if (mrk2_format)
96 {
97 DB::readBinary(index_granularity_rows, mrk_in);
98
99 out << ", has rows after " << index_granularity_rows;
100 }
101
102 out << ".\n" << DB::flush;
103
104 bin_in.seek(offset_in_compressed_file, offset_in_decompressed_block);
105 }
106}
107
108
109int main(int argc, char ** argv)
110{
111 boost::program_options::options_description desc("Allowed options");
112 desc.add_options()
113 ("help,h", "produce help message")
114 ;
115
116 boost::program_options::variables_map options;
117 boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
118
119 if (options.count("help") || argc != 3)
120 {
121 std::cout << "Usage: " << argv[0] << " file.mrk file.bin" << std::endl;
122 std::cout << desc << std::endl;
123 return 1;
124 }
125
126 try
127 {
128 /// checkCompressedHeaders(argv[1], argv[2]);
129 checkByCompressedReadBuffer(argv[1], argv[2]);
130 }
131 catch (const DB::Exception & e)
132 {
133 std::cerr << e.what() << ", " << e.message() << std::endl
134 << std::endl
135 << "Stack trace:" << std::endl
136 << e.getStackTrace().toString()
137 << std::endl;
138 throw;
139 }
140
141 return 0;
142}
143