1 | #include <list> |
2 | #include <iostream> |
3 | #include <IO/ReadBufferFromFileDescriptor.h> |
4 | #include <IO/Operators.h> |
5 | #include <Storages/MergeTree/SimpleMergeSelector.h> |
6 | #include <Storages/MergeTree/LevelMergeSelector.h> |
7 | #include <Common/formatReadable.h> |
8 | |
9 | |
10 | /** This program tests merge-selecting algorithm. |
11 | * Usage: |
12 | clickhouse-client --query=" |
13 | SELECT bytes, now() - modification_time, level, name |
14 | FROM system.parts |
15 | WHERE table = 'visits' AND active AND partition = '201610'" | ./merge_selector2 |
16 | */ |
17 | |
18 | int main(int, char **) |
19 | { |
20 | using namespace DB; |
21 | |
22 | IMergeSelector::Partitions partitions(1); |
23 | IMergeSelector::PartsInPartition & parts = partitions.back(); |
24 | |
25 | /* SimpleMergeSelector::Settings settings; |
26 | SimpleMergeSelector selector(settings);*/ |
27 | |
28 | LevelMergeSelector::Settings settings; |
29 | LevelMergeSelector selector(settings); |
30 | |
31 | ReadBufferFromFileDescriptor in(STDIN_FILENO); |
32 | |
33 | size_t sum_parts_size = 0; |
34 | |
35 | std::list<std::string> part_names; |
36 | |
37 | while (!in.eof()) |
38 | { |
39 | part_names.emplace_back(); |
40 | IMergeSelector::Part part; |
41 | in >> part.size >> "\t" >> part.age >> "\t" >> part.level >> "\t" >> part_names.back() >> "\n" ; |
42 | part.data = part_names.back().data(); |
43 | // part.level = 0; |
44 | parts.emplace_back(part); |
45 | sum_parts_size += part.size; |
46 | } |
47 | |
48 | size_t total_size_merged = 0; |
49 | size_t sum_size_written = sum_parts_size; |
50 | size_t num_merges = 1; |
51 | size_t age_passed = 0; |
52 | |
53 | while (parts.size() > 1) |
54 | { |
55 | IMergeSelector::PartsInPartition selected_parts = selector.select(partitions, 100ULL * 1024 * 1024 * 1024); |
56 | |
57 | if (selected_parts.empty()) |
58 | { |
59 | ++age_passed; |
60 | for (auto & part : parts) |
61 | ++part.age; |
62 | |
63 | if (age_passed > 60 * 86400) |
64 | break; |
65 | |
66 | if (age_passed % 86400 == 0) |
67 | std::cout << "." ; |
68 | |
69 | continue; |
70 | } |
71 | std::cout << "Time passed: " << age_passed << '\n'; |
72 | |
73 | size_t sum_merged_size = 0; |
74 | size_t start_index = 0; |
75 | size_t max_level = 0; |
76 | bool in_range = false; |
77 | |
78 | for (size_t i = 0, size = parts.size(); i < size; ++i) |
79 | { |
80 | if (parts[i].data == selected_parts.front().data) |
81 | { |
82 | std::cout << "\033[1;31m" ; |
83 | in_range = true; |
84 | start_index = i; |
85 | } |
86 | |
87 | std::cout << (parts[i].size / 1024) << "_" << parts[i].level; |
88 | if (in_range) |
89 | { |
90 | sum_merged_size += parts[i].size; |
91 | if (parts[i].level > max_level) |
92 | max_level = parts[i].level; |
93 | } |
94 | |
95 | if (parts[i].data == selected_parts.back().data) |
96 | { |
97 | in_range = false; |
98 | std::cout << "\033[0m" ; |
99 | } |
100 | |
101 | std::cout << " " ; |
102 | } |
103 | |
104 | parts[start_index].size = sum_merged_size; |
105 | parts[start_index].level = max_level + 1; |
106 | parts[start_index].age = 0; |
107 | parts.erase(parts.begin() + start_index + 1, parts.begin() + start_index + selected_parts.size()); |
108 | |
109 | std::cout << '\n'; |
110 | |
111 | sum_size_written += sum_merged_size; |
112 | total_size_merged += sum_merged_size; |
113 | |
114 | ++num_merges; |
115 | |
116 | double time_to_merge = sum_merged_size / (1048576 * 10.0); |
117 | |
118 | age_passed += time_to_merge; |
119 | for (auto & part : parts) |
120 | part.age += time_to_merge; |
121 | |
122 | std::cout << "Time passed: " << age_passed << ", num parts: " << parts.size() |
123 | << ", merged " << selected_parts.size() << " parts, " << formatReadableSizeWithBinarySuffix(sum_merged_size) |
124 | << ", total written: " << formatReadableSizeWithBinarySuffix(total_size_merged) << '\n'; |
125 | } |
126 | |
127 | std::cout << std::fixed << std::setprecision(2) |
128 | << "Write amplification: " << static_cast<double>(sum_size_written) / sum_parts_size << "\n" |
129 | << "Num parts: " << part_names.size() << "\n" |
130 | << "Num merges: " << num_merges << "\n" |
131 | << "Tree depth: " << parts.front().level << "\n" |
132 | ; |
133 | |
134 | return 0; |
135 | } |
136 | |