1 | #include <iostream> |
---|---|
2 | #include <IO/ReadBufferFromFileDescriptor.h> |
3 | #include <IO/ReadHelpers.h> |
4 | #include <Storages/MergeTree/SimpleMergeSelector.h> |
5 | #include <Storages/MergeTree/LevelMergeSelector.h> |
6 | |
7 | |
8 | /** This program tests merge-selecting algorithm. |
9 | * Usage: |
10 | * ./merge_selector <<< "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20" |
11 | * clickhouse-client --query="SELECT 100 + round(10 * rand() / 0xFFFFFFFF) FROM system.numbers LIMIT 105" | tr '\n' ' ' | ./merge_selector |
12 | */ |
13 | |
14 | int main(int, char **) |
15 | { |
16 | using namespace DB; |
17 | |
18 | IMergeSelector::Partitions partitions(1); |
19 | IMergeSelector::PartsInPartition & parts = partitions.back(); |
20 | |
21 | SimpleMergeSelector::Settings settings; |
22 | // settings.base = 2; |
23 | // settings.max_parts_to_merge_at_once = 10; |
24 | SimpleMergeSelector selector(settings); |
25 | |
26 | /* LevelMergeSelector::Settings settings; |
27 | settings.min_parts_to_merge = 8; |
28 | settings.max_parts_to_merge = 16; |
29 | LevelMergeSelector selector(settings);*/ |
30 | |
31 | ReadBufferFromFileDescriptor in(STDIN_FILENO); |
32 | |
33 | size_t sum_parts_size = 0; |
34 | |
35 | while (!in.eof()) |
36 | { |
37 | size_t size = 0; |
38 | readText(size, in); |
39 | skipWhitespaceIfAny(in); |
40 | |
41 | IMergeSelector::Part part; |
42 | part.size = size; |
43 | part.age = 0; |
44 | part.level = 0; |
45 | part.data = reinterpret_cast<const void *>(parts.size()); |
46 | |
47 | parts.emplace_back(part); |
48 | |
49 | sum_parts_size += size; |
50 | } |
51 | |
52 | size_t sum_size_written = sum_parts_size; |
53 | size_t num_merges = 1; |
54 | |
55 | while (parts.size() > 1) |
56 | { |
57 | IMergeSelector::PartsInPartition selected_parts = selector.select(partitions, 0); |
58 | |
59 | if (selected_parts.empty()) |
60 | { |
61 | std::cout << '.'; |
62 | for (auto & part : parts) |
63 | ++part.age; |
64 | continue; |
65 | } |
66 | std::cout << '\n'; |
67 | |
68 | size_t sum_merged_size = 0; |
69 | size_t start_index = 0; |
70 | size_t max_level = 0; |
71 | bool in_range = false; |
72 | |
73 | for (size_t i = 0, size = parts.size(); i < size; ++i) |
74 | { |
75 | if (parts[i].data == selected_parts.front().data) |
76 | { |
77 | std::cout << "\033[1;31m"; |
78 | in_range = true; |
79 | start_index = i; |
80 | } |
81 | |
82 | std::cout << parts[i].size; |
83 | if (in_range) |
84 | { |
85 | sum_merged_size += parts[i].size; |
86 | if (parts[i].level > max_level) |
87 | max_level = parts[i].level; |
88 | } |
89 | |
90 | if (parts[i].data == selected_parts.back().data) |
91 | { |
92 | in_range = false; |
93 | std::cout << "\033[0m"; |
94 | } |
95 | |
96 | std::cout << " "; |
97 | } |
98 | |
99 | parts[start_index].size = sum_merged_size; |
100 | parts[start_index].level = max_level + 1; |
101 | parts[start_index].age = 0; |
102 | parts.erase(parts.begin() + start_index + 1, parts.begin() + start_index + selected_parts.size()); |
103 | |
104 | std::cout << '\n'; |
105 | |
106 | sum_size_written += sum_merged_size; |
107 | ++num_merges; |
108 | } |
109 | |
110 | std::cout << std::fixed << std::setprecision(2) |
111 | << "Write amplification: "<< static_cast<double>(sum_size_written) / sum_parts_size << "\n" |
112 | << "Num merges: "<< num_merges << "\n" |
113 | << "Tree depth: "<< parts.front().level << "\n" |
114 | ; |
115 | |
116 | return 0; |
117 | } |
118 |