1#include <iostream>
2#include <IO/ReadBufferFromFileDescriptor.h>
3#include <IO/ReadHelpers.h>
4#include <Storages/MergeTree/SimpleMergeSelector.h>
5#include <Storages/MergeTree/LevelMergeSelector.h>
6
7
8/** This program tests merge-selecting algorithm.
9 * Usage:
10 * ./merge_selector <<< "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20"
11 * clickhouse-client --query="SELECT 100 + round(10 * rand() / 0xFFFFFFFF) FROM system.numbers LIMIT 105" | tr '\n' ' ' | ./merge_selector
12 */
13
14int main(int, char **)
15{
16 using namespace DB;
17
18 IMergeSelector::Partitions partitions(1);
19 IMergeSelector::PartsInPartition & parts = partitions.back();
20
21 SimpleMergeSelector::Settings settings;
22// settings.base = 2;
23// settings.max_parts_to_merge_at_once = 10;
24 SimpleMergeSelector selector(settings);
25
26/* LevelMergeSelector::Settings settings;
27 settings.min_parts_to_merge = 8;
28 settings.max_parts_to_merge = 16;
29 LevelMergeSelector selector(settings);*/
30
31 ReadBufferFromFileDescriptor in(STDIN_FILENO);
32
33 size_t sum_parts_size = 0;
34
35 while (!in.eof())
36 {
37 size_t size = 0;
38 readText(size, in);
39 skipWhitespaceIfAny(in);
40
41 IMergeSelector::Part part;
42 part.size = size;
43 part.age = 0;
44 part.level = 0;
45 part.data = reinterpret_cast<const void *>(parts.size());
46
47 parts.emplace_back(part);
48
49 sum_parts_size += size;
50 }
51
52 size_t sum_size_written = sum_parts_size;
53 size_t num_merges = 1;
54
55 while (parts.size() > 1)
56 {
57 IMergeSelector::PartsInPartition selected_parts = selector.select(partitions, 0);
58
59 if (selected_parts.empty())
60 {
61 std::cout << '.';
62 for (auto & part : parts)
63 ++part.age;
64 continue;
65 }
66 std::cout << '\n';
67
68 size_t sum_merged_size = 0;
69 size_t start_index = 0;
70 size_t max_level = 0;
71 bool in_range = false;
72
73 for (size_t i = 0, size = parts.size(); i < size; ++i)
74 {
75 if (parts[i].data == selected_parts.front().data)
76 {
77 std::cout << "\033[1;31m";
78 in_range = true;
79 start_index = i;
80 }
81
82 std::cout << parts[i].size;
83 if (in_range)
84 {
85 sum_merged_size += parts[i].size;
86 if (parts[i].level > max_level)
87 max_level = parts[i].level;
88 }
89
90 if (parts[i].data == selected_parts.back().data)
91 {
92 in_range = false;
93 std::cout << "\033[0m";
94 }
95
96 std::cout << " ";
97 }
98
99 parts[start_index].size = sum_merged_size;
100 parts[start_index].level = max_level + 1;
101 parts[start_index].age = 0;
102 parts.erase(parts.begin() + start_index + 1, parts.begin() + start_index + selected_parts.size());
103
104 std::cout << '\n';
105
106 sum_size_written += sum_merged_size;
107 ++num_merges;
108 }
109
110 std::cout << std::fixed << std::setprecision(2)
111 << "Write amplification: " << static_cast<double>(sum_size_written) / sum_parts_size << "\n"
112 << "Num merges: " << num_merges << "\n"
113 << "Tree depth: " << parts.front().level << "\n"
114 ;
115
116 return 0;
117}
118