| 1 | #include <iostream> |
|---|---|
| 2 | #include <IO/ReadBufferFromFileDescriptor.h> |
| 3 | #include <IO/ReadHelpers.h> |
| 4 | #include <Storages/MergeTree/SimpleMergeSelector.h> |
| 5 | #include <Storages/MergeTree/LevelMergeSelector.h> |
| 6 | |
| 7 | |
| 8 | /** This program tests merge-selecting algorithm. |
| 9 | * Usage: |
| 10 | * ./merge_selector <<< "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20" |
| 11 | * clickhouse-client --query="SELECT 100 + round(10 * rand() / 0xFFFFFFFF) FROM system.numbers LIMIT 105" | tr '\n' ' ' | ./merge_selector |
| 12 | */ |
| 13 | |
| 14 | int main(int, char **) |
| 15 | { |
| 16 | using namespace DB; |
| 17 | |
| 18 | IMergeSelector::Partitions partitions(1); |
| 19 | IMergeSelector::PartsInPartition & parts = partitions.back(); |
| 20 | |
| 21 | SimpleMergeSelector::Settings settings; |
| 22 | // settings.base = 2; |
| 23 | // settings.max_parts_to_merge_at_once = 10; |
| 24 | SimpleMergeSelector selector(settings); |
| 25 | |
| 26 | /* LevelMergeSelector::Settings settings; |
| 27 | settings.min_parts_to_merge = 8; |
| 28 | settings.max_parts_to_merge = 16; |
| 29 | LevelMergeSelector selector(settings);*/ |
| 30 | |
| 31 | ReadBufferFromFileDescriptor in(STDIN_FILENO); |
| 32 | |
| 33 | size_t sum_parts_size = 0; |
| 34 | |
| 35 | while (!in.eof()) |
| 36 | { |
| 37 | size_t size = 0; |
| 38 | readText(size, in); |
| 39 | skipWhitespaceIfAny(in); |
| 40 | |
| 41 | IMergeSelector::Part part; |
| 42 | part.size = size; |
| 43 | part.age = 0; |
| 44 | part.level = 0; |
| 45 | part.data = reinterpret_cast<const void *>(parts.size()); |
| 46 | |
| 47 | parts.emplace_back(part); |
| 48 | |
| 49 | sum_parts_size += size; |
| 50 | } |
| 51 | |
| 52 | size_t sum_size_written = sum_parts_size; |
| 53 | size_t num_merges = 1; |
| 54 | |
| 55 | while (parts.size() > 1) |
| 56 | { |
| 57 | IMergeSelector::PartsInPartition selected_parts = selector.select(partitions, 0); |
| 58 | |
| 59 | if (selected_parts.empty()) |
| 60 | { |
| 61 | std::cout << '.'; |
| 62 | for (auto & part : parts) |
| 63 | ++part.age; |
| 64 | continue; |
| 65 | } |
| 66 | std::cout << '\n'; |
| 67 | |
| 68 | size_t sum_merged_size = 0; |
| 69 | size_t start_index = 0; |
| 70 | size_t max_level = 0; |
| 71 | bool in_range = false; |
| 72 | |
| 73 | for (size_t i = 0, size = parts.size(); i < size; ++i) |
| 74 | { |
| 75 | if (parts[i].data == selected_parts.front().data) |
| 76 | { |
| 77 | std::cout << "\033[1;31m"; |
| 78 | in_range = true; |
| 79 | start_index = i; |
| 80 | } |
| 81 | |
| 82 | std::cout << parts[i].size; |
| 83 | if (in_range) |
| 84 | { |
| 85 | sum_merged_size += parts[i].size; |
| 86 | if (parts[i].level > max_level) |
| 87 | max_level = parts[i].level; |
| 88 | } |
| 89 | |
| 90 | if (parts[i].data == selected_parts.back().data) |
| 91 | { |
| 92 | in_range = false; |
| 93 | std::cout << "\033[0m"; |
| 94 | } |
| 95 | |
| 96 | std::cout << " "; |
| 97 | } |
| 98 | |
| 99 | parts[start_index].size = sum_merged_size; |
| 100 | parts[start_index].level = max_level + 1; |
| 101 | parts[start_index].age = 0; |
| 102 | parts.erase(parts.begin() + start_index + 1, parts.begin() + start_index + selected_parts.size()); |
| 103 | |
| 104 | std::cout << '\n'; |
| 105 | |
| 106 | sum_size_written += sum_merged_size; |
| 107 | ++num_merges; |
| 108 | } |
| 109 | |
| 110 | std::cout << std::fixed << std::setprecision(2) |
| 111 | << "Write amplification: "<< static_cast<double>(sum_size_written) / sum_parts_size << "\n" |
| 112 | << "Num merges: "<< num_merges << "\n" |
| 113 | << "Tree depth: "<< parts.front().level << "\n" |
| 114 | ; |
| 115 | |
| 116 | return 0; |
| 117 | } |
| 118 |