#include <unistd.h>

#include <iomanip>
#include <iostream>
#include <list>
#include <string>

#include <Common/formatReadable.h>
#include <IO/Operators.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <Storages/MergeTree/LevelMergeSelector.h>
#include <Storages/MergeTree/SimpleMergeSelector.h>
| 8 | |
| 9 | |
| 10 | /** This program tests merge-selecting algorithm. |
| 11 | * Usage: |
| 12 | clickhouse-client --query=" |
| 13 | SELECT bytes, now() - modification_time, level, name |
| 14 | FROM system.parts |
| 15 | WHERE table = 'visits' AND active AND partition = '201610'" | ./merge_selector2 |
| 16 | */ |
| 17 | |
| 18 | int main(int, char **) |
| 19 | { |
| 20 | using namespace DB; |
| 21 | |
| 22 | IMergeSelector::Partitions partitions(1); |
| 23 | IMergeSelector::PartsInPartition & parts = partitions.back(); |
| 24 | |
| 25 | /* SimpleMergeSelector::Settings settings; |
| 26 | SimpleMergeSelector selector(settings);*/ |
| 27 | |
| 28 | LevelMergeSelector::Settings settings; |
| 29 | LevelMergeSelector selector(settings); |
| 30 | |
| 31 | ReadBufferFromFileDescriptor in(STDIN_FILENO); |
| 32 | |
| 33 | size_t sum_parts_size = 0; |
| 34 | |
| 35 | std::list<std::string> part_names; |
| 36 | |
| 37 | while (!in.eof()) |
| 38 | { |
| 39 | part_names.emplace_back(); |
| 40 | IMergeSelector::Part part; |
| 41 | in >> part.size >> "\t" >> part.age >> "\t" >> part.level >> "\t" >> part_names.back() >> "\n" ; |
| 42 | part.data = part_names.back().data(); |
| 43 | // part.level = 0; |
| 44 | parts.emplace_back(part); |
| 45 | sum_parts_size += part.size; |
| 46 | } |
| 47 | |
| 48 | size_t total_size_merged = 0; |
| 49 | size_t sum_size_written = sum_parts_size; |
| 50 | size_t num_merges = 1; |
| 51 | size_t age_passed = 0; |
| 52 | |
| 53 | while (parts.size() > 1) |
| 54 | { |
| 55 | IMergeSelector::PartsInPartition selected_parts = selector.select(partitions, 100ULL * 1024 * 1024 * 1024); |
| 56 | |
| 57 | if (selected_parts.empty()) |
| 58 | { |
| 59 | ++age_passed; |
| 60 | for (auto & part : parts) |
| 61 | ++part.age; |
| 62 | |
| 63 | if (age_passed > 60 * 86400) |
| 64 | break; |
| 65 | |
| 66 | if (age_passed % 86400 == 0) |
| 67 | std::cout << "." ; |
| 68 | |
| 69 | continue; |
| 70 | } |
| 71 | std::cout << "Time passed: " << age_passed << '\n'; |
| 72 | |
| 73 | size_t sum_merged_size = 0; |
| 74 | size_t start_index = 0; |
| 75 | size_t max_level = 0; |
| 76 | bool in_range = false; |
| 77 | |
| 78 | for (size_t i = 0, size = parts.size(); i < size; ++i) |
| 79 | { |
| 80 | if (parts[i].data == selected_parts.front().data) |
| 81 | { |
| 82 | std::cout << "\033[1;31m" ; |
| 83 | in_range = true; |
| 84 | start_index = i; |
| 85 | } |
| 86 | |
| 87 | std::cout << (parts[i].size / 1024) << "_" << parts[i].level; |
| 88 | if (in_range) |
| 89 | { |
| 90 | sum_merged_size += parts[i].size; |
| 91 | if (parts[i].level > max_level) |
| 92 | max_level = parts[i].level; |
| 93 | } |
| 94 | |
| 95 | if (parts[i].data == selected_parts.back().data) |
| 96 | { |
| 97 | in_range = false; |
| 98 | std::cout << "\033[0m" ; |
| 99 | } |
| 100 | |
| 101 | std::cout << " " ; |
| 102 | } |
| 103 | |
| 104 | parts[start_index].size = sum_merged_size; |
| 105 | parts[start_index].level = max_level + 1; |
| 106 | parts[start_index].age = 0; |
| 107 | parts.erase(parts.begin() + start_index + 1, parts.begin() + start_index + selected_parts.size()); |
| 108 | |
| 109 | std::cout << '\n'; |
| 110 | |
| 111 | sum_size_written += sum_merged_size; |
| 112 | total_size_merged += sum_merged_size; |
| 113 | |
| 114 | ++num_merges; |
| 115 | |
| 116 | double time_to_merge = sum_merged_size / (1048576 * 10.0); |
| 117 | |
| 118 | age_passed += time_to_merge; |
| 119 | for (auto & part : parts) |
| 120 | part.age += time_to_merge; |
| 121 | |
| 122 | std::cout << "Time passed: " << age_passed << ", num parts: " << parts.size() |
| 123 | << ", merged " << selected_parts.size() << " parts, " << formatReadableSizeWithBinarySuffix(sum_merged_size) |
| 124 | << ", total written: " << formatReadableSizeWithBinarySuffix(total_size_merged) << '\n'; |
| 125 | } |
| 126 | |
| 127 | std::cout << std::fixed << std::setprecision(2) |
| 128 | << "Write amplification: " << static_cast<double>(sum_size_written) / sum_parts_size << "\n" |
| 129 | << "Num parts: " << part_names.size() << "\n" |
| 130 | << "Num merges: " << num_merges << "\n" |
| 131 | << "Tree depth: " << parts.front().level << "\n" |
| 132 | ; |
| 133 | |
| 134 | return 0; |
| 135 | } |
| 136 | |