| 1 | #include <Storages/MergeTree/LevelMergeSelector.h> |
| 2 | |
| 3 | #include <cmath> |
| 4 | |
| 5 | |
| 6 | namespace DB |
| 7 | { |
| 8 | |
| 9 | namespace |
| 10 | { |
| 11 | |
| 12 | /** Estimates best set of parts to merge within passed alternatives. |
| 13 | * It is selected simply: by minimal size. |
| 14 | */ |
| 15 | struct Estimator |
| 16 | { |
| 17 | using Iterator = LevelMergeSelector::PartsInPartition::const_iterator; |
| 18 | |
| 19 | void consider(Iterator begin, Iterator end, size_t sum_size) |
| 20 | { |
| 21 | double current_score = sum_size; |
| 22 | |
| 23 | if (!min_score || current_score < min_score) |
| 24 | { |
| 25 | min_score = current_score; |
| 26 | best_begin = begin; |
| 27 | best_end = end; |
| 28 | } |
| 29 | } |
| 30 | |
| 31 | LevelMergeSelector::PartsInPartition getBest() |
| 32 | { |
| 33 | return LevelMergeSelector::PartsInPartition(best_begin, best_end); |
| 34 | } |
| 35 | |
| 36 | double min_score = 0; |
| 37 | Iterator best_begin {}; |
| 38 | Iterator best_end {}; |
| 39 | }; |
| 40 | |
| 41 | |
| 42 | void selectWithinPartition( |
| 43 | const LevelMergeSelector::PartsInPartition & parts, |
| 44 | const size_t max_total_size_to_merge, |
| 45 | Estimator & estimator, |
| 46 | const LevelMergeSelector::Settings & settings) |
| 47 | { |
| 48 | size_t parts_size = parts.size(); |
| 49 | if (parts_size < settings.parts_to_merge) |
| 50 | return; |
| 51 | |
| 52 | /// To easily calculate sum size in any range. |
| 53 | size_t parts_count = parts.size(); |
| 54 | size_t prefix_sum = 0; |
| 55 | std::vector<size_t> prefix_sums(parts.size() + 1); |
| 56 | |
| 57 | for (size_t i = 0; i < parts_count; ++i) |
| 58 | { |
| 59 | prefix_sum += parts[i].size; |
| 60 | prefix_sums[i + 1] = prefix_sum; |
| 61 | } |
| 62 | |
| 63 | /// Use "corrected" level. It will be non-decreasing while traversing parts right to left. |
| 64 | /// This is done for compatibility with another algorithms. |
| 65 | size_t corrected_level_at_left = 0; |
| 66 | size_t corrected_level_at_right = 0; |
| 67 | |
| 68 | size_t range_end = parts_size; |
| 69 | size_t range_begin = range_end - settings.parts_to_merge; |
| 70 | |
| 71 | for (size_t i = range_begin; i < range_end; ++i) |
| 72 | if (corrected_level_at_left < parts[i].level) |
| 73 | corrected_level_at_left = parts[i].level; |
| 74 | |
| 75 | while (true) |
| 76 | { |
| 77 | if (corrected_level_at_left < parts[range_begin].level) |
| 78 | corrected_level_at_left = parts[range_begin].level; |
| 79 | |
| 80 | if (corrected_level_at_right < parts[range_end - 1].level) |
| 81 | corrected_level_at_right = parts[range_end - 1].level; |
| 82 | |
| 83 | /// Leftmost range of same corrected level. |
| 84 | if (corrected_level_at_left == corrected_level_at_right |
| 85 | && (range_begin == 0 || parts[range_begin - 1].level > corrected_level_at_left)) |
| 86 | { |
| 87 | size_t range_size = prefix_sums[range_end] - prefix_sums[range_begin]; |
| 88 | |
| 89 | if (range_size <= max_total_size_to_merge) |
| 90 | estimator.consider(parts.begin() + range_begin, parts.begin() + range_end, range_size); |
| 91 | |
| 92 | break; /// Minumum level is enough. |
| 93 | } |
| 94 | |
| 95 | if (range_begin == 0) |
| 96 | break; |
| 97 | |
| 98 | --range_begin; |
| 99 | --range_end; |
| 100 | } |
| 101 | } |
| 102 | |
| 103 | } |
| 104 | |
| 105 | |
| 106 | LevelMergeSelector::PartsInPartition LevelMergeSelector::select( |
| 107 | const Partitions & partitions, |
| 108 | const size_t max_total_size_to_merge) |
| 109 | { |
| 110 | Estimator estimator; |
| 111 | |
| 112 | for (const auto & partition : partitions) |
| 113 | selectWithinPartition(partition, max_total_size_to_merge, estimator, settings); |
| 114 | |
| 115 | return estimator.getBest(); |
| 116 | } |
| 117 | |
| 118 | } |
| 119 | |