| 1 | #include <Storages/MergeTree/LevelMergeSelector.h> | 
|---|
| 2 |  | 
|---|
| 3 | #include <cmath> | 
|---|
| 4 |  | 
|---|
| 5 |  | 
|---|
| 6 | namespace DB | 
|---|
| 7 | { | 
|---|
| 8 |  | 
|---|
| 9 | namespace | 
|---|
| 10 | { | 
|---|
| 11 |  | 
|---|
| 12 | /** Estimates best set of parts to merge within passed alternatives. | 
|---|
| 13 | * It is selected simply: by minimal size. | 
|---|
| 14 | */ | 
|---|
| 15 | struct Estimator | 
|---|
| 16 | { | 
|---|
| 17 | using Iterator = LevelMergeSelector::PartsInPartition::const_iterator; | 
|---|
| 18 |  | 
|---|
| 19 | void consider(Iterator begin, Iterator end, size_t sum_size) | 
|---|
| 20 | { | 
|---|
| 21 | double current_score = sum_size; | 
|---|
| 22 |  | 
|---|
| 23 | if (!min_score || current_score < min_score) | 
|---|
| 24 | { | 
|---|
| 25 | min_score = current_score; | 
|---|
| 26 | best_begin = begin; | 
|---|
| 27 | best_end = end; | 
|---|
| 28 | } | 
|---|
| 29 | } | 
|---|
| 30 |  | 
|---|
| 31 | LevelMergeSelector::PartsInPartition getBest() | 
|---|
| 32 | { | 
|---|
| 33 | return LevelMergeSelector::PartsInPartition(best_begin, best_end); | 
|---|
| 34 | } | 
|---|
| 35 |  | 
|---|
| 36 | double min_score = 0; | 
|---|
| 37 | Iterator best_begin {}; | 
|---|
| 38 | Iterator best_end {}; | 
|---|
| 39 | }; | 
|---|
| 40 |  | 
|---|
| 41 |  | 
|---|
| 42 | void selectWithinPartition( | 
|---|
| 43 | const LevelMergeSelector::PartsInPartition & parts, | 
|---|
| 44 | const size_t max_total_size_to_merge, | 
|---|
| 45 | Estimator & estimator, | 
|---|
| 46 | const LevelMergeSelector::Settings & settings) | 
|---|
| 47 | { | 
|---|
| 48 | size_t parts_size = parts.size(); | 
|---|
| 49 | if (parts_size < settings.parts_to_merge) | 
|---|
| 50 | return; | 
|---|
| 51 |  | 
|---|
| 52 | /// To easily calculate sum size in any range. | 
|---|
| 53 | size_t parts_count = parts.size(); | 
|---|
| 54 | size_t prefix_sum = 0; | 
|---|
| 55 | std::vector<size_t> prefix_sums(parts.size() + 1); | 
|---|
| 56 |  | 
|---|
| 57 | for (size_t i = 0; i < parts_count; ++i) | 
|---|
| 58 | { | 
|---|
| 59 | prefix_sum += parts[i].size; | 
|---|
| 60 | prefix_sums[i + 1] = prefix_sum; | 
|---|
| 61 | } | 
|---|
| 62 |  | 
|---|
| 63 | /// Use "corrected" level. It will be non-decreasing while traversing parts right to left. | 
|---|
| 64 | /// This is done for compatibility with another algorithms. | 
|---|
| 65 | size_t corrected_level_at_left = 0; | 
|---|
| 66 | size_t corrected_level_at_right = 0; | 
|---|
| 67 |  | 
|---|
| 68 | size_t range_end = parts_size; | 
|---|
| 69 | size_t range_begin = range_end - settings.parts_to_merge; | 
|---|
| 70 |  | 
|---|
| 71 | for (size_t i = range_begin; i < range_end; ++i) | 
|---|
| 72 | if (corrected_level_at_left < parts[i].level) | 
|---|
| 73 | corrected_level_at_left = parts[i].level; | 
|---|
| 74 |  | 
|---|
| 75 | while (true) | 
|---|
| 76 | { | 
|---|
| 77 | if (corrected_level_at_left < parts[range_begin].level) | 
|---|
| 78 | corrected_level_at_left = parts[range_begin].level; | 
|---|
| 79 |  | 
|---|
| 80 | if (corrected_level_at_right < parts[range_end - 1].level) | 
|---|
| 81 | corrected_level_at_right = parts[range_end - 1].level; | 
|---|
| 82 |  | 
|---|
| 83 | /// Leftmost range of same corrected level. | 
|---|
| 84 | if (corrected_level_at_left == corrected_level_at_right | 
|---|
| 85 | && (range_begin == 0 || parts[range_begin - 1].level > corrected_level_at_left)) | 
|---|
| 86 | { | 
|---|
| 87 | size_t range_size = prefix_sums[range_end] - prefix_sums[range_begin]; | 
|---|
| 88 |  | 
|---|
| 89 | if (range_size <= max_total_size_to_merge) | 
|---|
| 90 | estimator.consider(parts.begin() + range_begin, parts.begin() + range_end, range_size); | 
|---|
| 91 |  | 
|---|
| 92 | break;    /// Minumum level is enough. | 
|---|
| 93 | } | 
|---|
| 94 |  | 
|---|
| 95 | if (range_begin == 0) | 
|---|
| 96 | break; | 
|---|
| 97 |  | 
|---|
| 98 | --range_begin; | 
|---|
| 99 | --range_end; | 
|---|
| 100 | } | 
|---|
| 101 | } | 
|---|
| 102 |  | 
|---|
| 103 | } | 
|---|
| 104 |  | 
|---|
| 105 |  | 
|---|
| 106 | LevelMergeSelector::PartsInPartition LevelMergeSelector::select( | 
|---|
| 107 | const Partitions & partitions, | 
|---|
| 108 | const size_t max_total_size_to_merge) | 
|---|
| 109 | { | 
|---|
| 110 | Estimator estimator; | 
|---|
| 111 |  | 
|---|
| 112 | for (const auto & partition : partitions) | 
|---|
| 113 | selectWithinPartition(partition, max_total_size_to_merge, estimator, settings); | 
|---|
| 114 |  | 
|---|
| 115 | return estimator.getBest(); | 
|---|
| 116 | } | 
|---|
| 117 |  | 
|---|
| 118 | } | 
|---|
| 119 |  | 
|---|