1 | #include <Storages/MergeTree/LevelMergeSelector.h> |
2 | |
3 | #include <cmath> |
4 | |
5 | |
6 | namespace DB |
7 | { |
8 | |
9 | namespace |
10 | { |
11 | |
12 | /** Estimates best set of parts to merge within passed alternatives. |
13 | * It is selected simply: by minimal size. |
14 | */ |
15 | struct Estimator |
16 | { |
17 | using Iterator = LevelMergeSelector::PartsInPartition::const_iterator; |
18 | |
19 | void consider(Iterator begin, Iterator end, size_t sum_size) |
20 | { |
21 | double current_score = sum_size; |
22 | |
23 | if (!min_score || current_score < min_score) |
24 | { |
25 | min_score = current_score; |
26 | best_begin = begin; |
27 | best_end = end; |
28 | } |
29 | } |
30 | |
31 | LevelMergeSelector::PartsInPartition getBest() |
32 | { |
33 | return LevelMergeSelector::PartsInPartition(best_begin, best_end); |
34 | } |
35 | |
36 | double min_score = 0; |
37 | Iterator best_begin {}; |
38 | Iterator best_end {}; |
39 | }; |
40 | |
41 | |
42 | void selectWithinPartition( |
43 | const LevelMergeSelector::PartsInPartition & parts, |
44 | const size_t max_total_size_to_merge, |
45 | Estimator & estimator, |
46 | const LevelMergeSelector::Settings & settings) |
47 | { |
48 | size_t parts_size = parts.size(); |
49 | if (parts_size < settings.parts_to_merge) |
50 | return; |
51 | |
52 | /// To easily calculate sum size in any range. |
53 | size_t parts_count = parts.size(); |
54 | size_t prefix_sum = 0; |
55 | std::vector<size_t> prefix_sums(parts.size() + 1); |
56 | |
57 | for (size_t i = 0; i < parts_count; ++i) |
58 | { |
59 | prefix_sum += parts[i].size; |
60 | prefix_sums[i + 1] = prefix_sum; |
61 | } |
62 | |
63 | /// Use "corrected" level. It will be non-decreasing while traversing parts right to left. |
64 | /// This is done for compatibility with another algorithms. |
65 | size_t corrected_level_at_left = 0; |
66 | size_t corrected_level_at_right = 0; |
67 | |
68 | size_t range_end = parts_size; |
69 | size_t range_begin = range_end - settings.parts_to_merge; |
70 | |
71 | for (size_t i = range_begin; i < range_end; ++i) |
72 | if (corrected_level_at_left < parts[i].level) |
73 | corrected_level_at_left = parts[i].level; |
74 | |
75 | while (true) |
76 | { |
77 | if (corrected_level_at_left < parts[range_begin].level) |
78 | corrected_level_at_left = parts[range_begin].level; |
79 | |
80 | if (corrected_level_at_right < parts[range_end - 1].level) |
81 | corrected_level_at_right = parts[range_end - 1].level; |
82 | |
83 | /// Leftmost range of same corrected level. |
84 | if (corrected_level_at_left == corrected_level_at_right |
85 | && (range_begin == 0 || parts[range_begin - 1].level > corrected_level_at_left)) |
86 | { |
87 | size_t range_size = prefix_sums[range_end] - prefix_sums[range_begin]; |
88 | |
89 | if (range_size <= max_total_size_to_merge) |
90 | estimator.consider(parts.begin() + range_begin, parts.begin() + range_end, range_size); |
91 | |
92 | break; /// Minumum level is enough. |
93 | } |
94 | |
95 | if (range_begin == 0) |
96 | break; |
97 | |
98 | --range_begin; |
99 | --range_end; |
100 | } |
101 | } |
102 | |
103 | } |
104 | |
105 | |
106 | LevelMergeSelector::PartsInPartition LevelMergeSelector::select( |
107 | const Partitions & partitions, |
108 | const size_t max_total_size_to_merge) |
109 | { |
110 | Estimator estimator; |
111 | |
112 | for (const auto & partition : partitions) |
113 | selectWithinPartition(partition, max_total_size_to_merge, estimator, settings); |
114 | |
115 | return estimator.getBest(); |
116 | } |
117 | |
118 | } |
119 | |