1#include <Storages/MergeTree/LevelMergeSelector.h>
2
3#include <cmath>
4
5
6namespace DB
7{
8
9namespace
10{
11
12/** Estimates best set of parts to merge within passed alternatives.
13 * It is selected simply: by minimal size.
14 */
15struct Estimator
16{
17 using Iterator = LevelMergeSelector::PartsInPartition::const_iterator;
18
19 void consider(Iterator begin, Iterator end, size_t sum_size)
20 {
21 double current_score = sum_size;
22
23 if (!min_score || current_score < min_score)
24 {
25 min_score = current_score;
26 best_begin = begin;
27 best_end = end;
28 }
29 }
30
31 LevelMergeSelector::PartsInPartition getBest()
32 {
33 return LevelMergeSelector::PartsInPartition(best_begin, best_end);
34 }
35
36 double min_score = 0;
37 Iterator best_begin {};
38 Iterator best_end {};
39};
40
41
42void selectWithinPartition(
43 const LevelMergeSelector::PartsInPartition & parts,
44 const size_t max_total_size_to_merge,
45 Estimator & estimator,
46 const LevelMergeSelector::Settings & settings)
47{
48 size_t parts_size = parts.size();
49 if (parts_size < settings.parts_to_merge)
50 return;
51
52 /// To easily calculate sum size in any range.
53 size_t parts_count = parts.size();
54 size_t prefix_sum = 0;
55 std::vector<size_t> prefix_sums(parts.size() + 1);
56
57 for (size_t i = 0; i < parts_count; ++i)
58 {
59 prefix_sum += parts[i].size;
60 prefix_sums[i + 1] = prefix_sum;
61 }
62
63 /// Use "corrected" level. It will be non-decreasing while traversing parts right to left.
64 /// This is done for compatibility with another algorithms.
65 size_t corrected_level_at_left = 0;
66 size_t corrected_level_at_right = 0;
67
68 size_t range_end = parts_size;
69 size_t range_begin = range_end - settings.parts_to_merge;
70
71 for (size_t i = range_begin; i < range_end; ++i)
72 if (corrected_level_at_left < parts[i].level)
73 corrected_level_at_left = parts[i].level;
74
75 while (true)
76 {
77 if (corrected_level_at_left < parts[range_begin].level)
78 corrected_level_at_left = parts[range_begin].level;
79
80 if (corrected_level_at_right < parts[range_end - 1].level)
81 corrected_level_at_right = parts[range_end - 1].level;
82
83 /// Leftmost range of same corrected level.
84 if (corrected_level_at_left == corrected_level_at_right
85 && (range_begin == 0 || parts[range_begin - 1].level > corrected_level_at_left))
86 {
87 size_t range_size = prefix_sums[range_end] - prefix_sums[range_begin];
88
89 if (range_size <= max_total_size_to_merge)
90 estimator.consider(parts.begin() + range_begin, parts.begin() + range_end, range_size);
91
92 break; /// Minumum level is enough.
93 }
94
95 if (range_begin == 0)
96 break;
97
98 --range_begin;
99 --range_end;
100 }
101}
102
103}
104
105
106LevelMergeSelector::PartsInPartition LevelMergeSelector::select(
107 const Partitions & partitions,
108 const size_t max_total_size_to_merge)
109{
110 Estimator estimator;
111
112 for (const auto & partition : partitions)
113 selectWithinPartition(partition, max_total_size_to_merge, estimator, settings);
114
115 return estimator.getBest();
116}
117
118}
119