1#pragma once
2
3#include <Storages/MergeTree/MergeSelector.h>
4
5
6namespace DB
7{
8
9class SimpleMergeSelector : public IMergeSelector
10{
11public:
12 struct Settings
13 {
14 /// Zero means unlimited.
15 size_t max_parts_to_merge_at_once = 100;
16
17 /** Minimum ratio of size of one part to all parts in set of parts to merge (for usual cases).
18 * For example, if all parts have equal size, it means, that at least 'base' number of parts should be merged.
19 * If parts has non-uniform sizes, then minimum number of parts to merge is effectively increased.
20 * This behaviour balances merge-tree workload.
21 * It called 'base', because merge-tree depth could be estimated as logarithm with that base.
22 *
23 * If base is higher - then tree gets more wide and narrow, lowering write amplification.
24 * If base is lower - then merges occurs more frequently, lowering number of parts in average.
25 *
26 * We need some balance between write amplification and number of parts.
27 */
28 double base = 5;
29
30 /** Base is lowered until 1 (effectively means "merge any two parts") depending on several variables:
31 *
32 * 1. Total number of parts in partition. If too many - then base is lowered.
33 * It means: when too many parts - do merges more urgently.
34 *
35 * 2. Minimum age of parts participating in merge. If higher age - then base is lowered.
36 * It means: do less wide merges only rarely.
37 *
38 * 3. Sum size of parts participating in merge. If higher - then more age is required to lower base. So, base is lowered slower.
39 * It means: for small parts, it's worth to merge faster, even not so wide or balanced.
40 *
41 * We have multivariative dependency. Let it be logarithmic of size and somewhat multi-linear by other variables,
42 * between some boundary points, and constant outside.
43 */
44
45 size_t min_size_to_lower_base = 1024 * 1024;
46 size_t max_size_to_lower_base = 100ULL * 1024 * 1024 * 1024;
47
48 time_t min_age_to_lower_base_at_min_size = 10;
49 time_t min_age_to_lower_base_at_max_size = 10;
50 time_t max_age_to_lower_base_at_min_size = 3600;
51 time_t max_age_to_lower_base_at_max_size = 30 * 86400;
52
53 size_t min_parts_to_lower_base = 10;
54 size_t max_parts_to_lower_base = 50;
55
56 /// Add this to size before all calculations. It means: merging even very small parts has it's fixed cost.
57 size_t size_fixed_cost_to_add = 5 * 1024 * 1024;
58
59 /** Heuristic:
60 * Make some preference for ranges, that sum_size is like (in terms of ratio) to part previous at left.
61 */
62 bool enable_heuristic_to_align_parts = true;
63 double heuristic_to_align_parts_min_ratio_of_sum_size_to_prev_part = 0.9;
64 double heuristic_to_align_parts_max_absolute_difference_in_powers_of_two = 0.5;
65 double heuristic_to_align_parts_max_score_adjustment = 0.75;
66
67 /** Heuristic:
68 * From right side of range, remove all parts, that size is less than specified ratio of sum_size.
69 */
70 bool enable_heuristic_to_remove_small_parts_at_right = true;
71 double heuristic_to_remove_small_parts_at_right_max_ratio = 0.01;
72 };
73
74 explicit SimpleMergeSelector(const Settings & settings_) : settings(settings_) {}
75
76 PartsInPartition select(
77 const Partitions & partitions,
78 const size_t max_total_size_to_merge) override;
79
80private:
81 const Settings settings;
82};
83
84}
85