1 | #pragma once |
2 | |
3 | #include <Storages/MergeTree/MergeSelector.h> |
4 | |
5 | |
6 | namespace DB |
7 | { |
8 | |
9 | class SimpleMergeSelector : public IMergeSelector |
10 | { |
11 | public: |
12 | struct Settings |
13 | { |
14 | /// Zero means unlimited. |
15 | size_t max_parts_to_merge_at_once = 100; |
16 | |
17 | /** Minimum ratio of size of one part to all parts in set of parts to merge (for usual cases). |
18 | * For example, if all parts have equal size, it means, that at least 'base' number of parts should be merged. |
19 | * If parts has non-uniform sizes, then minimum number of parts to merge is effectively increased. |
20 | * This behaviour balances merge-tree workload. |
21 | * It called 'base', because merge-tree depth could be estimated as logarithm with that base. |
22 | * |
23 | * If base is higher - then tree gets more wide and narrow, lowering write amplification. |
24 | * If base is lower - then merges occurs more frequently, lowering number of parts in average. |
25 | * |
26 | * We need some balance between write amplification and number of parts. |
27 | */ |
28 | double base = 5; |
29 | |
30 | /** Base is lowered until 1 (effectively means "merge any two parts") depending on several variables: |
31 | * |
32 | * 1. Total number of parts in partition. If too many - then base is lowered. |
33 | * It means: when too many parts - do merges more urgently. |
34 | * |
35 | * 2. Minimum age of parts participating in merge. If higher age - then base is lowered. |
36 | * It means: do less wide merges only rarely. |
37 | * |
38 | * 3. Sum size of parts participating in merge. If higher - then more age is required to lower base. So, base is lowered slower. |
39 | * It means: for small parts, it's worth to merge faster, even not so wide or balanced. |
40 | * |
41 | * We have multivariative dependency. Let it be logarithmic of size and somewhat multi-linear by other variables, |
42 | * between some boundary points, and constant outside. |
43 | */ |
44 | |
45 | size_t min_size_to_lower_base = 1024 * 1024; |
46 | size_t max_size_to_lower_base = 100ULL * 1024 * 1024 * 1024; |
47 | |
48 | time_t min_age_to_lower_base_at_min_size = 10; |
49 | time_t min_age_to_lower_base_at_max_size = 10; |
50 | time_t max_age_to_lower_base_at_min_size = 3600; |
51 | time_t max_age_to_lower_base_at_max_size = 30 * 86400; |
52 | |
53 | size_t min_parts_to_lower_base = 10; |
54 | size_t max_parts_to_lower_base = 50; |
55 | |
56 | /// Add this to size before all calculations. It means: merging even very small parts has it's fixed cost. |
57 | size_t size_fixed_cost_to_add = 5 * 1024 * 1024; |
58 | |
59 | /** Heuristic: |
60 | * Make some preference for ranges, that sum_size is like (in terms of ratio) to part previous at left. |
61 | */ |
62 | bool enable_heuristic_to_align_parts = true; |
63 | double heuristic_to_align_parts_min_ratio_of_sum_size_to_prev_part = 0.9; |
64 | double heuristic_to_align_parts_max_absolute_difference_in_powers_of_two = 0.5; |
65 | double heuristic_to_align_parts_max_score_adjustment = 0.75; |
66 | |
67 | /** Heuristic: |
68 | * From right side of range, remove all parts, that size is less than specified ratio of sum_size. |
69 | */ |
70 | bool enable_heuristic_to_remove_small_parts_at_right = true; |
71 | double heuristic_to_remove_small_parts_at_right_max_ratio = 0.01; |
72 | }; |
73 | |
74 | explicit SimpleMergeSelector(const Settings & settings_) : settings(settings_) {} |
75 | |
76 | PartsInPartition select( |
77 | const Partitions & partitions, |
78 | const size_t max_total_size_to_merge) override; |
79 | |
80 | private: |
81 | const Settings settings; |
82 | }; |
83 | |
84 | } |
85 | |