| 1 | #pragma once | 
|---|
| 2 |  | 
|---|
| 3 | #include <Storages/MergeTree/MergeSelector.h> | 
|---|
| 4 |  | 
|---|
| 5 |  | 
|---|
| 6 | namespace DB | 
|---|
| 7 | { | 
|---|
| 8 |  | 
|---|
| 9 | class SimpleMergeSelector : public IMergeSelector | 
|---|
| 10 | { | 
|---|
| 11 | public: | 
|---|
| 12 | struct Settings | 
|---|
| 13 | { | 
|---|
| 14 | /// Zero means unlimited. | 
|---|
| 15 | size_t max_parts_to_merge_at_once = 100; | 
|---|
| 16 |  | 
|---|
| 17 | /** Minimum ratio of size of one part to all parts in set of parts to merge (for usual cases). | 
|---|
| 18 | * For example, if all parts have equal size, it means, that at least 'base' number of parts should be merged. | 
|---|
| 19 | * If parts has non-uniform sizes, then minimum number of parts to merge is effectively increased. | 
|---|
| 20 | * This behaviour balances merge-tree workload. | 
|---|
| 21 | * It called 'base', because merge-tree depth could be estimated as logarithm with that base. | 
|---|
| 22 | * | 
|---|
| 23 | * If base is higher - then tree gets more wide and narrow, lowering write amplification. | 
|---|
| 24 | * If base is lower - then merges occurs more frequently, lowering number of parts in average. | 
|---|
| 25 | * | 
|---|
| 26 | * We need some balance between write amplification and number of parts. | 
|---|
| 27 | */ | 
|---|
| 28 | double base = 5; | 
|---|
| 29 |  | 
|---|
| 30 | /** Base is lowered until 1 (effectively means "merge any two parts") depending on several variables: | 
|---|
| 31 | * | 
|---|
| 32 | * 1. Total number of parts in partition. If too many - then base is lowered. | 
|---|
| 33 | * It means: when too many parts - do merges more urgently. | 
|---|
| 34 | * | 
|---|
| 35 | * 2. Minimum age of parts participating in merge. If higher age - then base is lowered. | 
|---|
| 36 | * It means: do less wide merges only rarely. | 
|---|
| 37 | * | 
|---|
| 38 | * 3. Sum size of parts participating in merge. If higher - then more age is required to lower base. So, base is lowered slower. | 
|---|
| 39 | * It means: for small parts, it's worth to merge faster, even not so wide or balanced. | 
|---|
| 40 | * | 
|---|
| 41 | * We have multivariative dependency. Let it be logarithmic of size and somewhat multi-linear by other variables, | 
|---|
| 42 | *  between some boundary points, and constant outside. | 
|---|
| 43 | */ | 
|---|
| 44 |  | 
|---|
| 45 | size_t min_size_to_lower_base = 1024 * 1024; | 
|---|
| 46 | size_t max_size_to_lower_base = 100ULL * 1024 * 1024 * 1024; | 
|---|
| 47 |  | 
|---|
| 48 | time_t min_age_to_lower_base_at_min_size = 10; | 
|---|
| 49 | time_t min_age_to_lower_base_at_max_size = 10; | 
|---|
| 50 | time_t max_age_to_lower_base_at_min_size = 3600; | 
|---|
| 51 | time_t max_age_to_lower_base_at_max_size = 30 * 86400; | 
|---|
| 52 |  | 
|---|
| 53 | size_t min_parts_to_lower_base = 10; | 
|---|
| 54 | size_t max_parts_to_lower_base = 50; | 
|---|
| 55 |  | 
|---|
| 56 | /// Add this to size before all calculations. It means: merging even very small parts has it's fixed cost. | 
|---|
| 57 | size_t size_fixed_cost_to_add = 5 * 1024 * 1024; | 
|---|
| 58 |  | 
|---|
| 59 | /** Heuristic: | 
|---|
| 60 | * Make some preference for ranges, that sum_size is like (in terms of ratio) to part previous at left. | 
|---|
| 61 | */ | 
|---|
| 62 | bool enable_heuristic_to_align_parts = true; | 
|---|
| 63 | double heuristic_to_align_parts_min_ratio_of_sum_size_to_prev_part = 0.9; | 
|---|
| 64 | double heuristic_to_align_parts_max_absolute_difference_in_powers_of_two = 0.5; | 
|---|
| 65 | double heuristic_to_align_parts_max_score_adjustment = 0.75; | 
|---|
| 66 |  | 
|---|
| 67 | /** Heuristic: | 
|---|
| 68 | * From right side of range, remove all parts, that size is less than specified ratio of sum_size. | 
|---|
| 69 | */ | 
|---|
| 70 | bool enable_heuristic_to_remove_small_parts_at_right = true; | 
|---|
| 71 | double heuristic_to_remove_small_parts_at_right_max_ratio = 0.01; | 
|---|
| 72 | }; | 
|---|
| 73 |  | 
|---|
| 74 | explicit SimpleMergeSelector(const Settings & settings_) : settings(settings_) {} | 
|---|
| 75 |  | 
|---|
| 76 | PartsInPartition select( | 
|---|
| 77 | const Partitions & partitions, | 
|---|
| 78 | const size_t max_total_size_to_merge) override; | 
|---|
| 79 |  | 
|---|
| 80 | private: | 
|---|
| 81 | const Settings settings; | 
|---|
| 82 | }; | 
|---|
| 83 |  | 
|---|
| 84 | } | 
|---|
| 85 |  | 
|---|