| 1 | #pragma once |
| 2 | |
| 3 | #include <Storages/MergeTree/MergeSelector.h> |
| 4 | |
| 5 | |
| 6 | namespace DB |
| 7 | { |
| 8 | |
| 9 | class SimpleMergeSelector : public IMergeSelector |
| 10 | { |
| 11 | public: |
| 12 | struct Settings |
| 13 | { |
| 14 | /// Zero means unlimited. |
| 15 | size_t max_parts_to_merge_at_once = 100; |
| 16 | |
| 17 | /** Minimum ratio of size of one part to all parts in set of parts to merge (for usual cases). |
| 18 | * For example, if all parts have equal size, it means, that at least 'base' number of parts should be merged. |
| 19 | * If parts has non-uniform sizes, then minimum number of parts to merge is effectively increased. |
| 20 | * This behaviour balances merge-tree workload. |
| 21 | * It called 'base', because merge-tree depth could be estimated as logarithm with that base. |
| 22 | * |
| 23 | * If base is higher - then tree gets more wide and narrow, lowering write amplification. |
| 24 | * If base is lower - then merges occurs more frequently, lowering number of parts in average. |
| 25 | * |
| 26 | * We need some balance between write amplification and number of parts. |
| 27 | */ |
| 28 | double base = 5; |
| 29 | |
| 30 | /** Base is lowered until 1 (effectively means "merge any two parts") depending on several variables: |
| 31 | * |
| 32 | * 1. Total number of parts in partition. If too many - then base is lowered. |
| 33 | * It means: when too many parts - do merges more urgently. |
| 34 | * |
| 35 | * 2. Minimum age of parts participating in merge. If higher age - then base is lowered. |
| 36 | * It means: do less wide merges only rarely. |
| 37 | * |
| 38 | * 3. Sum size of parts participating in merge. If higher - then more age is required to lower base. So, base is lowered slower. |
| 39 | * It means: for small parts, it's worth to merge faster, even not so wide or balanced. |
| 40 | * |
| 41 | * We have multivariative dependency. Let it be logarithmic of size and somewhat multi-linear by other variables, |
| 42 | * between some boundary points, and constant outside. |
| 43 | */ |
| 44 | |
| 45 | size_t min_size_to_lower_base = 1024 * 1024; |
| 46 | size_t max_size_to_lower_base = 100ULL * 1024 * 1024 * 1024; |
| 47 | |
| 48 | time_t min_age_to_lower_base_at_min_size = 10; |
| 49 | time_t min_age_to_lower_base_at_max_size = 10; |
| 50 | time_t max_age_to_lower_base_at_min_size = 3600; |
| 51 | time_t max_age_to_lower_base_at_max_size = 30 * 86400; |
| 52 | |
| 53 | size_t min_parts_to_lower_base = 10; |
| 54 | size_t max_parts_to_lower_base = 50; |
| 55 | |
| 56 | /// Add this to size before all calculations. It means: merging even very small parts has it's fixed cost. |
| 57 | size_t size_fixed_cost_to_add = 5 * 1024 * 1024; |
| 58 | |
| 59 | /** Heuristic: |
| 60 | * Make some preference for ranges, that sum_size is like (in terms of ratio) to part previous at left. |
| 61 | */ |
| 62 | bool enable_heuristic_to_align_parts = true; |
| 63 | double heuristic_to_align_parts_min_ratio_of_sum_size_to_prev_part = 0.9; |
| 64 | double heuristic_to_align_parts_max_absolute_difference_in_powers_of_two = 0.5; |
| 65 | double heuristic_to_align_parts_max_score_adjustment = 0.75; |
| 66 | |
| 67 | /** Heuristic: |
| 68 | * From right side of range, remove all parts, that size is less than specified ratio of sum_size. |
| 69 | */ |
| 70 | bool enable_heuristic_to_remove_small_parts_at_right = true; |
| 71 | double heuristic_to_remove_small_parts_at_right_max_ratio = 0.01; |
| 72 | }; |
| 73 | |
| 74 | explicit SimpleMergeSelector(const Settings & settings_) : settings(settings_) {} |
| 75 | |
| 76 | PartsInPartition select( |
| 77 | const Partitions & partitions, |
| 78 | const size_t max_total_size_to_merge) override; |
| 79 | |
| 80 | private: |
| 81 | const Settings settings; |
| 82 | }; |
| 83 | |
| 84 | } |
| 85 | |