| 1 | #pragma once |
| 2 | |
| 3 | #include <cstddef> |
| 4 | #include <ctime> |
| 5 | #include <vector> |
| 6 | #include <functional> |
| 7 | |
| 8 | |
| 9 | namespace DB |
| 10 | { |
| 11 | |
| 12 | /** Interface of algorithm to select data parts to merge |
| 13 | * (merge is also known as "compaction"). |
| 14 | * Following properties depend on it: |
| 15 | * |
| 16 | * 1. Number of data parts at some moment in time. |
| 17 | * If parts are merged frequently, then data will be represented by a lower number of parts, on average, |
| 18 | * but at the cost of higher write amplification. |
| 19 | * |
| 20 | * 2. Write amplification ratio: how many times, on average, source data was written |
| 21 | * (during initial writes and subsequent merges). |
| 22 | * |
| 23 | * The number of parallel merges is controlled outside the scope of this interface. |
| 24 | */ |
/// Abstract interface of a merge selection strategy: given the current data parts
/// (grouped by partition), decide which contiguous range of parts, if any, to merge.
class IMergeSelector
{
public:
    /// Information about a data part that is relevant to the merge selection strategy.
    ///
    /// Every field has a default initializer, so a default-constructed Part is in a
    /// well-defined (all zero / null) state instead of holding indeterminate values.
    /// The struct is still an aggregate (C++14 and later), so brace initialization works.
    struct Part
    {
        /// Size of the data part in bytes.
        size_t size = 0;

        /// Age of this data part, in seconds.
        time_t age = 0;

        /// Depth of the tree of merges by which this part was created. New parts have level zero.
        unsigned level = 0;

        /// Opaque pointer to avoid dependencies (it is not possible to do forward declaration of typedef).
        /// This interface never dereferences it; null by default.
        const void * data = nullptr;

        /// Minimal time at which some data needs to be deleted from this part.
        /// NOTE(review): presumably 0 means "no TTL" - confirm against the code that fills this field.
        time_t min_ttl = 0;

        /// Maximal time at which this part will need to be dropped altogether, because all rows in it are expired.
        /// NOTE(review): presumably 0 means "no TTL" - confirm against the code that fills this field.
        time_t max_ttl = 0;
    };

    /// Parts belong to partitions. Only parts within the same partition can be merged.
    using PartsInPartition = std::vector<Part>;

    /// Parts are in some specific order. Parts can be merged only in contiguous ranges.
    using Partitions = std::vector<PartsInPartition>;

    /** Can be called at any frequency; it must decide whether any merge should be done at all.
      *
      * @param partitions               candidate parts, grouped by partition.
      * @param max_total_size_to_merge  limit on the total size of the parts to merge
      *                                 (presumably in bytes, like Part::size - confirm with implementations).
      * @return the parts to merge, or an empty result if it is better not to do any merge.
      */
    virtual PartsInPartition select(
        const Partitions & partitions,
        const size_t max_total_size_to_merge) = 0;

    /// Virtual destructor, so that implementations can be destroyed through a pointer to this interface.
    virtual ~IMergeSelector() = default;
};
| 65 | |
| 66 | } |
| 67 | |