1#pragma once
2
3#include <cstddef>
4#include <ctime>
5#include <vector>
6#include <functional>
7
8
9namespace DB
10{
11
/** Interface of an algorithm that selects data parts to merge
 * (merge is also known as "compaction").
 * The following properties depend on it:
 *
 * 1. Number of data parts at any given moment in time.
 * If parts are merged frequently, then data will be represented by a lower number of parts, on average,
 * but at the cost of higher write amplification.
 *
 * 2. Write amplification ratio: how many times, on average, source data was written
 * (during initial writes and subsequent merges).
 *
 * The number of parallel merges is controlled outside the scope of this interface.
 */
class IMergeSelector
{
public:
    /// Information about a data part that is relevant to the merge selecting strategy.
    ///
    /// Every field has a default member initializer, so a default-constructed Part
    /// has well-defined (zero / null) contents instead of indeterminate values.
    /// The struct remains an aggregate (C++14 and later), so brace initialization
    /// in declaration order keeps working.
    struct Part
    {
        /// Size of the data part in bytes.
        size_t size = 0;

        /// Age of this data part, in seconds.
        time_t age = 0;

        /// Depth of the tree of merges by which this part was created. New parts have zero level.
        unsigned level = 0;

        /// Opaque pointer to avoid dependencies (it is not possible to do forward declaration of typedef).
        /// Not interpreted by this interface.
        const void * data = nullptr;

        /// Minimal time, when we need to delete some data from this part.
        /// NOTE(review): zero is only the default value here; whether it means "no TTL" is up to implementations.
        time_t min_ttl = 0;

        /// Maximum time, when we will need to drop this part altogether because all rows in it are expired.
        time_t max_ttl = 0;
    };

    /// Parts belong to partitions. Only parts within the same partition can be merged.
    using PartsInPartition = std::vector<Part>;

    /// Parts are in some specific order. Parts can be merged only in contiguous ranges.
    using Partitions = std::vector<PartsInPartition>;

    /** The function may be called at any frequency and must decide whether any merge should be done at all.
     * If it is better not to do any merge, it returns an empty result.
     *
     * @param partitions                candidate parts, grouped by partition.
     * @param max_total_size_to_merge   size limit passed by the caller; how it is applied is up to the implementation.
     * @return the parts selected for merging, or an empty vector if no merge should be done.
     */
    virtual PartsInPartition select(
        const Partitions & partitions,
        const size_t max_total_size_to_merge) = 0;

    /// Virtual destructor: implementations may be destroyed through a pointer to this interface.
    virtual ~IMergeSelector() = default;
};
65
66}
67