1 | #pragma once |
2 | |
3 | #include <Core/Block.h> |
4 | #include <Core/Types.h> |
5 | #include <Core/NamesAndTypes.h> |
6 | #include <Storages/MergeTree/MergeTreeIndexGranularity.h> |
7 | #include <Storages/MergeTree/MergeTreeIndexGranularityInfo.h> |
8 | #include <Storages/MergeTree/MergeTreeIndices.h> |
9 | #include <Storages/MergeTree/MergeTreePartInfo.h> |
10 | #include <Storages/MergeTree/MergeTreePartition.h> |
11 | #include <Storages/MergeTree/MergeTreeDataPartChecksum.h> |
12 | #include <Storages/MergeTree/MergeTreeDataPartTTLInfo.h> |
13 | #include <Storages/MergeTree/KeyCondition.h> |
14 | #include <Columns/IColumn.h> |
15 | |
16 | #include <Poco/Path.h> |
17 | |
18 | #include <shared_mutex> |
19 | |
20 | |
21 | namespace DB |
22 | { |
23 | |
/// Forward declarations of types that are defined elsewhere.
/// ColumnSize and MergeTreeData are referenced only by declarations in this header;
/// FutureMergedMutatedPart is not referenced in the visible part of this file.
struct ColumnSize;
class MergeTreeData;
struct FutureMergedMutatedPart;
27 | |
28 | |
29 | /// Description of the data part. |
30 | struct MergeTreeDataPart |
31 | { |
32 | using Checksums = MergeTreeDataPartChecksums; |
33 | using Checksum = MergeTreeDataPartChecksums::Checksum; |
34 | |
35 | MergeTreeDataPart(const MergeTreeData & storage_, const DiskPtr & disk_, const String & name_, const MergeTreePartInfo & info_); |
36 | |
37 | MergeTreeDataPart(MergeTreeData & storage_, const DiskPtr & disk_, const String & name_); |
38 | |
39 | /// Returns the name of a column with minimum compressed size (as returned by getColumnSize()). |
40 | /// If no checksums are present returns the name of the first physically existing column. |
41 | String getColumnNameWithMinumumCompressedSize() const; |
42 | |
43 | /// NOTE: Returns zeros if column files are not found in checksums. |
44 | /// NOTE: You must ensure that no ALTERs are in progress when calculating ColumnSizes. |
45 | /// (either by locking columns_lock, or by locking table structure). |
46 | ColumnSize getColumnSize(const String & name, const IDataType & type) const; |
47 | |
48 | ColumnSize getTotalColumnsSize() const; |
49 | |
50 | size_t getFileSizeOrZero(const String & file_name) const; |
51 | |
52 | /// Returns full path to part dir |
53 | String getFullPath() const; |
54 | |
55 | /// Returns part->name with prefixes like 'tmp_<name>' |
56 | String getNameWithPrefix() const; |
57 | |
58 | /// Generate the new name for this part according to `new_part_info` and min/max dates from the old name. |
59 | /// This is useful when you want to change e.g. block numbers or the mutation version of the part. |
60 | String getNewName(const MergeTreePartInfo & new_part_info) const; |
61 | |
62 | bool contains(const MergeTreeDataPart & other) const { return info.contains(other.info); } |
63 | |
64 | /// If the partition key includes date column (a common case), these functions will return min and max values for this column. |
65 | DayNum getMinDate() const; |
66 | DayNum getMaxDate() const; |
67 | |
68 | /// otherwise, if the partition key includes dateTime column (also a common case), these functions will return min and max values for this column. |
69 | time_t getMinTime() const; |
70 | time_t getMaxTime() const; |
71 | |
72 | bool isEmpty() const { return rows_count == 0; } |
73 | |
74 | const MergeTreeData & storage; |
75 | |
76 | DiskPtr disk; |
77 | String name; |
78 | MergeTreePartInfo info; |
79 | |
80 | /// A directory path (relative to storage's path) where part data is actually stored |
81 | /// Examples: 'detached/tmp_fetch_<name>', 'tmp_<name>', '<name>' |
82 | mutable String relative_path; |
83 | |
84 | size_t rows_count = 0; |
85 | std::atomic<UInt64> bytes_on_disk {0}; /// 0 - if not counted; |
86 | /// Is used from several threads without locks (it is changed with ALTER). |
87 | /// May not contain size of checksums.txt and columns.txt |
88 | time_t modification_time = 0; |
89 | /// When the part is removed from the working set. Changes once. |
90 | mutable std::atomic<time_t> remove_time { std::numeric_limits<time_t>::max() }; |
91 | |
92 | /// If true, the destructor will delete the directory with the part. |
93 | bool is_temp = false; |
94 | |
95 | /// If true it means that there are no ZooKeeper node for this part, so it should be deleted only from filesystem |
96 | bool is_duplicate = false; |
97 | |
98 | /// Frozen by ALTER TABLE ... FREEZE ... It is used for information purposes in system.parts table. |
99 | mutable std::atomic<bool> is_frozen {false}; |
100 | |
101 | /** |
102 | * Part state is a stage of its lifetime. States are ordered and state of a part could be increased only. |
103 | * Part state should be modified under data_parts mutex. |
104 | * |
105 | * Possible state transitions: |
106 | * Temporary -> Precommitted: we are trying to commit a fetched, inserted or merged part to active set |
107 | * Precommitted -> Outdated: we could not to add a part to active set and doing a rollback (for example it is duplicated part) |
108 | * Precommitted -> Commited: we successfully committed a part to active dataset |
109 | * Precommitted -> Outdated: a part was replaced by a covering part or DROP PARTITION |
110 | * Outdated -> Deleting: a cleaner selected this part for deletion |
111 | * Deleting -> Outdated: if an ZooKeeper error occurred during the deletion, we will retry deletion |
112 | * Committed -> DeleteOnDestroy if part was moved to another disk |
113 | */ |
114 | enum class State |
115 | { |
116 | Temporary, /// the part is generating now, it is not in data_parts list |
117 | PreCommitted, /// the part is in data_parts, but not used for SELECTs |
118 | Committed, /// active data part, used by current and upcoming SELECTs |
119 | Outdated, /// not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes |
120 | Deleting, /// not active data part with identity refcounter, it is deleting right now by a cleaner |
121 | DeleteOnDestroy, /// part was moved to another disk and should be deleted in own destructor |
122 | }; |
123 | |
124 | using TTLInfo = MergeTreeDataPartTTLInfo; |
125 | using TTLInfos = MergeTreeDataPartTTLInfos; |
126 | |
127 | TTLInfos ttl_infos; |
128 | |
129 | /// Current state of the part. If the part is in working set already, it should be accessed via data_parts mutex |
130 | mutable State state{State::Temporary}; |
131 | |
132 | /// Returns name of state |
133 | static String stateToString(State state); |
134 | String stateString() const; |
135 | |
136 | String getNameWithState() const |
137 | { |
138 | return name + " (state " + stateString() + ")" ; |
139 | } |
140 | |
141 | /// Returns true if state of part is one of affordable_states |
142 | bool checkState(const std::initializer_list<State> & affordable_states) const |
143 | { |
144 | for (auto affordable_state : affordable_states) |
145 | { |
146 | if (state == affordable_state) |
147 | return true; |
148 | } |
149 | return false; |
150 | } |
151 | |
152 | /// Throws an exception if state of the part is not in affordable_states |
153 | void assertState(const std::initializer_list<State> & affordable_states) const; |
154 | |
155 | /// In comparison with lambdas, it is move assignable and could has several overloaded operator() |
156 | struct StatesFilter |
157 | { |
158 | std::initializer_list<State> affordable_states; |
159 | StatesFilter(const std::initializer_list<State> & affordable_states_) : affordable_states(affordable_states_) {} |
160 | |
161 | bool operator() (const std::shared_ptr<const MergeTreeDataPart> & part) const |
162 | { |
163 | return part->checkState(affordable_states); |
164 | } |
165 | }; |
166 | |
167 | /// Returns a lambda that returns true only for part with states from specified list |
168 | static inline StatesFilter getStatesFilter(const std::initializer_list<State> & affordable_states) |
169 | { |
170 | return StatesFilter(affordable_states); |
171 | } |
172 | |
173 | /// Primary key (correspond to primary.idx file). |
174 | /// Always loaded in RAM. Contains each index_granularity-th value of primary key tuple. |
175 | /// Note that marks (also correspond to primary key) is not always in RAM, but cached. See MarkCache.h. |
176 | using Index = Columns; |
177 | Index index; |
178 | |
179 | MergeTreePartition partition; |
180 | |
181 | /// Amount of rows between marks |
182 | /// As index always loaded into memory |
183 | MergeTreeIndexGranularity index_granularity; |
184 | |
185 | /// Index that for each part stores min and max values of a set of columns. This allows quickly excluding |
186 | /// parts based on conditions on these columns imposed by a query. |
187 | /// Currently this index is built using only columns required by partition expression, but in principle it |
188 | /// can be built using any set of columns. |
189 | struct MinMaxIndex |
190 | { |
191 | /// A direct product of ranges for each key column. See Storages/MergeTree/KeyCondition.cpp for details. |
192 | std::vector<Range> parallelogram; |
193 | bool initialized = false; |
194 | |
195 | public: |
196 | MinMaxIndex() = default; |
197 | |
198 | /// For month-based partitioning. |
199 | MinMaxIndex(DayNum min_date, DayNum max_date) |
200 | : parallelogram(1, Range(min_date, true, max_date, true)) |
201 | , initialized(true) |
202 | { |
203 | } |
204 | |
205 | void load(const MergeTreeData & storage, const String & part_path); |
206 | void store(const MergeTreeData & storage, const String & part_path, Checksums & checksums) const; |
207 | void store(const Names & column_names, const DataTypes & data_types, const String & part_path, Checksums & checksums) const; |
208 | |
209 | void update(const Block & block, const Names & column_names); |
210 | void merge(const MinMaxIndex & other); |
211 | }; |
212 | |
213 | MinMaxIndex minmax_idx; |
214 | |
215 | Checksums checksums; |
216 | |
217 | /// Columns description. |
218 | NamesAndTypesList columns; |
219 | |
220 | /// Columns with values, that all have been zeroed by expired ttl |
221 | NameSet empty_columns; |
222 | |
223 | using ColumnToSize = std::map<std::string, UInt64>; |
224 | |
225 | /** It is blocked for writing when changing columns, checksums or any part files. |
226 | * Locked to read when reading columns, checksums or any part files. |
227 | */ |
228 | mutable std::shared_mutex columns_lock; |
229 | |
230 | MergeTreeIndexGranularityInfo index_granularity_info; |
231 | |
232 | ~MergeTreeDataPart(); |
233 | |
234 | /// Calculate the total size of the entire directory with all the files |
235 | static UInt64 calculateTotalSizeOnDisk(const String & from); |
236 | |
237 | void remove() const; |
238 | |
239 | /// Makes checks and move part to new directory |
240 | /// Changes only relative_dir_name, you need to update other metadata (name, is_temp) explicitly |
241 | void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists = true) const; |
242 | |
243 | /// Generate unique path to detach part |
244 | String getRelativePathForDetachedPart(const String & prefix) const; |
245 | |
246 | /// Moves a part to detached/ directory and adds prefix to its name |
247 | void renameToDetached(const String & prefix) const; |
248 | |
249 | /// Makes clone of a part in detached/ directory via hard links |
250 | void makeCloneInDetached(const String & prefix) const; |
251 | |
252 | /// Makes full clone of part in detached/ on another disk |
253 | void makeCloneOnDiskDetached(const ReservationPtr & reservation) const; |
254 | |
255 | /// Populates columns_to_size map (compressed size). |
256 | void accumulateColumnSizes(ColumnToSize & column_to_size) const; |
257 | |
258 | /// Initialize columns (from columns.txt if exists, or create from column files if not). |
259 | /// Load checksums from checksums.txt if exists. Load index if required. |
260 | void loadColumnsChecksumsIndexes(bool require_columns_checksums, bool check_consistency); |
261 | |
262 | /// Checks that .bin and .mrk files exist |
263 | bool hasColumnFiles(const String & column, const IDataType & type) const; |
264 | |
265 | /// For data in RAM ('index') |
266 | UInt64 getIndexSizeInBytes() const; |
267 | UInt64 getIndexSizeInAllocatedBytes() const; |
268 | UInt64 getMarksCount() const; |
269 | |
270 | private: |
271 | /// Reads columns names and types from columns.txt |
272 | void loadColumns(bool require); |
273 | |
274 | /// If checksums.txt exists, reads files' checksums (and sizes) from it |
275 | void loadChecksums(bool require); |
276 | |
277 | /// Loads marks index granularity into memory |
278 | void loadIndexGranularity(); |
279 | |
280 | /// Loads index file. |
281 | void loadIndex(); |
282 | |
283 | /// Load rows count for this part from disk (for the newer storage format version). |
284 | /// For the older format version calculates rows count from the size of a column with a fixed size. |
285 | void loadRowsCount(); |
286 | |
287 | /// Loads ttl infos in json format from file ttl.txt. If file doesn`t exists assigns ttl infos with all zeros |
288 | void loadTTLInfos(); |
289 | |
290 | void loadPartitionAndMinMaxIndex(); |
291 | |
292 | void checkConsistency(bool require_part_metadata); |
293 | |
294 | ColumnSize getColumnSizeImpl(const String & name, const IDataType & type, std::unordered_set<String> * processed_substreams) const; |
295 | }; |
296 | |
297 | |
/// Namespace-level alias for the nested MergeTreeDataPart::State enum.
using MergeTreeDataPartState = MergeTreeDataPart::State;
299 | |
300 | } |
301 | |