MergeTreeDataPart.h source code [ClickHouse/dbms/src/Storages/MergeTree/MergeTreeDataPart.h]

1	#pragma once
2
3	#include <Core/Block.h>
4	#include <Core/Types.h>
5	#include <Core/NamesAndTypes.h>
6	#include <Storages/MergeTree/MergeTreeIndexGranularity.h>
7	#include <Storages/MergeTree/MergeTreeIndexGranularityInfo.h>
8	#include <Storages/MergeTree/MergeTreeIndices.h>
9	#include <Storages/MergeTree/MergeTreePartInfo.h>
10	#include <Storages/MergeTree/MergeTreePartition.h>
11	#include <Storages/MergeTree/MergeTreeDataPartChecksum.h>
12	#include <Storages/MergeTree/MergeTreeDataPartTTLInfo.h>
13	#include <Storages/MergeTree/KeyCondition.h>
14	#include <Columns/IColumn.h>
15
16	#include <Poco/Path.h>
17
18	#include <shared_mutex>
19
20
21	namespace DB
22	{
23
/// Forward declarations of types defined elsewhere; this header only needs their names.
/// ColumnSize is used as a return type and MergeTreeData is used by reference in MergeTreeDataPart below.
/// NOTE(review): FutureMergedMutatedPart is not referenced in the visible part of this header - confirm it is still needed.
24	struct ColumnSize;
25	class MergeTreeData;
26	struct FutureMergedMutatedPart;
27
28
29	/// Description of the data part.
30	struct MergeTreeDataPart
31	{
32	using Checksums = MergeTreeDataPartChecksums;
33	using Checksum = MergeTreeDataPartChecksums::Checksum;
34
35	MergeTreeDataPart(const MergeTreeData & storage_, const DiskPtr & disk_, const String & name_, const MergeTreePartInfo & info_);
36
37	MergeTreeDataPart(MergeTreeData & storage_, const DiskPtr & disk_, const String & name_);
38
39	/// Returns the name of a column with minimum compressed size (as returned by getColumnSize()).
40	/// If no checksums are present returns the name of the first physically existing column.
41	String getColumnNameWithMinumumCompressedSize() const;
42
43	/// NOTE: Returns zeros if column files are not found in checksums.
44	/// NOTE: You must ensure that no ALTERs are in progress when calculating ColumnSizes.
45	/// (either by locking columns_lock, or by locking table structure).
46	ColumnSize getColumnSize(const String & name, const IDataType & type) const;
47
48	ColumnSize getTotalColumnsSize() const;
49
50	size_t getFileSizeOrZero(const String & file_name) const;
51
52	/// Returns full path to part dir
53	String getFullPath() const;
54
55	/// Returns part->name with prefixes like 'tmp_<name>'
56	String getNameWithPrefix() const;
57
58	/// Generate the new name for this part according to `new_part_info` and min/max dates from the old name.
59	/// This is useful when you want to change e.g. block numbers or the mutation version of the part.
60	String getNewName(const MergeTreePartInfo & new_part_info) const;
61
62	bool contains(const MergeTreeDataPart & other) const { return info.contains(other.info); }
63
64	/// If the partition key includes date column (a common case), these functions will return min and max values for this column.
65	DayNum getMinDate() const;
66	DayNum getMaxDate() const;
67
68	/// otherwise, if the partition key includes dateTime column (also a common case), these functions will return min and max values for this column.
69	time_t getMinTime() const;
70	time_t getMaxTime() const;
71
72	bool isEmpty() const { return rows_count == `0`; }
73
74	const MergeTreeData & storage;
75
76	DiskPtr disk;
77	String name;
78	MergeTreePartInfo info;
79
80	/// A directory path (relative to storage's path) where part data is actually stored
81	/// Examples: 'detached/tmp_fetch_<name>', 'tmp_<name>', '<name>'
82	mutable String relative_path;
83
84	size_t rows_count = `0`;
85	std::atomic<UInt64> bytes_on_disk {`0`}; /// 0 - if not counted;
86	/// Is used from several threads without locks (it is changed with ALTER).
87	/// May not contain size of checksums.txt and columns.txt
88	time_t modification_time = `0`;
89	/// When the part is removed from the working set. Changes once.
90	mutable std::atomic<time_t> remove_time { std::numeric_limits<time_t>::max() };
91
92	/// If true, the destructor will delete the directory with the part.
93	bool is_temp = false;
94
95	/// If true it means that there are no ZooKeeper node for this part, so it should be deleted only from filesystem
96	bool is_duplicate = false;
97
98	/// Frozen by ALTER TABLE ... FREEZE ... It is used for information purposes in system.parts table.
99	mutable std::atomic<bool> is_frozen {false};
100
101	/**
102	* Part state is a stage of its lifetime. States are ordered and state of a part could be increased only.
103	* Part state should be modified under data_parts mutex.
104	*
105	* Possible state transitions:
106	* Temporary -> Precommitted: we are trying to commit a fetched, inserted or merged part to active set
107	* Precommitted -> Outdated: we could not to add a part to active set and doing a rollback (for example it is duplicated part)
108	* Precommitted -> Commited: we successfully committed a part to active dataset
109	* Precommitted -> Outdated: a part was replaced by a covering part or DROP PARTITION
110	* Outdated -> Deleting: a cleaner selected this part for deletion
111	* Deleting -> Outdated: if an ZooKeeper error occurred during the deletion, we will retry deletion
112	* Committed -> DeleteOnDestroy if part was moved to another disk
113	*/
114	enum class State
115	{
116	Temporary, /// the part is generating now, it is not in data_parts list
117	PreCommitted, /// the part is in data_parts, but not used for SELECTs
118	Committed, /// active data part, used by current and upcoming SELECTs
119	Outdated, /// not active data part, but could be used by only current SELECTs, could be deleted after SELECTs finishes
120	Deleting, /// not active data part with identity refcounter, it is deleting right now by a cleaner
121	DeleteOnDestroy, /// part was moved to another disk and should be deleted in own destructor
122	};
123
124	using TTLInfo = MergeTreeDataPartTTLInfo;
125	using TTLInfos = MergeTreeDataPartTTLInfos;
126
127	TTLInfos ttl_infos;
128
129	/// Current state of the part. If the part is in working set already, it should be accessed via data_parts mutex
130	mutable State state{State::Temporary};
131
132	/// Returns name of state
133	static String stateToString(State state);
134	String stateString() const;
135
136	String getNameWithState() const
137	{
138	return name + " (state " + stateString() + ")";
139	}
140
141	/// Returns true if state of part is one of affordable_states
142	bool checkState(const std::initializer_list<State> & affordable_states) const
143	{
144	for (auto affordable_state : affordable_states)
145	{
146	if (state == affordable_state)
147	return true;
148	}
149	return false;
150	}
151
152	/// Throws an exception if state of the part is not in affordable_states
153	void assertState(const std::initializer_list<State> & affordable_states) const;
154
155	/// In comparison with lambdas, it is move assignable and could has several overloaded operator()
156	struct StatesFilter
157	{
158	std::initializer_list<State> affordable_states;
159	StatesFilter(const std::initializer_list<State> & affordable_states_) : affordable_states (affordable_states_) {}
160
161	bool operator() (const std::shared_ptr<const MergeTreeDataPart> & part) const
162	{
163	return part ->checkState(affordable_states);
164	}
165	};
166
167	/// Returns a lambda that returns true only for part with states from specified list
168	static inline StatesFilter getStatesFilter(const std::initializer_list<State> & affordable_states)
169	{
170	return StatesFilter (affordable_states);
171	}
172
173	/// Primary key (correspond to primary.idx file).
174	/// Always loaded in RAM. Contains each index_granularity-th value of primary key tuple.
175	/// Note that marks (also correspond to primary key) is not always in RAM, but cached. See MarkCache.h.
176	using Index = Columns;
177	Index index;
178
179	MergeTreePartition partition;
180
181	/// Amount of rows between marks
182	/// As index always loaded into memory
183	MergeTreeIndexGranularity index_granularity;
184
185	/// Index that for each part stores min and max values of a set of columns. This allows quickly excluding
186	/// parts based on conditions on these columns imposed by a query.
187	/// Currently this index is built using only columns required by partition expression, but in principle it
188	/// can be built using any set of columns.
189	struct MinMaxIndex
190	{
191	/// A direct product of ranges for each key column. See Storages/MergeTree/KeyCondition.cpp for details.
192	std::vector<Range> parallelogram;
193	bool initialized = false;
194
195	public:
196	MinMaxIndex() = default;
197
198	/// For month-based partitioning.
199	MinMaxIndex(DayNum min_date, DayNum max_date)
200	: parallelogram (`1`, Range (min_date, true, max_date, true))
201	, initialized(true)
202	{
203	}
204
205	void load(const MergeTreeData & storage, const String & part_path);
206	void store(const MergeTreeData & storage, const String & part_path, Checksums & checksums) const;
207	void store(const Names & column_names, const DataTypes & data_types, const String & part_path, Checksums & checksums) const;
208
209	void update(const Block & block, const Names & column_names);
210	void merge(const MinMaxIndex & other);
211	};
212
213	MinMaxIndex minmax_idx;
214
215	Checksums checksums;
216
217	/// Columns description.
218	NamesAndTypesList columns;
219
220	/// Columns with values, that all have been zeroed by expired ttl
221	NameSet empty_columns;
222
223	using ColumnToSize = std::map<std::string, UInt64>;
224
225	/* It is blocked for writing when changing columns, checksums or any part files.*
226	* Locked to read when reading columns, checksums or any part files.
227	*/
228	mutable std::shared_mutex columns_lock;
229
230	MergeTreeIndexGranularityInfo index_granularity_info;
231
232	~MergeTreeDataPart();
233
234	/// Calculate the total size of the entire directory with all the files
235	static UInt64 calculateTotalSizeOnDisk(const String & from);
236
237	void remove() const;
238
239	/// Makes checks and move part to new directory
240	/// Changes only relative_dir_name, you need to update other metadata (name, is_temp) explicitly
241	void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists = true) const;
242
243	/// Generate unique path to detach part
244	String getRelativePathForDetachedPart(const String & prefix) const;
245
246	/// Moves a part to detached/ directory and adds prefix to its name
247	void renameToDetached(const String & prefix) const;
248
249	/// Makes clone of a part in detached/ directory via hard links
250	void makeCloneInDetached(const String & prefix) const;
251
252	/// Makes full clone of part in detached/ on another disk
253	void makeCloneOnDiskDetached(const ReservationPtr & reservation) const;
254
255	/// Populates columns_to_size map (compressed size).
256	void accumulateColumnSizes(ColumnToSize & column_to_size) const;
257
258	/// Initialize columns (from columns.txt if exists, or create from column files if not).
259	/// Load checksums from checksums.txt if exists. Load index if required.
260	void loadColumnsChecksumsIndexes(bool require_columns_checksums, bool check_consistency);
261
262	/// Checks that .bin and .mrk files exist
263	bool hasColumnFiles(const String & column, const IDataType & type) const;
264
265	/// For data in RAM ('index')
266	UInt64 getIndexSizeInBytes() const;
267	UInt64 getIndexSizeInAllocatedBytes() const;
268	UInt64 getMarksCount() const;
269
270	private:
271	/// Reads columns names and types from columns.txt
272	void loadColumns(bool require);
273
274	/// If checksums.txt exists, reads files' checksums (and sizes) from it
275	void loadChecksums(bool require);
276
277	/// Loads marks index granularity into memory
278	void loadIndexGranularity();
279
280	/// Loads index file.
281	void loadIndex();
282
283	/// Load rows count for this part from disk (for the newer storage format version).
284	/// For the older format version calculates rows count from the size of a column with a fixed size.
285	void loadRowsCount();
286
287	/// Loads ttl infos in json format from file ttl.txt. If file doesn`t exists assigns ttl infos with all zeros
288	void loadTTLInfos();
289
290	void loadPartitionAndMinMaxIndex();
291
292	void checkConsistency(bool require_part_metadata);
293
294	ColumnSize getColumnSizeImpl(const String & name, const IDataType & type, std::unordered_set<String> * processed_substreams) const;
295	};
296
297
/// Namespace-level alias for the part lifetime state enum declared inside MergeTreeDataPart.
298	using MergeTreeDataPartState = MergeTreeDataPart::State;
299
300	}
301

Browse the source code of ClickHouse/dbms/src/Storages/MergeTree/MergeTreeDataPart.h