MergeTreePartition.cpp source code [ClickHouse/dbms/src/Storages/MergeTree/MergeTreePartition.cpp]

1	#include <Storages/MergeTree/MergeTreePartition.h>
2	#include <Storages/MergeTree/MergeTreeData.h>
3	#include <Storages/MergeTree/MergeTreeDataPart.h>
4	#include <IO/ReadBufferFromFile.h>
5	#include <IO/HashingWriteBuffer.h>
6	#include <Common/FieldVisitors.h>
7	#include <DataTypes/DataTypeDate.h>
8	#include <DataTypes/DataTypeTuple.h>
9	#include <Columns/ColumnTuple.h>
10	#include <Common/SipHash.h>
11	#include <Common/typeid_cast.h>
12	#include <Common/hex.h>
13	#include <Core/Block.h>
14
15	#include <Poco/File.h>
16
17	namespace DB
18	{
19
20	static ReadBufferFromFile openForReading(const String & path)
21	{
22	return ReadBufferFromFile (path, std::min(static_cast<Poco::File::FileSize>(DBMS_DEFAULT_BUFFER_SIZE), Poco::File (path).getSize()));
23	}
24
25	String MergeTreePartition::getID(const MergeTreeData & storage) const
26	{
27	return getID(storage.partition_key_sample);
28	}
29
30	/// NOTE: This ID is used to create part names which are then persisted in ZK and as directory names on the file system.
31	/// So if you want to change this method, be sure to guarantee compatibility with existing table data.
32	String MergeTreePartition::getID(const Block & partition_key_sample) const
33	{
34	if (value.size() != partition_key_sample.columns())
35	throw Exception ("Invalid partition key size: " + toString(value.size()), ErrorCodes::LOGICAL_ERROR);
36
37	if (value.empty())
38	return "all"; /// It is tempting to use an empty string here. But that would break directory structure in ZK.
39
40	/// In case all partition fields are represented by integral types, try to produce a human-readable ID.
41	/// Otherwise use a hex-encoded hash.
42	bool are_all_integral = true;
43	for (const Field & field : value)
44	{
45	if (field.getType() != Field::Types::UInt64 && field.getType() != Field::Types::Int64)
46	{
47	are_all_integral = false;
48	break;
49	}
50	}
51
52	String result;
53
54	if (are_all_integral)
55	{
56	FieldVisitorToString to_string_visitor;
57	for (size_t i = `0`; i < value.size(); ++i)
58	{
59	if (i > `0`)
60	result += `'-'`;
61
62	if (typeid_cast<const DataTypeDate *>(partition_key_sample.getByPosition(i).type.get()))
63	result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum (value [i].safeGet<UInt64>())));
64	else
65	result += applyVisitor(to_string_visitor, value [i]);
66
67	/// It is tempting to output DateTime as YYYYMMDDhhmmss, but that would make partition ID
68	/// timezone-dependent.
69	}
70
71	return result;
72	}
73
74	SipHash hash;
75	FieldVisitorHash hashing_visitor(hash);
76	for (const Field & field : value)
77	applyVisitor(hashing_visitor, field);
78
79	char hash_data[`16`];
80	hash.get128(hash_data);
81	result.resize(`32`);
82	for (size_t i = `0`; i < `16`; ++i)
83	writeHexByteLowercase(hash_data[i], &result [`2` * i]);
84
85	return result;
86	}
87
88	void MergeTreePartition::serializeText(const MergeTreeData & storage, WriteBuffer & out, const FormatSettings & format_settings) const
89	{
90	size_t key_size = storage.partition_key_sample.columns();
91
92	if (key_size == `0`)
93	{
94	writeCString("tuple()", out);
95	}
96	else if (key_size == `1`)
97	{
98	const DataTypePtr & type = storage.partition_key_sample.getByPosition(`0`).type;
99	auto column = type ->createColumn();
100	column ->insert(value [`0`]);
101	type ->serializeAsText(*column, `0`, out, format_settings);
102	}
103	else
104	{
105	DataTypes types;
106	Columns columns;
107	for (size_t i = `0`; i < key_size; ++i)
108	{
109	const auto & type = storage.partition_key_sample.getByPosition(i).type;
110	types.push_back(type);
111	auto column = type ->createColumn();
112	column ->insert(value [i]);
113	columns.push_back(std::move(column));
114	}
115
116	DataTypeTuple tuple_type(types);
117	auto tuple_column = ColumnTuple::create(columns);
118	tuple_type.serializeText(*tuple_column, `0`, out, format_settings);
119	}
120	}
121
122	void MergeTreePartition::load(const MergeTreeData & storage, const String & part_path)
123	{
124	if (!storage.partition_key_expr)
125	return;
126
127	ReadBufferFromFile file = openForReading(part_path + "partition.dat");
128	value.resize(storage.partition_key_sample.columns());
129	for (size_t i = `0`; i < storage.partition_key_sample.columns(); ++i)
130	storage.partition_key_sample.getByPosition(i).type ->deserializeBinary(value [i], file);
131	}
132
133	void MergeTreePartition::store(const MergeTreeData & storage, const String & part_path, MergeTreeDataPartChecksums & checksums) const
134	{
135	store(storage.partition_key_sample, part_path, checksums);
136	}
137
138	void MergeTreePartition::store(const Block & partition_key_sample, const String & part_path, MergeTreeDataPartChecksums & checksums) const
139	{
140	if (!partition_key_sample)
141	return;
142
143	WriteBufferFromFile out(part_path + "partition.dat");
144	HashingWriteBuffer out_hashing(out);
145	for (size_t i = `0`; i < value.size(); ++i)
146	partition_key_sample.getByPosition(i).type ->serializeBinary(value [i], out_hashing);
147	out_hashing.next();
148	checksums.files ["partition.dat"].file_size = out_hashing.count();
149	checksums.files ["partition.dat"].file_hash = out_hashing.getHash();
150	}
151
152	}
153

Browse the source code of ClickHouse/dbms/src/Storages/MergeTree/MergeTreePartition.cpp