1#include <Storages/MergeTree/MergeTreePartition.h>
2#include <Storages/MergeTree/MergeTreeData.h>
3#include <Storages/MergeTree/MergeTreeDataPart.h>
4#include <IO/ReadBufferFromFile.h>
5#include <IO/HashingWriteBuffer.h>
6#include <Common/FieldVisitors.h>
7#include <DataTypes/DataTypeDate.h>
8#include <DataTypes/DataTypeTuple.h>
9#include <Columns/ColumnTuple.h>
10#include <Common/SipHash.h>
11#include <Common/typeid_cast.h>
12#include <Common/hex.h>
13#include <Core/Block.h>
14
15#include <Poco/File.h>
16
17namespace DB
18{
19
20static ReadBufferFromFile openForReading(const String & path)
21{
22 return ReadBufferFromFile(path, std::min(static_cast<Poco::File::FileSize>(DBMS_DEFAULT_BUFFER_SIZE), Poco::File(path).getSize()));
23}
24
25String MergeTreePartition::getID(const MergeTreeData & storage) const
26{
27 return getID(storage.partition_key_sample);
28}
29
30/// NOTE: This ID is used to create part names which are then persisted in ZK and as directory names on the file system.
31/// So if you want to change this method, be sure to guarantee compatibility with existing table data.
32String MergeTreePartition::getID(const Block & partition_key_sample) const
33{
34 if (value.size() != partition_key_sample.columns())
35 throw Exception("Invalid partition key size: " + toString(value.size()), ErrorCodes::LOGICAL_ERROR);
36
37 if (value.empty())
38 return "all"; /// It is tempting to use an empty string here. But that would break directory structure in ZK.
39
40 /// In case all partition fields are represented by integral types, try to produce a human-readable ID.
41 /// Otherwise use a hex-encoded hash.
42 bool are_all_integral = true;
43 for (const Field & field : value)
44 {
45 if (field.getType() != Field::Types::UInt64 && field.getType() != Field::Types::Int64)
46 {
47 are_all_integral = false;
48 break;
49 }
50 }
51
52 String result;
53
54 if (are_all_integral)
55 {
56 FieldVisitorToString to_string_visitor;
57 for (size_t i = 0; i < value.size(); ++i)
58 {
59 if (i > 0)
60 result += '-';
61
62 if (typeid_cast<const DataTypeDate *>(partition_key_sample.getByPosition(i).type.get()))
63 result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum(value[i].safeGet<UInt64>())));
64 else
65 result += applyVisitor(to_string_visitor, value[i]);
66
67 /// It is tempting to output DateTime as YYYYMMDDhhmmss, but that would make partition ID
68 /// timezone-dependent.
69 }
70
71 return result;
72 }
73
74 SipHash hash;
75 FieldVisitorHash hashing_visitor(hash);
76 for (const Field & field : value)
77 applyVisitor(hashing_visitor, field);
78
79 char hash_data[16];
80 hash.get128(hash_data);
81 result.resize(32);
82 for (size_t i = 0; i < 16; ++i)
83 writeHexByteLowercase(hash_data[i], &result[2 * i]);
84
85 return result;
86}
87
88void MergeTreePartition::serializeText(const MergeTreeData & storage, WriteBuffer & out, const FormatSettings & format_settings) const
89{
90 size_t key_size = storage.partition_key_sample.columns();
91
92 if (key_size == 0)
93 {
94 writeCString("tuple()", out);
95 }
96 else if (key_size == 1)
97 {
98 const DataTypePtr & type = storage.partition_key_sample.getByPosition(0).type;
99 auto column = type->createColumn();
100 column->insert(value[0]);
101 type->serializeAsText(*column, 0, out, format_settings);
102 }
103 else
104 {
105 DataTypes types;
106 Columns columns;
107 for (size_t i = 0; i < key_size; ++i)
108 {
109 const auto & type = storage.partition_key_sample.getByPosition(i).type;
110 types.push_back(type);
111 auto column = type->createColumn();
112 column->insert(value[i]);
113 columns.push_back(std::move(column));
114 }
115
116 DataTypeTuple tuple_type(types);
117 auto tuple_column = ColumnTuple::create(columns);
118 tuple_type.serializeText(*tuple_column, 0, out, format_settings);
119 }
120}
121
122void MergeTreePartition::load(const MergeTreeData & storage, const String & part_path)
123{
124 if (!storage.partition_key_expr)
125 return;
126
127 ReadBufferFromFile file = openForReading(part_path + "partition.dat");
128 value.resize(storage.partition_key_sample.columns());
129 for (size_t i = 0; i < storage.partition_key_sample.columns(); ++i)
130 storage.partition_key_sample.getByPosition(i).type->deserializeBinary(value[i], file);
131}
132
133void MergeTreePartition::store(const MergeTreeData & storage, const String & part_path, MergeTreeDataPartChecksums & checksums) const
134{
135 store(storage.partition_key_sample, part_path, checksums);
136}
137
138void MergeTreePartition::store(const Block & partition_key_sample, const String & part_path, MergeTreeDataPartChecksums & checksums) const
139{
140 if (!partition_key_sample)
141 return;
142
143 WriteBufferFromFile out(part_path + "partition.dat");
144 HashingWriteBuffer out_hashing(out);
145 for (size_t i = 0; i < value.size(); ++i)
146 partition_key_sample.getByPosition(i).type->serializeBinary(value[i], out_hashing);
147 out_hashing.next();
148 checksums.files["partition.dat"].file_size = out_hashing.count();
149 checksums.files["partition.dat"].file_hash = out_hashing.getHash();
150}
151
152}
153