1#include <Storages/MergeTree/MergeTreePartInfo.h>
2#include <IO/ReadBufferFromString.h>
3#include <IO/ReadHelpers.h>
4#include <IO/WriteHelpers.h>
5
6namespace DB
7{
8
9namespace ErrorCodes
10{
11 extern const int BAD_DATA_PART_NAME;
12}
13
14
15MergeTreePartInfo MergeTreePartInfo::fromPartName(const String & dir_name, MergeTreeDataFormatVersion format_version)
16{
17 MergeTreePartInfo part_info;
18 if (!tryParsePartName(dir_name, &part_info, format_version))
19 throw Exception("Unexpected part name: " + dir_name, ErrorCodes::BAD_DATA_PART_NAME);
20 return part_info;
21}
22
23
24bool MergeTreePartInfo::tryParsePartName(const String & dir_name, MergeTreePartInfo * part_info, MergeTreeDataFormatVersion format_version)
25{
26 ReadBufferFromString in(dir_name);
27
28 String partition_id;
29 if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
30 {
31 UInt32 min_yyyymmdd = 0;
32 UInt32 max_yyyymmdd = 0;
33 if (!tryReadIntText(min_yyyymmdd, in)
34 || !checkChar('_', in)
35 || !tryReadIntText(max_yyyymmdd, in)
36 || !checkChar('_', in))
37 {
38 return false;
39 }
40 partition_id = toString(min_yyyymmdd / 100);
41 }
42 else
43 {
44 while (!in.eof())
45 {
46 char c;
47 readChar(c, in);
48 if (c == '_')
49 break;
50
51 partition_id.push_back(c);
52 }
53 }
54
55 /// Sanity check
56 if (partition_id.empty())
57 {
58 return false;
59 }
60
61 Int64 min_block_num = 0;
62 Int64 max_block_num = 0;
63 UInt32 level = 0;
64 UInt32 mutation = 0;
65
66 if (!tryReadIntText(min_block_num, in)
67 || !checkChar('_', in)
68 || !tryReadIntText(max_block_num, in)
69 || !checkChar('_', in)
70 || !tryReadIntText(level, in))
71 {
72 return false;
73 }
74
75 /// Sanity check
76 if (min_block_num > max_block_num)
77 {
78 return false;
79 }
80
81 if (!in.eof())
82 {
83 if (!checkChar('_', in)
84 || !tryReadIntText(mutation, in)
85 || !in.eof())
86 {
87 return false;
88 }
89 }
90
91 if (part_info)
92 {
93 part_info->partition_id = std::move(partition_id);
94 part_info->min_block = min_block_num;
95 part_info->max_block = max_block_num;
96 part_info->level = level;
97 part_info->mutation = mutation;
98 }
99
100 return true;
101}
102
103
104void MergeTreePartInfo::parseMinMaxDatesFromPartName(const String & dir_name, DayNum & min_date, DayNum & max_date)
105{
106 UInt32 min_yyyymmdd = 0;
107 UInt32 max_yyyymmdd = 0;
108
109 ReadBufferFromString in(dir_name);
110
111 if (!tryReadIntText(min_yyyymmdd, in)
112 || !checkChar('_', in)
113 || !tryReadIntText(max_yyyymmdd, in))
114 {
115 throw Exception("Unexpected part name: " + dir_name, ErrorCodes::BAD_DATA_PART_NAME);
116 }
117
118 const auto & date_lut = DateLUT::instance();
119
120 min_date = date_lut.YYYYMMDDToDayNum(min_yyyymmdd);
121 max_date = date_lut.YYYYMMDDToDayNum(max_yyyymmdd);
122
123 DayNum min_month = date_lut.toFirstDayNumOfMonth(min_date);
124 DayNum max_month = date_lut.toFirstDayNumOfMonth(max_date);
125
126 if (min_month != max_month)
127 throw Exception("Part name " + dir_name + " contains different months", ErrorCodes::BAD_DATA_PART_NAME);
128}
129
130
131bool MergeTreePartInfo::contains(const String & outer_part_name, const String & inner_part_name, MergeTreeDataFormatVersion format_version)
132{
133 MergeTreePartInfo outer = fromPartName(outer_part_name, format_version);
134 MergeTreePartInfo inner = fromPartName(inner_part_name, format_version);
135 return outer.contains(inner);
136}
137
138
139String MergeTreePartInfo::getPartName() const
140{
141 WriteBufferFromOwnString wb;
142
143 writeString(partition_id, wb);
144 writeChar('_', wb);
145 writeIntText(min_block, wb);
146 writeChar('_', wb);
147 writeIntText(max_block, wb);
148 writeChar('_', wb);
149 writeIntText(level, wb);
150
151 if (mutation)
152 {
153 writeChar('_', wb);
154 writeIntText(mutation, wb);
155 }
156
157 return wb.str();
158}
159
160
161String MergeTreePartInfo::getPartNameV0(DayNum left_date, DayNum right_date) const
162{
163 const auto & date_lut = DateLUT::instance();
164
165 /// Directory name for the part has form: `YYYYMMDD_YYYYMMDD_N_N_L`.
166
167 unsigned left_date_id = date_lut.toNumYYYYMMDD(left_date);
168 unsigned right_date_id = date_lut.toNumYYYYMMDD(right_date);
169
170 WriteBufferFromOwnString wb;
171
172 writeIntText(left_date_id, wb);
173 writeChar('_', wb);
174 writeIntText(right_date_id, wb);
175 writeChar('_', wb);
176 writeIntText(min_block, wb);
177 writeChar('_', wb);
178 writeIntText(max_block, wb);
179 writeChar('_', wb);
180 writeIntText(level, wb);
181
182 if (mutation)
183 {
184 writeChar('_', wb);
185 writeIntText(mutation, wb);
186 }
187
188 return wb.str();
189}
190
191bool DetachedPartInfo::tryParseDetachedPartName(const String & dir_name, DetachedPartInfo & part_info,
192 MergeTreeDataFormatVersion format_version)
193{
194 part_info.dir_name = dir_name;
195
196 /// First, try to parse as <part_name>.
197 // TODO what if tryParsePartName will parse prefix as partition_id? It can happen if dir_name doesn't contain mutation number at the end
198 if (MergeTreePartInfo::tryParsePartName(dir_name, &part_info, format_version))
199 return part_info.valid_name = true;
200
201 /// Next, as <prefix>_<partname>. Use entire name as prefix if it fails.
202 part_info.prefix = dir_name;
203 const auto first_separator = dir_name.find_first_of('_');
204 if (first_separator == String::npos)
205 return part_info.valid_name = false;
206
207 // TODO what if <prefix> contains '_'?
208 const auto part_name = dir_name.substr(first_separator + 1,
209 dir_name.size() - first_separator - 1);
210 if (!MergeTreePartInfo::tryParsePartName(part_name, &part_info, format_version))
211 return part_info.valid_name = false;
212
213 part_info.prefix = dir_name.substr(0, first_separator);
214 return part_info.valid_name = true;
215}
216
217}
218