1 | #include <Storages/MergeTree/MergeTreePartInfo.h> |
2 | #include <IO/ReadBufferFromString.h> |
3 | #include <IO/ReadHelpers.h> |
4 | #include <IO/WriteHelpers.h> |
5 | |
6 | namespace DB |
7 | { |
8 | |
9 | namespace ErrorCodes |
10 | { |
11 | extern const int BAD_DATA_PART_NAME; |
12 | } |
13 | |
14 | |
15 | MergeTreePartInfo MergeTreePartInfo::fromPartName(const String & dir_name, MergeTreeDataFormatVersion format_version) |
16 | { |
17 | MergeTreePartInfo part_info; |
18 | if (!tryParsePartName(dir_name, &part_info, format_version)) |
19 | throw Exception("Unexpected part name: " + dir_name, ErrorCodes::BAD_DATA_PART_NAME); |
20 | return part_info; |
21 | } |
22 | |
23 | |
24 | bool MergeTreePartInfo::tryParsePartName(const String & dir_name, MergeTreePartInfo * part_info, MergeTreeDataFormatVersion format_version) |
25 | { |
26 | ReadBufferFromString in(dir_name); |
27 | |
28 | String partition_id; |
29 | if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) |
30 | { |
31 | UInt32 min_yyyymmdd = 0; |
32 | UInt32 max_yyyymmdd = 0; |
33 | if (!tryReadIntText(min_yyyymmdd, in) |
34 | || !checkChar('_', in) |
35 | || !tryReadIntText(max_yyyymmdd, in) |
36 | || !checkChar('_', in)) |
37 | { |
38 | return false; |
39 | } |
40 | partition_id = toString(min_yyyymmdd / 100); |
41 | } |
42 | else |
43 | { |
44 | while (!in.eof()) |
45 | { |
46 | char c; |
47 | readChar(c, in); |
48 | if (c == '_') |
49 | break; |
50 | |
51 | partition_id.push_back(c); |
52 | } |
53 | } |
54 | |
55 | /// Sanity check |
56 | if (partition_id.empty()) |
57 | { |
58 | return false; |
59 | } |
60 | |
61 | Int64 min_block_num = 0; |
62 | Int64 max_block_num = 0; |
63 | UInt32 level = 0; |
64 | UInt32 mutation = 0; |
65 | |
66 | if (!tryReadIntText(min_block_num, in) |
67 | || !checkChar('_', in) |
68 | || !tryReadIntText(max_block_num, in) |
69 | || !checkChar('_', in) |
70 | || !tryReadIntText(level, in)) |
71 | { |
72 | return false; |
73 | } |
74 | |
75 | /// Sanity check |
76 | if (min_block_num > max_block_num) |
77 | { |
78 | return false; |
79 | } |
80 | |
81 | if (!in.eof()) |
82 | { |
83 | if (!checkChar('_', in) |
84 | || !tryReadIntText(mutation, in) |
85 | || !in.eof()) |
86 | { |
87 | return false; |
88 | } |
89 | } |
90 | |
91 | if (part_info) |
92 | { |
93 | part_info->partition_id = std::move(partition_id); |
94 | part_info->min_block = min_block_num; |
95 | part_info->max_block = max_block_num; |
96 | part_info->level = level; |
97 | part_info->mutation = mutation; |
98 | } |
99 | |
100 | return true; |
101 | } |
102 | |
103 | |
104 | void MergeTreePartInfo::parseMinMaxDatesFromPartName(const String & dir_name, DayNum & min_date, DayNum & max_date) |
105 | { |
106 | UInt32 min_yyyymmdd = 0; |
107 | UInt32 max_yyyymmdd = 0; |
108 | |
109 | ReadBufferFromString in(dir_name); |
110 | |
111 | if (!tryReadIntText(min_yyyymmdd, in) |
112 | || !checkChar('_', in) |
113 | || !tryReadIntText(max_yyyymmdd, in)) |
114 | { |
115 | throw Exception("Unexpected part name: " + dir_name, ErrorCodes::BAD_DATA_PART_NAME); |
116 | } |
117 | |
118 | const auto & date_lut = DateLUT::instance(); |
119 | |
120 | min_date = date_lut.YYYYMMDDToDayNum(min_yyyymmdd); |
121 | max_date = date_lut.YYYYMMDDToDayNum(max_yyyymmdd); |
122 | |
123 | DayNum min_month = date_lut.toFirstDayNumOfMonth(min_date); |
124 | DayNum max_month = date_lut.toFirstDayNumOfMonth(max_date); |
125 | |
126 | if (min_month != max_month) |
127 | throw Exception("Part name " + dir_name + " contains different months" , ErrorCodes::BAD_DATA_PART_NAME); |
128 | } |
129 | |
130 | |
131 | bool MergeTreePartInfo::contains(const String & outer_part_name, const String & inner_part_name, MergeTreeDataFormatVersion format_version) |
132 | { |
133 | MergeTreePartInfo outer = fromPartName(outer_part_name, format_version); |
134 | MergeTreePartInfo inner = fromPartName(inner_part_name, format_version); |
135 | return outer.contains(inner); |
136 | } |
137 | |
138 | |
139 | String MergeTreePartInfo::getPartName() const |
140 | { |
141 | WriteBufferFromOwnString wb; |
142 | |
143 | writeString(partition_id, wb); |
144 | writeChar('_', wb); |
145 | writeIntText(min_block, wb); |
146 | writeChar('_', wb); |
147 | writeIntText(max_block, wb); |
148 | writeChar('_', wb); |
149 | writeIntText(level, wb); |
150 | |
151 | if (mutation) |
152 | { |
153 | writeChar('_', wb); |
154 | writeIntText(mutation, wb); |
155 | } |
156 | |
157 | return wb.str(); |
158 | } |
159 | |
160 | |
161 | String MergeTreePartInfo::getPartNameV0(DayNum left_date, DayNum right_date) const |
162 | { |
163 | const auto & date_lut = DateLUT::instance(); |
164 | |
165 | /// Directory name for the part has form: `YYYYMMDD_YYYYMMDD_N_N_L`. |
166 | |
167 | unsigned left_date_id = date_lut.toNumYYYYMMDD(left_date); |
168 | unsigned right_date_id = date_lut.toNumYYYYMMDD(right_date); |
169 | |
170 | WriteBufferFromOwnString wb; |
171 | |
172 | writeIntText(left_date_id, wb); |
173 | writeChar('_', wb); |
174 | writeIntText(right_date_id, wb); |
175 | writeChar('_', wb); |
176 | writeIntText(min_block, wb); |
177 | writeChar('_', wb); |
178 | writeIntText(max_block, wb); |
179 | writeChar('_', wb); |
180 | writeIntText(level, wb); |
181 | |
182 | if (mutation) |
183 | { |
184 | writeChar('_', wb); |
185 | writeIntText(mutation, wb); |
186 | } |
187 | |
188 | return wb.str(); |
189 | } |
190 | |
191 | bool DetachedPartInfo::tryParseDetachedPartName(const String & dir_name, DetachedPartInfo & part_info, |
192 | MergeTreeDataFormatVersion format_version) |
193 | { |
194 | part_info.dir_name = dir_name; |
195 | |
196 | /// First, try to parse as <part_name>. |
197 | // TODO what if tryParsePartName will parse prefix as partition_id? It can happen if dir_name doesn't contain mutation number at the end |
198 | if (MergeTreePartInfo::tryParsePartName(dir_name, &part_info, format_version)) |
199 | return part_info.valid_name = true; |
200 | |
201 | /// Next, as <prefix>_<partname>. Use entire name as prefix if it fails. |
202 | part_info.prefix = dir_name; |
203 | const auto first_separator = dir_name.find_first_of('_'); |
204 | if (first_separator == String::npos) |
205 | return part_info.valid_name = false; |
206 | |
207 | // TODO what if <prefix> contains '_'? |
208 | const auto part_name = dir_name.substr(first_separator + 1, |
209 | dir_name.size() - first_separator - 1); |
210 | if (!MergeTreePartInfo::tryParsePartName(part_name, &part_info, format_version)) |
211 | return part_info.valid_name = false; |
212 | |
213 | part_info.prefix = dir_name.substr(0, first_separator); |
214 | return part_info.valid_name = true; |
215 | } |
216 | |
217 | } |
218 | |