1 | #include <Storages/ColumnsDescription.h> |
2 | #include <Parsers/ASTLiteral.h> |
3 | #include <Parsers/ExpressionElementParsers.h> |
4 | #include <Parsers/ExpressionListParsers.h> |
5 | #include <Parsers/ParserCreateQuery.h> |
6 | #include <Parsers/parseQuery.h> |
7 | #include <Parsers/queryToString.h> |
8 | #include <IO/WriteBuffer.h> |
9 | #include <IO/WriteHelpers.h> |
10 | #include <IO/ReadBuffer.h> |
11 | #include <IO/ReadHelpers.h> |
12 | #include <IO/WriteBufferFromString.h> |
13 | #include <IO/ReadBufferFromString.h> |
14 | #include <DataTypes/DataTypeFactory.h> |
15 | #include <DataTypes/NestedUtils.h> |
16 | #include <DataTypes/DataTypeArray.h> |
17 | #include <DataTypes/DataTypeTuple.h> |
18 | #include <Common/Exception.h> |
19 | #include <Interpreters/Context.h> |
20 | #include <Storages/IStorage.h> |
21 | #include <Common/typeid_cast.h> |
22 | #include <Compression/CompressionFactory.h> |
23 | |
24 | |
25 | namespace DB |
26 | { |
27 | |
28 | namespace ErrorCodes |
29 | { |
30 | extern const int NO_SUCH_COLUMN_IN_TABLE; |
31 | extern const int ILLEGAL_COLUMN; |
32 | extern const int CANNOT_PARSE_TEXT; |
33 | } |
34 | |
35 | ColumnDescription::ColumnDescription(String name_, DataTypePtr type_, bool is_virtual_) |
36 | : name(std::move(name_)), type(std::move(type_)), is_virtual(is_virtual_) |
37 | { |
38 | } |
39 | |
40 | bool ColumnDescription::operator==(const ColumnDescription & other) const |
41 | { |
42 | auto codec_str = [](const CompressionCodecPtr & codec_ptr) { return codec_ptr ? codec_ptr->getCodecDesc() : String(); }; |
43 | auto ttl_str = [](const ASTPtr & ttl_ast) { return ttl_ast ? queryToString(ttl_ast) : String{}; }; |
44 | |
45 | return name == other.name |
46 | && type->equals(*other.type) |
47 | && default_desc == other.default_desc |
48 | && comment == other.comment |
49 | && codec_str(codec) == codec_str(other.codec) |
50 | && ttl_str(ttl) == ttl_str(other.ttl); |
51 | } |
52 | |
53 | void ColumnDescription::writeText(WriteBuffer & buf) const |
54 | { |
55 | writeBackQuotedString(name, buf); |
56 | writeChar(' ', buf); |
57 | DB::writeText(type->getName(), buf); |
58 | |
59 | if (default_desc.expression) |
60 | { |
61 | writeChar('\t', buf); |
62 | DB::writeText(DB::toString(default_desc.kind), buf); |
63 | writeChar('\t', buf); |
64 | DB::writeText(queryToString(default_desc.expression), buf); |
65 | } |
66 | |
67 | if (!comment.empty()) |
68 | { |
69 | writeChar('\t', buf); |
70 | DB::writeText("COMMENT " , buf); |
71 | DB::writeText(queryToString(ASTLiteral(Field(comment))), buf); |
72 | } |
73 | |
74 | if (codec) |
75 | { |
76 | writeChar('\t', buf); |
77 | DB::writeText("CODEC(" , buf); |
78 | DB::writeText(codec->getCodecDesc(), buf); |
79 | DB::writeText(")" , buf); |
80 | } |
81 | |
82 | if (ttl) |
83 | { |
84 | writeChar('\t', buf); |
85 | DB::writeText("TTL " , buf); |
86 | DB::writeText(queryToString(ttl), buf); |
87 | } |
88 | |
89 | writeChar('\n', buf); |
90 | } |
91 | |
92 | void ColumnDescription::readText(ReadBuffer & buf) |
93 | { |
94 | ParserColumnDeclaration column_parser(/* require type */ true); |
95 | String column_line; |
96 | readEscapedStringUntilEOL(column_line, buf); |
97 | ASTPtr ast = parseQuery(column_parser, column_line, "column parser" , 0); |
98 | if (const auto * col_ast = ast->as<ASTColumnDeclaration>()) |
99 | { |
100 | name = col_ast->name; |
101 | type = DataTypeFactory::instance().get(col_ast->type); |
102 | |
103 | if (col_ast->default_expression) |
104 | { |
105 | default_desc.kind = columnDefaultKindFromString(col_ast->default_specifier); |
106 | default_desc.expression = std::move(col_ast->default_expression); |
107 | } |
108 | |
109 | if (col_ast->comment) |
110 | comment = col_ast->comment->as<ASTLiteral &>().value.get<String>(); |
111 | |
112 | if (col_ast->codec) |
113 | codec = CompressionCodecFactory::instance().get(col_ast->codec, type); |
114 | |
115 | if (col_ast->ttl) |
116 | ttl = col_ast->ttl; |
117 | } |
118 | else |
119 | throw Exception("Cannot parse column description" , ErrorCodes::CANNOT_PARSE_TEXT); |
120 | } |
121 | |
122 | |
123 | ColumnsDescription::ColumnsDescription(NamesAndTypesList ordinary, bool all_virtuals) |
124 | { |
125 | for (auto & elem : ordinary) |
126 | add(ColumnDescription(std::move(elem.name), std::move(elem.type), all_virtuals)); |
127 | } |
128 | |
129 | |
130 | /// We are trying to find first column from end with name `column_name` or with a name beginning with `column_name` and ".". |
131 | /// For example "fruits.bananas" |
132 | /// names are considered the same if they completely match or `name_without_dot` matches the part of the name to the point |
133 | static auto getNameRange(const ColumnsDescription::Container & columns, const String & name_without_dot) |
134 | { |
135 | String name_with_dot = name_without_dot + "." ; |
136 | |
137 | auto begin = columns.begin(); |
138 | for (; begin != columns.end(); ++begin) |
139 | { |
140 | if (begin->name == name_without_dot) |
141 | return std::make_pair(begin, std::next(begin)); |
142 | |
143 | if (startsWith(begin->name, name_with_dot)) |
144 | break; |
145 | } |
146 | |
147 | if (begin == columns.end()) |
148 | return std::make_pair(begin, begin); |
149 | |
150 | auto end = std::next(begin); |
151 | for (; end != columns.end(); ++end) |
152 | { |
153 | if (!startsWith(end->name, name_with_dot)) |
154 | break; |
155 | } |
156 | |
157 | return std::make_pair(begin, end); |
158 | } |
159 | |
160 | void ColumnsDescription::add(ColumnDescription column, const String & after_column) |
161 | { |
162 | if (has(column.name)) |
163 | throw Exception("Cannot add column " + column.name + ": column with this name already exists" , |
164 | ErrorCodes::ILLEGAL_COLUMN); |
165 | |
166 | auto insert_it = columns.cend(); |
167 | |
168 | if (!after_column.empty()) |
169 | { |
170 | auto range = getNameRange(columns, after_column); |
171 | if (range.first == range.second) |
172 | throw Exception("Wrong column name. Cannot find column " + after_column + " to insert after" , |
173 | ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); |
174 | |
175 | insert_it = range.second; |
176 | } |
177 | |
178 | columns.get<0>().insert(insert_it, std::move(column)); |
179 | } |
180 | |
181 | void ColumnsDescription::remove(const String & column_name) |
182 | { |
183 | auto range = getNameRange(columns, column_name); |
184 | if (range.first == range.second) |
185 | throw Exception("There is no column " + column_name + " in table." , |
186 | ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); |
187 | |
188 | for (auto list_it = range.first; list_it != range.second;) |
189 | list_it = columns.get<0>().erase(list_it); |
190 | } |
191 | |
192 | |
193 | void ColumnsDescription::flattenNested() |
194 | { |
195 | for (auto it = columns.begin(); it != columns.end();) |
196 | { |
197 | const auto * type_arr = typeid_cast<const DataTypeArray *>(it->type.get()); |
198 | if (!type_arr) |
199 | { |
200 | ++it; |
201 | continue; |
202 | } |
203 | |
204 | const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type_arr->getNestedType().get()); |
205 | if (!type_tuple) |
206 | { |
207 | ++it; |
208 | continue; |
209 | } |
210 | |
211 | ColumnDescription column = std::move(*it); |
212 | it = columns.get<0>().erase(it); |
213 | |
214 | const DataTypes & elements = type_tuple->getElements(); |
215 | const Strings & names = type_tuple->getElementNames(); |
216 | size_t tuple_size = elements.size(); |
217 | |
218 | for (size_t i = 0; i < tuple_size; ++i) |
219 | { |
220 | auto nested_column = column; |
221 | /// TODO: what to do with default expressions? |
222 | nested_column.name = Nested::concatenateName(column.name, names[i]); |
223 | nested_column.type = std::make_shared<DataTypeArray>(elements[i]); |
224 | |
225 | columns.get<0>().insert(it, std::move(nested_column)); |
226 | } |
227 | } |
228 | } |
229 | |
230 | |
231 | NamesAndTypesList ColumnsDescription::getOrdinary() const |
232 | { |
233 | NamesAndTypesList ret; |
234 | for (const auto & col : columns) |
235 | if (col.default_desc.kind == ColumnDefaultKind::Default && !col.is_virtual) |
236 | ret.emplace_back(col.name, col.type); |
237 | return ret; |
238 | } |
239 | |
240 | NamesAndTypesList ColumnsDescription::getMaterialized() const |
241 | { |
242 | NamesAndTypesList ret; |
243 | for (const auto & col : columns) |
244 | if (col.default_desc.kind == ColumnDefaultKind::Materialized) |
245 | ret.emplace_back(col.name, col.type); |
246 | return ret; |
247 | } |
248 | |
249 | NamesAndTypesList ColumnsDescription::getAliases() const |
250 | { |
251 | NamesAndTypesList ret; |
252 | for (const auto & col : columns) |
253 | if (col.default_desc.kind == ColumnDefaultKind::Alias) |
254 | ret.emplace_back(col.name, col.type); |
255 | return ret; |
256 | } |
257 | |
258 | NamesAndTypesList ColumnsDescription::getVirtuals() const |
259 | { |
260 | NamesAndTypesList result; |
261 | for (const auto & column : columns) |
262 | if (column.is_virtual) |
263 | result.emplace_back(column.name, column.type); |
264 | return result; |
265 | } |
266 | |
267 | NamesAndTypesList ColumnsDescription::getAll() const |
268 | { |
269 | NamesAndTypesList ret; |
270 | for (const auto & col : columns) |
271 | ret.emplace_back(col.name, col.type); |
272 | return ret; |
273 | } |
274 | |
275 | |
276 | bool ColumnsDescription::has(const String & column_name) const |
277 | { |
278 | return columns.get<1>().find(column_name) != columns.get<1>().end(); |
279 | } |
280 | |
281 | bool ColumnsDescription::hasNested(const String & column_name) const |
282 | { |
283 | auto range = getNameRange(columns, column_name); |
284 | return range.first != range.second && range.first->name.length() > column_name.length(); |
285 | } |
286 | |
287 | const ColumnDescription & ColumnsDescription::get(const String & column_name) const |
288 | { |
289 | auto it = columns.get<1>().find(column_name); |
290 | if (it == columns.get<1>().end()) |
291 | throw Exception("There is no column " + column_name + " in table." , |
292 | ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); |
293 | |
294 | return *it; |
295 | } |
296 | |
297 | |
298 | NamesAndTypesList ColumnsDescription::getAllPhysical() const |
299 | { |
300 | NamesAndTypesList ret; |
301 | for (const auto & col : columns) |
302 | if (col.default_desc.kind != ColumnDefaultKind::Alias && !col.is_virtual) |
303 | ret.emplace_back(col.name, col.type); |
304 | return ret; |
305 | } |
306 | |
307 | Names ColumnsDescription::getNamesOfPhysical() const |
308 | { |
309 | Names ret; |
310 | for (const auto & col : columns) |
311 | if (col.default_desc.kind != ColumnDefaultKind::Alias && !col.is_virtual) |
312 | ret.emplace_back(col.name); |
313 | return ret; |
314 | } |
315 | |
316 | NameAndTypePair ColumnsDescription::getPhysical(const String & column_name) const |
317 | { |
318 | auto it = columns.get<1>().find(column_name); |
319 | if (it == columns.get<1>().end() || it->default_desc.kind == ColumnDefaultKind::Alias || it->is_virtual) |
320 | throw Exception("There is no physical column " + column_name + " in table." , ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); |
321 | return NameAndTypePair(it->name, it->type); |
322 | } |
323 | |
324 | bool ColumnsDescription::hasPhysical(const String & column_name) const |
325 | { |
326 | auto it = columns.get<1>().find(column_name); |
327 | return it != columns.get<1>().end() && it->default_desc.kind != ColumnDefaultKind::Alias && !it->is_virtual; |
328 | } |
329 | |
330 | |
331 | ColumnDefaults ColumnsDescription::getDefaults() const |
332 | { |
333 | ColumnDefaults ret; |
334 | for (const auto & column : columns) |
335 | if (column.default_desc.expression) |
336 | ret.emplace(column.name, column.default_desc); |
337 | |
338 | return ret; |
339 | } |
340 | |
341 | bool ColumnsDescription::hasDefault(const String & column_name) const |
342 | { |
343 | auto it = columns.get<1>().find(column_name); |
344 | return it != columns.get<1>().end() && it->default_desc.expression; |
345 | } |
346 | |
347 | std::optional<ColumnDefault> ColumnsDescription::getDefault(const String & column_name) const |
348 | { |
349 | auto it = columns.get<1>().find(column_name); |
350 | if (it != columns.get<1>().end() && it->default_desc.expression) |
351 | return it->default_desc; |
352 | |
353 | return {}; |
354 | } |
355 | |
356 | |
357 | CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const |
358 | { |
359 | const auto it = columns.get<1>().find(column_name); |
360 | |
361 | if (it == columns.get<1>().end() || !it->codec) |
362 | return default_codec; |
363 | |
364 | return it->codec; |
365 | } |
366 | |
367 | CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_name) const |
368 | { |
369 | return getCodecOrDefault(column_name, CompressionCodecFactory::instance().getDefaultCodec()); |
370 | } |
371 | |
372 | ColumnsDescription::ColumnTTLs ColumnsDescription::getColumnTTLs() const |
373 | { |
374 | ColumnTTLs ret; |
375 | for (const auto & column : columns) |
376 | if (column.ttl) |
377 | ret.emplace(column.name, column.ttl); |
378 | return ret; |
379 | } |
380 | |
381 | |
382 | String ColumnsDescription::toString() const |
383 | { |
384 | WriteBufferFromOwnString buf; |
385 | |
386 | writeCString("columns format version: 1\n" , buf); |
387 | DB::writeText(columns.size(), buf); |
388 | writeCString(" columns:\n" , buf); |
389 | |
390 | for (const ColumnDescription & column : columns) |
391 | column.writeText(buf); |
392 | |
393 | return buf.str(); |
394 | } |
395 | |
396 | ColumnsDescription ColumnsDescription::parse(const String & str) |
397 | { |
398 | ReadBufferFromString buf{str}; |
399 | |
400 | assertString("columns format version: 1\n" , buf); |
401 | size_t count{}; |
402 | readText(count, buf); |
403 | assertString(" columns:\n" , buf); |
404 | |
405 | ColumnsDescription result; |
406 | for (size_t i = 0; i < count; ++i) |
407 | { |
408 | ColumnDescription column; |
409 | column.readText(buf); |
410 | buf.ignore(1); /// ignore new line |
411 | result.add(std::move(column)); |
412 | } |
413 | |
414 | assertEOF(buf); |
415 | return result; |
416 | } |
417 | |
418 | } |
419 | |