1 | #include <Common/StringUtils/StringUtils.h> |
2 | #include <Columns/ColumnTuple.h> |
3 | #include <Core/Field.h> |
4 | #include <Formats/FormatSettings.h> |
5 | #include <DataTypes/DataTypeTuple.h> |
6 | #include <DataTypes/DataTypeArray.h> |
7 | #include <DataTypes/DataTypeFactory.h> |
8 | #include <Parsers/IAST.h> |
9 | #include <Parsers/ASTNameTypePair.h> |
10 | #include <Common/typeid_cast.h> |
11 | #include <Common/assert_cast.h> |
12 | #include <IO/WriteHelpers.h> |
13 | #include <IO/ReadHelpers.h> |
14 | #include <IO/WriteBufferFromString.h> |
15 | #include <IO/Operators.h> |
16 | |
17 | #include <ext/map.h> |
18 | #include <ext/enumerate.h> |
19 | #include <ext/range.h> |
20 | |
21 | |
22 | namespace DB |
23 | { |
24 | |
/// Error codes thrown from this file. They are only declared here (extern);
/// every code used below must be listed, including LOGICAL_ERROR which is
/// thrown by the bulk-state checking helpers.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int EMPTY_DATA_PASSED;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int DUPLICATE_COLUMN;
    extern const int BAD_ARGUMENTS;
    extern const int NOT_FOUND_COLUMN_IN_BLOCK;
}
33 | |
34 | |
35 | DataTypeTuple::DataTypeTuple(const DataTypes & elems_) |
36 | : elems(elems_), have_explicit_names(false) |
37 | { |
38 | /// Automatically assigned names in form of '1', '2', ... |
39 | size_t size = elems.size(); |
40 | names.resize(size); |
41 | for (size_t i = 0; i < size; ++i) |
42 | names[i] = toString(i + 1); |
43 | } |
44 | |
45 | |
46 | DataTypeTuple::DataTypeTuple(const DataTypes & elems_, const Strings & names_) |
47 | : elems(elems_), names(names_), have_explicit_names(true) |
48 | { |
49 | size_t size = elems.size(); |
50 | if (names.size() != size) |
51 | throw Exception("Wrong number of names passed to constructor of DataTypeTuple" , ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); |
52 | |
53 | std::unordered_set<String> names_set; |
54 | for (size_t i = 0; i < size; ++i) |
55 | { |
56 | if (names[i].empty()) |
57 | throw Exception("Names of tuple elements cannot be empty" , ErrorCodes::BAD_ARGUMENTS); |
58 | |
59 | if (isNumericASCII(names[i][0])) |
60 | throw Exception("Explicitly specified names of tuple elements cannot start with digit" , ErrorCodes::BAD_ARGUMENTS); |
61 | |
62 | if (!names_set.insert(names[i]).second) |
63 | throw Exception("Names of tuple elements must be unique" , ErrorCodes::DUPLICATE_COLUMN); |
64 | } |
65 | } |
66 | |
67 | |
68 | |
69 | std::string DataTypeTuple::doGetName() const |
70 | { |
71 | size_t size = elems.size(); |
72 | WriteBufferFromOwnString s; |
73 | |
74 | s << "Tuple(" ; |
75 | for (size_t i = 0; i < size; ++i) |
76 | { |
77 | if (i != 0) |
78 | s << ", " ; |
79 | |
80 | if (have_explicit_names) |
81 | s << backQuoteIfNeed(names[i]) << ' '; |
82 | |
83 | s << elems[i]->getName(); |
84 | } |
85 | s << ")" ; |
86 | |
87 | return s.str(); |
88 | } |
89 | |
90 | |
91 | static inline IColumn & extractElementColumn(IColumn & column, size_t idx) |
92 | { |
93 | return assert_cast<ColumnTuple &>(column).getColumn(idx); |
94 | } |
95 | |
96 | static inline const IColumn & extractElementColumn(const IColumn & column, size_t idx) |
97 | { |
98 | return assert_cast<const ColumnTuple &>(column).getColumn(idx); |
99 | } |
100 | |
101 | |
102 | void DataTypeTuple::serializeBinary(const Field & field, WriteBuffer & ostr) const |
103 | { |
104 | const auto & tuple = get<const Tuple &>(field); |
105 | for (const auto idx_elem : ext::enumerate(elems)) |
106 | idx_elem.second->serializeBinary(tuple[idx_elem.first], ostr); |
107 | } |
108 | |
109 | void DataTypeTuple::deserializeBinary(Field & field, ReadBuffer & istr) const |
110 | { |
111 | const size_t size = elems.size(); |
112 | |
113 | Tuple tuple(size); |
114 | for (const auto i : ext::range(0, size)) |
115 | elems[i]->deserializeBinary(tuple[i], istr); |
116 | |
117 | field = tuple; |
118 | } |
119 | |
120 | void DataTypeTuple::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const |
121 | { |
122 | for (const auto idx_elem : ext::enumerate(elems)) |
123 | idx_elem.second->serializeBinary(extractElementColumn(column, idx_elem.first), row_num, ostr); |
124 | } |
125 | |
126 | |
127 | template <typename F> |
128 | static void addElementSafe(const DataTypes & elems, IColumn & column, F && impl) |
129 | { |
130 | /// We use the assumption that tuples of zero size do not exist. |
131 | size_t old_size = column.size(); |
132 | |
133 | try |
134 | { |
135 | impl(); |
136 | } |
137 | catch (...) |
138 | { |
139 | for (const auto & i : ext::range(0, ext::size(elems))) |
140 | { |
141 | auto & element_column = extractElementColumn(column, i); |
142 | if (element_column.size() > old_size) |
143 | element_column.popBack(1); |
144 | } |
145 | |
146 | throw; |
147 | } |
148 | } |
149 | |
150 | |
151 | void DataTypeTuple::deserializeBinary(IColumn & column, ReadBuffer & istr) const |
152 | { |
153 | addElementSafe(elems, column, [&] |
154 | { |
155 | for (const auto & i : ext::range(0, ext::size(elems))) |
156 | elems[i]->deserializeBinary(extractElementColumn(column, i), istr); |
157 | }); |
158 | } |
159 | |
160 | void DataTypeTuple::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
161 | { |
162 | writeChar('(', ostr); |
163 | for (const auto i : ext::range(0, ext::size(elems))) |
164 | { |
165 | if (i != 0) |
166 | writeChar(',', ostr); |
167 | elems[i]->serializeAsTextQuoted(extractElementColumn(column, i), row_num, ostr, settings); |
168 | } |
169 | writeChar(')', ostr); |
170 | } |
171 | |
172 | void DataTypeTuple::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const |
173 | { |
174 | const size_t size = elems.size(); |
175 | assertChar('(', istr); |
176 | |
177 | addElementSafe(elems, column, [&] |
178 | { |
179 | for (const auto i : ext::range(0, size)) |
180 | { |
181 | skipWhitespaceIfAny(istr); |
182 | if (i != 0) |
183 | { |
184 | assertChar(',', istr); |
185 | skipWhitespaceIfAny(istr); |
186 | } |
187 | elems[i]->deserializeAsTextQuoted(extractElementColumn(column, i), istr, settings); |
188 | } |
189 | }); |
190 | |
191 | // Special format for one element tuple (1,) |
192 | if (1 == elems.size()) |
193 | { |
194 | skipWhitespaceIfAny(istr); |
195 | // Allow both (1) and (1,) |
196 | checkChar(',', istr); |
197 | } |
198 | skipWhitespaceIfAny(istr); |
199 | assertChar(')', istr); |
200 | } |
201 | |
202 | void DataTypeTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
203 | { |
204 | writeChar('[', ostr); |
205 | for (const auto i : ext::range(0, ext::size(elems))) |
206 | { |
207 | if (i != 0) |
208 | writeChar(',', ostr); |
209 | elems[i]->serializeAsTextJSON(extractElementColumn(column, i), row_num, ostr, settings); |
210 | } |
211 | writeChar(']', ostr); |
212 | } |
213 | |
214 | void DataTypeTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const |
215 | { |
216 | const size_t size = elems.size(); |
217 | assertChar('[', istr); |
218 | |
219 | addElementSafe(elems, column, [&] |
220 | { |
221 | for (const auto i : ext::range(0, size)) |
222 | { |
223 | skipWhitespaceIfAny(istr); |
224 | if (i != 0) |
225 | { |
226 | assertChar(',', istr); |
227 | skipWhitespaceIfAny(istr); |
228 | } |
229 | elems[i]->deserializeAsTextJSON(extractElementColumn(column, i), istr, settings); |
230 | } |
231 | }); |
232 | |
233 | skipWhitespaceIfAny(istr); |
234 | assertChar(']', istr); |
235 | } |
236 | |
237 | void DataTypeTuple::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
238 | { |
239 | writeCString("<tuple>" , ostr); |
240 | for (const auto i : ext::range(0, ext::size(elems))) |
241 | { |
242 | writeCString("<elem>" , ostr); |
243 | elems[i]->serializeAsTextXML(extractElementColumn(column, i), row_num, ostr, settings); |
244 | writeCString("</elem>" , ostr); |
245 | } |
246 | writeCString("</tuple>" , ostr); |
247 | } |
248 | |
249 | void DataTypeTuple::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const |
250 | { |
251 | for (const auto i : ext::range(0, ext::size(elems))) |
252 | { |
253 | if (i != 0) |
254 | writeChar(',', ostr); |
255 | elems[i]->serializeAsTextCSV(extractElementColumn(column, i), row_num, ostr, settings); |
256 | } |
257 | } |
258 | |
259 | void DataTypeTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const |
260 | { |
261 | addElementSafe(elems, column, [&] |
262 | { |
263 | const size_t size = elems.size(); |
264 | for (const auto i : ext::range(0, size)) |
265 | { |
266 | if (i != 0) |
267 | { |
268 | skipWhitespaceIfAny(istr); |
269 | assertChar(settings.csv.delimiter, istr); |
270 | skipWhitespaceIfAny(istr); |
271 | } |
272 | elems[i]->deserializeAsTextCSV(extractElementColumn(column, i), istr, settings); |
273 | } |
274 | }); |
275 | } |
276 | |
277 | void DataTypeTuple::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const |
278 | { |
279 | path.push_back(Substream::TupleElement); |
280 | for (const auto i : ext::range(0, ext::size(elems))) |
281 | { |
282 | path.back().tuple_element_name = names[i]; |
283 | elems[i]->enumerateStreams(callback, path); |
284 | } |
285 | path.pop_back(); |
286 | } |
287 | |
288 | struct SerializeBinaryBulkStateTuple : public IDataType::SerializeBinaryBulkState |
289 | { |
290 | std::vector<IDataType::SerializeBinaryBulkStatePtr> states; |
291 | }; |
292 | |
293 | struct DeserializeBinaryBulkStateTuple : public IDataType::DeserializeBinaryBulkState |
294 | { |
295 | std::vector<IDataType::DeserializeBinaryBulkStatePtr> states; |
296 | }; |
297 | |
298 | static SerializeBinaryBulkStateTuple * checkAndGetTupleSerializeState(IDataType::SerializeBinaryBulkStatePtr & state) |
299 | { |
300 | if (!state) |
301 | throw Exception("Got empty state for DataTypeTuple." , ErrorCodes::LOGICAL_ERROR); |
302 | |
303 | auto * tuple_state = typeid_cast<SerializeBinaryBulkStateTuple *>(state.get()); |
304 | if (!tuple_state) |
305 | { |
306 | auto & state_ref = *state; |
307 | throw Exception("Invalid SerializeBinaryBulkState for DataTypeTuple. Expected: " |
308 | + demangle(typeid(SerializeBinaryBulkStateTuple).name()) + ", got " |
309 | + demangle(typeid(state_ref).name()), ErrorCodes::LOGICAL_ERROR); |
310 | } |
311 | |
312 | return tuple_state; |
313 | } |
314 | |
315 | static DeserializeBinaryBulkStateTuple * checkAndGetTupleDeserializeState(IDataType::DeserializeBinaryBulkStatePtr & state) |
316 | { |
317 | if (!state) |
318 | throw Exception("Got empty state for DataTypeTuple." , ErrorCodes::LOGICAL_ERROR); |
319 | |
320 | auto * tuple_state = typeid_cast<DeserializeBinaryBulkStateTuple *>(state.get()); |
321 | if (!tuple_state) |
322 | { |
323 | auto & state_ref = *state; |
324 | throw Exception("Invalid DeserializeBinaryBulkState for DataTypeTuple. Expected: " |
325 | + demangle(typeid(DeserializeBinaryBulkStateTuple).name()) + ", got " |
326 | + demangle(typeid(state_ref).name()), ErrorCodes::LOGICAL_ERROR); |
327 | } |
328 | |
329 | return tuple_state; |
330 | } |
331 | |
332 | void DataTypeTuple::serializeBinaryBulkStatePrefix( |
333 | SerializeBinaryBulkSettings & settings, |
334 | SerializeBinaryBulkStatePtr & state) const |
335 | { |
336 | auto tuple_state = std::make_shared<SerializeBinaryBulkStateTuple>(); |
337 | tuple_state->states.resize(elems.size()); |
338 | |
339 | settings.path.push_back(Substream::TupleElement); |
340 | for (size_t i = 0; i < elems.size(); ++i) |
341 | { |
342 | settings.path.back().tuple_element_name = names[i]; |
343 | elems[i]->serializeBinaryBulkStatePrefix(settings, tuple_state->states[i]); |
344 | } |
345 | settings.path.pop_back(); |
346 | |
347 | state = std::move(tuple_state); |
348 | } |
349 | |
350 | void DataTypeTuple::serializeBinaryBulkStateSuffix( |
351 | SerializeBinaryBulkSettings & settings, |
352 | SerializeBinaryBulkStatePtr & state) const |
353 | { |
354 | auto * tuple_state = checkAndGetTupleSerializeState(state); |
355 | |
356 | settings.path.push_back(Substream::TupleElement); |
357 | for (size_t i = 0; i < elems.size(); ++i) |
358 | { |
359 | settings.path.back().tuple_element_name = names[i]; |
360 | elems[i]->serializeBinaryBulkStateSuffix(settings, tuple_state->states[i]); |
361 | } |
362 | settings.path.pop_back(); |
363 | } |
364 | |
365 | void DataTypeTuple::deserializeBinaryBulkStatePrefix( |
366 | DeserializeBinaryBulkSettings & settings, |
367 | DeserializeBinaryBulkStatePtr & state) const |
368 | { |
369 | auto tuple_state = std::make_shared<DeserializeBinaryBulkStateTuple>(); |
370 | tuple_state->states.resize(elems.size()); |
371 | |
372 | settings.path.push_back(Substream::TupleElement); |
373 | for (size_t i = 0; i < elems.size(); ++i) |
374 | { |
375 | settings.path.back().tuple_element_name = names[i]; |
376 | elems[i]->deserializeBinaryBulkStatePrefix(settings, tuple_state->states[i]); |
377 | } |
378 | settings.path.pop_back(); |
379 | |
380 | state = std::move(tuple_state); |
381 | } |
382 | |
383 | void DataTypeTuple::serializeBinaryBulkWithMultipleStreams( |
384 | const IColumn & column, |
385 | size_t offset, |
386 | size_t limit, |
387 | SerializeBinaryBulkSettings & settings, |
388 | SerializeBinaryBulkStatePtr & state) const |
389 | { |
390 | auto * tuple_state = checkAndGetTupleSerializeState(state); |
391 | |
392 | settings.path.push_back(Substream::TupleElement); |
393 | for (const auto i : ext::range(0, ext::size(elems))) |
394 | { |
395 | settings.path.back().tuple_element_name = names[i]; |
396 | auto & element_col = extractElementColumn(column, i); |
397 | elems[i]->serializeBinaryBulkWithMultipleStreams(element_col, offset, limit, settings, tuple_state->states[i]); |
398 | } |
399 | settings.path.pop_back(); |
400 | } |
401 | |
402 | void DataTypeTuple::deserializeBinaryBulkWithMultipleStreams( |
403 | IColumn & column, |
404 | size_t limit, |
405 | DeserializeBinaryBulkSettings & settings, |
406 | DeserializeBinaryBulkStatePtr & state) const |
407 | { |
408 | auto * tuple_state = checkAndGetTupleDeserializeState(state); |
409 | |
410 | settings.path.push_back(Substream::TupleElement); |
411 | settings.avg_value_size_hint = 0; |
412 | for (const auto i : ext::range(0, ext::size(elems))) |
413 | { |
414 | settings.path.back().tuple_element_name = names[i]; |
415 | auto & element_col = extractElementColumn(column, i); |
416 | elems[i]->deserializeBinaryBulkWithMultipleStreams(element_col, limit, settings, tuple_state->states[i]); |
417 | } |
418 | settings.path.pop_back(); |
419 | } |
420 | |
421 | void DataTypeTuple::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const |
422 | { |
423 | for (; value_index < elems.size(); ++value_index) |
424 | { |
425 | size_t stored = 0; |
426 | elems[value_index]->serializeProtobuf(extractElementColumn(column, value_index), row_num, protobuf, stored); |
427 | if (!stored) |
428 | break; |
429 | } |
430 | } |
431 | |
432 | void DataTypeTuple::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const |
433 | { |
434 | row_added = false; |
435 | bool all_elements_get_row = true; |
436 | addElementSafe(elems, column, [&] |
437 | { |
438 | for (const auto & i : ext::range(0, ext::size(elems))) |
439 | { |
440 | bool element_row_added; |
441 | elems[i]->deserializeProtobuf(extractElementColumn(column, i), protobuf, allow_add_row, element_row_added); |
442 | all_elements_get_row &= element_row_added; |
443 | } |
444 | }); |
445 | row_added = all_elements_get_row; |
446 | } |
447 | |
448 | MutableColumnPtr DataTypeTuple::createColumn() const |
449 | { |
450 | size_t size = elems.size(); |
451 | MutableColumns tuple_columns(size); |
452 | for (size_t i = 0; i < size; ++i) |
453 | tuple_columns[i] = elems[i]->createColumn(); |
454 | return ColumnTuple::create(std::move(tuple_columns)); |
455 | } |
456 | |
457 | Field DataTypeTuple::getDefault() const |
458 | { |
459 | return Tuple(ext::map<Tuple>(elems, [] (const DataTypePtr & elem) { return elem->getDefault(); })); |
460 | } |
461 | |
462 | void DataTypeTuple::insertDefaultInto(IColumn & column) const |
463 | { |
464 | addElementSafe(elems, column, [&] |
465 | { |
466 | for (const auto & i : ext::range(0, ext::size(elems))) |
467 | elems[i]->insertDefaultInto(extractElementColumn(column, i)); |
468 | }); |
469 | } |
470 | |
471 | bool DataTypeTuple::equals(const IDataType & rhs) const |
472 | { |
473 | if (typeid(rhs) != typeid(*this)) |
474 | return false; |
475 | |
476 | const DataTypeTuple & rhs_tuple = static_cast<const DataTypeTuple &>(rhs); |
477 | |
478 | size_t size = elems.size(); |
479 | if (size != rhs_tuple.elems.size()) |
480 | return false; |
481 | |
482 | for (size_t i = 0; i < size; ++i) |
483 | if (!elems[i]->equals(*rhs_tuple.elems[i])) |
484 | return false; |
485 | |
486 | return true; |
487 | } |
488 | |
489 | |
490 | size_t DataTypeTuple::getPositionByName(const String & name) const |
491 | { |
492 | size_t size = elems.size(); |
493 | for (size_t i = 0; i < size; ++i) |
494 | if (names[i] == name) |
495 | return i; |
496 | throw Exception("Tuple doesn't have element with name '" + name + "'" , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); |
497 | } |
498 | |
499 | |
500 | bool DataTypeTuple::textCanContainOnlyValidUTF8() const |
501 | { |
502 | return std::all_of(elems.begin(), elems.end(), [](auto && elem) { return elem->textCanContainOnlyValidUTF8(); }); |
503 | } |
504 | |
505 | bool DataTypeTuple::haveMaximumSizeOfValue() const |
506 | { |
507 | return std::all_of(elems.begin(), elems.end(), [](auto && elem) { return elem->haveMaximumSizeOfValue(); }); |
508 | } |
509 | |
510 | bool DataTypeTuple::isComparable() const |
511 | { |
512 | return std::all_of(elems.begin(), elems.end(), [](auto && elem) { return elem->isComparable(); }); |
513 | } |
514 | |
515 | size_t DataTypeTuple::getMaximumSizeOfValueInMemory() const |
516 | { |
517 | size_t res = 0; |
518 | for (const auto & elem : elems) |
519 | res += elem->getMaximumSizeOfValueInMemory(); |
520 | return res; |
521 | } |
522 | |
523 | size_t DataTypeTuple::getSizeOfValueInMemory() const |
524 | { |
525 | size_t res = 0; |
526 | for (const auto & elem : elems) |
527 | res += elem->getSizeOfValueInMemory(); |
528 | return res; |
529 | } |
530 | |
531 | |
532 | static DataTypePtr create(const String & /*type_name*/, const ASTPtr & arguments) |
533 | { |
534 | if (!arguments || arguments->children.empty()) |
535 | throw Exception("Tuple cannot be empty" , ErrorCodes::EMPTY_DATA_PASSED); |
536 | |
537 | DataTypes nested_types; |
538 | nested_types.reserve(arguments->children.size()); |
539 | |
540 | Strings names; |
541 | names.reserve(arguments->children.size()); |
542 | |
543 | for (const ASTPtr & child : arguments->children) |
544 | { |
545 | if (const auto * name_and_type_pair = child->as<ASTNameTypePair>()) |
546 | { |
547 | nested_types.emplace_back(DataTypeFactory::instance().get(name_and_type_pair->type)); |
548 | names.emplace_back(name_and_type_pair->name); |
549 | } |
550 | else |
551 | nested_types.emplace_back(DataTypeFactory::instance().get(child)); |
552 | } |
553 | |
554 | if (names.empty()) |
555 | return std::make_shared<DataTypeTuple>(nested_types); |
556 | else if (names.size() != nested_types.size()) |
557 | throw Exception("Names are specified not for all elements of Tuple type" , ErrorCodes::BAD_ARGUMENTS); |
558 | else |
559 | return std::make_shared<DataTypeTuple>(nested_types, names); |
560 | } |
561 | |
562 | |
563 | void registerDataTypeTuple(DataTypeFactory & factory) |
564 | { |
565 | factory.registerDataType("Tuple" , create); |
566 | } |
567 | |
568 | void registerDataTypeNested(DataTypeFactory & factory) |
569 | { |
570 | /// Nested(...) data type is just a sugar for Array(Tuple(...)) |
571 | factory.registerDataType("Nested" , [&factory](const String & /*type_name*/, const ASTPtr & arguments) |
572 | { |
573 | return std::make_shared<DataTypeArray>(factory.get("Tuple" , arguments)); |
574 | }); |
575 | } |
576 | |
577 | } |
578 | |