1#include <Common/StringUtils/StringUtils.h>
2#include <Columns/ColumnTuple.h>
3#include <Core/Field.h>
4#include <Formats/FormatSettings.h>
5#include <DataTypes/DataTypeTuple.h>
6#include <DataTypes/DataTypeArray.h>
7#include <DataTypes/DataTypeFactory.h>
8#include <Parsers/IAST.h>
9#include <Parsers/ASTNameTypePair.h>
10#include <Common/typeid_cast.h>
11#include <Common/assert_cast.h>
12#include <IO/WriteHelpers.h>
13#include <IO/ReadHelpers.h>
14#include <IO/WriteBufferFromString.h>
15#include <IO/Operators.h>
16
17#include <ext/map.h>
18#include <ext/enumerate.h>
19#include <ext/range.h>
20
21
22namespace DB
23{
24
/// Error codes referenced in this translation unit.
namespace ErrorCodes
{
    /// Thrown by the bulk-state checkers below; declared here explicitly
    /// instead of relying on a declaration leaking in from an included header.
    extern const int LOGICAL_ERROR;
    extern const int EMPTY_DATA_PASSED;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int DUPLICATE_COLUMN;
    extern const int BAD_ARGUMENTS;
    extern const int NOT_FOUND_COLUMN_IN_BLOCK;
}
33
34
35DataTypeTuple::DataTypeTuple(const DataTypes & elems_)
36 : elems(elems_), have_explicit_names(false)
37{
38 /// Automatically assigned names in form of '1', '2', ...
39 size_t size = elems.size();
40 names.resize(size);
41 for (size_t i = 0; i < size; ++i)
42 names[i] = toString(i + 1);
43}
44
45
46DataTypeTuple::DataTypeTuple(const DataTypes & elems_, const Strings & names_)
47 : elems(elems_), names(names_), have_explicit_names(true)
48{
49 size_t size = elems.size();
50 if (names.size() != size)
51 throw Exception("Wrong number of names passed to constructor of DataTypeTuple", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
52
53 std::unordered_set<String> names_set;
54 for (size_t i = 0; i < size; ++i)
55 {
56 if (names[i].empty())
57 throw Exception("Names of tuple elements cannot be empty", ErrorCodes::BAD_ARGUMENTS);
58
59 if (isNumericASCII(names[i][0]))
60 throw Exception("Explicitly specified names of tuple elements cannot start with digit", ErrorCodes::BAD_ARGUMENTS);
61
62 if (!names_set.insert(names[i]).second)
63 throw Exception("Names of tuple elements must be unique", ErrorCodes::DUPLICATE_COLUMN);
64 }
65}
66
67
68
69std::string DataTypeTuple::doGetName() const
70{
71 size_t size = elems.size();
72 WriteBufferFromOwnString s;
73
74 s << "Tuple(";
75 for (size_t i = 0; i < size; ++i)
76 {
77 if (i != 0)
78 s << ", ";
79
80 if (have_explicit_names)
81 s << backQuoteIfNeed(names[i]) << ' ';
82
83 s << elems[i]->getName();
84 }
85 s << ")";
86
87 return s.str();
88}
89
90
91static inline IColumn & extractElementColumn(IColumn & column, size_t idx)
92{
93 return assert_cast<ColumnTuple &>(column).getColumn(idx);
94}
95
96static inline const IColumn & extractElementColumn(const IColumn & column, size_t idx)
97{
98 return assert_cast<const ColumnTuple &>(column).getColumn(idx);
99}
100
101
102void DataTypeTuple::serializeBinary(const Field & field, WriteBuffer & ostr) const
103{
104 const auto & tuple = get<const Tuple &>(field);
105 for (const auto idx_elem : ext::enumerate(elems))
106 idx_elem.second->serializeBinary(tuple[idx_elem.first], ostr);
107}
108
109void DataTypeTuple::deserializeBinary(Field & field, ReadBuffer & istr) const
110{
111 const size_t size = elems.size();
112
113 Tuple tuple(size);
114 for (const auto i : ext::range(0, size))
115 elems[i]->deserializeBinary(tuple[i], istr);
116
117 field = tuple;
118}
119
120void DataTypeTuple::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
121{
122 for (const auto idx_elem : ext::enumerate(elems))
123 idx_elem.second->serializeBinary(extractElementColumn(column, idx_elem.first), row_num, ostr);
124}
125
126
127template <typename F>
128static void addElementSafe(const DataTypes & elems, IColumn & column, F && impl)
129{
130 /// We use the assumption that tuples of zero size do not exist.
131 size_t old_size = column.size();
132
133 try
134 {
135 impl();
136 }
137 catch (...)
138 {
139 for (const auto & i : ext::range(0, ext::size(elems)))
140 {
141 auto & element_column = extractElementColumn(column, i);
142 if (element_column.size() > old_size)
143 element_column.popBack(1);
144 }
145
146 throw;
147 }
148}
149
150
151void DataTypeTuple::deserializeBinary(IColumn & column, ReadBuffer & istr) const
152{
153 addElementSafe(elems, column, [&]
154 {
155 for (const auto & i : ext::range(0, ext::size(elems)))
156 elems[i]->deserializeBinary(extractElementColumn(column, i), istr);
157 });
158}
159
160void DataTypeTuple::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
161{
162 writeChar('(', ostr);
163 for (const auto i : ext::range(0, ext::size(elems)))
164 {
165 if (i != 0)
166 writeChar(',', ostr);
167 elems[i]->serializeAsTextQuoted(extractElementColumn(column, i), row_num, ostr, settings);
168 }
169 writeChar(')', ostr);
170}
171
172void DataTypeTuple::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
173{
174 const size_t size = elems.size();
175 assertChar('(', istr);
176
177 addElementSafe(elems, column, [&]
178 {
179 for (const auto i : ext::range(0, size))
180 {
181 skipWhitespaceIfAny(istr);
182 if (i != 0)
183 {
184 assertChar(',', istr);
185 skipWhitespaceIfAny(istr);
186 }
187 elems[i]->deserializeAsTextQuoted(extractElementColumn(column, i), istr, settings);
188 }
189 });
190
191 // Special format for one element tuple (1,)
192 if (1 == elems.size())
193 {
194 skipWhitespaceIfAny(istr);
195 // Allow both (1) and (1,)
196 checkChar(',', istr);
197 }
198 skipWhitespaceIfAny(istr);
199 assertChar(')', istr);
200}
201
202void DataTypeTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
203{
204 writeChar('[', ostr);
205 for (const auto i : ext::range(0, ext::size(elems)))
206 {
207 if (i != 0)
208 writeChar(',', ostr);
209 elems[i]->serializeAsTextJSON(extractElementColumn(column, i), row_num, ostr, settings);
210 }
211 writeChar(']', ostr);
212}
213
214void DataTypeTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
215{
216 const size_t size = elems.size();
217 assertChar('[', istr);
218
219 addElementSafe(elems, column, [&]
220 {
221 for (const auto i : ext::range(0, size))
222 {
223 skipWhitespaceIfAny(istr);
224 if (i != 0)
225 {
226 assertChar(',', istr);
227 skipWhitespaceIfAny(istr);
228 }
229 elems[i]->deserializeAsTextJSON(extractElementColumn(column, i), istr, settings);
230 }
231 });
232
233 skipWhitespaceIfAny(istr);
234 assertChar(']', istr);
235}
236
237void DataTypeTuple::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
238{
239 writeCString("<tuple>", ostr);
240 for (const auto i : ext::range(0, ext::size(elems)))
241 {
242 writeCString("<elem>", ostr);
243 elems[i]->serializeAsTextXML(extractElementColumn(column, i), row_num, ostr, settings);
244 writeCString("</elem>", ostr);
245 }
246 writeCString("</tuple>", ostr);
247}
248
249void DataTypeTuple::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
250{
251 for (const auto i : ext::range(0, ext::size(elems)))
252 {
253 if (i != 0)
254 writeChar(',', ostr);
255 elems[i]->serializeAsTextCSV(extractElementColumn(column, i), row_num, ostr, settings);
256 }
257}
258
259void DataTypeTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
260{
261 addElementSafe(elems, column, [&]
262 {
263 const size_t size = elems.size();
264 for (const auto i : ext::range(0, size))
265 {
266 if (i != 0)
267 {
268 skipWhitespaceIfAny(istr);
269 assertChar(settings.csv.delimiter, istr);
270 skipWhitespaceIfAny(istr);
271 }
272 elems[i]->deserializeAsTextCSV(extractElementColumn(column, i), istr, settings);
273 }
274 });
275}
276
277void DataTypeTuple::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
278{
279 path.push_back(Substream::TupleElement);
280 for (const auto i : ext::range(0, ext::size(elems)))
281 {
282 path.back().tuple_element_name = names[i];
283 elems[i]->enumerateStreams(callback, path);
284 }
285 path.pop_back();
286}
287
288struct SerializeBinaryBulkStateTuple : public IDataType::SerializeBinaryBulkState
289{
290 std::vector<IDataType::SerializeBinaryBulkStatePtr> states;
291};
292
293struct DeserializeBinaryBulkStateTuple : public IDataType::DeserializeBinaryBulkState
294{
295 std::vector<IDataType::DeserializeBinaryBulkStatePtr> states;
296};
297
298static SerializeBinaryBulkStateTuple * checkAndGetTupleSerializeState(IDataType::SerializeBinaryBulkStatePtr & state)
299{
300 if (!state)
301 throw Exception("Got empty state for DataTypeTuple.", ErrorCodes::LOGICAL_ERROR);
302
303 auto * tuple_state = typeid_cast<SerializeBinaryBulkStateTuple *>(state.get());
304 if (!tuple_state)
305 {
306 auto & state_ref = *state;
307 throw Exception("Invalid SerializeBinaryBulkState for DataTypeTuple. Expected: "
308 + demangle(typeid(SerializeBinaryBulkStateTuple).name()) + ", got "
309 + demangle(typeid(state_ref).name()), ErrorCodes::LOGICAL_ERROR);
310 }
311
312 return tuple_state;
313}
314
315static DeserializeBinaryBulkStateTuple * checkAndGetTupleDeserializeState(IDataType::DeserializeBinaryBulkStatePtr & state)
316{
317 if (!state)
318 throw Exception("Got empty state for DataTypeTuple.", ErrorCodes::LOGICAL_ERROR);
319
320 auto * tuple_state = typeid_cast<DeserializeBinaryBulkStateTuple *>(state.get());
321 if (!tuple_state)
322 {
323 auto & state_ref = *state;
324 throw Exception("Invalid DeserializeBinaryBulkState for DataTypeTuple. Expected: "
325 + demangle(typeid(DeserializeBinaryBulkStateTuple).name()) + ", got "
326 + demangle(typeid(state_ref).name()), ErrorCodes::LOGICAL_ERROR);
327 }
328
329 return tuple_state;
330}
331
332void DataTypeTuple::serializeBinaryBulkStatePrefix(
333 SerializeBinaryBulkSettings & settings,
334 SerializeBinaryBulkStatePtr & state) const
335{
336 auto tuple_state = std::make_shared<SerializeBinaryBulkStateTuple>();
337 tuple_state->states.resize(elems.size());
338
339 settings.path.push_back(Substream::TupleElement);
340 for (size_t i = 0; i < elems.size(); ++i)
341 {
342 settings.path.back().tuple_element_name = names[i];
343 elems[i]->serializeBinaryBulkStatePrefix(settings, tuple_state->states[i]);
344 }
345 settings.path.pop_back();
346
347 state = std::move(tuple_state);
348}
349
350void DataTypeTuple::serializeBinaryBulkStateSuffix(
351 SerializeBinaryBulkSettings & settings,
352 SerializeBinaryBulkStatePtr & state) const
353{
354 auto * tuple_state = checkAndGetTupleSerializeState(state);
355
356 settings.path.push_back(Substream::TupleElement);
357 for (size_t i = 0; i < elems.size(); ++i)
358 {
359 settings.path.back().tuple_element_name = names[i];
360 elems[i]->serializeBinaryBulkStateSuffix(settings, tuple_state->states[i]);
361 }
362 settings.path.pop_back();
363}
364
365void DataTypeTuple::deserializeBinaryBulkStatePrefix(
366 DeserializeBinaryBulkSettings & settings,
367 DeserializeBinaryBulkStatePtr & state) const
368{
369 auto tuple_state = std::make_shared<DeserializeBinaryBulkStateTuple>();
370 tuple_state->states.resize(elems.size());
371
372 settings.path.push_back(Substream::TupleElement);
373 for (size_t i = 0; i < elems.size(); ++i)
374 {
375 settings.path.back().tuple_element_name = names[i];
376 elems[i]->deserializeBinaryBulkStatePrefix(settings, tuple_state->states[i]);
377 }
378 settings.path.pop_back();
379
380 state = std::move(tuple_state);
381}
382
383void DataTypeTuple::serializeBinaryBulkWithMultipleStreams(
384 const IColumn & column,
385 size_t offset,
386 size_t limit,
387 SerializeBinaryBulkSettings & settings,
388 SerializeBinaryBulkStatePtr & state) const
389{
390 auto * tuple_state = checkAndGetTupleSerializeState(state);
391
392 settings.path.push_back(Substream::TupleElement);
393 for (const auto i : ext::range(0, ext::size(elems)))
394 {
395 settings.path.back().tuple_element_name = names[i];
396 auto & element_col = extractElementColumn(column, i);
397 elems[i]->serializeBinaryBulkWithMultipleStreams(element_col, offset, limit, settings, tuple_state->states[i]);
398 }
399 settings.path.pop_back();
400}
401
402void DataTypeTuple::deserializeBinaryBulkWithMultipleStreams(
403 IColumn & column,
404 size_t limit,
405 DeserializeBinaryBulkSettings & settings,
406 DeserializeBinaryBulkStatePtr & state) const
407{
408 auto * tuple_state = checkAndGetTupleDeserializeState(state);
409
410 settings.path.push_back(Substream::TupleElement);
411 settings.avg_value_size_hint = 0;
412 for (const auto i : ext::range(0, ext::size(elems)))
413 {
414 settings.path.back().tuple_element_name = names[i];
415 auto & element_col = extractElementColumn(column, i);
416 elems[i]->deserializeBinaryBulkWithMultipleStreams(element_col, limit, settings, tuple_state->states[i]);
417 }
418 settings.path.pop_back();
419}
420
421void DataTypeTuple::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const
422{
423 for (; value_index < elems.size(); ++value_index)
424 {
425 size_t stored = 0;
426 elems[value_index]->serializeProtobuf(extractElementColumn(column, value_index), row_num, protobuf, stored);
427 if (!stored)
428 break;
429 }
430}
431
432void DataTypeTuple::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const
433{
434 row_added = false;
435 bool all_elements_get_row = true;
436 addElementSafe(elems, column, [&]
437 {
438 for (const auto & i : ext::range(0, ext::size(elems)))
439 {
440 bool element_row_added;
441 elems[i]->deserializeProtobuf(extractElementColumn(column, i), protobuf, allow_add_row, element_row_added);
442 all_elements_get_row &= element_row_added;
443 }
444 });
445 row_added = all_elements_get_row;
446}
447
448MutableColumnPtr DataTypeTuple::createColumn() const
449{
450 size_t size = elems.size();
451 MutableColumns tuple_columns(size);
452 for (size_t i = 0; i < size; ++i)
453 tuple_columns[i] = elems[i]->createColumn();
454 return ColumnTuple::create(std::move(tuple_columns));
455}
456
457Field DataTypeTuple::getDefault() const
458{
459 return Tuple(ext::map<Tuple>(elems, [] (const DataTypePtr & elem) { return elem->getDefault(); }));
460}
461
462void DataTypeTuple::insertDefaultInto(IColumn & column) const
463{
464 addElementSafe(elems, column, [&]
465 {
466 for (const auto & i : ext::range(0, ext::size(elems)))
467 elems[i]->insertDefaultInto(extractElementColumn(column, i));
468 });
469}
470
471bool DataTypeTuple::equals(const IDataType & rhs) const
472{
473 if (typeid(rhs) != typeid(*this))
474 return false;
475
476 const DataTypeTuple & rhs_tuple = static_cast<const DataTypeTuple &>(rhs);
477
478 size_t size = elems.size();
479 if (size != rhs_tuple.elems.size())
480 return false;
481
482 for (size_t i = 0; i < size; ++i)
483 if (!elems[i]->equals(*rhs_tuple.elems[i]))
484 return false;
485
486 return true;
487}
488
489
490size_t DataTypeTuple::getPositionByName(const String & name) const
491{
492 size_t size = elems.size();
493 for (size_t i = 0; i < size; ++i)
494 if (names[i] == name)
495 return i;
496 throw Exception("Tuple doesn't have element with name '" + name + "'", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
497}
498
499
500bool DataTypeTuple::textCanContainOnlyValidUTF8() const
501{
502 return std::all_of(elems.begin(), elems.end(), [](auto && elem) { return elem->textCanContainOnlyValidUTF8(); });
503}
504
505bool DataTypeTuple::haveMaximumSizeOfValue() const
506{
507 return std::all_of(elems.begin(), elems.end(), [](auto && elem) { return elem->haveMaximumSizeOfValue(); });
508}
509
510bool DataTypeTuple::isComparable() const
511{
512 return std::all_of(elems.begin(), elems.end(), [](auto && elem) { return elem->isComparable(); });
513}
514
515size_t DataTypeTuple::getMaximumSizeOfValueInMemory() const
516{
517 size_t res = 0;
518 for (const auto & elem : elems)
519 res += elem->getMaximumSizeOfValueInMemory();
520 return res;
521}
522
523size_t DataTypeTuple::getSizeOfValueInMemory() const
524{
525 size_t res = 0;
526 for (const auto & elem : elems)
527 res += elem->getSizeOfValueInMemory();
528 return res;
529}
530
531
532static DataTypePtr create(const String & /*type_name*/, const ASTPtr & arguments)
533{
534 if (!arguments || arguments->children.empty())
535 throw Exception("Tuple cannot be empty", ErrorCodes::EMPTY_DATA_PASSED);
536
537 DataTypes nested_types;
538 nested_types.reserve(arguments->children.size());
539
540 Strings names;
541 names.reserve(arguments->children.size());
542
543 for (const ASTPtr & child : arguments->children)
544 {
545 if (const auto * name_and_type_pair = child->as<ASTNameTypePair>())
546 {
547 nested_types.emplace_back(DataTypeFactory::instance().get(name_and_type_pair->type));
548 names.emplace_back(name_and_type_pair->name);
549 }
550 else
551 nested_types.emplace_back(DataTypeFactory::instance().get(child));
552 }
553
554 if (names.empty())
555 return std::make_shared<DataTypeTuple>(nested_types);
556 else if (names.size() != nested_types.size())
557 throw Exception("Names are specified not for all elements of Tuple type", ErrorCodes::BAD_ARGUMENTS);
558 else
559 return std::make_shared<DataTypeTuple>(nested_types, names);
560}
561
562
563void registerDataTypeTuple(DataTypeFactory & factory)
564{
565 factory.registerDataType("Tuple", create);
566}
567
568void registerDataTypeNested(DataTypeFactory & factory)
569{
570 /// Nested(...) data type is just a sugar for Array(Tuple(...))
571 factory.registerDataType("Nested", [&factory](const String & /*type_name*/, const ASTPtr & arguments)
572 {
573 return std::make_shared<DataTypeArray>(factory.get("Tuple", arguments));
574 });
575}
576
577}
578