1#include <string.h>
2
3#include <Common/typeid_cast.h>
4#include <Common/assert_cast.h>
5#include <Common/StringUtils/StringUtils.h>
6
7#include <DataTypes/DataTypeArray.h>
8#include <DataTypes/DataTypeTuple.h>
9#include <DataTypes/NestedUtils.h>
10
11#include <Columns/ColumnArray.h>
12#include <Columns/ColumnTuple.h>
13#include <Columns/ColumnConst.h>
14
15#include <Parsers/IAST.h>
16
17
18namespace DB
19{
20
21namespace ErrorCodes
22{
23 extern const int ILLEGAL_COLUMN;
24 extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
25}
26
27namespace Nested
28{
29
/// Builds the full name of a nested column: `<nested_table_name>.<nested_field_name>`.
std::string concatenateName(const std::string & nested_table_name, const std::string & nested_field_name)
{
    std::string full_name;
    full_name.reserve(nested_table_name.size() + 1 + nested_field_name.size());

    full_name += nested_table_name;
    full_name += '.';
    full_name += nested_field_name;

    return full_name;
}
34
35
36/** Name can be treated as compound if and only if both parts are simple identifiers.
37 */
38std::pair<std::string, std::string> splitName(const std::string & name)
39{
40 const char * begin = name.data();
41 const char * pos = begin;
42 const char * end = begin + name.size();
43
44 if (pos >= end || !isValidIdentifierBegin(*pos))
45 return {name, {}};
46
47 ++pos;
48
49 while (pos < end && isWordCharASCII(*pos))
50 ++pos;
51
52 if (pos >= end || *pos != '.')
53 return {name, {}};
54
55 const char * first_end = pos;
56 ++pos;
57 const char * second_begin = pos;
58
59 if (pos >= end || !isValidIdentifierBegin(*pos))
60 return {name, {}};
61
62 ++pos;
63
64 while (pos < end && isWordCharASCII(*pos))
65 ++pos;
66
67 if (pos != end)
68 return {name, {}};
69
70 return {{ begin, first_end }, { second_begin, end }};
71}
72
73
74std::string extractTableName(const std::string & nested_name)
75{
76 auto splitted = splitName(nested_name);
77 return splitted.first;
78}
79
80
81Block flatten(const Block & block)
82{
83 Block res;
84
85 for (const auto & elem : block)
86 {
87 if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(elem.type.get()))
88 {
89 if (const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(type_arr->getNestedType().get()))
90 {
91 const DataTypes & element_types = type_tuple->getElements();
92 const Strings & names = type_tuple->getElementNames();
93 size_t tuple_size = element_types.size();
94
95 bool is_const = isColumnConst(*elem.column);
96 const ColumnArray * column_array;
97 if (is_const)
98 column_array = typeid_cast<const ColumnArray *>(&assert_cast<const ColumnConst &>(*elem.column).getDataColumn());
99 else
100 column_array = typeid_cast<const ColumnArray *>(elem.column.get());
101
102 const ColumnPtr & column_offsets = column_array->getOffsetsPtr();
103
104 const ColumnTuple & column_tuple = typeid_cast<const ColumnTuple &>(column_array->getData());
105 const auto & element_columns = column_tuple.getColumns();
106
107 for (size_t i = 0; i < tuple_size; ++i)
108 {
109 String nested_name = concatenateName(elem.name, names[i]);
110 ColumnPtr column_array_of_element = ColumnArray::create(element_columns[i], column_offsets);
111
112 res.insert(ColumnWithTypeAndName(
113 is_const
114 ? ColumnConst::create(std::move(column_array_of_element), block.rows())
115 : std::move(column_array_of_element),
116 std::make_shared<DataTypeArray>(element_types[i]),
117 nested_name));
118 }
119 }
120 else
121 res.insert(elem);
122 }
123 else
124 res.insert(elem);
125 }
126
127 return res;
128}
129
130
131NamesAndTypesList collect(const NamesAndTypesList & names_and_types)
132{
133 NamesAndTypesList res;
134
135 std::map<std::string, NamesAndTypesList> nested;
136 for (const auto & name_type : names_and_types)
137 {
138 bool collected = false;
139 if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(name_type.type.get()))
140 {
141 auto splitted = splitName(name_type.name);
142 if (!splitted.second.empty())
143 {
144 nested[splitted.first].emplace_back(splitted.second, type_arr->getNestedType());
145 collected = true;
146 }
147 }
148
149 if (!collected)
150 res.push_back(name_type);
151 }
152
153 for (const auto & name_elems : nested)
154 res.emplace_back(name_elems.first, std::make_shared<DataTypeArray>(
155 std::make_shared<DataTypeTuple>(name_elems.second.getTypes(), name_elems.second.getNames())));
156
157 return res;
158}
159
160
161void validateArraySizes(const Block & block)
162{
163 /// Nested prefix -> position of first column in block.
164 std::map<std::string, size_t> nested;
165
166 for (size_t i = 0, size = block.columns(); i < size; ++i)
167 {
168 const auto & elem = block.getByPosition(i);
169
170 if (isArray(elem.type))
171 {
172 if (!typeid_cast<const ColumnArray *>(elem.column.get()))
173 throw Exception("Column with Array type is not represented by ColumnArray column: " + elem.column->dumpStructure(), ErrorCodes::ILLEGAL_COLUMN);
174
175 auto splitted = splitName(elem.name);
176
177 /// Is it really a column of Nested data structure.
178 if (!splitted.second.empty())
179 {
180 auto [it, inserted] = nested.emplace(splitted.first, i);
181
182 /// It's not the first column of Nested data structure.
183 if (!inserted)
184 {
185 const ColumnArray & first_array_column = assert_cast<const ColumnArray &>(*block.getByPosition(it->second).column);
186 const ColumnArray & another_array_column = assert_cast<const ColumnArray &>(*elem.column);
187
188 if (!first_array_column.hasEqualOffsets(another_array_column))
189 throw Exception("Elements '" + block.getByPosition(it->second).name
190 + "' and '" + elem.name
191 + "' of Nested data structure '" + splitted.first
192 + "' (Array columns) have different array sizes.", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
193 }
194 }
195 }
196 }
197}
198
199}
200
201}
202