1 | #include <string.h> |
2 | |
3 | #include <Common/typeid_cast.h> |
4 | #include <Common/assert_cast.h> |
5 | #include <Common/StringUtils/StringUtils.h> |
6 | |
7 | #include <DataTypes/DataTypeArray.h> |
8 | #include <DataTypes/DataTypeTuple.h> |
9 | #include <DataTypes/NestedUtils.h> |
10 | |
11 | #include <Columns/ColumnArray.h> |
12 | #include <Columns/ColumnTuple.h> |
13 | #include <Columns/ColumnConst.h> |
14 | |
15 | #include <Parsers/IAST.h> |
16 | |
17 | |
18 | namespace DB |
19 | { |
20 | |
21 | namespace ErrorCodes |
22 | { |
23 | extern const int ILLEGAL_COLUMN; |
24 | extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; |
25 | } |
26 | |
27 | namespace Nested |
28 | { |
29 | |
/// Builds the full name of a Nested element: "<nested_table_name>.<nested_field_name>".
std::string concatenateName(const std::string & nested_table_name, const std::string & nested_field_name)
{
    std::string full_name;
    full_name.reserve(nested_table_name.size() + 1 + nested_field_name.size());

    full_name += nested_table_name;
    full_name += '.';
    full_name += nested_field_name;

    return full_name;
}
34 | |
35 | |
36 | /** Name can be treated as compound if and only if both parts are simple identifiers. |
37 | */ |
38 | std::pair<std::string, std::string> splitName(const std::string & name) |
39 | { |
40 | const char * begin = name.data(); |
41 | const char * pos = begin; |
42 | const char * end = begin + name.size(); |
43 | |
44 | if (pos >= end || !isValidIdentifierBegin(*pos)) |
45 | return {name, {}}; |
46 | |
47 | ++pos; |
48 | |
49 | while (pos < end && isWordCharASCII(*pos)) |
50 | ++pos; |
51 | |
52 | if (pos >= end || *pos != '.') |
53 | return {name, {}}; |
54 | |
55 | const char * first_end = pos; |
56 | ++pos; |
57 | const char * second_begin = pos; |
58 | |
59 | if (pos >= end || !isValidIdentifierBegin(*pos)) |
60 | return {name, {}}; |
61 | |
62 | ++pos; |
63 | |
64 | while (pos < end && isWordCharASCII(*pos)) |
65 | ++pos; |
66 | |
67 | if (pos != end) |
68 | return {name, {}}; |
69 | |
70 | return {{ begin, first_end }, { second_begin, end }}; |
71 | } |
72 | |
73 | |
74 | std::string (const std::string & nested_name) |
75 | { |
76 | auto splitted = splitName(nested_name); |
77 | return splitted.first; |
78 | } |
79 | |
80 | |
81 | Block flatten(const Block & block) |
82 | { |
83 | Block res; |
84 | |
85 | for (const auto & elem : block) |
86 | { |
87 | if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(elem.type.get())) |
88 | { |
89 | if (const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(type_arr->getNestedType().get())) |
90 | { |
91 | const DataTypes & element_types = type_tuple->getElements(); |
92 | const Strings & names = type_tuple->getElementNames(); |
93 | size_t tuple_size = element_types.size(); |
94 | |
95 | bool is_const = isColumnConst(*elem.column); |
96 | const ColumnArray * column_array; |
97 | if (is_const) |
98 | column_array = typeid_cast<const ColumnArray *>(&assert_cast<const ColumnConst &>(*elem.column).getDataColumn()); |
99 | else |
100 | column_array = typeid_cast<const ColumnArray *>(elem.column.get()); |
101 | |
102 | const ColumnPtr & column_offsets = column_array->getOffsetsPtr(); |
103 | |
104 | const ColumnTuple & column_tuple = typeid_cast<const ColumnTuple &>(column_array->getData()); |
105 | const auto & element_columns = column_tuple.getColumns(); |
106 | |
107 | for (size_t i = 0; i < tuple_size; ++i) |
108 | { |
109 | String nested_name = concatenateName(elem.name, names[i]); |
110 | ColumnPtr column_array_of_element = ColumnArray::create(element_columns[i], column_offsets); |
111 | |
112 | res.insert(ColumnWithTypeAndName( |
113 | is_const |
114 | ? ColumnConst::create(std::move(column_array_of_element), block.rows()) |
115 | : std::move(column_array_of_element), |
116 | std::make_shared<DataTypeArray>(element_types[i]), |
117 | nested_name)); |
118 | } |
119 | } |
120 | else |
121 | res.insert(elem); |
122 | } |
123 | else |
124 | res.insert(elem); |
125 | } |
126 | |
127 | return res; |
128 | } |
129 | |
130 | |
131 | NamesAndTypesList collect(const NamesAndTypesList & names_and_types) |
132 | { |
133 | NamesAndTypesList res; |
134 | |
135 | std::map<std::string, NamesAndTypesList> nested; |
136 | for (const auto & name_type : names_and_types) |
137 | { |
138 | bool collected = false; |
139 | if (const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(name_type.type.get())) |
140 | { |
141 | auto splitted = splitName(name_type.name); |
142 | if (!splitted.second.empty()) |
143 | { |
144 | nested[splitted.first].emplace_back(splitted.second, type_arr->getNestedType()); |
145 | collected = true; |
146 | } |
147 | } |
148 | |
149 | if (!collected) |
150 | res.push_back(name_type); |
151 | } |
152 | |
153 | for (const auto & name_elems : nested) |
154 | res.emplace_back(name_elems.first, std::make_shared<DataTypeArray>( |
155 | std::make_shared<DataTypeTuple>(name_elems.second.getTypes(), name_elems.second.getNames()))); |
156 | |
157 | return res; |
158 | } |
159 | |
160 | |
161 | void validateArraySizes(const Block & block) |
162 | { |
163 | /// Nested prefix -> position of first column in block. |
164 | std::map<std::string, size_t> nested; |
165 | |
166 | for (size_t i = 0, size = block.columns(); i < size; ++i) |
167 | { |
168 | const auto & elem = block.getByPosition(i); |
169 | |
170 | if (isArray(elem.type)) |
171 | { |
172 | if (!typeid_cast<const ColumnArray *>(elem.column.get())) |
173 | throw Exception("Column with Array type is not represented by ColumnArray column: " + elem.column->dumpStructure(), ErrorCodes::ILLEGAL_COLUMN); |
174 | |
175 | auto splitted = splitName(elem.name); |
176 | |
177 | /// Is it really a column of Nested data structure. |
178 | if (!splitted.second.empty()) |
179 | { |
180 | auto [it, inserted] = nested.emplace(splitted.first, i); |
181 | |
182 | /// It's not the first column of Nested data structure. |
183 | if (!inserted) |
184 | { |
185 | const ColumnArray & first_array_column = assert_cast<const ColumnArray &>(*block.getByPosition(it->second).column); |
186 | const ColumnArray & another_array_column = assert_cast<const ColumnArray &>(*elem.column); |
187 | |
188 | if (!first_array_column.hasEqualOffsets(another_array_column)) |
189 | throw Exception("Elements '" + block.getByPosition(it->second).name |
190 | + "' and '" + elem.name |
191 | + "' of Nested data structure '" + splitted.first |
192 | + "' (Array columns) have different array sizes." , ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); |
193 | } |
194 | } |
195 | } |
196 | } |
197 | } |
198 | |
199 | } |
200 | |
201 | } |
202 | |