1 | #include "DictionaryStructure.h" |
2 | #include <Columns/IColumn.h> |
3 | #include <DataTypes/DataTypeFactory.h> |
4 | #include <DataTypes/DataTypeNullable.h> |
5 | #include <Formats/FormatSettings.h> |
6 | #include <IO/WriteHelpers.h> |
7 | #include <Common/StringUtils/StringUtils.h> |
8 | |
9 | #include <numeric> |
10 | #include <unordered_map> |
11 | #include <unordered_set> |
12 | #include <ext/range.h> |
13 | |
14 | |
15 | namespace DB |
16 | { |
17 | namespace ErrorCodes |
18 | { |
19 | extern const int UNKNOWN_TYPE; |
20 | extern const int ARGUMENT_OUT_OF_BOUND; |
21 | extern const int TYPE_MISMATCH; |
22 | extern const int BAD_ARGUMENTS; |
23 | } |
24 | |
25 | namespace |
26 | { |
27 | DictionaryTypedSpecialAttribute makeDictionaryTypedSpecialAttribute( |
28 | const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const std::string & default_type) |
29 | { |
30 | const auto name = config.getString(config_prefix + ".name" , "" ); |
31 | const auto expression = config.getString(config_prefix + ".expression" , "" ); |
32 | |
33 | if (name.empty() && !expression.empty()) |
34 | throw Exception{"Element " + config_prefix + ".name is empty" , ErrorCodes::BAD_ARGUMENTS}; |
35 | |
36 | const auto type_name = config.getString(config_prefix + ".type" , default_type); |
37 | return DictionaryTypedSpecialAttribute{std::move(name), std::move(expression), DataTypeFactory::instance().get(type_name)}; |
38 | } |
39 | |
40 | } |
41 | |
42 | |
43 | AttributeUnderlyingType getAttributeUnderlyingType(const std::string & type) |
44 | { |
45 | static const std::unordered_map<std::string, AttributeUnderlyingType> dictionary{ |
46 | {"UInt8" , AttributeUnderlyingType::utUInt8}, |
47 | {"UInt16" , AttributeUnderlyingType::utUInt16}, |
48 | {"UInt32" , AttributeUnderlyingType::utUInt32}, |
49 | {"UInt64" , AttributeUnderlyingType::utUInt64}, |
50 | {"UUID" , AttributeUnderlyingType::utUInt128}, |
51 | {"Int8" , AttributeUnderlyingType::utInt8}, |
52 | {"Int16" , AttributeUnderlyingType::utInt16}, |
53 | {"Int32" , AttributeUnderlyingType::utInt32}, |
54 | {"Int64" , AttributeUnderlyingType::utInt64}, |
55 | {"Float32" , AttributeUnderlyingType::utFloat32}, |
56 | {"Float64" , AttributeUnderlyingType::utFloat64}, |
57 | {"String" , AttributeUnderlyingType::utString}, |
58 | {"Date" , AttributeUnderlyingType::utUInt16}, |
59 | {"DateTime" , AttributeUnderlyingType::utUInt32}, |
60 | }; |
61 | |
62 | const auto it = dictionary.find(type); |
63 | if (it != std::end(dictionary)) |
64 | return it->second; |
65 | |
66 | if (type.find("Decimal" ) == 0) |
67 | { |
68 | size_t start = strlen("Decimal" ); |
69 | if (type.find("32" , start) == start) |
70 | return AttributeUnderlyingType::utDecimal32; |
71 | if (type.find("64" , start) == start) |
72 | return AttributeUnderlyingType::utDecimal64; |
73 | if (type.find("128" , start) == start) |
74 | return AttributeUnderlyingType::utDecimal128; |
75 | } |
76 | |
77 | throw Exception{"Unknown type " + type, ErrorCodes::UNKNOWN_TYPE}; |
78 | } |
79 | |
80 | |
81 | std::string toString(const AttributeUnderlyingType type) |
82 | { |
83 | switch (type) |
84 | { |
85 | case AttributeUnderlyingType::utUInt8: |
86 | return "UInt8" ; |
87 | case AttributeUnderlyingType::utUInt16: |
88 | return "UInt16" ; |
89 | case AttributeUnderlyingType::utUInt32: |
90 | return "UInt32" ; |
91 | case AttributeUnderlyingType::utUInt64: |
92 | return "UInt64" ; |
93 | case AttributeUnderlyingType::utUInt128: |
94 | return "UUID" ; |
95 | case AttributeUnderlyingType::utInt8: |
96 | return "Int8" ; |
97 | case AttributeUnderlyingType::utInt16: |
98 | return "Int16" ; |
99 | case AttributeUnderlyingType::utInt32: |
100 | return "Int32" ; |
101 | case AttributeUnderlyingType::utInt64: |
102 | return "Int64" ; |
103 | case AttributeUnderlyingType::utFloat32: |
104 | return "Float32" ; |
105 | case AttributeUnderlyingType::utFloat64: |
106 | return "Float64" ; |
107 | case AttributeUnderlyingType::utDecimal32: |
108 | return "Decimal32" ; |
109 | case AttributeUnderlyingType::utDecimal64: |
110 | return "Decimal64" ; |
111 | case AttributeUnderlyingType::utDecimal128: |
112 | return "Decimal128" ; |
113 | case AttributeUnderlyingType::utString: |
114 | return "String" ; |
115 | } |
116 | |
117 | throw Exception{"Unknown attribute_type " + toString(static_cast<int>(type)), ErrorCodes::ARGUMENT_OUT_OF_BOUND}; |
118 | } |
119 | |
120 | |
121 | DictionarySpecialAttribute::DictionarySpecialAttribute(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) |
122 | : name{config.getString(config_prefix + ".name" , "" )}, expression{config.getString(config_prefix + ".expression" , "" )} |
123 | { |
124 | if (name.empty() && !expression.empty()) |
125 | throw Exception{"Element " + config_prefix + ".name is empty" , ErrorCodes::BAD_ARGUMENTS}; |
126 | } |
127 | |
128 | |
129 | DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) |
130 | { |
131 | const auto has_id = config.has(config_prefix + ".id" ); |
132 | const auto has_key = config.has(config_prefix + ".key" ); |
133 | |
134 | if (has_key && has_id) |
135 | throw Exception{"Only one of 'id' and 'key' should be specified" , ErrorCodes::BAD_ARGUMENTS}; |
136 | |
137 | if (has_id) |
138 | id.emplace(config, config_prefix + ".id" ); |
139 | else if (has_key) |
140 | { |
141 | key.emplace(getAttributes(config, config_prefix + ".key" , false, false)); |
142 | if (key->empty()) |
143 | throw Exception{"Empty 'key' supplied" , ErrorCodes::BAD_ARGUMENTS}; |
144 | } |
145 | else |
146 | throw Exception{"Dictionary structure should specify either 'id' or 'key'" , ErrorCodes::BAD_ARGUMENTS}; |
147 | |
148 | if (id) |
149 | { |
150 | if (id->name.empty()) |
151 | throw Exception{"'id' cannot be empty" , ErrorCodes::BAD_ARGUMENTS}; |
152 | |
153 | const auto range_default_type = "Date" ; |
154 | if (config.has(config_prefix + ".range_min" )) |
155 | range_min.emplace(makeDictionaryTypedSpecialAttribute(config, config_prefix + ".range_min" , range_default_type)); |
156 | |
157 | if (config.has(config_prefix + ".range_max" )) |
158 | range_max.emplace(makeDictionaryTypedSpecialAttribute(config, config_prefix + ".range_max" , range_default_type)); |
159 | |
160 | if (range_min.has_value() != range_max.has_value()) |
161 | { |
162 | throw Exception{"Dictionary structure should have both 'range_min' and 'range_max' either specified or not." , |
163 | ErrorCodes::BAD_ARGUMENTS}; |
164 | } |
165 | |
166 | if (range_min && range_max && !range_min->type->equals(*range_max->type)) |
167 | { |
168 | throw Exception{"Dictionary structure 'range_min' and 'range_max' should have same type, " |
169 | "'range_min' type: " |
170 | + range_min->type->getName() |
171 | + ", " |
172 | "'range_max' type: " |
173 | + range_max->type->getName(), |
174 | ErrorCodes::BAD_ARGUMENTS}; |
175 | } |
176 | |
177 | if (range_min) |
178 | { |
179 | if (!range_min->type->isValueRepresentedByInteger()) |
180 | throw Exception{"Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum." |
181 | " Actual 'range_min' and 'range_max' type is " |
182 | + range_min->type->getName(), |
183 | ErrorCodes::BAD_ARGUMENTS}; |
184 | } |
185 | |
186 | if (!id->expression.empty() || (range_min && !range_min->expression.empty()) || (range_max && !range_max->expression.empty())) |
187 | has_expressions = true; |
188 | } |
189 | |
190 | attributes = getAttributes(config, config_prefix); |
191 | if (attributes.empty()) |
192 | throw Exception{"Dictionary has no attributes defined" , ErrorCodes::BAD_ARGUMENTS}; |
193 | } |
194 | |
195 | |
196 | void DictionaryStructure::validateKeyTypes(const DataTypes & key_types) const |
197 | { |
198 | if (key_types.size() != key->size()) |
199 | throw Exception{"Key structure does not match, expected " + getKeyDescription(), ErrorCodes::TYPE_MISMATCH}; |
200 | |
201 | for (const auto i : ext::range(0, key_types.size())) |
202 | { |
203 | const auto & expected_type = (*key)[i].type->getName(); |
204 | const auto & actual_type = key_types[i]->getName(); |
205 | |
206 | if (expected_type != actual_type) |
207 | throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type + ", found " |
208 | + actual_type, |
209 | ErrorCodes::TYPE_MISMATCH}; |
210 | } |
211 | } |
212 | |
213 | |
214 | std::string DictionaryStructure::getKeyDescription() const |
215 | { |
216 | if (id) |
217 | return "UInt64" ; |
218 | |
219 | std::ostringstream out; |
220 | |
221 | out << '('; |
222 | |
223 | auto first = true; |
224 | for (const auto & key_i : *key) |
225 | { |
226 | if (!first) |
227 | out << ", " ; |
228 | |
229 | first = false; |
230 | |
231 | out << key_i.type->getName(); |
232 | } |
233 | |
234 | out << ')'; |
235 | |
236 | return out.str(); |
237 | } |
238 | |
239 | |
240 | bool DictionaryStructure::isKeySizeFixed() const |
241 | { |
242 | if (!key) |
243 | return true; |
244 | |
245 | for (const auto & key_i : *key) |
246 | if (key_i.underlying_type == AttributeUnderlyingType::utString) |
247 | return false; |
248 | |
249 | return true; |
250 | } |
251 | |
252 | size_t DictionaryStructure::getKeySize() const |
253 | { |
254 | return std::accumulate(std::begin(*key), std::end(*key), size_t{}, [](const auto running_size, const auto & key_i) |
255 | { |
256 | return running_size + key_i.type->getSizeOfValueInMemory(); |
257 | }); |
258 | } |
259 | |
260 | |
261 | static void checkAttributeKeys(const Poco::Util::AbstractConfiguration::Keys & keys) |
262 | { |
263 | static const std::unordered_set<std::string> valid_keys |
264 | = {"name" , "type" , "expression" , "null_value" , "hierarchical" , "injective" , "is_object_id" }; |
265 | |
266 | for (const auto & key : keys) |
267 | { |
268 | if (valid_keys.find(key) == valid_keys.end()) |
269 | throw Exception{"Unknown key '" + key + "' inside attribute section" , ErrorCodes::BAD_ARGUMENTS}; |
270 | } |
271 | } |
272 | |
273 | |
274 | std::vector<DictionaryAttribute> DictionaryStructure::getAttributes( |
275 | const Poco::Util::AbstractConfiguration & config, |
276 | const std::string & config_prefix, |
277 | const bool hierarchy_allowed, |
278 | const bool allow_null_values) |
279 | { |
280 | Poco::Util::AbstractConfiguration::Keys config_elems; |
281 | config.keys(config_prefix, config_elems); |
282 | auto has_hierarchy = false; |
283 | |
284 | std::vector<DictionaryAttribute> res_attributes; |
285 | |
286 | const FormatSettings format_settings; |
287 | |
288 | for (const auto & config_elem : config_elems) |
289 | { |
290 | if (!startsWith(config_elem.data(), "attribute" )) |
291 | continue; |
292 | |
293 | const auto prefix = config_prefix + '.' + config_elem + '.'; |
294 | Poco::Util::AbstractConfiguration::Keys attribute_keys; |
295 | config.keys(config_prefix + '.' + config_elem, attribute_keys); |
296 | |
297 | checkAttributeKeys(attribute_keys); |
298 | |
299 | const auto name = config.getString(prefix + "name" ); |
300 | const auto type_string = config.getString(prefix + "type" ); |
301 | const auto type = DataTypeFactory::instance().get(type_string); |
302 | const auto underlying_type = getAttributeUnderlyingType(type_string); |
303 | |
304 | const auto expression = config.getString(prefix + "expression" , "" ); |
305 | if (!expression.empty()) |
306 | has_expressions = true; |
307 | |
308 | Field null_value; |
309 | if (allow_null_values) |
310 | { |
311 | const auto null_value_string = config.getString(prefix + "null_value" ); |
312 | try |
313 | { |
314 | if (null_value_string.empty()) |
315 | null_value = type->getDefault(); |
316 | else |
317 | { |
318 | ReadBufferFromString null_value_buffer{null_value_string}; |
319 | auto column_with_null_value = type->createColumn(); |
320 | type->deserializeAsTextEscaped(*column_with_null_value, null_value_buffer, format_settings); |
321 | null_value = (*column_with_null_value)[0]; |
322 | } |
323 | } |
324 | catch (Exception & e) |
325 | { |
326 | e.addMessage("error parsing null_value" ); |
327 | throw; |
328 | } |
329 | } |
330 | |
331 | const auto hierarchical = config.getBool(prefix + "hierarchical" , false); |
332 | const auto injective = config.getBool(prefix + "injective" , false); |
333 | const auto is_object_id = config.getBool(prefix + "is_object_id" , false); |
334 | if (name.empty()) |
335 | throw Exception{"Properties 'name' and 'type' of an attribute cannot be empty" , ErrorCodes::BAD_ARGUMENTS}; |
336 | |
337 | if (has_hierarchy && !hierarchy_allowed) |
338 | throw Exception{"Hierarchy not allowed in '" + prefix, ErrorCodes::BAD_ARGUMENTS}; |
339 | |
340 | if (has_hierarchy && hierarchical) |
341 | throw Exception{"Only one hierarchical attribute supported" , ErrorCodes::BAD_ARGUMENTS}; |
342 | |
343 | has_hierarchy = has_hierarchy || hierarchical; |
344 | |
345 | res_attributes.emplace_back( |
346 | DictionaryAttribute{name, underlying_type, type, expression, null_value, hierarchical, injective, is_object_id}); |
347 | } |
348 | |
349 | return res_attributes; |
350 | } |
351 | |
352 | } |
353 | |