1#include "DictionaryStructure.h"
2#include <Columns/IColumn.h>
3#include <DataTypes/DataTypeFactory.h>
4#include <DataTypes/DataTypeNullable.h>
5#include <Formats/FormatSettings.h>
6#include <IO/WriteHelpers.h>
7#include <Common/StringUtils/StringUtils.h>
8
9#include <numeric>
10#include <unordered_map>
11#include <unordered_set>
12#include <ext/range.h>
13
14
15namespace DB
16{
/// Error codes used in this file. They are only declared here (`extern`);
/// the definitions must be provided elsewhere.
namespace ErrorCodes
{
    extern const int UNKNOWN_TYPE;          /// Thrown by getAttributeUnderlyingType for an unrecognised type name.
    extern const int ARGUMENT_OUT_OF_BOUND; /// Thrown by toString for an enum value outside the handled set.
    extern const int TYPE_MISMATCH;         /// Thrown by validateKeyTypes.
    extern const int BAD_ARGUMENTS;         /// Thrown for an invalid dictionary structure configuration.
}
24
25namespace
26{
27 DictionaryTypedSpecialAttribute makeDictionaryTypedSpecialAttribute(
28 const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const std::string & default_type)
29 {
30 const auto name = config.getString(config_prefix + ".name", "");
31 const auto expression = config.getString(config_prefix + ".expression", "");
32
33 if (name.empty() && !expression.empty())
34 throw Exception{"Element " + config_prefix + ".name is empty", ErrorCodes::BAD_ARGUMENTS};
35
36 const auto type_name = config.getString(config_prefix + ".type", default_type);
37 return DictionaryTypedSpecialAttribute{std::move(name), std::move(expression), DataTypeFactory::instance().get(type_name)};
38 }
39
40}
41
42
43AttributeUnderlyingType getAttributeUnderlyingType(const std::string & type)
44{
45 static const std::unordered_map<std::string, AttributeUnderlyingType> dictionary{
46 {"UInt8", AttributeUnderlyingType::utUInt8},
47 {"UInt16", AttributeUnderlyingType::utUInt16},
48 {"UInt32", AttributeUnderlyingType::utUInt32},
49 {"UInt64", AttributeUnderlyingType::utUInt64},
50 {"UUID", AttributeUnderlyingType::utUInt128},
51 {"Int8", AttributeUnderlyingType::utInt8},
52 {"Int16", AttributeUnderlyingType::utInt16},
53 {"Int32", AttributeUnderlyingType::utInt32},
54 {"Int64", AttributeUnderlyingType::utInt64},
55 {"Float32", AttributeUnderlyingType::utFloat32},
56 {"Float64", AttributeUnderlyingType::utFloat64},
57 {"String", AttributeUnderlyingType::utString},
58 {"Date", AttributeUnderlyingType::utUInt16},
59 {"DateTime", AttributeUnderlyingType::utUInt32},
60 };
61
62 const auto it = dictionary.find(type);
63 if (it != std::end(dictionary))
64 return it->second;
65
66 if (type.find("Decimal") == 0)
67 {
68 size_t start = strlen("Decimal");
69 if (type.find("32", start) == start)
70 return AttributeUnderlyingType::utDecimal32;
71 if (type.find("64", start) == start)
72 return AttributeUnderlyingType::utDecimal64;
73 if (type.find("128", start) == start)
74 return AttributeUnderlyingType::utDecimal128;
75 }
76
77 throw Exception{"Unknown type " + type, ErrorCodes::UNKNOWN_TYPE};
78}
79
80
81std::string toString(const AttributeUnderlyingType type)
82{
83 switch (type)
84 {
85 case AttributeUnderlyingType::utUInt8:
86 return "UInt8";
87 case AttributeUnderlyingType::utUInt16:
88 return "UInt16";
89 case AttributeUnderlyingType::utUInt32:
90 return "UInt32";
91 case AttributeUnderlyingType::utUInt64:
92 return "UInt64";
93 case AttributeUnderlyingType::utUInt128:
94 return "UUID";
95 case AttributeUnderlyingType::utInt8:
96 return "Int8";
97 case AttributeUnderlyingType::utInt16:
98 return "Int16";
99 case AttributeUnderlyingType::utInt32:
100 return "Int32";
101 case AttributeUnderlyingType::utInt64:
102 return "Int64";
103 case AttributeUnderlyingType::utFloat32:
104 return "Float32";
105 case AttributeUnderlyingType::utFloat64:
106 return "Float64";
107 case AttributeUnderlyingType::utDecimal32:
108 return "Decimal32";
109 case AttributeUnderlyingType::utDecimal64:
110 return "Decimal64";
111 case AttributeUnderlyingType::utDecimal128:
112 return "Decimal128";
113 case AttributeUnderlyingType::utString:
114 return "String";
115 }
116
117 throw Exception{"Unknown attribute_type " + toString(static_cast<int>(type)), ErrorCodes::ARGUMENT_OUT_OF_BOUND};
118}
119
120
121DictionarySpecialAttribute::DictionarySpecialAttribute(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
122 : name{config.getString(config_prefix + ".name", "")}, expression{config.getString(config_prefix + ".expression", "")}
123{
124 if (name.empty() && !expression.empty())
125 throw Exception{"Element " + config_prefix + ".name is empty", ErrorCodes::BAD_ARGUMENTS};
126}
127
128
129DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
130{
131 const auto has_id = config.has(config_prefix + ".id");
132 const auto has_key = config.has(config_prefix + ".key");
133
134 if (has_key && has_id)
135 throw Exception{"Only one of 'id' and 'key' should be specified", ErrorCodes::BAD_ARGUMENTS};
136
137 if (has_id)
138 id.emplace(config, config_prefix + ".id");
139 else if (has_key)
140 {
141 key.emplace(getAttributes(config, config_prefix + ".key", false, false));
142 if (key->empty())
143 throw Exception{"Empty 'key' supplied", ErrorCodes::BAD_ARGUMENTS};
144 }
145 else
146 throw Exception{"Dictionary structure should specify either 'id' or 'key'", ErrorCodes::BAD_ARGUMENTS};
147
148 if (id)
149 {
150 if (id->name.empty())
151 throw Exception{"'id' cannot be empty", ErrorCodes::BAD_ARGUMENTS};
152
153 const auto range_default_type = "Date";
154 if (config.has(config_prefix + ".range_min"))
155 range_min.emplace(makeDictionaryTypedSpecialAttribute(config, config_prefix + ".range_min", range_default_type));
156
157 if (config.has(config_prefix + ".range_max"))
158 range_max.emplace(makeDictionaryTypedSpecialAttribute(config, config_prefix + ".range_max", range_default_type));
159
160 if (range_min.has_value() != range_max.has_value())
161 {
162 throw Exception{"Dictionary structure should have both 'range_min' and 'range_max' either specified or not.",
163 ErrorCodes::BAD_ARGUMENTS};
164 }
165
166 if (range_min && range_max && !range_min->type->equals(*range_max->type))
167 {
168 throw Exception{"Dictionary structure 'range_min' and 'range_max' should have same type, "
169 "'range_min' type: "
170 + range_min->type->getName()
171 + ", "
172 "'range_max' type: "
173 + range_max->type->getName(),
174 ErrorCodes::BAD_ARGUMENTS};
175 }
176
177 if (range_min)
178 {
179 if (!range_min->type->isValueRepresentedByInteger())
180 throw Exception{"Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum."
181 " Actual 'range_min' and 'range_max' type is "
182 + range_min->type->getName(),
183 ErrorCodes::BAD_ARGUMENTS};
184 }
185
186 if (!id->expression.empty() || (range_min && !range_min->expression.empty()) || (range_max && !range_max->expression.empty()))
187 has_expressions = true;
188 }
189
190 attributes = getAttributes(config, config_prefix);
191 if (attributes.empty())
192 throw Exception{"Dictionary has no attributes defined", ErrorCodes::BAD_ARGUMENTS};
193}
194
195
196void DictionaryStructure::validateKeyTypes(const DataTypes & key_types) const
197{
198 if (key_types.size() != key->size())
199 throw Exception{"Key structure does not match, expected " + getKeyDescription(), ErrorCodes::TYPE_MISMATCH};
200
201 for (const auto i : ext::range(0, key_types.size()))
202 {
203 const auto & expected_type = (*key)[i].type->getName();
204 const auto & actual_type = key_types[i]->getName();
205
206 if (expected_type != actual_type)
207 throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type + ", found "
208 + actual_type,
209 ErrorCodes::TYPE_MISMATCH};
210 }
211}
212
213
214std::string DictionaryStructure::getKeyDescription() const
215{
216 if (id)
217 return "UInt64";
218
219 std::ostringstream out;
220
221 out << '(';
222
223 auto first = true;
224 for (const auto & key_i : *key)
225 {
226 if (!first)
227 out << ", ";
228
229 first = false;
230
231 out << key_i.type->getName();
232 }
233
234 out << ')';
235
236 return out.str();
237}
238
239
240bool DictionaryStructure::isKeySizeFixed() const
241{
242 if (!key)
243 return true;
244
245 for (const auto & key_i : *key)
246 if (key_i.underlying_type == AttributeUnderlyingType::utString)
247 return false;
248
249 return true;
250}
251
252size_t DictionaryStructure::getKeySize() const
253{
254 return std::accumulate(std::begin(*key), std::end(*key), size_t{}, [](const auto running_size, const auto & key_i)
255 {
256 return running_size + key_i.type->getSizeOfValueInMemory();
257 });
258}
259
260
261static void checkAttributeKeys(const Poco::Util::AbstractConfiguration::Keys & keys)
262{
263 static const std::unordered_set<std::string> valid_keys
264 = {"name", "type", "expression", "null_value", "hierarchical", "injective", "is_object_id"};
265
266 for (const auto & key : keys)
267 {
268 if (valid_keys.find(key) == valid_keys.end())
269 throw Exception{"Unknown key '" + key + "' inside attribute section", ErrorCodes::BAD_ARGUMENTS};
270 }
271}
272
273
274std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
275 const Poco::Util::AbstractConfiguration & config,
276 const std::string & config_prefix,
277 const bool hierarchy_allowed,
278 const bool allow_null_values)
279{
280 Poco::Util::AbstractConfiguration::Keys config_elems;
281 config.keys(config_prefix, config_elems);
282 auto has_hierarchy = false;
283
284 std::vector<DictionaryAttribute> res_attributes;
285
286 const FormatSettings format_settings;
287
288 for (const auto & config_elem : config_elems)
289 {
290 if (!startsWith(config_elem.data(), "attribute"))
291 continue;
292
293 const auto prefix = config_prefix + '.' + config_elem + '.';
294 Poco::Util::AbstractConfiguration::Keys attribute_keys;
295 config.keys(config_prefix + '.' + config_elem, attribute_keys);
296
297 checkAttributeKeys(attribute_keys);
298
299 const auto name = config.getString(prefix + "name");
300 const auto type_string = config.getString(prefix + "type");
301 const auto type = DataTypeFactory::instance().get(type_string);
302 const auto underlying_type = getAttributeUnderlyingType(type_string);
303
304 const auto expression = config.getString(prefix + "expression", "");
305 if (!expression.empty())
306 has_expressions = true;
307
308 Field null_value;
309 if (allow_null_values)
310 {
311 const auto null_value_string = config.getString(prefix + "null_value");
312 try
313 {
314 if (null_value_string.empty())
315 null_value = type->getDefault();
316 else
317 {
318 ReadBufferFromString null_value_buffer{null_value_string};
319 auto column_with_null_value = type->createColumn();
320 type->deserializeAsTextEscaped(*column_with_null_value, null_value_buffer, format_settings);
321 null_value = (*column_with_null_value)[0];
322 }
323 }
324 catch (Exception & e)
325 {
326 e.addMessage("error parsing null_value");
327 throw;
328 }
329 }
330
331 const auto hierarchical = config.getBool(prefix + "hierarchical", false);
332 const auto injective = config.getBool(prefix + "injective", false);
333 const auto is_object_id = config.getBool(prefix + "is_object_id", false);
334 if (name.empty())
335 throw Exception{"Properties 'name' and 'type' of an attribute cannot be empty", ErrorCodes::BAD_ARGUMENTS};
336
337 if (has_hierarchy && !hierarchy_allowed)
338 throw Exception{"Hierarchy not allowed in '" + prefix, ErrorCodes::BAD_ARGUMENTS};
339
340 if (has_hierarchy && hierarchical)
341 throw Exception{"Only one hierarchical attribute supported", ErrorCodes::BAD_ARGUMENTS};
342
343 has_hierarchy = has_hierarchy || hierarchical;
344
345 res_attributes.emplace_back(
346 DictionaryAttribute{name, underlying_type, type, expression, null_value, hierarchical, injective, is_object_id});
347 }
348
349 return res_attributes;
350}
351
352}
353