| 1 | #include "DictionaryStructure.h" |
| 2 | #include <Columns/IColumn.h> |
| 3 | #include <DataTypes/DataTypeFactory.h> |
| 4 | #include <DataTypes/DataTypeNullable.h> |
| 5 | #include <Formats/FormatSettings.h> |
| 6 | #include <IO/WriteHelpers.h> |
| 7 | #include <Common/StringUtils/StringUtils.h> |
| 8 | |
| 9 | #include <numeric> |
| 10 | #include <unordered_map> |
| 11 | #include <unordered_set> |
| 12 | #include <ext/range.h> |
| 13 | |
| 14 | |
| 15 | namespace DB |
| 16 | { |
/// Error codes thrown from this file. They are only declared here (extern, no initializer),
/// so their definitions must come from another translation unit.
namespace ErrorCodes
{
extern const int UNKNOWN_TYPE;
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int TYPE_MISMATCH;
extern const int BAD_ARGUMENTS;
}
| 24 | |
| 25 | namespace |
| 26 | { |
| 27 | DictionaryTypedSpecialAttribute makeDictionaryTypedSpecialAttribute( |
| 28 | const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const std::string & default_type) |
| 29 | { |
| 30 | const auto name = config.getString(config_prefix + ".name" , "" ); |
| 31 | const auto expression = config.getString(config_prefix + ".expression" , "" ); |
| 32 | |
| 33 | if (name.empty() && !expression.empty()) |
| 34 | throw Exception{"Element " + config_prefix + ".name is empty" , ErrorCodes::BAD_ARGUMENTS}; |
| 35 | |
| 36 | const auto type_name = config.getString(config_prefix + ".type" , default_type); |
| 37 | return DictionaryTypedSpecialAttribute{std::move(name), std::move(expression), DataTypeFactory::instance().get(type_name)}; |
| 38 | } |
| 39 | |
| 40 | } |
| 41 | |
| 42 | |
| 43 | AttributeUnderlyingType getAttributeUnderlyingType(const std::string & type) |
| 44 | { |
| 45 | static const std::unordered_map<std::string, AttributeUnderlyingType> dictionary{ |
| 46 | {"UInt8" , AttributeUnderlyingType::utUInt8}, |
| 47 | {"UInt16" , AttributeUnderlyingType::utUInt16}, |
| 48 | {"UInt32" , AttributeUnderlyingType::utUInt32}, |
| 49 | {"UInt64" , AttributeUnderlyingType::utUInt64}, |
| 50 | {"UUID" , AttributeUnderlyingType::utUInt128}, |
| 51 | {"Int8" , AttributeUnderlyingType::utInt8}, |
| 52 | {"Int16" , AttributeUnderlyingType::utInt16}, |
| 53 | {"Int32" , AttributeUnderlyingType::utInt32}, |
| 54 | {"Int64" , AttributeUnderlyingType::utInt64}, |
| 55 | {"Float32" , AttributeUnderlyingType::utFloat32}, |
| 56 | {"Float64" , AttributeUnderlyingType::utFloat64}, |
| 57 | {"String" , AttributeUnderlyingType::utString}, |
| 58 | {"Date" , AttributeUnderlyingType::utUInt16}, |
| 59 | {"DateTime" , AttributeUnderlyingType::utUInt32}, |
| 60 | }; |
| 61 | |
| 62 | const auto it = dictionary.find(type); |
| 63 | if (it != std::end(dictionary)) |
| 64 | return it->second; |
| 65 | |
| 66 | if (type.find("Decimal" ) == 0) |
| 67 | { |
| 68 | size_t start = strlen("Decimal" ); |
| 69 | if (type.find("32" , start) == start) |
| 70 | return AttributeUnderlyingType::utDecimal32; |
| 71 | if (type.find("64" , start) == start) |
| 72 | return AttributeUnderlyingType::utDecimal64; |
| 73 | if (type.find("128" , start) == start) |
| 74 | return AttributeUnderlyingType::utDecimal128; |
| 75 | } |
| 76 | |
| 77 | throw Exception{"Unknown type " + type, ErrorCodes::UNKNOWN_TYPE}; |
| 78 | } |
| 79 | |
| 80 | |
| 81 | std::string toString(const AttributeUnderlyingType type) |
| 82 | { |
| 83 | switch (type) |
| 84 | { |
| 85 | case AttributeUnderlyingType::utUInt8: |
| 86 | return "UInt8" ; |
| 87 | case AttributeUnderlyingType::utUInt16: |
| 88 | return "UInt16" ; |
| 89 | case AttributeUnderlyingType::utUInt32: |
| 90 | return "UInt32" ; |
| 91 | case AttributeUnderlyingType::utUInt64: |
| 92 | return "UInt64" ; |
| 93 | case AttributeUnderlyingType::utUInt128: |
| 94 | return "UUID" ; |
| 95 | case AttributeUnderlyingType::utInt8: |
| 96 | return "Int8" ; |
| 97 | case AttributeUnderlyingType::utInt16: |
| 98 | return "Int16" ; |
| 99 | case AttributeUnderlyingType::utInt32: |
| 100 | return "Int32" ; |
| 101 | case AttributeUnderlyingType::utInt64: |
| 102 | return "Int64" ; |
| 103 | case AttributeUnderlyingType::utFloat32: |
| 104 | return "Float32" ; |
| 105 | case AttributeUnderlyingType::utFloat64: |
| 106 | return "Float64" ; |
| 107 | case AttributeUnderlyingType::utDecimal32: |
| 108 | return "Decimal32" ; |
| 109 | case AttributeUnderlyingType::utDecimal64: |
| 110 | return "Decimal64" ; |
| 111 | case AttributeUnderlyingType::utDecimal128: |
| 112 | return "Decimal128" ; |
| 113 | case AttributeUnderlyingType::utString: |
| 114 | return "String" ; |
| 115 | } |
| 116 | |
| 117 | throw Exception{"Unknown attribute_type " + toString(static_cast<int>(type)), ErrorCodes::ARGUMENT_OUT_OF_BOUND}; |
| 118 | } |
| 119 | |
| 120 | |
| 121 | DictionarySpecialAttribute::DictionarySpecialAttribute(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) |
| 122 | : name{config.getString(config_prefix + ".name" , "" )}, expression{config.getString(config_prefix + ".expression" , "" )} |
| 123 | { |
| 124 | if (name.empty() && !expression.empty()) |
| 125 | throw Exception{"Element " + config_prefix + ".name is empty" , ErrorCodes::BAD_ARGUMENTS}; |
| 126 | } |
| 127 | |
| 128 | |
| 129 | DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) |
| 130 | { |
| 131 | const auto has_id = config.has(config_prefix + ".id" ); |
| 132 | const auto has_key = config.has(config_prefix + ".key" ); |
| 133 | |
| 134 | if (has_key && has_id) |
| 135 | throw Exception{"Only one of 'id' and 'key' should be specified" , ErrorCodes::BAD_ARGUMENTS}; |
| 136 | |
| 137 | if (has_id) |
| 138 | id.emplace(config, config_prefix + ".id" ); |
| 139 | else if (has_key) |
| 140 | { |
| 141 | key.emplace(getAttributes(config, config_prefix + ".key" , false, false)); |
| 142 | if (key->empty()) |
| 143 | throw Exception{"Empty 'key' supplied" , ErrorCodes::BAD_ARGUMENTS}; |
| 144 | } |
| 145 | else |
| 146 | throw Exception{"Dictionary structure should specify either 'id' or 'key'" , ErrorCodes::BAD_ARGUMENTS}; |
| 147 | |
| 148 | if (id) |
| 149 | { |
| 150 | if (id->name.empty()) |
| 151 | throw Exception{"'id' cannot be empty" , ErrorCodes::BAD_ARGUMENTS}; |
| 152 | |
| 153 | const auto range_default_type = "Date" ; |
| 154 | if (config.has(config_prefix + ".range_min" )) |
| 155 | range_min.emplace(makeDictionaryTypedSpecialAttribute(config, config_prefix + ".range_min" , range_default_type)); |
| 156 | |
| 157 | if (config.has(config_prefix + ".range_max" )) |
| 158 | range_max.emplace(makeDictionaryTypedSpecialAttribute(config, config_prefix + ".range_max" , range_default_type)); |
| 159 | |
| 160 | if (range_min.has_value() != range_max.has_value()) |
| 161 | { |
| 162 | throw Exception{"Dictionary structure should have both 'range_min' and 'range_max' either specified or not." , |
| 163 | ErrorCodes::BAD_ARGUMENTS}; |
| 164 | } |
| 165 | |
| 166 | if (range_min && range_max && !range_min->type->equals(*range_max->type)) |
| 167 | { |
| 168 | throw Exception{"Dictionary structure 'range_min' and 'range_max' should have same type, " |
| 169 | "'range_min' type: " |
| 170 | + range_min->type->getName() |
| 171 | + ", " |
| 172 | "'range_max' type: " |
| 173 | + range_max->type->getName(), |
| 174 | ErrorCodes::BAD_ARGUMENTS}; |
| 175 | } |
| 176 | |
| 177 | if (range_min) |
| 178 | { |
| 179 | if (!range_min->type->isValueRepresentedByInteger()) |
| 180 | throw Exception{"Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum." |
| 181 | " Actual 'range_min' and 'range_max' type is " |
| 182 | + range_min->type->getName(), |
| 183 | ErrorCodes::BAD_ARGUMENTS}; |
| 184 | } |
| 185 | |
| 186 | if (!id->expression.empty() || (range_min && !range_min->expression.empty()) || (range_max && !range_max->expression.empty())) |
| 187 | has_expressions = true; |
| 188 | } |
| 189 | |
| 190 | attributes = getAttributes(config, config_prefix); |
| 191 | if (attributes.empty()) |
| 192 | throw Exception{"Dictionary has no attributes defined" , ErrorCodes::BAD_ARGUMENTS}; |
| 193 | } |
| 194 | |
| 195 | |
| 196 | void DictionaryStructure::validateKeyTypes(const DataTypes & key_types) const |
| 197 | { |
| 198 | if (key_types.size() != key->size()) |
| 199 | throw Exception{"Key structure does not match, expected " + getKeyDescription(), ErrorCodes::TYPE_MISMATCH}; |
| 200 | |
| 201 | for (const auto i : ext::range(0, key_types.size())) |
| 202 | { |
| 203 | const auto & expected_type = (*key)[i].type->getName(); |
| 204 | const auto & actual_type = key_types[i]->getName(); |
| 205 | |
| 206 | if (expected_type != actual_type) |
| 207 | throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type + ", found " |
| 208 | + actual_type, |
| 209 | ErrorCodes::TYPE_MISMATCH}; |
| 210 | } |
| 211 | } |
| 212 | |
| 213 | |
| 214 | std::string DictionaryStructure::getKeyDescription() const |
| 215 | { |
| 216 | if (id) |
| 217 | return "UInt64" ; |
| 218 | |
| 219 | std::ostringstream out; |
| 220 | |
| 221 | out << '('; |
| 222 | |
| 223 | auto first = true; |
| 224 | for (const auto & key_i : *key) |
| 225 | { |
| 226 | if (!first) |
| 227 | out << ", " ; |
| 228 | |
| 229 | first = false; |
| 230 | |
| 231 | out << key_i.type->getName(); |
| 232 | } |
| 233 | |
| 234 | out << ')'; |
| 235 | |
| 236 | return out.str(); |
| 237 | } |
| 238 | |
| 239 | |
| 240 | bool DictionaryStructure::isKeySizeFixed() const |
| 241 | { |
| 242 | if (!key) |
| 243 | return true; |
| 244 | |
| 245 | for (const auto & key_i : *key) |
| 246 | if (key_i.underlying_type == AttributeUnderlyingType::utString) |
| 247 | return false; |
| 248 | |
| 249 | return true; |
| 250 | } |
| 251 | |
| 252 | size_t DictionaryStructure::getKeySize() const |
| 253 | { |
| 254 | return std::accumulate(std::begin(*key), std::end(*key), size_t{}, [](const auto running_size, const auto & key_i) |
| 255 | { |
| 256 | return running_size + key_i.type->getSizeOfValueInMemory(); |
| 257 | }); |
| 258 | } |
| 259 | |
| 260 | |
| 261 | static void checkAttributeKeys(const Poco::Util::AbstractConfiguration::Keys & keys) |
| 262 | { |
| 263 | static const std::unordered_set<std::string> valid_keys |
| 264 | = {"name" , "type" , "expression" , "null_value" , "hierarchical" , "injective" , "is_object_id" }; |
| 265 | |
| 266 | for (const auto & key : keys) |
| 267 | { |
| 268 | if (valid_keys.find(key) == valid_keys.end()) |
| 269 | throw Exception{"Unknown key '" + key + "' inside attribute section" , ErrorCodes::BAD_ARGUMENTS}; |
| 270 | } |
| 271 | } |
| 272 | |
| 273 | |
| 274 | std::vector<DictionaryAttribute> DictionaryStructure::getAttributes( |
| 275 | const Poco::Util::AbstractConfiguration & config, |
| 276 | const std::string & config_prefix, |
| 277 | const bool hierarchy_allowed, |
| 278 | const bool allow_null_values) |
| 279 | { |
| 280 | Poco::Util::AbstractConfiguration::Keys config_elems; |
| 281 | config.keys(config_prefix, config_elems); |
| 282 | auto has_hierarchy = false; |
| 283 | |
| 284 | std::vector<DictionaryAttribute> res_attributes; |
| 285 | |
| 286 | const FormatSettings format_settings; |
| 287 | |
| 288 | for (const auto & config_elem : config_elems) |
| 289 | { |
| 290 | if (!startsWith(config_elem.data(), "attribute" )) |
| 291 | continue; |
| 292 | |
| 293 | const auto prefix = config_prefix + '.' + config_elem + '.'; |
| 294 | Poco::Util::AbstractConfiguration::Keys attribute_keys; |
| 295 | config.keys(config_prefix + '.' + config_elem, attribute_keys); |
| 296 | |
| 297 | checkAttributeKeys(attribute_keys); |
| 298 | |
| 299 | const auto name = config.getString(prefix + "name" ); |
| 300 | const auto type_string = config.getString(prefix + "type" ); |
| 301 | const auto type = DataTypeFactory::instance().get(type_string); |
| 302 | const auto underlying_type = getAttributeUnderlyingType(type_string); |
| 303 | |
| 304 | const auto expression = config.getString(prefix + "expression" , "" ); |
| 305 | if (!expression.empty()) |
| 306 | has_expressions = true; |
| 307 | |
| 308 | Field null_value; |
| 309 | if (allow_null_values) |
| 310 | { |
| 311 | const auto null_value_string = config.getString(prefix + "null_value" ); |
| 312 | try |
| 313 | { |
| 314 | if (null_value_string.empty()) |
| 315 | null_value = type->getDefault(); |
| 316 | else |
| 317 | { |
| 318 | ReadBufferFromString null_value_buffer{null_value_string}; |
| 319 | auto column_with_null_value = type->createColumn(); |
| 320 | type->deserializeAsTextEscaped(*column_with_null_value, null_value_buffer, format_settings); |
| 321 | null_value = (*column_with_null_value)[0]; |
| 322 | } |
| 323 | } |
| 324 | catch (Exception & e) |
| 325 | { |
| 326 | e.addMessage("error parsing null_value" ); |
| 327 | throw; |
| 328 | } |
| 329 | } |
| 330 | |
| 331 | const auto hierarchical = config.getBool(prefix + "hierarchical" , false); |
| 332 | const auto injective = config.getBool(prefix + "injective" , false); |
| 333 | const auto is_object_id = config.getBool(prefix + "is_object_id" , false); |
| 334 | if (name.empty()) |
| 335 | throw Exception{"Properties 'name' and 'type' of an attribute cannot be empty" , ErrorCodes::BAD_ARGUMENTS}; |
| 336 | |
| 337 | if (has_hierarchy && !hierarchy_allowed) |
| 338 | throw Exception{"Hierarchy not allowed in '" + prefix, ErrorCodes::BAD_ARGUMENTS}; |
| 339 | |
| 340 | if (has_hierarchy && hierarchical) |
| 341 | throw Exception{"Only one hierarchical attribute supported" , ErrorCodes::BAD_ARGUMENTS}; |
| 342 | |
| 343 | has_hierarchy = has_hierarchy || hierarchical; |
| 344 | |
| 345 | res_attributes.emplace_back( |
| 346 | DictionaryAttribute{name, underlying_type, type, expression, null_value, hierarchical, injective, is_object_id}); |
| 347 | } |
| 348 | |
| 349 | return res_attributes; |
| 350 | } |
| 351 | |
| 352 | } |
| 353 | |