1#include "HashedDictionary.h"
2#include <ext/size.h>
3#include "DictionaryBlockInputStream.h"
4#include "DictionaryFactory.h"
5
namespace
{

/// Uniform accessors for the key and the mapped value of a map entry, whichever map type it came from.
/// NOTE: The trailing return types are spelled out deliberately: they are what makes each overload
/// drop out of overload resolution (SFINAE) when the entry lacks the corresponding member.

/// Entries exposing `.first` / `.second` (google::sparse_hash_map).
template <typename Entry>
auto first(const Entry & entry) -> decltype(entry.first)
{
    return entry.first;
}

template <typename Entry>
auto second(const Entry & entry) -> decltype(entry.second)
{
    return entry.second;
}

/// Entries exposing `getKey()` / `getMapped()` (HashMap).
template <typename Entry>
auto first(const Entry & entry) -> decltype(entry.getKey())
{
    return entry.getKey();
}

template <typename Entry>
auto second(const Entry & entry) -> decltype(entry.getMapped())
{
    return entry.getMapped();
}

}
20
21namespace DB
22{
/// Error codes referenced by this translation unit; they are only declared here.
namespace ErrorCodes
{
    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int BAD_ARGUMENTS;
    extern const int DICTIONARY_IS_EMPTY;
    extern const int TYPE_MISMATCH;
    extern const int UNSUPPORTED_METHOD;
}
31
32
33HashedDictionary::HashedDictionary(
34 const std::string & database_,
35 const std::string & name_,
36 const DictionaryStructure & dict_struct_,
37 DictionarySourcePtr source_ptr_,
38 const DictionaryLifetime dict_lifetime_,
39 bool require_nonempty_,
40 bool sparse_,
41 BlockPtr saved_block_)
42 : database(database_)
43 , name(name_)
44 , full_name{database_.empty() ? name_ : (database_ + "." + name_)}
45 , dict_struct(dict_struct_)
46 , source_ptr{std::move(source_ptr_)}
47 , dict_lifetime(dict_lifetime_)
48 , require_nonempty(require_nonempty_)
49 , sparse(sparse_)
50 , saved_block{std::move(saved_block_)}
51{
52 createAttributes();
53 loadData();
54 calculateBytesAllocated();
55}
56
57
58void HashedDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
59{
60 const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
61
62 getItemsImpl<UInt64, UInt64>(
63 *hierarchical_attribute,
64 ids,
65 [&](const size_t row, const UInt64 value) { out[row] = value; },
66 [&](const size_t) { return null_value; });
67}
68
69
70/// Allow to use single value in same way as array.
71static inline HashedDictionary::Key getAt(const PaddedPODArray<HashedDictionary::Key> & arr, const size_t idx)
72{
73 return arr[idx];
74}
75static inline HashedDictionary::Key getAt(const HashedDictionary::Key & value, const size_t)
76{
77 return value;
78}
79
80template <typename AttrType, typename ChildType, typename AncestorType>
81void HashedDictionary::isInAttrImpl(const AttrType & attr, const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const
82{
83 const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
84 const auto rows = out.size();
85
86 for (const auto row : ext::range(0, rows))
87 {
88 auto id = getAt(child_ids, row);
89 const auto ancestor_id = getAt(ancestor_ids, row);
90
91 while (id != null_value && id != ancestor_id)
92 {
93 auto it = attr.find(id);
94 if (it != std::end(attr))
95 id = second(*it);
96 else
97 break;
98 }
99
100 out[row] = id != null_value && id == ancestor_id;
101 }
102
103 query_count.fetch_add(rows, std::memory_order_relaxed);
104}
105template <typename ChildType, typename AncestorType>
106void HashedDictionary::isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const
107{
108 if (!sparse)
109 return isInAttrImpl(*std::get<CollectionPtrType<Key>>(hierarchical_attribute->maps), child_ids, ancestor_ids, out);
110 return isInAttrImpl(*std::get<SparseCollectionPtrType<Key>>(hierarchical_attribute->sparse_maps), child_ids, ancestor_ids, out);
111}
112
113void HashedDictionary::isInVectorVector(
114 const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
115{
116 isInImpl(child_ids, ancestor_ids, out);
117}
118
119void HashedDictionary::isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const
120{
121 isInImpl(child_ids, ancestor_id, out);
122}
123
124void HashedDictionary::isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
125{
126 isInImpl(child_id, ancestor_ids, out);
127}
128
129
130#define DECLARE(TYPE) \
131 void HashedDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) \
132 const \
133 { \
134 const auto & attribute = getAttribute(attribute_name); \
135 checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
136\
137 const auto null_value = std::get<TYPE>(attribute.null_values); \
138\
139 getItemsImpl<TYPE, TYPE>( \
140 attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \
141 }
142DECLARE(UInt8)
143DECLARE(UInt16)
144DECLARE(UInt32)
145DECLARE(UInt64)
146DECLARE(UInt128)
147DECLARE(Int8)
148DECLARE(Int16)
149DECLARE(Int32)
150DECLARE(Int64)
151DECLARE(Float32)
152DECLARE(Float64)
153DECLARE(Decimal32)
154DECLARE(Decimal64)
155DECLARE(Decimal128)
156#undef DECLARE
157
158void HashedDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
159{
160 const auto & attribute = getAttribute(attribute_name);
161 checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::utString);
162
163 const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
164
165 getItemsImpl<StringRef, StringRef>(
166 attribute,
167 ids,
168 [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
169 [&](const size_t) { return null_value; });
170}
171
172#define DECLARE(TYPE) \
173 void HashedDictionary::get##TYPE( \
174 const std::string & attribute_name, \
175 const PaddedPODArray<Key> & ids, \
176 const PaddedPODArray<TYPE> & def, \
177 ResultArrayType<TYPE> & out) const \
178 { \
179 const auto & attribute = getAttribute(attribute_name); \
180 checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
181\
182 getItemsImpl<TYPE, TYPE>( \
183 attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \
184 }
185DECLARE(UInt8)
186DECLARE(UInt16)
187DECLARE(UInt32)
188DECLARE(UInt64)
189DECLARE(UInt128)
190DECLARE(Int8)
191DECLARE(Int16)
192DECLARE(Int32)
193DECLARE(Int64)
194DECLARE(Float32)
195DECLARE(Float64)
196DECLARE(Decimal32)
197DECLARE(Decimal64)
198DECLARE(Decimal128)
199#undef DECLARE
200
201void HashedDictionary::getString(
202 const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
203{
204 const auto & attribute = getAttribute(attribute_name);
205 checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::utString);
206
207 getItemsImpl<StringRef, StringRef>(
208 attribute,
209 ids,
210 [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
211 [&](const size_t row) { return def->getDataAt(row); });
212}
213
214#define DECLARE(TYPE) \
215 void HashedDictionary::get##TYPE( \
216 const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE & def, ResultArrayType<TYPE> & out) const \
217 { \
218 const auto & attribute = getAttribute(attribute_name); \
219 checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
220\
221 getItemsImpl<TYPE, TYPE>( \
222 attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
223 }
224DECLARE(UInt8)
225DECLARE(UInt16)
226DECLARE(UInt32)
227DECLARE(UInt64)
228DECLARE(UInt128)
229DECLARE(Int8)
230DECLARE(Int16)
231DECLARE(Int32)
232DECLARE(Int64)
233DECLARE(Float32)
234DECLARE(Float64)
235DECLARE(Decimal32)
236DECLARE(Decimal64)
237DECLARE(Decimal128)
238#undef DECLARE
239
240void HashedDictionary::getString(
241 const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
242{
243 const auto & attribute = getAttribute(attribute_name);
244 checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::utString);
245
246 getItemsImpl<StringRef, StringRef>(
247 attribute,
248 ids,
249 [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
250 [&](const size_t) { return StringRef{def}; });
251}
252
253void HashedDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
254{
255 const auto & attribute = attributes.front();
256
257 switch (attribute.type)
258 {
259 case AttributeUnderlyingType::utUInt8:
260 has<UInt8>(attribute, ids, out);
261 break;
262 case AttributeUnderlyingType::utUInt16:
263 has<UInt16>(attribute, ids, out);
264 break;
265 case AttributeUnderlyingType::utUInt32:
266 has<UInt32>(attribute, ids, out);
267 break;
268 case AttributeUnderlyingType::utUInt64:
269 has<UInt64>(attribute, ids, out);
270 break;
271 case AttributeUnderlyingType::utUInt128:
272 has<UInt128>(attribute, ids, out);
273 break;
274 case AttributeUnderlyingType::utInt8:
275 has<Int8>(attribute, ids, out);
276 break;
277 case AttributeUnderlyingType::utInt16:
278 has<Int16>(attribute, ids, out);
279 break;
280 case AttributeUnderlyingType::utInt32:
281 has<Int32>(attribute, ids, out);
282 break;
283 case AttributeUnderlyingType::utInt64:
284 has<Int64>(attribute, ids, out);
285 break;
286 case AttributeUnderlyingType::utFloat32:
287 has<Float32>(attribute, ids, out);
288 break;
289 case AttributeUnderlyingType::utFloat64:
290 has<Float64>(attribute, ids, out);
291 break;
292 case AttributeUnderlyingType::utString:
293 has<StringRef>(attribute, ids, out);
294 break;
295
296 case AttributeUnderlyingType::utDecimal32:
297 has<Decimal32>(attribute, ids, out);
298 break;
299 case AttributeUnderlyingType::utDecimal64:
300 has<Decimal64>(attribute, ids, out);
301 break;
302 case AttributeUnderlyingType::utDecimal128:
303 has<Decimal128>(attribute, ids, out);
304 break;
305 }
306}
307
308void HashedDictionary::createAttributes()
309{
310 const auto size = dict_struct.attributes.size();
311 attributes.reserve(size);
312
313 for (const auto & attribute : dict_struct.attributes)
314 {
315 attribute_index_by_name.emplace(attribute.name, attributes.size());
316 attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
317
318 if (attribute.hierarchical)
319 {
320 hierarchical_attribute = &attributes.back();
321
322 if (hierarchical_attribute->type != AttributeUnderlyingType::utUInt64)
323 throw Exception{full_name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH};
324 }
325 }
326}
327
328void HashedDictionary::blockToAttributes(const Block & block)
329{
330 const auto & id_column = *block.safeGetByPosition(0).column;
331
332 for (const size_t attribute_idx : ext::range(0, attributes.size()))
333 {
334 const IColumn & attribute_column = *block.safeGetByPosition(attribute_idx + 1).column;
335 auto & attribute = attributes[attribute_idx];
336
337 for (const auto row_idx : ext::range(0, id_column.size()))
338 if (setAttributeValue(attribute, id_column[row_idx].get<UInt64>(), attribute_column[row_idx]))
339 ++element_count;
340 }
341}
342
343void HashedDictionary::updateData()
344{
345 if (!saved_block || saved_block->rows() == 0)
346 {
347 auto stream = source_ptr->loadUpdatedAll();
348 stream->readPrefix();
349
350 while (const auto block = stream->read())
351 {
352 /// We are using this to keep saved data if input stream consists of multiple blocks
353 if (!saved_block)
354 saved_block = std::make_shared<DB::Block>(block.cloneEmpty());
355 for (const auto attribute_idx : ext::range(0, attributes.size() + 1))
356 {
357 const IColumn & update_column = *block.getByPosition(attribute_idx).column.get();
358 MutableColumnPtr saved_column = saved_block->getByPosition(attribute_idx).column->assumeMutable();
359 saved_column->insertRangeFrom(update_column, 0, update_column.size());
360 }
361 }
362 stream->readSuffix();
363 }
364 else
365 {
366 auto stream = source_ptr->loadUpdatedAll();
367 stream->readPrefix();
368
369 while (Block block = stream->read())
370 {
371 const auto & saved_id_column = *saved_block->safeGetByPosition(0).column;
372 const auto & update_id_column = *block.safeGetByPosition(0).column;
373
374 std::unordered_map<Key, std::vector<size_t>> update_ids;
375 for (size_t row = 0; row < update_id_column.size(); ++row)
376 {
377 const auto id = update_id_column.get64(row);
378 update_ids[id].push_back(row);
379 }
380
381 const size_t saved_rows = saved_id_column.size();
382 IColumn::Filter filter(saved_rows);
383 std::unordered_map<Key, std::vector<size_t>>::iterator it;
384
385 for (size_t row = 0; row < saved_id_column.size(); ++row)
386 {
387 auto id = saved_id_column.get64(row);
388 it = update_ids.find(id);
389
390 if (it != update_ids.end())
391 filter[row] = 0;
392 else
393 filter[row] = 1;
394 }
395
396 auto block_columns = block.mutateColumns();
397 for (const auto attribute_idx : ext::range(0, attributes.size() + 1))
398 {
399 auto & column = saved_block->safeGetByPosition(attribute_idx).column;
400 const auto & filtered_column = column->filter(filter, -1);
401
402 block_columns[attribute_idx]->insertRangeFrom(*filtered_column.get(), 0, filtered_column->size());
403 }
404
405 saved_block->setColumns(std::move(block_columns));
406 }
407 stream->readSuffix();
408 }
409
410 if (saved_block)
411 blockToAttributes(*saved_block.get());
412}
413
414void HashedDictionary::loadData()
415{
416 if (!source_ptr->hasUpdateField())
417 {
418 auto stream = source_ptr->loadAll();
419 stream->readPrefix();
420
421 while (const auto block = stream->read())
422 blockToAttributes(block);
423
424 stream->readSuffix();
425 }
426 else
427 updateData();
428
429 if (require_nonempty && 0 == element_count)
430 throw Exception{full_name + ": dictionary source is empty and 'require_nonempty' property is set.", ErrorCodes::DICTIONARY_IS_EMPTY};
431}
432
433template <typename T>
434void HashedDictionary::addAttributeSize(const Attribute & attribute)
435{
436 if (!sparse)
437 {
438 const auto & map_ref = std::get<CollectionPtrType<T>>(attribute.maps);
439 bytes_allocated += sizeof(CollectionType<T>) + map_ref->getBufferSizeInBytes();
440 bucket_count = map_ref->getBufferSizeInCells();
441 }
442 else
443 {
444 const auto & map_ref = std::get<SparseCollectionPtrType<T>>(attribute.sparse_maps);
445 bucket_count = map_ref->bucket_count();
446
447 /** TODO: more accurate calculation */
448 bytes_allocated += sizeof(CollectionType<T>);
449 bytes_allocated += bucket_count;
450 bytes_allocated += map_ref->size() * sizeof(Key) * sizeof(T);
451 }
452}
453
454void HashedDictionary::calculateBytesAllocated()
455{
456 bytes_allocated += attributes.size() * sizeof(attributes.front());
457
458 for (const auto & attribute : attributes)
459 {
460 switch (attribute.type)
461 {
462 case AttributeUnderlyingType::utUInt8:
463 addAttributeSize<UInt8>(attribute);
464 break;
465 case AttributeUnderlyingType::utUInt16:
466 addAttributeSize<UInt16>(attribute);
467 break;
468 case AttributeUnderlyingType::utUInt32:
469 addAttributeSize<UInt32>(attribute);
470 break;
471 case AttributeUnderlyingType::utUInt64:
472 addAttributeSize<UInt64>(attribute);
473 break;
474 case AttributeUnderlyingType::utUInt128:
475 addAttributeSize<UInt128>(attribute);
476 break;
477 case AttributeUnderlyingType::utInt8:
478 addAttributeSize<Int8>(attribute);
479 break;
480 case AttributeUnderlyingType::utInt16:
481 addAttributeSize<Int16>(attribute);
482 break;
483 case AttributeUnderlyingType::utInt32:
484 addAttributeSize<Int32>(attribute);
485 break;
486 case AttributeUnderlyingType::utInt64:
487 addAttributeSize<Int64>(attribute);
488 break;
489 case AttributeUnderlyingType::utFloat32:
490 addAttributeSize<Float32>(attribute);
491 break;
492 case AttributeUnderlyingType::utFloat64:
493 addAttributeSize<Float64>(attribute);
494 break;
495
496 case AttributeUnderlyingType::utDecimal32:
497 addAttributeSize<Decimal32>(attribute);
498 break;
499 case AttributeUnderlyingType::utDecimal64:
500 addAttributeSize<Decimal64>(attribute);
501 break;
502 case AttributeUnderlyingType::utDecimal128:
503 addAttributeSize<Decimal128>(attribute);
504 break;
505
506 case AttributeUnderlyingType::utString:
507 {
508 addAttributeSize<StringRef>(attribute);
509 bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
510
511 break;
512 }
513 }
514 }
515}
516
517template <typename T>
518void HashedDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
519{
520 attribute.null_values = T(null_value.get<NearestFieldType<T>>());
521 if (!sparse)
522 attribute.maps = std::make_unique<CollectionType<T>>();
523 else
524 attribute.sparse_maps = std::make_unique<SparseCollectionType<T>>();
525}
526
527HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
528{
529 Attribute attr{type, {}, {}, {}, {}};
530
531 switch (type)
532 {
533 case AttributeUnderlyingType::utUInt8:
534 createAttributeImpl<UInt8>(attr, null_value);
535 break;
536 case AttributeUnderlyingType::utUInt16:
537 createAttributeImpl<UInt16>(attr, null_value);
538 break;
539 case AttributeUnderlyingType::utUInt32:
540 createAttributeImpl<UInt32>(attr, null_value);
541 break;
542 case AttributeUnderlyingType::utUInt64:
543 createAttributeImpl<UInt64>(attr, null_value);
544 break;
545 case AttributeUnderlyingType::utUInt128:
546 createAttributeImpl<UInt128>(attr, null_value);
547 break;
548 case AttributeUnderlyingType::utInt8:
549 createAttributeImpl<Int8>(attr, null_value);
550 break;
551 case AttributeUnderlyingType::utInt16:
552 createAttributeImpl<Int16>(attr, null_value);
553 break;
554 case AttributeUnderlyingType::utInt32:
555 createAttributeImpl<Int32>(attr, null_value);
556 break;
557 case AttributeUnderlyingType::utInt64:
558 createAttributeImpl<Int64>(attr, null_value);
559 break;
560 case AttributeUnderlyingType::utFloat32:
561 createAttributeImpl<Float32>(attr, null_value);
562 break;
563 case AttributeUnderlyingType::utFloat64:
564 createAttributeImpl<Float64>(attr, null_value);
565 break;
566
567 case AttributeUnderlyingType::utDecimal32:
568 createAttributeImpl<Decimal32>(attr, null_value);
569 break;
570 case AttributeUnderlyingType::utDecimal64:
571 createAttributeImpl<Decimal64>(attr, null_value);
572 break;
573 case AttributeUnderlyingType::utDecimal128:
574 createAttributeImpl<Decimal128>(attr, null_value);
575 break;
576
577 case AttributeUnderlyingType::utString:
578 {
579 attr.null_values = null_value.get<String>();
580 if (!sparse)
581 attr.maps = std::make_unique<CollectionType<StringRef>>();
582 else
583 attr.sparse_maps = std::make_unique<SparseCollectionType<StringRef>>();
584 attr.string_arena = std::make_unique<Arena>();
585 break;
586 }
587 }
588
589 return attr;
590}
591
592
593template <typename OutputType, typename AttrType, typename ValueSetter, typename DefaultGetter>
594void HashedDictionary::getItemsAttrImpl(
595 const AttrType & attr, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
596{
597 const auto rows = ext::size(ids);
598
599 for (const auto i : ext::range(0, rows))
600 {
601 const auto it = attr.find(ids[i]);
602 set_value(i, it != attr.end() ? static_cast<OutputType>(second(*it)) : get_default(i));
603 }
604
605 query_count.fetch_add(rows, std::memory_order_relaxed);
606}
607template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
608void HashedDictionary::getItemsImpl(
609 const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
610{
611 if (!sparse)
612 return getItemsAttrImpl<OutputType>(*std::get<CollectionPtrType<AttributeType>>(attribute.maps), ids, set_value, get_default);
613 return getItemsAttrImpl<OutputType>(*std::get<SparseCollectionPtrType<AttributeType>>(attribute.sparse_maps), ids, set_value, get_default);
614}
615
616
617template <typename T>
618bool HashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const T value)
619{
620 if (!sparse)
621 {
622 auto & map = *std::get<CollectionPtrType<T>>(attribute.maps);
623 return map.insert({id, value}).second;
624 }
625 else
626 {
627 auto & map = *std::get<SparseCollectionPtrType<T>>(attribute.sparse_maps);
628 return map.insert({id, value}).second;
629 }
630}
631
632bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
633{
634 switch (attribute.type)
635 {
636 case AttributeUnderlyingType::utUInt8:
637 return setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>());
638 case AttributeUnderlyingType::utUInt16:
639 return setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>());
640 case AttributeUnderlyingType::utUInt32:
641 return setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>());
642 case AttributeUnderlyingType::utUInt64:
643 return setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>());
644 case AttributeUnderlyingType::utUInt128:
645 return setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>());
646 case AttributeUnderlyingType::utInt8:
647 return setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>());
648 case AttributeUnderlyingType::utInt16:
649 return setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>());
650 case AttributeUnderlyingType::utInt32:
651 return setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>());
652 case AttributeUnderlyingType::utInt64:
653 return setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>());
654 case AttributeUnderlyingType::utFloat32:
655 return setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>());
656 case AttributeUnderlyingType::utFloat64:
657 return setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>());
658
659 case AttributeUnderlyingType::utDecimal32:
660 return setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal32>());
661 case AttributeUnderlyingType::utDecimal64:
662 return setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal64>());
663 case AttributeUnderlyingType::utDecimal128:
664 return setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>());
665
666 case AttributeUnderlyingType::utString:
667 {
668 const auto & string = value.get<String>();
669 const auto string_in_arena = attribute.string_arena->insert(string.data(), string.size());
670 if (!sparse)
671 {
672 auto & map = *std::get<CollectionPtrType<StringRef>>(attribute.maps);
673 return map.insert({id, StringRef{string_in_arena, string.size()}}).second;
674 }
675 else
676 {
677 auto & map = *std::get<SparseCollectionPtrType<StringRef>>(attribute.sparse_maps);
678 return map.insert({id, StringRef{string_in_arena, string.size()}}).second;
679 }
680 }
681 }
682
683 throw Exception{"Invalid attribute type", ErrorCodes::BAD_ARGUMENTS};
684}
685
686const HashedDictionary::Attribute & HashedDictionary::getAttribute(const std::string & attribute_name) const
687{
688 const auto it = attribute_index_by_name.find(attribute_name);
689 if (it == std::end(attribute_index_by_name))
690 throw Exception{full_name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
691
692 return attributes[it->second];
693}
694
695template <typename T>
696void HashedDictionary::has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
697{
698 const auto & attr = *std::get<CollectionPtrType<T>>(attribute.maps);
699 const auto rows = ext::size(ids);
700
701 for (const auto i : ext::range(0, rows))
702 out[i] = attr.find(ids[i]) != nullptr;
703
704 query_count.fetch_add(rows, std::memory_order_relaxed);
705}
706
707template <typename T, typename AttrType>
708PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIdsAttrImpl(const AttrType & attr) const
709{
710 PaddedPODArray<Key> ids;
711 ids.reserve(attr.size());
712 for (const auto & value : attr)
713 ids.push_back(first(value));
714
715 return ids;
716}
717template <typename T>
718PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds(const Attribute & attribute) const
719{
720 if (!sparse)
721 return getIdsAttrImpl<T>(*std::get<CollectionPtrType<T>>(attribute.maps));
722 return getIdsAttrImpl<T>(*std::get<SparseCollectionPtrType<T>>(attribute.sparse_maps));
723}
724
725PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
726{
727 const auto & attribute = attributes.front();
728
729 switch (attribute.type)
730 {
731 case AttributeUnderlyingType::utUInt8:
732 return getIds<UInt8>(attribute);
733 case AttributeUnderlyingType::utUInt16:
734 return getIds<UInt16>(attribute);
735 case AttributeUnderlyingType::utUInt32:
736 return getIds<UInt32>(attribute);
737 case AttributeUnderlyingType::utUInt64:
738 return getIds<UInt64>(attribute);
739 case AttributeUnderlyingType::utUInt128:
740 return getIds<UInt128>(attribute);
741 case AttributeUnderlyingType::utInt8:
742 return getIds<Int8>(attribute);
743 case AttributeUnderlyingType::utInt16:
744 return getIds<Int16>(attribute);
745 case AttributeUnderlyingType::utInt32:
746 return getIds<Int32>(attribute);
747 case AttributeUnderlyingType::utInt64:
748 return getIds<Int64>(attribute);
749 case AttributeUnderlyingType::utFloat32:
750 return getIds<Float32>(attribute);
751 case AttributeUnderlyingType::utFloat64:
752 return getIds<Float64>(attribute);
753 case AttributeUnderlyingType::utString:
754 return getIds<StringRef>(attribute);
755
756 case AttributeUnderlyingType::utDecimal32:
757 return getIds<Decimal32>(attribute);
758 case AttributeUnderlyingType::utDecimal64:
759 return getIds<Decimal64>(attribute);
760 case AttributeUnderlyingType::utDecimal128:
761 return getIds<Decimal128>(attribute);
762 }
763 return PaddedPODArray<Key>();
764}
765
766BlockInputStreamPtr HashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
767{
768 using BlockInputStreamType = DictionaryBlockInputStream<HashedDictionary, Key>;
769 return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getIds(), column_names);
770}
771
772void registerDictionaryHashed(DictionaryFactory & factory)
773{
774 auto create_layout = [=](const std::string & full_name,
775 const DictionaryStructure & dict_struct,
776 const Poco::Util::AbstractConfiguration & config,
777 const std::string & config_prefix,
778 DictionarySourcePtr source_ptr,
779 bool sparse) -> DictionaryPtr
780 {
781 if (dict_struct.key)
782 throw Exception{"'key' is not supported for dictionary of layout 'hashed'", ErrorCodes::UNSUPPORTED_METHOD};
783
784 if (dict_struct.range_min || dict_struct.range_max)
785 throw Exception{full_name
786 + ": elements .structure.range_min and .structure.range_max should be defined only "
787 "for a dictionary of layout 'range_hashed'",
788 ErrorCodes::BAD_ARGUMENTS};
789
790 const String database = config.getString(config_prefix + ".database", "");
791 const String name = config.getString(config_prefix + ".name");
792 const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
793 const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
794 return std::make_unique<HashedDictionary>(database, name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty, sparse);
795 };
796 using namespace std::placeholders;
797 factory.registerLayout("hashed", std::bind(create_layout, _1, _2, _3, _4, _5, /* sparse = */ false), false);
798 factory.registerLayout("sparse_hashed", std::bind(create_layout, _1, _2, _3, _4, _5, /* sparse = */ true), false);
799}
800
801}
802