1#include "RangeHashedDictionary.h"
2#include <Columns/ColumnNullable.h>
3#include <Functions/FunctionHelpers.h>
4#include <Common/TypeList.h>
5#include <ext/range.h>
6#include "DictionaryFactory.h"
7#include "RangeDictionaryBlockInputStream.h"
8
9namespace
10{
11using RangeStorageType = DB::RangeHashedDictionary::RangeStorageType;
12
13// Null values mean that specified boundary, either min or max is not set on range.
14// To simplify comparison, null value of min bound should be bigger than any other value,
15// and null value of maxbound - less than any value.
16const RangeStorageType RANGE_MIN_NULL_VALUE = std::numeric_limits<RangeStorageType>::max();
17const RangeStorageType RANGE_MAX_NULL_VALUE = std::numeric_limits<RangeStorageType>::lowest();
18
19// Handle both kinds of null values: explicit nulls of NullableColumn and 'implicit' nulls of Date type.
20RangeStorageType getColumnIntValueOrDefault(const DB::IColumn & column, size_t index, bool isDate, const RangeStorageType & default_value)
21{
22 if (column.isNullAt(index))
23 return default_value;
24
25 const RangeStorageType result = static_cast<RangeStorageType>(column.getInt(index));
26 if (isDate && !DB::RangeHashedDictionary::Range::isCorrectDate(result))
27 return default_value;
28
29 return result;
30}
31
32const DB::IColumn & unwrapNullableColumn(const DB::IColumn & column)
33{
34 if (const auto * m = DB::checkAndGetColumn<DB::ColumnNullable>(&column))
35 {
36 return m->getNestedColumn();
37 }
38
39 return column;
40}
41
42}
43
44namespace DB
45{
// Error codes referenced in this file; the constants themselves are defined elsewhere.
// LOGICAL_ERROR is listed explicitly because this file throws it, instead of
// relying on a declaration that happens to come from an included header.
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int DICTIONARY_IS_EMPTY;
    extern const int LOGICAL_ERROR;
    extern const int TYPE_MISMATCH;
    extern const int UNSUPPORTED_METHOD;
}
53
54bool RangeHashedDictionary::Range::isCorrectDate(const RangeStorageType & date)
55{
56 return 0 < date && date <= DATE_LUT_MAX_DAY_NUM;
57}
58
59bool RangeHashedDictionary::Range::contains(const RangeStorageType & value) const
60{
61 return left <= value && value <= right;
62}
63
64static bool operator<(const RangeHashedDictionary::Range & left, const RangeHashedDictionary::Range & right)
65{
66 return std::tie(left.left, left.right) < std::tie(right.left, right.right);
67}
68
69
70RangeHashedDictionary::RangeHashedDictionary(
71 const std::string & database_,
72 const std::string & name_,
73 const DictionaryStructure & dict_struct_,
74 DictionarySourcePtr source_ptr_,
75 const DictionaryLifetime dict_lifetime_,
76 bool require_nonempty_)
77 : database(database_)
78 , name(name_)
79 , full_name{database_.empty() ? name_ : (database_ + "." + name_)}
80 , dict_struct(dict_struct_)
81 , source_ptr{std::move(source_ptr_)}
82 , dict_lifetime(dict_lifetime_)
83 , require_nonempty(require_nonempty_)
84{
85 createAttributes();
86 loadData();
87 calculateBytesAllocated();
88}
89
90
91#define DECLARE_MULTIPLE_GETTER(TYPE) \
92 void RangeHashedDictionary::get##TYPE( \
93 const std::string & attribute_name, \
94 const PaddedPODArray<Key> & ids, \
95 const PaddedPODArray<RangeStorageType> & dates, \
96 ResultArrayType<TYPE> & out) const \
97 { \
98 const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::ut##TYPE); \
99 getItems<TYPE>(attribute, ids, dates, out); \
100 }
101DECLARE_MULTIPLE_GETTER(UInt8)
102DECLARE_MULTIPLE_GETTER(UInt16)
103DECLARE_MULTIPLE_GETTER(UInt32)
104DECLARE_MULTIPLE_GETTER(UInt64)
105DECLARE_MULTIPLE_GETTER(UInt128)
106DECLARE_MULTIPLE_GETTER(Int8)
107DECLARE_MULTIPLE_GETTER(Int16)
108DECLARE_MULTIPLE_GETTER(Int32)
109DECLARE_MULTIPLE_GETTER(Int64)
110DECLARE_MULTIPLE_GETTER(Float32)
111DECLARE_MULTIPLE_GETTER(Float64)
112DECLARE_MULTIPLE_GETTER(Decimal32)
113DECLARE_MULTIPLE_GETTER(Decimal64)
114DECLARE_MULTIPLE_GETTER(Decimal128)
115#undef DECLARE_MULTIPLE_GETTER
116
117void RangeHashedDictionary::getString(
118 const std::string & attribute_name,
119 const PaddedPODArray<Key> & ids,
120 const PaddedPODArray<RangeStorageType> & dates,
121 ColumnString * out) const
122{
123 const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::utString);
124 const auto & attr = *std::get<Ptr<StringRef>>(attribute.maps);
125 const auto & null_value = std::get<String>(attribute.null_values);
126
127 for (const auto i : ext::range(0, ids.size()))
128 {
129 const auto it = attr.find(ids[i]);
130 if (it)
131 {
132 const auto date = dates[i];
133 const auto & ranges_and_values = it->getMapped();
134 const auto val_it
135 = std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<StringRef> & v)
136 {
137 return v.range.contains(date);
138 });
139
140 const auto string_ref = val_it != std::end(ranges_and_values) ? val_it->value : StringRef{null_value};
141 out->insertData(string_ref.data, string_ref.size);
142 }
143 else
144 out->insertData(null_value.data(), null_value.size());
145 }
146
147 query_count.fetch_add(ids.size(), std::memory_order_relaxed);
148}
149
150
151void RangeHashedDictionary::createAttributes()
152{
153 const auto size = dict_struct.attributes.size();
154 attributes.reserve(size);
155
156 for (const auto & attribute : dict_struct.attributes)
157 {
158 attribute_index_by_name.emplace(attribute.name, attributes.size());
159 attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
160
161 if (attribute.hierarchical)
162 throw Exception{full_name + ": hierarchical attributes not supported by " + getName() + " dictionary.",
163 ErrorCodes::BAD_ARGUMENTS};
164 }
165}
166
167void RangeHashedDictionary::loadData()
168{
169 auto stream = source_ptr->loadAll();
170 stream->readPrefix();
171
172 while (const auto block = stream->read())
173 {
174 const auto & id_column = *block.safeGetByPosition(0).column;
175
176 // Support old behaviour, where invalid date means 'open range'.
177 const bool is_date = isDate(dict_struct.range_min->type);
178
179 const auto & min_range_column = unwrapNullableColumn(*block.safeGetByPosition(1).column);
180 const auto & max_range_column = unwrapNullableColumn(*block.safeGetByPosition(2).column);
181
182 element_count += id_column.size();
183
184 for (const auto attribute_idx : ext::range(0, attributes.size()))
185 {
186 const auto & attribute_column = *block.safeGetByPosition(attribute_idx + 3).column;
187 auto & attribute = attributes[attribute_idx];
188
189 for (const auto row_idx : ext::range(0, id_column.size()))
190 {
191 RangeStorageType lower_bound;
192 RangeStorageType upper_bound;
193
194 if (is_date)
195 {
196 lower_bound = getColumnIntValueOrDefault(min_range_column, row_idx, is_date, 0);
197 upper_bound = getColumnIntValueOrDefault(max_range_column, row_idx, is_date, DATE_LUT_MAX_DAY_NUM + 1);
198 }
199 else
200 {
201 lower_bound = getColumnIntValueOrDefault(min_range_column, row_idx, is_date, RANGE_MIN_NULL_VALUE);
202 upper_bound = getColumnIntValueOrDefault(max_range_column, row_idx, is_date, RANGE_MAX_NULL_VALUE);
203 }
204
205 setAttributeValue(attribute, id_column.getUInt(row_idx), Range{lower_bound, upper_bound}, attribute_column[row_idx]);
206 }
207 }
208 }
209
210 stream->readSuffix();
211
212 if (require_nonempty && 0 == element_count)
213 throw Exception{full_name + ": dictionary source is empty and 'require_nonempty' property is set.",
214 ErrorCodes::DICTIONARY_IS_EMPTY};
215}
216
217template <typename T>
218void RangeHashedDictionary::addAttributeSize(const Attribute & attribute)
219{
220 const auto & map_ref = std::get<Ptr<T>>(attribute.maps);
221 bytes_allocated += sizeof(Collection<T>) + map_ref->getBufferSizeInBytes();
222 bucket_count = map_ref->getBufferSizeInCells();
223}
224
225void RangeHashedDictionary::calculateBytesAllocated()
226{
227 bytes_allocated += attributes.size() * sizeof(attributes.front());
228
229 for (const auto & attribute : attributes)
230 {
231 switch (attribute.type)
232 {
233 case AttributeUnderlyingType::utUInt8:
234 addAttributeSize<UInt8>(attribute);
235 break;
236 case AttributeUnderlyingType::utUInt16:
237 addAttributeSize<UInt16>(attribute);
238 break;
239 case AttributeUnderlyingType::utUInt32:
240 addAttributeSize<UInt32>(attribute);
241 break;
242 case AttributeUnderlyingType::utUInt64:
243 addAttributeSize<UInt64>(attribute);
244 break;
245 case AttributeUnderlyingType::utUInt128:
246 addAttributeSize<UInt128>(attribute);
247 break;
248 case AttributeUnderlyingType::utInt8:
249 addAttributeSize<Int8>(attribute);
250 break;
251 case AttributeUnderlyingType::utInt16:
252 addAttributeSize<Int16>(attribute);
253 break;
254 case AttributeUnderlyingType::utInt32:
255 addAttributeSize<Int32>(attribute);
256 break;
257 case AttributeUnderlyingType::utInt64:
258 addAttributeSize<Int64>(attribute);
259 break;
260 case AttributeUnderlyingType::utFloat32:
261 addAttributeSize<Float32>(attribute);
262 break;
263 case AttributeUnderlyingType::utFloat64:
264 addAttributeSize<Float64>(attribute);
265 break;
266
267 case AttributeUnderlyingType::utDecimal32:
268 addAttributeSize<Decimal32>(attribute);
269 break;
270 case AttributeUnderlyingType::utDecimal64:
271 addAttributeSize<Decimal64>(attribute);
272 break;
273 case AttributeUnderlyingType::utDecimal128:
274 addAttributeSize<Decimal128>(attribute);
275 break;
276
277 case AttributeUnderlyingType::utString:
278 {
279 addAttributeSize<StringRef>(attribute);
280 bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
281
282 break;
283 }
284 }
285 }
286}
287
288template <typename T>
289void RangeHashedDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
290{
291 attribute.null_values = T(null_value.get<NearestFieldType<T>>());
292 attribute.maps = std::make_unique<Collection<T>>();
293}
294
295RangeHashedDictionary::Attribute
296RangeHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
297{
298 Attribute attr{type, {}, {}, {}};
299
300 switch (type)
301 {
302 case AttributeUnderlyingType::utUInt8:
303 createAttributeImpl<UInt8>(attr, null_value);
304 break;
305 case AttributeUnderlyingType::utUInt16:
306 createAttributeImpl<UInt16>(attr, null_value);
307 break;
308 case AttributeUnderlyingType::utUInt32:
309 createAttributeImpl<UInt32>(attr, null_value);
310 break;
311 case AttributeUnderlyingType::utUInt64:
312 createAttributeImpl<UInt64>(attr, null_value);
313 break;
314 case AttributeUnderlyingType::utUInt128:
315 createAttributeImpl<UInt128>(attr, null_value);
316 break;
317 case AttributeUnderlyingType::utInt8:
318 createAttributeImpl<Int8>(attr, null_value);
319 break;
320 case AttributeUnderlyingType::utInt16:
321 createAttributeImpl<Int16>(attr, null_value);
322 break;
323 case AttributeUnderlyingType::utInt32:
324 createAttributeImpl<Int32>(attr, null_value);
325 break;
326 case AttributeUnderlyingType::utInt64:
327 createAttributeImpl<Int64>(attr, null_value);
328 break;
329 case AttributeUnderlyingType::utFloat32:
330 createAttributeImpl<Float32>(attr, null_value);
331 break;
332 case AttributeUnderlyingType::utFloat64:
333 createAttributeImpl<Float64>(attr, null_value);
334 break;
335
336 case AttributeUnderlyingType::utDecimal32:
337 createAttributeImpl<Decimal32>(attr, null_value);
338 break;
339 case AttributeUnderlyingType::utDecimal64:
340 createAttributeImpl<Decimal64>(attr, null_value);
341 break;
342 case AttributeUnderlyingType::utDecimal128:
343 createAttributeImpl<Decimal128>(attr, null_value);
344 break;
345
346 case AttributeUnderlyingType::utString:
347 {
348 attr.null_values = null_value.get<String>();
349 attr.maps = std::make_unique<Collection<StringRef>>();
350 attr.string_arena = std::make_unique<Arena>();
351 break;
352 }
353 }
354
355 return attr;
356}
357
358
359template <typename OutputType>
360void RangeHashedDictionary::getItems(
361 const Attribute & attribute,
362 const PaddedPODArray<Key> & ids,
363 const PaddedPODArray<RangeStorageType> & dates,
364 PaddedPODArray<OutputType> & out) const
365{
366 if (false)
367 {
368 }
369#define DISPATCH(TYPE) else if (attribute.type == AttributeUnderlyingType::ut##TYPE) getItemsImpl<TYPE, OutputType>(attribute, ids, dates, out);
370 DISPATCH(UInt8)
371 DISPATCH(UInt16)
372 DISPATCH(UInt32)
373 DISPATCH(UInt64)
374 DISPATCH(UInt128)
375 DISPATCH(Int8)
376 DISPATCH(Int16)
377 DISPATCH(Int32)
378 DISPATCH(Int64)
379 DISPATCH(Float32)
380 DISPATCH(Float64)
381 DISPATCH(Decimal32)
382 DISPATCH(Decimal64)
383 DISPATCH(Decimal128)
384#undef DISPATCH
385 else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
386}
387
388template <typename AttributeType, typename OutputType>
389void RangeHashedDictionary::getItemsImpl(
390 const Attribute & attribute,
391 const PaddedPODArray<Key> & ids,
392 const PaddedPODArray<RangeStorageType> & dates,
393 PaddedPODArray<OutputType> & out) const
394{
395 const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
396 const auto null_value = std::get<AttributeType>(attribute.null_values);
397
398 for (const auto i : ext::range(0, ids.size()))
399 {
400 const auto it = attr.find(ids[i]);
401 if (it)
402 {
403 const auto date = dates[i];
404 const auto & ranges_and_values = it->getMapped();
405 const auto val_it
406 = std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<AttributeType> & v)
407 {
408 return v.range.contains(date);
409 });
410
411 out[i] = static_cast<OutputType>(val_it != std::end(ranges_and_values) ? val_it->value : null_value);
412 }
413 else
414 {
415 out[i] = static_cast<OutputType>(null_value);
416 }
417 }
418
419 query_count.fetch_add(ids.size(), std::memory_order_relaxed);
420}
421
422
423template <typename T>
424void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const T value)
425{
426 auto & map = *std::get<Ptr<T>>(attribute.maps);
427 const auto it = map.find(id);
428
429 if (it)
430 {
431 auto & values = it->getMapped();
432
433 const auto insert_it
434 = std::lower_bound(std::begin(values), std::end(values), range, [](const Value<T> & lhs, const Range & rhs_range)
435 {
436 return lhs.range < rhs_range;
437 });
438
439 values.insert(insert_it, Value<T>{range, value});
440 }
441 else
442 map.insert({id, Values<T>{Value<T>{range, value}}});
443}
444
445void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Range & range, const Field & value)
446{
447 switch (attribute.type)
448 {
449 case AttributeUnderlyingType::utUInt8:
450 setAttributeValueImpl<UInt8>(attribute, id, range, value.get<UInt64>());
451 break;
452 case AttributeUnderlyingType::utUInt16:
453 setAttributeValueImpl<UInt16>(attribute, id, range, value.get<UInt64>());
454 break;
455 case AttributeUnderlyingType::utUInt32:
456 setAttributeValueImpl<UInt32>(attribute, id, range, value.get<UInt64>());
457 break;
458 case AttributeUnderlyingType::utUInt64:
459 setAttributeValueImpl<UInt64>(attribute, id, range, value.get<UInt64>());
460 break;
461 case AttributeUnderlyingType::utUInt128:
462 setAttributeValueImpl<UInt128>(attribute, id, range, value.get<UInt128>());
463 break;
464 case AttributeUnderlyingType::utInt8:
465 setAttributeValueImpl<Int8>(attribute, id, range, value.get<Int64>());
466 break;
467 case AttributeUnderlyingType::utInt16:
468 setAttributeValueImpl<Int16>(attribute, id, range, value.get<Int64>());
469 break;
470 case AttributeUnderlyingType::utInt32:
471 setAttributeValueImpl<Int32>(attribute, id, range, value.get<Int64>());
472 break;
473 case AttributeUnderlyingType::utInt64:
474 setAttributeValueImpl<Int64>(attribute, id, range, value.get<Int64>());
475 break;
476 case AttributeUnderlyingType::utFloat32:
477 setAttributeValueImpl<Float32>(attribute, id, range, value.get<Float64>());
478 break;
479 case AttributeUnderlyingType::utFloat64:
480 setAttributeValueImpl<Float64>(attribute, id, range, value.get<Float64>());
481 break;
482
483 case AttributeUnderlyingType::utDecimal32:
484 setAttributeValueImpl<Decimal32>(attribute, id, range, value.get<Decimal32>());
485 break;
486 case AttributeUnderlyingType::utDecimal64:
487 setAttributeValueImpl<Decimal64>(attribute, id, range, value.get<Decimal64>());
488 break;
489 case AttributeUnderlyingType::utDecimal128:
490 setAttributeValueImpl<Decimal128>(attribute, id, range, value.get<Decimal128>());
491 break;
492
493 case AttributeUnderlyingType::utString:
494 {
495 auto & map = *std::get<Ptr<StringRef>>(attribute.maps);
496 const auto & string = value.get<String>();
497 const auto string_in_arena = attribute.string_arena->insert(string.data(), string.size());
498 const StringRef string_ref{string_in_arena, string.size()};
499
500 const auto it = map.find(id);
501
502 if (it)
503 {
504 auto & values = it->getMapped();
505
506 const auto insert_it = std::lower_bound(
507 std::begin(values), std::end(values), range, [](const Value<StringRef> & lhs, const Range & rhs_range)
508 {
509 return lhs.range < rhs_range;
510 });
511
512 values.insert(insert_it, Value<StringRef>{range, string_ref});
513 }
514 else
515 map.insert({id, Values<StringRef>{Value<StringRef>{range, string_ref}}});
516
517 break;
518 }
519 }
520}
521
522const RangeHashedDictionary::Attribute & RangeHashedDictionary::getAttribute(const std::string & attribute_name) const
523{
524 const auto it = attribute_index_by_name.find(attribute_name);
525 if (it == std::end(attribute_index_by_name))
526 throw Exception{full_name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
527
528 return attributes[it->second];
529}
530
531const RangeHashedDictionary::Attribute &
532RangeHashedDictionary::getAttributeWithType(const std::string & attribute_name, const AttributeUnderlyingType type) const
533{
534 const auto & attribute = getAttribute(attribute_name);
535 if (attribute.type != type)
536 throw Exception{attribute_name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
537 ErrorCodes::TYPE_MISMATCH};
538
539 return attribute;
540}
541
542template <typename RangeType>
543void RangeHashedDictionary::getIdsAndDates(
544 PaddedPODArray<Key> & ids, PaddedPODArray<RangeType> & start_dates, PaddedPODArray<RangeType> & end_dates) const
545{
546 const auto & attribute = attributes.front();
547
548 switch (attribute.type)
549 {
550 case AttributeUnderlyingType::utUInt8:
551 getIdsAndDates<UInt8>(attribute, ids, start_dates, end_dates);
552 break;
553 case AttributeUnderlyingType::utUInt16:
554 getIdsAndDates<UInt16>(attribute, ids, start_dates, end_dates);
555 break;
556 case AttributeUnderlyingType::utUInt32:
557 getIdsAndDates<UInt32>(attribute, ids, start_dates, end_dates);
558 break;
559 case AttributeUnderlyingType::utUInt64:
560 getIdsAndDates<UInt64>(attribute, ids, start_dates, end_dates);
561 break;
562 case AttributeUnderlyingType::utUInt128:
563 getIdsAndDates<UInt128>(attribute, ids, start_dates, end_dates);
564 break;
565 case AttributeUnderlyingType::utInt8:
566 getIdsAndDates<Int8>(attribute, ids, start_dates, end_dates);
567 break;
568 case AttributeUnderlyingType::utInt16:
569 getIdsAndDates<Int16>(attribute, ids, start_dates, end_dates);
570 break;
571 case AttributeUnderlyingType::utInt32:
572 getIdsAndDates<Int32>(attribute, ids, start_dates, end_dates);
573 break;
574 case AttributeUnderlyingType::utInt64:
575 getIdsAndDates<Int64>(attribute, ids, start_dates, end_dates);
576 break;
577 case AttributeUnderlyingType::utFloat32:
578 getIdsAndDates<Float32>(attribute, ids, start_dates, end_dates);
579 break;
580 case AttributeUnderlyingType::utFloat64:
581 getIdsAndDates<Float64>(attribute, ids, start_dates, end_dates);
582 break;
583 case AttributeUnderlyingType::utString:
584 getIdsAndDates<StringRef>(attribute, ids, start_dates, end_dates);
585 break;
586
587 case AttributeUnderlyingType::utDecimal32:
588 getIdsAndDates<Decimal32>(attribute, ids, start_dates, end_dates);
589 break;
590 case AttributeUnderlyingType::utDecimal64:
591 getIdsAndDates<Decimal64>(attribute, ids, start_dates, end_dates);
592 break;
593 case AttributeUnderlyingType::utDecimal128:
594 getIdsAndDates<Decimal128>(attribute, ids, start_dates, end_dates);
595 break;
596 }
597}
598
599template <typename T, typename RangeType>
600void RangeHashedDictionary::getIdsAndDates(
601 const Attribute & attribute,
602 PaddedPODArray<Key> & ids,
603 PaddedPODArray<RangeType> & start_dates,
604 PaddedPODArray<RangeType> & end_dates) const
605{
606 const HashMap<UInt64, Values<T>> & attr = *std::get<Ptr<T>>(attribute.maps);
607
608 ids.reserve(attr.size());
609 start_dates.reserve(attr.size());
610 end_dates.reserve(attr.size());
611
612 const bool is_date = isDate(dict_struct.range_min->type);
613
614 for (const auto & key : attr)
615 {
616 for (const auto & value : key.getMapped())
617 {
618 ids.push_back(key.getKey());
619 start_dates.push_back(value.range.left);
620 end_dates.push_back(value.range.right);
621
622 if (is_date && static_cast<UInt64>(end_dates.back()) > DATE_LUT_MAX_DAY_NUM)
623 end_dates.back() = 0;
624 }
625 }
626}
627
628
629template <typename RangeType>
630BlockInputStreamPtr RangeHashedDictionary::getBlockInputStreamImpl(const Names & column_names, size_t max_block_size) const
631{
632 PaddedPODArray<Key> ids;
633 PaddedPODArray<RangeType> start_dates;
634 PaddedPODArray<RangeType> end_dates;
635 getIdsAndDates(ids, start_dates, end_dates);
636
637 using BlockInputStreamType = RangeDictionaryBlockInputStream<RangeHashedDictionary, RangeType, Key>;
638 auto dict_ptr = std::static_pointer_cast<const RangeHashedDictionary>(shared_from_this());
639 return std::make_shared<BlockInputStreamType>(
640 dict_ptr, max_block_size, column_names, std::move(ids), std::move(start_dates), std::move(end_dates));
641}
642
643struct RangeHashedDIctionaryCallGetBlockInputStreamImpl
644{
645 BlockInputStreamPtr stream;
646 const RangeHashedDictionary * dict;
647 const Names * column_names;
648 size_t max_block_size;
649
650 template <typename RangeType, size_t>
651 void operator()()
652 {
653 auto & type = dict->dict_struct.range_min->type;
654 if (!stream && dynamic_cast<const DataTypeNumberBase<RangeType> *>(type.get()))
655 stream = dict->getBlockInputStreamImpl<RangeType>(*column_names, max_block_size);
656 }
657};
658
659BlockInputStreamPtr RangeHashedDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
660{
661 using ListType = TypeList<UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Int128, Float32, Float64>;
662
663 RangeHashedDIctionaryCallGetBlockInputStreamImpl callable;
664 callable.dict = this;
665 callable.column_names = &column_names;
666 callable.max_block_size = max_block_size;
667
668 ListType::forEach(callable);
669
670 if (!callable.stream)
671 throw Exception(
672 "Unexpected range type for RangeHashed dictionary: " + dict_struct.range_min->type->getName(), ErrorCodes::LOGICAL_ERROR);
673
674 return callable.stream;
675}
676
677
678void registerDictionaryRangeHashed(DictionaryFactory & factory)
679{
680 auto create_layout = [=](const std::string & full_name,
681 const DictionaryStructure & dict_struct,
682 const Poco::Util::AbstractConfiguration & config,
683 const std::string & config_prefix,
684 DictionarySourcePtr source_ptr) -> DictionaryPtr
685 {
686 if (dict_struct.key)
687 throw Exception{"'key' is not supported for dictionary of layout 'range_hashed'", ErrorCodes::UNSUPPORTED_METHOD};
688
689 if (!dict_struct.range_min || !dict_struct.range_max)
690 throw Exception{full_name + ": dictionary of layout 'range_hashed' requires .structure.range_min and .structure.range_max",
691 ErrorCodes::BAD_ARGUMENTS};
692
693 const String database = config.getString(config_prefix + ".database", "");
694 const String name = config.getString(config_prefix + ".name");
695 const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
696 const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
697 return std::make_unique<RangeHashedDictionary>(database, name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
698 };
699 factory.registerLayout("range_hashed", create_layout, false);
700}
701
702}
703