1#pragma once
2
#include <functional>
#include <memory>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/IColumn.h>
#include <Core/Names.h>
#include <DataStreams/IBlockInputStream.h>
#include <DataTypes/DataTypesNumber.h>
#include <common/logger_useful.h>
#include <ext/range.h>
#include "DictionaryBlockInputStreamBase.h"
#include "DictionaryStructure.h"
#include "IDictionary.h"
16
17namespace DB
18{
19namespace ErrorCodes
20{
21 extern const int LOGICAL_ERROR;
22}
23
24
25/* BlockInputStream implementation for external dictionaries
26 * read() returns blocks consisting of the in-memory contents of the dictionaries
27 */
28template <typename DictionaryType, typename Key>
29class DictionaryBlockInputStream : public DictionaryBlockInputStreamBase
30{
31public:
32 using DictionaryPtr = std::shared_ptr<DictionaryType const>;
33
34 DictionaryBlockInputStream(
35 std::shared_ptr<const IDictionaryBase> dictionary, UInt64 max_block_size, PaddedPODArray<Key> && ids, const Names & column_names);
36
37 DictionaryBlockInputStream(
38 std::shared_ptr<const IDictionaryBase> dictionary,
39 UInt64 max_block_size,
40 const std::vector<StringRef> & keys,
41 const Names & column_names);
42
43 using GetColumnsFunction = std::function<ColumnsWithTypeAndName(const Columns &, const std::vector<DictionaryAttribute> & attributes)>;
44
45 // Used to separate key columns format for storage and view.
46 // Calls get_key_columns_function to get key column for dictionary get fuction call
47 // and get_view_columns_function to get key representation.
48 // Now used in trie dictionary, where columns are stored as ip and mask, and are showed as string
49 DictionaryBlockInputStream(
50 std::shared_ptr<const IDictionaryBase> dictionary,
51 UInt64 max_block_size,
52 const Columns & data_columns,
53 const Names & column_names,
54 GetColumnsFunction && get_key_columns_function,
55 GetColumnsFunction && get_view_columns_function);
56
57 String getName() const override { return "Dictionary"; }
58
59protected:
60 Block getBlock(size_t start, size_t size) const override;
61
62private:
63 // pointer types to getXXX functions
64 // for single key dictionaries
65 template <typename Type>
66 using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, PaddedPODArray<Type> &) const;
67
68 template <typename Type>
69 using DictionaryDecimalGetter
70 = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, DecimalPaddedPODArray<Type> &) const;
71
72 using DictionaryStringGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray<Key> &, ColumnString *) const;
73
74 // for complex complex key dictionaries
75 template <typename Type>
76 using GetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, PaddedPODArray<Type> & out) const;
77
78 template <typename Type>
79 using DecimalGetterByKey
80 = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, DecimalPaddedPODArray<Type> & out) const;
81
82 using StringGetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, ColumnString * out) const;
83
84 // call getXXX
85 // for single key dictionaries
86 template <typename Type, typename Container>
87 void callGetter(
88 DictionaryGetter<Type> getter,
89 const PaddedPODArray<Key> & ids_to_fill,
90 const Columns & keys,
91 const DataTypes & data_types,
92 Container & container,
93 const DictionaryAttribute & attribute,
94 const DictionaryType & dictionary) const;
95
96 template <typename Type, typename Container>
97 void callGetter(
98 DictionaryDecimalGetter<Type> getter,
99 const PaddedPODArray<Key> & ids_to_fill,
100 const Columns & keys,
101 const DataTypes & data_types,
102 Container & container,
103 const DictionaryAttribute & attribute,
104 const DictionaryType & dictionary) const;
105
106 template <typename Container>
107 void callGetter(
108 DictionaryStringGetter getter,
109 const PaddedPODArray<Key> & ids_to_fill,
110 const Columns & keys,
111 const DataTypes & data_types,
112 Container & container,
113 const DictionaryAttribute & attribute,
114 const DictionaryType & dictionary) const;
115
116 // for complex complex key dictionaries
117 template <typename Type, typename Container>
118 void callGetter(
119 GetterByKey<Type> getter,
120 const PaddedPODArray<Key> & ids_to_fill,
121 const Columns & keys,
122 const DataTypes & data_types,
123 Container & container,
124 const DictionaryAttribute & attribute,
125 const DictionaryType & dictionary) const;
126
127 template <typename Type, typename Container>
128 void callGetter(
129 DecimalGetterByKey<Type> getter,
130 const PaddedPODArray<Key> & ids_to_fill,
131 const Columns & keys,
132 const DataTypes & data_types,
133 Container & container,
134 const DictionaryAttribute & attribute,
135 const DictionaryType & dictionary) const;
136
137 template <typename Container>
138 void callGetter(
139 StringGetterByKey getter,
140 const PaddedPODArray<Key> & ids_to_fill,
141 const Columns & keys,
142 const DataTypes & data_types,
143 Container & container,
144 const DictionaryAttribute & attribute,
145 const DictionaryType & dictionary) const;
146
147 template <template <typename> class Getter, template <typename> class DecimalGetter, typename StringGetter>
148 Block
149 fillBlock(const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const;
150
151
152 template <typename AttributeType, typename Getter>
153 ColumnPtr getColumnFromAttribute(
154 Getter getter,
155 const PaddedPODArray<Key> & ids_to_fill,
156 const Columns & keys,
157 const DataTypes & data_types,
158 const DictionaryAttribute & attribute,
159 const DictionaryType & dictionary) const;
160 template <typename Getter>
161 ColumnPtr getColumnFromStringAttribute(
162 Getter getter,
163 const PaddedPODArray<Key> & ids_to_fill,
164 const Columns & keys,
165 const DataTypes & data_types,
166 const DictionaryAttribute & attribute,
167 const DictionaryType & dictionary) const;
168 ColumnPtr getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const;
169
170 void fillKeyColumns(
171 const std::vector<StringRef> & keys,
172 size_t start,
173 size_t size,
174 const DictionaryStructure & dictionary_structure,
175 ColumnsWithTypeAndName & columns) const;
176
177 DictionaryPtr dictionary;
178 Names column_names;
179 PaddedPODArray<Key> ids;
180 ColumnsWithTypeAndName key_columns;
181 Poco::Logger * logger;
182
183 using FillBlockFunction = Block (DictionaryBlockInputStream<DictionaryType, Key>::*)(
184 const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const;
185
186 FillBlockFunction fill_block_function;
187
188 Columns data_columns;
189 GetColumnsFunction get_key_columns_function;
190 GetColumnsFunction get_view_columns_function;
191
192 enum class DictionaryKeyType
193 {
194 Id,
195 ComplexKey,
196 Callback
197 };
198
199 DictionaryKeyType key_type;
200};
201
202
203template <typename DictionaryType, typename Key>
204DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
205 std::shared_ptr<const IDictionaryBase> dictionary_, UInt64 max_block_size_, PaddedPODArray<Key> && ids_, const Names & column_names_)
206 : DictionaryBlockInputStreamBase(ids_.size(), max_block_size_)
207 , dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
208 , column_names(column_names_)
209 , ids(std::move(ids_))
210 , logger(&Poco::Logger::get("DictionaryBlockInputStream"))
211 , fill_block_function(
212 &DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<DictionaryGetter, DictionaryDecimalGetter, DictionaryStringGetter>)
213 , key_type(DictionaryKeyType::Id)
214{
215}
216
217template <typename DictionaryType, typename Key>
218DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
219 std::shared_ptr<const IDictionaryBase> dictionary_,
220 UInt64 max_block_size_,
221 const std::vector<StringRef> & keys,
222 const Names & column_names_)
223 : DictionaryBlockInputStreamBase(keys.size(), max_block_size_)
224 , dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
225 , column_names(column_names_)
226 , logger(&Poco::Logger::get("DictionaryBlockInputStream"))
227 , fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>)
228 , key_type(DictionaryKeyType::ComplexKey)
229{
230 const DictionaryStructure & dictionaty_structure = dictionary->getStructure();
231 fillKeyColumns(keys, 0, keys.size(), dictionaty_structure, key_columns);
232}
233
234template <typename DictionaryType, typename Key>
235DictionaryBlockInputStream<DictionaryType, Key>::DictionaryBlockInputStream(
236 std::shared_ptr<const IDictionaryBase> dictionary_,
237 UInt64 max_block_size_,
238 const Columns & data_columns_,
239 const Names & column_names_,
240 GetColumnsFunction && get_key_columns_function_,
241 GetColumnsFunction && get_view_columns_function_)
242 : DictionaryBlockInputStreamBase(data_columns_.front()->size(), max_block_size_)
243 , dictionary(std::static_pointer_cast<const DictionaryType>(dictionary_))
244 , column_names(column_names_)
245 , logger(&Poco::Logger::get("DictionaryBlockInputStream"))
246 , fill_block_function(&DictionaryBlockInputStream<DictionaryType, Key>::fillBlock<GetterByKey, DecimalGetterByKey, StringGetterByKey>)
247 , data_columns(data_columns_)
248 , get_key_columns_function(get_key_columns_function_)
249 , get_view_columns_function(get_view_columns_function_)
250 , key_type(DictionaryKeyType::Callback)
251{
252}
253
254
255template <typename DictionaryType, typename Key>
256Block DictionaryBlockInputStream<DictionaryType, Key>::getBlock(size_t start, size_t length) const
257{
258 switch (key_type)
259 {
260 case DictionaryKeyType::ComplexKey:
261 {
262 Columns columns;
263 ColumnsWithTypeAndName view_columns;
264 columns.reserve(key_columns.size());
265 for (const auto & key_column : key_columns)
266 {
267 ColumnPtr column = key_column.column->cut(start, length);
268 columns.emplace_back(column);
269 view_columns.emplace_back(column, key_column.type, key_column.name);
270 }
271 return (this->*fill_block_function)({}, columns, {}, std::move(view_columns));
272 }
273
274 case DictionaryKeyType::Id:
275 {
276 PaddedPODArray<Key> ids_to_fill(ids.begin() + start, ids.begin() + start + length);
277 return (this->*fill_block_function)(ids_to_fill, {}, {}, {});
278 }
279
280 case DictionaryKeyType::Callback:
281 {
282 Columns columns;
283 columns.reserve(data_columns.size());
284 for (const auto & data_column : data_columns)
285 columns.push_back(data_column->cut(start, length));
286 const DictionaryStructure & dictionaty_structure = dictionary->getStructure();
287 const auto & attributes = *dictionaty_structure.key;
288 ColumnsWithTypeAndName keys_with_type_and_name = get_key_columns_function(columns, attributes);
289 ColumnsWithTypeAndName view_with_type_and_name = get_view_columns_function(columns, attributes);
290 DataTypes types;
291 columns.clear();
292 for (const auto & key_column : keys_with_type_and_name)
293 {
294 columns.push_back(key_column.column);
295 types.push_back(key_column.type);
296 }
297 return (this->*fill_block_function)({}, columns, types, std::move(view_with_type_and_name));
298 }
299 }
300
301 throw Exception("Unexpected DictionaryKeyType.", ErrorCodes::LOGICAL_ERROR);
302}
303
304
305template <typename DictionaryType, typename Key>
306template <typename Type, typename Container>
307void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
308 DictionaryGetter<Type> getter,
309 const PaddedPODArray<Key> & ids_to_fill,
310 const Columns & /*keys*/,
311 const DataTypes & /*data_types*/,
312 Container & container,
313 const DictionaryAttribute & attribute,
314 const DictionaryType & dict) const
315{
316 (dict.*getter)(attribute.name, ids_to_fill, container);
317}
318
319template <typename DictionaryType, typename Key>
320template <typename Type, typename Container>
321void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
322 DictionaryDecimalGetter<Type> getter,
323 const PaddedPODArray<Key> & ids_to_fill,
324 const Columns & /*keys*/,
325 const DataTypes & /*data_types*/,
326 Container & container,
327 const DictionaryAttribute & attribute,
328 const DictionaryType & dict) const
329{
330 (dict.*getter)(attribute.name, ids_to_fill, container);
331}
332
333template <typename DictionaryType, typename Key>
334template <typename Container>
335void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
336 DictionaryStringGetter getter,
337 const PaddedPODArray<Key> & ids_to_fill,
338 const Columns & /*keys*/,
339 const DataTypes & /*data_types*/,
340 Container & container,
341 const DictionaryAttribute & attribute,
342 const DictionaryType & dict) const
343{
344 (dict.*getter)(attribute.name, ids_to_fill, container);
345}
346
347template <typename DictionaryType, typename Key>
348template <typename Type, typename Container>
349void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
350 GetterByKey<Type> getter,
351 const PaddedPODArray<Key> & /*ids_to_fill*/,
352 const Columns & keys,
353 const DataTypes & data_types,
354 Container & container,
355 const DictionaryAttribute & attribute,
356 const DictionaryType & dict) const
357{
358 (dict.*getter)(attribute.name, keys, data_types, container);
359}
360
361template <typename DictionaryType, typename Key>
362template <typename Type, typename Container>
363void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
364 DecimalGetterByKey<Type> getter,
365 const PaddedPODArray<Key> & /*ids_to_fill*/,
366 const Columns & keys,
367 const DataTypes & data_types,
368 Container & container,
369 const DictionaryAttribute & attribute,
370 const DictionaryType & dict) const
371{
372 (dict.*getter)(attribute.name, keys, data_types, container);
373}
374
375template <typename DictionaryType, typename Key>
376template <typename Container>
377void DictionaryBlockInputStream<DictionaryType, Key>::callGetter(
378 StringGetterByKey getter,
379 const PaddedPODArray<Key> & /*ids_to_fill*/,
380 const Columns & keys,
381 const DataTypes & data_types,
382 Container & container,
383 const DictionaryAttribute & attribute,
384 const DictionaryType & dict) const
385{
386 (dict.*getter)(attribute.name, keys, data_types, container);
387}
388
389
390template <typename DictionaryType, typename Key>
391template <template <typename> class Getter, template <typename> class DecimalGetter, typename StringGetter>
392Block DictionaryBlockInputStream<DictionaryType, Key>::fillBlock(
393 const PaddedPODArray<Key> & ids_to_fill, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const
394{
395 std::unordered_set<std::string> names(column_names.begin(), column_names.end());
396
397 DataTypes data_types = types;
398 ColumnsWithTypeAndName block_columns;
399
400 data_types.reserve(keys.size());
401 const DictionaryStructure & dictionaty_structure = dictionary->getStructure();
402 if (data_types.empty() && dictionaty_structure.key)
403 for (const auto & key : *dictionaty_structure.key)
404 data_types.push_back(key.type);
405
406 for (const auto & column : view)
407 if (names.find(column.name) != names.end())
408 block_columns.push_back(column);
409
410 const DictionaryStructure & structure = dictionary->getStructure();
411
412 if (structure.id && names.find(structure.id->name) != names.end())
413 block_columns.emplace_back(getColumnFromIds(ids_to_fill), std::make_shared<DataTypeUInt64>(), structure.id->name);
414
415 for (const auto idx : ext::range(0, structure.attributes.size()))
416 {
417 const DictionaryAttribute & attribute = structure.attributes[idx];
418 if (names.find(attribute.name) != names.end())
419 {
420 ColumnPtr column;
421#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
422 column = getColumnFromAttribute<TYPE, Getter<TYPE>>(&DictionaryType::get##TYPE, ids_to_fill, keys, data_types, attribute, *dictionary)
423 switch (attribute.underlying_type)
424 {
425 case AttributeUnderlyingType::utUInt8:
426 GET_COLUMN_FORM_ATTRIBUTE(UInt8);
427 break;
428 case AttributeUnderlyingType::utUInt16:
429 GET_COLUMN_FORM_ATTRIBUTE(UInt16);
430 break;
431 case AttributeUnderlyingType::utUInt32:
432 GET_COLUMN_FORM_ATTRIBUTE(UInt32);
433 break;
434 case AttributeUnderlyingType::utUInt64:
435 GET_COLUMN_FORM_ATTRIBUTE(UInt64);
436 break;
437 case AttributeUnderlyingType::utUInt128:
438 GET_COLUMN_FORM_ATTRIBUTE(UInt128);
439 break;
440 case AttributeUnderlyingType::utInt8:
441 GET_COLUMN_FORM_ATTRIBUTE(Int8);
442 break;
443 case AttributeUnderlyingType::utInt16:
444 GET_COLUMN_FORM_ATTRIBUTE(Int16);
445 break;
446 case AttributeUnderlyingType::utInt32:
447 GET_COLUMN_FORM_ATTRIBUTE(Int32);
448 break;
449 case AttributeUnderlyingType::utInt64:
450 GET_COLUMN_FORM_ATTRIBUTE(Int64);
451 break;
452 case AttributeUnderlyingType::utFloat32:
453 GET_COLUMN_FORM_ATTRIBUTE(Float32);
454 break;
455 case AttributeUnderlyingType::utFloat64:
456 GET_COLUMN_FORM_ATTRIBUTE(Float64);
457 break;
458 case AttributeUnderlyingType::utDecimal32:
459 {
460 column = getColumnFromAttribute<Decimal32, DecimalGetter<Decimal32>>(
461 &DictionaryType::getDecimal32, ids_to_fill, keys, data_types, attribute, *dictionary);
462 break;
463 }
464 case AttributeUnderlyingType::utDecimal64:
465 {
466 column = getColumnFromAttribute<Decimal64, DecimalGetter<Decimal64>>(
467 &DictionaryType::getDecimal64, ids_to_fill, keys, data_types, attribute, *dictionary);
468 break;
469 }
470 case AttributeUnderlyingType::utDecimal128:
471 {
472 column = getColumnFromAttribute<Decimal128, DecimalGetter<Decimal128>>(
473 &DictionaryType::getDecimal128, ids_to_fill, keys, data_types, attribute, *dictionary);
474 break;
475 }
476 case AttributeUnderlyingType::utString:
477 {
478 column = getColumnFromStringAttribute<StringGetter>(
479 &DictionaryType::getString, ids_to_fill, keys, data_types, attribute, *dictionary);
480 break;
481 }
482 }
483#undef GET_COLUMN_FORM_ATTRIBUTE
484 block_columns.emplace_back(column, attribute.type, attribute.name);
485 }
486 }
487 return Block(block_columns);
488}
489
490
491template <typename DictionaryType, typename Key>
492template <typename AttributeType, typename Getter>
493ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromAttribute(
494 Getter getter,
495 const PaddedPODArray<Key> & ids_to_fill,
496 const Columns & keys,
497 const DataTypes & data_types,
498 const DictionaryAttribute & attribute,
499 const DictionaryType & dict) const
500{
501 if constexpr (IsDecimalNumber<AttributeType>)
502 {
503 auto size = ids_to_fill.size();
504 if (!keys.empty())
505 size = keys.front()->size();
506 auto column = ColumnDecimal<AttributeType>::create(size, 0); /// NOTE: There's wrong scale here, but it's unused.
507 callGetter(getter, ids_to_fill, keys, data_types, column->getData(), attribute, dict);
508 return column;
509 }
510 else
511 {
512 auto size = ids_to_fill.size();
513 if (!keys.empty())
514 size = keys.front()->size();
515 auto column_vector = ColumnVector<AttributeType>::create(size);
516 callGetter(getter, ids_to_fill, keys, data_types, column_vector->getData(), attribute, dict);
517 return column_vector;
518 }
519}
520
521
522template <typename DictionaryType, typename Key>
523template <typename Getter>
524ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromStringAttribute(
525 Getter getter,
526 const PaddedPODArray<Key> & ids_to_fill,
527 const Columns & keys,
528 const DataTypes & data_types,
529 const DictionaryAttribute & attribute,
530 const DictionaryType & dict) const
531{
532 auto column_string = ColumnString::create();
533 auto ptr = column_string.get();
534 callGetter(getter, ids_to_fill, keys, data_types, ptr, attribute, dict);
535 return column_string;
536}
537
538
539template <typename DictionaryType, typename Key>
540ColumnPtr DictionaryBlockInputStream<DictionaryType, Key>::getColumnFromIds(const PaddedPODArray<Key> & ids_to_fill) const
541{
542 auto column_vector = ColumnVector<UInt64>::create();
543 column_vector->getData().reserve(ids_to_fill.size());
544 for (UInt64 id : ids_to_fill)
545 column_vector->insertValue(id);
546 return column_vector;
547}
548
549
550template <typename DictionaryType, typename Key>
551void DictionaryBlockInputStream<DictionaryType, Key>::fillKeyColumns(
552 const std::vector<StringRef> & keys,
553 size_t start,
554 size_t size,
555 const DictionaryStructure & dictionary_structure,
556 ColumnsWithTypeAndName & res) const
557{
558 MutableColumns columns;
559 columns.reserve(dictionary_structure.key->size());
560
561 for (const DictionaryAttribute & attribute : *dictionary_structure.key)
562 columns.emplace_back(attribute.type->createColumn());
563
564 for (auto idx : ext::range(start, size))
565 {
566 const auto & key = keys[idx];
567 auto ptr = key.data;
568 for (auto & column : columns)
569 ptr = column->deserializeAndInsertFromArena(ptr);
570 }
571
572 for (size_t i = 0, num_columns = columns.size(); i < num_columns; ++i)
573 res.emplace_back(
574 ColumnWithTypeAndName{std::move(columns[i]), (*dictionary_structure.key)[i].type, (*dictionary_structure.key)[i].name});
575}
576
577}
578