1#pragma once
2#include <Columns/ColumnString.h>
3#include <Columns/ColumnVector.h>
4#include <Columns/IColumn.h>
5#include <DataStreams/IBlockInputStream.h>
6#include <DataTypes/DataTypeDate.h>
7#include <DataTypes/DataTypesNumber.h>
8#include <ext/range.h>
9#include "DictionaryBlockInputStreamBase.h"
10#include "DictionaryStructure.h"
11#include "IDictionary.h"
12#include "RangeHashedDictionary.h"
13
14namespace DB
15{
16/*
17 * BlockInputStream implementation for external dictionaries
18 * read() returns single block consisting of the in-memory contents of the dictionaries
19 */
20template <typename DictionaryType, typename RangeType, typename Key>
21class RangeDictionaryBlockInputStream : public DictionaryBlockInputStreamBase
22{
23public:
24 using DictionaryPtr = std::shared_ptr<DictionaryType const>;
25
26 RangeDictionaryBlockInputStream(
27 DictionaryPtr dictionary,
28 size_t max_block_size,
29 const Names & column_names,
30 PaddedPODArray<Key> && ids_to_fill,
31 PaddedPODArray<RangeType> && start_dates,
32 PaddedPODArray<RangeType> && end_dates);
33
34 String getName() const override { return "RangeDictionary"; }
35
36protected:
37 Block getBlock(size_t start, size_t length) const override;
38
39private:
40 template <typename Type>
41 using DictionaryGetter = void (DictionaryType::*)(
42 const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, PaddedPODArray<Type> &) const;
43
44 template <typename Type>
45 using DictionaryDecimalGetter = void (DictionaryType::*)(
46 const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, DecimalPaddedPODArray<Type> &) const;
47
48 template <typename AttributeType, typename Getter>
49 ColumnPtr getColumnFromAttribute(
50 Getter getter,
51 const PaddedPODArray<Key> & ids_to_fill,
52 const PaddedPODArray<Int64> & dates,
53 const DictionaryAttribute & attribute,
54 const DictionaryType & concrete_dictionary) const;
55 ColumnPtr getColumnFromAttributeString(
56 const PaddedPODArray<Key> & ids_to_fill,
57 const PaddedPODArray<Int64> & dates,
58 const DictionaryAttribute & attribute,
59 const DictionaryType & concrete_dictionary) const;
60 template <typename T>
61 ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array) const;
62
63 template <typename DictionarySpecialAttributeType, typename T>
64 void addSpecialColumn(
65 const std::optional<DictionarySpecialAttributeType> & attribute,
66 DataTypePtr type,
67 const std::string & default_name,
68 const std::unordered_set<std::string> & column_names_set,
69 const PaddedPODArray<T> & values,
70 ColumnsWithTypeAndName & columns) const;
71
72 Block fillBlock(
73 const PaddedPODArray<Key> & ids_to_fill,
74 const PaddedPODArray<RangeType> & block_start_dates,
75 const PaddedPODArray<RangeType> & block_end_dates) const;
76
77 PaddedPODArray<Int64>
78 makeDateKey(const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_end_dates) const;
79
80 DictionaryPtr dictionary;
81 Names column_names;
82 PaddedPODArray<Key> ids;
83 PaddedPODArray<RangeType> start_dates;
84 PaddedPODArray<RangeType> end_dates;
85};
86
87
88template <typename DictionaryType, typename RangeType, typename Key>
89RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::RangeDictionaryBlockInputStream(
90 DictionaryPtr dictionary_,
91 size_t max_block_size_,
92 const Names & column_names_,
93 PaddedPODArray<Key> && ids_,
94 PaddedPODArray<RangeType> && block_start_dates,
95 PaddedPODArray<RangeType> && block_end_dates)
96 : DictionaryBlockInputStreamBase(ids_.size(), max_block_size_)
97 , dictionary(dictionary_)
98 , column_names(column_names_)
99 , ids(std::move(ids_))
100 , start_dates(std::move(block_start_dates))
101 , end_dates(std::move(block_end_dates))
102{
103}
104
105template <typename DictionaryType, typename RangeType, typename Key>
106Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getBlock(size_t start, size_t length) const
107{
108 PaddedPODArray<Key> block_ids;
109 PaddedPODArray<RangeType> block_start_dates;
110 PaddedPODArray<RangeType> block_end_dates;
111 block_ids.reserve(length);
112 block_start_dates.reserve(length);
113 block_end_dates.reserve(length);
114
115 for (auto idx : ext::range(start, start + length))
116 {
117 block_ids.push_back(ids[idx]);
118 block_start_dates.push_back(start_dates[idx]);
119 block_end_dates.push_back(end_dates[idx]);
120 }
121
122 return fillBlock(block_ids, block_start_dates, block_end_dates);
123}
124
125template <typename DictionaryType, typename RangeType, typename Key>
126template <typename AttributeType, typename Getter>
127ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttribute(
128 Getter getter,
129 const PaddedPODArray<Key> & ids_to_fill,
130 const PaddedPODArray<Int64> & dates,
131 const DictionaryAttribute & attribute,
132 const DictionaryType & concrete_dictionary) const
133{
134 if constexpr (IsDecimalNumber<AttributeType>)
135 {
136 auto column = ColumnDecimal<AttributeType>::create(ids_to_fill.size(), 0); /// NOTE: There's wrong scale here, but it's unused.
137 (concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column->getData());
138 return column;
139 }
140 else
141 {
142 auto column_vector = ColumnVector<AttributeType>::create(ids_to_fill.size());
143 (concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column_vector->getData());
144 return column_vector;
145 }
146}
147
148template <typename DictionaryType, typename RangeType, typename Key>
149ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttributeString(
150 const PaddedPODArray<Key> & ids_to_fill,
151 const PaddedPODArray<Int64> & dates,
152 const DictionaryAttribute & attribute,
153 const DictionaryType & concrete_dictionary) const
154{
155 auto column_string = ColumnString::create();
156 concrete_dictionary.getString(attribute.name, ids_to_fill, dates, column_string.get());
157 return column_string;
158}
159
160template <typename DictionaryType, typename RangeType, typename Key>
161template <typename T>
162ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromPODArray(const PaddedPODArray<T> & array) const
163{
164 auto column_vector = ColumnVector<T>::create();
165 column_vector->getData().reserve(array.size());
166 for (T value : array)
167 column_vector->insertValue(value);
168 return column_vector;
169}
170
171
172template <typename DictionaryType, typename RangeType, typename Key>
173template <typename DictionarySpecialAttributeType, typename T>
174void RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::addSpecialColumn(
175 const std::optional<DictionarySpecialAttributeType> & attribute,
176 DataTypePtr type,
177 const std::string & default_name,
178 const std::unordered_set<std::string> & column_names_set,
179 const PaddedPODArray<T> & values,
180 ColumnsWithTypeAndName & columns) const
181{
182 std::string name = default_name;
183 if (attribute)
184 name = attribute->name;
185
186 if (column_names_set.find(name) != column_names_set.end())
187 columns.emplace_back(getColumnFromPODArray(values), type, name);
188}
189
190template <typename DictionaryType, typename RangeType, typename Key>
191PaddedPODArray<Int64> RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::makeDateKey(
192 const PaddedPODArray<RangeType> & block_start_dates, const PaddedPODArray<RangeType> & block_end_dates) const
193{
194 PaddedPODArray<Int64> key(block_start_dates.size());
195 for (size_t i = 0; i < key.size(); ++i)
196 {
197 if (RangeHashedDictionary::Range::isCorrectDate(block_start_dates[i]))
198 key[i] = block_start_dates[i];
199 else
200 key[i] = block_end_dates[i];
201 }
202
203 return key;
204}
205
206
207template <typename DictionaryType, typename RangeType, typename Key>
208Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::fillBlock(
209 const PaddedPODArray<Key> & ids_to_fill,
210 const PaddedPODArray<RangeType> & block_start_dates,
211 const PaddedPODArray<RangeType> & block_end_dates) const
212{
213 ColumnsWithTypeAndName columns;
214 const DictionaryStructure & structure = dictionary->getStructure();
215
216 std::unordered_set<std::string> names(column_names.begin(), column_names.end());
217
218 addSpecialColumn(structure.id, std::make_shared<DataTypeUInt64>(), "ID", names, ids_to_fill, columns);
219 addSpecialColumn(structure.range_min, structure.range_max->type, "Range Start", names, block_start_dates, columns);
220 addSpecialColumn(structure.range_max, structure.range_max->type, "Range End", names, block_end_dates, columns);
221
222 auto date_key = makeDateKey(block_start_dates, block_end_dates);
223
224 for (const auto idx : ext::range(0, structure.attributes.size()))
225 {
226 const DictionaryAttribute & attribute = structure.attributes[idx];
227 if (names.find(attribute.name) != names.end())
228 {
229 ColumnPtr column;
230#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
231 column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids_to_fill, date_key, attribute, *dictionary)
232 switch (attribute.underlying_type)
233 {
234 case AttributeUnderlyingType::utUInt8:
235 GET_COLUMN_FORM_ATTRIBUTE(UInt8);
236 break;
237 case AttributeUnderlyingType::utUInt16:
238 GET_COLUMN_FORM_ATTRIBUTE(UInt16);
239 break;
240 case AttributeUnderlyingType::utUInt32:
241 GET_COLUMN_FORM_ATTRIBUTE(UInt32);
242 break;
243 case AttributeUnderlyingType::utUInt64:
244 GET_COLUMN_FORM_ATTRIBUTE(UInt64);
245 break;
246 case AttributeUnderlyingType::utUInt128:
247 GET_COLUMN_FORM_ATTRIBUTE(UInt128);
248 break;
249 case AttributeUnderlyingType::utInt8:
250 GET_COLUMN_FORM_ATTRIBUTE(Int8);
251 break;
252 case AttributeUnderlyingType::utInt16:
253 GET_COLUMN_FORM_ATTRIBUTE(Int16);
254 break;
255 case AttributeUnderlyingType::utInt32:
256 GET_COLUMN_FORM_ATTRIBUTE(Int32);
257 break;
258 case AttributeUnderlyingType::utInt64:
259 GET_COLUMN_FORM_ATTRIBUTE(Int64);
260 break;
261 case AttributeUnderlyingType::utFloat32:
262 GET_COLUMN_FORM_ATTRIBUTE(Float32);
263 break;
264 case AttributeUnderlyingType::utFloat64:
265 GET_COLUMN_FORM_ATTRIBUTE(Float64);
266 break;
267 case AttributeUnderlyingType::utDecimal32:
268 GET_COLUMN_FORM_ATTRIBUTE(Decimal32);
269 break;
270 case AttributeUnderlyingType::utDecimal64:
271 GET_COLUMN_FORM_ATTRIBUTE(Decimal64);
272 break;
273 case AttributeUnderlyingType::utDecimal128:
274 GET_COLUMN_FORM_ATTRIBUTE(Decimal128);
275 break;
276 case AttributeUnderlyingType::utString:
277 column = getColumnFromAttributeString(ids_to_fill, date_key, attribute, *dictionary);
278 break;
279 }
280#undef GET_COLUMN_FORM_ATTRIBUTE
281 columns.emplace_back(column, attribute.type, attribute.name);
282 }
283 }
284 return Block(columns);
285}
286
287}
288