1 | #include "FlatDictionary.h" |
2 | #include <IO/WriteHelpers.h> |
3 | #include "DictionaryBlockInputStream.h" |
4 | #include "DictionaryFactory.h" |
5 | |
6 | namespace DB |
7 | { |
/// Error codes used by this translation unit. They are only declared here (extern).
namespace ErrorCodes
{
    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int BAD_ARGUMENTS;
    extern const int DICTIONARY_IS_EMPTY;
    extern const int LOGICAL_ERROR;
    extern const int TYPE_MISMATCH;
    extern const int UNKNOWN_TYPE;
    extern const int UNSUPPORTED_METHOD;
}
18 | |
/// Number of elements every attribute array and the loaded-id bitmap start with.
static constexpr auto initial_array_size = 1024;
/// Keys must be strictly less than this value; larger keys are rejected when storing.
static constexpr auto max_array_size = 500000;
21 | |
22 | |
23 | FlatDictionary::FlatDictionary( |
24 | const std::string & database_, |
25 | const std::string & name_, |
26 | const DictionaryStructure & dict_struct_, |
27 | DictionarySourcePtr source_ptr_, |
28 | const DictionaryLifetime dict_lifetime_, |
29 | bool require_nonempty_, |
30 | BlockPtr saved_block_) |
31 | : database(database_) |
32 | , name(name_) |
33 | , full_name{database_.empty() ? name_ : (database_ + "." + name_)} |
34 | , dict_struct(dict_struct_) |
35 | , source_ptr{std::move(source_ptr_)} |
36 | , dict_lifetime(dict_lifetime_) |
37 | , require_nonempty(require_nonempty_) |
38 | , loaded_ids(initial_array_size, false) |
39 | , saved_block{std::move(saved_block_)} |
40 | { |
41 | createAttributes(); |
42 | loadData(); |
43 | calculateBytesAllocated(); |
44 | } |
45 | |
46 | |
47 | void FlatDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const |
48 | { |
49 | const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values); |
50 | |
51 | getItemsImpl<UInt64, UInt64>( |
52 | *hierarchical_attribute, |
53 | ids, |
54 | [&](const size_t row, const UInt64 value) { out[row] = value; }, |
55 | [&](const size_t) { return null_value; }); |
56 | } |
57 | |
58 | |
59 | /// Allow to use single value in same way as array. |
60 | static inline FlatDictionary::Key getAt(const PaddedPODArray<FlatDictionary::Key> & arr, const size_t idx) |
61 | { |
62 | return arr[idx]; |
63 | } |
64 | static inline FlatDictionary::Key getAt(const FlatDictionary::Key & value, const size_t) |
65 | { |
66 | return value; |
67 | } |
68 | |
69 | template <typename ChildType, typename AncestorType> |
70 | void FlatDictionary::isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const |
71 | { |
72 | const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values); |
73 | const auto & attr = std::get<ContainerType<Key>>(hierarchical_attribute->arrays); |
74 | const auto rows = out.size(); |
75 | |
76 | size_t loaded_size = attr.size(); |
77 | for (const auto row : ext::range(0, rows)) |
78 | { |
79 | auto id = getAt(child_ids, row); |
80 | const auto ancestor_id = getAt(ancestor_ids, row); |
81 | |
82 | while (id < loaded_size && id != null_value && id != ancestor_id) |
83 | id = attr[id]; |
84 | |
85 | out[row] = id != null_value && id == ancestor_id; |
86 | } |
87 | |
88 | query_count.fetch_add(rows, std::memory_order_relaxed); |
89 | } |
90 | |
91 | |
92 | void FlatDictionary::isInVectorVector( |
93 | const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const |
94 | { |
95 | isInImpl(child_ids, ancestor_ids, out); |
96 | } |
97 | |
98 | void FlatDictionary::isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const |
99 | { |
100 | isInImpl(child_ids, ancestor_id, out); |
101 | } |
102 | |
103 | void FlatDictionary::isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const |
104 | { |
105 | isInImpl(child_id, ancestor_ids, out); |
106 | } |
107 | |
108 | |
109 | #define DECLARE(TYPE) \ |
110 | void FlatDictionary::get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const \ |
111 | { \ |
112 | const auto & attribute = getAttribute(attribute_name); \ |
113 | checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \ |
114 | \ |
115 | const auto null_value = std::get<TYPE>(attribute.null_values); \ |
116 | \ |
117 | getItemsImpl<TYPE, TYPE>( \ |
118 | attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return null_value; }); \ |
119 | } |
120 | DECLARE(UInt8) |
121 | DECLARE(UInt16) |
122 | DECLARE(UInt32) |
123 | DECLARE(UInt64) |
124 | DECLARE(UInt128) |
125 | DECLARE(Int8) |
126 | DECLARE(Int16) |
127 | DECLARE(Int32) |
128 | DECLARE(Int64) |
129 | DECLARE(Float32) |
130 | DECLARE(Float64) |
131 | DECLARE(Decimal32) |
132 | DECLARE(Decimal64) |
133 | DECLARE(Decimal128) |
134 | #undef DECLARE |
135 | |
136 | void FlatDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const |
137 | { |
138 | const auto & attribute = getAttribute(attribute_name); |
139 | checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::utString); |
140 | |
141 | const auto & null_value = std::get<StringRef>(attribute.null_values); |
142 | |
143 | getItemsImpl<StringRef, StringRef>( |
144 | attribute, |
145 | ids, |
146 | [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, |
147 | [&](const size_t) { return null_value; }); |
148 | } |
149 | |
150 | #define DECLARE(TYPE) \ |
151 | void FlatDictionary::get##TYPE( \ |
152 | const std::string & attribute_name, \ |
153 | const PaddedPODArray<Key> & ids, \ |
154 | const PaddedPODArray<TYPE> & def, \ |
155 | ResultArrayType<TYPE> & out) const \ |
156 | { \ |
157 | const auto & attribute = getAttribute(attribute_name); \ |
158 | checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \ |
159 | \ |
160 | getItemsImpl<TYPE, TYPE>( \ |
161 | attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t row) { return def[row]; }); \ |
162 | } |
163 | DECLARE(UInt8) |
164 | DECLARE(UInt16) |
165 | DECLARE(UInt32) |
166 | DECLARE(UInt64) |
167 | DECLARE(UInt128) |
168 | DECLARE(Int8) |
169 | DECLARE(Int16) |
170 | DECLARE(Int32) |
171 | DECLARE(Int64) |
172 | DECLARE(Float32) |
173 | DECLARE(Float64) |
174 | DECLARE(Decimal32) |
175 | DECLARE(Decimal64) |
176 | DECLARE(Decimal128) |
177 | #undef DECLARE |
178 | |
179 | void FlatDictionary::getString( |
180 | const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const |
181 | { |
182 | const auto & attribute = getAttribute(attribute_name); |
183 | checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::utString); |
184 | |
185 | getItemsImpl<StringRef, StringRef>( |
186 | attribute, |
187 | ids, |
188 | [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, |
189 | [&](const size_t row) { return def->getDataAt(row); }); |
190 | } |
191 | |
192 | #define DECLARE(TYPE) \ |
193 | void FlatDictionary::get##TYPE( \ |
194 | const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const \ |
195 | { \ |
196 | const auto & attribute = getAttribute(attribute_name); \ |
197 | checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \ |
198 | \ |
199 | getItemsImpl<TYPE, TYPE>( \ |
200 | attribute, ids, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \ |
201 | } |
202 | DECLARE(UInt8) |
203 | DECLARE(UInt16) |
204 | DECLARE(UInt32) |
205 | DECLARE(UInt64) |
206 | DECLARE(UInt128) |
207 | DECLARE(Int8) |
208 | DECLARE(Int16) |
209 | DECLARE(Int32) |
210 | DECLARE(Int64) |
211 | DECLARE(Float32) |
212 | DECLARE(Float64) |
213 | DECLARE(Decimal32) |
214 | DECLARE(Decimal64) |
215 | DECLARE(Decimal128) |
216 | #undef DECLARE |
217 | |
218 | void FlatDictionary::getString( |
219 | const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const |
220 | { |
221 | const auto & attribute = getAttribute(attribute_name); |
222 | checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::utString); |
223 | |
224 | FlatDictionary::getItemsImpl<StringRef, StringRef>( |
225 | attribute, |
226 | ids, |
227 | [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, |
228 | [&](const size_t) { return StringRef{def}; }); |
229 | } |
230 | |
231 | |
232 | void FlatDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const |
233 | { |
234 | const auto & attribute = attributes.front(); |
235 | |
236 | switch (attribute.type) |
237 | { |
238 | case AttributeUnderlyingType::utUInt8: |
239 | has<UInt8>(attribute, ids, out); |
240 | break; |
241 | case AttributeUnderlyingType::utUInt16: |
242 | has<UInt16>(attribute, ids, out); |
243 | break; |
244 | case AttributeUnderlyingType::utUInt32: |
245 | has<UInt32>(attribute, ids, out); |
246 | break; |
247 | case AttributeUnderlyingType::utUInt64: |
248 | has<UInt64>(attribute, ids, out); |
249 | break; |
250 | case AttributeUnderlyingType::utUInt128: |
251 | has<UInt128>(attribute, ids, out); |
252 | break; |
253 | case AttributeUnderlyingType::utInt8: |
254 | has<Int8>(attribute, ids, out); |
255 | break; |
256 | case AttributeUnderlyingType::utInt16: |
257 | has<Int16>(attribute, ids, out); |
258 | break; |
259 | case AttributeUnderlyingType::utInt32: |
260 | has<Int32>(attribute, ids, out); |
261 | break; |
262 | case AttributeUnderlyingType::utInt64: |
263 | has<Int64>(attribute, ids, out); |
264 | break; |
265 | case AttributeUnderlyingType::utFloat32: |
266 | has<Float32>(attribute, ids, out); |
267 | break; |
268 | case AttributeUnderlyingType::utFloat64: |
269 | has<Float64>(attribute, ids, out); |
270 | break; |
271 | case AttributeUnderlyingType::utString: |
272 | has<String>(attribute, ids, out); |
273 | break; |
274 | |
275 | case AttributeUnderlyingType::utDecimal32: |
276 | has<Decimal32>(attribute, ids, out); |
277 | break; |
278 | case AttributeUnderlyingType::utDecimal64: |
279 | has<Decimal64>(attribute, ids, out); |
280 | break; |
281 | case AttributeUnderlyingType::utDecimal128: |
282 | has<Decimal128>(attribute, ids, out); |
283 | break; |
284 | } |
285 | } |
286 | |
287 | |
288 | void FlatDictionary::createAttributes() |
289 | { |
290 | const auto size = dict_struct.attributes.size(); |
291 | attributes.reserve(size); |
292 | |
293 | for (const auto & attribute : dict_struct.attributes) |
294 | { |
295 | attribute_index_by_name.emplace(attribute.name, attributes.size()); |
296 | attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value)); |
297 | |
298 | if (attribute.hierarchical) |
299 | { |
300 | hierarchical_attribute = &attributes.back(); |
301 | |
302 | if (hierarchical_attribute->type != AttributeUnderlyingType::utUInt64) |
303 | throw Exception{full_name + ": hierarchical attribute must be UInt64." , ErrorCodes::TYPE_MISMATCH}; |
304 | } |
305 | } |
306 | } |
307 | |
308 | void FlatDictionary::blockToAttributes(const Block & block) |
309 | { |
310 | const IColumn & id_column = *block.safeGetByPosition(0).column; |
311 | element_count += id_column.size(); |
312 | |
313 | for (const size_t attribute_idx : ext::range(0, attributes.size())) |
314 | { |
315 | const IColumn & attribute_column = *block.safeGetByPosition(attribute_idx + 1).column; |
316 | Attribute & attribute = attributes[attribute_idx]; |
317 | |
318 | for (const auto row_idx : ext::range(0, id_column.size())) |
319 | setAttributeValue(attribute, id_column[row_idx].get<UInt64>(), attribute_column[row_idx]); |
320 | } |
321 | } |
322 | |
323 | void FlatDictionary::updateData() |
324 | { |
325 | if (!saved_block || saved_block->rows() == 0) |
326 | { |
327 | auto stream = source_ptr->loadUpdatedAll(); |
328 | stream->readPrefix(); |
329 | |
330 | while (const auto block = stream->read()) |
331 | { |
332 | /// We are using this to keep saved data if input stream consists of multiple blocks |
333 | if (!saved_block) |
334 | saved_block = std::make_shared<DB::Block>(block.cloneEmpty()); |
335 | for (const auto attribute_idx : ext::range(0, attributes.size() + 1)) |
336 | { |
337 | const IColumn & update_column = *block.getByPosition(attribute_idx).column.get(); |
338 | MutableColumnPtr saved_column = saved_block->getByPosition(attribute_idx).column->assumeMutable(); |
339 | saved_column->insertRangeFrom(update_column, 0, update_column.size()); |
340 | } |
341 | } |
342 | stream->readSuffix(); |
343 | } |
344 | else |
345 | { |
346 | auto stream = source_ptr->loadUpdatedAll(); |
347 | stream->readPrefix(); |
348 | |
349 | while (Block block = stream->read()) |
350 | { |
351 | const auto & saved_id_column = *saved_block->safeGetByPosition(0).column; |
352 | const auto & update_id_column = *block.safeGetByPosition(0).column; |
353 | |
354 | std::unordered_map<Key, std::vector<size_t>> update_ids; |
355 | for (size_t row = 0; row < update_id_column.size(); ++row) |
356 | { |
357 | const auto id = update_id_column.get64(row); |
358 | update_ids[id].push_back(row); |
359 | } |
360 | |
361 | const size_t saved_rows = saved_id_column.size(); |
362 | IColumn::Filter filter(saved_rows); |
363 | std::unordered_map<Key, std::vector<size_t>>::iterator it; |
364 | |
365 | for (size_t row = 0; row < saved_id_column.size(); ++row) |
366 | { |
367 | auto id = saved_id_column.get64(row); |
368 | it = update_ids.find(id); |
369 | |
370 | if (it != update_ids.end()) |
371 | filter[row] = 0; |
372 | else |
373 | filter[row] = 1; |
374 | } |
375 | |
376 | auto block_columns = block.mutateColumns(); |
377 | for (const auto attribute_idx : ext::range(0, attributes.size() + 1)) |
378 | { |
379 | auto & column = saved_block->safeGetByPosition(attribute_idx).column; |
380 | const auto & filtered_column = column->filter(filter, -1); |
381 | |
382 | block_columns[attribute_idx]->insertRangeFrom(*filtered_column.get(), 0, filtered_column->size()); |
383 | } |
384 | |
385 | saved_block->setColumns(std::move(block_columns)); |
386 | } |
387 | stream->readSuffix(); |
388 | } |
389 | |
390 | if (saved_block) |
391 | blockToAttributes(*saved_block.get()); |
392 | } |
393 | |
394 | void FlatDictionary::loadData() |
395 | { |
396 | if (!source_ptr->hasUpdateField()) |
397 | { |
398 | auto stream = source_ptr->loadAll(); |
399 | stream->readPrefix(); |
400 | |
401 | while (const auto block = stream->read()) |
402 | blockToAttributes(block); |
403 | |
404 | stream->readSuffix(); |
405 | } |
406 | else |
407 | updateData(); |
408 | |
409 | if (require_nonempty && 0 == element_count) |
410 | throw Exception{full_name + ": dictionary source is empty and 'require_nonempty' property is set." , ErrorCodes::DICTIONARY_IS_EMPTY}; |
411 | } |
412 | |
413 | |
414 | template <typename T> |
415 | void FlatDictionary::addAttributeSize(const Attribute & attribute) |
416 | { |
417 | const auto & array_ref = std::get<ContainerType<T>>(attribute.arrays); |
418 | bytes_allocated += sizeof(PaddedPODArray<T>) + array_ref.allocated_bytes(); |
419 | bucket_count = array_ref.capacity(); |
420 | } |
421 | |
422 | |
423 | void FlatDictionary::calculateBytesAllocated() |
424 | { |
425 | bytes_allocated += attributes.size() * sizeof(attributes.front()); |
426 | |
427 | for (const auto & attribute : attributes) |
428 | { |
429 | switch (attribute.type) |
430 | { |
431 | case AttributeUnderlyingType::utUInt8: |
432 | addAttributeSize<UInt8>(attribute); |
433 | break; |
434 | case AttributeUnderlyingType::utUInt16: |
435 | addAttributeSize<UInt16>(attribute); |
436 | break; |
437 | case AttributeUnderlyingType::utUInt32: |
438 | addAttributeSize<UInt32>(attribute); |
439 | break; |
440 | case AttributeUnderlyingType::utUInt64: |
441 | addAttributeSize<UInt64>(attribute); |
442 | break; |
443 | case AttributeUnderlyingType::utUInt128: |
444 | addAttributeSize<UInt128>(attribute); |
445 | break; |
446 | case AttributeUnderlyingType::utInt8: |
447 | addAttributeSize<Int8>(attribute); |
448 | break; |
449 | case AttributeUnderlyingType::utInt16: |
450 | addAttributeSize<Int16>(attribute); |
451 | break; |
452 | case AttributeUnderlyingType::utInt32: |
453 | addAttributeSize<Int32>(attribute); |
454 | break; |
455 | case AttributeUnderlyingType::utInt64: |
456 | addAttributeSize<Int64>(attribute); |
457 | break; |
458 | case AttributeUnderlyingType::utFloat32: |
459 | addAttributeSize<Float32>(attribute); |
460 | break; |
461 | case AttributeUnderlyingType::utFloat64: |
462 | addAttributeSize<Float64>(attribute); |
463 | break; |
464 | |
465 | case AttributeUnderlyingType::utDecimal32: |
466 | addAttributeSize<Decimal32>(attribute); |
467 | break; |
468 | case AttributeUnderlyingType::utDecimal64: |
469 | addAttributeSize<Decimal64>(attribute); |
470 | break; |
471 | case AttributeUnderlyingType::utDecimal128: |
472 | addAttributeSize<Decimal128>(attribute); |
473 | break; |
474 | |
475 | case AttributeUnderlyingType::utString: |
476 | { |
477 | addAttributeSize<StringRef>(attribute); |
478 | bytes_allocated += sizeof(Arena) + attribute.string_arena->size(); |
479 | |
480 | break; |
481 | } |
482 | } |
483 | } |
484 | } |
485 | |
486 | |
487 | template <typename T> |
488 | void FlatDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value) |
489 | { |
490 | attribute.null_values = T(null_value.get<NearestFieldType<T>>()); |
491 | const auto & null_value_ref = std::get<T>(attribute.null_values); |
492 | attribute.arrays.emplace<ContainerType<T>>(initial_array_size, null_value_ref); |
493 | } |
494 | |
495 | template <> |
496 | void FlatDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value) |
497 | { |
498 | attribute.string_arena = std::make_unique<Arena>(); |
499 | const String & string = null_value.get<String>(); |
500 | const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size()); |
501 | attribute.null_values.emplace<StringRef>(string_in_arena, string.size()); |
502 | attribute.arrays.emplace<ContainerType<StringRef>>(initial_array_size, StringRef(string_in_arena, string.size())); |
503 | } |
504 | |
505 | |
506 | FlatDictionary::Attribute FlatDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) |
507 | { |
508 | Attribute attr{type, {}, {}, {}}; |
509 | |
510 | switch (type) |
511 | { |
512 | case AttributeUnderlyingType::utUInt8: |
513 | createAttributeImpl<UInt8>(attr, null_value); |
514 | break; |
515 | case AttributeUnderlyingType::utUInt16: |
516 | createAttributeImpl<UInt16>(attr, null_value); |
517 | break; |
518 | case AttributeUnderlyingType::utUInt32: |
519 | createAttributeImpl<UInt32>(attr, null_value); |
520 | break; |
521 | case AttributeUnderlyingType::utUInt64: |
522 | createAttributeImpl<UInt64>(attr, null_value); |
523 | break; |
524 | case AttributeUnderlyingType::utUInt128: |
525 | createAttributeImpl<UInt128>(attr, null_value); |
526 | break; |
527 | case AttributeUnderlyingType::utInt8: |
528 | createAttributeImpl<Int8>(attr, null_value); |
529 | break; |
530 | case AttributeUnderlyingType::utInt16: |
531 | createAttributeImpl<Int16>(attr, null_value); |
532 | break; |
533 | case AttributeUnderlyingType::utInt32: |
534 | createAttributeImpl<Int32>(attr, null_value); |
535 | break; |
536 | case AttributeUnderlyingType::utInt64: |
537 | createAttributeImpl<Int64>(attr, null_value); |
538 | break; |
539 | case AttributeUnderlyingType::utFloat32: |
540 | createAttributeImpl<Float32>(attr, null_value); |
541 | break; |
542 | case AttributeUnderlyingType::utFloat64: |
543 | createAttributeImpl<Float64>(attr, null_value); |
544 | break; |
545 | case AttributeUnderlyingType::utString: |
546 | createAttributeImpl<String>(attr, null_value); |
547 | break; |
548 | |
549 | case AttributeUnderlyingType::utDecimal32: |
550 | createAttributeImpl<Decimal32>(attr, null_value); |
551 | break; |
552 | case AttributeUnderlyingType::utDecimal64: |
553 | createAttributeImpl<Decimal64>(attr, null_value); |
554 | break; |
555 | case AttributeUnderlyingType::utDecimal128: |
556 | createAttributeImpl<Decimal128>(attr, null_value); |
557 | break; |
558 | } |
559 | |
560 | return attr; |
561 | } |
562 | |
563 | |
564 | template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter> |
565 | void FlatDictionary::getItemsImpl( |
566 | const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const |
567 | { |
568 | const auto & attr = std::get<ContainerType<AttributeType>>(attribute.arrays); |
569 | const auto rows = ext::size(ids); |
570 | |
571 | for (const auto row : ext::range(0, rows)) |
572 | { |
573 | const auto id = ids[row]; |
574 | set_value(row, id < ext::size(attr) && loaded_ids[id] ? static_cast<OutputType>(attr[id]) : get_default(row)); |
575 | } |
576 | |
577 | query_count.fetch_add(rows, std::memory_order_relaxed); |
578 | } |
579 | |
580 | template <typename T> |
581 | void FlatDictionary::resize(Attribute & attribute, const Key id) |
582 | { |
583 | if (id >= max_array_size) |
584 | throw Exception{full_name + ": identifier should be less than " + toString(max_array_size), ErrorCodes::ARGUMENT_OUT_OF_BOUND}; |
585 | |
586 | auto & array = std::get<ContainerType<T>>(attribute.arrays); |
587 | if (id >= array.size()) |
588 | { |
589 | const size_t elements_count = id + 1; //id=0 -> elements_count=1 |
590 | loaded_ids.resize(elements_count, false); |
591 | array.resize_fill(elements_count, std::get<T>(attribute.null_values)); |
592 | } |
593 | } |
594 | |
595 | template <typename T> |
596 | void FlatDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const T & value) |
597 | { |
598 | resize<T>(attribute, id); |
599 | auto & array = std::get<ContainerType<T>>(attribute.arrays); |
600 | array[id] = value; |
601 | loaded_ids[id] = true; |
602 | } |
603 | |
604 | template <> |
605 | void FlatDictionary::setAttributeValueImpl<String>(Attribute & attribute, const Key id, const String & string) |
606 | { |
607 | resize<StringRef>(attribute, id); |
608 | const auto string_in_arena = attribute.string_arena->insert(string.data(), string.size()); |
609 | auto & array = std::get<ContainerType<StringRef>>(attribute.arrays); |
610 | array[id] = StringRef{string_in_arena, string.size()}; |
611 | loaded_ids[id] = true; |
612 | } |
613 | |
614 | void FlatDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value) |
615 | { |
616 | switch (attribute.type) |
617 | { |
618 | case AttributeUnderlyingType::utUInt8: |
619 | setAttributeValueImpl<UInt8>(attribute, id, value.get<UInt64>()); |
620 | break; |
621 | case AttributeUnderlyingType::utUInt16: |
622 | setAttributeValueImpl<UInt16>(attribute, id, value.get<UInt64>()); |
623 | break; |
624 | case AttributeUnderlyingType::utUInt32: |
625 | setAttributeValueImpl<UInt32>(attribute, id, value.get<UInt64>()); |
626 | break; |
627 | case AttributeUnderlyingType::utUInt64: |
628 | setAttributeValueImpl<UInt64>(attribute, id, value.get<UInt64>()); |
629 | break; |
630 | case AttributeUnderlyingType::utUInt128: |
631 | setAttributeValueImpl<UInt128>(attribute, id, value.get<UInt128>()); |
632 | break; |
633 | case AttributeUnderlyingType::utInt8: |
634 | setAttributeValueImpl<Int8>(attribute, id, value.get<Int64>()); |
635 | break; |
636 | case AttributeUnderlyingType::utInt16: |
637 | setAttributeValueImpl<Int16>(attribute, id, value.get<Int64>()); |
638 | break; |
639 | case AttributeUnderlyingType::utInt32: |
640 | setAttributeValueImpl<Int32>(attribute, id, value.get<Int64>()); |
641 | break; |
642 | case AttributeUnderlyingType::utInt64: |
643 | setAttributeValueImpl<Int64>(attribute, id, value.get<Int64>()); |
644 | break; |
645 | case AttributeUnderlyingType::utFloat32: |
646 | setAttributeValueImpl<Float32>(attribute, id, value.get<Float64>()); |
647 | break; |
648 | case AttributeUnderlyingType::utFloat64: |
649 | setAttributeValueImpl<Float64>(attribute, id, value.get<Float64>()); |
650 | break; |
651 | case AttributeUnderlyingType::utString: |
652 | setAttributeValueImpl<String>(attribute, id, value.get<String>()); |
653 | break; |
654 | |
655 | case AttributeUnderlyingType::utDecimal32: |
656 | setAttributeValueImpl<Decimal32>(attribute, id, value.get<Decimal32>()); |
657 | break; |
658 | case AttributeUnderlyingType::utDecimal64: |
659 | setAttributeValueImpl<Decimal64>(attribute, id, value.get<Decimal64>()); |
660 | break; |
661 | case AttributeUnderlyingType::utDecimal128: |
662 | setAttributeValueImpl<Decimal128>(attribute, id, value.get<Decimal128>()); |
663 | break; |
664 | } |
665 | } |
666 | |
667 | |
668 | const FlatDictionary::Attribute & FlatDictionary::getAttribute(const std::string & attribute_name) const |
669 | { |
670 | const auto it = attribute_index_by_name.find(attribute_name); |
671 | if (it == std::end(attribute_index_by_name)) |
672 | throw Exception{full_name + ": no such attribute '" + attribute_name + "'" , ErrorCodes::BAD_ARGUMENTS}; |
673 | |
674 | return attributes[it->second]; |
675 | } |
676 | |
677 | |
678 | template <typename T> |
679 | void FlatDictionary::has(const Attribute &, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const |
680 | { |
681 | const auto ids_count = ext::size(ids); |
682 | |
683 | for (const auto i : ext::range(0, ids_count)) |
684 | { |
685 | const auto id = ids[i]; |
686 | out[i] = id < loaded_ids.size() && loaded_ids[id]; |
687 | } |
688 | |
689 | query_count.fetch_add(ids_count, std::memory_order_relaxed); |
690 | } |
691 | |
692 | |
693 | PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const |
694 | { |
695 | const auto ids_count = ext::size(loaded_ids); |
696 | |
697 | PaddedPODArray<Key> ids; |
698 | for (auto idx : ext::range(0, ids_count)) |
699 | if (loaded_ids[idx]) |
700 | ids.push_back(idx); |
701 | return ids; |
702 | } |
703 | |
704 | BlockInputStreamPtr FlatDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const |
705 | { |
706 | using BlockInputStreamType = DictionaryBlockInputStream<FlatDictionary, Key>; |
707 | return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getIds(), column_names); |
708 | } |
709 | |
710 | void registerDictionaryFlat(DictionaryFactory & factory) |
711 | { |
712 | auto create_layout = [=](const std::string & full_name, |
713 | const DictionaryStructure & dict_struct, |
714 | const Poco::Util::AbstractConfiguration & config, |
715 | const std::string & config_prefix, |
716 | DictionarySourcePtr source_ptr) -> DictionaryPtr |
717 | { |
718 | if (dict_struct.key) |
719 | throw Exception{"'key' is not supported for dictionary of layout 'flat'" , ErrorCodes::UNSUPPORTED_METHOD}; |
720 | |
721 | if (dict_struct.range_min || dict_struct.range_max) |
722 | throw Exception{full_name |
723 | + ": elements .structure.range_min and .structure.range_max should be defined only " |
724 | "for a dictionary of layout 'range_hashed'" , |
725 | ErrorCodes::BAD_ARGUMENTS}; |
726 | |
727 | const String database = config.getString(config_prefix + ".database" , "" ); |
728 | const String name = config.getString(config_prefix + ".name" ); |
729 | const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime" }; |
730 | const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty" , false); |
731 | return std::make_unique<FlatDictionary>(database, name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); |
732 | }; |
733 | factory.registerLayout("flat" , create_layout, false); |
734 | } |
735 | |
736 | |
737 | } |
738 | |