1#include "TrieDictionary.h"
2#include <stack>
3#include <Columns/ColumnFixedString.h>
4#include <Columns/ColumnVector.h>
5#include <Common/assert_cast.h>
6#include <DataTypes/DataTypeFixedString.h>
7#include <DataTypes/DataTypeString.h>
8#include <IO/WriteIntText.h>
9#include <Poco/ByteOrder.h>
10#include <Poco/Net/IPAddress.h>
11#include <Common/formatIPv6.h>
12#include <common/itoa.h>
13#include <ext/map.h>
14#include <ext/range.h>
15#include "DictionaryBlockInputStream.h"
16#include "DictionaryFactory.h"
17
18#ifdef __clang__
19 #pragma clang diagnostic ignored "-Wold-style-cast"
20 #pragma clang diagnostic ignored "-Wnewline-eof"
21#endif
22
23#include <btrie.h>
24
25
26namespace DB
27{
/// Error codes referenced in this translation unit. Only declared here; the values are defined elsewhere.
namespace ErrorCodes
{
    /// Thrown by the lookup paths when a non-numeric key is not exactly 16 bytes long.
    extern const int LOGICAL_ERROR;
    extern const int TYPE_MISMATCH;
    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int BAD_ARGUMENTS;
    extern const int DICTIONARY_IS_EMPTY;
    extern const int NOT_IMPLEMENTED;
}
36
37TrieDictionary::TrieDictionary(
38 const std::string & database_,
39 const std::string & name_,
40 const DictionaryStructure & dict_struct_,
41 DictionarySourcePtr source_ptr_,
42 const DictionaryLifetime dict_lifetime_,
43 bool require_nonempty_)
44 : database(database_)
45 , name(name_)
46 , full_name{database_.empty() ? name_ : (database_ + "." + name_)}
47 , dict_struct(dict_struct_)
48 , source_ptr{std::move(source_ptr_)}
49 , dict_lifetime(dict_lifetime_)
50 , require_nonempty(require_nonempty_)
51 , logger(&Poco::Logger::get("TrieDictionary"))
52{
53 createAttributes();
54 trie = btrie_create();
55
56 try
57 {
58 loadData();
59 calculateBytesAllocated();
60 }
61 catch (...)
62 {
63 creation_exception = std::current_exception();
64 }
65
66 creation_time = std::chrono::system_clock::now();
67}
68
/// Releases the trie that the constructor obtained from btrie_create().
TrieDictionary::~TrieDictionary()
{
    btrie_destroy(trie);
}
73
74#define DECLARE(TYPE) \
75 void TrieDictionary::get##TYPE( \
76 const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const \
77 { \
78 validateKeyTypes(key_types); \
79\
80 const auto & attribute = getAttribute(attribute_name); \
81 checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
82\
83 const auto null_value = std::get<TYPE>(attribute.null_values); \
84\
85 getItemsImpl<TYPE, TYPE>( \
86 attribute, \
87 key_columns, \
88 [&](const size_t row, const auto value) { out[row] = value; }, \
89 [&](const size_t) { return null_value; }); \
90 }
91DECLARE(UInt8)
92DECLARE(UInt16)
93DECLARE(UInt32)
94DECLARE(UInt64)
95DECLARE(UInt128)
96DECLARE(Int8)
97DECLARE(Int16)
98DECLARE(Int32)
99DECLARE(Int64)
100DECLARE(Float32)
101DECLARE(Float64)
102DECLARE(Decimal32)
103DECLARE(Decimal64)
104DECLARE(Decimal128)
105#undef DECLARE
106
107void TrieDictionary::getString(
108 const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
109{
110 validateKeyTypes(key_types);
111
112 const auto & attribute = getAttribute(attribute_name);
113 checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::utString);
114
115 const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
116
117 getItemsImpl<StringRef, StringRef>(
118 attribute,
119 key_columns,
120 [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
121 [&](const size_t) { return null_value; });
122}
123
124#define DECLARE(TYPE) \
125 void TrieDictionary::get##TYPE( \
126 const std::string & attribute_name, \
127 const Columns & key_columns, \
128 const DataTypes & key_types, \
129 const PaddedPODArray<TYPE> & def, \
130 ResultArrayType<TYPE> & out) const \
131 { \
132 validateKeyTypes(key_types); \
133\
134 const auto & attribute = getAttribute(attribute_name); \
135 checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
136\
137 getItemsImpl<TYPE, TYPE>( \
138 attribute, \
139 key_columns, \
140 [&](const size_t row, const auto value) { out[row] = value; }, \
141 [&](const size_t row) { return def[row]; }); \
142 }
143DECLARE(UInt8)
144DECLARE(UInt16)
145DECLARE(UInt32)
146DECLARE(UInt64)
147DECLARE(UInt128)
148DECLARE(Int8)
149DECLARE(Int16)
150DECLARE(Int32)
151DECLARE(Int64)
152DECLARE(Float32)
153DECLARE(Float64)
154DECLARE(Decimal32)
155DECLARE(Decimal64)
156DECLARE(Decimal128)
157#undef DECLARE
158
159void TrieDictionary::getString(
160 const std::string & attribute_name,
161 const Columns & key_columns,
162 const DataTypes & key_types,
163 const ColumnString * const def,
164 ColumnString * const out) const
165{
166 validateKeyTypes(key_types);
167
168 const auto & attribute = getAttribute(attribute_name);
169 checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::utString);
170
171 getItemsImpl<StringRef, StringRef>(
172 attribute,
173 key_columns,
174 [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
175 [&](const size_t row) { return def->getDataAt(row); });
176}
177
178#define DECLARE(TYPE) \
179 void TrieDictionary::get##TYPE( \
180 const std::string & attribute_name, \
181 const Columns & key_columns, \
182 const DataTypes & key_types, \
183 const TYPE def, \
184 ResultArrayType<TYPE> & out) const \
185 { \
186 validateKeyTypes(key_types); \
187\
188 const auto & attribute = getAttribute(attribute_name); \
189 checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::ut##TYPE); \
190\
191 getItemsImpl<TYPE, TYPE>( \
192 attribute, key_columns, [&](const size_t row, const auto value) { out[row] = value; }, [&](const size_t) { return def; }); \
193 }
194DECLARE(UInt8)
195DECLARE(UInt16)
196DECLARE(UInt32)
197DECLARE(UInt64)
198DECLARE(UInt128)
199DECLARE(Int8)
200DECLARE(Int16)
201DECLARE(Int32)
202DECLARE(Int64)
203DECLARE(Float32)
204DECLARE(Float64)
205DECLARE(Decimal32)
206DECLARE(Decimal64)
207DECLARE(Decimal128)
208#undef DECLARE
209
210void TrieDictionary::getString(
211 const std::string & attribute_name,
212 const Columns & key_columns,
213 const DataTypes & key_types,
214 const String & def,
215 ColumnString * const out) const
216{
217 validateKeyTypes(key_types);
218
219 const auto & attribute = getAttribute(attribute_name);
220 checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::utString);
221
222 getItemsImpl<StringRef, StringRef>(
223 attribute,
224 key_columns,
225 [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
226 [&](const size_t) { return StringRef{def}; });
227}
228
229void TrieDictionary::has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
230{
231 validateKeyTypes(key_types);
232
233 const auto & attribute = attributes.front();
234
235 switch (attribute.type)
236 {
237 case AttributeUnderlyingType::utUInt8:
238 has<UInt8>(attribute, key_columns, out);
239 break;
240 case AttributeUnderlyingType::utUInt16:
241 has<UInt16>(attribute, key_columns, out);
242 break;
243 case AttributeUnderlyingType::utUInt32:
244 has<UInt32>(attribute, key_columns, out);
245 break;
246 case AttributeUnderlyingType::utUInt64:
247 has<UInt64>(attribute, key_columns, out);
248 break;
249 case AttributeUnderlyingType::utUInt128:
250 has<UInt128>(attribute, key_columns, out);
251 break;
252 case AttributeUnderlyingType::utInt8:
253 has<Int8>(attribute, key_columns, out);
254 break;
255 case AttributeUnderlyingType::utInt16:
256 has<Int16>(attribute, key_columns, out);
257 break;
258 case AttributeUnderlyingType::utInt32:
259 has<Int32>(attribute, key_columns, out);
260 break;
261 case AttributeUnderlyingType::utInt64:
262 has<Int64>(attribute, key_columns, out);
263 break;
264 case AttributeUnderlyingType::utFloat32:
265 has<Float32>(attribute, key_columns, out);
266 break;
267 case AttributeUnderlyingType::utFloat64:
268 has<Float64>(attribute, key_columns, out);
269 break;
270 case AttributeUnderlyingType::utString:
271 has<StringRef>(attribute, key_columns, out);
272 break;
273
274 case AttributeUnderlyingType::utDecimal32:
275 has<Decimal32>(attribute, key_columns, out);
276 break;
277 case AttributeUnderlyingType::utDecimal64:
278 has<Decimal64>(attribute, key_columns, out);
279 break;
280 case AttributeUnderlyingType::utDecimal128:
281 has<Decimal128>(attribute, key_columns, out);
282 break;
283 }
284}
285
286void TrieDictionary::createAttributes()
287{
288 const auto size = dict_struct.attributes.size();
289 attributes.reserve(size);
290
291 for (const auto & attribute : dict_struct.attributes)
292 {
293 attribute_index_by_name.emplace(attribute.name, attributes.size());
294 attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
295
296 if (attribute.hierarchical)
297 throw Exception{full_name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
298 ErrorCodes::TYPE_MISMATCH};
299 }
300}
301
302void TrieDictionary::loadData()
303{
304 auto stream = source_ptr->loadAll();
305 stream->readPrefix();
306
307 /// created upfront to avoid excess allocations
308 const auto keys_size = dict_struct.key->size();
309 StringRefs keys(keys_size);
310
311 const auto attributes_size = attributes.size();
312
313 while (const auto block = stream->read())
314 {
315 const auto rows = block.rows();
316 element_count += rows;
317
318 const auto key_column_ptrs = ext::map<Columns>(
319 ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
320
321 const auto attribute_column_ptrs = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
322 {
323 return block.safeGetByPosition(keys_size + attribute_idx).column;
324 });
325
326 for (const auto row_idx : ext::range(0, rows))
327 {
328 /// calculate key once per row
329 const auto key_column = key_column_ptrs.front();
330
331 for (const auto attribute_idx : ext::range(0, attributes_size))
332 {
333 const auto & attribute_column = *attribute_column_ptrs[attribute_idx];
334 auto & attribute = attributes[attribute_idx];
335 setAttributeValue(attribute, key_column->getDataAt(row_idx), attribute_column[row_idx]);
336 }
337 }
338 }
339
340 stream->readSuffix();
341
342 if (require_nonempty && 0 == element_count)
343 throw Exception{full_name + ": dictionary source is empty and 'require_nonempty' property is set.", ErrorCodes::DICTIONARY_IS_EMPTY};
344}
345
346template <typename T>
347void TrieDictionary::addAttributeSize(const Attribute & attribute)
348{
349 const auto & vec = std::get<ContainerType<T>>(attribute.maps);
350 bytes_allocated += sizeof(ContainerType<T>) + (vec.capacity() * sizeof(T));
351 bucket_count = vec.size();
352}
353
354void TrieDictionary::calculateBytesAllocated()
355{
356 bytes_allocated += attributes.size() * sizeof(attributes.front());
357
358 for (const auto & attribute : attributes)
359 {
360 switch (attribute.type)
361 {
362 case AttributeUnderlyingType::utUInt8:
363 addAttributeSize<UInt8>(attribute);
364 break;
365 case AttributeUnderlyingType::utUInt16:
366 addAttributeSize<UInt16>(attribute);
367 break;
368 case AttributeUnderlyingType::utUInt32:
369 addAttributeSize<UInt32>(attribute);
370 break;
371 case AttributeUnderlyingType::utUInt64:
372 addAttributeSize<UInt64>(attribute);
373 break;
374 case AttributeUnderlyingType::utUInt128:
375 addAttributeSize<UInt128>(attribute);
376 break;
377 case AttributeUnderlyingType::utInt8:
378 addAttributeSize<Int8>(attribute);
379 break;
380 case AttributeUnderlyingType::utInt16:
381 addAttributeSize<Int16>(attribute);
382 break;
383 case AttributeUnderlyingType::utInt32:
384 addAttributeSize<Int32>(attribute);
385 break;
386 case AttributeUnderlyingType::utInt64:
387 addAttributeSize<Int64>(attribute);
388 break;
389 case AttributeUnderlyingType::utFloat32:
390 addAttributeSize<Float32>(attribute);
391 break;
392 case AttributeUnderlyingType::utFloat64:
393 addAttributeSize<Float64>(attribute);
394 break;
395
396 case AttributeUnderlyingType::utDecimal32:
397 addAttributeSize<Decimal32>(attribute);
398 break;
399 case AttributeUnderlyingType::utDecimal64:
400 addAttributeSize<Decimal64>(attribute);
401 break;
402 case AttributeUnderlyingType::utDecimal128:
403 addAttributeSize<Decimal128>(attribute);
404 break;
405
406 case AttributeUnderlyingType::utString:
407 {
408 addAttributeSize<StringRef>(attribute);
409 bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
410
411 break;
412 }
413 }
414 }
415
416 bytes_allocated += btrie_allocated(trie);
417}
418
419void TrieDictionary::validateKeyTypes(const DataTypes & key_types) const
420{
421 if (key_types.size() != 1)
422 throw Exception{"Expected a single IP address", ErrorCodes::TYPE_MISMATCH};
423
424 const auto & actual_type = key_types[0]->getName();
425
426 if (actual_type != "UInt32" && actual_type != "FixedString(16)")
427 throw Exception{"Key does not match, expected either UInt32 or FixedString(16)", ErrorCodes::TYPE_MISMATCH};
428}
429
430
431template <typename T>
432void TrieDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
433{
434 attribute.null_values = T(null_value.get<NearestFieldType<T>>());
435 attribute.maps.emplace<ContainerType<T>>();
436}
437
438TrieDictionary::Attribute TrieDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
439{
440 Attribute attr{type, {}, {}, {}};
441
442 switch (type)
443 {
444 case AttributeUnderlyingType::utUInt8:
445 createAttributeImpl<UInt8>(attr, null_value);
446 break;
447 case AttributeUnderlyingType::utUInt16:
448 createAttributeImpl<UInt16>(attr, null_value);
449 break;
450 case AttributeUnderlyingType::utUInt32:
451 createAttributeImpl<UInt32>(attr, null_value);
452 break;
453 case AttributeUnderlyingType::utUInt64:
454 createAttributeImpl<UInt64>(attr, null_value);
455 break;
456 case AttributeUnderlyingType::utUInt128:
457 createAttributeImpl<UInt128>(attr, null_value);
458 break;
459 case AttributeUnderlyingType::utInt8:
460 createAttributeImpl<Int8>(attr, null_value);
461 break;
462 case AttributeUnderlyingType::utInt16:
463 createAttributeImpl<Int16>(attr, null_value);
464 break;
465 case AttributeUnderlyingType::utInt32:
466 createAttributeImpl<Int32>(attr, null_value);
467 break;
468 case AttributeUnderlyingType::utInt64:
469 createAttributeImpl<Int64>(attr, null_value);
470 break;
471 case AttributeUnderlyingType::utFloat32:
472 createAttributeImpl<Float32>(attr, null_value);
473 break;
474 case AttributeUnderlyingType::utFloat64:
475 createAttributeImpl<Float64>(attr, null_value);
476 break;
477
478 case AttributeUnderlyingType::utDecimal32:
479 createAttributeImpl<Decimal32>(attr, null_value);
480 break;
481 case AttributeUnderlyingType::utDecimal64:
482 createAttributeImpl<Decimal64>(attr, null_value);
483 break;
484 case AttributeUnderlyingType::utDecimal128:
485 createAttributeImpl<Decimal128>(attr, null_value);
486 break;
487
488 case AttributeUnderlyingType::utString:
489 {
490 attr.null_values = null_value.get<String>();
491 attr.maps.emplace<ContainerType<StringRef>>();
492 attr.string_arena = std::make_unique<Arena>();
493 break;
494 }
495 }
496
497 return attr;
498}
499
500
501template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
502void TrieDictionary::getItemsImpl(
503 const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const
504{
505 auto & vec = std::get<ContainerType<AttributeType>>(attribute.maps);
506
507 const auto first_column = key_columns.front();
508 const auto rows = first_column->size();
509 if (first_column->isNumeric())
510 {
511 for (const auto i : ext::range(0, rows))
512 {
513 auto addr = Int32(first_column->get64(i));
514 uintptr_t slot = btrie_find(trie, addr);
515#pragma GCC diagnostic push
516#pragma GCC diagnostic warning "-Wold-style-cast"
517 set_value(i, slot != BTRIE_NULL ? static_cast<OutputType>(vec[slot]) : get_default(i));
518#pragma GCC diagnostic pop
519 }
520 }
521 else
522 {
523 for (const auto i : ext::range(0, rows))
524 {
525 auto addr = first_column->getDataAt(i);
526 if (addr.size != 16)
527 throw Exception("Expected key to be FixedString(16)", ErrorCodes::LOGICAL_ERROR);
528
529 uintptr_t slot = btrie_find_a6(trie, reinterpret_cast<const UInt8 *>(addr.data));
530#pragma GCC diagnostic push
531#pragma GCC diagnostic warning "-Wold-style-cast"
532 set_value(i, slot != BTRIE_NULL ? static_cast<OutputType>(vec[slot]) : get_default(i));
533#pragma GCC diagnostic pop
534 }
535 }
536
537 query_count.fetch_add(rows, std::memory_order_relaxed);
538}
539
540
541template <typename T>
542bool TrieDictionary::setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value)
543{
544 // Insert value into appropriate vector type
545 auto & vec = std::get<ContainerType<T>>(attribute.maps);
546 size_t row = vec.size();
547 vec.push_back(value);
548
549 // Parse IP address and subnet length from string (e.g. 2a02:6b8::3/64)
550 Poco::Net::IPAddress addr, mask;
551 std::string addr_str(key.toString());
552 size_t pos = addr_str.find('/');
553 if (pos != std::string::npos)
554 {
555 addr = Poco::Net::IPAddress(addr_str.substr(0, pos));
556 mask = Poco::Net::IPAddress(std::stoi(addr_str.substr(pos + 1), nullptr, 10), addr.family());
557 }
558 else
559 {
560 addr = Poco::Net::IPAddress(addr_str);
561 mask = Poco::Net::IPAddress(addr.length() * 8, addr.family());
562 }
563
564 /*
565 * Here we might overwrite the same key with the same slot as each key can map to multiple attributes.
566 * However, all columns have equal number of rows so it is okay to store only row number for each key
567 * instead of building a trie for each column. This comes at the cost of additional lookup in attribute
568 * vector on lookup time to return cell from row + column. The reason for this is to save space,
569 * and build only single trie instead of trie for each column.
570 */
571 if (addr.family() == Poco::Net::IPAddress::IPv4)
572 {
573 UInt32 addr_v4 = Poco::ByteOrder::toNetwork(*reinterpret_cast<const UInt32 *>(addr.addr()));
574 UInt32 mask_v4 = Poco::ByteOrder::toNetwork(*reinterpret_cast<const UInt32 *>(mask.addr()));
575 return btrie_insert(trie, addr_v4, mask_v4, row) == 0;
576 }
577
578 const uint8_t * addr_v6 = reinterpret_cast<const uint8_t *>(addr.addr());
579 const uint8_t * mask_v6 = reinterpret_cast<const uint8_t *>(mask.addr());
580 return btrie_insert_a6(trie, addr_v6, mask_v6, row) == 0;
581}
582
583bool TrieDictionary::setAttributeValue(Attribute & attribute, const StringRef key, const Field & value)
584{
585 switch (attribute.type)
586 {
587 case AttributeUnderlyingType::utUInt8:
588 return setAttributeValueImpl<UInt8>(attribute, key, value.get<UInt64>());
589 case AttributeUnderlyingType::utUInt16:
590 return setAttributeValueImpl<UInt16>(attribute, key, value.get<UInt64>());
591 case AttributeUnderlyingType::utUInt32:
592 return setAttributeValueImpl<UInt32>(attribute, key, value.get<UInt64>());
593 case AttributeUnderlyingType::utUInt64:
594 return setAttributeValueImpl<UInt64>(attribute, key, value.get<UInt64>());
595 case AttributeUnderlyingType::utUInt128:
596 return setAttributeValueImpl<UInt128>(attribute, key, value.get<UInt128>());
597 case AttributeUnderlyingType::utInt8:
598 return setAttributeValueImpl<Int8>(attribute, key, value.get<Int64>());
599 case AttributeUnderlyingType::utInt16:
600 return setAttributeValueImpl<Int16>(attribute, key, value.get<Int64>());
601 case AttributeUnderlyingType::utInt32:
602 return setAttributeValueImpl<Int32>(attribute, key, value.get<Int64>());
603 case AttributeUnderlyingType::utInt64:
604 return setAttributeValueImpl<Int64>(attribute, key, value.get<Int64>());
605 case AttributeUnderlyingType::utFloat32:
606 return setAttributeValueImpl<Float32>(attribute, key, value.get<Float64>());
607 case AttributeUnderlyingType::utFloat64:
608 return setAttributeValueImpl<Float64>(attribute, key, value.get<Float64>());
609
610 case AttributeUnderlyingType::utDecimal32:
611 return setAttributeValueImpl<Decimal32>(attribute, key, value.get<Decimal32>());
612 case AttributeUnderlyingType::utDecimal64:
613 return setAttributeValueImpl<Decimal64>(attribute, key, value.get<Decimal64>());
614 case AttributeUnderlyingType::utDecimal128:
615 return setAttributeValueImpl<Decimal128>(attribute, key, value.get<Decimal128>());
616
617 case AttributeUnderlyingType::utString:
618 {
619 const auto & string = value.get<String>();
620 const auto string_in_arena = attribute.string_arena->insert(string.data(), string.size());
621 setAttributeValueImpl<StringRef>(attribute, key, StringRef{string_in_arena, string.size()});
622 return true;
623 }
624 }
625
626 return {};
627}
628
629const TrieDictionary::Attribute & TrieDictionary::getAttribute(const std::string & attribute_name) const
630{
631 const auto it = attribute_index_by_name.find(attribute_name);
632 if (it == std::end(attribute_index_by_name))
633 throw Exception{full_name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
634
635 return attributes[it->second];
636}
637
638template <typename T>
639void TrieDictionary::has(const Attribute &, const Columns & key_columns, PaddedPODArray<UInt8> & out) const
640{
641 const auto first_column = key_columns.front();
642 const auto rows = first_column->size();
643 if (first_column->isNumeric())
644 {
645 for (const auto i : ext::range(0, rows))
646 {
647 auto addr = Int32(first_column->get64(i));
648 uintptr_t slot = btrie_find(trie, addr);
649#pragma GCC diagnostic push
650#pragma GCC diagnostic warning "-Wold-style-cast"
651 out[i] = (slot != BTRIE_NULL);
652#pragma GCC diagnostic pop
653 }
654 }
655 else
656 {
657 for (const auto i : ext::range(0, rows))
658 {
659 auto addr = first_column->getDataAt(i);
660 if (unlikely(addr.size != 16))
661 throw Exception("Expected key to be FixedString(16)", ErrorCodes::LOGICAL_ERROR);
662
663 uintptr_t slot = btrie_find_a6(trie, reinterpret_cast<const UInt8 *>(addr.data));
664#pragma GCC diagnostic push
665#pragma GCC diagnostic warning "-Wold-style-cast"
666 out[i] = (slot != BTRIE_NULL);
667#pragma GCC diagnostic pop
668 }
669 }
670
671 query_count.fetch_add(rows, std::memory_order_relaxed);
672}
673
674template <typename Getter, typename KeyType>
675void TrieDictionary::trieTraverse(const btrie_t * tree, Getter && getter) const
676{
677 KeyType key = 0;
678 const KeyType high_bit = ~((~key) >> 1);
679
680 btrie_node_t * node;
681 node = tree->root;
682
683 std::stack<btrie_node_t *> stack;
684 while (node)
685 {
686 stack.push(node);
687 node = node->left;
688 }
689
690 auto getBit = [&high_bit](size_t size) { return size ? (high_bit >> (size - 1)) : 0; };
691
692 while (!stack.empty())
693 {
694 node = stack.top();
695 stack.pop();
696#pragma GCC diagnostic push
697#pragma GCC diagnostic warning "-Wold-style-cast"
698 if (node && node->value != BTRIE_NULL)
699#pragma GCC diagnostic pop
700 getter(key, stack.size());
701
702 if (node && node->right)
703 {
704 stack.push(nullptr);
705 key |= getBit(stack.size());
706 stack.push(node->right);
707 while (stack.top()->left)
708 stack.push(stack.top()->left);
709 }
710 else
711 key &= ~getBit(stack.size());
712 }
713}
714
715Columns TrieDictionary::getKeyColumns() const
716{
717 auto ip_column = ColumnFixedString::create(IPV6_BINARY_LENGTH);
718 auto mask_column = ColumnVector<UInt8>::create();
719
720#if defined(__SIZEOF_INT128__)
721 auto getter = [&ip_column, &mask_column](__uint128_t ip, size_t mask)
722 {
723 Poco::UInt64 * ip_array = reinterpret_cast<Poco::UInt64 *>(&ip); // Poco:: for old poco + macos
724 ip_array[0] = Poco::ByteOrder::fromNetwork(ip_array[0]);
725 ip_array[1] = Poco::ByteOrder::fromNetwork(ip_array[1]);
726 std::swap(ip_array[0], ip_array[1]);
727 ip_column->insertData(reinterpret_cast<const char *>(ip_array), IPV6_BINARY_LENGTH);
728 mask_column->insertValue(static_cast<UInt8>(mask));
729 };
730
731 trieTraverse<decltype(getter), __uint128_t>(trie, std::move(getter));
732#else
733 throw Exception("TrieDictionary::getKeyColumns is not implemented for 32bit arch", ErrorCodes::NOT_IMPLEMENTED);
734#endif
735 return {std::move(ip_column), std::move(mask_column)};
736}
737
738BlockInputStreamPtr TrieDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
739{
740 using BlockInputStreamType = DictionaryBlockInputStream<TrieDictionary, UInt64>;
741
742 auto getKeys = [](const Columns & columns, const std::vector<DictionaryAttribute> & dict_attributes)
743 {
744 const auto & attr = dict_attributes.front();
745 return ColumnsWithTypeAndName(
746 {ColumnWithTypeAndName(columns.front(), std::make_shared<DataTypeFixedString>(IPV6_BINARY_LENGTH), attr.name)});
747 };
748 auto getView = [](const Columns & columns, const std::vector<DictionaryAttribute> & dict_attributes)
749 {
750 auto column = ColumnString::create();
751 const auto & ip_column = assert_cast<const ColumnFixedString &>(*columns.front());
752 const auto & mask_column = assert_cast<const ColumnVector<UInt8> &>(*columns.back());
753 char buffer[48];
754 for (size_t row : ext::range(0, ip_column.size()))
755 {
756 UInt8 mask = mask_column.getElement(row);
757 char * ptr = buffer;
758 formatIPv6(reinterpret_cast<const unsigned char *>(ip_column.getDataAt(row).data), ptr);
759 *(ptr - 1) = '/';
760 ptr = itoa(mask, ptr);
761 column->insertData(buffer, ptr - buffer);
762 }
763 return ColumnsWithTypeAndName{
764 ColumnWithTypeAndName(std::move(column), std::make_shared<DataTypeString>(), dict_attributes.front().name)};
765 };
766 return std::make_shared<BlockInputStreamType>(
767 shared_from_this(), max_block_size, getKeyColumns(), column_names, std::move(getKeys), std::move(getView));
768}
769
770
771void registerDictionaryTrie(DictionaryFactory & factory)
772{
773 auto create_layout = [=](const std::string &,
774 const DictionaryStructure & dict_struct,
775 const Poco::Util::AbstractConfiguration & config,
776 const std::string & config_prefix,
777 DictionarySourcePtr source_ptr) -> DictionaryPtr
778 {
779 if (!dict_struct.key)
780 throw Exception{"'key' is required for dictionary of layout 'ip_trie'", ErrorCodes::BAD_ARGUMENTS};
781
782 const String database = config.getString(config_prefix + ".database", "");
783 const String name = config.getString(config_prefix + ".name");
784 const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
785 const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
786 // This is specialised trie for storing IPv4 and IPv6 prefixes.
787 return std::make_unique<TrieDictionary>(database, name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
788 };
789 factory.registerLayout("ip_trie", create_layout, true);
790}
791
792}
793