1#pragma once
2
3#include <atomic>
4#include <memory>
5#include <variant>
6#include <Columns/ColumnDecimal.h>
7#include <Columns/ColumnString.h>
8#include <Common/Arena.h>
9#include <Common/HashTable/HashMap.h>
10#include <common/StringRef.h>
11#include <common/logger_useful.h>
12#include <ext/range.h>
13#include "DictionaryStructure.h"
14#include "IDictionary.h"
15#include "IDictionarySource.h"
16
/// Opaque forward declaration of the trie node type; only pointers to it are used in this header.
struct btrie_s;
using btrie_t = btrie_s;
19
20namespace DB
21{
22class TrieDictionary final : public IDictionaryBase
23{
24public:
25 TrieDictionary(
26 const std::string & database_,
27 const std::string & name_,
28 const DictionaryStructure & dict_struct_,
29 DictionarySourcePtr source_ptr_,
30 const DictionaryLifetime dict_lifetime_,
31 bool require_nonempty_);
32
33 ~TrieDictionary() override;
34
35 std::string getKeyDescription() const { return key_description; }
36
37 const std::string & getDatabase() const override { return database; }
38 const std::string & getName() const override { return name; }
39 const std::string & getFullName() const override { return full_name; }
40
41 std::string getTypeName() const override { return "Trie"; }
42
43 size_t getBytesAllocated() const override { return bytes_allocated; }
44
45 size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); }
46
47 double getHitRate() const override { return 1.0; }
48
49 size_t getElementCount() const override { return element_count; }
50
51 double getLoadFactor() const override { return static_cast<double>(element_count) / bucket_count; }
52
53 std::shared_ptr<const IExternalLoadable> clone() const override
54 {
55 return std::make_shared<TrieDictionary>(database, name, dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty);
56 }
57
58 const IDictionarySource * getSource() const override { return source_ptr.get(); }
59
60 const DictionaryLifetime & getLifetime() const override { return dict_lifetime; }
61
62 const DictionaryStructure & getStructure() const override { return dict_struct; }
63
64 bool isInjective(const std::string & attribute_name) const override
65 {
66 return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
67 }
68
69 template <typename T>
70 using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
71
72#define DECLARE(TYPE) \
73 void get##TYPE( \
74 const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
75 DECLARE(UInt8)
76 DECLARE(UInt16)
77 DECLARE(UInt32)
78 DECLARE(UInt64)
79 DECLARE(UInt128)
80 DECLARE(Int8)
81 DECLARE(Int16)
82 DECLARE(Int32)
83 DECLARE(Int64)
84 DECLARE(Float32)
85 DECLARE(Float64)
86 DECLARE(Decimal32)
87 DECLARE(Decimal64)
88 DECLARE(Decimal128)
89#undef DECLARE
90
91 void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
92
93#define DECLARE(TYPE) \
94 void get##TYPE( \
95 const std::string & attribute_name, \
96 const Columns & key_columns, \
97 const DataTypes & key_types, \
98 const PaddedPODArray<TYPE> & def, \
99 ResultArrayType<TYPE> & out) const;
100 DECLARE(UInt8)
101 DECLARE(UInt16)
102 DECLARE(UInt32)
103 DECLARE(UInt64)
104 DECLARE(UInt128)
105 DECLARE(Int8)
106 DECLARE(Int16)
107 DECLARE(Int32)
108 DECLARE(Int64)
109 DECLARE(Float32)
110 DECLARE(Float64)
111 DECLARE(Decimal32)
112 DECLARE(Decimal64)
113 DECLARE(Decimal128)
114#undef DECLARE
115
116 void getString(
117 const std::string & attribute_name,
118 const Columns & key_columns,
119 const DataTypes & key_types,
120 const ColumnString * const def,
121 ColumnString * const out) const;
122
123#define DECLARE(TYPE) \
124 void get##TYPE( \
125 const std::string & attribute_name, \
126 const Columns & key_columns, \
127 const DataTypes & key_types, \
128 const TYPE def, \
129 ResultArrayType<TYPE> & out) const;
130 DECLARE(UInt8)
131 DECLARE(UInt16)
132 DECLARE(UInt32)
133 DECLARE(UInt64)
134 DECLARE(UInt128)
135 DECLARE(Int8)
136 DECLARE(Int16)
137 DECLARE(Int32)
138 DECLARE(Int64)
139 DECLARE(Float32)
140 DECLARE(Float64)
141 DECLARE(Decimal32)
142 DECLARE(Decimal64)
143 DECLARE(Decimal128)
144#undef DECLARE
145
146 void getString(
147 const std::string & attribute_name,
148 const Columns & key_columns,
149 const DataTypes & key_types,
150 const String & def,
151 ColumnString * const out) const;
152
153 void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
154
155 BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
156
157private:
158 template <typename Value>
159 using ContainerType = std::vector<Value>;
160
161 struct Attribute final
162 {
163 AttributeUnderlyingType type;
164 std::variant<
165 UInt8,
166 UInt16,
167 UInt32,
168 UInt64,
169 UInt128,
170 Int8,
171 Int16,
172 Int32,
173 Int64,
174 Decimal32,
175 Decimal64,
176 Decimal128,
177 Float32,
178 Float64,
179 String>
180 null_values;
181 std::variant<
182 ContainerType<UInt8>,
183 ContainerType<UInt16>,
184 ContainerType<UInt32>,
185 ContainerType<UInt64>,
186 ContainerType<UInt128>,
187 ContainerType<Int8>,
188 ContainerType<Int16>,
189 ContainerType<Int32>,
190 ContainerType<Int64>,
191 ContainerType<Decimal32>,
192 ContainerType<Decimal64>,
193 ContainerType<Decimal128>,
194 ContainerType<Float32>,
195 ContainerType<Float64>,
196 ContainerType<StringRef>>
197 maps;
198 std::unique_ptr<Arena> string_arena;
199 };
200
201 void createAttributes();
202
203 void loadData();
204
205 template <typename T>
206 void addAttributeSize(const Attribute & attribute);
207
208 void calculateBytesAllocated();
209
210 void validateKeyTypes(const DataTypes & key_types) const;
211
212 template <typename T>
213 void createAttributeImpl(Attribute & attribute, const Field & null_value);
214
215 Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
216
217
218 template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
219 void
220 getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
221
222
223 template <typename T>
224 bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
225
226 bool setAttributeValue(Attribute & attribute, const StringRef key, const Field & value);
227
228 const Attribute & getAttribute(const std::string & attribute_name) const;
229
230 template <typename T>
231 void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;
232
233 template <typename Getter, typename KeyType>
234 void trieTraverse(const btrie_t * trie, Getter && getter) const;
235
236 Columns getKeyColumns() const;
237
238 const std::string database;
239 const std::string name;
240 const std::string full_name;
241 const DictionaryStructure dict_struct;
242 const DictionarySourcePtr source_ptr;
243 const DictionaryLifetime dict_lifetime;
244 const bool require_nonempty;
245 const std::string key_description{dict_struct.getKeyDescription()};
246
247
248 btrie_t * trie = nullptr;
249 std::map<std::string, size_t> attribute_index_by_name;
250 std::vector<Attribute> attributes;
251
252 size_t bytes_allocated = 0;
253 size_t element_count = 0;
254 size_t bucket_count = 0;
255 mutable std::atomic<size_t> query_count{0};
256
257 std::chrono::time_point<std::chrono::system_clock> creation_time;
258
259 std::exception_ptr creation_exception;
260
261 Logger * logger;
262};
263
264}
265