1#pragma once
2
3#include <atomic>
4#include <memory>
5#include <variant>
6#include <Columns/ColumnDecimal.h>
7#include <Columns/ColumnString.h>
8#include <Common/Arena.h>
9#include <Common/HashTable/HashMap.h>
10#include <Core/Block.h>
11#include <common/StringRef.h>
12#include <ext/range.h>
13#include "DictionaryStructure.h"
14#include "IDictionary.h"
15#include "IDictionarySource.h"
16
17
18namespace DB
19{
20using BlockPtr = std::shared_ptr<Block>;
21
22class ComplexKeyHashedDictionary final : public IDictionaryBase
23{
24public:
25 ComplexKeyHashedDictionary(
26 const std::string & database_,
27 const std::string & name_,
28 const DictionaryStructure & dict_struct_,
29 DictionarySourcePtr source_ptr_,
30 const DictionaryLifetime dict_lifetime_,
31 bool require_nonempty_,
32 BlockPtr saved_block_ = nullptr);
33
34 std::string getKeyDescription() const { return key_description; }
35
36 const std::string & getDatabase() const override { return database; }
37 const std::string & getName() const override { return name; }
38 const std::string & getFullName() const override { return full_name; }
39
40 std::string getTypeName() const override { return "ComplexKeyHashed"; }
41
42 size_t getBytesAllocated() const override { return bytes_allocated; }
43
44 size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); }
45
46 double getHitRate() const override { return 1.0; }
47
48 size_t getElementCount() const override { return element_count; }
49
50 double getLoadFactor() const override { return static_cast<double>(element_count) / bucket_count; }
51
52 std::shared_ptr<const IExternalLoadable> clone() const override
53 {
54 return std::make_shared<ComplexKeyHashedDictionary>(database, name, dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, saved_block);
55 }
56
57 const IDictionarySource * getSource() const override { return source_ptr.get(); }
58
59 const DictionaryLifetime & getLifetime() const override { return dict_lifetime; }
60
61 const DictionaryStructure & getStructure() const override { return dict_struct; }
62
63 bool isInjective(const std::string & attribute_name) const override
64 {
65 return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
66 }
67
68 template <typename T>
69 using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
70
71#define DECLARE(TYPE) \
72 void get##TYPE( \
73 const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
74 DECLARE(UInt8)
75 DECLARE(UInt16)
76 DECLARE(UInt32)
77 DECLARE(UInt64)
78 DECLARE(UInt128)
79 DECLARE(Int8)
80 DECLARE(Int16)
81 DECLARE(Int32)
82 DECLARE(Int64)
83 DECLARE(Float32)
84 DECLARE(Float64)
85 DECLARE(Decimal32)
86 DECLARE(Decimal64)
87 DECLARE(Decimal128)
88#undef DECLARE
89
90 void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
91
92#define DECLARE(TYPE) \
93 void get##TYPE( \
94 const std::string & attribute_name, \
95 const Columns & key_columns, \
96 const DataTypes & key_types, \
97 const PaddedPODArray<TYPE> & def, \
98 ResultArrayType<TYPE> & out) const;
99 DECLARE(UInt8)
100 DECLARE(UInt16)
101 DECLARE(UInt32)
102 DECLARE(UInt64)
103 DECLARE(UInt128)
104 DECLARE(Int8)
105 DECLARE(Int16)
106 DECLARE(Int32)
107 DECLARE(Int64)
108 DECLARE(Float32)
109 DECLARE(Float64)
110 DECLARE(Decimal32)
111 DECLARE(Decimal64)
112 DECLARE(Decimal128)
113#undef DECLARE
114
115 void getString(
116 const std::string & attribute_name,
117 const Columns & key_columns,
118 const DataTypes & key_types,
119 const ColumnString * const def,
120 ColumnString * const out) const;
121
122#define DECLARE(TYPE) \
123 void get##TYPE( \
124 const std::string & attribute_name, \
125 const Columns & key_columns, \
126 const DataTypes & key_types, \
127 const TYPE def, \
128 ResultArrayType<TYPE> & out) const;
129 DECLARE(UInt8)
130 DECLARE(UInt16)
131 DECLARE(UInt32)
132 DECLARE(UInt64)
133 DECLARE(UInt128)
134 DECLARE(Int8)
135 DECLARE(Int16)
136 DECLARE(Int32)
137 DECLARE(Int64)
138 DECLARE(Float32)
139 DECLARE(Float64)
140 DECLARE(Decimal32)
141 DECLARE(Decimal64)
142 DECLARE(Decimal128)
143#undef DECLARE
144
145 void getString(
146 const std::string & attribute_name,
147 const Columns & key_columns,
148 const DataTypes & key_types,
149 const String & def,
150 ColumnString * const out) const;
151
152 void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
153
154 BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
155
156private:
157 template <typename Value>
158 using ContainerType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
159
160 struct Attribute final
161 {
162 AttributeUnderlyingType type;
163 std::variant<
164 UInt8,
165 UInt16,
166 UInt32,
167 UInt64,
168 UInt128,
169 Int8,
170 Int16,
171 Int32,
172 Int64,
173 Decimal32,
174 Decimal64,
175 Decimal128,
176 Float32,
177 Float64,
178 String>
179 null_values;
180 std::variant<
181 ContainerType<UInt8>,
182 ContainerType<UInt16>,
183 ContainerType<UInt32>,
184 ContainerType<UInt64>,
185 ContainerType<UInt128>,
186 ContainerType<Int8>,
187 ContainerType<Int16>,
188 ContainerType<Int32>,
189 ContainerType<Int64>,
190 ContainerType<Decimal32>,
191 ContainerType<Decimal64>,
192 ContainerType<Decimal128>,
193 ContainerType<Float32>,
194 ContainerType<Float64>,
195 ContainerType<StringRef>>
196 maps;
197 std::unique_ptr<Arena> string_arena;
198 };
199
200 void createAttributes();
201
202 void blockToAttributes(const Block & block);
203
204 void updateData();
205
206 void loadData();
207
208 template <typename T>
209 void addAttributeSize(const Attribute & attribute);
210
211 void calculateBytesAllocated();
212
213 template <typename T>
214 void createAttributeImpl(Attribute & attribute, const Field & null_value);
215
216 Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
217
218 template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
219 void
220 getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
221
222 template <typename T>
223 bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
224
225 bool setAttributeValue(Attribute & attribute, const StringRef key, const Field & value);
226
227 const Attribute & getAttribute(const std::string & attribute_name) const;
228
229 static StringRef placeKeysInPool(const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool);
230
231 template <typename T>
232 void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;
233
234 std::vector<StringRef> getKeys() const;
235
236 template <typename T>
237 std::vector<StringRef> getKeys(const Attribute & attribute) const;
238
239 const std::string database;
240 const std::string name;
241 const std::string full_name;
242 const DictionaryStructure dict_struct;
243 const DictionarySourcePtr source_ptr;
244 const DictionaryLifetime dict_lifetime;
245 const bool require_nonempty;
246 const std::string key_description{dict_struct.getKeyDescription()};
247
248 std::map<std::string, size_t> attribute_index_by_name;
249 std::vector<Attribute> attributes;
250 Arena keys_pool;
251
252 size_t bytes_allocated = 0;
253 size_t element_count = 0;
254 size_t bucket_count = 0;
255 mutable std::atomic<size_t> query_count{0};
256
257 BlockPtr saved_block;
258};
259
260}
261