1#pragma once
2
3#include <atomic>
4#include <memory>
5#include <variant>
6#include <Columns/ColumnDecimal.h>
7#include <Columns/ColumnString.h>
8#include <Core/Block.h>
9#include <Common/HashTable/HashMap.h>
10#include <sparsehash/sparse_hash_map>
11#include <ext/range.h>
12#include "DictionaryStructure.h"
13#include "IDictionary.h"
14#include "IDictionarySource.h"
15
/** This dictionary stores all of its content in in-memory hash tables
  * (a separate Key -> Value map for each attribute).
  * Two hash table variants are supported: a fast HashMap and a memory-efficient sparse_hash_map.
  */
20
21namespace DB
22{
/// Shared-ownership handle to a Block; HashedDictionary below uses it for its `saved_block` member.
using BlockPtr = std::shared_ptr<Block>;
24
25class HashedDictionary final : public IDictionary
26{
27public:
28 HashedDictionary(
29 const std::string & database_,
30 const std::string & name_,
31 const DictionaryStructure & dict_struct_,
32 DictionarySourcePtr source_ptr_,
33 const DictionaryLifetime dict_lifetime_,
34 bool require_nonempty_,
35 bool sparse_,
36 BlockPtr saved_block_ = nullptr);
37
38 const std::string & getDatabase() const override { return database; }
39 const std::string & getName() const override { return name; }
40 const std::string & getFullName() const override { return full_name; }
41
42 std::string getTypeName() const override { return sparse ? "SparseHashed" : "Hashed"; }
43
44 size_t getBytesAllocated() const override { return bytes_allocated; }
45
46 size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); }
47
48 double getHitRate() const override { return 1.0; }
49
50 size_t getElementCount() const override { return element_count; }
51
52 double getLoadFactor() const override { return static_cast<double>(element_count) / bucket_count; }
53
54 std::shared_ptr<const IExternalLoadable> clone() const override
55 {
56 return std::make_shared<HashedDictionary>(database, name, dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, sparse, saved_block);
57 }
58
59 const IDictionarySource * getSource() const override { return source_ptr.get(); }
60
61 const DictionaryLifetime & getLifetime() const override { return dict_lifetime; }
62
63 const DictionaryStructure & getStructure() const override { return dict_struct; }
64
65 bool isInjective(const std::string & attribute_name) const override
66 {
67 return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
68 }
69
70 bool hasHierarchy() const override { return hierarchical_attribute; }
71
72 void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override;
73
74 template <typename T>
75 using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
76
77#define DECLARE(TYPE) \
78 void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
79 DECLARE(UInt8)
80 DECLARE(UInt16)
81 DECLARE(UInt32)
82 DECLARE(UInt64)
83 DECLARE(UInt128)
84 DECLARE(Int8)
85 DECLARE(Int16)
86 DECLARE(Int32)
87 DECLARE(Int64)
88 DECLARE(Float32)
89 DECLARE(Float64)
90 DECLARE(Decimal32)
91 DECLARE(Decimal64)
92 DECLARE(Decimal128)
93#undef DECLARE
94
95 void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
96
97#define DECLARE(TYPE) \
98 void get##TYPE( \
99 const std::string & attribute_name, \
100 const PaddedPODArray<Key> & ids, \
101 const PaddedPODArray<TYPE> & def, \
102 ResultArrayType<TYPE> & out) const;
103 DECLARE(UInt8)
104 DECLARE(UInt16)
105 DECLARE(UInt32)
106 DECLARE(UInt64)
107 DECLARE(UInt128)
108 DECLARE(Int8)
109 DECLARE(Int16)
110 DECLARE(Int32)
111 DECLARE(Int64)
112 DECLARE(Float32)
113 DECLARE(Float64)
114 DECLARE(Decimal32)
115 DECLARE(Decimal64)
116 DECLARE(Decimal128)
117#undef DECLARE
118
119 void
120 getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
121 const;
122
123#define DECLARE(TYPE) \
124 void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE & def, ResultArrayType<TYPE> & out) \
125 const;
126 DECLARE(UInt8)
127 DECLARE(UInt16)
128 DECLARE(UInt32)
129 DECLARE(UInt64)
130 DECLARE(UInt128)
131 DECLARE(Int8)
132 DECLARE(Int16)
133 DECLARE(Int32)
134 DECLARE(Int64)
135 DECLARE(Float32)
136 DECLARE(Float64)
137 DECLARE(Decimal32)
138 DECLARE(Decimal64)
139 DECLARE(Decimal128)
140#undef DECLARE
141
142 void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
143
144 void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
145
146 void isInVectorVector(
147 const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
148 void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
149 void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
150
151 BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
152
153private:
154 template <typename Value>
155 using CollectionType = HashMap<UInt64, Value>;
156 template <typename Value>
157 using CollectionPtrType = std::unique_ptr<CollectionType<Value>>;
158
159 template <typename Value>
160 using SparseCollectionType = google::sparse_hash_map<UInt64, Value, DefaultHash<UInt64>>;
161 template <typename Value>
162 using SparseCollectionPtrType = std::unique_ptr<SparseCollectionType<Value>>;
163
164 struct Attribute final
165 {
166 AttributeUnderlyingType type;
167 std::variant<
168 UInt8,
169 UInt16,
170 UInt32,
171 UInt64,
172 UInt128,
173 Int8,
174 Int16,
175 Int32,
176 Int64,
177 Decimal32,
178 Decimal64,
179 Decimal128,
180 Float32,
181 Float64,
182 String>
183 null_values;
184 std::variant<
185 CollectionPtrType<UInt8>,
186 CollectionPtrType<UInt16>,
187 CollectionPtrType<UInt32>,
188 CollectionPtrType<UInt64>,
189 CollectionPtrType<UInt128>,
190 CollectionPtrType<Int8>,
191 CollectionPtrType<Int16>,
192 CollectionPtrType<Int32>,
193 CollectionPtrType<Int64>,
194 CollectionPtrType<Decimal32>,
195 CollectionPtrType<Decimal64>,
196 CollectionPtrType<Decimal128>,
197 CollectionPtrType<Float32>,
198 CollectionPtrType<Float64>,
199 CollectionPtrType<StringRef>>
200 maps;
201 std::variant<
202 SparseCollectionPtrType<UInt8>,
203 SparseCollectionPtrType<UInt16>,
204 SparseCollectionPtrType<UInt32>,
205 SparseCollectionPtrType<UInt64>,
206 SparseCollectionPtrType<UInt128>,
207 SparseCollectionPtrType<Int8>,
208 SparseCollectionPtrType<Int16>,
209 SparseCollectionPtrType<Int32>,
210 SparseCollectionPtrType<Int64>,
211 SparseCollectionPtrType<Decimal32>,
212 SparseCollectionPtrType<Decimal64>,
213 SparseCollectionPtrType<Decimal128>,
214 SparseCollectionPtrType<Float32>,
215 SparseCollectionPtrType<Float64>,
216 SparseCollectionPtrType<StringRef>>
217 sparse_maps;
218 std::unique_ptr<Arena> string_arena;
219 };
220
221 void createAttributes();
222
223 void blockToAttributes(const Block & block);
224
225 void updateData();
226
227 void loadData();
228
229 template <typename T>
230 void addAttributeSize(const Attribute & attribute);
231
232 void calculateBytesAllocated();
233
234 template <typename T>
235 void createAttributeImpl(Attribute & attribute, const Field & null_value);
236
237 Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
238
239 template <typename OutputType, typename AttrType, typename ValueSetter, typename DefaultGetter>
240 void getItemsAttrImpl(
241 const AttrType & attr, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
242 template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
243 void getItemsImpl(
244 const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
245
246 template <typename T>
247 bool setAttributeValueImpl(Attribute & attribute, const Key id, const T value);
248
249 bool setAttributeValue(Attribute & attribute, const Key id, const Field & value);
250
251 const Attribute & getAttribute(const std::string & attribute_name) const;
252
253 template <typename T>
254 void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
255
256 template <typename T, typename AttrType>
257 PaddedPODArray<Key> getIdsAttrImpl(const AttrType & attr) const;
258 template <typename T>
259 PaddedPODArray<Key> getIds(const Attribute & attribute) const;
260
261 PaddedPODArray<Key> getIds() const;
262
263 template <typename AttrType, typename ChildType, typename AncestorType>
264 void isInAttrImpl(const AttrType & attr, const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
265 template <typename ChildType, typename AncestorType>
266 void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
267
268 const std::string database;
269 const std::string name;
270 const std::string full_name;
271 const DictionaryStructure dict_struct;
272 const DictionarySourcePtr source_ptr;
273 const DictionaryLifetime dict_lifetime;
274 const bool require_nonempty;
275 const bool sparse;
276
277 std::map<std::string, size_t> attribute_index_by_name;
278 std::vector<Attribute> attributes;
279 const Attribute * hierarchical_attribute = nullptr;
280
281 size_t bytes_allocated = 0;
282 size_t element_count = 0;
283 size_t bucket_count = 0;
284 mutable std::atomic<size_t> query_count{0};
285
286 BlockPtr saved_block;
287};
288
289}
290