1#pragma once
2
3#include <atomic>
4#include <variant>
5#include <vector>
6#include <Columns/ColumnDecimal.h>
7#include <Columns/ColumnString.h>
8#include <Common/Arena.h>
9#include <Core/Block.h>
10#include <ext/range.h>
11#include <ext/size.h>
12#include "DictionaryStructure.h"
13#include "IDictionary.h"
14#include "IDictionarySource.h"
15
16
17namespace DB
18{
19using BlockPtr = std::shared_ptr<Block>;
20
21class FlatDictionary final : public IDictionary
22{
23public:
24 FlatDictionary(
25 const std::string & database_,
26 const std::string & name_,
27 const DictionaryStructure & dict_struct_,
28 DictionarySourcePtr source_ptr_,
29 const DictionaryLifetime dict_lifetime_,
30 bool require_nonempty_,
31 BlockPtr saved_block_ = nullptr);
32
33 const std::string & getDatabase() const override { return database; }
34 const std::string & getName() const override { return name; }
35 const std::string & getFullName() const override { return full_name; }
36
37 std::string getTypeName() const override { return "Flat"; }
38
39 size_t getBytesAllocated() const override { return bytes_allocated; }
40
41 size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); }
42
43 double getHitRate() const override { return 1.0; }
44
45 size_t getElementCount() const override { return element_count; }
46
47 double getLoadFactor() const override { return static_cast<double>(element_count) / bucket_count; }
48
49 std::shared_ptr<const IExternalLoadable> clone() const override
50 {
51 return std::make_shared<FlatDictionary>(database, name, dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, saved_block);
52 }
53
54 const IDictionarySource * getSource() const override { return source_ptr.get(); }
55
56 const DictionaryLifetime & getLifetime() const override { return dict_lifetime; }
57
58 const DictionaryStructure & getStructure() const override { return dict_struct; }
59
60 bool isInjective(const std::string & attribute_name) const override
61 {
62 return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
63 }
64
65 bool hasHierarchy() const override { return hierarchical_attribute; }
66
67 void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override;
68
69 void isInVectorVector(
70 const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
71 void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
72 void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
73
74 template <typename T>
75 using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
76
77#define DECLARE(TYPE) \
78 void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
79 DECLARE(UInt8)
80 DECLARE(UInt16)
81 DECLARE(UInt32)
82 DECLARE(UInt64)
83 DECLARE(UInt128)
84 DECLARE(Int8)
85 DECLARE(Int16)
86 DECLARE(Int32)
87 DECLARE(Int64)
88 DECLARE(Float32)
89 DECLARE(Float64)
90 DECLARE(Decimal32)
91 DECLARE(Decimal64)
92 DECLARE(Decimal128)
93#undef DECLARE
94
95 void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
96
97#define DECLARE(TYPE) \
98 void get##TYPE( \
99 const std::string & attribute_name, \
100 const PaddedPODArray<Key> & ids, \
101 const PaddedPODArray<TYPE> & def, \
102 ResultArrayType<TYPE> & out) const;
103 DECLARE(UInt8)
104 DECLARE(UInt16)
105 DECLARE(UInt32)
106 DECLARE(UInt64)
107 DECLARE(UInt128)
108 DECLARE(Int8)
109 DECLARE(Int16)
110 DECLARE(Int32)
111 DECLARE(Int64)
112 DECLARE(Float32)
113 DECLARE(Float64)
114 DECLARE(Decimal32)
115 DECLARE(Decimal64)
116 DECLARE(Decimal128)
117#undef DECLARE
118
119 void
120 getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
121 const;
122
123#define DECLARE(TYPE) \
124 void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
125 DECLARE(UInt8)
126 DECLARE(UInt16)
127 DECLARE(UInt32)
128 DECLARE(UInt64)
129 DECLARE(UInt128)
130 DECLARE(Int8)
131 DECLARE(Int16)
132 DECLARE(Int32)
133 DECLARE(Int64)
134 DECLARE(Float32)
135 DECLARE(Float64)
136 DECLARE(Decimal32)
137 DECLARE(Decimal64)
138 DECLARE(Decimal128)
139#undef DECLARE
140
141 void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
142
143 void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
144
145 BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
146
147private:
148 template <typename Value>
149 using ContainerType = PaddedPODArray<Value>;
150
151 struct Attribute final
152 {
153 AttributeUnderlyingType type;
154 std::variant<
155 UInt8,
156 UInt16,
157 UInt32,
158 UInt64,
159 UInt128,
160 Int8,
161 Int16,
162 Int32,
163 Int64,
164 Decimal32,
165 Decimal64,
166 Decimal128,
167 Float32,
168 Float64,
169 StringRef>
170 null_values;
171 std::variant<
172 ContainerType<UInt8>,
173 ContainerType<UInt16>,
174 ContainerType<UInt32>,
175 ContainerType<UInt64>,
176 ContainerType<UInt128>,
177 ContainerType<Int8>,
178 ContainerType<Int16>,
179 ContainerType<Int32>,
180 ContainerType<Int64>,
181 ContainerType<Decimal32>,
182 ContainerType<Decimal64>,
183 ContainerType<Decimal128>,
184 ContainerType<Float32>,
185 ContainerType<Float64>,
186 ContainerType<StringRef>>
187 arrays;
188 std::unique_ptr<Arena> string_arena;
189 };
190
191 void createAttributes();
192 void blockToAttributes(const Block & block);
193 void updateData();
194 void loadData();
195
196 template <typename T>
197 void addAttributeSize(const Attribute & attribute);
198
199 void calculateBytesAllocated();
200
201 template <typename T>
202 void createAttributeImpl(Attribute & attribute, const Field & null_value);
203
204 Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
205
206 template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
207 void getItemsImpl(
208 const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
209
210 template <typename T>
211 void resize(Attribute & attribute, const Key id);
212
213 template <typename T>
214 void setAttributeValueImpl(Attribute & attribute, const Key id, const T & value);
215
216 void setAttributeValue(Attribute & attribute, const Key id, const Field & value);
217
218 const Attribute & getAttribute(const std::string & attribute_name) const;
219
220 template <typename T>
221 void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
222
223 template <typename ChildType, typename AncestorType>
224 void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
225
226 PaddedPODArray<Key> getIds() const;
227
228 const std::string database;
229 const std::string name;
230 const std::string full_name;
231 const DictionaryStructure dict_struct;
232 const DictionarySourcePtr source_ptr;
233 const DictionaryLifetime dict_lifetime;
234 const bool require_nonempty;
235
236 std::map<std::string, size_t> attribute_index_by_name;
237 std::vector<Attribute> attributes;
238 const Attribute * hierarchical_attribute = nullptr;
239 std::vector<bool> loaded_ids;
240
241 size_t bytes_allocated = 0;
242 size_t element_count = 0;
243 size_t bucket_count = 0;
244 mutable std::atomic<size_t> query_count{0};
245
246 BlockPtr saved_block;
247};
248
249}
250