1#pragma once
2
3#include <atomic>
4#include <chrono>
5#include <cmath>
6#include <map>
7#include <shared_mutex>
8#include <variant>
9#include <vector>
10#include <common/logger_useful.h>
11#include <Columns/ColumnDecimal.h>
12#include <Columns/ColumnString.h>
13#include <pcg_random.hpp>
14#include <Common/ArenaWithFreeLists.h>
15#include <Common/CurrentMetrics.h>
16#include <ext/bit_cast.h>
17#include "DictionaryStructure.h"
18#include "IDictionary.h"
19#include "IDictionarySource.h"
20
21
22namespace DB
23{
24class CacheDictionary final : public IDictionary
25{
26public:
27 CacheDictionary(
28 const std::string & database_,
29 const std::string & name_,
30 const DictionaryStructure & dict_struct_,
31 DictionarySourcePtr source_ptr_,
32 const DictionaryLifetime dict_lifetime_,
33 const size_t size_);
34
35 const std::string & getDatabase() const override { return database; }
36 const std::string & getName() const override { return name; }
37 const std::string & getFullName() const override { return full_name; }
38
39 std::string getTypeName() const override { return "Cache"; }
40
41 size_t getBytesAllocated() const override { return bytes_allocated + (string_arena ? string_arena->size() : 0); }
42
43 size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); }
44
45 double getHitRate() const override
46 {
47 return static_cast<double>(hit_count.load(std::memory_order_acquire)) / query_count.load(std::memory_order_relaxed);
48 }
49
50 size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); }
51
52 double getLoadFactor() const override { return static_cast<double>(element_count.load(std::memory_order_relaxed)) / size; }
53
54 bool supportUpdates() const override { return false; }
55
56 std::shared_ptr<const IExternalLoadable> clone() const override
57 {
58 return std::make_shared<CacheDictionary>(database, name, dict_struct, source_ptr->clone(), dict_lifetime, size);
59 }
60
61 const IDictionarySource * getSource() const override { return source_ptr.get(); }
62
63 const DictionaryLifetime & getLifetime() const override { return dict_lifetime; }
64
65 const DictionaryStructure & getStructure() const override { return dict_struct; }
66
67 bool isInjective(const std::string & attribute_name) const override
68 {
69 return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
70 }
71
72 bool hasHierarchy() const override { return hierarchical_attribute; }
73
74 void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override;
75
76 void isInVectorVector(
77 const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
78 void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
79 void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
80
81 std::exception_ptr getLastException() const override;
82
83 template <typename T>
84 using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
85
86#define DECLARE(TYPE) \
87 void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
88 DECLARE(UInt8)
89 DECLARE(UInt16)
90 DECLARE(UInt32)
91 DECLARE(UInt64)
92 DECLARE(UInt128)
93 DECLARE(Int8)
94 DECLARE(Int16)
95 DECLARE(Int32)
96 DECLARE(Int64)
97 DECLARE(Float32)
98 DECLARE(Float64)
99 DECLARE(Decimal32)
100 DECLARE(Decimal64)
101 DECLARE(Decimal128)
102#undef DECLARE
103
104 void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
105
106#define DECLARE(TYPE) \
107 void get##TYPE( \
108 const std::string & attribute_name, \
109 const PaddedPODArray<Key> & ids, \
110 const PaddedPODArray<TYPE> & def, \
111 ResultArrayType<TYPE> & out) const;
112 DECLARE(UInt8)
113 DECLARE(UInt16)
114 DECLARE(UInt32)
115 DECLARE(UInt64)
116 DECLARE(UInt128)
117 DECLARE(Int8)
118 DECLARE(Int16)
119 DECLARE(Int32)
120 DECLARE(Int64)
121 DECLARE(Float32)
122 DECLARE(Float64)
123 DECLARE(Decimal32)
124 DECLARE(Decimal64)
125 DECLARE(Decimal128)
126#undef DECLARE
127
128 void
129 getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
130 const;
131
132#define DECLARE(TYPE) \
133 void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
134 DECLARE(UInt8)
135 DECLARE(UInt16)
136 DECLARE(UInt32)
137 DECLARE(UInt64)
138 DECLARE(UInt128)
139 DECLARE(Int8)
140 DECLARE(Int16)
141 DECLARE(Int32)
142 DECLARE(Int64)
143 DECLARE(Float32)
144 DECLARE(Float64)
145 DECLARE(Decimal32)
146 DECLARE(Decimal64)
147 DECLARE(Decimal128)
148#undef DECLARE
149
150 void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
151
152 void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
153
154 BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
155
156private:
157 template <typename Value>
158 using ContainerType = Value[];
159 template <typename Value>
160 using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
161
162 struct CellMetadata final
163 {
164 using time_point_t = std::chrono::system_clock::time_point;
165 using time_point_rep_t = time_point_t::rep;
166 using time_point_urep_t = std::make_unsigned_t<time_point_rep_t>;
167
168 static constexpr UInt64 EXPIRES_AT_MASK = std::numeric_limits<time_point_rep_t>::max();
169 static constexpr UInt64 IS_DEFAULT_MASK = ~EXPIRES_AT_MASK;
170
171 UInt64 id;
172 /// Stores both expiration time and `is_default` flag in the most significant bit
173 time_point_urep_t data;
174
175 /// Sets expiration time, resets `is_default` flag to false
176 time_point_t expiresAt() const { return ext::safe_bit_cast<time_point_t>(data & EXPIRES_AT_MASK); }
177 void setExpiresAt(const time_point_t & t) { data = ext::safe_bit_cast<time_point_urep_t>(t); }
178
179 bool isDefault() const { return (data & IS_DEFAULT_MASK) == IS_DEFAULT_MASK; }
180 void setDefault() { data |= IS_DEFAULT_MASK; }
181 };
182
183 struct Attribute final
184 {
185 AttributeUnderlyingType type;
186 std::variant<
187 UInt8,
188 UInt16,
189 UInt32,
190 UInt64,
191 UInt128,
192 Int8,
193 Int16,
194 Int32,
195 Int64,
196 Decimal32,
197 Decimal64,
198 Decimal128,
199 Float32,
200 Float64,
201 String>
202 null_values;
203 std::variant<
204 ContainerPtrType<UInt8>,
205 ContainerPtrType<UInt16>,
206 ContainerPtrType<UInt32>,
207 ContainerPtrType<UInt64>,
208 ContainerPtrType<UInt128>,
209 ContainerPtrType<Int8>,
210 ContainerPtrType<Int16>,
211 ContainerPtrType<Int32>,
212 ContainerPtrType<Int64>,
213 ContainerPtrType<Decimal32>,
214 ContainerPtrType<Decimal64>,
215 ContainerPtrType<Decimal128>,
216 ContainerPtrType<Float32>,
217 ContainerPtrType<Float64>,
218 ContainerPtrType<StringRef>>
219 arrays;
220 };
221
222 void createAttributes();
223
224 Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
225
226 template <typename AttributeType, typename OutputType, typename DefaultGetter>
227 void getItemsNumberImpl(
228 Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
229
230 template <typename DefaultGetter>
231 void getItemsString(Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const;
232
233 template <typename PresentIdHandler, typename AbsentIdHandler>
234 void update(const std::vector<Key> & requested_ids, PresentIdHandler && on_cell_updated, AbsentIdHandler && on_id_not_found) const;
235
236 PaddedPODArray<Key> getCachedIds() const;
237
238 bool isEmptyCell(const UInt64 idx) const;
239
240 size_t getCellIdx(const Key id) const;
241
242 void setDefaultAttributeValue(Attribute & attribute, const Key idx) const;
243
244 void setAttributeValue(Attribute & attribute, const Key idx, const Field & value) const;
245
246 Attribute & getAttribute(const std::string & attribute_name) const;
247
248 struct FindResult
249 {
250 const size_t cell_idx;
251 const bool valid;
252 const bool outdated;
253 };
254
255 FindResult findCellIdx(const Key & id, const CellMetadata::time_point_t now) const;
256
257 template <typename AncestorType>
258 void isInImpl(const PaddedPODArray<Key> & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
259
260 const std::string database;
261 const std::string name;
262 const std::string full_name;
263 const DictionaryStructure dict_struct;
264 mutable DictionarySourcePtr source_ptr;
265 const DictionaryLifetime dict_lifetime;
266 Logger * const log;
267
268 mutable std::shared_mutex rw_lock;
269
270 /// Actual size will be increased to match power of 2
271 const size_t size;
272
273 /// all bits to 1 mask (size - 1) (0b1000 - 1 = 0b111)
274 const size_t size_overlap_mask;
275
276 /// Max tries to find cell, overlaped with mask: if size = 16 and start_cell=10: will try cells: 10,11,12,13,14,15,0,1,2,3
277 static constexpr size_t max_collision_length = 10;
278
279 const size_t zero_cell_idx{getCellIdx(0)};
280 std::map<std::string, size_t> attribute_index_by_name;
281 mutable std::vector<Attribute> attributes;
282 mutable std::vector<CellMetadata> cells;
283 Attribute * hierarchical_attribute = nullptr;
284 std::unique_ptr<ArenaWithFreeLists> string_arena;
285
286 mutable std::exception_ptr last_exception;
287 mutable size_t error_count = 0;
288 mutable std::chrono::system_clock::time_point backoff_end_time;
289
290 mutable pcg64 rnd_engine;
291
292 mutable size_t bytes_allocated = 0;
293 mutable std::atomic<size_t> element_count{0};
294 mutable std::atomic<size_t> hit_count{0};
295 mutable std::atomic<size_t> query_count{0};
296};
297
298}
299