1 | #pragma once |
2 | |
3 | #include <atomic> |
4 | #include <chrono> |
5 | #include <cmath> |
6 | #include <map> |
7 | #include <shared_mutex> |
8 | #include <variant> |
9 | #include <vector> |
10 | #include <common/logger_useful.h> |
11 | #include <Columns/ColumnDecimal.h> |
12 | #include <Columns/ColumnString.h> |
13 | #include <pcg_random.hpp> |
14 | #include <Common/ArenaWithFreeLists.h> |
15 | #include <Common/CurrentMetrics.h> |
16 | #include <ext/bit_cast.h> |
17 | #include "DictionaryStructure.h" |
18 | #include "IDictionary.h" |
19 | #include "IDictionarySource.h" |
20 | |
21 | |
22 | namespace DB |
23 | { |
24 | class CacheDictionary final : public IDictionary |
25 | { |
26 | public: |
27 | CacheDictionary( |
28 | const std::string & database_, |
29 | const std::string & name_, |
30 | const DictionaryStructure & dict_struct_, |
31 | DictionarySourcePtr source_ptr_, |
32 | const DictionaryLifetime dict_lifetime_, |
33 | const size_t size_); |
34 | |
35 | const std::string & getDatabase() const override { return database; } |
36 | const std::string & getName() const override { return name; } |
37 | const std::string & getFullName() const override { return full_name; } |
38 | |
39 | std::string getTypeName() const override { return "Cache" ; } |
40 | |
41 | size_t getBytesAllocated() const override { return bytes_allocated + (string_arena ? string_arena->size() : 0); } |
42 | |
43 | size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); } |
44 | |
45 | double getHitRate() const override |
46 | { |
47 | return static_cast<double>(hit_count.load(std::memory_order_acquire)) / query_count.load(std::memory_order_relaxed); |
48 | } |
49 | |
50 | size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); } |
51 | |
52 | double getLoadFactor() const override { return static_cast<double>(element_count.load(std::memory_order_relaxed)) / size; } |
53 | |
54 | bool supportUpdates() const override { return false; } |
55 | |
56 | std::shared_ptr<const IExternalLoadable> clone() const override |
57 | { |
58 | return std::make_shared<CacheDictionary>(database, name, dict_struct, source_ptr->clone(), dict_lifetime, size); |
59 | } |
60 | |
61 | const IDictionarySource * getSource() const override { return source_ptr.get(); } |
62 | |
63 | const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } |
64 | |
65 | const DictionaryStructure & getStructure() const override { return dict_struct; } |
66 | |
67 | bool isInjective(const std::string & attribute_name) const override |
68 | { |
69 | return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; |
70 | } |
71 | |
72 | bool hasHierarchy() const override { return hierarchical_attribute; } |
73 | |
74 | void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override; |
75 | |
76 | void isInVectorVector( |
77 | const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override; |
78 | void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override; |
79 | void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override; |
80 | |
81 | std::exception_ptr getLastException() const override; |
82 | |
83 | template <typename T> |
84 | using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>; |
85 | |
86 | #define DECLARE(TYPE) \ |
87 | void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const; |
88 | DECLARE(UInt8) |
89 | DECLARE(UInt16) |
90 | DECLARE(UInt32) |
91 | DECLARE(UInt64) |
92 | DECLARE(UInt128) |
93 | DECLARE(Int8) |
94 | DECLARE(Int16) |
95 | DECLARE(Int32) |
96 | DECLARE(Int64) |
97 | DECLARE(Float32) |
98 | DECLARE(Float64) |
99 | DECLARE(Decimal32) |
100 | DECLARE(Decimal64) |
101 | DECLARE(Decimal128) |
102 | #undef DECLARE |
103 | |
104 | void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const; |
105 | |
106 | #define DECLARE(TYPE) \ |
107 | void get##TYPE( \ |
108 | const std::string & attribute_name, \ |
109 | const PaddedPODArray<Key> & ids, \ |
110 | const PaddedPODArray<TYPE> & def, \ |
111 | ResultArrayType<TYPE> & out) const; |
112 | DECLARE(UInt8) |
113 | DECLARE(UInt16) |
114 | DECLARE(UInt32) |
115 | DECLARE(UInt64) |
116 | DECLARE(UInt128) |
117 | DECLARE(Int8) |
118 | DECLARE(Int16) |
119 | DECLARE(Int32) |
120 | DECLARE(Int64) |
121 | DECLARE(Float32) |
122 | DECLARE(Float64) |
123 | DECLARE(Decimal32) |
124 | DECLARE(Decimal64) |
125 | DECLARE(Decimal128) |
126 | #undef DECLARE |
127 | |
128 | void |
129 | getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) |
130 | const; |
131 | |
132 | #define DECLARE(TYPE) \ |
133 | void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const; |
134 | DECLARE(UInt8) |
135 | DECLARE(UInt16) |
136 | DECLARE(UInt32) |
137 | DECLARE(UInt64) |
138 | DECLARE(UInt128) |
139 | DECLARE(Int8) |
140 | DECLARE(Int16) |
141 | DECLARE(Int32) |
142 | DECLARE(Int64) |
143 | DECLARE(Float32) |
144 | DECLARE(Float64) |
145 | DECLARE(Decimal32) |
146 | DECLARE(Decimal64) |
147 | DECLARE(Decimal128) |
148 | #undef DECLARE |
149 | |
150 | void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const; |
151 | |
152 | void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override; |
153 | |
154 | BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; |
155 | |
156 | private: |
157 | template <typename Value> |
158 | using ContainerType = Value[]; |
159 | template <typename Value> |
160 | using ContainerPtrType = std::unique_ptr<ContainerType<Value>>; |
161 | |
162 | struct CellMetadata final |
163 | { |
164 | using time_point_t = std::chrono::system_clock::time_point; |
165 | using time_point_rep_t = time_point_t::rep; |
166 | using time_point_urep_t = std::make_unsigned_t<time_point_rep_t>; |
167 | |
168 | static constexpr UInt64 EXPIRES_AT_MASK = std::numeric_limits<time_point_rep_t>::max(); |
169 | static constexpr UInt64 IS_DEFAULT_MASK = ~EXPIRES_AT_MASK; |
170 | |
171 | UInt64 id; |
172 | /// Stores both expiration time and `is_default` flag in the most significant bit |
173 | time_point_urep_t data; |
174 | |
175 | /// Sets expiration time, resets `is_default` flag to false |
176 | time_point_t expiresAt() const { return ext::safe_bit_cast<time_point_t>(data & EXPIRES_AT_MASK); } |
177 | void setExpiresAt(const time_point_t & t) { data = ext::safe_bit_cast<time_point_urep_t>(t); } |
178 | |
179 | bool isDefault() const { return (data & IS_DEFAULT_MASK) == IS_DEFAULT_MASK; } |
180 | void setDefault() { data |= IS_DEFAULT_MASK; } |
181 | }; |
182 | |
183 | struct Attribute final |
184 | { |
185 | AttributeUnderlyingType type; |
186 | std::variant< |
187 | UInt8, |
188 | UInt16, |
189 | UInt32, |
190 | UInt64, |
191 | UInt128, |
192 | Int8, |
193 | Int16, |
194 | Int32, |
195 | Int64, |
196 | Decimal32, |
197 | Decimal64, |
198 | Decimal128, |
199 | Float32, |
200 | Float64, |
201 | String> |
202 | null_values; |
203 | std::variant< |
204 | ContainerPtrType<UInt8>, |
205 | ContainerPtrType<UInt16>, |
206 | ContainerPtrType<UInt32>, |
207 | ContainerPtrType<UInt64>, |
208 | ContainerPtrType<UInt128>, |
209 | ContainerPtrType<Int8>, |
210 | ContainerPtrType<Int16>, |
211 | ContainerPtrType<Int32>, |
212 | ContainerPtrType<Int64>, |
213 | ContainerPtrType<Decimal32>, |
214 | ContainerPtrType<Decimal64>, |
215 | ContainerPtrType<Decimal128>, |
216 | ContainerPtrType<Float32>, |
217 | ContainerPtrType<Float64>, |
218 | ContainerPtrType<StringRef>> |
219 | arrays; |
220 | }; |
221 | |
222 | void createAttributes(); |
223 | |
224 | Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value); |
225 | |
226 | template <typename AttributeType, typename OutputType, typename DefaultGetter> |
227 | void getItemsNumberImpl( |
228 | Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const; |
229 | |
230 | template <typename DefaultGetter> |
231 | void getItemsString(Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const; |
232 | |
233 | template <typename PresentIdHandler, typename AbsentIdHandler> |
234 | void update(const std::vector<Key> & requested_ids, PresentIdHandler && on_cell_updated, AbsentIdHandler && on_id_not_found) const; |
235 | |
236 | PaddedPODArray<Key> getCachedIds() const; |
237 | |
238 | bool isEmptyCell(const UInt64 idx) const; |
239 | |
240 | size_t getCellIdx(const Key id) const; |
241 | |
242 | void setDefaultAttributeValue(Attribute & attribute, const Key idx) const; |
243 | |
244 | void setAttributeValue(Attribute & attribute, const Key idx, const Field & value) const; |
245 | |
246 | Attribute & getAttribute(const std::string & attribute_name) const; |
247 | |
248 | struct FindResult |
249 | { |
250 | const size_t cell_idx; |
251 | const bool valid; |
252 | const bool outdated; |
253 | }; |
254 | |
255 | FindResult findCellIdx(const Key & id, const CellMetadata::time_point_t now) const; |
256 | |
257 | template <typename AncestorType> |
258 | void isInImpl(const PaddedPODArray<Key> & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const; |
259 | |
260 | const std::string database; |
261 | const std::string name; |
262 | const std::string full_name; |
263 | const DictionaryStructure dict_struct; |
264 | mutable DictionarySourcePtr source_ptr; |
265 | const DictionaryLifetime dict_lifetime; |
266 | Logger * const log; |
267 | |
268 | mutable std::shared_mutex rw_lock; |
269 | |
270 | /// Actual size will be increased to match power of 2 |
271 | const size_t size; |
272 | |
273 | /// all bits to 1 mask (size - 1) (0b1000 - 1 = 0b111) |
274 | const size_t size_overlap_mask; |
275 | |
276 | /// Max tries to find cell, overlaped with mask: if size = 16 and start_cell=10: will try cells: 10,11,12,13,14,15,0,1,2,3 |
277 | static constexpr size_t max_collision_length = 10; |
278 | |
279 | const size_t zero_cell_idx{getCellIdx(0)}; |
280 | std::map<std::string, size_t> attribute_index_by_name; |
281 | mutable std::vector<Attribute> attributes; |
282 | mutable std::vector<CellMetadata> cells; |
283 | Attribute * hierarchical_attribute = nullptr; |
284 | std::unique_ptr<ArenaWithFreeLists> string_arena; |
285 | |
286 | mutable std::exception_ptr last_exception; |
287 | mutable size_t error_count = 0; |
288 | mutable std::chrono::system_clock::time_point backoff_end_time; |
289 | |
290 | mutable pcg64 rnd_engine; |
291 | |
292 | mutable size_t bytes_allocated = 0; |
293 | mutable std::atomic<size_t> element_count{0}; |
294 | mutable std::atomic<size_t> hit_count{0}; |
295 | mutable std::atomic<size_t> query_count{0}; |
296 | }; |
297 | |
298 | } |
299 | |