1 | #pragma once |
2 | |
3 | #include <Columns/IColumn.h> |
4 | #include <Common/assert_cast.h> |
5 | #include <Common/HashTable/HashTableKeyHolder.h> |
6 | #include <Interpreters/AggregationCommon.h> |
7 | |
8 | |
9 | namespace DB |
10 | { |
11 | |
12 | namespace ColumnsHashing |
13 | { |
14 | |
15 | /// Generic context for HashMethod. Context is shared between multiple threads, all methods must be thread-safe. |
16 | /// Is used for caching. |
17 | class HashMethodContext |
18 | { |
19 | public: |
20 | virtual ~HashMethodContext() = default; |
21 | |
22 | struct Settings |
23 | { |
24 | size_t max_threads; |
25 | }; |
26 | }; |
27 | |
28 | using HashMethodContextPtr = std::shared_ptr<HashMethodContext>; |
29 | |
30 | |
31 | namespace columns_hashing_impl |
32 | { |
33 | |
34 | template <typename Value, bool consecutive_keys_optimization_> |
35 | struct LastElementCache |
36 | { |
37 | static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_; |
38 | Value value; |
39 | bool empty = true; |
40 | bool found = false; |
41 | |
42 | bool check(const Value & value_) { return !empty && value == value_; } |
43 | |
44 | template <typename Key> |
45 | bool check(const Key & key) { return !empty && value.first == key; } |
46 | }; |
47 | |
48 | template <typename Data> |
49 | struct LastElementCache<Data, false> |
50 | { |
51 | static constexpr bool consecutive_keys_optimization = false; |
52 | }; |
53 | |
54 | template <typename Mapped> |
55 | class EmplaceResultImpl |
56 | { |
57 | Mapped & value; |
58 | Mapped & cached_value; |
59 | bool inserted; |
60 | |
61 | public: |
62 | EmplaceResultImpl(Mapped & value_, Mapped & cached_value_, bool inserted_) |
63 | : value(value_), cached_value(cached_value_), inserted(inserted_) {} |
64 | |
65 | bool isInserted() const { return inserted; } |
66 | auto & getMapped() const { return value; } |
67 | |
68 | void setMapped(const Mapped & mapped) |
69 | { |
70 | cached_value = mapped; |
71 | value = mapped; |
72 | } |
73 | }; |
74 | |
75 | template <> |
76 | class EmplaceResultImpl<void> |
77 | { |
78 | bool inserted; |
79 | |
80 | public: |
81 | explicit EmplaceResultImpl(bool inserted_) : inserted(inserted_) {} |
82 | bool isInserted() const { return inserted; } |
83 | }; |
84 | |
85 | template <typename Mapped> |
86 | class FindResultImpl |
87 | { |
88 | Mapped * value; |
89 | bool found; |
90 | |
91 | public: |
92 | FindResultImpl(Mapped * value_, bool found_) : value(value_), found(found_) {} |
93 | bool isFound() const { return found; } |
94 | Mapped & getMapped() const { return *value; } |
95 | }; |
96 | |
97 | template <> |
98 | class FindResultImpl<void> |
99 | { |
100 | bool found; |
101 | |
102 | public: |
103 | explicit FindResultImpl(bool found_) : found(found_) {} |
104 | bool isFound() const { return found; } |
105 | }; |
106 | |
107 | template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization> |
108 | class HashMethodBase |
109 | { |
110 | public: |
111 | using EmplaceResult = EmplaceResultImpl<Mapped>; |
112 | using FindResult = FindResultImpl<Mapped>; |
113 | static constexpr bool has_mapped = !std::is_same<Mapped, void>::value; |
114 | using Cache = LastElementCache<Value, consecutive_keys_optimization>; |
115 | |
116 | static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; } |
117 | |
118 | template <typename Data> |
119 | ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool) |
120 | { |
121 | auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool); |
122 | return emplaceImpl(key_holder, data); |
123 | } |
124 | |
125 | template <typename Data> |
126 | ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool) |
127 | { |
128 | auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool); |
129 | return findKeyImpl(keyHolderGetKey(key_holder), data); |
130 | } |
131 | |
132 | template <typename Data> |
133 | ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool) |
134 | { |
135 | auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool); |
136 | return data.hash(keyHolderGetKey(key_holder)); |
137 | } |
138 | |
139 | protected: |
140 | Cache cache; |
141 | |
142 | HashMethodBase() |
143 | { |
144 | if constexpr (consecutive_keys_optimization) |
145 | { |
146 | if constexpr (has_mapped) |
147 | { |
148 | /// Init PairNoInit elements. |
149 | cache.value.second = Mapped(); |
150 | cache.value.first = {}; |
151 | } |
152 | else |
153 | cache.value = Value(); |
154 | } |
155 | } |
156 | |
157 | template <typename Data, typename KeyHolder> |
158 | ALWAYS_INLINE EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data) |
159 | { |
160 | if constexpr (Cache::consecutive_keys_optimization) |
161 | { |
162 | if (cache.found && cache.check(keyHolderGetKey(key_holder))) |
163 | { |
164 | if constexpr (has_mapped) |
165 | return EmplaceResult(cache.value.second, cache.value.second, false); |
166 | else |
167 | return EmplaceResult(false); |
168 | } |
169 | } |
170 | |
171 | typename Data::LookupResult it; |
172 | bool inserted = false; |
173 | data.emplace(key_holder, it, inserted); |
174 | |
175 | [[maybe_unused]] Mapped * cached = nullptr; |
176 | if constexpr (has_mapped) |
177 | cached = &it->getMapped(); |
178 | |
179 | if (inserted) |
180 | { |
181 | if constexpr (has_mapped) |
182 | { |
183 | new (&it->getMapped()) Mapped(); |
184 | } |
185 | } |
186 | |
187 | if constexpr (consecutive_keys_optimization) |
188 | { |
189 | cache.found = true; |
190 | cache.empty = false; |
191 | |
192 | if constexpr (has_mapped) |
193 | { |
194 | cache.value.first = it->getKey(); |
195 | cache.value.second = it->getMapped(); |
196 | cached = &cache.value.second; |
197 | } |
198 | else |
199 | { |
200 | cache.value = it->getKey(); |
201 | } |
202 | } |
203 | |
204 | if constexpr (has_mapped) |
205 | return EmplaceResult(it->getMapped(), *cached, inserted); |
206 | else |
207 | return EmplaceResult(inserted); |
208 | } |
209 | |
210 | template <typename Data, typename Key> |
211 | ALWAYS_INLINE FindResult findKeyImpl(Key key, Data & data) |
212 | { |
213 | if constexpr (Cache::consecutive_keys_optimization) |
214 | { |
215 | if (cache.check(key)) |
216 | { |
217 | if constexpr (has_mapped) |
218 | return FindResult(&cache.value.second, cache.found); |
219 | else |
220 | return FindResult(cache.found); |
221 | } |
222 | } |
223 | |
224 | auto it = data.find(key); |
225 | |
226 | if constexpr (consecutive_keys_optimization) |
227 | { |
228 | cache.found = it != nullptr; |
229 | cache.empty = false; |
230 | |
231 | if constexpr (has_mapped) |
232 | { |
233 | cache.value.first = key; |
234 | if (it) |
235 | { |
236 | cache.value.second = it->getMapped(); |
237 | } |
238 | } |
239 | else |
240 | { |
241 | cache.value = key; |
242 | } |
243 | } |
244 | |
245 | if constexpr (has_mapped) |
246 | return FindResult(it ? &it->getMapped() : nullptr, it != nullptr); |
247 | else |
248 | return FindResult(it != nullptr); |
249 | } |
250 | }; |
251 | |
252 | |
253 | template <typename T> |
254 | struct MappedCache : public PaddedPODArray<T> {}; |
255 | |
256 | template <> |
257 | struct MappedCache<void> {}; |
258 | |
259 | |
260 | /// This class is designed to provide the functionality that is required for |
261 | /// supporting nullable keys in HashMethodKeysFixed. If there are |
262 | /// no nullable keys, this class is merely implemented as an empty shell. |
263 | template <typename Key, bool has_nullable_keys> |
264 | class BaseStateKeysFixed; |
265 | |
266 | /// Case where nullable keys are supported. |
267 | template <typename Key> |
268 | class BaseStateKeysFixed<Key, true> |
269 | { |
270 | protected: |
271 | BaseStateKeysFixed(const ColumnRawPtrs & key_columns) |
272 | { |
273 | null_maps.reserve(key_columns.size()); |
274 | actual_columns.reserve(key_columns.size()); |
275 | |
276 | for (const auto & col : key_columns) |
277 | { |
278 | if (auto * nullable_col = checkAndGetColumn<ColumnNullable>(col)) |
279 | { |
280 | actual_columns.push_back(&nullable_col->getNestedColumn()); |
281 | null_maps.push_back(&nullable_col->getNullMapColumn()); |
282 | } |
283 | else |
284 | { |
285 | actual_columns.push_back(col); |
286 | null_maps.push_back(nullptr); |
287 | } |
288 | } |
289 | } |
290 | |
291 | /// Return the columns which actually contain the values of the keys. |
292 | /// For a given key column, if it is nullable, we return its nested |
293 | /// column. Otherwise we return the key column itself. |
294 | inline const ColumnRawPtrs & getActualColumns() const |
295 | { |
296 | return actual_columns; |
297 | } |
298 | |
299 | /// Create a bitmap that indicates whether, for a particular row, |
300 | /// a key column bears a null value or not. |
301 | KeysNullMap<Key> createBitmap(size_t row) const |
302 | { |
303 | KeysNullMap<Key> bitmap{}; |
304 | |
305 | for (size_t k = 0; k < null_maps.size(); ++k) |
306 | { |
307 | if (null_maps[k] != nullptr) |
308 | { |
309 | const auto & null_map = assert_cast<const ColumnUInt8 &>(*null_maps[k]).getData(); |
310 | if (null_map[row] == 1) |
311 | { |
312 | size_t bucket = k / 8; |
313 | size_t offset = k % 8; |
314 | bitmap[bucket] |= UInt8(1) << offset; |
315 | } |
316 | } |
317 | } |
318 | |
319 | return bitmap; |
320 | } |
321 | |
322 | private: |
323 | ColumnRawPtrs actual_columns; |
324 | ColumnRawPtrs null_maps; |
325 | }; |
326 | |
327 | /// Case where nullable keys are not supported. |
328 | template <typename Key> |
329 | class BaseStateKeysFixed<Key, false> |
330 | { |
331 | protected: |
332 | BaseStateKeysFixed(const ColumnRawPtrs & columns) : actual_columns(columns) {} |
333 | |
334 | const ColumnRawPtrs & getActualColumns() const { return actual_columns; } |
335 | |
336 | KeysNullMap<Key> createBitmap(size_t) const |
337 | { |
338 | throw Exception{"Internal error: calling createBitmap() for non-nullable keys" |
339 | " is forbidden" , ErrorCodes::LOGICAL_ERROR}; |
340 | } |
341 | |
342 | private: |
343 | ColumnRawPtrs actual_columns; |
344 | }; |
345 | |
346 | } |
347 | |
348 | } |
349 | |
350 | } |
351 | |