1#pragma once
2
3#include <Columns/IColumn.h>
4#include <Common/assert_cast.h>
5#include <Common/HashTable/HashTableKeyHolder.h>
6#include <Interpreters/AggregationCommon.h>
7
8
9namespace DB
10{
11
12namespace ColumnsHashing
13{
14
15/// Generic context for HashMethod. Context is shared between multiple threads, all methods must be thread-safe.
16/// Is used for caching.
17class HashMethodContext
18{
19public:
20 virtual ~HashMethodContext() = default;
21
22 struct Settings
23 {
24 size_t max_threads;
25 };
26};
27
28using HashMethodContextPtr = std::shared_ptr<HashMethodContext>;
29
30
31namespace columns_hashing_impl
32{
33
34template <typename Value, bool consecutive_keys_optimization_>
35struct LastElementCache
36{
37 static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_;
38 Value value;
39 bool empty = true;
40 bool found = false;
41
42 bool check(const Value & value_) { return !empty && value == value_; }
43
44 template <typename Key>
45 bool check(const Key & key) { return !empty && value.first == key; }
46};
47
48template <typename Data>
49struct LastElementCache<Data, false>
50{
51 static constexpr bool consecutive_keys_optimization = false;
52};
53
54template <typename Mapped>
55class EmplaceResultImpl
56{
57 Mapped & value;
58 Mapped & cached_value;
59 bool inserted;
60
61public:
62 EmplaceResultImpl(Mapped & value_, Mapped & cached_value_, bool inserted_)
63 : value(value_), cached_value(cached_value_), inserted(inserted_) {}
64
65 bool isInserted() const { return inserted; }
66 auto & getMapped() const { return value; }
67
68 void setMapped(const Mapped & mapped)
69 {
70 cached_value = mapped;
71 value = mapped;
72 }
73};
74
75template <>
76class EmplaceResultImpl<void>
77{
78 bool inserted;
79
80public:
81 explicit EmplaceResultImpl(bool inserted_) : inserted(inserted_) {}
82 bool isInserted() const { return inserted; }
83};
84
85template <typename Mapped>
86class FindResultImpl
87{
88 Mapped * value;
89 bool found;
90
91public:
92 FindResultImpl(Mapped * value_, bool found_) : value(value_), found(found_) {}
93 bool isFound() const { return found; }
94 Mapped & getMapped() const { return *value; }
95};
96
97template <>
98class FindResultImpl<void>
99{
100 bool found;
101
102public:
103 explicit FindResultImpl(bool found_) : found(found_) {}
104 bool isFound() const { return found; }
105};
106
107template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization>
108class HashMethodBase
109{
110public:
111 using EmplaceResult = EmplaceResultImpl<Mapped>;
112 using FindResult = FindResultImpl<Mapped>;
113 static constexpr bool has_mapped = !std::is_same<Mapped, void>::value;
114 using Cache = LastElementCache<Value, consecutive_keys_optimization>;
115
116 static HashMethodContextPtr createContext(const HashMethodContext::Settings &) { return nullptr; }
117
118 template <typename Data>
119 ALWAYS_INLINE EmplaceResult emplaceKey(Data & data, size_t row, Arena & pool)
120 {
121 auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
122 return emplaceImpl(key_holder, data);
123 }
124
125 template <typename Data>
126 ALWAYS_INLINE FindResult findKey(Data & data, size_t row, Arena & pool)
127 {
128 auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
129 return findKeyImpl(keyHolderGetKey(key_holder), data);
130 }
131
132 template <typename Data>
133 ALWAYS_INLINE size_t getHash(const Data & data, size_t row, Arena & pool)
134 {
135 auto key_holder = static_cast<Derived &>(*this).getKeyHolder(row, pool);
136 return data.hash(keyHolderGetKey(key_holder));
137 }
138
139protected:
140 Cache cache;
141
142 HashMethodBase()
143 {
144 if constexpr (consecutive_keys_optimization)
145 {
146 if constexpr (has_mapped)
147 {
148 /// Init PairNoInit elements.
149 cache.value.second = Mapped();
150 cache.value.first = {};
151 }
152 else
153 cache.value = Value();
154 }
155 }
156
157 template <typename Data, typename KeyHolder>
158 ALWAYS_INLINE EmplaceResult emplaceImpl(KeyHolder & key_holder, Data & data)
159 {
160 if constexpr (Cache::consecutive_keys_optimization)
161 {
162 if (cache.found && cache.check(keyHolderGetKey(key_holder)))
163 {
164 if constexpr (has_mapped)
165 return EmplaceResult(cache.value.second, cache.value.second, false);
166 else
167 return EmplaceResult(false);
168 }
169 }
170
171 typename Data::LookupResult it;
172 bool inserted = false;
173 data.emplace(key_holder, it, inserted);
174
175 [[maybe_unused]] Mapped * cached = nullptr;
176 if constexpr (has_mapped)
177 cached = &it->getMapped();
178
179 if (inserted)
180 {
181 if constexpr (has_mapped)
182 {
183 new (&it->getMapped()) Mapped();
184 }
185 }
186
187 if constexpr (consecutive_keys_optimization)
188 {
189 cache.found = true;
190 cache.empty = false;
191
192 if constexpr (has_mapped)
193 {
194 cache.value.first = it->getKey();
195 cache.value.second = it->getMapped();
196 cached = &cache.value.second;
197 }
198 else
199 {
200 cache.value = it->getKey();
201 }
202 }
203
204 if constexpr (has_mapped)
205 return EmplaceResult(it->getMapped(), *cached, inserted);
206 else
207 return EmplaceResult(inserted);
208 }
209
210 template <typename Data, typename Key>
211 ALWAYS_INLINE FindResult findKeyImpl(Key key, Data & data)
212 {
213 if constexpr (Cache::consecutive_keys_optimization)
214 {
215 if (cache.check(key))
216 {
217 if constexpr (has_mapped)
218 return FindResult(&cache.value.second, cache.found);
219 else
220 return FindResult(cache.found);
221 }
222 }
223
224 auto it = data.find(key);
225
226 if constexpr (consecutive_keys_optimization)
227 {
228 cache.found = it != nullptr;
229 cache.empty = false;
230
231 if constexpr (has_mapped)
232 {
233 cache.value.first = key;
234 if (it)
235 {
236 cache.value.second = it->getMapped();
237 }
238 }
239 else
240 {
241 cache.value = key;
242 }
243 }
244
245 if constexpr (has_mapped)
246 return FindResult(it ? &it->getMapped() : nullptr, it != nullptr);
247 else
248 return FindResult(it != nullptr);
249 }
250};
251
252
253template <typename T>
254struct MappedCache : public PaddedPODArray<T> {};
255
256template <>
257struct MappedCache<void> {};
258
259
260/// This class is designed to provide the functionality that is required for
261/// supporting nullable keys in HashMethodKeysFixed. If there are
262/// no nullable keys, this class is merely implemented as an empty shell.
263template <typename Key, bool has_nullable_keys>
264class BaseStateKeysFixed;
265
266/// Case where nullable keys are supported.
267template <typename Key>
268class BaseStateKeysFixed<Key, true>
269{
270protected:
271 BaseStateKeysFixed(const ColumnRawPtrs & key_columns)
272 {
273 null_maps.reserve(key_columns.size());
274 actual_columns.reserve(key_columns.size());
275
276 for (const auto & col : key_columns)
277 {
278 if (auto * nullable_col = checkAndGetColumn<ColumnNullable>(col))
279 {
280 actual_columns.push_back(&nullable_col->getNestedColumn());
281 null_maps.push_back(&nullable_col->getNullMapColumn());
282 }
283 else
284 {
285 actual_columns.push_back(col);
286 null_maps.push_back(nullptr);
287 }
288 }
289 }
290
291 /// Return the columns which actually contain the values of the keys.
292 /// For a given key column, if it is nullable, we return its nested
293 /// column. Otherwise we return the key column itself.
294 inline const ColumnRawPtrs & getActualColumns() const
295 {
296 return actual_columns;
297 }
298
299 /// Create a bitmap that indicates whether, for a particular row,
300 /// a key column bears a null value or not.
301 KeysNullMap<Key> createBitmap(size_t row) const
302 {
303 KeysNullMap<Key> bitmap{};
304
305 for (size_t k = 0; k < null_maps.size(); ++k)
306 {
307 if (null_maps[k] != nullptr)
308 {
309 const auto & null_map = assert_cast<const ColumnUInt8 &>(*null_maps[k]).getData();
310 if (null_map[row] == 1)
311 {
312 size_t bucket = k / 8;
313 size_t offset = k % 8;
314 bitmap[bucket] |= UInt8(1) << offset;
315 }
316 }
317 }
318
319 return bitmap;
320 }
321
322private:
323 ColumnRawPtrs actual_columns;
324 ColumnRawPtrs null_maps;
325};
326
327/// Case where nullable keys are not supported.
328template <typename Key>
329class BaseStateKeysFixed<Key, false>
330{
331protected:
332 BaseStateKeysFixed(const ColumnRawPtrs & columns) : actual_columns(columns) {}
333
334 const ColumnRawPtrs & getActualColumns() const { return actual_columns; }
335
336 KeysNullMap<Key> createBitmap(size_t) const
337 {
338 throw Exception{"Internal error: calling createBitmap() for non-nullable keys"
339 " is forbidden", ErrorCodes::LOGICAL_ERROR};
340 }
341
342private:
343 ColumnRawPtrs actual_columns;
344};
345
346}
347
348}
349
350}
351