1#pragma once
2
3#include <Interpreters/AggregationCommon.h>
4#include <Common/ColumnsHashing.h>
5#include <Common/assert_cast.h>
6#include <Common/Arena.h>
7#include <Common/HashTable/HashSet.h>
8#include <Common/HashTable/ClearableHashSet.h>
9#include <Common/HashTable/FixedClearableHashSet.h>
10#include <Common/HashTable/FixedHashSet.h>
11#include <Common/UInt128.h>
12
13
14namespace DB
15{
16
17/** Methods for different implementations of sets (used in right hand side of IN or for DISTINCT).
18 * To use as template parameter.
19 */
20
21
22/// For the case where there is one numeric key.
23template <typename FieldType, typename TData, bool use_cache = true> /// UInt8/16/32/64 for any types with corresponding bit width.
24struct SetMethodOneNumber
25{
26 using Data = TData;
27 using Key = typename Data::key_type;
28
29 Data data;
30
31 using State = ColumnsHashing::HashMethodOneNumber<typename Data::value_type,
32 void, FieldType, use_cache>;
33};
34
35/// For the case where there is one string key.
36template <typename TData>
37struct SetMethodString
38{
39 using Data = TData;
40 using Key = typename Data::key_type;
41
42 Data data;
43
44 using State = ColumnsHashing::HashMethodString<typename Data::value_type, void, true, false>;
45};
46
47/// For the case when there is one fixed-length string key.
48template <typename TData>
49struct SetMethodFixedString
50{
51 using Data = TData;
52 using Key = typename Data::key_type;
53
54 Data data;
55
56 using State = ColumnsHashing::HashMethodFixedString<typename Data::value_type, void, true, false>;
57};
58
59namespace set_impl
60{
61
62/// This class is designed to provide the functionality that is required for
63/// supporting nullable keys in SetMethodKeysFixed. If there are
64/// no nullable keys, this class is merely implemented as an empty shell.
65template <typename Key, bool has_nullable_keys>
66class BaseStateKeysFixed;
67
68/// Case where nullable keys are supported.
69template <typename Key>
70class BaseStateKeysFixed<Key, true>
71{
72protected:
73 void init(const ColumnRawPtrs & key_columns)
74 {
75 null_maps.reserve(key_columns.size());
76 actual_columns.reserve(key_columns.size());
77
78 for (const auto & col : key_columns)
79 {
80 if (auto * nullable = checkAndGetColumn<ColumnNullable>(*col))
81 {
82 actual_columns.push_back(&nullable->getNestedColumn());
83 null_maps.push_back(&nullable->getNullMapColumn());
84 }
85 else
86 {
87 actual_columns.push_back(col);
88 null_maps.push_back(nullptr);
89 }
90 }
91 }
92
93 /// Return the columns which actually contain the values of the keys.
94 /// For a given key column, if it is nullable, we return its nested
95 /// column. Otherwise we return the key column itself.
96 inline const ColumnRawPtrs & getActualColumns() const
97 {
98 return actual_columns;
99 }
100
101 /// Create a bitmap that indicates whether, for a particular row,
102 /// a key column bears a null value or not.
103 KeysNullMap<Key> createBitmap(size_t row) const
104 {
105 KeysNullMap<Key> bitmap{};
106
107 for (size_t k = 0; k < null_maps.size(); ++k)
108 {
109 if (null_maps[k] != nullptr)
110 {
111 const auto & null_map = assert_cast<const ColumnUInt8 &>(*null_maps[k]).getData();
112 if (null_map[row] == 1)
113 {
114 size_t bucket = k / 8;
115 size_t offset = k % 8;
116 bitmap[bucket] |= UInt8(1) << offset;
117 }
118 }
119 }
120
121 return bitmap;
122 }
123
124private:
125 ColumnRawPtrs actual_columns;
126 ColumnRawPtrs null_maps;
127};
128
129/// Case where nullable keys are not supported.
130template <typename Key>
131class BaseStateKeysFixed<Key, false>
132{
133protected:
134 void init(const ColumnRawPtrs &)
135 {
136 throw Exception{"Internal error: calling init() for non-nullable"
137 " keys is forbidden", ErrorCodes::LOGICAL_ERROR};
138 }
139
140 const ColumnRawPtrs & getActualColumns() const
141 {
142 throw Exception{"Internal error: calling getActualColumns() for non-nullable"
143 " keys is forbidden", ErrorCodes::LOGICAL_ERROR};
144 }
145
146 KeysNullMap<Key> createBitmap(size_t) const
147 {
148 throw Exception{"Internal error: calling createBitmap() for non-nullable keys"
149 " is forbidden", ErrorCodes::LOGICAL_ERROR};
150 }
151};
152
153}
154
155/// For the case when all keys are of fixed length, and they fit in N (for example, 128) bits.
156template <typename TData, bool has_nullable_keys_ = false>
157struct SetMethodKeysFixed
158{
159 using Data = TData;
160 using Key = typename Data::key_type;
161 static constexpr bool has_nullable_keys = has_nullable_keys_;
162
163 Data data;
164
165 using State = ColumnsHashing::HashMethodKeysFixed<typename Data::value_type, Key, void, has_nullable_keys, false>;
166};
167
168/// For other cases. 128 bit hash from the key.
169template <typename TData>
170struct SetMethodHashed
171{
172 using Data = TData;
173 using Key = typename Data::key_type;
174
175 Data data;
176
177 using State = ColumnsHashing::HashMethodHashed<typename Data::value_type, void>;
178};
179
180
181/** Different implementations of the set.
182 */
183struct NonClearableSet
184{
185 /*
186 * As in Aggregator, using consecutive keys cache doesn't improve performance
187 * for FixedHashTables.
188 */
189 std::unique_ptr<SetMethodOneNumber<UInt8, FixedHashSet<UInt8>, false /* use_cache */>> key8;
190 std::unique_ptr<SetMethodOneNumber<UInt16, FixedHashSet<UInt16>, false /* use_cache */>> key16;
191
192 /** Also for the experiment was tested the ability to use SmallSet,
193 * as long as the number of elements in the set is small (and, if necessary, converted to a full-fledged HashSet).
194 * But this experiment showed that there is an advantage only in rare cases.
195 */
196 std::unique_ptr<SetMethodOneNumber<UInt32, HashSet<UInt32, HashCRC32<UInt32>>>> key32;
197 std::unique_ptr<SetMethodOneNumber<UInt64, HashSet<UInt64, HashCRC32<UInt64>>>> key64;
198 std::unique_ptr<SetMethodString<HashSetWithSavedHash<StringRef>>> key_string;
199 std::unique_ptr<SetMethodFixedString<HashSetWithSavedHash<StringRef>>> key_fixed_string;
200 std::unique_ptr<SetMethodKeysFixed<HashSet<UInt128, UInt128HashCRC32>>> keys128;
201 std::unique_ptr<SetMethodKeysFixed<HashSet<UInt256, UInt256HashCRC32>>> keys256;
202 std::unique_ptr<SetMethodHashed<HashSet<UInt128, UInt128TrivialHash>>> hashed;
203
204 /// Support for nullable keys (for DISTINCT implementation).
205 std::unique_ptr<SetMethodKeysFixed<HashSet<UInt128, UInt128HashCRC32>, true>> nullable_keys128;
206 std::unique_ptr<SetMethodKeysFixed<HashSet<UInt256, UInt256HashCRC32>, true>> nullable_keys256;
207 /** Unlike Aggregator, `concat` method is not used here.
208 * This is done because `hashed` method, although slower, but in this case, uses less RAM.
209 * since when you use it, the key values themselves are not stored.
210 */
211};
212
213struct ClearableSet
214{
215 std::unique_ptr<SetMethodOneNumber<UInt8, FixedClearableHashSet<UInt8>, false /* use_cache */>> key8;
216 std::unique_ptr<SetMethodOneNumber<UInt16, FixedClearableHashSet<UInt16>, false /*use_cache */>> key16;
217
218 std::unique_ptr<SetMethodOneNumber<UInt32, ClearableHashSet<UInt32, HashCRC32<UInt32>>>> key32;
219 std::unique_ptr<SetMethodOneNumber<UInt64, ClearableHashSet<UInt64, HashCRC32<UInt64>>>> key64;
220 std::unique_ptr<SetMethodString<ClearableHashSetWithSavedHash<StringRef>>> key_string;
221 std::unique_ptr<SetMethodFixedString<ClearableHashSetWithSavedHash<StringRef>>> key_fixed_string;
222 std::unique_ptr<SetMethodKeysFixed<ClearableHashSet<UInt128, UInt128HashCRC32>>> keys128;
223 std::unique_ptr<SetMethodKeysFixed<ClearableHashSet<UInt256, UInt256HashCRC32>>> keys256;
224 std::unique_ptr<SetMethodHashed<ClearableHashSet<UInt128, UInt128TrivialHash>>> hashed;
225
226 /// Support for nullable keys (for DISTINCT implementation).
227 std::unique_ptr<SetMethodKeysFixed<ClearableHashSet<UInt128, UInt128HashCRC32>, true>> nullable_keys128;
228 std::unique_ptr<SetMethodKeysFixed<ClearableHashSet<UInt256, UInt256HashCRC32>, true>> nullable_keys256;
229 /** Unlike Aggregator, `concat` method is not used here.
230 * This is done because `hashed` method, although slower, but in this case, uses less RAM.
231 * since when you use it, the key values themselves are not stored.
232 */
233};
234
235template <typename Variant>
236struct SetVariantsTemplate: public Variant
237{
238 Arena string_pool;
239
240 #define APPLY_FOR_SET_VARIANTS(M) \
241 M(key8) \
242 M(key16) \
243 M(key32) \
244 M(key64) \
245 M(key_string) \
246 M(key_fixed_string) \
247 M(keys128) \
248 M(keys256) \
249 M(nullable_keys128) \
250 M(nullable_keys256) \
251 M(hashed)
252
253 #define M(NAME) using Variant::NAME;
254 APPLY_FOR_SET_VARIANTS(M)
255 #undef M
256
257 enum class Type
258 {
259 EMPTY,
260
261 #define M(NAME) NAME,
262 APPLY_FOR_SET_VARIANTS(M)
263 #undef M
264 };
265
266 Type type = Type::EMPTY;
267
268 bool empty() const { return type == Type::EMPTY; }
269
270 static Type chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_sizes);
271
272 void init(Type type_);
273
274 size_t getTotalRowCount() const;
275 /// Counts the size in bytes of the Set buffer and the size of the `string_pool`
276 size_t getTotalByteCount() const;
277};
278
279using SetVariants = SetVariantsTemplate<NonClearableSet>;
280using ClearableSetVariants = SetVariantsTemplate<ClearableSet>;
281
282}
283