#pragma once

#include <array>
#include <vector>

#include <Common/SipHash.h>
#include <Common/Arena.h>
#include <Common/UInt128.h>
#include <Common/HashTable/Hash.h>
#include <Common/memcpySmall.h>
#include <Common/assert_cast.h>
#include <Common/Exception.h>
#include <Core/Defines.h>
#include <common/StringRef.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnLowCardinality.h>


template <>
struct DefaultHash<StringRef> : public StringRefHash {};


namespace DB
{

namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}

using Sizes = std::vector<size_t>;

/// When packing the values of nullable columns at a given row, we have to
/// record whether each of these values is null. This is achieved by encoding
/// this information as a bitmap placed at the front of the blob. Let S be the
/// size in bytes of the packed-values binary blob and T the number of value
/// bytes we may place into this blob; the bitmap then occupies ceil(T/8) bytes
/// of the blob, so we must have: S = T + ceil(T/8). Below we list, for each
/// supported value of S, the corresponding value of T and the bitmap size:
///
/// S=32: T=28, bitmap size 4
/// S=16: T=14, bitmap size 2
/// S=8:  T=7,  bitmap size 1
/// S=4:  T=3,  bitmap size 1
/// S=2:  T=1,  bitmap size 1
///
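/// For example, to pack key values into a UInt64 blob (S = 8), we may use
/// T = 7 value bytes, and the bitmap takes ceil(7/8) = 1 byte: 7 + 1 = 8.
///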

namespace
{

/// Number of bytes the null bitmap occupies in a blob of type T, as tabulated above.
template <typename T>
constexpr auto getBitmapSize()
{
    return (sizeof(T) == 32) ? 4
        : (sizeof(T) == 16) ? 2
        : (sizeof(T) == 8) ? 1
        : (sizeof(T) == 4) ? 1
        : (sizeof(T) == 2) ? 1
        : 0;
}

}

/// Null bitmap accompanying keys packed into a blob of type T.
template <typename T>
using KeysNullMap = std::array<UInt8, getBitmapSize<T>()>;
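
/// For example (illustrative only), KeysNullMap<UInt64> is std::array<UInt8, 1>:
/// a single bitmap byte covering up to 8 keys packed into a UInt64 blob.
///
///     KeysNullMap<UInt64> bitmap{};   /// all keys assumed non-null
///     bitmap[0] |= (1u << 2);         /// mark the third key as null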

/// Pack a set of fixed-size keys into a binary blob of type T. Assuming that all
/// the keys fit into the blob, they are laid out in it consecutively.
template <typename T, bool has_low_cardinality = false>
static inline T ALWAYS_INLINE packFixed(
    size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes,
    const ColumnRawPtrs * low_cardinality_positions [[maybe_unused]] = nullptr,
    const Sizes * low_cardinality_sizes [[maybe_unused]] = nullptr)
{
    union
    {
        T key;
        char bytes[sizeof(key)] = {};
    };

    size_t offset = 0;

    for (size_t j = 0; j < keys_size; ++j)
    {
        size_t index = i;
        const IColumn * column = key_columns[j];
        if constexpr (has_low_cardinality)
        {
            /// For a low cardinality column, translate the row number into an index in its dictionary.
            if (const IColumn * positions = (*low_cardinality_positions)[j])
            {
                switch ((*low_cardinality_sizes)[j])
                {
                    case sizeof(UInt8): index = assert_cast<const ColumnUInt8 *>(positions)->getElement(i); break;
                    case sizeof(UInt16): index = assert_cast<const ColumnUInt16 *>(positions)->getElement(i); break;
                    case sizeof(UInt32): index = assert_cast<const ColumnUInt32 *>(positions)->getElement(i); break;
                    case sizeof(UInt64): index = assert_cast<const ColumnUInt64 *>(positions)->getElement(i); break;
                    default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR);
                }
            }
        }

        switch (key_sizes[j])
        {
            case 1:
                memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<1>() + index, 1);
                offset += 1;
                break;
            case 2:
                memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<2>() + index * 2, 2);
                offset += 2;
                break;
            case 4:
                memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<4>() + index * 4, 4);
                offset += 4;
                break;
            case 8:
                memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<8>() + index * 8, 8);
                offset += 8;
                break;
            default:
                memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(column)->getRawDataBegin<1>() + index * key_sizes[j], key_sizes[j]);
                offset += key_sizes[j];
        }
    }

    return key;
}
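
/// Illustrative sketch of a call (all names are hypothetical): pack two UInt32
/// keys of the row `row` into a single UInt64 blob (4 + 4 = 8 bytes):
///
///     Sizes key_sizes{4, 4};
///     UInt64 packed = packFixed<UInt64>(row, 2, key_columns, key_sizes);
///
/// Since `bytes` is zero-initialized, keys that do not fill the blob completely
/// still produce deterministic values, so the result can serve directly as a
/// hash table key.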

/// Same as above, but supports nullable values.
template <typename T>
static inline T ALWAYS_INLINE packFixed(
    size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes,
    const KeysNullMap<T> & bitmap)
{
    union
    {
        T key;
        char bytes[sizeof(key)] = {};
    };

    size_t offset = 0;

    static constexpr auto bitmap_size = std::tuple_size<KeysNullMap<T>>::value;
    static constexpr bool has_bitmap = bitmap_size > 0;

    /// The null bitmap is placed at the front of the blob.
    if constexpr (has_bitmap)
    {
        memcpy(bytes + offset, bitmap.data(), bitmap_size * sizeof(UInt8));
        offset += bitmap_size;
    }

    for (size_t j = 0; j < keys_size; ++j)
    {
        bool is_null;

        if constexpr (!has_bitmap)
            is_null = false;
        else
        {
            size_t bucket = j / 8;
            size_t off = j % 8;
            is_null = ((bitmap[bucket] >> off) & 1) == 1;
        }

        /// A null value is recorded only in the bitmap; no key bytes are packed for it.
        if (is_null)
            continue;

        switch (key_sizes[j])
        {
            case 1:
                memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<1>() + i, 1);
                offset += 1;
                break;
            case 2:
                memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<2>() + i * 2, 2);
                offset += 2;
                break;
            case 4:
                memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<4>() + i * 4, 4);
                offset += 4;
                break;
            case 8:
                memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<8>() + i * 8, 8);
                offset += 8;
                break;
            default:
                memcpy(bytes + offset, static_cast<const ColumnVectorHelper *>(key_columns[j])->getRawDataBegin<1>() + i * key_sizes[j], key_sizes[j]);
                offset += key_sizes[j];
        }
    }

    return key;
}
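
/// Illustrative sketch (all names are hypothetical): pack one nullable UInt16 key
/// and one UInt32 key of the row `row` into a UInt64 blob; one bitmap byte plus
/// 2 + 4 key bytes fit into the 8-byte blob. The caller passes the nested
/// (non-nullable) columns in `key_columns` and builds the bitmap from the null
/// maps itself:
///
///     KeysNullMap<UInt64> bitmap{};
///     if (first_key_null_map[row])
///         bitmap[0] |= 1u;    /// the first key is null at this row
///     Sizes key_sizes{2, 4};
///     UInt64 packed = packFixed<UInt64>(row, 2, key_columns, key_sizes, bitmap);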


/// Hash a set of keys into a UInt128 value.
static inline UInt128 ALWAYS_INLINE hash128(
    size_t i, size_t keys_size, const ColumnRawPtrs & key_columns)
{
    UInt128 key;
    SipHash hash;

    for (size_t j = 0; j < keys_size; ++j)
        key_columns[j]->updateHashWithValue(i, hash);

    hash.get128(key.low, key.high);

    return key;
}
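
/// Typically used when the keys cannot be packed into a fixed-size blob: the
/// 128-bit SipHash of all key values serves as the hash table key instead.
/// A minimal sketch (names are hypothetical):
///
///     UInt128 key = hash128(row, key_columns.size(), key_columns);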


/// Copy the keys into the pool, then store StringRefs pointing at the copies in
/// the pool as well, and return a pointer to the first such StringRef.
static inline StringRef * ALWAYS_INLINE placeKeysInPool(
    size_t keys_size, StringRefs & keys, Arena & pool)
{
    for (size_t j = 0; j < keys_size; ++j)
    {
        char * place = pool.alloc(keys[j].size);
        memcpySmallAllowReadWriteOverflow15(place, keys[j].data, keys[j].size);
        keys[j].data = place;
    }

    /// Copy the StringRefs, which now point into the pool, into the pool as well.
    char * res = pool.alignedAlloc(keys_size * sizeof(StringRef), alignof(StringRef));
    memcpySmallAllowReadWriteOverflow15(res, keys.data(), keys_size * sizeof(StringRef));

    return reinterpret_cast<StringRef *>(res);
}
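
/// A minimal usage sketch (names are hypothetical). `keys` must already refer to
/// the key values; after the call, both the copied bytes and the StringRef array
/// live in `pool`, and each keys[j].data points into the pool:
///
///     StringRefs keys(keys_size);
///     /// ... fill each keys[j] with the data pointer and size of a key ...
///     StringRef * persisted = placeKeysInPool(keys_size, keys, pool);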


/** Serialize the keys of a given row into a contiguous chunk of memory.
  */
static inline StringRef ALWAYS_INLINE serializeKeysToPoolContiguous(
    size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, Arena & pool)
{
    const char * begin = nullptr;

    size_t sum_size = 0;
    for (size_t j = 0; j < keys_size; ++j)
        sum_size += key_columns[j]->serializeValueIntoArena(i, pool, begin).size;

    return {begin, sum_size};
}
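
/// Each serialization is appended right after the previous one in the pool, so
/// the returned StringRef covers all the keys at once. A minimal sketch (names
/// are hypothetical):
///
///     StringRef serialized = serializeKeysToPoolContiguous(row, key_columns.size(), key_columns, pool);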


}
