1 | //===----------------------------------------------------------------------===// |
2 | // DuckDB |
3 | // |
4 | // duckdb/common/bitpacking.hpp |
5 | // |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #pragma once |
10 | |
11 | #include "bitpackinghelpers.h" |
12 | #include "duckdb/common/assert.hpp" |
13 | #include "duckdb/common/exception.hpp" |
14 | #include "duckdb/common/helper.hpp" |
15 | #include "duckdb/common/limits.hpp" |
16 | |
17 | namespace duckdb { |
18 | |
// Number of bits used per value in a packed group; capped at sizeof(T) * 8 (see GetEffectiveWidth), so uint8_t is wide enough.
using bitpacking_width_t = uint8_t;
20 | |
21 | class BitpackingPrimitives { |
22 | |
23 | public: |
24 | static constexpr const idx_t BITPACKING_ALGORITHM_GROUP_SIZE = 32; |
25 | static constexpr const idx_t = sizeof(uint64_t); |
26 | static constexpr const bool BYTE_ALIGNED = false; |
27 | |
28 | // To ensure enough data is available, use GetRequiredSize() to determine the correct size for dst buffer |
29 | // Note: input should be aligned to BITPACKING_ALGORITHM_GROUP_SIZE for good performance. |
30 | template <class T, bool ASSUME_INPUT_ALIGNED = false> |
31 | inline static void PackBuffer(data_ptr_t dst, T *src, idx_t count, bitpacking_width_t width) { |
32 | if (ASSUME_INPUT_ALIGNED) { |
33 | for (idx_t i = 0; i < count; i += BITPACKING_ALGORITHM_GROUP_SIZE) { |
34 | PackGroup<T>(dst + (i * width) / 8, src + i, width); |
35 | } |
36 | } else { |
37 | idx_t misaligned_count = count % BITPACKING_ALGORITHM_GROUP_SIZE; |
38 | T tmp_buffer[BITPACKING_ALGORITHM_GROUP_SIZE]; // TODO maybe faster on the heap? |
39 | |
40 | if (misaligned_count) { |
41 | count -= misaligned_count; |
42 | } |
43 | |
44 | for (idx_t i = 0; i < count; i += BITPACKING_ALGORITHM_GROUP_SIZE) { |
45 | PackGroup<T>(dst + (i * width) / 8, src + i, width); |
46 | } |
47 | |
48 | // Input was not aligned to BITPACKING_ALGORITHM_GROUP_SIZE, we need a copy |
49 | if (misaligned_count) { |
50 | memcpy(tmp_buffer, src + count, misaligned_count * sizeof(T)); |
51 | PackGroup<T>(dst + (count * width) / 8, tmp_buffer, width); |
52 | } |
53 | } |
54 | } |
55 | |
56 | // Unpacks a block of BITPACKING_ALGORITHM_GROUP_SIZE values |
57 | // Assumes both src and dst to be of the correct size |
58 | template <class T> |
59 | inline static void UnPackBuffer(data_ptr_t dst, data_ptr_t src, idx_t count, bitpacking_width_t width, |
60 | bool skip_sign_extension = false) { |
61 | |
62 | for (idx_t i = 0; i < count; i += BITPACKING_ALGORITHM_GROUP_SIZE) { |
63 | UnPackGroup<T>(dst + i * sizeof(T), src + (i * width) / 8, width, skip_sign_extension); |
64 | } |
65 | } |
66 | |
67 | // Packs a block of BITPACKING_ALGORITHM_GROUP_SIZE values |
68 | template <class T> |
69 | inline static void PackBlock(data_ptr_t dst, T *src, bitpacking_width_t width) { |
70 | return PackGroup<T>(dst, src, width); |
71 | } |
72 | |
73 | // Unpacks a block of BITPACKING_ALGORITHM_GROUP_SIZE values |
74 | template <class T> |
75 | inline static void UnPackBlock(data_ptr_t dst, data_ptr_t src, bitpacking_width_t width, |
76 | bool skip_sign_extension = false) { |
77 | return UnPackGroup<T>(dst, src, width, skip_sign_extension); |
78 | } |
79 | |
80 | // Calculates the minimum required number of bits per value that can store all values |
81 | template <class T> |
82 | inline static bitpacking_width_t MinimumBitWidth(T value) { |
83 | return FindMinimumBitWidth<T, BYTE_ALIGNED>(value, value); |
84 | } |
85 | |
86 | // Calculates the minimum required number of bits per value that can store all values |
87 | template <class T> |
88 | inline static bitpacking_width_t MinimumBitWidth(T *values, idx_t count) { |
89 | return FindMinimumBitWidth<T, BYTE_ALIGNED>(values, count); |
90 | } |
91 | |
92 | // Calculates the minimum required number of bits per value that can store all values, |
93 | // given a predetermined minimum and maximum value of the buffer |
94 | template <class T> |
95 | inline static bitpacking_width_t MinimumBitWidth(T minimum, T maximum) { |
96 | return FindMinimumBitWidth<T, BYTE_ALIGNED>(minimum, maximum); |
97 | } |
98 | |
99 | inline static idx_t GetRequiredSize(idx_t count, bitpacking_width_t width) { |
100 | count = RoundUpToAlgorithmGroupSize(num_to_round: count); |
101 | return ((count * width) / 8); |
102 | } |
103 | |
104 | template <class T> |
105 | inline static T RoundUpToAlgorithmGroupSize(T num_to_round) { |
106 | int remainder = num_to_round % BITPACKING_ALGORITHM_GROUP_SIZE; |
107 | if (remainder == 0) { |
108 | return num_to_round; |
109 | } |
110 | |
111 | return num_to_round + BITPACKING_ALGORITHM_GROUP_SIZE - remainder; |
112 | } |
113 | |
114 | private: |
115 | template <class T, bool round_to_next_byte = false> |
116 | static bitpacking_width_t FindMinimumBitWidth(T *values, idx_t count) { |
117 | T min_value = values[0]; |
118 | T max_value = values[0]; |
119 | |
120 | for (idx_t i = 1; i < count; i++) { |
121 | if (values[i] > max_value) { |
122 | max_value = values[i]; |
123 | } |
124 | |
125 | if (std::is_signed<T>::value) { |
126 | if (values[i] < min_value) { |
127 | min_value = values[i]; |
128 | } |
129 | } |
130 | } |
131 | |
132 | return FindMinimumBitWidth<T, round_to_next_byte>(min_value, max_value); |
133 | } |
134 | |
135 | template <class T, bool round_to_next_byte = false> |
136 | static bitpacking_width_t FindMinimumBitWidth(T min_value, T max_value) { |
137 | bitpacking_width_t bitwidth; |
138 | T value; |
139 | |
140 | if (std::is_signed<T>::value) { |
141 | if (min_value == NumericLimits<T>::Minimum()) { |
142 | // handle special case of the minimal value, as it cannot be negated like all other values. |
143 | return sizeof(T) * 8; |
144 | } else { |
145 | value = MaxValue((T)-min_value, max_value); |
146 | } |
147 | } else { |
148 | value = max_value; |
149 | } |
150 | |
151 | if (value == 0) { |
152 | return 0; |
153 | } |
154 | |
155 | if (std::is_signed<T>::value) { |
156 | bitwidth = 1; |
157 | } else { |
158 | bitwidth = 0; |
159 | } |
160 | |
161 | while (value) { |
162 | bitwidth++; |
163 | value >>= 1; |
164 | } |
165 | |
166 | bitwidth = GetEffectiveWidth<T>(bitwidth); |
167 | |
168 | // Assert results are correct |
169 | #ifdef DEBUG |
170 | if (bitwidth < sizeof(T) * 8 && bitwidth != 0) { |
171 | if (std::is_signed<T>::value) { |
172 | D_ASSERT((int64_t)max_value <= (int64_t)(1L << (bitwidth - 1)) - 1); |
173 | D_ASSERT((int64_t)min_value >= (int64_t)(-1 * ((1L << (bitwidth - 1)) - 1) - 1)); |
174 | } else { |
175 | D_ASSERT((uint64_t)max_value <= (uint64_t)(1L << (bitwidth)) - 1); |
176 | } |
177 | } |
178 | #endif |
179 | if (round_to_next_byte) { |
180 | return (bitwidth / 8 + (bitwidth % 8 != 0)) * 8; |
181 | } else { |
182 | return bitwidth; |
183 | } |
184 | } |
185 | |
186 | // Sign bit extension |
187 | template <class T, class T_U = typename std::make_unsigned<T>::type> |
188 | static void SignExtend(data_ptr_t dst, bitpacking_width_t width) { |
189 | T const mask = ((T_U)1) << (width - 1); |
190 | for (idx_t i = 0; i < BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE; ++i) { |
191 | T value = Load<T>(dst + i * sizeof(T)); |
192 | value = value & ((((T_U)1) << width) - ((T_U)1)); |
193 | T result = (value ^ mask) - mask; |
194 | Store(result, dst + i * sizeof(T)); |
195 | } |
196 | } |
197 | |
198 | template <class T> |
199 | static void UnPackGroup(data_ptr_t dst, data_ptr_t src, bitpacking_width_t width, |
200 | bool skip_sign_extension = false) { |
201 | if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) { |
202 | duckdb_fastpforlib::fastunpack(in: (const uint8_t *)src, out: (uint8_t *)dst, bit: (uint32_t)width); |
203 | } else if (std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value) { |
204 | duckdb_fastpforlib::fastunpack(in: (const uint16_t *)src, out: (uint16_t *)dst, bit: (uint32_t)width); |
205 | } else if (std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value) { |
206 | duckdb_fastpforlib::fastunpack(in: (const uint32_t *)src, out: (uint32_t *)dst, bit: (uint32_t)width); |
207 | } else if (std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value) { |
208 | duckdb_fastpforlib::fastunpack(in: (const uint32_t *)src, out: (uint64_t *)dst, bit: (uint32_t)width); |
209 | } else { |
210 | throw InternalException("Unsupported type found in bitpacking." ); |
211 | } |
212 | |
213 | if (NumericLimits<T>::IsSigned() && !skip_sign_extension && width > 0 && width < sizeof(T) * 8) { |
214 | SignExtend<T>(dst, width); |
215 | } |
216 | } |
217 | |
218 | // Prevent compression at widths that are ineffective |
219 | template <class T> |
220 | static bitpacking_width_t GetEffectiveWidth(bitpacking_width_t width) { |
221 | auto bits_of_type = sizeof(T) * 8; |
222 | auto type_size = sizeof(T); |
223 | if (width + type_size > bits_of_type) { |
224 | return bits_of_type; |
225 | } |
226 | return width; |
227 | } |
228 | |
229 | template <class T> |
230 | static void PackGroup(data_ptr_t dst, T *values, bitpacking_width_t width) { |
231 | if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) { |
232 | duckdb_fastpforlib::fastpack(in: (const uint8_t *)values, out: (uint8_t *)dst, bit: (uint32_t)width); |
233 | } else if (std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value) { |
234 | duckdb_fastpforlib::fastpack(in: (const uint16_t *)values, out: (uint16_t *)dst, bit: (uint32_t)width); |
235 | } else if (std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value) { |
236 | duckdb_fastpforlib::fastpack(in: (const uint32_t *)values, out: (uint32_t *)dst, bit: (uint32_t)width); |
237 | } else if (std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value) { |
238 | duckdb_fastpforlib::fastpack(in: (const uint64_t *)values, out: (uint32_t *)dst, bit: (uint32_t)width); |
239 | } else { |
240 | throw InternalException("Unsupported type found in bitpacking." ); |
241 | } |
242 | } |
243 | }; |
244 | |
245 | } // namespace duckdb |
246 | |