1//===----------------------------------------------------------------------===//
2// DuckDB
3//
4// duckdb/common/bitpacking.hpp
5//
6//
7//===----------------------------------------------------------------------===//
8
9#pragma once
10
11#include "bitpackinghelpers.h"
12#include "duckdb/common/assert.hpp"
13#include "duckdb/common/exception.hpp"
14#include "duckdb/common/helper.hpp"
15#include "duckdb/common/limits.hpp"
16
17namespace duckdb {
18
19using bitpacking_width_t = uint8_t;
20
21class BitpackingPrimitives {
22
23public:
24 static constexpr const idx_t BITPACKING_ALGORITHM_GROUP_SIZE = 32;
25 static constexpr const idx_t BITPACKING_HEADER_SIZE = sizeof(uint64_t);
26 static constexpr const bool BYTE_ALIGNED = false;
27
28 // To ensure enough data is available, use GetRequiredSize() to determine the correct size for dst buffer
29 // Note: input should be aligned to BITPACKING_ALGORITHM_GROUP_SIZE for good performance.
30 template <class T, bool ASSUME_INPUT_ALIGNED = false>
31 inline static void PackBuffer(data_ptr_t dst, T *src, idx_t count, bitpacking_width_t width) {
32 if (ASSUME_INPUT_ALIGNED) {
33 for (idx_t i = 0; i < count; i += BITPACKING_ALGORITHM_GROUP_SIZE) {
34 PackGroup<T>(dst + (i * width) / 8, src + i, width);
35 }
36 } else {
37 idx_t misaligned_count = count % BITPACKING_ALGORITHM_GROUP_SIZE;
38 T tmp_buffer[BITPACKING_ALGORITHM_GROUP_SIZE]; // TODO maybe faster on the heap?
39
40 if (misaligned_count) {
41 count -= misaligned_count;
42 }
43
44 for (idx_t i = 0; i < count; i += BITPACKING_ALGORITHM_GROUP_SIZE) {
45 PackGroup<T>(dst + (i * width) / 8, src + i, width);
46 }
47
48 // Input was not aligned to BITPACKING_ALGORITHM_GROUP_SIZE, we need a copy
49 if (misaligned_count) {
50 memcpy(tmp_buffer, src + count, misaligned_count * sizeof(T));
51 PackGroup<T>(dst + (count * width) / 8, tmp_buffer, width);
52 }
53 }
54 }
55
56 // Unpacks a block of BITPACKING_ALGORITHM_GROUP_SIZE values
57 // Assumes both src and dst to be of the correct size
58 template <class T>
59 inline static void UnPackBuffer(data_ptr_t dst, data_ptr_t src, idx_t count, bitpacking_width_t width,
60 bool skip_sign_extension = false) {
61
62 for (idx_t i = 0; i < count; i += BITPACKING_ALGORITHM_GROUP_SIZE) {
63 UnPackGroup<T>(dst + i * sizeof(T), src + (i * width) / 8, width, skip_sign_extension);
64 }
65 }
66
67 // Packs a block of BITPACKING_ALGORITHM_GROUP_SIZE values
68 template <class T>
69 inline static void PackBlock(data_ptr_t dst, T *src, bitpacking_width_t width) {
70 return PackGroup<T>(dst, src, width);
71 }
72
73 // Unpacks a block of BITPACKING_ALGORITHM_GROUP_SIZE values
74 template <class T>
75 inline static void UnPackBlock(data_ptr_t dst, data_ptr_t src, bitpacking_width_t width,
76 bool skip_sign_extension = false) {
77 return UnPackGroup<T>(dst, src, width, skip_sign_extension);
78 }
79
80 // Calculates the minimum required number of bits per value that can store all values
81 template <class T>
82 inline static bitpacking_width_t MinimumBitWidth(T value) {
83 return FindMinimumBitWidth<T, BYTE_ALIGNED>(value, value);
84 }
85
86 // Calculates the minimum required number of bits per value that can store all values
87 template <class T>
88 inline static bitpacking_width_t MinimumBitWidth(T *values, idx_t count) {
89 return FindMinimumBitWidth<T, BYTE_ALIGNED>(values, count);
90 }
91
92 // Calculates the minimum required number of bits per value that can store all values,
93 // given a predetermined minimum and maximum value of the buffer
94 template <class T>
95 inline static bitpacking_width_t MinimumBitWidth(T minimum, T maximum) {
96 return FindMinimumBitWidth<T, BYTE_ALIGNED>(minimum, maximum);
97 }
98
99 inline static idx_t GetRequiredSize(idx_t count, bitpacking_width_t width) {
100 count = RoundUpToAlgorithmGroupSize(num_to_round: count);
101 return ((count * width) / 8);
102 }
103
104 template <class T>
105 inline static T RoundUpToAlgorithmGroupSize(T num_to_round) {
106 int remainder = num_to_round % BITPACKING_ALGORITHM_GROUP_SIZE;
107 if (remainder == 0) {
108 return num_to_round;
109 }
110
111 return num_to_round + BITPACKING_ALGORITHM_GROUP_SIZE - remainder;
112 }
113
114private:
115 template <class T, bool round_to_next_byte = false>
116 static bitpacking_width_t FindMinimumBitWidth(T *values, idx_t count) {
117 T min_value = values[0];
118 T max_value = values[0];
119
120 for (idx_t i = 1; i < count; i++) {
121 if (values[i] > max_value) {
122 max_value = values[i];
123 }
124
125 if (std::is_signed<T>::value) {
126 if (values[i] < min_value) {
127 min_value = values[i];
128 }
129 }
130 }
131
132 return FindMinimumBitWidth<T, round_to_next_byte>(min_value, max_value);
133 }
134
135 template <class T, bool round_to_next_byte = false>
136 static bitpacking_width_t FindMinimumBitWidth(T min_value, T max_value) {
137 bitpacking_width_t bitwidth;
138 T value;
139
140 if (std::is_signed<T>::value) {
141 if (min_value == NumericLimits<T>::Minimum()) {
142 // handle special case of the minimal value, as it cannot be negated like all other values.
143 return sizeof(T) * 8;
144 } else {
145 value = MaxValue((T)-min_value, max_value);
146 }
147 } else {
148 value = max_value;
149 }
150
151 if (value == 0) {
152 return 0;
153 }
154
155 if (std::is_signed<T>::value) {
156 bitwidth = 1;
157 } else {
158 bitwidth = 0;
159 }
160
161 while (value) {
162 bitwidth++;
163 value >>= 1;
164 }
165
166 bitwidth = GetEffectiveWidth<T>(bitwidth);
167
168 // Assert results are correct
169#ifdef DEBUG
170 if (bitwidth < sizeof(T) * 8 && bitwidth != 0) {
171 if (std::is_signed<T>::value) {
172 D_ASSERT((int64_t)max_value <= (int64_t)(1L << (bitwidth - 1)) - 1);
173 D_ASSERT((int64_t)min_value >= (int64_t)(-1 * ((1L << (bitwidth - 1)) - 1) - 1));
174 } else {
175 D_ASSERT((uint64_t)max_value <= (uint64_t)(1L << (bitwidth)) - 1);
176 }
177 }
178#endif
179 if (round_to_next_byte) {
180 return (bitwidth / 8 + (bitwidth % 8 != 0)) * 8;
181 } else {
182 return bitwidth;
183 }
184 }
185
186 // Sign bit extension
187 template <class T, class T_U = typename std::make_unsigned<T>::type>
188 static void SignExtend(data_ptr_t dst, bitpacking_width_t width) {
189 T const mask = ((T_U)1) << (width - 1);
190 for (idx_t i = 0; i < BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE; ++i) {
191 T value = Load<T>(dst + i * sizeof(T));
192 value = value & ((((T_U)1) << width) - ((T_U)1));
193 T result = (value ^ mask) - mask;
194 Store(result, dst + i * sizeof(T));
195 }
196 }
197
198 template <class T>
199 static void UnPackGroup(data_ptr_t dst, data_ptr_t src, bitpacking_width_t width,
200 bool skip_sign_extension = false) {
201 if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) {
202 duckdb_fastpforlib::fastunpack(in: (const uint8_t *)src, out: (uint8_t *)dst, bit: (uint32_t)width);
203 } else if (std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value) {
204 duckdb_fastpforlib::fastunpack(in: (const uint16_t *)src, out: (uint16_t *)dst, bit: (uint32_t)width);
205 } else if (std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value) {
206 duckdb_fastpforlib::fastunpack(in: (const uint32_t *)src, out: (uint32_t *)dst, bit: (uint32_t)width);
207 } else if (std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value) {
208 duckdb_fastpforlib::fastunpack(in: (const uint32_t *)src, out: (uint64_t *)dst, bit: (uint32_t)width);
209 } else {
210 throw InternalException("Unsupported type found in bitpacking.");
211 }
212
213 if (NumericLimits<T>::IsSigned() && !skip_sign_extension && width > 0 && width < sizeof(T) * 8) {
214 SignExtend<T>(dst, width);
215 }
216 }
217
218 // Prevent compression at widths that are ineffective
219 template <class T>
220 static bitpacking_width_t GetEffectiveWidth(bitpacking_width_t width) {
221 auto bits_of_type = sizeof(T) * 8;
222 auto type_size = sizeof(T);
223 if (width + type_size > bits_of_type) {
224 return bits_of_type;
225 }
226 return width;
227 }
228
229 template <class T>
230 static void PackGroup(data_ptr_t dst, T *values, bitpacking_width_t width) {
231 if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) {
232 duckdb_fastpforlib::fastpack(in: (const uint8_t *)values, out: (uint8_t *)dst, bit: (uint32_t)width);
233 } else if (std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value) {
234 duckdb_fastpforlib::fastpack(in: (const uint16_t *)values, out: (uint16_t *)dst, bit: (uint32_t)width);
235 } else if (std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value) {
236 duckdb_fastpforlib::fastpack(in: (const uint32_t *)values, out: (uint32_t *)dst, bit: (uint32_t)width);
237 } else if (std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value) {
238 duckdb_fastpforlib::fastpack(in: (const uint64_t *)values, out: (uint32_t *)dst, bit: (uint32_t)width);
239 } else {
240 throw InternalException("Unsupported type found in bitpacking.");
241 }
242 }
243};
244
245} // namespace duckdb
246