| 1 | //===----------------------------------------------------------------------===// |
| 2 | // DuckDB |
| 3 | // |
| 4 | // duckdb/common/bitpacking.hpp |
| 5 | // |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #pragma once |
| 10 | |
| 11 | #include "bitpackinghelpers.h" |
| 12 | #include "duckdb/common/assert.hpp" |
| 13 | #include "duckdb/common/exception.hpp" |
| 14 | #include "duckdb/common/helper.hpp" |
| 15 | #include "duckdb/common/limits.hpp" |
| 16 | |
| 17 | namespace duckdb { |
| 18 | |
| 19 | using bitpacking_width_t = uint8_t; |
| 20 | |
| 21 | class BitpackingPrimitives { |
| 22 | |
| 23 | public: |
| 24 | static constexpr const idx_t BITPACKING_ALGORITHM_GROUP_SIZE = 32; |
| 25 | static constexpr const idx_t = sizeof(uint64_t); |
| 26 | static constexpr const bool BYTE_ALIGNED = false; |
| 27 | |
| 28 | // To ensure enough data is available, use GetRequiredSize() to determine the correct size for dst buffer |
| 29 | // Note: input should be aligned to BITPACKING_ALGORITHM_GROUP_SIZE for good performance. |
| 30 | template <class T, bool ASSUME_INPUT_ALIGNED = false> |
| 31 | inline static void PackBuffer(data_ptr_t dst, T *src, idx_t count, bitpacking_width_t width) { |
| 32 | if (ASSUME_INPUT_ALIGNED) { |
| 33 | for (idx_t i = 0; i < count; i += BITPACKING_ALGORITHM_GROUP_SIZE) { |
| 34 | PackGroup<T>(dst + (i * width) / 8, src + i, width); |
| 35 | } |
| 36 | } else { |
| 37 | idx_t misaligned_count = count % BITPACKING_ALGORITHM_GROUP_SIZE; |
| 38 | T tmp_buffer[BITPACKING_ALGORITHM_GROUP_SIZE]; // TODO maybe faster on the heap? |
| 39 | |
| 40 | if (misaligned_count) { |
| 41 | count -= misaligned_count; |
| 42 | } |
| 43 | |
| 44 | for (idx_t i = 0; i < count; i += BITPACKING_ALGORITHM_GROUP_SIZE) { |
| 45 | PackGroup<T>(dst + (i * width) / 8, src + i, width); |
| 46 | } |
| 47 | |
| 48 | // Input was not aligned to BITPACKING_ALGORITHM_GROUP_SIZE, we need a copy |
| 49 | if (misaligned_count) { |
| 50 | memcpy(tmp_buffer, src + count, misaligned_count * sizeof(T)); |
| 51 | PackGroup<T>(dst + (count * width) / 8, tmp_buffer, width); |
| 52 | } |
| 53 | } |
| 54 | } |
| 55 | |
| 56 | // Unpacks a block of BITPACKING_ALGORITHM_GROUP_SIZE values |
| 57 | // Assumes both src and dst to be of the correct size |
| 58 | template <class T> |
| 59 | inline static void UnPackBuffer(data_ptr_t dst, data_ptr_t src, idx_t count, bitpacking_width_t width, |
| 60 | bool skip_sign_extension = false) { |
| 61 | |
| 62 | for (idx_t i = 0; i < count; i += BITPACKING_ALGORITHM_GROUP_SIZE) { |
| 63 | UnPackGroup<T>(dst + i * sizeof(T), src + (i * width) / 8, width, skip_sign_extension); |
| 64 | } |
| 65 | } |
| 66 | |
| 67 | // Packs a block of BITPACKING_ALGORITHM_GROUP_SIZE values |
| 68 | template <class T> |
| 69 | inline static void PackBlock(data_ptr_t dst, T *src, bitpacking_width_t width) { |
| 70 | return PackGroup<T>(dst, src, width); |
| 71 | } |
| 72 | |
| 73 | // Unpacks a block of BITPACKING_ALGORITHM_GROUP_SIZE values |
| 74 | template <class T> |
| 75 | inline static void UnPackBlock(data_ptr_t dst, data_ptr_t src, bitpacking_width_t width, |
| 76 | bool skip_sign_extension = false) { |
| 77 | return UnPackGroup<T>(dst, src, width, skip_sign_extension); |
| 78 | } |
| 79 | |
| 80 | // Calculates the minimum required number of bits per value that can store all values |
| 81 | template <class T> |
| 82 | inline static bitpacking_width_t MinimumBitWidth(T value) { |
| 83 | return FindMinimumBitWidth<T, BYTE_ALIGNED>(value, value); |
| 84 | } |
| 85 | |
| 86 | // Calculates the minimum required number of bits per value that can store all values |
| 87 | template <class T> |
| 88 | inline static bitpacking_width_t MinimumBitWidth(T *values, idx_t count) { |
| 89 | return FindMinimumBitWidth<T, BYTE_ALIGNED>(values, count); |
| 90 | } |
| 91 | |
| 92 | // Calculates the minimum required number of bits per value that can store all values, |
| 93 | // given a predetermined minimum and maximum value of the buffer |
| 94 | template <class T> |
| 95 | inline static bitpacking_width_t MinimumBitWidth(T minimum, T maximum) { |
| 96 | return FindMinimumBitWidth<T, BYTE_ALIGNED>(minimum, maximum); |
| 97 | } |
| 98 | |
| 99 | inline static idx_t GetRequiredSize(idx_t count, bitpacking_width_t width) { |
| 100 | count = RoundUpToAlgorithmGroupSize(num_to_round: count); |
| 101 | return ((count * width) / 8); |
| 102 | } |
| 103 | |
| 104 | template <class T> |
| 105 | inline static T RoundUpToAlgorithmGroupSize(T num_to_round) { |
| 106 | int remainder = num_to_round % BITPACKING_ALGORITHM_GROUP_SIZE; |
| 107 | if (remainder == 0) { |
| 108 | return num_to_round; |
| 109 | } |
| 110 | |
| 111 | return num_to_round + BITPACKING_ALGORITHM_GROUP_SIZE - remainder; |
| 112 | } |
| 113 | |
| 114 | private: |
| 115 | template <class T, bool round_to_next_byte = false> |
| 116 | static bitpacking_width_t FindMinimumBitWidth(T *values, idx_t count) { |
| 117 | T min_value = values[0]; |
| 118 | T max_value = values[0]; |
| 119 | |
| 120 | for (idx_t i = 1; i < count; i++) { |
| 121 | if (values[i] > max_value) { |
| 122 | max_value = values[i]; |
| 123 | } |
| 124 | |
| 125 | if (std::is_signed<T>::value) { |
| 126 | if (values[i] < min_value) { |
| 127 | min_value = values[i]; |
| 128 | } |
| 129 | } |
| 130 | } |
| 131 | |
| 132 | return FindMinimumBitWidth<T, round_to_next_byte>(min_value, max_value); |
| 133 | } |
| 134 | |
| 135 | template <class T, bool round_to_next_byte = false> |
| 136 | static bitpacking_width_t FindMinimumBitWidth(T min_value, T max_value) { |
| 137 | bitpacking_width_t bitwidth; |
| 138 | T value; |
| 139 | |
| 140 | if (std::is_signed<T>::value) { |
| 141 | if (min_value == NumericLimits<T>::Minimum()) { |
| 142 | // handle special case of the minimal value, as it cannot be negated like all other values. |
| 143 | return sizeof(T) * 8; |
| 144 | } else { |
| 145 | value = MaxValue((T)-min_value, max_value); |
| 146 | } |
| 147 | } else { |
| 148 | value = max_value; |
| 149 | } |
| 150 | |
| 151 | if (value == 0) { |
| 152 | return 0; |
| 153 | } |
| 154 | |
| 155 | if (std::is_signed<T>::value) { |
| 156 | bitwidth = 1; |
| 157 | } else { |
| 158 | bitwidth = 0; |
| 159 | } |
| 160 | |
| 161 | while (value) { |
| 162 | bitwidth++; |
| 163 | value >>= 1; |
| 164 | } |
| 165 | |
| 166 | bitwidth = GetEffectiveWidth<T>(bitwidth); |
| 167 | |
| 168 | // Assert results are correct |
| 169 | #ifdef DEBUG |
| 170 | if (bitwidth < sizeof(T) * 8 && bitwidth != 0) { |
| 171 | if (std::is_signed<T>::value) { |
| 172 | D_ASSERT((int64_t)max_value <= (int64_t)(1L << (bitwidth - 1)) - 1); |
| 173 | D_ASSERT((int64_t)min_value >= (int64_t)(-1 * ((1L << (bitwidth - 1)) - 1) - 1)); |
| 174 | } else { |
| 175 | D_ASSERT((uint64_t)max_value <= (uint64_t)(1L << (bitwidth)) - 1); |
| 176 | } |
| 177 | } |
| 178 | #endif |
| 179 | if (round_to_next_byte) { |
| 180 | return (bitwidth / 8 + (bitwidth % 8 != 0)) * 8; |
| 181 | } else { |
| 182 | return bitwidth; |
| 183 | } |
| 184 | } |
| 185 | |
| 186 | // Sign bit extension |
| 187 | template <class T, class T_U = typename std::make_unsigned<T>::type> |
| 188 | static void SignExtend(data_ptr_t dst, bitpacking_width_t width) { |
| 189 | T const mask = ((T_U)1) << (width - 1); |
| 190 | for (idx_t i = 0; i < BitpackingPrimitives::BITPACKING_ALGORITHM_GROUP_SIZE; ++i) { |
| 191 | T value = Load<T>(dst + i * sizeof(T)); |
| 192 | value = value & ((((T_U)1) << width) - ((T_U)1)); |
| 193 | T result = (value ^ mask) - mask; |
| 194 | Store(result, dst + i * sizeof(T)); |
| 195 | } |
| 196 | } |
| 197 | |
| 198 | template <class T> |
| 199 | static void UnPackGroup(data_ptr_t dst, data_ptr_t src, bitpacking_width_t width, |
| 200 | bool skip_sign_extension = false) { |
| 201 | if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) { |
| 202 | duckdb_fastpforlib::fastunpack(in: (const uint8_t *)src, out: (uint8_t *)dst, bit: (uint32_t)width); |
| 203 | } else if (std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value) { |
| 204 | duckdb_fastpforlib::fastunpack(in: (const uint16_t *)src, out: (uint16_t *)dst, bit: (uint32_t)width); |
| 205 | } else if (std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value) { |
| 206 | duckdb_fastpforlib::fastunpack(in: (const uint32_t *)src, out: (uint32_t *)dst, bit: (uint32_t)width); |
| 207 | } else if (std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value) { |
| 208 | duckdb_fastpforlib::fastunpack(in: (const uint32_t *)src, out: (uint64_t *)dst, bit: (uint32_t)width); |
| 209 | } else { |
| 210 | throw InternalException("Unsupported type found in bitpacking." ); |
| 211 | } |
| 212 | |
| 213 | if (NumericLimits<T>::IsSigned() && !skip_sign_extension && width > 0 && width < sizeof(T) * 8) { |
| 214 | SignExtend<T>(dst, width); |
| 215 | } |
| 216 | } |
| 217 | |
| 218 | // Prevent compression at widths that are ineffective |
| 219 | template <class T> |
| 220 | static bitpacking_width_t GetEffectiveWidth(bitpacking_width_t width) { |
| 221 | auto bits_of_type = sizeof(T) * 8; |
| 222 | auto type_size = sizeof(T); |
| 223 | if (width + type_size > bits_of_type) { |
| 224 | return bits_of_type; |
| 225 | } |
| 226 | return width; |
| 227 | } |
| 228 | |
| 229 | template <class T> |
| 230 | static void PackGroup(data_ptr_t dst, T *values, bitpacking_width_t width) { |
| 231 | if (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value) { |
| 232 | duckdb_fastpforlib::fastpack(in: (const uint8_t *)values, out: (uint8_t *)dst, bit: (uint32_t)width); |
| 233 | } else if (std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value) { |
| 234 | duckdb_fastpforlib::fastpack(in: (const uint16_t *)values, out: (uint16_t *)dst, bit: (uint32_t)width); |
| 235 | } else if (std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value) { |
| 236 | duckdb_fastpforlib::fastpack(in: (const uint32_t *)values, out: (uint32_t *)dst, bit: (uint32_t)width); |
| 237 | } else if (std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value) { |
| 238 | duckdb_fastpforlib::fastpack(in: (const uint64_t *)values, out: (uint32_t *)dst, bit: (uint32_t)width); |
| 239 | } else { |
| 240 | throw InternalException("Unsupported type found in bitpacking." ); |
| 241 | } |
| 242 | } |
| 243 | }; |
| 244 | |
| 245 | } // namespace duckdb |
| 246 | |