| 1 | #include <cstring> | 
|---|
| 2 |  | 
|---|
| 3 | #include <Compression/CompressionCodecT64.h> | 
|---|
| 4 | #include <Compression/CompressionFactory.h> | 
|---|
| 5 | #include <common/unaligned.h> | 
|---|
| 6 | #include <Parsers/IAST.h> | 
|---|
| 7 | #include <Parsers/ASTLiteral.h> | 
|---|
| 8 | #include <IO/WriteHelpers.h> | 
|---|
| 9 |  | 
|---|
| 10 |  | 
|---|
| 11 | namespace DB | 
|---|
| 12 | { | 
|---|
| 13 |  | 
|---|
| 14 | namespace ErrorCodes | 
|---|
| 15 | { | 
|---|
| 16 | extern const int CANNOT_COMPRESS; | 
|---|
| 17 | extern const int CANNOT_DECOMPRESS; | 
|---|
| 18 | extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; | 
|---|
| 19 | extern const int ILLEGAL_CODEC_PARAMETER; | 
|---|
| 20 | extern const int LOGICAL_ERROR; | 
|---|
| 21 | } | 
|---|
| 22 |  | 
|---|
| 23 | namespace | 
|---|
| 24 | { | 
|---|
| 25 |  | 
|---|
| 26 | UInt8 codecId() | 
|---|
| 27 | { | 
|---|
| 28 | return static_cast<UInt8>(CompressionMethodByte::T64); | 
|---|
| 29 | } | 
|---|
| 30 |  | 
|---|
| 31 | TypeIndex baseType(TypeIndex type_idx) | 
|---|
| 32 | { | 
|---|
| 33 | switch (type_idx) | 
|---|
| 34 | { | 
|---|
| 35 | case TypeIndex::Int8: | 
|---|
| 36 | return TypeIndex::Int8; | 
|---|
| 37 | case TypeIndex::Int16: | 
|---|
| 38 | return TypeIndex::Int16; | 
|---|
| 39 | case TypeIndex::Int32: | 
|---|
| 40 | case TypeIndex::Decimal32: | 
|---|
| 41 | return TypeIndex::Int32; | 
|---|
| 42 | case TypeIndex::Int64: | 
|---|
| 43 | case TypeIndex::Decimal64: | 
|---|
| 44 | return TypeIndex::Int64; | 
|---|
| 45 | case TypeIndex::UInt8: | 
|---|
| 46 | case TypeIndex::Enum8: | 
|---|
| 47 | return TypeIndex::UInt8; | 
|---|
| 48 | case TypeIndex::UInt16: | 
|---|
| 49 | case TypeIndex::Enum16: | 
|---|
| 50 | case TypeIndex::Date: | 
|---|
| 51 | return TypeIndex::UInt16; | 
|---|
| 52 | case TypeIndex::UInt32: | 
|---|
| 53 | case TypeIndex::DateTime: | 
|---|
| 54 | return TypeIndex::UInt32; | 
|---|
| 55 | case TypeIndex::UInt64: | 
|---|
| 56 | return TypeIndex::UInt64; | 
|---|
| 57 | default: | 
|---|
| 58 | break; | 
|---|
| 59 | } | 
|---|
| 60 |  | 
|---|
| 61 | return TypeIndex::Nothing; | 
|---|
| 62 | } | 
|---|
| 63 |  | 
|---|
| 64 | TypeIndex typeIdx(const DataTypePtr & data_type) | 
|---|
| 65 | { | 
|---|
| 66 | if (!data_type) | 
|---|
| 67 | return TypeIndex::Nothing; | 
|---|
| 68 |  | 
|---|
| 69 | WhichDataType which(*data_type); | 
|---|
| 70 | switch (which.idx) | 
|---|
| 71 | { | 
|---|
| 72 | case TypeIndex::Int8: | 
|---|
| 73 | case TypeIndex::UInt8: | 
|---|
| 74 | case TypeIndex::Enum8: | 
|---|
| 75 | case TypeIndex::Int16: | 
|---|
| 76 | case TypeIndex::UInt16: | 
|---|
| 77 | case TypeIndex::Enum16: | 
|---|
| 78 | case TypeIndex::Date: | 
|---|
| 79 | case TypeIndex::Int32: | 
|---|
| 80 | case TypeIndex::UInt32: | 
|---|
| 81 | case TypeIndex::DateTime: | 
|---|
| 82 | case TypeIndex::Decimal32: | 
|---|
| 83 | case TypeIndex::Int64: | 
|---|
| 84 | case TypeIndex::UInt64: | 
|---|
| 85 | case TypeIndex::Decimal64: | 
|---|
| 86 | return which.idx; | 
|---|
| 87 | default: | 
|---|
| 88 | break; | 
|---|
| 89 | } | 
|---|
| 90 |  | 
|---|
| 91 | return TypeIndex::Nothing; | 
|---|
| 92 | } | 
|---|
| 93 |  | 
|---|
| 94 | void transpose64x8(UInt64 * src_dst) | 
|---|
| 95 | { | 
|---|
| 96 | auto * src8 = reinterpret_cast<const UInt8 *>(src_dst); | 
|---|
| 97 | UInt64 dst[8] = {}; | 
|---|
| 98 |  | 
|---|
| 99 | for (UInt32 i = 0; i < 64; ++i) | 
|---|
| 100 | { | 
|---|
| 101 | UInt64 value = src8[i]; | 
|---|
| 102 | dst[0] |= (value & 0x1) << i; | 
|---|
| 103 | dst[1] |= ((value >> 1) & 0x1) << i; | 
|---|
| 104 | dst[2] |= ((value >> 2) & 0x1) << i; | 
|---|
| 105 | dst[3] |= ((value >> 3) & 0x1) << i; | 
|---|
| 106 | dst[4] |= ((value >> 4) & 0x1) << i; | 
|---|
| 107 | dst[5] |= ((value >> 5) & 0x1) << i; | 
|---|
| 108 | dst[6] |= ((value >> 6) & 0x1) << i; | 
|---|
| 109 | dst[7] |= ((value >> 7) & 0x1) << i; | 
|---|
| 110 | } | 
|---|
| 111 |  | 
|---|
| 112 | memcpy(src_dst, dst, 8 * sizeof(UInt64)); | 
|---|
| 113 | } | 
|---|
| 114 |  | 
|---|
| 115 | void reverseTranspose64x8(UInt64 * src_dst) | 
|---|
| 116 | { | 
|---|
| 117 | UInt8 dst8[64]; | 
|---|
| 118 |  | 
|---|
| 119 | for (UInt32 i = 0; i < 64; ++i) | 
|---|
| 120 | { | 
|---|
| 121 | dst8[i] = ((src_dst[0] >> i) & 0x1) | 
|---|
| 122 | | (((src_dst[1] >> i) & 0x1) << 1) | 
|---|
| 123 | | (((src_dst[2] >> i) & 0x1) << 2) | 
|---|
| 124 | | (((src_dst[3] >> i) & 0x1) << 3) | 
|---|
| 125 | | (((src_dst[4] >> i) & 0x1) << 4) | 
|---|
| 126 | | (((src_dst[5] >> i) & 0x1) << 5) | 
|---|
| 127 | | (((src_dst[6] >> i) & 0x1) << 6) | 
|---|
| 128 | | (((src_dst[7] >> i) & 0x1) << 7); | 
|---|
| 129 | } | 
|---|
| 130 |  | 
|---|
| 131 | memcpy(src_dst, dst8, 8 * sizeof(UInt64)); | 
|---|
| 132 | } | 
|---|
| 133 |  | 
|---|
| 134 | template <typename T> | 
|---|
| 135 | void transposeBytes(T value, UInt64 * matrix, UInt32 col) | 
|---|
| 136 | { | 
|---|
| 137 | UInt8 * matrix8 = reinterpret_cast<UInt8 *>(matrix); | 
|---|
| 138 | const UInt8 * value8 = reinterpret_cast<const UInt8 *>(&value); | 
|---|
| 139 |  | 
|---|
| 140 | if constexpr (sizeof(T) > 4) | 
|---|
| 141 | { | 
|---|
| 142 | matrix8[64 * 7 + col] = value8[7]; | 
|---|
| 143 | matrix8[64 * 6 + col] = value8[6]; | 
|---|
| 144 | matrix8[64 * 5 + col] = value8[5]; | 
|---|
| 145 | matrix8[64 * 4 + col] = value8[4]; | 
|---|
| 146 | } | 
|---|
| 147 |  | 
|---|
| 148 | if constexpr (sizeof(T) > 2) | 
|---|
| 149 | { | 
|---|
| 150 | matrix8[64 * 3 + col] = value8[3]; | 
|---|
| 151 | matrix8[64 * 2 + col] = value8[2]; | 
|---|
| 152 | } | 
|---|
| 153 |  | 
|---|
| 154 | if constexpr (sizeof(T) > 1) | 
|---|
| 155 | matrix8[64 * 1 + col] = value8[1]; | 
|---|
| 156 |  | 
|---|
| 157 | matrix8[64 * 0 + col] = value8[0]; | 
|---|
| 158 | } | 
|---|
| 159 |  | 
|---|
| 160 | template <typename T> | 
|---|
| 161 | void reverseTransposeBytes(const UInt64 * matrix, UInt32 col, T & value) | 
|---|
| 162 | { | 
|---|
| 163 | auto * matrix8 = reinterpret_cast<const UInt8 *>(matrix); | 
|---|
| 164 |  | 
|---|
| 165 | if constexpr (sizeof(T) > 4) | 
|---|
| 166 | { | 
|---|
| 167 | value |= UInt64(matrix8[64 * 7 + col]) << (8 * 7); | 
|---|
| 168 | value |= UInt64(matrix8[64 * 6 + col]) << (8 * 6); | 
|---|
| 169 | value |= UInt64(matrix8[64 * 5 + col]) << (8 * 5); | 
|---|
| 170 | value |= UInt64(matrix8[64 * 4 + col]) << (8 * 4); | 
|---|
| 171 | } | 
|---|
| 172 |  | 
|---|
| 173 | if constexpr (sizeof(T) > 2) | 
|---|
| 174 | { | 
|---|
| 175 | value |= UInt32(matrix8[64 * 3 + col]) << (8 * 3); | 
|---|
| 176 | value |= UInt32(matrix8[64 * 2 + col]) << (8 * 2); | 
|---|
| 177 | } | 
|---|
| 178 |  | 
|---|
| 179 | if constexpr (sizeof(T) > 1) | 
|---|
| 180 | value |= UInt32(matrix8[64 * 1 + col]) << (8 * 1); | 
|---|
| 181 |  | 
|---|
| 182 | value |= UInt32(matrix8[col]); | 
|---|
| 183 | } | 
|---|
| 184 |  | 
|---|
| 185 |  | 
|---|
| 186 | template <typename T> | 
|---|
| 187 | void load(const char * src, T * buf, UInt32 tail = 64) | 
|---|
| 188 | { | 
|---|
| 189 | memcpy(buf, src, tail * sizeof(T)); | 
|---|
| 190 | } | 
|---|
| 191 |  | 
|---|
| 192 | template <typename T> | 
|---|
| 193 | void store(const T * buf, char * dst, UInt32 tail = 64) | 
|---|
| 194 | { | 
|---|
| 195 | memcpy(dst, buf, tail * sizeof(T)); | 
|---|
| 196 | } | 
|---|
| 197 |  | 
|---|
| 198 | template <typename T> | 
|---|
| 199 | void clear(T * buf) | 
|---|
| 200 | { | 
|---|
| 201 | for (UInt32 i = 0; i < 64; ++i) | 
|---|
| 202 | buf[i] = 0; | 
|---|
| 203 | } | 
|---|
| 204 |  | 
|---|
| 205 |  | 
|---|
| 206 | /// UIntX[64] -> UInt64[N] transposed matrix, N <= X | 
|---|
| 207 | template <typename T, bool full = false> | 
|---|
| 208 | void transpose(const T * src, char * dst, UInt32 num_bits, UInt32 tail = 64) | 
|---|
| 209 | { | 
|---|
| 210 | UInt32 full_bytes = num_bits / 8; | 
|---|
| 211 | UInt32 part_bits = num_bits % 8; | 
|---|
| 212 |  | 
|---|
| 213 | UInt64 matrix[64] = {}; | 
|---|
| 214 | for (UInt32 col = 0; col < tail; ++col) | 
|---|
| 215 | transposeBytes(src[col], matrix, col); | 
|---|
| 216 |  | 
|---|
| 217 | if constexpr (full) | 
|---|
| 218 | { | 
|---|
| 219 | UInt64 * matrix_line = matrix; | 
|---|
| 220 | for (UInt32 byte = 0; byte < full_bytes; ++byte, matrix_line += 8) | 
|---|
| 221 | transpose64x8(matrix_line); | 
|---|
| 222 | } | 
|---|
| 223 |  | 
|---|
| 224 | UInt32 full_size = sizeof(UInt64) * (num_bits - part_bits); | 
|---|
| 225 | memcpy(dst, matrix, full_size); | 
|---|
| 226 | dst += full_size; | 
|---|
| 227 |  | 
|---|
| 228 | /// transpose only partially filled last byte | 
|---|
| 229 | if (part_bits) | 
|---|
| 230 | { | 
|---|
| 231 | UInt64 * matrix_line = &matrix[full_bytes * 8]; | 
|---|
| 232 | transpose64x8(matrix_line); | 
|---|
| 233 | memcpy(dst, matrix_line, part_bits * sizeof(UInt64)); | 
|---|
| 234 | } | 
|---|
| 235 | } | 
|---|
| 236 |  | 
|---|
| 237 | /// UInt64[N] transposed matrix -> UIntX[64] | 
|---|
| 238 | template <typename T, bool full = false> | 
|---|
| 239 | void reverseTranspose(const char * src, T * buf, UInt32 num_bits, UInt32 tail = 64) | 
|---|
| 240 | { | 
|---|
| 241 | UInt64 matrix[64] = {}; | 
|---|
| 242 | memcpy(matrix, src, num_bits * sizeof(UInt64)); | 
|---|
| 243 |  | 
|---|
| 244 | UInt32 full_bytes = num_bits / 8; | 
|---|
| 245 | UInt32 part_bits = num_bits % 8; | 
|---|
| 246 |  | 
|---|
| 247 | if constexpr (full) | 
|---|
| 248 | { | 
|---|
| 249 | UInt64 * matrix_line = matrix; | 
|---|
| 250 | for (UInt32 byte = 0; byte < full_bytes; ++byte, matrix_line += 8) | 
|---|
| 251 | reverseTranspose64x8(matrix_line); | 
|---|
| 252 | } | 
|---|
| 253 |  | 
|---|
| 254 | if (part_bits) | 
|---|
| 255 | { | 
|---|
| 256 | UInt64 * matrix_line = &matrix[full_bytes * 8]; | 
|---|
| 257 | reverseTranspose64x8(matrix_line); | 
|---|
| 258 | } | 
|---|
| 259 |  | 
|---|
| 260 | clear(buf); | 
|---|
| 261 | for (UInt32 col = 0; col < tail; ++col) | 
|---|
| 262 | reverseTransposeBytes(matrix, col, buf[col]); | 
|---|
| 263 | } | 
|---|
| 264 |  | 
|---|
| 265 | template <typename T, typename MinMaxT = std::conditional_t<is_signed_v<T>, Int64, UInt64>> | 
|---|
| 266 | void restoreUpperBits(T * buf, T upper_min, T upper_max [[maybe_unused]], T sign_bit [[maybe_unused]], UInt32 tail = 64) | 
|---|
| 267 | { | 
|---|
| 268 | if constexpr (is_signed_v<T>) | 
|---|
| 269 | { | 
|---|
| 270 | /// Restore some data as negatives and others as positives | 
|---|
| 271 | if (sign_bit) | 
|---|
| 272 | { | 
|---|
| 273 | for (UInt32 col = 0; col < tail; ++col) | 
|---|
| 274 | { | 
|---|
| 275 | T & value = buf[col]; | 
|---|
| 276 |  | 
|---|
| 277 | if (value & sign_bit) | 
|---|
| 278 | value |= upper_min; | 
|---|
| 279 | else | 
|---|
| 280 | value |= upper_max; | 
|---|
| 281 | } | 
|---|
| 282 |  | 
|---|
| 283 | return; | 
|---|
| 284 | } | 
|---|
| 285 | } | 
|---|
| 286 |  | 
|---|
| 287 | for (UInt32 col = 0; col < tail; ++col) | 
|---|
| 288 | buf[col] |= upper_min; | 
|---|
| 289 | } | 
|---|
| 290 |  | 
|---|
| 291 |  | 
|---|
| 292 | UInt32 getValuableBitsNumber(UInt64 min, UInt64 max) | 
|---|
| 293 | { | 
|---|
| 294 | UInt64 diff_bits = min ^ max; | 
|---|
| 295 | if (diff_bits) | 
|---|
| 296 | return 64 - __builtin_clzll(diff_bits); | 
|---|
| 297 | return 0; | 
|---|
| 298 | } | 
|---|
| 299 |  | 
|---|
| 300 | UInt32 getValuableBitsNumber(Int64 min, Int64 max) | 
|---|
| 301 | { | 
|---|
| 302 | if (min < 0 && max >= 0) | 
|---|
| 303 | { | 
|---|
| 304 | if (min + max >= 0) | 
|---|
| 305 | return getValuableBitsNumber(0ull, UInt64(max)) + 1; | 
|---|
| 306 | else | 
|---|
| 307 | return getValuableBitsNumber(0ull, UInt64(~min)) + 1; | 
|---|
| 308 | } | 
|---|
| 309 | else | 
|---|
| 310 | return getValuableBitsNumber(UInt64(min), UInt64(max)); | 
|---|
| 311 | } | 
|---|
| 312 |  | 
|---|
| 313 |  | 
|---|
| 314 | template <typename T> | 
|---|
| 315 | void findMinMax(const char * src, UInt32 src_size, T & min, T & max) | 
|---|
| 316 | { | 
|---|
| 317 | min = unalignedLoad<T>(src); | 
|---|
| 318 | max = unalignedLoad<T>(src); | 
|---|
| 319 |  | 
|---|
| 320 | const char * end = src + src_size; | 
|---|
| 321 | for (; src < end; src += sizeof(T)) | 
|---|
| 322 | { | 
|---|
| 323 | auto current = unalignedLoad<T>(src); | 
|---|
| 324 | if (current < min) | 
|---|
| 325 | min = current; | 
|---|
| 326 | if (current > max) | 
|---|
| 327 | max = current; | 
|---|
| 328 | } | 
|---|
| 329 | } | 
|---|
| 330 |  | 
|---|
| 331 |  | 
|---|
| 332 | using Variant = CompressionCodecT64::Variant; | 
|---|
| 333 |  | 
|---|
| 334 | template <typename T, bool full> | 
|---|
| 335 | UInt32 compressData(const char * src, UInt32 bytes_size, char * dst) | 
|---|
| 336 | { | 
|---|
| 337 | using MinMaxType = std::conditional_t<is_signed_v<T>, Int64, UInt64>; | 
|---|
| 338 |  | 
|---|
| 339 | static constexpr const UInt32 matrix_size = 64; | 
|---|
| 340 | static constexpr const UInt32  = 2 * sizeof(UInt64); | 
|---|
| 341 |  | 
|---|
| 342 | if (bytes_size % sizeof(T)) | 
|---|
| 343 | throw Exception( "Cannot compress, data size "+ toString(bytes_size) + " is not multiplier of "+ toString(sizeof(T)), | 
|---|
| 344 | ErrorCodes::CANNOT_COMPRESS); | 
|---|
| 345 |  | 
|---|
| 346 | UInt32 src_size = bytes_size / sizeof(T); | 
|---|
| 347 | UInt32 num_full = src_size / matrix_size; | 
|---|
| 348 | UInt32 tail = src_size % matrix_size; | 
|---|
| 349 |  | 
|---|
| 350 | T min, max; | 
|---|
| 351 | findMinMax<T>(src, bytes_size, min, max); | 
|---|
| 352 | MinMaxType min64 = min; | 
|---|
| 353 | MinMaxType max64 = max; | 
|---|
| 354 |  | 
|---|
| 355 | /// Write header | 
|---|
| 356 | { | 
|---|
| 357 | memcpy(dst, &min64, sizeof(MinMaxType)); | 
|---|
| 358 | memcpy(dst + 8, &max64, sizeof(MinMaxType)); | 
|---|
| 359 | dst += header_size; | 
|---|
| 360 | } | 
|---|
| 361 |  | 
|---|
| 362 | UInt32 num_bits = getValuableBitsNumber(min64, max64); | 
|---|
| 363 | if (!num_bits) | 
|---|
| 364 | return header_size; | 
|---|
| 365 |  | 
|---|
| 366 | T buf[matrix_size]; | 
|---|
| 367 | UInt32 src_shift = sizeof(T) * matrix_size; | 
|---|
| 368 | UInt32 dst_shift = sizeof(UInt64) * num_bits; | 
|---|
| 369 | for (UInt32 i = 0; i < num_full; ++i) | 
|---|
| 370 | { | 
|---|
| 371 | load<T>(src, buf, matrix_size); | 
|---|
| 372 | transpose<T, full>(buf, dst, num_bits); | 
|---|
| 373 | src += src_shift; | 
|---|
| 374 | dst += dst_shift; | 
|---|
| 375 | } | 
|---|
| 376 |  | 
|---|
| 377 | UInt32 dst_bytes = num_full * dst_shift; | 
|---|
| 378 |  | 
|---|
| 379 | if (tail) | 
|---|
| 380 | { | 
|---|
| 381 | load<T>(src, buf, tail); | 
|---|
| 382 | transpose<T, full>(buf, dst, num_bits, tail); | 
|---|
| 383 | dst_bytes += dst_shift; | 
|---|
| 384 | } | 
|---|
| 385 |  | 
|---|
| 386 | return header_size + dst_bytes; | 
|---|
| 387 | } | 
|---|
| 388 |  | 
|---|
| 389 | template <typename T, bool full> | 
|---|
| 390 | void decompressData(const char * src, UInt32 bytes_size, char * dst, UInt32 uncompressed_size) | 
|---|
| 391 | { | 
|---|
| 392 | using MinMaxType = std::conditional_t<is_signed_v<T>, Int64, UInt64>; | 
|---|
| 393 |  | 
|---|
| 394 | static constexpr const UInt32 matrix_size = 64; | 
|---|
| 395 | static constexpr const UInt32  = 2 * sizeof(UInt64); | 
|---|
| 396 |  | 
|---|
| 397 | if (bytes_size < header_size) | 
|---|
| 398 | throw Exception( "Cannot decompress, data size "+ toString(bytes_size) + " is less then T64 header", | 
|---|
| 399 | ErrorCodes::CANNOT_DECOMPRESS); | 
|---|
| 400 |  | 
|---|
| 401 | if (uncompressed_size % sizeof(T)) | 
|---|
| 402 | throw Exception( "Cannot decompress, unexpected uncompressed size "+ toString(uncompressed_size), | 
|---|
| 403 | ErrorCodes::CANNOT_DECOMPRESS); | 
|---|
| 404 |  | 
|---|
| 405 | UInt64 num_elements = uncompressed_size / sizeof(T); | 
|---|
| 406 | MinMaxType min; | 
|---|
| 407 | MinMaxType max; | 
|---|
| 408 |  | 
|---|
| 409 | /// Read header | 
|---|
| 410 | { | 
|---|
| 411 | memcpy(&min, src, sizeof(MinMaxType)); | 
|---|
| 412 | memcpy(&max, src + 8, sizeof(MinMaxType)); | 
|---|
| 413 | src += header_size; | 
|---|
| 414 | bytes_size -= header_size; | 
|---|
| 415 | } | 
|---|
| 416 |  | 
|---|
| 417 | UInt32 num_bits = getValuableBitsNumber(min, max); | 
|---|
| 418 | if (!num_bits) | 
|---|
| 419 | { | 
|---|
| 420 | T min_value = min; | 
|---|
| 421 | for (UInt32 i = 0; i < num_elements; ++i, dst += sizeof(T)) | 
|---|
| 422 | unalignedStore<T>(dst, min_value); | 
|---|
| 423 | return; | 
|---|
| 424 | } | 
|---|
| 425 |  | 
|---|
| 426 | UInt32 src_shift = sizeof(UInt64) * num_bits; | 
|---|
| 427 | UInt32 dst_shift = sizeof(T) * matrix_size; | 
|---|
| 428 |  | 
|---|
| 429 | if (!bytes_size || bytes_size % src_shift) | 
|---|
| 430 | throw Exception( "Cannot decompress, data size "+ toString(bytes_size) + " is not multiplier of "+ toString(src_shift), | 
|---|
| 431 | ErrorCodes::CANNOT_DECOMPRESS); | 
|---|
| 432 |  | 
|---|
| 433 | UInt32 num_full = bytes_size / src_shift; | 
|---|
| 434 | UInt32 tail = num_elements % matrix_size; | 
|---|
| 435 | if (tail) | 
|---|
| 436 | --num_full; | 
|---|
| 437 |  | 
|---|
| 438 | T upper_min = 0; | 
|---|
| 439 | T upper_max [[maybe_unused]] = 0; | 
|---|
| 440 | T sign_bit [[maybe_unused]] = 0; | 
|---|
| 441 | if (num_bits < 64) | 
|---|
| 442 | upper_min = UInt64(min) >> num_bits << num_bits; | 
|---|
| 443 |  | 
|---|
| 444 | if constexpr (is_signed_v<T>) | 
|---|
| 445 | { | 
|---|
| 446 | if (min < 0 && max >= 0 && num_bits < 64) | 
|---|
| 447 | { | 
|---|
| 448 | sign_bit = 1ull << (num_bits - 1); | 
|---|
| 449 | upper_max = UInt64(max) >> num_bits << num_bits; | 
|---|
| 450 | } | 
|---|
| 451 | } | 
|---|
| 452 |  | 
|---|
| 453 | T buf[matrix_size]; | 
|---|
| 454 | for (UInt32 i = 0; i < num_full; ++i) | 
|---|
| 455 | { | 
|---|
| 456 | reverseTranspose<T, full>(src, buf, num_bits); | 
|---|
| 457 | restoreUpperBits(buf, upper_min, upper_max, sign_bit); | 
|---|
| 458 | store<T>(buf, dst, matrix_size); | 
|---|
| 459 | src += src_shift; | 
|---|
| 460 | dst += dst_shift; | 
|---|
| 461 | } | 
|---|
| 462 |  | 
|---|
| 463 | if (tail) | 
|---|
| 464 | { | 
|---|
| 465 | reverseTranspose<T, full>(src, buf, num_bits, tail); | 
|---|
| 466 | restoreUpperBits(buf, upper_min, upper_max, sign_bit, tail); | 
|---|
| 467 | store<T>(buf, dst, tail); | 
|---|
| 468 | } | 
|---|
| 469 | } | 
|---|
| 470 |  | 
|---|
| 471 | template <typename T> | 
|---|
| 472 | UInt32 compressData(const char * src, UInt32 src_size, char * dst, Variant variant) | 
|---|
| 473 | { | 
|---|
| 474 | if (variant == Variant::Bit) | 
|---|
| 475 | return compressData<T, true>(src, src_size, dst); | 
|---|
| 476 | return compressData<T, false>(src, src_size, dst); | 
|---|
| 477 | } | 
|---|
| 478 |  | 
|---|
| 479 | template <typename T> | 
|---|
| 480 | void decompressData(const char * src, UInt32 src_size, char * dst, UInt32 uncompressed_size, Variant variant) | 
|---|
| 481 | { | 
|---|
| 482 | if (variant == Variant::Bit) | 
|---|
| 483 | decompressData<T, true>(src, src_size, dst, uncompressed_size); | 
|---|
| 484 | else | 
|---|
| 485 | decompressData<T, false>(src, src_size, dst, uncompressed_size); | 
|---|
| 486 | } | 
|---|
| 487 |  | 
|---|
| 488 | } | 
|---|
| 489 |  | 
|---|
| 490 |  | 
|---|
| 491 | UInt32 CompressionCodecT64::doCompressData(const char * src, UInt32 src_size, char * dst) const | 
|---|
| 492 | { | 
|---|
| 493 | UInt8 cookie = static_cast<UInt8>(type_idx) | (static_cast<UInt8>(variant) << 7); | 
|---|
| 494 | memcpy(dst, &cookie, 1); | 
|---|
| 495 | dst += 1; | 
|---|
| 496 |  | 
|---|
| 497 | switch (baseType(type_idx)) | 
|---|
| 498 | { | 
|---|
| 499 | case TypeIndex::Int8: | 
|---|
| 500 | return 1 + compressData<Int8>(src, src_size, dst, variant); | 
|---|
| 501 | case TypeIndex::Int16: | 
|---|
| 502 | return 1 + compressData<Int16>(src, src_size, dst, variant); | 
|---|
| 503 | case TypeIndex::Int32: | 
|---|
| 504 | return 1 + compressData<Int32>(src, src_size, dst, variant); | 
|---|
| 505 | case TypeIndex::Int64: | 
|---|
| 506 | return 1 + compressData<Int64>(src, src_size, dst, variant); | 
|---|
| 507 | case TypeIndex::UInt8: | 
|---|
| 508 | return 1 + compressData<UInt8>(src, src_size, dst, variant); | 
|---|
| 509 | case TypeIndex::UInt16: | 
|---|
| 510 | return 1 + compressData<UInt16>(src, src_size, dst, variant); | 
|---|
| 511 | case TypeIndex::UInt32: | 
|---|
| 512 | return 1 + compressData<UInt32>(src, src_size, dst, variant); | 
|---|
| 513 | case TypeIndex::UInt64: | 
|---|
| 514 | return 1 + compressData<UInt64>(src, src_size, dst, variant); | 
|---|
| 515 | default: | 
|---|
| 516 | break; | 
|---|
| 517 | } | 
|---|
| 518 |  | 
|---|
| 519 | throw Exception( "Connot compress with T64", ErrorCodes::CANNOT_COMPRESS); | 
|---|
| 520 | } | 
|---|
| 521 |  | 
|---|
| 522 | void CompressionCodecT64::doDecompressData(const char * src, UInt32 src_size, char * dst, UInt32 uncompressed_size) const | 
|---|
| 523 | { | 
|---|
| 524 | if (!src_size) | 
|---|
| 525 | throw Exception( "Connot decompress with T64", ErrorCodes::CANNOT_DECOMPRESS); | 
|---|
| 526 |  | 
|---|
| 527 | UInt8 cookie = unalignedLoad<UInt8>(src); | 
|---|
| 528 | src += 1; | 
|---|
| 529 | src_size -= 1; | 
|---|
| 530 |  | 
|---|
| 531 | auto saved_variant = static_cast<Variant>(cookie >> 7); | 
|---|
| 532 | auto saved_type_id = static_cast<TypeIndex>(cookie & 0x7F); | 
|---|
| 533 |  | 
|---|
| 534 | switch (baseType(saved_type_id)) | 
|---|
| 535 | { | 
|---|
| 536 | case TypeIndex::Int8: | 
|---|
| 537 | return decompressData<Int8>(src, src_size, dst, uncompressed_size, saved_variant); | 
|---|
| 538 | case TypeIndex::Int16: | 
|---|
| 539 | return decompressData<Int16>(src, src_size, dst, uncompressed_size, saved_variant); | 
|---|
| 540 | case TypeIndex::Int32: | 
|---|
| 541 | return decompressData<Int32>(src, src_size, dst, uncompressed_size, saved_variant); | 
|---|
| 542 | case TypeIndex::Int64: | 
|---|
| 543 | return decompressData<Int64>(src, src_size, dst, uncompressed_size, saved_variant); | 
|---|
| 544 | case TypeIndex::UInt8: | 
|---|
| 545 | return decompressData<UInt8>(src, src_size, dst, uncompressed_size, saved_variant); | 
|---|
| 546 | case TypeIndex::UInt16: | 
|---|
| 547 | return decompressData<UInt16>(src, src_size, dst, uncompressed_size, saved_variant); | 
|---|
| 548 | case TypeIndex::UInt32: | 
|---|
| 549 | return decompressData<UInt32>(src, src_size, dst, uncompressed_size, saved_variant); | 
|---|
| 550 | case TypeIndex::UInt64: | 
|---|
| 551 | return decompressData<UInt64>(src, src_size, dst, uncompressed_size, saved_variant); | 
|---|
| 552 | default: | 
|---|
| 553 | break; | 
|---|
| 554 | } | 
|---|
| 555 |  | 
|---|
| 556 | throw Exception( "Connot decompress with T64", ErrorCodes::CANNOT_DECOMPRESS); | 
|---|
| 557 | } | 
|---|
| 558 |  | 
|---|
| 559 | void CompressionCodecT64::useInfoAboutType(DataTypePtr data_type) | 
|---|
| 560 | { | 
|---|
| 561 | if (data_type) | 
|---|
| 562 | { | 
|---|
| 563 | type_idx = typeIdx(data_type); | 
|---|
| 564 | if (type_idx == TypeIndex::Nothing) | 
|---|
| 565 | throw Exception( "T64 codec is not supported for specified type", ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE); | 
|---|
| 566 | } | 
|---|
| 567 | } | 
|---|
| 568 |  | 
|---|
| 569 | UInt8 CompressionCodecT64::getMethodByte() const | 
|---|
| 570 | { | 
|---|
| 571 | return codecId(); | 
|---|
| 572 | } | 
|---|
| 573 |  | 
|---|
| 574 | void registerCodecT64(CompressionCodecFactory & factory) | 
|---|
| 575 | { | 
|---|
| 576 | auto reg_func = [&](const ASTPtr & arguments, DataTypePtr type) -> CompressionCodecPtr | 
|---|
| 577 | { | 
|---|
| 578 | Variant variant = Variant::Byte; | 
|---|
| 579 |  | 
|---|
| 580 | if (arguments && !arguments->children.empty()) | 
|---|
| 581 | { | 
|---|
| 582 | if (arguments->children.size() > 1) | 
|---|
| 583 | throw Exception( "T64 support zero or one parameter, given "+ std::to_string(arguments->children.size()), | 
|---|
| 584 | ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE); | 
|---|
| 585 |  | 
|---|
| 586 | const auto children = arguments->children; | 
|---|
| 587 | const auto * literal = children[0]->as<ASTLiteral>(); | 
|---|
| 588 | if (!literal) | 
|---|
| 589 | throw Exception( "Wrong modification for T64. Expected: 'bit', 'byte')", | 
|---|
| 590 | ErrorCodes::ILLEGAL_CODEC_PARAMETER); | 
|---|
| 591 | String name = literal->value.safeGet<String>(); | 
|---|
| 592 |  | 
|---|
| 593 | if (name == "byte") | 
|---|
| 594 | variant = Variant::Byte; | 
|---|
| 595 | else if (name == "bit") | 
|---|
| 596 | variant = Variant::Bit; | 
|---|
| 597 | else | 
|---|
| 598 | throw Exception( "Wrong modification for T64: "+ name, ErrorCodes::ILLEGAL_CODEC_PARAMETER); | 
|---|
| 599 | } | 
|---|
| 600 |  | 
|---|
| 601 | auto type_idx = typeIdx(type); | 
|---|
| 602 | if (type && type_idx == TypeIndex::Nothing) | 
|---|
| 603 | throw Exception( "T64 codec is not supported for specified type", ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE); | 
|---|
| 604 | return std::make_shared<CompressionCodecT64>(type_idx, variant); | 
|---|
| 605 | }; | 
|---|
| 606 |  | 
|---|
| 607 | factory.registerCompressionCodecWithType( "T64", codecId(), reg_func); | 
|---|
| 608 | } | 
|---|
| 609 | } | 
|---|
| 610 |  | 
|---|