| 1 | #pragma once |
| 2 | |
| 3 | #include <Core/Types.h> |
| 4 | #include <Compression/ICompressionCodec.h> |
| 5 | |
| 6 | namespace DB |
| 7 | { |
| 8 | |
| 9 | /// Get 64 integer valuses, makes 64x64 bit matrix, transpose it and crop unused bits (most significant zeroes). |
| 10 | /// In example, if we have UInt8 with only 0 and 1 inside 64xUInt8 would be compressed into 1xUInt64. |
| 11 | /// It detects unused bits by calculating min and max values of data part, saving them in header in compression phase. |
| 12 | /// There's a special case with signed integers parts with crossing zero data. Here it stores one more bit to detect sign of value. |
| 13 | class CompressionCodecT64 : public ICompressionCodec |
| 14 | { |
| 15 | public: |
| 16 | static constexpr UInt32 = 1 + 2 * sizeof(UInt64); |
| 17 | static constexpr UInt32 MAX_COMPRESSED_BLOCK_SIZE = sizeof(UInt64) * 64; |
| 18 | |
| 19 | /// There're 2 compression variants: |
| 20 | /// Byte - transpose bit matrix by bytes (only the last not full byte is transposed by bits). It's default. |
| 21 | /// Bits - full bit-transpose of the bit matrix. It uses more resources and leads to better compression with ZSTD (but worse with LZ4). |
| 22 | enum class Variant |
| 23 | { |
| 24 | Byte, |
| 25 | Bit |
| 26 | }; |
| 27 | |
| 28 | CompressionCodecT64(TypeIndex type_idx_, Variant variant_) |
| 29 | : type_idx(type_idx_) |
| 30 | , variant(variant_) |
| 31 | {} |
| 32 | |
| 33 | UInt8 getMethodByte() const override; |
| 34 | String getCodecDesc() const override |
| 35 | { |
| 36 | return String("T64" ) + ((variant == Variant::Byte) ? "" : "(\'bit\')" ); |
| 37 | } |
| 38 | |
| 39 | void useInfoAboutType(DataTypePtr data_type) override; |
| 40 | |
| 41 | protected: |
| 42 | UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; |
| 43 | void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; |
| 44 | |
| 45 | UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override |
| 46 | { |
| 47 | /// uncompressed_size - (uncompressed_size % (sizeof(T) * 64)) + sizeof(UInt64) * sizeof(T) + header_size |
| 48 | return uncompressed_size + MAX_COMPRESSED_BLOCK_SIZE + HEADER_SIZE; |
| 49 | } |
| 50 | |
| 51 | private: |
| 52 | TypeIndex type_idx; |
| 53 | Variant variant; |
| 54 | }; |
| 55 | |
class CompressionCodecFactory;

/// Declared here, defined elsewhere.
/// NOTE(review): presumably registers the T64 codec in the given factory - confirm against the definition.
void registerCodecT64(CompressionCodecFactory & factory);
| 58 | |
| 59 | } |
| 60 | |