1 | #pragma once |
2 | |
3 | #include <Core/Types.h> |
4 | #include <Compression/ICompressionCodec.h> |
5 | |
6 | namespace DB |
7 | { |
8 | |
/// Gets 64 integer values, makes a 64x64 bit matrix, transposes it and crops unused bits (most significant zeroes).
/// For example, if we have UInt8 with only 0 and 1 inside, 64xUInt8 would be compressed into 1xUInt64.
/// It detects unused bits by calculating min and max values of the data part, saving them in the header in the compression phase.
/// There's a special case for signed integer parts whose data crosses zero: here it stores one more bit to detect the sign of a value.
13 | class CompressionCodecT64 : public ICompressionCodec |
14 | { |
15 | public: |
16 | static constexpr UInt32 = 1 + 2 * sizeof(UInt64); |
17 | static constexpr UInt32 MAX_COMPRESSED_BLOCK_SIZE = sizeof(UInt64) * 64; |
18 | |
19 | /// There're 2 compression variants: |
20 | /// Byte - transpose bit matrix by bytes (only the last not full byte is transposed by bits). It's default. |
21 | /// Bits - full bit-transpose of the bit matrix. It uses more resources and leads to better compression with ZSTD (but worse with LZ4). |
22 | enum class Variant |
23 | { |
24 | Byte, |
25 | Bit |
26 | }; |
27 | |
28 | CompressionCodecT64(TypeIndex type_idx_, Variant variant_) |
29 | : type_idx(type_idx_) |
30 | , variant(variant_) |
31 | {} |
32 | |
33 | UInt8 getMethodByte() const override; |
34 | String getCodecDesc() const override |
35 | { |
36 | return String("T64" ) + ((variant == Variant::Byte) ? "" : "(\'bit\')" ); |
37 | } |
38 | |
39 | void useInfoAboutType(DataTypePtr data_type) override; |
40 | |
41 | protected: |
42 | UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; |
43 | void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; |
44 | |
45 | UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override |
46 | { |
47 | /// uncompressed_size - (uncompressed_size % (sizeof(T) * 64)) + sizeof(UInt64) * sizeof(T) + header_size |
48 | return uncompressed_size + MAX_COMPRESSED_BLOCK_SIZE + HEADER_SIZE; |
49 | } |
50 | |
51 | private: |
52 | TypeIndex type_idx; |
53 | Variant variant; |
54 | }; |
55 | |
/// Forward declaration only: the factory is taken by reference below, so its full definition is not required here.
class CompressionCodecFactory;

/// NOTE(review): presumably registers the T64 codec in the given factory; the definition is not visible in this header - confirm.
void registerCodecT64(CompressionCodecFactory & factory);
58 | |
59 | } |
60 | |