1 | #pragma once |
2 | |
3 | #include <memory> |
4 | #include <IO/ReadBuffer.h> |
5 | #include <IO/WriteBuffer.h> |
6 | #include <IO/BufferWithOwnMemory.h> |
7 | #include <DataTypes/IDataType.h> |
8 | #include <boost/noncopyable.hpp> |
9 | #include <IO/UncompressedCache.h> |
10 | #include <Compression/LZ4_decompress_faster.h> |
11 | #include <Compression/CompressionInfo.h> |
12 | |
13 | namespace DB |
14 | { |
15 | |
/// Forward declaration so the aliases below can name the codec interface
/// ahead of its definition later in this header.
class ICompressionCodec;

/// Shared-ownership handle to a codec instance.
using CompressionCodecPtr = std::shared_ptr<ICompressionCodec>;

/// Sequence of codec handles.
using Codecs = std::vector<CompressionCodecPtr>;
20 | |
21 | /** |
22 | * Represents interface for compression codecs like LZ4, ZSTD, etc. |
23 | */ |
24 | class ICompressionCodec : private boost::noncopyable |
25 | { |
26 | public: |
27 | virtual ~ICompressionCodec() = default; |
28 | |
29 | /// Byte which indicates codec in compressed file |
30 | virtual UInt8 getMethodByte() const = 0; |
31 | |
32 | /// Codec description, for example "ZSTD(2)" or "LZ4,LZ4HC(5)" |
33 | virtual String getCodecDesc() const = 0; |
34 | |
35 | /// Compressed bytes from uncompressed source to dest. Dest should preallocate memory |
36 | UInt32 compress(const char * source, UInt32 source_size, char * dest) const; |
37 | |
38 | /// Decompress bytes from compressed source to dest. Dest should preallocate memory |
39 | UInt32 decompress(const char * source, UInt32 source_size, char * dest) const; |
40 | |
41 | /// Number of bytes, that will be used to compress uncompressed_size bytes with current codec |
42 | virtual UInt32 getCompressedReserveSize(UInt32 uncompressed_size) const { return getHeaderSize() + getMaxCompressedDataSize(uncompressed_size); } |
43 | |
44 | /// Some codecs (LZ4, for example) require additional bytes at end of buffer |
45 | virtual UInt32 getAdditionalSizeAtTheEndOfBuffer() const { return 0; } |
46 | |
47 | /// Size of header in compressed data on disk |
48 | static constexpr UInt8 () { return COMPRESSED_BLOCK_HEADER_SIZE; } |
49 | |
50 | /// Read size of compressed block from compressed source |
51 | static UInt32 readCompressedBlockSize(const char * source); |
52 | |
53 | /// Read size of decompressed block from compressed source |
54 | static UInt32 readDecompressedBlockSize(const char * source); |
55 | |
56 | /// Read method byte from compressed source |
57 | static UInt8 readMethod(const char * source); |
58 | |
59 | /// Some codecs may use information about column type which appears after codec creation |
60 | virtual void useInfoAboutType(DataTypePtr /* data_type */) { } |
61 | |
62 | protected: |
63 | |
64 | /// Return size of compressed data without header |
65 | virtual UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const { return uncompressed_size; } |
66 | |
67 | /// Actually compress data, without header |
68 | virtual UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const = 0; |
69 | |
70 | /// Actually decompress data without header |
71 | virtual void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const = 0; |
72 | }; |
73 | |
74 | } |
75 | |