| 1 | //===----------------------------------------------------------------------===// |
| 2 | // DuckDB |
| 3 | // |
| 4 | // miniz_wrapper.hpp |
| 5 | // |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #pragma once |
| 10 | |
| 11 | #include "miniz.hpp" |
| 12 | #include <string> |
| 13 | #include <stdexcept> |
| 14 | |
| 15 | namespace duckdb { |
| 16 | |
//! Records which miniz API (if any) a MiniZStream has been initialized for,
//! so that the matching cleanup routine can be selected later.
enum class MiniZStreamType {
	//! No miniz stream has been initialized yet
	MINIZ_TYPE_NONE = 0,
	//! The stream was initialized for decompression (inflate)
	MINIZ_TYPE_INFLATE = 1,
	//! The stream was initialized for compression (deflate)
	MINIZ_TYPE_DEFLATE = 2
};
| 22 | |
| 23 | struct MiniZStream { |
| 24 | static constexpr uint8_t = 10; |
| 25 | static constexpr uint8_t = 8; |
| 26 | static constexpr uint8_t GZIP_COMPRESSION_DEFLATE = 0x08; |
| 27 | static constexpr unsigned char GZIP_FLAG_UNSUPPORTED = 0x1 | 0x2 | 0x4 | 0x10 | 0x20; |
| 28 | |
| 29 | public: |
| 30 | MiniZStream() : type(MiniZStreamType::MINIZ_TYPE_NONE) { |
| 31 | memset(s: &stream, c: 0, n: sizeof(duckdb_miniz::mz_stream)); |
| 32 | } |
| 33 | ~MiniZStream() { |
| 34 | switch(type) { |
| 35 | case MiniZStreamType::MINIZ_TYPE_INFLATE: |
| 36 | duckdb_miniz::mz_inflateEnd(pStream: &stream); |
| 37 | break; |
| 38 | case MiniZStreamType::MINIZ_TYPE_DEFLATE: |
| 39 | duckdb_miniz::mz_deflateEnd(pStream: &stream); |
| 40 | break; |
| 41 | default: |
| 42 | break; |
| 43 | } |
| 44 | } |
| 45 | void FormatException(std::string error_msg) { |
| 46 | throw std::runtime_error(error_msg); |
| 47 | } |
| 48 | void FormatException(const char *error_msg, int mz_ret) { |
| 49 | auto err = duckdb_miniz::mz_error(err: mz_ret); |
| 50 | FormatException(error_msg: error_msg + std::string(": " ) + (err ? err : "Unknown error code" )); |
| 51 | } |
| 52 | void Decompress(const char *compressed_data, size_t compressed_size, char *out_data, size_t out_size) { |
| 53 | auto mz_ret = mz_inflateInit2(pStream: &stream, window_bits: -MZ_DEFAULT_WINDOW_BITS); |
| 54 | if (mz_ret != duckdb_miniz::MZ_OK) { |
| 55 | FormatException(error_msg: "Failed to initialize miniz" , mz_ret); |
| 56 | } |
| 57 | type = MiniZStreamType::MINIZ_TYPE_INFLATE; |
| 58 | |
| 59 | if (compressed_size < GZIP_HEADER_MINSIZE) { |
| 60 | FormatException(error_msg: "Failed to decompress GZIP block: compressed size is less than gzip header size" ); |
| 61 | } |
| 62 | auto gzip_hdr = (const unsigned char *)compressed_data; |
| 63 | if (gzip_hdr[0] != 0x1F || gzip_hdr[1] != 0x8B || gzip_hdr[2] != GZIP_COMPRESSION_DEFLATE || |
| 64 | gzip_hdr[3] & GZIP_FLAG_UNSUPPORTED) { |
| 65 | FormatException(error_msg: "Input is invalid/unsupported GZIP stream" ); |
| 66 | } |
| 67 | |
| 68 | stream.next_in = (const unsigned char *)compressed_data + GZIP_HEADER_MINSIZE; |
| 69 | stream.avail_in = compressed_size - GZIP_HEADER_MINSIZE; |
| 70 | stream.next_out = (unsigned char *)out_data; |
| 71 | stream.avail_out = out_size; |
| 72 | |
| 73 | mz_ret = mz_inflate(pStream: &stream, flush: duckdb_miniz::MZ_FINISH); |
| 74 | if (mz_ret != duckdb_miniz::MZ_OK && mz_ret != duckdb_miniz::MZ_STREAM_END) { |
| 75 | FormatException(error_msg: "Failed to decompress GZIP block" , mz_ret); |
| 76 | } |
| 77 | } |
| 78 | size_t MaxCompressedLength(size_t input_size) { |
| 79 | return duckdb_miniz::mz_compressBound(source_len: input_size) + GZIP_HEADER_MINSIZE + GZIP_FOOTER_SIZE; |
| 80 | } |
| 81 | static void (unsigned char *) { |
| 82 | memset(s: gzip_header, c: 0, n: GZIP_HEADER_MINSIZE); |
| 83 | gzip_header[0] = 0x1F; |
| 84 | gzip_header[1] = 0x8B; |
| 85 | gzip_header[2] = GZIP_COMPRESSION_DEFLATE; |
| 86 | gzip_header[3] = 0; |
| 87 | gzip_header[4] = 0; |
| 88 | gzip_header[5] = 0; |
| 89 | gzip_header[6] = 0; |
| 90 | gzip_header[7] = 0; |
| 91 | gzip_header[8] = 0; |
| 92 | gzip_header[9] = 0xFF; |
| 93 | } |
| 94 | |
| 95 | static void (unsigned char *, duckdb_miniz::mz_ulong crc, idx_t uncompressed_size) { |
| 96 | gzip_footer[0] = crc & 0xFF; |
| 97 | gzip_footer[1] = (crc >> 8) & 0xFF; |
| 98 | gzip_footer[2] = (crc >> 16) & 0xFF; |
| 99 | gzip_footer[3] = (crc >> 24) & 0xFF; |
| 100 | gzip_footer[4] = uncompressed_size & 0xFF; |
| 101 | gzip_footer[5] = (uncompressed_size >> 8) & 0xFF; |
| 102 | gzip_footer[6] = (uncompressed_size >> 16) & 0xFF; |
| 103 | gzip_footer[7] = (uncompressed_size >> 24) & 0xFF; |
| 104 | } |
| 105 | |
| 106 | void Compress(const char *uncompressed_data, size_t uncompressed_size, char *out_data, size_t *out_size) { |
| 107 | auto mz_ret = mz_deflateInit2(pStream: &stream, level: duckdb_miniz::MZ_DEFAULT_LEVEL, MZ_DEFLATED, window_bits: -MZ_DEFAULT_WINDOW_BITS, mem_level: 1, strategy: 0); |
| 108 | if (mz_ret != duckdb_miniz::MZ_OK) { |
| 109 | FormatException(error_msg: "Failed to initialize miniz" , mz_ret); |
| 110 | } |
| 111 | type = MiniZStreamType::MINIZ_TYPE_DEFLATE; |
| 112 | |
| 113 | auto = (unsigned char*) out_data; |
| 114 | InitializeGZIPHeader(gzip_header); |
| 115 | |
| 116 | auto gzip_body = gzip_header + GZIP_HEADER_MINSIZE; |
| 117 | |
| 118 | stream.next_in = (const unsigned char*) uncompressed_data; |
| 119 | stream.avail_in = uncompressed_size; |
| 120 | stream.next_out = gzip_body; |
| 121 | stream.avail_out = *out_size - GZIP_HEADER_MINSIZE; |
| 122 | |
| 123 | mz_ret = mz_deflate(pStream: &stream, flush: duckdb_miniz::MZ_FINISH); |
| 124 | if (mz_ret != duckdb_miniz::MZ_OK && mz_ret != duckdb_miniz::MZ_STREAM_END) { |
| 125 | FormatException(error_msg: "Failed to compress GZIP block" , mz_ret); |
| 126 | } |
| 127 | auto = gzip_body + stream.total_out; |
| 128 | auto crc = duckdb_miniz::mz_crc32(MZ_CRC32_INIT, ptr: (const unsigned char*) uncompressed_data, buf_len: uncompressed_size); |
| 129 | InitializeGZIPFooter(gzip_footer, crc, uncompressed_size); |
| 130 | |
| 131 | *out_size = stream.total_out + GZIP_HEADER_MINSIZE + GZIP_FOOTER_SIZE; |
| 132 | } |
| 133 | |
| 134 | private: |
| 135 | duckdb_miniz::mz_stream stream; |
| 136 | MiniZStreamType type; |
| 137 | }; |
| 138 | |
| 139 | } |
| 140 | |