1//===----------------------------------------------------------------------===//
2// DuckDB
3//
4// miniz_wrapper.hpp
5//
6//
7//===----------------------------------------------------------------------===//
8
9#pragma once
10
11#include "miniz.hpp"
12#include <string>
13#include <stdexcept>
14
15namespace duckdb {
16
//! Tracks which miniz API (if any) currently owns the state inside a MiniZStream,
//! so that the matching mz_*End function can be selected on cleanup.
enum class MiniZStreamType {
	//! No miniz stream has been initialised
	MINIZ_TYPE_NONE = 0,
	//! The stream was initialised for decompression (inflate)
	MINIZ_TYPE_INFLATE = 1,
	//! The stream was initialised for compression (deflate)
	MINIZ_TYPE_DEFLATE = 2
};
22
23struct MiniZStream {
24 static constexpr uint8_t GZIP_HEADER_MINSIZE = 10;
25 static constexpr uint8_t GZIP_FOOTER_SIZE = 8;
26 static constexpr uint8_t GZIP_COMPRESSION_DEFLATE = 0x08;
27 static constexpr unsigned char GZIP_FLAG_UNSUPPORTED = 0x1 | 0x2 | 0x4 | 0x10 | 0x20;
28
29public:
30 MiniZStream() : type(MiniZStreamType::MINIZ_TYPE_NONE) {
31 memset(s: &stream, c: 0, n: sizeof(duckdb_miniz::mz_stream));
32 }
33 ~MiniZStream() {
34 switch(type) {
35 case MiniZStreamType::MINIZ_TYPE_INFLATE:
36 duckdb_miniz::mz_inflateEnd(pStream: &stream);
37 break;
38 case MiniZStreamType::MINIZ_TYPE_DEFLATE:
39 duckdb_miniz::mz_deflateEnd(pStream: &stream);
40 break;
41 default:
42 break;
43 }
44 }
45 void FormatException(std::string error_msg) {
46 throw std::runtime_error(error_msg);
47 }
48 void FormatException(const char *error_msg, int mz_ret) {
49 auto err = duckdb_miniz::mz_error(err: mz_ret);
50 FormatException(error_msg: error_msg + std::string(": ") + (err ? err : "Unknown error code"));
51 }
52 void Decompress(const char *compressed_data, size_t compressed_size, char *out_data, size_t out_size) {
53 auto mz_ret = mz_inflateInit2(pStream: &stream, window_bits: -MZ_DEFAULT_WINDOW_BITS);
54 if (mz_ret != duckdb_miniz::MZ_OK) {
55 FormatException(error_msg: "Failed to initialize miniz", mz_ret);
56 }
57 type = MiniZStreamType::MINIZ_TYPE_INFLATE;
58
59 if (compressed_size < GZIP_HEADER_MINSIZE) {
60 FormatException(error_msg: "Failed to decompress GZIP block: compressed size is less than gzip header size");
61 }
62 auto gzip_hdr = (const unsigned char *)compressed_data;
63 if (gzip_hdr[0] != 0x1F || gzip_hdr[1] != 0x8B || gzip_hdr[2] != GZIP_COMPRESSION_DEFLATE ||
64 gzip_hdr[3] & GZIP_FLAG_UNSUPPORTED) {
65 FormatException(error_msg: "Input is invalid/unsupported GZIP stream");
66 }
67
68 stream.next_in = (const unsigned char *)compressed_data + GZIP_HEADER_MINSIZE;
69 stream.avail_in = compressed_size - GZIP_HEADER_MINSIZE;
70 stream.next_out = (unsigned char *)out_data;
71 stream.avail_out = out_size;
72
73 mz_ret = mz_inflate(pStream: &stream, flush: duckdb_miniz::MZ_FINISH);
74 if (mz_ret != duckdb_miniz::MZ_OK && mz_ret != duckdb_miniz::MZ_STREAM_END) {
75 FormatException(error_msg: "Failed to decompress GZIP block", mz_ret);
76 }
77 }
78 size_t MaxCompressedLength(size_t input_size) {
79 return duckdb_miniz::mz_compressBound(source_len: input_size) + GZIP_HEADER_MINSIZE + GZIP_FOOTER_SIZE;
80 }
81 static void InitializeGZIPHeader(unsigned char *gzip_header) {
82 memset(s: gzip_header, c: 0, n: GZIP_HEADER_MINSIZE);
83 gzip_header[0] = 0x1F;
84 gzip_header[1] = 0x8B;
85 gzip_header[2] = GZIP_COMPRESSION_DEFLATE;
86 gzip_header[3] = 0;
87 gzip_header[4] = 0;
88 gzip_header[5] = 0;
89 gzip_header[6] = 0;
90 gzip_header[7] = 0;
91 gzip_header[8] = 0;
92 gzip_header[9] = 0xFF;
93 }
94
95 static void InitializeGZIPFooter(unsigned char *gzip_footer, duckdb_miniz::mz_ulong crc, idx_t uncompressed_size) {
96 gzip_footer[0] = crc & 0xFF;
97 gzip_footer[1] = (crc >> 8) & 0xFF;
98 gzip_footer[2] = (crc >> 16) & 0xFF;
99 gzip_footer[3] = (crc >> 24) & 0xFF;
100 gzip_footer[4] = uncompressed_size & 0xFF;
101 gzip_footer[5] = (uncompressed_size >> 8) & 0xFF;
102 gzip_footer[6] = (uncompressed_size >> 16) & 0xFF;
103 gzip_footer[7] = (uncompressed_size >> 24) & 0xFF;
104 }
105
106 void Compress(const char *uncompressed_data, size_t uncompressed_size, char *out_data, size_t *out_size) {
107 auto mz_ret = mz_deflateInit2(pStream: &stream, level: duckdb_miniz::MZ_DEFAULT_LEVEL, MZ_DEFLATED, window_bits: -MZ_DEFAULT_WINDOW_BITS, mem_level: 1, strategy: 0);
108 if (mz_ret != duckdb_miniz::MZ_OK) {
109 FormatException(error_msg: "Failed to initialize miniz", mz_ret);
110 }
111 type = MiniZStreamType::MINIZ_TYPE_DEFLATE;
112
113 auto gzip_header = (unsigned char*) out_data;
114 InitializeGZIPHeader(gzip_header);
115
116 auto gzip_body = gzip_header + GZIP_HEADER_MINSIZE;
117
118 stream.next_in = (const unsigned char*) uncompressed_data;
119 stream.avail_in = uncompressed_size;
120 stream.next_out = gzip_body;
121 stream.avail_out = *out_size - GZIP_HEADER_MINSIZE;
122
123 mz_ret = mz_deflate(pStream: &stream, flush: duckdb_miniz::MZ_FINISH);
124 if (mz_ret != duckdb_miniz::MZ_OK && mz_ret != duckdb_miniz::MZ_STREAM_END) {
125 FormatException(error_msg: "Failed to compress GZIP block", mz_ret);
126 }
127 auto gzip_footer = gzip_body + stream.total_out;
128 auto crc = duckdb_miniz::mz_crc32(MZ_CRC32_INIT, ptr: (const unsigned char*) uncompressed_data, buf_len: uncompressed_size);
129 InitializeGZIPFooter(gzip_footer, crc, uncompressed_size);
130
131 *out_size = stream.total_out + GZIP_HEADER_MINSIZE + GZIP_FOOTER_SIZE;
132 }
133
134private:
135 duckdb_miniz::mz_stream stream;
136 MiniZStreamType type;
137};
138
139}
140