1 | /* |
2 | * Copyright 2017-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | |
17 | #include <folly/compression/Zlib.h> |
18 | |
19 | #if FOLLY_HAVE_LIBZ |
20 | |
21 | #include <folly/Conv.h> |
22 | #include <folly/Optional.h> |
23 | #include <folly/Range.h> |
24 | #include <folly/ScopeGuard.h> |
25 | #include <folly/compression/Compression.h> |
26 | #include <folly/compression/Utils.h> |
27 | #include <folly/io/Cursor.h> |
28 | |
29 | using folly::io::compression::detail::dataStartsWithLE; |
30 | using folly::io::compression::detail::prefixToStringLE; |
31 | |
32 | namespace folly { |
33 | namespace io { |
34 | namespace zlib { |
35 | |
36 | namespace { |
37 | |
38 | bool isValidStrategy(int strategy) { |
39 | std::array<int, 5> strategies{{ |
40 | Z_DEFAULT_STRATEGY, |
41 | Z_FILTERED, |
42 | Z_HUFFMAN_ONLY, |
43 | Z_RLE, |
44 | Z_FIXED, |
45 | }}; |
46 | return std::any_of(strategies.begin(), strategies.end(), [&](int i) { |
47 | return i == strategy; |
48 | }); |
49 | } |
50 | |
51 | int getWindowBits(Options::Format format, int windowSize) { |
52 | switch (format) { |
53 | case Options::Format::ZLIB: |
54 | return windowSize; |
55 | case Options::Format::GZIP: |
56 | return windowSize + 16; |
57 | case Options::Format::RAW: |
58 | return -windowSize; |
59 | case Options::Format::AUTO: |
60 | return windowSize + 32; |
61 | default: |
62 | return windowSize; |
63 | } |
64 | } |
65 | |
66 | CodecType getCodecType(Options options) { |
67 | if (options.windowSize == 15 && options.format == Options::Format::ZLIB) { |
68 | return CodecType::ZLIB; |
69 | } else if ( |
70 | options.windowSize == 15 && options.format == Options::Format::GZIP) { |
71 | return CodecType::GZIP; |
72 | } else { |
73 | return CodecType::USER_DEFINED; |
74 | } |
75 | } |
76 | |
77 | class ZlibStreamCodec final : public StreamCodec { |
78 | public: |
79 | static std::unique_ptr<Codec> createCodec(Options options, int level); |
80 | static std::unique_ptr<StreamCodec> createStream(Options options, int level); |
81 | |
82 | explicit ZlibStreamCodec(Options options, int level); |
83 | ~ZlibStreamCodec() override; |
84 | |
85 | std::vector<std::string> validPrefixes() const override; |
86 | bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength) |
87 | const override; |
88 | |
89 | private: |
90 | uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override; |
91 | |
92 | void doResetStream() override; |
93 | bool doCompressStream( |
94 | ByteRange& input, |
95 | MutableByteRange& output, |
96 | StreamCodec::FlushOp flush) override; |
97 | bool doUncompressStream( |
98 | ByteRange& input, |
99 | MutableByteRange& output, |
100 | StreamCodec::FlushOp flush) override; |
101 | |
102 | void resetDeflateStream(); |
103 | void resetInflateStream(); |
104 | |
105 | Options options_; |
106 | |
107 | Optional<z_stream> deflateStream_{}; |
108 | Optional<z_stream> inflateStream_{}; |
109 | int level_; |
110 | bool needReset_{true}; |
111 | }; |
112 | static constexpr uint16_t kGZIPMagicLE = 0x8B1F; |
113 | |
114 | std::vector<std::string> ZlibStreamCodec::validPrefixes() const { |
115 | if (type() == CodecType::ZLIB) { |
116 | // Zlib streams start with a 2 byte header. |
117 | // |
118 | // 0 1 |
119 | // +---+---+ |
120 | // |CMF|FLG| |
121 | // +---+---+ |
122 | // |
123 | // We won't restrict the values of any sub-fields except as described below. |
124 | // |
125 | // The lowest 4 bits of CMF is the compression method (CM). |
126 | // CM == 0x8 is the deflate compression method, which is currently the only |
127 | // supported compression method, so any valid prefix must have CM == 0x8. |
128 | // |
129 | // The lowest 5 bits of FLG is FCHECK. |
130 | // FCHECK must be such that the two header bytes are a multiple of 31 when |
131 | // interpreted as a big endian 16-bit number. |
132 | std::vector<std::string> result; |
133 | // 16 values for the first byte, 8 values for the second byte. |
134 | // There are also 4 combinations where both 0x00 and 0x1F work as FCHECK. |
135 | result.reserve(132); |
136 | // Select all values for the CMF byte that use the deflate algorithm 0x8. |
137 | for (uint32_t first = 0x0800; first <= 0xF800; first += 0x1000) { |
138 | // Select all values for the FLG, but leave FCHECK as 0 since it's fixed. |
139 | for (uint32_t second = 0x00; second <= 0xE0; second += 0x20) { |
140 | uint16_t prefix = first | second; |
141 | // Compute FCHECK. |
142 | prefix += 31 - (prefix % 31); |
143 | result.push_back(prefixToStringLE(Endian::big(prefix))); |
144 | // zlib won't produce this, but it is a valid prefix. |
145 | if ((prefix & 0x1F) == 31) { |
146 | prefix -= 31; |
147 | result.push_back(prefixToStringLE(Endian::big(prefix))); |
148 | } |
149 | } |
150 | } |
151 | return result; |
152 | } else if (type() == CodecType::GZIP) { |
153 | // The gzip frame starts with 2 magic bytes. |
154 | return {prefixToStringLE(kGZIPMagicLE)}; |
155 | } else { |
156 | return {}; |
157 | } |
158 | } |
159 | |
160 | bool ZlibStreamCodec::canUncompress(const IOBuf* data, Optional<uint64_t>) |
161 | const { |
162 | if (type() == CodecType::ZLIB) { |
163 | uint16_t value; |
164 | Cursor cursor{data}; |
165 | if (!cursor.tryReadBE(value)) { |
166 | return false; |
167 | } |
168 | // zlib compressed if using deflate and is a multiple of 31. |
169 | return (value & 0x0F00) == 0x0800 && value % 31 == 0; |
170 | } else if (type() == CodecType::GZIP) { |
171 | return dataStartsWithLE(data, kGZIPMagicLE); |
172 | } else { |
173 | return false; |
174 | } |
175 | } |
176 | |
177 | uint64_t ZlibStreamCodec::doMaxCompressedLength( |
178 | uint64_t uncompressedLength) const { |
179 | // When passed a nullptr, deflateBound() adds 6 bytes for a zlib wrapper. A |
180 | // gzip wrapper is 18 bytes, so we add the 12 byte difference. |
181 | return deflateBound(nullptr, uncompressedLength) + |
182 | (options_.format == Options::Format::GZIP ? 12 : 0); |
183 | } |
184 | |
185 | std::unique_ptr<Codec> ZlibStreamCodec::createCodec( |
186 | Options options, |
187 | int level) { |
188 | return std::make_unique<ZlibStreamCodec>(options, level); |
189 | } |
190 | |
191 | std::unique_ptr<StreamCodec> ZlibStreamCodec::createStream( |
192 | Options options, |
193 | int level) { |
194 | return std::make_unique<ZlibStreamCodec>(options, level); |
195 | } |
196 | |
// True when `value` lies in the closed interval [low, high].
static bool inBounds(int value, int low, int high) {
  return !(value < low || value > high);
}
200 | |
201 | static int zlibConvertLevel(int level) { |
202 | switch (level) { |
203 | case COMPRESSION_LEVEL_FASTEST: |
204 | return 1; |
205 | case COMPRESSION_LEVEL_DEFAULT: |
206 | return 6; |
207 | case COMPRESSION_LEVEL_BEST: |
208 | return 9; |
209 | } |
210 | if (!inBounds(level, 0, 9)) { |
211 | throw std::invalid_argument( |
212 | to<std::string>("ZlibStreamCodec: invalid level: " , level)); |
213 | } |
214 | return level; |
215 | } |
216 | |
217 | ZlibStreamCodec::ZlibStreamCodec(Options options, int level) |
218 | : StreamCodec( |
219 | getCodecType(options), |
220 | zlibConvertLevel(level), |
221 | getCodecType(options) == CodecType::GZIP ? "gzip" : "zlib" ), |
222 | level_(zlibConvertLevel(level)) { |
223 | options_ = options; |
224 | |
225 | // Although zlib allows a windowSize of 8..15, a value of 8 is not |
226 | // properly supported and is treated as a value of 9. This means data deflated |
227 | // with windowSize==8 can not be re-inflated with windowSize==8. windowSize==8 |
228 | // is also not supported for gzip and raw deflation. |
229 | // Hence, the codec supports only 9..15. |
230 | if (!inBounds(options_.windowSize, 9, 15)) { |
231 | throw std::invalid_argument(to<std::string>( |
232 | "ZlibStreamCodec: invalid windowSize option: " , options.windowSize)); |
233 | } |
234 | if (!inBounds(options_.memLevel, 1, 9)) { |
235 | throw std::invalid_argument(to<std::string>( |
236 | "ZlibStreamCodec: invalid memLevel option: " , options.memLevel)); |
237 | } |
238 | if (!isValidStrategy(options_.strategy)) { |
239 | throw std::invalid_argument(to<std::string>( |
240 | "ZlibStreamCodec: invalid strategy: " , options.strategy)); |
241 | } |
242 | } |
243 | |
244 | ZlibStreamCodec::~ZlibStreamCodec() { |
245 | if (deflateStream_) { |
246 | deflateEnd(deflateStream_.get_pointer()); |
247 | deflateStream_.clear(); |
248 | } |
249 | if (inflateStream_) { |
250 | inflateEnd(inflateStream_.get_pointer()); |
251 | inflateStream_.clear(); |
252 | } |
253 | } |
254 | |
255 | void ZlibStreamCodec::doResetStream() { |
256 | needReset_ = true; |
257 | } |
258 | |
259 | void ZlibStreamCodec::resetDeflateStream() { |
260 | if (deflateStream_) { |
261 | int const rc = deflateReset(deflateStream_.get_pointer()); |
262 | if (rc != Z_OK) { |
263 | deflateStream_.clear(); |
264 | throw std::runtime_error( |
265 | to<std::string>("ZlibStreamCodec: deflateReset error: " , rc)); |
266 | } |
267 | return; |
268 | } |
269 | deflateStream_ = z_stream{}; |
270 | |
271 | // The automatic header detection format is only for inflation. |
272 | // Use zlib for deflation if the format is auto. |
273 | int const windowBits = getWindowBits( |
274 | options_.format == Options::Format::AUTO ? Options::Format::ZLIB |
275 | : options_.format, |
276 | options_.windowSize); |
277 | |
278 | int const rc = deflateInit2( |
279 | deflateStream_.get_pointer(), |
280 | level_, |
281 | Z_DEFLATED, |
282 | windowBits, |
283 | options_.memLevel, |
284 | options_.strategy); |
285 | if (rc != Z_OK) { |
286 | deflateStream_.clear(); |
287 | throw std::runtime_error( |
288 | to<std::string>("ZlibStreamCodec: deflateInit error: " , rc)); |
289 | } |
290 | } |
291 | |
292 | void ZlibStreamCodec::resetInflateStream() { |
293 | if (inflateStream_) { |
294 | int const rc = inflateReset(inflateStream_.get_pointer()); |
295 | if (rc != Z_OK) { |
296 | inflateStream_.clear(); |
297 | throw std::runtime_error( |
298 | to<std::string>("ZlibStreamCodec: inflateReset error: " , rc)); |
299 | } |
300 | return; |
301 | } |
302 | inflateStream_ = z_stream{}; |
303 | int const rc = inflateInit2( |
304 | inflateStream_.get_pointer(), |
305 | getWindowBits(options_.format, options_.windowSize)); |
306 | if (rc != Z_OK) { |
307 | inflateStream_.clear(); |
308 | throw std::runtime_error( |
309 | to<std::string>("ZlibStreamCodec: inflateInit error: " , rc)); |
310 | } |
311 | } |
312 | |
313 | static int zlibTranslateFlush(StreamCodec::FlushOp flush) { |
314 | switch (flush) { |
315 | case StreamCodec::FlushOp::NONE: |
316 | return Z_NO_FLUSH; |
317 | case StreamCodec::FlushOp::FLUSH: |
318 | return Z_SYNC_FLUSH; |
319 | case StreamCodec::FlushOp::END: |
320 | return Z_FINISH; |
321 | default: |
322 | throw std::invalid_argument("ZlibStreamCodec: Invalid flush" ); |
323 | } |
324 | } |
325 | |
326 | static int zlibThrowOnError(int rc) { |
327 | switch (rc) { |
328 | case Z_OK: |
329 | case Z_BUF_ERROR: |
330 | case Z_STREAM_END: |
331 | return rc; |
332 | default: |
333 | throw std::runtime_error(to<std::string>("ZlibStreamCodec: error: " , rc)); |
334 | } |
335 | } |
336 | |
337 | bool ZlibStreamCodec::doCompressStream( |
338 | ByteRange& input, |
339 | MutableByteRange& output, |
340 | StreamCodec::FlushOp flush) { |
341 | if (needReset_) { |
342 | resetDeflateStream(); |
343 | needReset_ = false; |
344 | } |
345 | DCHECK(deflateStream_.hasValue()); |
346 | // zlib will return Z_STREAM_ERROR if output.data() is null. |
347 | if (output.data() == nullptr) { |
348 | return false; |
349 | } |
350 | deflateStream_->next_in = const_cast<uint8_t*>(input.data()); |
351 | deflateStream_->avail_in = input.size(); |
352 | deflateStream_->next_out = output.data(); |
353 | deflateStream_->avail_out = output.size(); |
354 | SCOPE_EXIT { |
355 | input.uncheckedAdvance(input.size() - deflateStream_->avail_in); |
356 | output.uncheckedAdvance(output.size() - deflateStream_->avail_out); |
357 | }; |
358 | int const rc = zlibThrowOnError( |
359 | deflate(deflateStream_.get_pointer(), zlibTranslateFlush(flush))); |
360 | switch (flush) { |
361 | case StreamCodec::FlushOp::NONE: |
362 | return false; |
363 | case StreamCodec::FlushOp::FLUSH: |
364 | return deflateStream_->avail_in == 0 && deflateStream_->avail_out != 0; |
365 | case StreamCodec::FlushOp::END: |
366 | return rc == Z_STREAM_END; |
367 | default: |
368 | throw std::invalid_argument("ZlibStreamCodec: Invalid flush" ); |
369 | } |
370 | } |
371 | |
372 | bool ZlibStreamCodec::doUncompressStream( |
373 | ByteRange& input, |
374 | MutableByteRange& output, |
375 | StreamCodec::FlushOp flush) { |
376 | if (needReset_) { |
377 | resetInflateStream(); |
378 | needReset_ = false; |
379 | } |
380 | DCHECK(inflateStream_.hasValue()); |
381 | // zlib will return Z_STREAM_ERROR if output.data() is null. |
382 | if (output.data() == nullptr) { |
383 | return false; |
384 | } |
385 | inflateStream_->next_in = const_cast<uint8_t*>(input.data()); |
386 | inflateStream_->avail_in = input.size(); |
387 | inflateStream_->next_out = output.data(); |
388 | inflateStream_->avail_out = output.size(); |
389 | SCOPE_EXIT { |
390 | input.advance(input.size() - inflateStream_->avail_in); |
391 | output.advance(output.size() - inflateStream_->avail_out); |
392 | }; |
393 | int const rc = zlibThrowOnError( |
394 | inflate(inflateStream_.get_pointer(), zlibTranslateFlush(flush))); |
395 | return rc == Z_STREAM_END; |
396 | } |
397 | |
398 | } // namespace |
399 | |
400 | Options defaultGzipOptions() { |
401 | return Options(Options::Format::GZIP); |
402 | } |
403 | |
404 | Options defaultZlibOptions() { |
405 | return Options(Options::Format::ZLIB); |
406 | } |
407 | |
408 | std::unique_ptr<Codec> getCodec(Options options, int level) { |
409 | return ZlibStreamCodec::createCodec(options, level); |
410 | } |
411 | |
412 | std::unique_ptr<StreamCodec> getStreamCodec(Options options, int level) { |
413 | return ZlibStreamCodec::createStream(options, level); |
414 | } |
415 | |
416 | } // namespace zlib |
417 | } // namespace io |
418 | } // namespace folly |
419 | |
420 | #endif // FOLLY_HAVE_LIBZ |
421 | |