1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
3 | #ident "$Id$" |
4 | /*====== |
5 | This file is part of PerconaFT. |
6 | |
7 | |
8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
9 | |
10 | PerconaFT is free software: you can redistribute it and/or modify |
11 | it under the terms of the GNU General Public License, version 2, |
12 | as published by the Free Software Foundation. |
13 | |
14 | PerconaFT is distributed in the hope that it will be useful, |
15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 | GNU General Public License for more details. |
18 | |
19 | You should have received a copy of the GNU General Public License |
20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
21 | |
22 | ---------------------------------------- |
23 | |
24 | PerconaFT is free software: you can redistribute it and/or modify |
25 | it under the terms of the GNU Affero General Public License, version 3, |
26 | as published by the Free Software Foundation. |
27 | |
28 | PerconaFT is distributed in the hope that it will be useful, |
29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
31 | GNU Affero General Public License for more details. |
32 | |
33 | You should have received a copy of the GNU Affero General Public License |
34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
35 | ======= */ |
36 | |
37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
38 | |
39 | #include <my_global.h> |
40 | #include <toku_portability.h> |
41 | #include <util/scoped_malloc.h> |
42 | |
43 | #include <zlib.h> |
44 | #include <lzma.h> |
45 | #include <snappy.h> |
46 | |
47 | #include "compress.h" |
48 | #include "memory.h" |
49 | #include "quicklz.h" |
50 | #include "toku_assert.h" |
51 | |
52 | static inline enum toku_compression_method |
53 | normalize_compression_method(enum toku_compression_method method) |
54 | // Effect: resolve "friendly" names like "fast" and "small" into their real values. |
55 | { |
56 | switch (method) { |
57 | case TOKU_DEFAULT_COMPRESSION_METHOD: |
58 | case TOKU_FAST_COMPRESSION_METHOD: |
59 | return TOKU_QUICKLZ_METHOD; |
60 | case TOKU_SMALL_COMPRESSION_METHOD: |
61 | return TOKU_LZMA_METHOD; |
62 | default: |
63 | return method; // everything else is fine |
64 | } |
65 | } |
66 | |
67 | size_t toku_compress_bound (enum toku_compression_method a, size_t size) |
68 | // See compress.h for the specification of this function. |
69 | { |
70 | a = normalize_compression_method(a); |
71 | switch (a) { |
72 | case TOKU_NO_COMPRESSION: |
73 | return size + 1; |
74 | case TOKU_LZMA_METHOD: |
75 | return 1+lzma_stream_buffer_bound(size); // We need one extra for the rfc1950-style header byte (bits -03 are TOKU_LZMA_METHOD (1), bits 4-7 are the compression level) |
76 | case TOKU_QUICKLZ_METHOD: |
77 | return size+400 + 1; // quicklz manual says 400 bytes is enough. We need one more byte for the rfc1950-style header byte. bits 0-3 are 9, bits 4-7 are the QLZ_COMPRESSION_LEVEL. |
78 | case TOKU_ZLIB_METHOD: |
79 | return compressBound (size); |
80 | case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: |
81 | return 2+deflateBound(nullptr, size); // We need one extra for the rfc1950-style header byte, and one extra to store windowBits (a bit over cautious about future upgrades maybe). |
82 | case TOKU_SNAPPY_METHOD: |
83 | return (1 + snappy::MaxCompressedLength(size)); |
84 | default: |
85 | break; |
86 | } |
87 | // fall through for bad enum (thus compiler can warn us if we didn't use all the enums |
88 | assert(0); return 0; |
89 | } |
90 | |
91 | void toku_compress (enum toku_compression_method a, |
92 | // the following types and naming conventions come from zlib.h |
93 | Bytef *dest, uLongf *destLen, |
94 | const Bytef *source, uLong sourceLen) |
95 | // See compress.h for the specification of this function. |
96 | { |
97 | static const int zlib_compression_level = 5; |
98 | static const int zlib_without_checksum_windowbits = -15; |
99 | |
100 | a = normalize_compression_method(a); |
101 | switch (a) { |
102 | case TOKU_NO_COMPRESSION: |
103 | dest[0] = TOKU_NO_COMPRESSION; |
104 | memcpy(dest + 1, source, sourceLen); |
105 | *destLen = sourceLen + 1; |
106 | return; |
107 | case TOKU_ZLIB_METHOD: { |
108 | int r = compress2(dest, destLen, source, sourceLen, zlib_compression_level); |
109 | assert(r == Z_OK); |
110 | assert((dest[0]&0xF) == TOKU_ZLIB_METHOD); |
111 | return; |
112 | } |
113 | case TOKU_QUICKLZ_METHOD: { |
114 | if (sourceLen==0) { |
115 | // quicklz requires at least one byte, so we handle this ourselves |
116 | assert(1 <= *destLen); |
117 | *destLen = 1; |
118 | } else { |
119 | toku::scoped_calloc qsc_buf(sizeof(qlz_state_compress)); |
120 | qlz_state_compress *qsc = reinterpret_cast<qlz_state_compress *>(qsc_buf.get()); |
121 | size_t actual_destlen = qlz_compress(source, (char*)(dest+1), sourceLen, qsc); |
122 | assert(actual_destlen + 1 <= *destLen); |
123 | // add one for the rfc1950-style header byte. |
124 | *destLen = actual_destlen + 1; |
125 | } |
126 | // Fill in that first byte |
127 | dest[0] = TOKU_QUICKLZ_METHOD + (QLZ_COMPRESSION_LEVEL << 4); |
128 | return; |
129 | } |
130 | case TOKU_LZMA_METHOD: { |
131 | const int lzma_compression_level = 2; |
132 | if (sourceLen==0) { |
133 | // lzma version 4.999 requires at least one byte, so we'll do it ourselves. |
134 | assert(1<=*destLen); |
135 | *destLen = 1; |
136 | } else { |
137 | size_t out_pos = 1; |
138 | lzma_ret r = lzma_easy_buffer_encode(lzma_compression_level, |
139 | LZMA_CHECK_NONE, NULL, |
140 | source, sourceLen, |
141 | dest, &out_pos, *destLen); |
142 | assert(out_pos < *destLen); |
143 | if (r != LZMA_OK) { |
144 | fprintf(stderr, "lzma_easy_buffer_encode() returned %d\n" , (int) r); |
145 | } |
146 | assert(r==LZMA_OK); |
147 | *destLen = out_pos; |
148 | } |
149 | dest[0] = TOKU_LZMA_METHOD + (lzma_compression_level << 4); |
150 | return; |
151 | } |
152 | case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: { |
153 | z_stream strm; |
154 | strm.zalloc = Z_NULL; |
155 | strm.zfree = Z_NULL; |
156 | strm.opaque = Z_NULL; |
157 | strm.next_in = const_cast<Bytef *>(source); |
158 | strm.avail_in = sourceLen; |
159 | int r = deflateInit2(&strm, zlib_compression_level, Z_DEFLATED, |
160 | zlib_without_checksum_windowbits, 8, Z_DEFAULT_STRATEGY); |
161 | lazy_assert(r == Z_OK); |
162 | strm.next_out = dest + 2; |
163 | strm.avail_out = *destLen - 2; |
164 | r = deflate(&strm, Z_FINISH); |
165 | lazy_assert(r == Z_STREAM_END); |
166 | r = deflateEnd(&strm); |
167 | lazy_assert(r == Z_OK); |
168 | *destLen = strm.total_out + 2; |
169 | dest[0] = TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD + (zlib_compression_level << 4); |
170 | dest[1] = zlib_without_checksum_windowbits; |
171 | return; |
172 | } |
173 | case TOKU_SNAPPY_METHOD: { |
174 | size_t tmp_dest= *destLen; |
175 | snappy::RawCompress((char*)source, sourceLen, (char*)dest + 1, |
176 | &tmp_dest); |
177 | *destLen= tmp_dest + 1; |
178 | dest[0] = TOKU_SNAPPY_METHOD; |
179 | return; |
180 | } |
181 | default: |
182 | break; |
183 | } |
184 | // default fall through to error. |
185 | assert(0); |
186 | } |
187 | |
188 | void toku_decompress (Bytef *dest, uLongf destLen, |
189 | const Bytef *source, uLongf sourceLen) |
190 | // See compress.h for the specification of this function. |
191 | { |
192 | assert(sourceLen>=1); // need at least one byte for the RFC header. |
193 | switch (source[0] & 0xF) { |
194 | case TOKU_NO_COMPRESSION: |
195 | memcpy(dest, source + 1, sourceLen - 1); |
196 | return; |
197 | case TOKU_ZLIB_METHOD: { |
198 | uLongf actual_destlen = destLen; |
199 | int r = uncompress(dest, &actual_destlen, source, sourceLen); |
200 | assert(r == Z_OK); |
201 | assert(actual_destlen == destLen); |
202 | return; |
203 | } |
204 | case TOKU_QUICKLZ_METHOD: |
205 | if (sourceLen>1) { |
206 | toku::scoped_calloc state_buf(sizeof(qlz_state_decompress)); |
207 | qlz_state_decompress *qsd = reinterpret_cast<qlz_state_decompress *>(state_buf.get()); |
208 | uLongf actual_destlen = qlz_decompress((char*)source+1, dest, qsd); |
209 | assert(actual_destlen == destLen); |
210 | } else { |
211 | // length 1 means there is no data, so do nothing. |
212 | assert(destLen==0); |
213 | } |
214 | return; |
215 | case TOKU_LZMA_METHOD: { |
216 | if (sourceLen>1) { |
217 | uint64_t memlimit = UINT64_MAX; |
218 | size_t out_pos = 0; |
219 | size_t in_pos = 1; |
220 | lzma_ret r = lzma_stream_buffer_decode(&memlimit, // memlimit, use UINT64_MAX to disable this check |
221 | 0, // flags |
222 | NULL, // allocator |
223 | source, &in_pos, sourceLen, |
224 | dest, &out_pos, destLen); |
225 | assert(r==LZMA_OK); |
226 | assert(out_pos == destLen); |
227 | } else { |
228 | // length 1 means there is no data, so do nothing. |
229 | assert(destLen==0); |
230 | } |
231 | return; |
232 | } |
233 | case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: { |
234 | z_stream strm; |
235 | strm.next_in = const_cast<Bytef *>(source + 2); |
236 | strm.avail_in = sourceLen - 2; |
237 | strm.zalloc = Z_NULL; |
238 | strm.zfree = Z_NULL; |
239 | strm.opaque = Z_NULL; |
240 | int8_t windowBits = source[1]; |
241 | int r = inflateInit2(&strm, windowBits); |
242 | lazy_assert(r == Z_OK); |
243 | strm.next_out = dest; |
244 | strm.avail_out = destLen; |
245 | r = inflate(&strm, Z_FINISH); |
246 | lazy_assert(r == Z_STREAM_END); |
247 | r = inflateEnd(&strm); |
248 | lazy_assert(r == Z_OK); |
249 | return; |
250 | } |
251 | case TOKU_SNAPPY_METHOD: { |
252 | bool r = snappy::RawUncompress((char*)source + 1, sourceLen - 1, (char*)dest); |
253 | assert(r); |
254 | return; |
255 | } |
256 | } |
257 | // default fall through to error. |
258 | assert(0); |
259 | } |
260 | |