1/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
3#ident "$Id$"
4/*======
5This file is part of PerconaFT.
6
7
8Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
9
10 PerconaFT is free software: you can redistribute it and/or modify
11 it under the terms of the GNU General Public License, version 2,
12 as published by the Free Software Foundation.
13
14 PerconaFT is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
21
22----------------------------------------
23
24 PerconaFT is free software: you can redistribute it and/or modify
25 it under the terms of the GNU Affero General Public License, version 3,
26 as published by the Free Software Foundation.
27
28 PerconaFT is distributed in the hope that it will be useful,
29 but WITHOUT ANY WARRANTY; without even the implied warranty of
30 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 GNU Affero General Public License for more details.
32
33 You should have received a copy of the GNU Affero General Public License
34 along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
35======= */
36
37#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
38
39#include <my_global.h>
40#include <toku_portability.h>
41#include <util/scoped_malloc.h>
42
43#include <zlib.h>
44#include <lzma.h>
45#include <snappy.h>
46
47#include "compress.h"
48#include "memory.h"
49#include "quicklz.h"
50#include "toku_assert.h"
51
52static inline enum toku_compression_method
53normalize_compression_method(enum toku_compression_method method)
54// Effect: resolve "friendly" names like "fast" and "small" into their real values.
55{
56 switch (method) {
57 case TOKU_DEFAULT_COMPRESSION_METHOD:
58 case TOKU_FAST_COMPRESSION_METHOD:
59 return TOKU_QUICKLZ_METHOD;
60 case TOKU_SMALL_COMPRESSION_METHOD:
61 return TOKU_LZMA_METHOD;
62 default:
63 return method; // everything else is fine
64 }
65}
66
67size_t toku_compress_bound (enum toku_compression_method a, size_t size)
68// See compress.h for the specification of this function.
69{
70 a = normalize_compression_method(a);
71 switch (a) {
72 case TOKU_NO_COMPRESSION:
73 return size + 1;
74 case TOKU_LZMA_METHOD:
75 return 1+lzma_stream_buffer_bound(size); // We need one extra for the rfc1950-style header byte (bits -03 are TOKU_LZMA_METHOD (1), bits 4-7 are the compression level)
76 case TOKU_QUICKLZ_METHOD:
77 return size+400 + 1; // quicklz manual says 400 bytes is enough. We need one more byte for the rfc1950-style header byte. bits 0-3 are 9, bits 4-7 are the QLZ_COMPRESSION_LEVEL.
78 case TOKU_ZLIB_METHOD:
79 return compressBound (size);
80 case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD:
81 return 2+deflateBound(nullptr, size); // We need one extra for the rfc1950-style header byte, and one extra to store windowBits (a bit over cautious about future upgrades maybe).
82 case TOKU_SNAPPY_METHOD:
83 return (1 + snappy::MaxCompressedLength(size));
84 default:
85 break;
86 }
87 // fall through for bad enum (thus compiler can warn us if we didn't use all the enums
88 assert(0); return 0;
89}
90
91void toku_compress (enum toku_compression_method a,
92 // the following types and naming conventions come from zlib.h
93 Bytef *dest, uLongf *destLen,
94 const Bytef *source, uLong sourceLen)
95// See compress.h for the specification of this function.
96{
97 static const int zlib_compression_level = 5;
98 static const int zlib_without_checksum_windowbits = -15;
99
100 a = normalize_compression_method(a);
101 switch (a) {
102 case TOKU_NO_COMPRESSION:
103 dest[0] = TOKU_NO_COMPRESSION;
104 memcpy(dest + 1, source, sourceLen);
105 *destLen = sourceLen + 1;
106 return;
107 case TOKU_ZLIB_METHOD: {
108 int r = compress2(dest, destLen, source, sourceLen, zlib_compression_level);
109 assert(r == Z_OK);
110 assert((dest[0]&0xF) == TOKU_ZLIB_METHOD);
111 return;
112 }
113 case TOKU_QUICKLZ_METHOD: {
114 if (sourceLen==0) {
115 // quicklz requires at least one byte, so we handle this ourselves
116 assert(1 <= *destLen);
117 *destLen = 1;
118 } else {
119 toku::scoped_calloc qsc_buf(sizeof(qlz_state_compress));
120 qlz_state_compress *qsc = reinterpret_cast<qlz_state_compress *>(qsc_buf.get());
121 size_t actual_destlen = qlz_compress(source, (char*)(dest+1), sourceLen, qsc);
122 assert(actual_destlen + 1 <= *destLen);
123 // add one for the rfc1950-style header byte.
124 *destLen = actual_destlen + 1;
125 }
126 // Fill in that first byte
127 dest[0] = TOKU_QUICKLZ_METHOD + (QLZ_COMPRESSION_LEVEL << 4);
128 return;
129 }
130 case TOKU_LZMA_METHOD: {
131 const int lzma_compression_level = 2;
132 if (sourceLen==0) {
133 // lzma version 4.999 requires at least one byte, so we'll do it ourselves.
134 assert(1<=*destLen);
135 *destLen = 1;
136 } else {
137 size_t out_pos = 1;
138 lzma_ret r = lzma_easy_buffer_encode(lzma_compression_level,
139 LZMA_CHECK_NONE, NULL,
140 source, sourceLen,
141 dest, &out_pos, *destLen);
142 assert(out_pos < *destLen);
143 if (r != LZMA_OK) {
144 fprintf(stderr, "lzma_easy_buffer_encode() returned %d\n", (int) r);
145 }
146 assert(r==LZMA_OK);
147 *destLen = out_pos;
148 }
149 dest[0] = TOKU_LZMA_METHOD + (lzma_compression_level << 4);
150 return;
151 }
152 case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: {
153 z_stream strm;
154 strm.zalloc = Z_NULL;
155 strm.zfree = Z_NULL;
156 strm.opaque = Z_NULL;
157 strm.next_in = const_cast<Bytef *>(source);
158 strm.avail_in = sourceLen;
159 int r = deflateInit2(&strm, zlib_compression_level, Z_DEFLATED,
160 zlib_without_checksum_windowbits, 8, Z_DEFAULT_STRATEGY);
161 lazy_assert(r == Z_OK);
162 strm.next_out = dest + 2;
163 strm.avail_out = *destLen - 2;
164 r = deflate(&strm, Z_FINISH);
165 lazy_assert(r == Z_STREAM_END);
166 r = deflateEnd(&strm);
167 lazy_assert(r == Z_OK);
168 *destLen = strm.total_out + 2;
169 dest[0] = TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD + (zlib_compression_level << 4);
170 dest[1] = zlib_without_checksum_windowbits;
171 return;
172 }
173 case TOKU_SNAPPY_METHOD: {
174 size_t tmp_dest= *destLen;
175 snappy::RawCompress((char*)source, sourceLen, (char*)dest + 1,
176 &tmp_dest);
177 *destLen= tmp_dest + 1;
178 dest[0] = TOKU_SNAPPY_METHOD;
179 return;
180 }
181 default:
182 break;
183 }
184 // default fall through to error.
185 assert(0);
186}
187
188void toku_decompress (Bytef *dest, uLongf destLen,
189 const Bytef *source, uLongf sourceLen)
190// See compress.h for the specification of this function.
191{
192 assert(sourceLen>=1); // need at least one byte for the RFC header.
193 switch (source[0] & 0xF) {
194 case TOKU_NO_COMPRESSION:
195 memcpy(dest, source + 1, sourceLen - 1);
196 return;
197 case TOKU_ZLIB_METHOD: {
198 uLongf actual_destlen = destLen;
199 int r = uncompress(dest, &actual_destlen, source, sourceLen);
200 assert(r == Z_OK);
201 assert(actual_destlen == destLen);
202 return;
203 }
204 case TOKU_QUICKLZ_METHOD:
205 if (sourceLen>1) {
206 toku::scoped_calloc state_buf(sizeof(qlz_state_decompress));
207 qlz_state_decompress *qsd = reinterpret_cast<qlz_state_decompress *>(state_buf.get());
208 uLongf actual_destlen = qlz_decompress((char*)source+1, dest, qsd);
209 assert(actual_destlen == destLen);
210 } else {
211 // length 1 means there is no data, so do nothing.
212 assert(destLen==0);
213 }
214 return;
215 case TOKU_LZMA_METHOD: {
216 if (sourceLen>1) {
217 uint64_t memlimit = UINT64_MAX;
218 size_t out_pos = 0;
219 size_t in_pos = 1;
220 lzma_ret r = lzma_stream_buffer_decode(&memlimit, // memlimit, use UINT64_MAX to disable this check
221 0, // flags
222 NULL, // allocator
223 source, &in_pos, sourceLen,
224 dest, &out_pos, destLen);
225 assert(r==LZMA_OK);
226 assert(out_pos == destLen);
227 } else {
228 // length 1 means there is no data, so do nothing.
229 assert(destLen==0);
230 }
231 return;
232 }
233 case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: {
234 z_stream strm;
235 strm.next_in = const_cast<Bytef *>(source + 2);
236 strm.avail_in = sourceLen - 2;
237 strm.zalloc = Z_NULL;
238 strm.zfree = Z_NULL;
239 strm.opaque = Z_NULL;
240 int8_t windowBits = source[1];
241 int r = inflateInit2(&strm, windowBits);
242 lazy_assert(r == Z_OK);
243 strm.next_out = dest;
244 strm.avail_out = destLen;
245 r = inflate(&strm, Z_FINISH);
246 lazy_assert(r == Z_STREAM_END);
247 r = inflateEnd(&strm);
248 lazy_assert(r == Z_OK);
249 return;
250 }
251 case TOKU_SNAPPY_METHOD: {
252 bool r = snappy::RawUncompress((char*)source + 1, sourceLen - 1, (char*)dest);
253 assert(r);
254 return;
255 }
256 }
257 // default fall through to error.
258 assert(0);
259}
260