| 1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
| 2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
| 3 | #ident "$Id$" |
| 4 | /*====== |
| 5 | This file is part of PerconaFT. |
| 6 | |
| 7 | |
| 8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
| 9 | |
| 10 | PerconaFT is free software: you can redistribute it and/or modify |
| 11 | it under the terms of the GNU General Public License, version 2, |
| 12 | as published by the Free Software Foundation. |
| 13 | |
| 14 | PerconaFT is distributed in the hope that it will be useful, |
| 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 17 | GNU General Public License for more details. |
| 18 | |
| 19 | You should have received a copy of the GNU General Public License |
| 20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 21 | |
| 22 | ---------------------------------------- |
| 23 | |
| 24 | PerconaFT is free software: you can redistribute it and/or modify |
| 25 | it under the terms of the GNU Affero General Public License, version 3, |
| 26 | as published by the Free Software Foundation. |
| 27 | |
| 28 | PerconaFT is distributed in the hope that it will be useful, |
| 29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 31 | GNU Affero General Public License for more details. |
| 32 | |
| 33 | You should have received a copy of the GNU Affero General Public License |
| 34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 35 | ======= */ |
| 36 | |
| 37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
| 38 | |
| 39 | #include <my_global.h> |
| 40 | #include <toku_portability.h> |
| 41 | #include <util/scoped_malloc.h> |
| 42 | |
| 43 | #include <zlib.h> |
| 44 | #include <lzma.h> |
| 45 | #include <snappy.h> |
| 46 | |
| 47 | #include "compress.h" |
| 48 | #include "memory.h" |
| 49 | #include "quicklz.h" |
| 50 | #include "toku_assert.h" |
| 51 | |
| 52 | static inline enum toku_compression_method |
| 53 | normalize_compression_method(enum toku_compression_method method) |
| 54 | // Effect: resolve "friendly" names like "fast" and "small" into their real values. |
| 55 | { |
| 56 | switch (method) { |
| 57 | case TOKU_DEFAULT_COMPRESSION_METHOD: |
| 58 | case TOKU_FAST_COMPRESSION_METHOD: |
| 59 | return TOKU_QUICKLZ_METHOD; |
| 60 | case TOKU_SMALL_COMPRESSION_METHOD: |
| 61 | return TOKU_LZMA_METHOD; |
| 62 | default: |
| 63 | return method; // everything else is fine |
| 64 | } |
| 65 | } |
| 66 | |
| 67 | size_t toku_compress_bound (enum toku_compression_method a, size_t size) |
| 68 | // See compress.h for the specification of this function. |
| 69 | { |
| 70 | a = normalize_compression_method(a); |
| 71 | switch (a) { |
| 72 | case TOKU_NO_COMPRESSION: |
| 73 | return size + 1; |
| 74 | case TOKU_LZMA_METHOD: |
| 75 | return 1+lzma_stream_buffer_bound(size); // We need one extra for the rfc1950-style header byte (bits -03 are TOKU_LZMA_METHOD (1), bits 4-7 are the compression level) |
| 76 | case TOKU_QUICKLZ_METHOD: |
| 77 | return size+400 + 1; // quicklz manual says 400 bytes is enough. We need one more byte for the rfc1950-style header byte. bits 0-3 are 9, bits 4-7 are the QLZ_COMPRESSION_LEVEL. |
| 78 | case TOKU_ZLIB_METHOD: |
| 79 | return compressBound (size); |
| 80 | case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: |
| 81 | return 2+deflateBound(nullptr, size); // We need one extra for the rfc1950-style header byte, and one extra to store windowBits (a bit over cautious about future upgrades maybe). |
| 82 | case TOKU_SNAPPY_METHOD: |
| 83 | return (1 + snappy::MaxCompressedLength(size)); |
| 84 | default: |
| 85 | break; |
| 86 | } |
| 87 | // fall through for bad enum (thus compiler can warn us if we didn't use all the enums |
| 88 | assert(0); return 0; |
| 89 | } |
| 90 | |
| 91 | void toku_compress (enum toku_compression_method a, |
| 92 | // the following types and naming conventions come from zlib.h |
| 93 | Bytef *dest, uLongf *destLen, |
| 94 | const Bytef *source, uLong sourceLen) |
| 95 | // See compress.h for the specification of this function. |
| 96 | { |
| 97 | static const int zlib_compression_level = 5; |
| 98 | static const int zlib_without_checksum_windowbits = -15; |
| 99 | |
| 100 | a = normalize_compression_method(a); |
| 101 | switch (a) { |
| 102 | case TOKU_NO_COMPRESSION: |
| 103 | dest[0] = TOKU_NO_COMPRESSION; |
| 104 | memcpy(dest + 1, source, sourceLen); |
| 105 | *destLen = sourceLen + 1; |
| 106 | return; |
| 107 | case TOKU_ZLIB_METHOD: { |
| 108 | int r = compress2(dest, destLen, source, sourceLen, zlib_compression_level); |
| 109 | assert(r == Z_OK); |
| 110 | assert((dest[0]&0xF) == TOKU_ZLIB_METHOD); |
| 111 | return; |
| 112 | } |
| 113 | case TOKU_QUICKLZ_METHOD: { |
| 114 | if (sourceLen==0) { |
| 115 | // quicklz requires at least one byte, so we handle this ourselves |
| 116 | assert(1 <= *destLen); |
| 117 | *destLen = 1; |
| 118 | } else { |
| 119 | toku::scoped_calloc qsc_buf(sizeof(qlz_state_compress)); |
| 120 | qlz_state_compress *qsc = reinterpret_cast<qlz_state_compress *>(qsc_buf.get()); |
| 121 | size_t actual_destlen = qlz_compress(source, (char*)(dest+1), sourceLen, qsc); |
| 122 | assert(actual_destlen + 1 <= *destLen); |
| 123 | // add one for the rfc1950-style header byte. |
| 124 | *destLen = actual_destlen + 1; |
| 125 | } |
| 126 | // Fill in that first byte |
| 127 | dest[0] = TOKU_QUICKLZ_METHOD + (QLZ_COMPRESSION_LEVEL << 4); |
| 128 | return; |
| 129 | } |
| 130 | case TOKU_LZMA_METHOD: { |
| 131 | const int lzma_compression_level = 2; |
| 132 | if (sourceLen==0) { |
| 133 | // lzma version 4.999 requires at least one byte, so we'll do it ourselves. |
| 134 | assert(1<=*destLen); |
| 135 | *destLen = 1; |
| 136 | } else { |
| 137 | size_t out_pos = 1; |
| 138 | lzma_ret r = lzma_easy_buffer_encode(lzma_compression_level, |
| 139 | LZMA_CHECK_NONE, NULL, |
| 140 | source, sourceLen, |
| 141 | dest, &out_pos, *destLen); |
| 142 | assert(out_pos < *destLen); |
| 143 | if (r != LZMA_OK) { |
| 144 | fprintf(stderr, "lzma_easy_buffer_encode() returned %d\n" , (int) r); |
| 145 | } |
| 146 | assert(r==LZMA_OK); |
| 147 | *destLen = out_pos; |
| 148 | } |
| 149 | dest[0] = TOKU_LZMA_METHOD + (lzma_compression_level << 4); |
| 150 | return; |
| 151 | } |
| 152 | case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: { |
| 153 | z_stream strm; |
| 154 | strm.zalloc = Z_NULL; |
| 155 | strm.zfree = Z_NULL; |
| 156 | strm.opaque = Z_NULL; |
| 157 | strm.next_in = const_cast<Bytef *>(source); |
| 158 | strm.avail_in = sourceLen; |
| 159 | int r = deflateInit2(&strm, zlib_compression_level, Z_DEFLATED, |
| 160 | zlib_without_checksum_windowbits, 8, Z_DEFAULT_STRATEGY); |
| 161 | lazy_assert(r == Z_OK); |
| 162 | strm.next_out = dest + 2; |
| 163 | strm.avail_out = *destLen - 2; |
| 164 | r = deflate(&strm, Z_FINISH); |
| 165 | lazy_assert(r == Z_STREAM_END); |
| 166 | r = deflateEnd(&strm); |
| 167 | lazy_assert(r == Z_OK); |
| 168 | *destLen = strm.total_out + 2; |
| 169 | dest[0] = TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD + (zlib_compression_level << 4); |
| 170 | dest[1] = zlib_without_checksum_windowbits; |
| 171 | return; |
| 172 | } |
| 173 | case TOKU_SNAPPY_METHOD: { |
| 174 | size_t tmp_dest= *destLen; |
| 175 | snappy::RawCompress((char*)source, sourceLen, (char*)dest + 1, |
| 176 | &tmp_dest); |
| 177 | *destLen= tmp_dest + 1; |
| 178 | dest[0] = TOKU_SNAPPY_METHOD; |
| 179 | return; |
| 180 | } |
| 181 | default: |
| 182 | break; |
| 183 | } |
| 184 | // default fall through to error. |
| 185 | assert(0); |
| 186 | } |
| 187 | |
| 188 | void toku_decompress (Bytef *dest, uLongf destLen, |
| 189 | const Bytef *source, uLongf sourceLen) |
| 190 | // See compress.h for the specification of this function. |
| 191 | { |
| 192 | assert(sourceLen>=1); // need at least one byte for the RFC header. |
| 193 | switch (source[0] & 0xF) { |
| 194 | case TOKU_NO_COMPRESSION: |
| 195 | memcpy(dest, source + 1, sourceLen - 1); |
| 196 | return; |
| 197 | case TOKU_ZLIB_METHOD: { |
| 198 | uLongf actual_destlen = destLen; |
| 199 | int r = uncompress(dest, &actual_destlen, source, sourceLen); |
| 200 | assert(r == Z_OK); |
| 201 | assert(actual_destlen == destLen); |
| 202 | return; |
| 203 | } |
| 204 | case TOKU_QUICKLZ_METHOD: |
| 205 | if (sourceLen>1) { |
| 206 | toku::scoped_calloc state_buf(sizeof(qlz_state_decompress)); |
| 207 | qlz_state_decompress *qsd = reinterpret_cast<qlz_state_decompress *>(state_buf.get()); |
| 208 | uLongf actual_destlen = qlz_decompress((char*)source+1, dest, qsd); |
| 209 | assert(actual_destlen == destLen); |
| 210 | } else { |
| 211 | // length 1 means there is no data, so do nothing. |
| 212 | assert(destLen==0); |
| 213 | } |
| 214 | return; |
| 215 | case TOKU_LZMA_METHOD: { |
| 216 | if (sourceLen>1) { |
| 217 | uint64_t memlimit = UINT64_MAX; |
| 218 | size_t out_pos = 0; |
| 219 | size_t in_pos = 1; |
| 220 | lzma_ret r = lzma_stream_buffer_decode(&memlimit, // memlimit, use UINT64_MAX to disable this check |
| 221 | 0, // flags |
| 222 | NULL, // allocator |
| 223 | source, &in_pos, sourceLen, |
| 224 | dest, &out_pos, destLen); |
| 225 | assert(r==LZMA_OK); |
| 226 | assert(out_pos == destLen); |
| 227 | } else { |
| 228 | // length 1 means there is no data, so do nothing. |
| 229 | assert(destLen==0); |
| 230 | } |
| 231 | return; |
| 232 | } |
| 233 | case TOKU_ZLIB_WITHOUT_CHECKSUM_METHOD: { |
| 234 | z_stream strm; |
| 235 | strm.next_in = const_cast<Bytef *>(source + 2); |
| 236 | strm.avail_in = sourceLen - 2; |
| 237 | strm.zalloc = Z_NULL; |
| 238 | strm.zfree = Z_NULL; |
| 239 | strm.opaque = Z_NULL; |
| 240 | int8_t windowBits = source[1]; |
| 241 | int r = inflateInit2(&strm, windowBits); |
| 242 | lazy_assert(r == Z_OK); |
| 243 | strm.next_out = dest; |
| 244 | strm.avail_out = destLen; |
| 245 | r = inflate(&strm, Z_FINISH); |
| 246 | lazy_assert(r == Z_STREAM_END); |
| 247 | r = inflateEnd(&strm); |
| 248 | lazy_assert(r == Z_OK); |
| 249 | return; |
| 250 | } |
| 251 | case TOKU_SNAPPY_METHOD: { |
| 252 | bool r = snappy::RawUncompress((char*)source + 1, sourceLen - 1, (char*)dest); |
| 253 | assert(r); |
| 254 | return; |
| 255 | } |
| 256 | } |
| 257 | // default fall through to error. |
| 258 | assert(0); |
| 259 | } |
| 260 | |