| 1 | /* |
| 2 | * Copyright 2018 The Chromium Authors. All rights reserved. |
| 3 | * Use of this source code is governed by a BSD-style license that can be |
| 4 | * found in the Chromium source repository LICENSE file. |
| 5 | * |
| 6 | * A benchmark test harness for measuring decoding performance of gzip or zlib |
| 7 | * (deflate) encoded compressed data. Given a file containing any data, encode |
| 8 | * (compress) it into gzip or zlib format and then decode (uncompress). Output |
| 9 | * the median and maximum encoding and decoding rates in MB/s. |
| 10 | * |
| 11 | * Raw deflate (no gzip or zlib stream wrapper) mode is also supported. Select |
| 12 | * it with the [raw] argument. Use the [gzip] [zlib] arguments to select those |
| 13 | * stream wrappers. |
| 14 | * |
| 15 | * Note this code can be compiled outside of the Chromium build system against |
| 16 | * the system zlib (-lz) with g++ or clang++ as follows: |
| 17 | * |
* g++|clang++ -O3 -Wall -std=c++11 zlib_bench.cc -lstdc++ -lz
| 19 | */ |
| 20 | |
#include <algorithm>
#include <chrono>
#include <fstream>
#include <memory>
#include <new>
#include <string>
#include <vector>

#include <ctype.h>
#include <memory.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "zlib.h"
| 34 | |
/*
 * Prints |error| followed by the numeric |code| in parentheses to stderr,
 * then terminates the process using |code| as the exit status.
 */
void error_exit(const char* error, int code) {
  fprintf(stderr, "%s (%d)\n", error, code);
  exit(code);
}
| 39 | |
/*
 * Returns a writable pointer to the first character of |s|, or nullptr when
 * the string is empty.
 */
inline char* string_data(std::string* s) {
  if (s->empty())
    return nullptr;
  return &s->front();
}
| 43 | |
/*
 * Owning byte buffer of |size| chars. The allocation is non-throwing, so
 * |data| is null when the allocation fails; |size| still holds the requested
 * size in that case.
 */
struct Data {
  Data(size_t s) : data(new (std::nothrow) char[s]), size(s) {}

  std::unique_ptr<char[]> data;
  size_t size;
};
| 49 | |
| 50 | Data read_file_data_or_exit(const char* name) { |
| 51 | std::ifstream file(name, std::ios::in | std::ios::binary); |
| 52 | if (!file) { |
| 53 | perror(name); |
| 54 | exit(1); |
| 55 | } |
| 56 | |
| 57 | file.seekg(0, std::ios::end); |
| 58 | Data data(file.tellg()); |
| 59 | file.seekg(0, std::ios::beg); |
| 60 | |
| 61 | if (file && data.data) |
| 62 | file.read(data.data.get(), data.size); |
| 63 | |
| 64 | if (!file || !data.data || !data.size) { |
| 65 | perror((std::string("failed: reading " ) + name).c_str()); |
| 66 | exit(1); |
| 67 | } |
| 68 | |
| 69 | return data; |
| 70 | } |
| 71 | |
| 72 | size_t zlib_estimate_compressed_size(size_t input_size) { |
| 73 | return compressBound(input_size); |
| 74 | } |
| 75 | |
/*
 * Stream wrapper (container format) placed around the DEFLATE data.
 */
enum zlib_wrapper {
  kWrapperNONE = 0,  // no wrapper selected; rejected by the helpers below
  kWrapperZLIB = 1,  // zlib stream wrapper
  kWrapperGZIP = 2,  // gzip stream wrapper
  kWrapperZRAW = 3,  // raw DEFLATE, no wrapper
};
| 82 | |
| 83 | inline int zlib_stream_wrapper_type(zlib_wrapper type) { |
| 84 | if (type == kWrapperZLIB) // zlib DEFLATE stream wrapper |
| 85 | return MAX_WBITS; |
| 86 | if (type == kWrapperGZIP) // gzip DEFLATE stream wrapper |
| 87 | return MAX_WBITS + 16; |
| 88 | if (type == kWrapperZRAW) // no wrapper, use raw DEFLATE |
| 89 | return -MAX_WBITS; |
| 90 | error_exit("bad wrapper type" , int(type)); |
| 91 | return 0; |
| 92 | } |
| 93 | |
| 94 | const char* zlib_wrapper_name(zlib_wrapper type) { |
| 95 | if (type == kWrapperZLIB) |
| 96 | return "ZLIB" ; |
| 97 | if (type == kWrapperGZIP) |
| 98 | return "GZIP" ; |
| 99 | if (type == kWrapperZRAW) |
| 100 | return "RAW" ; |
| 101 | error_exit("bad wrapper type" , int(type)); |
| 102 | return nullptr; |
| 103 | } |
| 104 | |
| 105 | static int zlib_strategy = Z_DEFAULT_STRATEGY; |
| 106 | |
| 107 | const char* zlib_level_strategy_name(int compression_level) { |
| 108 | if (compression_level == 0) |
| 109 | return "" ; // strategy is meaningless at level 0 |
| 110 | if (zlib_strategy == Z_HUFFMAN_ONLY) |
| 111 | return "huffman " ; |
| 112 | if (zlib_strategy == Z_RLE) |
| 113 | return "rle " ; |
| 114 | if (zlib_strategy == Z_DEFAULT_STRATEGY) |
| 115 | return "" ; |
| 116 | error_exit("bad strategy" , zlib_strategy); |
| 117 | return nullptr; |
| 118 | } |
| 119 | |
| 120 | static int zlib_compression_level = Z_DEFAULT_COMPRESSION; |
| 121 | |
| 122 | void zlib_compress( |
| 123 | const zlib_wrapper type, |
| 124 | const char* input, |
| 125 | const size_t input_size, |
| 126 | std::string* output, |
| 127 | bool resize_output = false) |
| 128 | { |
| 129 | if (resize_output) |
| 130 | output->resize(zlib_estimate_compressed_size(input_size)); |
| 131 | size_t output_size = output->size(); |
| 132 | |
| 133 | z_stream stream; |
| 134 | memset(&stream, 0, sizeof(stream)); |
| 135 | |
| 136 | int result = deflateInit2(&stream, zlib_compression_level, Z_DEFLATED, |
| 137 | zlib_stream_wrapper_type(type), MAX_MEM_LEVEL, zlib_strategy); |
| 138 | if (result != Z_OK) |
| 139 | error_exit("deflateInit2 failed" , result); |
| 140 | |
| 141 | stream.next_out = (Bytef*)string_data(output); |
| 142 | stream.avail_out = (uInt)output_size; |
| 143 | stream.next_in = (z_const Bytef*)input; |
| 144 | stream.avail_in = (uInt)input_size; |
| 145 | |
| 146 | result = deflate(&stream, Z_FINISH); |
| 147 | if (result == Z_STREAM_END) |
| 148 | output_size = stream.total_out; |
| 149 | result |= deflateEnd(&stream); |
| 150 | if (result != Z_STREAM_END) |
| 151 | error_exit("compress failed" , result); |
| 152 | |
| 153 | if (resize_output) |
| 154 | output->resize(output_size); |
| 155 | } |
| 156 | |
| 157 | void zlib_uncompress( |
| 158 | const zlib_wrapper type, |
| 159 | const std::string& input, |
| 160 | const size_t output_size, |
| 161 | std::string* output) |
| 162 | { |
| 163 | z_stream stream; |
| 164 | memset(&stream, 0, sizeof(stream)); |
| 165 | |
| 166 | int result = inflateInit2(&stream, zlib_stream_wrapper_type(type)); |
| 167 | if (result != Z_OK) |
| 168 | error_exit("inflateInit2 failed" , result); |
| 169 | |
| 170 | stream.next_out = (Bytef*)string_data(output); |
| 171 | stream.avail_out = (uInt)output->size(); |
| 172 | stream.next_in = (z_const Bytef*)input.data(); |
| 173 | stream.avail_in = (uInt)input.size(); |
| 174 | |
| 175 | result = inflate(&stream, Z_FINISH); |
| 176 | if (stream.total_out != output_size) |
| 177 | result = Z_DATA_ERROR; |
| 178 | result |= inflateEnd(&stream); |
| 179 | if (result == Z_STREAM_END) |
| 180 | return; |
| 181 | |
| 182 | std::string error("uncompress failed: " ); |
| 183 | if (stream.msg) |
| 184 | error.append(stream.msg); |
| 185 | error_exit(error.c_str(), result); |
| 186 | } |
| 187 | |
/*
 * Checks that |output| holds exactly the |size| bytes found at |input|.
 * Returns normally on a match; otherwise prints a diagnostic to stderr and
 * exits with status 3.
 */
void verify_equal(const char* input, size_t size, std::string* output) {
  const bool same_size = output->size() == size;

  // Skip memcmp() entirely for zero-length data: passing it a null pointer
  // is undefined behavior even when the byte count is 0.
  if (same_size && (size == 0 || memcmp(output->data(), input, size) == 0))
    return;

  fprintf(stderr, "uncompressed data does not match the input data\n");
  exit(3);
}
| 195 | |
| 196 | void zlib_file(const char* name, const zlib_wrapper type) { |
| 197 | /* |
| 198 | * Read the file data. |
| 199 | */ |
| 200 | const auto file = read_file_data_or_exit(name); |
| 201 | const int length = static_cast<int>(file.size); |
| 202 | const char* data = file.data.get(); |
| 203 | |
| 204 | /* |
| 205 | * Report compression strategy and file name. |
| 206 | */ |
| 207 | const char* strategy = zlib_level_strategy_name(zlib_compression_level); |
| 208 | printf("%s%-40s :\n" , strategy, name); |
| 209 | |
| 210 | /* |
| 211 | * Chop the data into blocks. |
| 212 | */ |
| 213 | const int block_size = 1 << 20; |
| 214 | const int blocks = (length + block_size - 1) / block_size; |
| 215 | |
| 216 | std::vector<const char*> input(blocks); |
| 217 | std::vector<size_t> input_length(blocks); |
| 218 | std::vector<std::string> compressed(blocks); |
| 219 | std::vector<std::string> output(blocks); |
| 220 | |
| 221 | for (int b = 0; b < blocks; ++b) { |
| 222 | int input_start = b * block_size; |
| 223 | int input_limit = std::min<int>((b + 1) * block_size, length); |
| 224 | input[b] = data + input_start; |
| 225 | input_length[b] = input_limit - input_start; |
| 226 | } |
| 227 | |
| 228 | /* |
| 229 | * Run the zlib compress/uncompress loop a few times with |repeats| to |
| 230 | * process about 10MB of data if the length is small relative to 10MB. |
| 231 | * If length is large relative to 10MB, process the data once. |
| 232 | */ |
| 233 | const int mega_byte = 1024 * 1024; |
| 234 | const int repeats = (10 * mega_byte + length) / (length + 1); |
| 235 | const int runs = 5; |
| 236 | double ctime[runs]; |
| 237 | double utime[runs]; |
| 238 | |
| 239 | for (int run = 0; run < runs; ++run) { |
| 240 | const auto now = [] { return std::chrono::steady_clock::now(); }; |
| 241 | |
| 242 | // Pre-grow the output buffer so we don't measure string resize time. |
| 243 | for (int b = 0; b < blocks; ++b) |
| 244 | compressed[b].resize(zlib_estimate_compressed_size(block_size)); |
| 245 | |
| 246 | auto start = now(); |
| 247 | for (int b = 0; b < blocks; ++b) |
| 248 | for (int r = 0; r < repeats; ++r) |
| 249 | zlib_compress(type, input[b], input_length[b], &compressed[b]); |
| 250 | ctime[run] = std::chrono::duration<double>(now() - start).count(); |
| 251 | |
| 252 | // Compress again, resizing compressed, so we don't leave junk at the |
| 253 | // end of the compressed string that could confuse zlib_uncompress(). |
| 254 | for (int b = 0; b < blocks; ++b) |
| 255 | zlib_compress(type, input[b], input_length[b], &compressed[b], true); |
| 256 | |
| 257 | for (int b = 0; b < blocks; ++b) |
| 258 | output[b].resize(input_length[b]); |
| 259 | |
| 260 | start = now(); |
| 261 | for (int r = 0; r < repeats; ++r) |
| 262 | for (int b = 0; b < blocks; ++b) |
| 263 | zlib_uncompress(type, compressed[b], input_length[b], &output[b]); |
| 264 | utime[run] = std::chrono::duration<double>(now() - start).count(); |
| 265 | |
| 266 | for (int b = 0; b < blocks; ++b) |
| 267 | verify_equal(input[b], input_length[b], &output[b]); |
| 268 | } |
| 269 | |
| 270 | /* |
| 271 | * Output the median/maximum compress/uncompress rates in MB/s. |
| 272 | */ |
| 273 | size_t output_length = 0; |
| 274 | for (size_t i = 0; i < compressed.size(); ++i) |
| 275 | output_length += compressed[i].size(); |
| 276 | |
| 277 | std::sort(ctime, ctime + runs); |
| 278 | std::sort(utime, utime + runs); |
| 279 | |
| 280 | double deflate_rate_med = length * repeats / mega_byte / ctime[runs / 2]; |
| 281 | double inflate_rate_med = length * repeats / mega_byte / utime[runs / 2]; |
| 282 | double deflate_rate_max = length * repeats / mega_byte / ctime[0]; |
| 283 | double inflate_rate_max = length * repeats / mega_byte / utime[0]; |
| 284 | |
| 285 | // type, block size, compression ratio, etc |
| 286 | printf("%s: [b %dM] bytes %6d -> %6u %4.1f%%" , |
| 287 | zlib_wrapper_name(type), block_size / (1 << 20), length, |
| 288 | static_cast<unsigned>(output_length), output_length * 100.0 / length); |
| 289 | |
| 290 | // compress / uncompress median (max) rates |
| 291 | printf(" comp %5.1f (%5.1f) MB/s uncomp %5.1f (%5.1f) MB/s\n" , |
| 292 | deflate_rate_med, deflate_rate_max, inflate_rate_med, inflate_rate_max); |
| 293 | } |
| 294 | |
// Index into argv of the next command line argument to be consumed.
static int argn = 1;

/*
 * If the next unconsumed argument equals |option|, consumes it (advancing
 * argn) and returns it. Otherwise returns nullptr and leaves argn unchanged.
 */
char* get_option(int argc, char* argv[], const char* option) {
  if (argn >= argc)
    return nullptr;

  char* candidate = argv[argn];
  if (strcmp(candidate, option) != 0)
    return nullptr;

  ++argn;
  return candidate;
}
| 302 | |
| 303 | bool get_compression(int argc, char* argv[], int* value) { |
| 304 | if (argn < argc) |
| 305 | *value = isdigit(argv[argn][0]) ? atoi(argv[argn++]) : -1; |
| 306 | return *value >= 0 && *value <= 9; |
| 307 | } |
| 308 | |
/*
 * Prints the command line usage for |program| to stdout and exits with
 * status 1.
 */
void usage_exit(const char* program) {
  printf("usage: %s gzip|zlib|raw"
         " [--compression 0:9] [--huffman|--rle] files...\n",
         program);
  exit(1);
}
| 316 | |
| 317 | int main(int argc, char* argv[]) { |
| 318 | zlib_wrapper type; |
| 319 | if (get_option(argc, argv, "zlib" )) |
| 320 | type = kWrapperZLIB; |
| 321 | else if (get_option(argc, argv, "gzip" )) |
| 322 | type = kWrapperGZIP; |
| 323 | else if (get_option(argc, argv, "raw" )) |
| 324 | type = kWrapperZRAW; |
| 325 | else |
| 326 | usage_exit(argv[0]); |
| 327 | |
| 328 | while (argn < argc && argv[argn][0] == '-') { |
| 329 | if (get_option(argc, argv, "--compression" )) { |
| 330 | if (!get_compression(argc, argv, &zlib_compression_level)) |
| 331 | usage_exit(argv[0]); |
| 332 | } else if (get_option(argc, argv, "--huffman" )) { |
| 333 | zlib_strategy = Z_HUFFMAN_ONLY; |
| 334 | } else if (get_option(argc, argv, "--rle" )) { |
| 335 | zlib_strategy = Z_RLE; |
| 336 | } else { |
| 337 | usage_exit(argv[0]); |
| 338 | } |
| 339 | } |
| 340 | |
| 341 | if (argn >= argc) |
| 342 | usage_exit(argv[0]); |
| 343 | while (argn < argc) |
| 344 | zlib_file(argv[argn++], type); |
| 345 | |
| 346 | return 0; |
| 347 | } |
| 348 | |