1/*
2 * Copyright 2018 The Chromium Authors. All rights reserved.
3 * Use of this source code is governed by a BSD-style license that can be
4 * found in the Chromium source repository LICENSE file.
5 *
6 * A benchmark test harness for measuring decoding performance of gzip or zlib
7 * (deflate) encoded compressed data. Given a file containing any data, encode
8 * (compress) it into gzip or zlib format and then decode (uncompress). Output
9 * the median and maximum encoding and decoding rates in MB/s.
10 *
11 * Raw deflate (no gzip or zlib stream wrapper) mode is also supported. Select
12 * it with the [raw] argument. Use the [gzip] [zlib] arguments to select those
13 * stream wrappers.
14 *
15 * Note this code can be compiled outside of the Chromium build system against
16 * the system zlib (-lz) with g++ or clang++ as follows:
17 *
 *   g++|clang++ -O3 -Wall -std=c++11 zlib_bench.cc -lstdc++ -lz
19 */
20
#include <algorithm>
#include <chrono>
#include <fstream>
#include <limits>
#include <memory>
#include <new>
#include <string>
#include <vector>

#include <ctype.h>
#include <memory.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "zlib.h"
34
// Writes |error| followed by the numeric |code| in parentheses to stderr,
// then terminates the process with |code| as its exit status.
void error_exit(const char* error, int code) {
  const int status = code;
  fprintf(stderr, "%s (%d)\n", error, status);
  exit(status);
}
39
// Returns a writable pointer to the first character of |s|, or nullptr when
// |s| is empty.
inline char* string_data(std::string* s) {
  if (s->empty())
    return nullptr;
  return &(*s)[0];
}
43
// A heap buffer of |size| bytes. The buffer is allocated with the
// non-throwing form of new[], so |data| is null if the allocation failed;
// users must check |data| before touching the bytes.
struct Data {
  Data(size_t s) : data(new (std::nothrow) char[s]), size(s) {}
  std::unique_ptr<char[]> data;  // owned buffer, null on allocation failure
  size_t size;                   // requested buffer size in bytes
};
49
50Data read_file_data_or_exit(const char* name) {
51 std::ifstream file(name, std::ios::in | std::ios::binary);
52 if (!file) {
53 perror(name);
54 exit(1);
55 }
56
57 file.seekg(0, std::ios::end);
58 Data data(file.tellg());
59 file.seekg(0, std::ios::beg);
60
61 if (file && data.data)
62 file.read(data.data.get(), data.size);
63
64 if (!file || !data.data || !data.size) {
65 perror((std::string("failed: reading ") + name).c_str());
66 exit(1);
67 }
68
69 return data;
70}
71
72size_t zlib_estimate_compressed_size(size_t input_size) {
73 return compressBound(input_size);
74}
75
// Selects the stream wrapper placed around the DEFLATE data.
enum zlib_wrapper {
  kWrapperNONE = 0,  // no selection; rejected by zlib_stream_wrapper_type()
  kWrapperZLIB = 1,  // zlib stream wrapper
  kWrapperGZIP = 2,  // gzip stream wrapper
  kWrapperZRAW = 3,  // raw DEFLATE, no wrapper
};
82
83inline int zlib_stream_wrapper_type(zlib_wrapper type) {
84 if (type == kWrapperZLIB) // zlib DEFLATE stream wrapper
85 return MAX_WBITS;
86 if (type == kWrapperGZIP) // gzip DEFLATE stream wrapper
87 return MAX_WBITS + 16;
88 if (type == kWrapperZRAW) // no wrapper, use raw DEFLATE
89 return -MAX_WBITS;
90 error_exit("bad wrapper type", int(type));
91 return 0;
92}
93
94const char* zlib_wrapper_name(zlib_wrapper type) {
95 if (type == kWrapperZLIB)
96 return "ZLIB";
97 if (type == kWrapperGZIP)
98 return "GZIP";
99 if (type == kWrapperZRAW)
100 return "RAW";
101 error_exit("bad wrapper type", int(type));
102 return nullptr;
103}
104
105static int zlib_strategy = Z_DEFAULT_STRATEGY;
106
107const char* zlib_level_strategy_name(int compression_level) {
108 if (compression_level == 0)
109 return ""; // strategy is meaningless at level 0
110 if (zlib_strategy == Z_HUFFMAN_ONLY)
111 return "huffman ";
112 if (zlib_strategy == Z_RLE)
113 return "rle ";
114 if (zlib_strategy == Z_DEFAULT_STRATEGY)
115 return "";
116 error_exit("bad strategy", zlib_strategy);
117 return nullptr;
118}
119
120static int zlib_compression_level = Z_DEFAULT_COMPRESSION;
121
122void zlib_compress(
123 const zlib_wrapper type,
124 const char* input,
125 const size_t input_size,
126 std::string* output,
127 bool resize_output = false)
128{
129 if (resize_output)
130 output->resize(zlib_estimate_compressed_size(input_size));
131 size_t output_size = output->size();
132
133 z_stream stream;
134 memset(&stream, 0, sizeof(stream));
135
136 int result = deflateInit2(&stream, zlib_compression_level, Z_DEFLATED,
137 zlib_stream_wrapper_type(type), MAX_MEM_LEVEL, zlib_strategy);
138 if (result != Z_OK)
139 error_exit("deflateInit2 failed", result);
140
141 stream.next_out = (Bytef*)string_data(output);
142 stream.avail_out = (uInt)output_size;
143 stream.next_in = (z_const Bytef*)input;
144 stream.avail_in = (uInt)input_size;
145
146 result = deflate(&stream, Z_FINISH);
147 if (result == Z_STREAM_END)
148 output_size = stream.total_out;
149 result |= deflateEnd(&stream);
150 if (result != Z_STREAM_END)
151 error_exit("compress failed", result);
152
153 if (resize_output)
154 output->resize(output_size);
155}
156
157void zlib_uncompress(
158 const zlib_wrapper type,
159 const std::string& input,
160 const size_t output_size,
161 std::string* output)
162{
163 z_stream stream;
164 memset(&stream, 0, sizeof(stream));
165
166 int result = inflateInit2(&stream, zlib_stream_wrapper_type(type));
167 if (result != Z_OK)
168 error_exit("inflateInit2 failed", result);
169
170 stream.next_out = (Bytef*)string_data(output);
171 stream.avail_out = (uInt)output->size();
172 stream.next_in = (z_const Bytef*)input.data();
173 stream.avail_in = (uInt)input.size();
174
175 result = inflate(&stream, Z_FINISH);
176 if (stream.total_out != output_size)
177 result = Z_DATA_ERROR;
178 result |= inflateEnd(&stream);
179 if (result == Z_STREAM_END)
180 return;
181
182 std::string error("uncompress failed: ");
183 if (stream.msg)
184 error.append(stream.msg);
185 error_exit(error.c_str(), result);
186}
187
188void verify_equal(const char* input, size_t size, std::string* output) {
189 const char* data = string_data(output);
190 if (output->size() == size && !memcmp(data, input, size))
191 return;
192 fprintf(stderr, "uncompressed data does not match the input data\n");
193 exit(3);
194}
195
196void zlib_file(const char* name, const zlib_wrapper type) {
197 /*
198 * Read the file data.
199 */
200 const auto file = read_file_data_or_exit(name);
201 const int length = static_cast<int>(file.size);
202 const char* data = file.data.get();
203
204 /*
205 * Report compression strategy and file name.
206 */
207 const char* strategy = zlib_level_strategy_name(zlib_compression_level);
208 printf("%s%-40s :\n", strategy, name);
209
210 /*
211 * Chop the data into blocks.
212 */
213 const int block_size = 1 << 20;
214 const int blocks = (length + block_size - 1) / block_size;
215
216 std::vector<const char*> input(blocks);
217 std::vector<size_t> input_length(blocks);
218 std::vector<std::string> compressed(blocks);
219 std::vector<std::string> output(blocks);
220
221 for (int b = 0; b < blocks; ++b) {
222 int input_start = b * block_size;
223 int input_limit = std::min<int>((b + 1) * block_size, length);
224 input[b] = data + input_start;
225 input_length[b] = input_limit - input_start;
226 }
227
228 /*
229 * Run the zlib compress/uncompress loop a few times with |repeats| to
230 * process about 10MB of data if the length is small relative to 10MB.
231 * If length is large relative to 10MB, process the data once.
232 */
233 const int mega_byte = 1024 * 1024;
234 const int repeats = (10 * mega_byte + length) / (length + 1);
235 const int runs = 5;
236 double ctime[runs];
237 double utime[runs];
238
239 for (int run = 0; run < runs; ++run) {
240 const auto now = [] { return std::chrono::steady_clock::now(); };
241
242 // Pre-grow the output buffer so we don't measure string resize time.
243 for (int b = 0; b < blocks; ++b)
244 compressed[b].resize(zlib_estimate_compressed_size(block_size));
245
246 auto start = now();
247 for (int b = 0; b < blocks; ++b)
248 for (int r = 0; r < repeats; ++r)
249 zlib_compress(type, input[b], input_length[b], &compressed[b]);
250 ctime[run] = std::chrono::duration<double>(now() - start).count();
251
252 // Compress again, resizing compressed, so we don't leave junk at the
253 // end of the compressed string that could confuse zlib_uncompress().
254 for (int b = 0; b < blocks; ++b)
255 zlib_compress(type, input[b], input_length[b], &compressed[b], true);
256
257 for (int b = 0; b < blocks; ++b)
258 output[b].resize(input_length[b]);
259
260 start = now();
261 for (int r = 0; r < repeats; ++r)
262 for (int b = 0; b < blocks; ++b)
263 zlib_uncompress(type, compressed[b], input_length[b], &output[b]);
264 utime[run] = std::chrono::duration<double>(now() - start).count();
265
266 for (int b = 0; b < blocks; ++b)
267 verify_equal(input[b], input_length[b], &output[b]);
268 }
269
270 /*
271 * Output the median/maximum compress/uncompress rates in MB/s.
272 */
273 size_t output_length = 0;
274 for (size_t i = 0; i < compressed.size(); ++i)
275 output_length += compressed[i].size();
276
277 std::sort(ctime, ctime + runs);
278 std::sort(utime, utime + runs);
279
280 double deflate_rate_med = length * repeats / mega_byte / ctime[runs / 2];
281 double inflate_rate_med = length * repeats / mega_byte / utime[runs / 2];
282 double deflate_rate_max = length * repeats / mega_byte / ctime[0];
283 double inflate_rate_max = length * repeats / mega_byte / utime[0];
284
285 // type, block size, compression ratio, etc
286 printf("%s: [b %dM] bytes %6d -> %6u %4.1f%%",
287 zlib_wrapper_name(type), block_size / (1 << 20), length,
288 static_cast<unsigned>(output_length), output_length * 100.0 / length);
289
290 // compress / uncompress median (max) rates
291 printf(" comp %5.1f (%5.1f) MB/s uncomp %5.1f (%5.1f) MB/s\n",
292 deflate_rate_med, deflate_rate_max, inflate_rate_med, inflate_rate_max);
293}
294
// Index of the next command line argument to parse. It is advanced each time
// an argument is consumed.
static int argn = 1;

// If the current argument equals |option|, consumes it and returns it.
// Otherwise, or when no arguments remain, returns nullptr and consumes
// nothing.
char* get_option(int argc, char* argv[], const char* option) {
  if (argn >= argc || strcmp(argv[argn], option) != 0)
    return nullptr;
  return argv[argn++];
}

// Parses the current argument as a compression level.
//
// Returns true and stores the level in |*value| when the argument starts with
// a digit and its leading number is in the range 0..9; the argument is then
// consumed. Returns false when no argument remains (|*value| is untouched),
// when the argument does not start with a digit (|*value| becomes -1 and
// nothing is consumed), or when the number is out of range (|*value| becomes
// -1 and the argument is consumed).
bool get_compression(int argc, char* argv[], int* value) {
  // A missing value is an error regardless of what |*value| currently holds.
  if (argn >= argc)
    return false;

  const char* text = argv[argn];
  // isdigit() requires a value representable as unsigned char; a plain char
  // holding a negative value would be undefined behavior.
  if (!isdigit(static_cast<unsigned char>(text[0]))) {
    *value = -1;
    return false;
  }

  // strtol() saturates on overflow instead of invoking undefined behavior,
  // so an absurdly long number simply fails the range check.
  const long level = strtol(text, nullptr, 10);
  ++argn;
  *value = (level >= 0 && level <= 9) ? static_cast<int>(level) : -1;
  return *value >= 0;
}
308
// Prints the command line synopsis for |program| to stdout and exits with
// status 1.
void usage_exit(const char* program) {
  static const char kUsageFormat[] =
      "usage: %s gzip|zlib|raw [--compression 0:9] [--huffman|--rle] "
      "files...\n";
  printf(kUsageFormat, program);
  exit(1);
}
316
317int main(int argc, char* argv[]) {
318 zlib_wrapper type;
319 if (get_option(argc, argv, "zlib"))
320 type = kWrapperZLIB;
321 else if (get_option(argc, argv, "gzip"))
322 type = kWrapperGZIP;
323 else if (get_option(argc, argv, "raw"))
324 type = kWrapperZRAW;
325 else
326 usage_exit(argv[0]);
327
328 while (argn < argc && argv[argn][0] == '-') {
329 if (get_option(argc, argv, "--compression")) {
330 if (!get_compression(argc, argv, &zlib_compression_level))
331 usage_exit(argv[0]);
332 } else if (get_option(argc, argv, "--huffman")) {
333 zlib_strategy = Z_HUFFMAN_ONLY;
334 } else if (get_option(argc, argv, "--rle")) {
335 zlib_strategy = Z_RLE;
336 } else {
337 usage_exit(argv[0]);
338 }
339 }
340
341 if (argn >= argc)
342 usage_exit(argv[0]);
343 while (argn < argc)
344 zlib_file(argv[argn++], type);
345
346 return 0;
347}
348