1 | /* |
2 | * Copyright 2018 The Chromium Authors. All rights reserved. |
3 | * Use of this source code is governed by a BSD-style license that can be |
4 | * found in the Chromium source repository LICENSE file. |
5 | * |
6 | * A benchmark test harness for measuring decoding performance of gzip or zlib |
7 | * (deflate) encoded compressed data. Given a file containing any data, encode |
8 | * (compress) it into gzip or zlib format and then decode (uncompress). Output |
9 | * the median and maximum encoding and decoding rates in MB/s. |
10 | * |
11 | * Raw deflate (no gzip or zlib stream wrapper) mode is also supported. Select |
12 | * it with the [raw] argument. Use the [gzip] [zlib] arguments to select those |
13 | * stream wrappers. |
14 | * |
15 | * Note this code can be compiled outside of the Chromium build system against |
16 | * the system zlib (-lz) with g++ or clang++ as follows: |
17 | * |
18 | * g++|clang++ -O3 -Wall -std=c++11 zlib_bench.cc -lstdc++ -lz |
19 | */ |
20 | |
21 | #include <algorithm> |
22 | #include <chrono> |
23 | #include <fstream> |
24 | #include <memory> |
25 | #include <string> |
26 | #include <vector> |
27 | |
28 | #include <memory.h> |
29 | #include <stdint.h> |
30 | #include <stdio.h> |
31 | #include <stdlib.h> |
32 | |
33 | #include "zlib.h" |
34 | |
// Prints |error| followed by its numeric |code| to stderr, then terminates
// the process with |code| as the exit status. This function never returns.
void error_exit(const char* error, int code) {
  const int status = code;
  ::fprintf(stderr, "%s (%d)\n", error, status);
  ::exit(status);
}
39 | |
// Returns a writable pointer to the first character of |s|, or nullptr when
// |s| holds no characters.
inline char* string_data(std::string* s) {
  if (s->empty())
    return nullptr;
  return &(*s)[0];
}
43 | |
// A heap buffer of |size| chars owned through |data|. The allocation is
// non-throwing: if it fails, |data| is null while |size| still records the
// requested length, so callers must check |data| before using it.
struct Data {
  Data(size_t s) : data(new (std::nothrow) char[s]), size(s) {}

  std::unique_ptr<char[]> data;  // owned storage, null on allocation failure
  size_t size;                   // requested length in bytes
};
49 | |
50 | Data read_file_data_or_exit(const char* name) { |
51 | std::ifstream file(name, std::ios::in | std::ios::binary); |
52 | if (!file) { |
53 | perror(name); |
54 | exit(1); |
55 | } |
56 | |
57 | file.seekg(0, std::ios::end); |
58 | Data data(file.tellg()); |
59 | file.seekg(0, std::ios::beg); |
60 | |
61 | if (file && data.data) |
62 | file.read(data.data.get(), data.size); |
63 | |
64 | if (!file || !data.data || !data.size) { |
65 | perror((std::string("failed: reading " ) + name).c_str()); |
66 | exit(1); |
67 | } |
68 | |
69 | return data; |
70 | } |
71 | |
72 | size_t zlib_estimate_compressed_size(size_t input_size) { |
73 | return compressBound(input_size); |
74 | } |
75 | |
// Stream wrapper formats that can be placed around the DEFLATE data.
enum zlib_wrapper {
  kWrapperNONE = 0,  // no wrapper chosen; not a usable selection
  kWrapperZLIB = 1,  // zlib stream wrapper
  kWrapperGZIP = 2,  // gzip stream wrapper
  kWrapperZRAW = 3,  // raw DEFLATE, no wrapper
};
82 | |
83 | inline int zlib_stream_wrapper_type(zlib_wrapper type) { |
84 | if (type == kWrapperZLIB) // zlib DEFLATE stream wrapper |
85 | return MAX_WBITS; |
86 | if (type == kWrapperGZIP) // gzip DEFLATE stream wrapper |
87 | return MAX_WBITS + 16; |
88 | if (type == kWrapperZRAW) // no wrapper, use raw DEFLATE |
89 | return -MAX_WBITS; |
90 | error_exit("bad wrapper type" , int(type)); |
91 | return 0; |
92 | } |
93 | |
94 | const char* zlib_wrapper_name(zlib_wrapper type) { |
95 | if (type == kWrapperZLIB) |
96 | return "ZLIB" ; |
97 | if (type == kWrapperGZIP) |
98 | return "GZIP" ; |
99 | if (type == kWrapperZRAW) |
100 | return "RAW" ; |
101 | error_exit("bad wrapper type" , int(type)); |
102 | return nullptr; |
103 | } |
104 | |
105 | static int zlib_strategy = Z_DEFAULT_STRATEGY; |
106 | |
107 | const char* zlib_level_strategy_name(int compression_level) { |
108 | if (compression_level == 0) |
109 | return "" ; // strategy is meaningless at level 0 |
110 | if (zlib_strategy == Z_HUFFMAN_ONLY) |
111 | return "huffman " ; |
112 | if (zlib_strategy == Z_RLE) |
113 | return "rle " ; |
114 | if (zlib_strategy == Z_DEFAULT_STRATEGY) |
115 | return "" ; |
116 | error_exit("bad strategy" , zlib_strategy); |
117 | return nullptr; |
118 | } |
119 | |
120 | static int zlib_compression_level = Z_DEFAULT_COMPRESSION; |
121 | |
122 | void zlib_compress( |
123 | const zlib_wrapper type, |
124 | const char* input, |
125 | const size_t input_size, |
126 | std::string* output, |
127 | bool resize_output = false) |
128 | { |
129 | if (resize_output) |
130 | output->resize(zlib_estimate_compressed_size(input_size)); |
131 | size_t output_size = output->size(); |
132 | |
133 | z_stream stream; |
134 | memset(&stream, 0, sizeof(stream)); |
135 | |
136 | int result = deflateInit2(&stream, zlib_compression_level, Z_DEFLATED, |
137 | zlib_stream_wrapper_type(type), MAX_MEM_LEVEL, zlib_strategy); |
138 | if (result != Z_OK) |
139 | error_exit("deflateInit2 failed" , result); |
140 | |
141 | stream.next_out = (Bytef*)string_data(output); |
142 | stream.avail_out = (uInt)output_size; |
143 | stream.next_in = (z_const Bytef*)input; |
144 | stream.avail_in = (uInt)input_size; |
145 | |
146 | result = deflate(&stream, Z_FINISH); |
147 | if (result == Z_STREAM_END) |
148 | output_size = stream.total_out; |
149 | result |= deflateEnd(&stream); |
150 | if (result != Z_STREAM_END) |
151 | error_exit("compress failed" , result); |
152 | |
153 | if (resize_output) |
154 | output->resize(output_size); |
155 | } |
156 | |
157 | void zlib_uncompress( |
158 | const zlib_wrapper type, |
159 | const std::string& input, |
160 | const size_t output_size, |
161 | std::string* output) |
162 | { |
163 | z_stream stream; |
164 | memset(&stream, 0, sizeof(stream)); |
165 | |
166 | int result = inflateInit2(&stream, zlib_stream_wrapper_type(type)); |
167 | if (result != Z_OK) |
168 | error_exit("inflateInit2 failed" , result); |
169 | |
170 | stream.next_out = (Bytef*)string_data(output); |
171 | stream.avail_out = (uInt)output->size(); |
172 | stream.next_in = (z_const Bytef*)input.data(); |
173 | stream.avail_in = (uInt)input.size(); |
174 | |
175 | result = inflate(&stream, Z_FINISH); |
176 | if (stream.total_out != output_size) |
177 | result = Z_DATA_ERROR; |
178 | result |= inflateEnd(&stream); |
179 | if (result == Z_STREAM_END) |
180 | return; |
181 | |
182 | std::string error("uncompress failed: " ); |
183 | if (stream.msg) |
184 | error.append(stream.msg); |
185 | error_exit(error.c_str(), result); |
186 | } |
187 | |
// Checks that |output| holds exactly the |size| bytes found at |input|.
// Returns normally on a match; otherwise prints a diagnostic to stderr and
// exits with status 3.
void verify_equal(const char* input, size_t size, std::string* output) {
  // Compare lengths first. memcmp() is skipped for a zero length so it is
  // never handed a null pointer, which is undefined even when comparing zero
  // bytes.
  const bool same_size = output->size() == size;
  if (same_size && (size == 0 || !memcmp(output->data(), input, size)))
    return;

  fprintf(stderr, "uncompressed data does not match the input data\n");
  exit(3);
}
195 | |
196 | void zlib_file(const char* name, const zlib_wrapper type) { |
197 | /* |
198 | * Read the file data. |
199 | */ |
200 | const auto file = read_file_data_or_exit(name); |
201 | const int length = static_cast<int>(file.size); |
202 | const char* data = file.data.get(); |
203 | |
204 | /* |
205 | * Report compression strategy and file name. |
206 | */ |
207 | const char* strategy = zlib_level_strategy_name(zlib_compression_level); |
208 | printf("%s%-40s :\n" , strategy, name); |
209 | |
210 | /* |
211 | * Chop the data into blocks. |
212 | */ |
213 | const int block_size = 1 << 20; |
214 | const int blocks = (length + block_size - 1) / block_size; |
215 | |
216 | std::vector<const char*> input(blocks); |
217 | std::vector<size_t> input_length(blocks); |
218 | std::vector<std::string> compressed(blocks); |
219 | std::vector<std::string> output(blocks); |
220 | |
221 | for (int b = 0; b < blocks; ++b) { |
222 | int input_start = b * block_size; |
223 | int input_limit = std::min<int>((b + 1) * block_size, length); |
224 | input[b] = data + input_start; |
225 | input_length[b] = input_limit - input_start; |
226 | } |
227 | |
228 | /* |
229 | * Run the zlib compress/uncompress loop a few times with |repeats| to |
230 | * process about 10MB of data if the length is small relative to 10MB. |
231 | * If length is large relative to 10MB, process the data once. |
232 | */ |
233 | const int mega_byte = 1024 * 1024; |
234 | const int repeats = (10 * mega_byte + length) / (length + 1); |
235 | const int runs = 5; |
236 | double ctime[runs]; |
237 | double utime[runs]; |
238 | |
239 | for (int run = 0; run < runs; ++run) { |
240 | const auto now = [] { return std::chrono::steady_clock::now(); }; |
241 | |
242 | // Pre-grow the output buffer so we don't measure string resize time. |
243 | for (int b = 0; b < blocks; ++b) |
244 | compressed[b].resize(zlib_estimate_compressed_size(block_size)); |
245 | |
246 | auto start = now(); |
247 | for (int b = 0; b < blocks; ++b) |
248 | for (int r = 0; r < repeats; ++r) |
249 | zlib_compress(type, input[b], input_length[b], &compressed[b]); |
250 | ctime[run] = std::chrono::duration<double>(now() - start).count(); |
251 | |
252 | // Compress again, resizing compressed, so we don't leave junk at the |
253 | // end of the compressed string that could confuse zlib_uncompress(). |
254 | for (int b = 0; b < blocks; ++b) |
255 | zlib_compress(type, input[b], input_length[b], &compressed[b], true); |
256 | |
257 | for (int b = 0; b < blocks; ++b) |
258 | output[b].resize(input_length[b]); |
259 | |
260 | start = now(); |
261 | for (int r = 0; r < repeats; ++r) |
262 | for (int b = 0; b < blocks; ++b) |
263 | zlib_uncompress(type, compressed[b], input_length[b], &output[b]); |
264 | utime[run] = std::chrono::duration<double>(now() - start).count(); |
265 | |
266 | for (int b = 0; b < blocks; ++b) |
267 | verify_equal(input[b], input_length[b], &output[b]); |
268 | } |
269 | |
270 | /* |
271 | * Output the median/maximum compress/uncompress rates in MB/s. |
272 | */ |
273 | size_t output_length = 0; |
274 | for (size_t i = 0; i < compressed.size(); ++i) |
275 | output_length += compressed[i].size(); |
276 | |
277 | std::sort(ctime, ctime + runs); |
278 | std::sort(utime, utime + runs); |
279 | |
280 | double deflate_rate_med = length * repeats / mega_byte / ctime[runs / 2]; |
281 | double inflate_rate_med = length * repeats / mega_byte / utime[runs / 2]; |
282 | double deflate_rate_max = length * repeats / mega_byte / ctime[0]; |
283 | double inflate_rate_max = length * repeats / mega_byte / utime[0]; |
284 | |
285 | // type, block size, compression ratio, etc |
286 | printf("%s: [b %dM] bytes %6d -> %6u %4.1f%%" , |
287 | zlib_wrapper_name(type), block_size / (1 << 20), length, |
288 | static_cast<unsigned>(output_length), output_length * 100.0 / length); |
289 | |
290 | // compress / uncompress median (max) rates |
291 | printf(" comp %5.1f (%5.1f) MB/s uncomp %5.1f (%5.1f) MB/s\n" , |
292 | deflate_rate_med, deflate_rate_max, inflate_rate_med, inflate_rate_max); |
293 | } |
294 | |
// Index of the next command line argument to be examined.
static int argn = 1;

// If the argument at |argn| equals |option|, consumes it (advancing |argn|)
// and returns it. Returns nullptr, leaving |argn| unchanged, when it differs
// or when no arguments remain.
char* get_option(int argc, char* argv[], const char* option) {
  if (argn >= argc)
    return nullptr;

  char* const candidate = argv[argn];
  if (strcmp(candidate, option) != 0)
    return nullptr;

  ++argn;
  return candidate;
}
302 | |
303 | bool get_compression(int argc, char* argv[], int* value) { |
304 | if (argn < argc) |
305 | *value = isdigit(argv[argn][0]) ? atoi(argv[argn++]) : -1; |
306 | return *value >= 0 && *value <= 9; |
307 | } |
308 | |
// Prints the command line synopsis for |program| and exits with status 1.
// This function never returns.
void usage_exit(const char* program) {
  const char* const executable = program;
  printf("usage: %s gzip|zlib|raw [--compression 0:9] [--huffman|--rle] "
         "files...\n",
         executable);
  exit(1);
}
316 | |
317 | int main(int argc, char* argv[]) { |
318 | zlib_wrapper type; |
319 | if (get_option(argc, argv, "zlib" )) |
320 | type = kWrapperZLIB; |
321 | else if (get_option(argc, argv, "gzip" )) |
322 | type = kWrapperGZIP; |
323 | else if (get_option(argc, argv, "raw" )) |
324 | type = kWrapperZRAW; |
325 | else |
326 | usage_exit(argv[0]); |
327 | |
328 | while (argn < argc && argv[argn][0] == '-') { |
329 | if (get_option(argc, argv, "--compression" )) { |
330 | if (!get_compression(argc, argv, &zlib_compression_level)) |
331 | usage_exit(argv[0]); |
332 | } else if (get_option(argc, argv, "--huffman" )) { |
333 | zlib_strategy = Z_HUFFMAN_ONLY; |
334 | } else if (get_option(argc, argv, "--rle" )) { |
335 | zlib_strategy = Z_RLE; |
336 | } else { |
337 | usage_exit(argv[0]); |
338 | } |
339 | } |
340 | |
341 | if (argn >= argc) |
342 | usage_exit(argv[0]); |
343 | while (argn < argc) |
344 | zlib_file(argv[argn++], type); |
345 | |
346 | return 0; |
347 | } |
348 | |