1 | /* |
2 | * uzlib - tiny deflate/inflate library (deflate, gzip, zlib) |
3 | * |
4 | * Copyright (c) 2003 by Joergen Ibsen / Jibz |
5 | * All Rights Reserved |
6 | * http://www.ibsensoftware.com/ |
7 | * |
8 | * Copyright (c) 2014-2018 by Paul Sokolovsky |
9 | * |
10 | * This software is provided 'as-is', without any express |
11 | * or implied warranty. In no event will the authors be |
12 | * held liable for any damages arising from the use of |
13 | * this software. |
14 | * |
15 | * Permission is granted to anyone to use this software |
16 | * for any purpose, including commercial applications, |
17 | * and to alter it and redistribute it freely, subject to |
18 | * the following restrictions: |
19 | * |
20 | * 1. The origin of this software must not be |
21 | * misrepresented; you must not claim that you |
22 | * wrote the original software. If you use this |
23 | * software in a product, an acknowledgment in |
24 | * the product documentation would be appreciated |
25 | * but is not required. |
26 | * |
27 | * 2. Altered source versions must be plainly marked |
28 | * as such, and must not be misrepresented as |
29 | * being the original software. |
30 | * |
31 | * 3. This notice may not be removed or altered from |
32 | * any source distribution. |
33 | */ |
34 | |
35 | #include <assert.h> |
36 | #include "tinf.h" |
37 | |
38 | #define UZLIB_DUMP_ARRAY(heading, arr, size) \ |
39 | { \ |
40 | printf("%s", heading); \ |
41 | for (int i = 0; i < size; ++i) { \ |
42 | printf(" %d", (arr)[i]); \ |
43 | } \ |
44 | printf("\n"); \ |
45 | } |
46 | |
47 | uint32_t tinf_get_le_uint32(TINF_DATA *d); |
48 | uint32_t tinf_get_be_uint32(TINF_DATA *d); |
49 | |
50 | /* --------------------------------------------------- * |
51 | * -- uninitialized global data (static structures) -- * |
52 | * --------------------------------------------------- */ |
53 | |
54 | #ifdef RUNTIME_BITS_TABLES |
55 | |
56 | /* extra bits and base tables for length codes */ |
57 | unsigned char length_bits[30]; |
58 | unsigned short length_base[30]; |
59 | |
60 | /* extra bits and base tables for distance codes */ |
61 | unsigned char dist_bits[30]; |
62 | unsigned short dist_base[30]; |
63 | |
64 | #else |
65 | |
66 | const unsigned char length_bits[30] = { |
67 | 0, 0, 0, 0, 0, 0, 0, 0, |
68 | 1, 1, 1, 1, 2, 2, 2, 2, |
69 | 3, 3, 3, 3, 4, 4, 4, 4, |
70 | 5, 5, 5, 5 |
71 | }; |
72 | const unsigned short length_base[30] = { |
73 | 3, 4, 5, 6, 7, 8, 9, 10, |
74 | 11, 13, 15, 17, 19, 23, 27, 31, |
75 | 35, 43, 51, 59, 67, 83, 99, 115, |
76 | 131, 163, 195, 227, 258 |
77 | }; |
78 | |
79 | const unsigned char dist_bits[30] = { |
80 | 0, 0, 0, 0, 1, 1, 2, 2, |
81 | 3, 3, 4, 4, 5, 5, 6, 6, |
82 | 7, 7, 8, 8, 9, 9, 10, 10, |
83 | 11, 11, 12, 12, 13, 13 |
84 | }; |
85 | const unsigned short dist_base[30] = { |
86 | 1, 2, 3, 4, 5, 7, 9, 13, |
87 | 17, 25, 33, 49, 65, 97, 129, 193, |
88 | 257, 385, 513, 769, 1025, 1537, 2049, 3073, |
89 | 4097, 6145, 8193, 12289, 16385, 24577 |
90 | }; |
91 | |
92 | #endif |
93 | |
94 | /* special ordering of code length codes */ |
95 | const unsigned char clcidx[] = { |
96 | 16, 17, 18, 0, 8, 7, 9, 6, |
97 | 10, 5, 11, 4, 12, 3, 13, 2, |
98 | 14, 1, 15 |
99 | }; |
100 | |
101 | /* ----------------------- * |
102 | * -- utility functions -- * |
103 | * ----------------------- */ |
104 | |
105 | #ifdef RUNTIME_BITS_TABLES |
106 | /* build extra bits and base tables */ |
107 | static void tinf_build_bits_base(unsigned char *bits, unsigned short *base, int delta, int first) |
108 | { |
109 | int i, sum; |
110 | |
111 | /* build bits table */ |
112 | for (i = 0; i < delta; ++i) bits[i] = 0; |
113 | for (i = 0; i < 30 - delta; ++i) bits[i + delta] = i / delta; |
114 | |
115 | /* build base table */ |
116 | for (sum = first, i = 0; i < 30; ++i) |
117 | { |
118 | base[i] = sum; |
119 | sum += 1 << bits[i]; |
120 | } |
121 | } |
122 | #endif |
123 | |
124 | /* build the fixed huffman trees */ |
125 | static void tinf_build_fixed_trees(TINF_TREE *lt, TINF_TREE *dt) |
126 | { |
127 | int i; |
128 | |
129 | /* build fixed length tree */ |
130 | for (i = 0; i < 7; ++i) lt->table[i] = 0; |
131 | |
132 | lt->table[7] = 24; |
133 | lt->table[8] = 152; |
134 | lt->table[9] = 112; |
135 | |
136 | for (i = 0; i < 24; ++i) lt->trans[i] = 256 + i; |
137 | for (i = 0; i < 144; ++i) lt->trans[24 + i] = i; |
138 | for (i = 0; i < 8; ++i) lt->trans[24 + 144 + i] = 280 + i; |
139 | for (i = 0; i < 112; ++i) lt->trans[24 + 144 + 8 + i] = 144 + i; |
140 | |
141 | /* build fixed distance tree */ |
142 | for (i = 0; i < 5; ++i) dt->table[i] = 0; |
143 | |
144 | dt->table[5] = 32; |
145 | |
146 | for (i = 0; i < 32; ++i) dt->trans[i] = i; |
147 | } |
148 | |
149 | /* given an array of code lengths, build a tree */ |
150 | static void tinf_build_tree(TINF_TREE *t, const unsigned char *lengths, unsigned int num) |
151 | { |
152 | unsigned short offs[16]; |
153 | unsigned int i, sum; |
154 | |
155 | /* clear code length count table */ |
156 | for (i = 0; i < 16; ++i) t->table[i] = 0; |
157 | |
158 | /* scan symbol lengths, and sum code length counts */ |
159 | for (i = 0; i < num; ++i) t->table[lengths[i]]++; |
160 | |
161 | #if UZLIB_CONF_DEBUG_LOG >= 2 |
162 | UZLIB_DUMP_ARRAY("codelen counts:" , t->table, TINF_ARRAY_SIZE(t->table)); |
163 | #endif |
164 | |
165 | /* In the lengths array, 0 means unused code. So, t->table[0] now contains |
166 | number of unused codes. But table's purpose is to contain # of codes of |
167 | particular length, and there're 0 codes of length 0. */ |
168 | t->table[0] = 0; |
169 | |
170 | /* compute offset table for distribution sort */ |
171 | for (sum = 0, i = 0; i < 16; ++i) |
172 | { |
173 | offs[i] = sum; |
174 | sum += t->table[i]; |
175 | } |
176 | |
177 | #if UZLIB_CONF_DEBUG_LOG >= 2 |
178 | UZLIB_DUMP_ARRAY("codelen offsets:" , offs, TINF_ARRAY_SIZE(offs)); |
179 | #endif |
180 | |
181 | /* create code->symbol translation table (symbols sorted by code) */ |
182 | for (i = 0; i < num; ++i) |
183 | { |
184 | if (lengths[i]) t->trans[offs[lengths[i]]++] = i; |
185 | } |
186 | } |
187 | |
188 | /* ---------------------- * |
189 | * -- decode functions -- * |
190 | * ---------------------- */ |
191 | |
192 | unsigned char uzlib_get_byte(TINF_DATA *d) |
193 | { |
194 | /* If end of source buffer is not reached, return next byte from source |
195 | buffer. */ |
196 | if (d->source < d->source_limit) { |
197 | return *d->source++; |
198 | } |
199 | |
200 | /* Otherwise if there's callback and we haven't seen EOF yet, try to |
201 | read next byte using it. (Note: the callback can also update ->source |
202 | and ->source_limit). */ |
203 | if (d->readSource && !d->eof) { |
204 | int val = d->readSource(d); |
205 | if (val >= 0) { |
206 | return (unsigned char)val; |
207 | } |
208 | } |
209 | |
210 | /* Otherwise, we hit EOF (either from ->readSource() or from exhaustion |
211 | of the buffer), and it will be "sticky", i.e. further calls to this |
212 | function will end up here too. */ |
213 | d->eof = true; |
214 | |
215 | return 0; |
216 | } |
217 | |
218 | uint32_t tinf_get_le_uint32(TINF_DATA *d) |
219 | { |
220 | uint32_t val = 0; |
221 | int i; |
222 | for (i = 4; i--;) { |
223 | val = val >> 8 | ((uint32_t)uzlib_get_byte(d)) << 24; |
224 | } |
225 | return val; |
226 | } |
227 | |
228 | uint32_t tinf_get_be_uint32(TINF_DATA *d) |
229 | { |
230 | uint32_t val = 0; |
231 | int i; |
232 | for (i = 4; i--;) { |
233 | val = val << 8 | uzlib_get_byte(d); |
234 | } |
235 | return val; |
236 | } |
237 | |
238 | /* get one bit from source stream */ |
239 | static int tinf_getbit(TINF_DATA *d) |
240 | { |
241 | unsigned int bit; |
242 | |
243 | /* check if tag is empty */ |
244 | if (!d->bitcount--) |
245 | { |
246 | /* load next tag */ |
247 | d->tag = uzlib_get_byte(d); |
248 | d->bitcount = 7; |
249 | } |
250 | |
251 | /* shift bit out of tag */ |
252 | bit = d->tag & 0x01; |
253 | d->tag >>= 1; |
254 | |
255 | return bit; |
256 | } |
257 | |
258 | /* read a num bit value from a stream and add base */ |
259 | static unsigned int tinf_read_bits(TINF_DATA *d, int num, int base) |
260 | { |
261 | unsigned int val = 0; |
262 | |
263 | /* read num bits */ |
264 | if (num) |
265 | { |
266 | unsigned int limit = 1 << (num); |
267 | unsigned int mask; |
268 | |
269 | for (mask = 1; mask < limit; mask *= 2) |
270 | if (tinf_getbit(d)) val += mask; |
271 | } |
272 | |
273 | return val + base; |
274 | } |
275 | |
276 | /* given a data stream and a tree, decode a symbol */ |
277 | static int tinf_decode_symbol(TINF_DATA *d, TINF_TREE *t) |
278 | { |
279 | int sum = 0, cur = 0, len = 0; |
280 | |
281 | /* get more bits while code value is above sum */ |
282 | do { |
283 | |
284 | cur = 2*cur + tinf_getbit(d); |
285 | |
286 | if (++len == TINF_ARRAY_SIZE(t->table)) { |
287 | return TINF_DATA_ERROR; |
288 | } |
289 | |
290 | sum += t->table[len]; |
291 | cur -= t->table[len]; |
292 | |
293 | } while (cur >= 0); |
294 | |
295 | sum += cur; |
296 | #if UZLIB_CONF_PARANOID_CHECKS |
297 | if (sum < 0 || sum >= TINF_ARRAY_SIZE(t->trans)) { |
298 | return TINF_DATA_ERROR; |
299 | } |
300 | #endif |
301 | |
302 | return t->trans[sum]; |
303 | } |
304 | |
305 | /* given a data stream, decode dynamic trees from it */ |
306 | static int tinf_decode_trees(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt) |
307 | { |
308 | /* code lengths for 288 literal/len symbols and 32 dist symbols */ |
309 | unsigned char lengths[288+32]; |
310 | unsigned int hlit, hdist, hclen, hlimit; |
311 | unsigned int i, num, length; |
312 | |
313 | /* get 5 bits HLIT (257-286) */ |
314 | hlit = tinf_read_bits(d, 5, 257); |
315 | |
316 | /* get 5 bits HDIST (1-32) */ |
317 | hdist = tinf_read_bits(d, 5, 1); |
318 | |
319 | /* get 4 bits HCLEN (4-19) */ |
320 | hclen = tinf_read_bits(d, 4, 4); |
321 | |
322 | for (i = 0; i < 19; ++i) lengths[i] = 0; |
323 | |
324 | /* read code lengths for code length alphabet */ |
325 | for (i = 0; i < hclen; ++i) |
326 | { |
327 | /* get 3 bits code length (0-7) */ |
328 | unsigned int clen = tinf_read_bits(d, 3, 0); |
329 | |
330 | lengths[clcidx[i]] = clen; |
331 | } |
332 | |
333 | /* build code length tree, temporarily use length tree */ |
334 | tinf_build_tree(lt, lengths, 19); |
335 | |
336 | /* decode code lengths for the dynamic trees */ |
337 | hlimit = hlit + hdist; |
338 | for (num = 0; num < hlimit; ) |
339 | { |
340 | int sym = tinf_decode_symbol(d, lt); |
341 | unsigned char fill_value = 0; |
342 | int lbits, lbase = 3; |
343 | |
344 | /* error decoding */ |
345 | if (sym < 0) return sym; |
346 | |
347 | switch (sym) |
348 | { |
349 | case 16: |
350 | /* copy previous code length 3-6 times (read 2 bits) */ |
351 | if (num == 0) return TINF_DATA_ERROR; |
352 | fill_value = lengths[num - 1]; |
353 | lbits = 2; |
354 | break; |
355 | case 17: |
356 | /* repeat code length 0 for 3-10 times (read 3 bits) */ |
357 | lbits = 3; |
358 | break; |
359 | case 18: |
360 | /* repeat code length 0 for 11-138 times (read 7 bits) */ |
361 | lbits = 7; |
362 | lbase = 11; |
363 | break; |
364 | default: |
365 | /* values 0-15 represent the actual code lengths */ |
366 | lengths[num++] = sym; |
367 | /* continue the for loop */ |
368 | continue; |
369 | } |
370 | |
371 | /* special code length 16-18 are handled here */ |
372 | length = tinf_read_bits(d, lbits, lbase); |
373 | if (num + length > hlimit) return TINF_DATA_ERROR; |
374 | for (; length; --length) |
375 | { |
376 | lengths[num++] = fill_value; |
377 | } |
378 | } |
379 | |
380 | #if UZLIB_CONF_DEBUG_LOG >= 2 |
381 | printf("lit code lengths (%d):" , hlit); |
382 | UZLIB_DUMP_ARRAY("" , lengths, hlit); |
383 | printf("dist code lengths (%d):" , hdist); |
384 | UZLIB_DUMP_ARRAY("" , lengths + hlit, hdist); |
385 | #endif |
386 | |
387 | #if UZLIB_CONF_PARANOID_CHECKS |
388 | /* Check that there's "end of block" symbol */ |
389 | if (lengths[256] == 0) { |
390 | return TINF_DATA_ERROR; |
391 | } |
392 | #endif |
393 | |
394 | /* build dynamic trees */ |
395 | tinf_build_tree(lt, lengths, hlit); |
396 | tinf_build_tree(dt, lengths + hlit, hdist); |
397 | |
398 | return TINF_OK; |
399 | } |
400 | |
401 | /* ----------------------------- * |
402 | * -- block inflate functions -- * |
403 | * ----------------------------- */ |
404 | |
405 | /* given a stream and two trees, inflate next byte of output */ |
406 | static int tinf_inflate_block_data(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt) |
407 | { |
408 | if (d->curlen == 0) { |
409 | unsigned int offs; |
410 | int dist; |
411 | int sym = tinf_decode_symbol(d, lt); |
412 | //printf("huff sym: %02x\n", sym); |
413 | |
414 | if (d->eof) { |
415 | return TINF_DATA_ERROR; |
416 | } |
417 | |
418 | /* literal byte */ |
419 | if (sym < 256) { |
420 | TINF_PUT(d, sym); |
421 | return TINF_OK; |
422 | } |
423 | |
424 | /* end of block */ |
425 | if (sym == 256) { |
426 | return TINF_DONE; |
427 | } |
428 | |
429 | /* substring from sliding dictionary */ |
430 | sym -= 257; |
431 | if (sym >= 29) { |
432 | return TINF_DATA_ERROR; |
433 | } |
434 | |
435 | /* possibly get more bits from length code */ |
436 | d->curlen = tinf_read_bits(d, length_bits[sym], length_base[sym]); |
437 | |
438 | dist = tinf_decode_symbol(d, dt); |
439 | if (dist >= 30) { |
440 | return TINF_DATA_ERROR; |
441 | } |
442 | |
443 | /* possibly get more bits from distance code */ |
444 | offs = tinf_read_bits(d, dist_bits[dist], dist_base[dist]); |
445 | |
446 | /* calculate and validate actual LZ offset to use */ |
447 | if (d->dict_ring) { |
448 | if (offs > d->dict_size) { |
449 | return TINF_DICT_ERROR; |
450 | } |
451 | /* Note: unlike full-dest-in-memory case below, we don't |
452 | try to catch offset which points to not yet filled |
453 | part of the dictionary here. Doing so would require |
454 | keeping another variable to track "filled in" size |
455 | of the dictionary. Appearance of such an offset cannot |
456 | lead to accessing memory outside of the dictionary |
457 | buffer, and clients which don't want to leak unrelated |
458 | information, should explicitly initialize dictionary |
459 | buffer passed to uzlib. */ |
460 | |
461 | d->lzOff = d->dict_idx - offs; |
462 | if (d->lzOff < 0) { |
463 | d->lzOff += d->dict_size; |
464 | } |
465 | } else { |
466 | /* catch trying to point before the start of dest buffer */ |
467 | if (offs > (unsigned int)(d->dest - d->destStart)) { |
468 | return TINF_DATA_ERROR; |
469 | } |
470 | d->lzOff = -offs; |
471 | } |
472 | } |
473 | |
474 | /* copy next byte from dict substring */ |
475 | if (d->dict_ring) { |
476 | TINF_PUT(d, d->dict_ring[d->lzOff]); |
477 | if ((unsigned)++d->lzOff == d->dict_size) { |
478 | d->lzOff = 0; |
479 | } |
480 | } else { |
481 | d->dest[0] = d->dest[d->lzOff]; |
482 | d->dest++; |
483 | } |
484 | d->curlen--; |
485 | return TINF_OK; |
486 | } |
487 | |
488 | /* inflate next byte from uncompressed block of data */ |
489 | static int tinf_inflate_uncompressed_block(TINF_DATA *d) |
490 | { |
491 | if (d->curlen == 0) { |
492 | unsigned int length, invlength; |
493 | |
494 | /* get length */ |
495 | length = uzlib_get_byte(d); |
496 | length += 256 * uzlib_get_byte(d); |
497 | /* get one's complement of length */ |
498 | invlength = uzlib_get_byte(d); |
499 | invlength += 256 * uzlib_get_byte(d); |
500 | /* check length */ |
501 | if (length != (~invlength & 0x0000ffff)) return TINF_DATA_ERROR; |
502 | |
503 | /* increment length to properly return TINF_DONE below, without |
504 | producing data at the same time */ |
505 | d->curlen = length + 1; |
506 | |
507 | /* make sure we start next block on a byte boundary */ |
508 | d->bitcount = 0; |
509 | } |
510 | |
511 | if (--d->curlen == 0) { |
512 | return TINF_DONE; |
513 | } |
514 | |
515 | unsigned char c = uzlib_get_byte(d); |
516 | TINF_PUT(d, c); |
517 | return TINF_OK; |
518 | } |
519 | |
520 | /* ---------------------- * |
521 | * -- public functions -- * |
522 | * ---------------------- */ |
523 | |
524 | /* initialize global (static) data */ |
525 | void uzlib_init(void) |
526 | { |
527 | #ifdef RUNTIME_BITS_TABLES |
528 | /* build extra bits and base tables */ |
529 | tinf_build_bits_base(length_bits, length_base, 4, 3); |
530 | tinf_build_bits_base(dist_bits, dist_base, 2, 1); |
531 | |
532 | /* fix a special case */ |
533 | length_bits[28] = 0; |
534 | length_base[28] = 258; |
535 | #endif |
536 | } |
537 | |
538 | /* initialize decompression structure */ |
539 | void uzlib_uncompress_init(TINF_DATA *d, void *dict, unsigned int dictLen) |
540 | { |
541 | d->eof = 0; |
542 | d->bitcount = 0; |
543 | d->bfinal = 0; |
544 | d->btype = -1; |
545 | d->dict_size = dictLen; |
546 | d->dict_ring = dict; |
547 | d->dict_idx = 0; |
548 | d->curlen = 0; |
549 | } |
550 | |
551 | /* inflate next output bytes from compressed stream */ |
552 | int uzlib_uncompress(TINF_DATA *d) |
553 | { |
554 | do { |
555 | int res; |
556 | |
557 | /* start a new block */ |
558 | if (d->btype == -1) { |
559 | next_blk: |
560 | /* read final block flag */ |
561 | d->bfinal = tinf_getbit(d); |
562 | /* read block type (2 bits) */ |
563 | d->btype = tinf_read_bits(d, 2, 0); |
564 | |
565 | #if UZLIB_CONF_DEBUG_LOG >= 1 |
566 | printf("Started new block: type=%d final=%d\n" , d->btype, d->bfinal); |
567 | #endif |
568 | |
569 | if (d->btype == 1) { |
570 | /* build fixed huffman trees */ |
571 | tinf_build_fixed_trees(&d->ltree, &d->dtree); |
572 | } else if (d->btype == 2) { |
573 | /* decode trees from stream */ |
574 | res = tinf_decode_trees(d, &d->ltree, &d->dtree); |
575 | if (res != TINF_OK) { |
576 | return res; |
577 | } |
578 | } |
579 | } |
580 | |
581 | /* process current block */ |
582 | switch (d->btype) |
583 | { |
584 | case 0: |
585 | /* decompress uncompressed block */ |
586 | res = tinf_inflate_uncompressed_block(d); |
587 | break; |
588 | case 1: |
589 | case 2: |
590 | /* decompress block with fixed/dynamic huffman trees */ |
591 | /* trees were decoded previously, so it's the same routine for both */ |
592 | res = tinf_inflate_block_data(d, &d->ltree, &d->dtree); |
593 | break; |
594 | default: |
595 | return TINF_DATA_ERROR; |
596 | } |
597 | |
598 | if (res == TINF_DONE && !d->bfinal) { |
599 | /* the block has ended (without producing more data), but we |
600 | can't return without data, so start procesing next block */ |
601 | goto next_blk; |
602 | } |
603 | |
604 | if (res != TINF_OK) { |
605 | return res; |
606 | } |
607 | |
608 | } while (d->dest < d->dest_limit); |
609 | |
610 | return TINF_OK; |
611 | } |
612 | |
613 | /* inflate next output bytes from compressed stream, updating |
614 | checksum, and at the end of stream, verify it */ |
615 | int uzlib_uncompress_chksum(TINF_DATA *d) |
616 | { |
617 | int res; |
618 | unsigned char *data = d->dest; |
619 | |
620 | res = uzlib_uncompress(d); |
621 | |
622 | if (res < 0) return res; |
623 | |
624 | switch (d->checksum_type) { |
625 | |
626 | case TINF_CHKSUM_ADLER: |
627 | d->checksum = uzlib_adler32(data, d->dest - data, d->checksum); |
628 | break; |
629 | |
630 | case TINF_CHKSUM_CRC: |
631 | d->checksum = uzlib_crc32(data, d->dest - data, d->checksum); |
632 | break; |
633 | } |
634 | |
635 | if (res == TINF_DONE) { |
636 | unsigned int val; |
637 | |
638 | switch (d->checksum_type) { |
639 | |
640 | case TINF_CHKSUM_ADLER: |
641 | val = tinf_get_be_uint32(d); |
642 | if (d->checksum != val) { |
643 | return TINF_CHKSUM_ERROR; |
644 | } |
645 | break; |
646 | |
647 | case TINF_CHKSUM_CRC: |
648 | val = tinf_get_le_uint32(d); |
649 | if (~d->checksum != val) { |
650 | return TINF_CHKSUM_ERROR; |
651 | } |
652 | // Uncompressed size. TODO: Check |
653 | val = tinf_get_le_uint32(d); |
654 | break; |
655 | } |
656 | } |
657 | |
658 | return res; |
659 | } |
660 | |