1/*
2 * uzlib - tiny deflate/inflate library (deflate, gzip, zlib)
3 *
4 * Copyright (c) 2003 by Joergen Ibsen / Jibz
5 * All Rights Reserved
6 * http://www.ibsensoftware.com/
7 *
8 * Copyright (c) 2014-2018 by Paul Sokolovsky
9 *
10 * This software is provided 'as-is', without any express
11 * or implied warranty. In no event will the authors be
12 * held liable for any damages arising from the use of
13 * this software.
14 *
15 * Permission is granted to anyone to use this software
16 * for any purpose, including commercial applications,
17 * and to alter it and redistribute it freely, subject to
18 * the following restrictions:
19 *
20 * 1. The origin of this software must not be
21 * misrepresented; you must not claim that you
22 * wrote the original software. If you use this
23 * software in a product, an acknowledgment in
24 * the product documentation would be appreciated
25 * but is not required.
26 *
27 * 2. Altered source versions must be plainly marked
28 * as such, and must not be misrepresented as
29 * being the original software.
30 *
31 * 3. This notice may not be removed or altered from
32 * any source distribution.
33 */
34
35#include <assert.h>
36#include "tinf.h"
37
38#define UZLIB_DUMP_ARRAY(heading, arr, size) \
39 { \
40 printf("%s", heading); \
41 for (int i = 0; i < size; ++i) { \
42 printf(" %d", (arr)[i]); \
43 } \
44 printf("\n"); \
45 }
46
47uint32_t tinf_get_le_uint32(TINF_DATA *d);
48uint32_t tinf_get_be_uint32(TINF_DATA *d);
49
50/* --------------------------------------------------- *
51 * -- uninitialized global data (static structures) -- *
52 * --------------------------------------------------- */
53
54#ifdef RUNTIME_BITS_TABLES
55
56/* extra bits and base tables for length codes */
57unsigned char length_bits[30];
58unsigned short length_base[30];
59
60/* extra bits and base tables for distance codes */
61unsigned char dist_bits[30];
62unsigned short dist_base[30];
63
64#else
65
66const unsigned char length_bits[30] = {
67 0, 0, 0, 0, 0, 0, 0, 0,
68 1, 1, 1, 1, 2, 2, 2, 2,
69 3, 3, 3, 3, 4, 4, 4, 4,
70 5, 5, 5, 5
71};
72const unsigned short length_base[30] = {
73 3, 4, 5, 6, 7, 8, 9, 10,
74 11, 13, 15, 17, 19, 23, 27, 31,
75 35, 43, 51, 59, 67, 83, 99, 115,
76 131, 163, 195, 227, 258
77};
78
79const unsigned char dist_bits[30] = {
80 0, 0, 0, 0, 1, 1, 2, 2,
81 3, 3, 4, 4, 5, 5, 6, 6,
82 7, 7, 8, 8, 9, 9, 10, 10,
83 11, 11, 12, 12, 13, 13
84};
85const unsigned short dist_base[30] = {
86 1, 2, 3, 4, 5, 7, 9, 13,
87 17, 25, 33, 49, 65, 97, 129, 193,
88 257, 385, 513, 769, 1025, 1537, 2049, 3073,
89 4097, 6145, 8193, 12289, 16385, 24577
90};
91
92#endif
93
94/* special ordering of code length codes */
95const unsigned char clcidx[] = {
96 16, 17, 18, 0, 8, 7, 9, 6,
97 10, 5, 11, 4, 12, 3, 13, 2,
98 14, 1, 15
99};
100
101/* ----------------------- *
102 * -- utility functions -- *
103 * ----------------------- */
104
105#ifdef RUNTIME_BITS_TABLES
106/* build extra bits and base tables */
107static void tinf_build_bits_base(unsigned char *bits, unsigned short *base, int delta, int first)
108{
109 int i, sum;
110
111 /* build bits table */
112 for (i = 0; i < delta; ++i) bits[i] = 0;
113 for (i = 0; i < 30 - delta; ++i) bits[i + delta] = i / delta;
114
115 /* build base table */
116 for (sum = first, i = 0; i < 30; ++i)
117 {
118 base[i] = sum;
119 sum += 1 << bits[i];
120 }
121}
122#endif
123
124/* build the fixed huffman trees */
125static void tinf_build_fixed_trees(TINF_TREE *lt, TINF_TREE *dt)
126{
127 int i;
128
129 /* build fixed length tree */
130 for (i = 0; i < 7; ++i) lt->table[i] = 0;
131
132 lt->table[7] = 24;
133 lt->table[8] = 152;
134 lt->table[9] = 112;
135
136 for (i = 0; i < 24; ++i) lt->trans[i] = 256 + i;
137 for (i = 0; i < 144; ++i) lt->trans[24 + i] = i;
138 for (i = 0; i < 8; ++i) lt->trans[24 + 144 + i] = 280 + i;
139 for (i = 0; i < 112; ++i) lt->trans[24 + 144 + 8 + i] = 144 + i;
140
141 /* build fixed distance tree */
142 for (i = 0; i < 5; ++i) dt->table[i] = 0;
143
144 dt->table[5] = 32;
145
146 for (i = 0; i < 32; ++i) dt->trans[i] = i;
147}
148
149/* given an array of code lengths, build a tree */
150static void tinf_build_tree(TINF_TREE *t, const unsigned char *lengths, unsigned int num)
151{
152 unsigned short offs[16];
153 unsigned int i, sum;
154
155 /* clear code length count table */
156 for (i = 0; i < 16; ++i) t->table[i] = 0;
157
158 /* scan symbol lengths, and sum code length counts */
159 for (i = 0; i < num; ++i) t->table[lengths[i]]++;
160
161 #if UZLIB_CONF_DEBUG_LOG >= 2
162 UZLIB_DUMP_ARRAY("codelen counts:", t->table, TINF_ARRAY_SIZE(t->table));
163 #endif
164
165 /* In the lengths array, 0 means unused code. So, t->table[0] now contains
166 number of unused codes. But table's purpose is to contain # of codes of
167 particular length, and there're 0 codes of length 0. */
168 t->table[0] = 0;
169
170 /* compute offset table for distribution sort */
171 for (sum = 0, i = 0; i < 16; ++i)
172 {
173 offs[i] = sum;
174 sum += t->table[i];
175 }
176
177 #if UZLIB_CONF_DEBUG_LOG >= 2
178 UZLIB_DUMP_ARRAY("codelen offsets:", offs, TINF_ARRAY_SIZE(offs));
179 #endif
180
181 /* create code->symbol translation table (symbols sorted by code) */
182 for (i = 0; i < num; ++i)
183 {
184 if (lengths[i]) t->trans[offs[lengths[i]]++] = i;
185 }
186}
187
188/* ---------------------- *
189 * -- decode functions -- *
190 * ---------------------- */
191
192unsigned char uzlib_get_byte(TINF_DATA *d)
193{
194 /* If end of source buffer is not reached, return next byte from source
195 buffer. */
196 if (d->source < d->source_limit) {
197 return *d->source++;
198 }
199
200 /* Otherwise if there's callback and we haven't seen EOF yet, try to
201 read next byte using it. (Note: the callback can also update ->source
202 and ->source_limit). */
203 if (d->readSource && !d->eof) {
204 int val = d->readSource(d);
205 if (val >= 0) {
206 return (unsigned char)val;
207 }
208 }
209
210 /* Otherwise, we hit EOF (either from ->readSource() or from exhaustion
211 of the buffer), and it will be "sticky", i.e. further calls to this
212 function will end up here too. */
213 d->eof = true;
214
215 return 0;
216}
217
218uint32_t tinf_get_le_uint32(TINF_DATA *d)
219{
220 uint32_t val = 0;
221 int i;
222 for (i = 4; i--;) {
223 val = val >> 8 | ((uint32_t)uzlib_get_byte(d)) << 24;
224 }
225 return val;
226}
227
228uint32_t tinf_get_be_uint32(TINF_DATA *d)
229{
230 uint32_t val = 0;
231 int i;
232 for (i = 4; i--;) {
233 val = val << 8 | uzlib_get_byte(d);
234 }
235 return val;
236}
237
238/* get one bit from source stream */
239static int tinf_getbit(TINF_DATA *d)
240{
241 unsigned int bit;
242
243 /* check if tag is empty */
244 if (!d->bitcount--)
245 {
246 /* load next tag */
247 d->tag = uzlib_get_byte(d);
248 d->bitcount = 7;
249 }
250
251 /* shift bit out of tag */
252 bit = d->tag & 0x01;
253 d->tag >>= 1;
254
255 return bit;
256}
257
258/* read a num bit value from a stream and add base */
259static unsigned int tinf_read_bits(TINF_DATA *d, int num, int base)
260{
261 unsigned int val = 0;
262
263 /* read num bits */
264 if (num)
265 {
266 unsigned int limit = 1 << (num);
267 unsigned int mask;
268
269 for (mask = 1; mask < limit; mask *= 2)
270 if (tinf_getbit(d)) val += mask;
271 }
272
273 return val + base;
274}
275
276/* given a data stream and a tree, decode a symbol */
277static int tinf_decode_symbol(TINF_DATA *d, TINF_TREE *t)
278{
279 int sum = 0, cur = 0, len = 0;
280
281 /* get more bits while code value is above sum */
282 do {
283
284 cur = 2*cur + tinf_getbit(d);
285
286 if (++len == TINF_ARRAY_SIZE(t->table)) {
287 return TINF_DATA_ERROR;
288 }
289
290 sum += t->table[len];
291 cur -= t->table[len];
292
293 } while (cur >= 0);
294
295 sum += cur;
296 #if UZLIB_CONF_PARANOID_CHECKS
297 if (sum < 0 || sum >= TINF_ARRAY_SIZE(t->trans)) {
298 return TINF_DATA_ERROR;
299 }
300 #endif
301
302 return t->trans[sum];
303}
304
305/* given a data stream, decode dynamic trees from it */
306static int tinf_decode_trees(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
307{
308 /* code lengths for 288 literal/len symbols and 32 dist symbols */
309 unsigned char lengths[288+32];
310 unsigned int hlit, hdist, hclen, hlimit;
311 unsigned int i, num, length;
312
313 /* get 5 bits HLIT (257-286) */
314 hlit = tinf_read_bits(d, 5, 257);
315
316 /* get 5 bits HDIST (1-32) */
317 hdist = tinf_read_bits(d, 5, 1);
318
319 /* get 4 bits HCLEN (4-19) */
320 hclen = tinf_read_bits(d, 4, 4);
321
322 for (i = 0; i < 19; ++i) lengths[i] = 0;
323
324 /* read code lengths for code length alphabet */
325 for (i = 0; i < hclen; ++i)
326 {
327 /* get 3 bits code length (0-7) */
328 unsigned int clen = tinf_read_bits(d, 3, 0);
329
330 lengths[clcidx[i]] = clen;
331 }
332
333 /* build code length tree, temporarily use length tree */
334 tinf_build_tree(lt, lengths, 19);
335
336 /* decode code lengths for the dynamic trees */
337 hlimit = hlit + hdist;
338 for (num = 0; num < hlimit; )
339 {
340 int sym = tinf_decode_symbol(d, lt);
341 unsigned char fill_value = 0;
342 int lbits, lbase = 3;
343
344 /* error decoding */
345 if (sym < 0) return sym;
346
347 switch (sym)
348 {
349 case 16:
350 /* copy previous code length 3-6 times (read 2 bits) */
351 if (num == 0) return TINF_DATA_ERROR;
352 fill_value = lengths[num - 1];
353 lbits = 2;
354 break;
355 case 17:
356 /* repeat code length 0 for 3-10 times (read 3 bits) */
357 lbits = 3;
358 break;
359 case 18:
360 /* repeat code length 0 for 11-138 times (read 7 bits) */
361 lbits = 7;
362 lbase = 11;
363 break;
364 default:
365 /* values 0-15 represent the actual code lengths */
366 lengths[num++] = sym;
367 /* continue the for loop */
368 continue;
369 }
370
371 /* special code length 16-18 are handled here */
372 length = tinf_read_bits(d, lbits, lbase);
373 if (num + length > hlimit) return TINF_DATA_ERROR;
374 for (; length; --length)
375 {
376 lengths[num++] = fill_value;
377 }
378 }
379
380 #if UZLIB_CONF_DEBUG_LOG >= 2
381 printf("lit code lengths (%d):", hlit);
382 UZLIB_DUMP_ARRAY("", lengths, hlit);
383 printf("dist code lengths (%d):", hdist);
384 UZLIB_DUMP_ARRAY("", lengths + hlit, hdist);
385 #endif
386
387 #if UZLIB_CONF_PARANOID_CHECKS
388 /* Check that there's "end of block" symbol */
389 if (lengths[256] == 0) {
390 return TINF_DATA_ERROR;
391 }
392 #endif
393
394 /* build dynamic trees */
395 tinf_build_tree(lt, lengths, hlit);
396 tinf_build_tree(dt, lengths + hlit, hdist);
397
398 return TINF_OK;
399}
400
401/* ----------------------------- *
402 * -- block inflate functions -- *
403 * ----------------------------- */
404
405/* given a stream and two trees, inflate next byte of output */
406static int tinf_inflate_block_data(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
407{
408 if (d->curlen == 0) {
409 unsigned int offs;
410 int dist;
411 int sym = tinf_decode_symbol(d, lt);
412 //printf("huff sym: %02x\n", sym);
413
414 if (d->eof) {
415 return TINF_DATA_ERROR;
416 }
417
418 /* literal byte */
419 if (sym < 256) {
420 TINF_PUT(d, sym);
421 return TINF_OK;
422 }
423
424 /* end of block */
425 if (sym == 256) {
426 return TINF_DONE;
427 }
428
429 /* substring from sliding dictionary */
430 sym -= 257;
431 if (sym >= 29) {
432 return TINF_DATA_ERROR;
433 }
434
435 /* possibly get more bits from length code */
436 d->curlen = tinf_read_bits(d, length_bits[sym], length_base[sym]);
437
438 dist = tinf_decode_symbol(d, dt);
439 if (dist >= 30) {
440 return TINF_DATA_ERROR;
441 }
442
443 /* possibly get more bits from distance code */
444 offs = tinf_read_bits(d, dist_bits[dist], dist_base[dist]);
445
446 /* calculate and validate actual LZ offset to use */
447 if (d->dict_ring) {
448 if (offs > d->dict_size) {
449 return TINF_DICT_ERROR;
450 }
451 /* Note: unlike full-dest-in-memory case below, we don't
452 try to catch offset which points to not yet filled
453 part of the dictionary here. Doing so would require
454 keeping another variable to track "filled in" size
455 of the dictionary. Appearance of such an offset cannot
456 lead to accessing memory outside of the dictionary
457 buffer, and clients which don't want to leak unrelated
458 information, should explicitly initialize dictionary
459 buffer passed to uzlib. */
460
461 d->lzOff = d->dict_idx - offs;
462 if (d->lzOff < 0) {
463 d->lzOff += d->dict_size;
464 }
465 } else {
466 /* catch trying to point before the start of dest buffer */
467 if (offs > (unsigned int)(d->dest - d->destStart)) {
468 return TINF_DATA_ERROR;
469 }
470 d->lzOff = -offs;
471 }
472 }
473
474 /* copy next byte from dict substring */
475 if (d->dict_ring) {
476 TINF_PUT(d, d->dict_ring[d->lzOff]);
477 if ((unsigned)++d->lzOff == d->dict_size) {
478 d->lzOff = 0;
479 }
480 } else {
481 d->dest[0] = d->dest[d->lzOff];
482 d->dest++;
483 }
484 d->curlen--;
485 return TINF_OK;
486}
487
488/* inflate next byte from uncompressed block of data */
489static int tinf_inflate_uncompressed_block(TINF_DATA *d)
490{
491 if (d->curlen == 0) {
492 unsigned int length, invlength;
493
494 /* get length */
495 length = uzlib_get_byte(d);
496 length += 256 * uzlib_get_byte(d);
497 /* get one's complement of length */
498 invlength = uzlib_get_byte(d);
499 invlength += 256 * uzlib_get_byte(d);
500 /* check length */
501 if (length != (~invlength & 0x0000ffff)) return TINF_DATA_ERROR;
502
503 /* increment length to properly return TINF_DONE below, without
504 producing data at the same time */
505 d->curlen = length + 1;
506
507 /* make sure we start next block on a byte boundary */
508 d->bitcount = 0;
509 }
510
511 if (--d->curlen == 0) {
512 return TINF_DONE;
513 }
514
515 unsigned char c = uzlib_get_byte(d);
516 TINF_PUT(d, c);
517 return TINF_OK;
518}
519
520/* ---------------------- *
521 * -- public functions -- *
522 * ---------------------- */
523
524/* initialize global (static) data */
525void uzlib_init(void)
526{
527#ifdef RUNTIME_BITS_TABLES
528 /* build extra bits and base tables */
529 tinf_build_bits_base(length_bits, length_base, 4, 3);
530 tinf_build_bits_base(dist_bits, dist_base, 2, 1);
531
532 /* fix a special case */
533 length_bits[28] = 0;
534 length_base[28] = 258;
535#endif
536}
537
538/* initialize decompression structure */
539void uzlib_uncompress_init(TINF_DATA *d, void *dict, unsigned int dictLen)
540{
541 d->eof = 0;
542 d->bitcount = 0;
543 d->bfinal = 0;
544 d->btype = -1;
545 d->dict_size = dictLen;
546 d->dict_ring = dict;
547 d->dict_idx = 0;
548 d->curlen = 0;
549}
550
551/* inflate next output bytes from compressed stream */
552int uzlib_uncompress(TINF_DATA *d)
553{
554 do {
555 int res;
556
557 /* start a new block */
558 if (d->btype == -1) {
559next_blk:
560 /* read final block flag */
561 d->bfinal = tinf_getbit(d);
562 /* read block type (2 bits) */
563 d->btype = tinf_read_bits(d, 2, 0);
564
565 #if UZLIB_CONF_DEBUG_LOG >= 1
566 printf("Started new block: type=%d final=%d\n", d->btype, d->bfinal);
567 #endif
568
569 if (d->btype == 1) {
570 /* build fixed huffman trees */
571 tinf_build_fixed_trees(&d->ltree, &d->dtree);
572 } else if (d->btype == 2) {
573 /* decode trees from stream */
574 res = tinf_decode_trees(d, &d->ltree, &d->dtree);
575 if (res != TINF_OK) {
576 return res;
577 }
578 }
579 }
580
581 /* process current block */
582 switch (d->btype)
583 {
584 case 0:
585 /* decompress uncompressed block */
586 res = tinf_inflate_uncompressed_block(d);
587 break;
588 case 1:
589 case 2:
590 /* decompress block with fixed/dynamic huffman trees */
591 /* trees were decoded previously, so it's the same routine for both */
592 res = tinf_inflate_block_data(d, &d->ltree, &d->dtree);
593 break;
594 default:
595 return TINF_DATA_ERROR;
596 }
597
598 if (res == TINF_DONE && !d->bfinal) {
599 /* the block has ended (without producing more data), but we
600 can't return without data, so start procesing next block */
601 goto next_blk;
602 }
603
604 if (res != TINF_OK) {
605 return res;
606 }
607
608 } while (d->dest < d->dest_limit);
609
610 return TINF_OK;
611}
612
613/* inflate next output bytes from compressed stream, updating
614 checksum, and at the end of stream, verify it */
615int uzlib_uncompress_chksum(TINF_DATA *d)
616{
617 int res;
618 unsigned char *data = d->dest;
619
620 res = uzlib_uncompress(d);
621
622 if (res < 0) return res;
623
624 switch (d->checksum_type) {
625
626 case TINF_CHKSUM_ADLER:
627 d->checksum = uzlib_adler32(data, d->dest - data, d->checksum);
628 break;
629
630 case TINF_CHKSUM_CRC:
631 d->checksum = uzlib_crc32(data, d->dest - data, d->checksum);
632 break;
633 }
634
635 if (res == TINF_DONE) {
636 unsigned int val;
637
638 switch (d->checksum_type) {
639
640 case TINF_CHKSUM_ADLER:
641 val = tinf_get_be_uint32(d);
642 if (d->checksum != val) {
643 return TINF_CHKSUM_ERROR;
644 }
645 break;
646
647 case TINF_CHKSUM_CRC:
648 val = tinf_get_le_uint32(d);
649 if (~d->checksum != val) {
650 return TINF_CHKSUM_ERROR;
651 }
652 // Uncompressed size. TODO: Check
653 val = tinf_get_le_uint32(d);
654 break;
655 }
656 }
657
658 return res;
659}
660