tinflate.c source code [MicroPython/extmod/uzlib/tinflate.c]

1	/*
2	* uzlib - tiny deflate/inflate library (deflate, gzip, zlib)
3	*
4	* Copyright (c) 2003 by Joergen Ibsen / Jibz
5	* All Rights Reserved
6	* http://www.ibsensoftware.com/
7	*
8	* Copyright (c) 2014-2018 by Paul Sokolovsky
9	*
10	* This software is provided 'as-is', without any express
11	* or implied warranty. In no event will the authors be
12	* held liable for any damages arising from the use of
13	* this software.
14	*
15	* Permission is granted to anyone to use this software
16	* for any purpose, including commercial applications,
17	* and to alter it and redistribute it freely, subject to
18	* the following restrictions:
19	*
20	* 1. The origin of this software must not be
21	* misrepresented; you must not claim that you
22	* wrote the original software. If you use this
23	* software in a product, an acknowledgment in
24	* the product documentation would be appreciated
25	* but is not required.
26	*
27	* 2. Altered source versions must be plainly marked
28	* as such, and must not be misrepresented as
29	* being the original software.
30	*
31	* 3. This notice may not be removed or altered from
32	* any source distribution.
33	*/
34
35	#include <assert.h>
36	#include "tinf.h"
37
38	#define UZLIB_DUMP_ARRAY(heading, arr, size) \
39	{ \
40	printf("%s", heading); \
41	for (int i = 0; i < size; ++i) { \
42	printf(" %d", (arr)[i]); \
43	} \
44	printf("\n"); \
45	}
46
47	uint32_t tinf_get_le_uint32(TINF_DATA *d);
48	uint32_t tinf_get_be_uint32(TINF_DATA *d);
49
50	/* --------------------------------------------------- *
51	* -- uninitialized global data (static structures) -- *
52	* --------------------------------------------------- */
53
54	#ifdef RUNTIME_BITS_TABLES
55
56	/ extra bits and base tables for length codes /
57	unsigned char length_bits[`30`];
58	unsigned short length_base[`30`];
59
60	/ extra bits and base tables for distance codes /
61	unsigned char dist_bits[`30`];
62	unsigned short dist_base[`30`];
63
64	#else
65
66	const unsigned char length_bits[`30`] = {
67	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
68	`1`, `1`, `1`, `1`, `2`, `2`, `2`, `2`,
69	`3`, `3`, `3`, `3`, `4`, `4`, `4`, `4`,
70	`5`, `5`, `5`, `5`
71	};
72	const unsigned short length_base[`30`] = {
73	`3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`,
74	`11`, `13`, `15`, `17`, `19`, `23`, `27`, `31`,
75	`35`, `43`, `51`, `59`, `67`, `83`, `99`, `115`,
76	`131`, `163`, `195`, `227`, `258`
77	};
78
79	const unsigned char dist_bits[`30`] = {
80	`0`, `0`, `0`, `0`, `1`, `1`, `2`, `2`,
81	`3`, `3`, `4`, `4`, `5`, `5`, `6`, `6`,
82	`7`, `7`, `8`, `8`, `9`, `9`, `10`, `10`,
83	`11`, `11`, `12`, `12`, `13`, `13`
84	};
85	const unsigned short dist_base[`30`] = {
86	`1`, `2`, `3`, `4`, `5`, `7`, `9`, `13`,
87	`17`, `25`, `33`, `49`, `65`, `97`, `129`, `193`,
88	`257`, `385`, `513`, `769`, `1025`, `1537`, `2049`, `3073`,
89	`4097`, `6145`, `8193`, `12289`, `16385`, `24577`
90	};
91
92	#endif
93
94	/ special ordering of code length codes /
95	const unsigned char clcidx[] = {
96	`16`, `17`, `18`, `0`, `8`, `7`, `9`, `6`,
97	`10`, `5`, `11`, `4`, `12`, `3`, `13`, `2`,
98	`14`, `1`, `15`
99	};
100
101	/* ----------------------- *
102	* -- utility functions -- *
103	* ----------------------- */
104
105	#ifdef RUNTIME_BITS_TABLES
106	/ build extra bits and base tables /
107	static void tinf_build_bits_base(unsigned char bits, unsigned* short base, int* delta, int first)
108	{
109	int i, sum;
110
111	/ build bits table /
112	for (i = `0`; i < delta; ++i) bits[i] = `0`;
113	for (i = `0`; i < `30` - delta; ++i) bits[i + delta] = i / delta;
114
115	/ build base table /
116	for (sum = first, i = `0`; i < `30`; ++i)
117	{
118	base[i] = sum;
119	sum += `1` << bits[i];
120	}
121	}
122	#endif
123
124	/ build the fixed huffman trees /
125	static void tinf_build_fixed_trees(TINF_TREE lt, TINF_TREE dt)
126	{
127	int i;
128
129	/ build fixed length tree /
130	for (i = `0`; i < `7`; ++i) lt->table[i] = `0`;
131
132	lt->table[`7`] = `24`;
133	lt->table[`8`] = `152`;
134	lt->table[`9`] = `112`;
135
136	for (i = `0`; i < `24`; ++i) lt->trans[i] = `256` + i;
137	for (i = `0`; i < `144`; ++i) lt->trans[`24` + i] = i;
138	for (i = `0`; i < `8`; ++i) lt->trans[`24` + `144` + i] = `280` + i;
139	for (i = `0`; i < `112`; ++i) lt->trans[`24` + `144` + `8` + i] = `144` + i;
140
141	/ build fixed distance tree /
142	for (i = `0`; i < `5`; ++i) dt->table[i] = `0`;
143
144	dt->table[`5`] = `32`;
145
146	for (i = `0`; i < `32`; ++i) dt->trans[i] = i;
147	}
148
149	/ given an array of code lengths, build a tree /
150	static void tinf_build_tree(TINF_TREE t, const* unsigned char lengths, unsigned* int num)
151	{
152	unsigned short offs[`16`];
153	unsigned int i, sum;
154
155	/ clear code length count table /
156	for (i = `0`; i < `16`; ++i) t->table[i] = `0`;
157
158	/ scan symbol lengths, and sum code length counts /
159	for (i = `0`; i < num; ++i) t->table[lengths[i]]++;
160
161	#if UZLIB_CONF_DEBUG_LOG >= 2
162	UZLIB_DUMP_ARRAY("codelen counts:", t->table, TINF_ARRAY_SIZE(t->table));
163	#endif
164
165	/ In the lengths array, 0 means unused code. So, t->table[0] now contains*
166	number of unused codes. But table's purpose is to contain # of codes of
167	particular length, and there're 0 codes of length 0. /*
168	t->table[`0`] = `0`;
169
170	/ compute offset table for distribution sort /
171	for (sum = `0`, i = `0`; i < `16`; ++i)
172	{
173	offs[i] = sum;
174	sum += t->table[i];
175	}
176
177	#if UZLIB_CONF_DEBUG_LOG >= 2
178	UZLIB_DUMP_ARRAY("codelen offsets:", offs, TINF_ARRAY_SIZE(offs));
179	#endif
180
181	/ create code->symbol translation table (symbols sorted by code) /
182	for (i = `0`; i < num; ++i)
183	{
184	if (lengths[i]) t->trans[offs[lengths[i]]++] = i;
185	}
186	}
187
188	/* ---------------------- *
189	* -- decode functions -- *
190	* ---------------------- */
191
192	unsigned char uzlib_get_byte(TINF_DATA *d)
193	{
194	/ If end of source buffer is not reached, return next byte from source*
195	buffer. /*
196	if (d->source < d->source_limit) {
197	return *d->source++;
198	}
199
200	/ Otherwise if there's callback and we haven't seen EOF yet, try to*
201	read next byte using it. (Note: the callback can also update ->source
202	and ->source_limit). /*
203	if (d->readSource && !d->eof) {
204	int val = d->readSource(d);
205	if (val >= `0`) {
206	return (unsigned char)val;
207	}
208	}
209
210	/ Otherwise, we hit EOF (either from ->readSource() or from exhaustion*
211	of the buffer), and it will be "sticky", i.e. further calls to this
212	function will end up here too. /*
213	d->eof = true;
214
215	return `0`;
216	}
217
218	uint32_t tinf_get_le_uint32(TINF_DATA *d)
219	{
220	uint32_t val = `0`;
221	int i;
222	for (i = `4`; i--;) {
223	val = val >> `8` \| ((uint32_t)uzlib_get_byte(d)) << `24`;
224	}
225	return val;
226	}
227
228	uint32_t tinf_get_be_uint32(TINF_DATA *d)
229	{
230	uint32_t val = `0`;
231	int i;
232	for (i = `4`; i--;) {
233	val = val << `8` \| uzlib_get_byte(d);
234	}
235	return val;
236	}
237
238	/ get one bit from source stream /
239	static int tinf_getbit(TINF_DATA *d)
240	{
241	unsigned int bit;
242
243	/ check if tag is empty /
244	if (!d->bitcount--)
245	{
246	/ load next tag /
247	d->tag = uzlib_get_byte(d);
248	d->bitcount = `7`;
249	}
250
251	/ shift bit out of tag /
252	bit = d->tag & `0x01`;
253	d->tag >>= `1`;
254
255	return bit;
256	}
257
258	/ read a num bit value from a stream and add base /
259	static unsigned int tinf_read_bits(TINF_DATA d, int* num, int base)
260	{
261	unsigned int val = `0`;
262
263	/ read num bits /
264	if (num)
265	{
266	unsigned int limit = `1` << (num);
267	unsigned int mask;
268
269	for (mask = `1`; mask < limit; mask *= `2`)
270	if (tinf_getbit(d)) val += mask;
271	}
272
273	return val + base;
274	}
275
276	/ given a data stream and a tree, decode a symbol /
277	static int tinf_decode_symbol(TINF_DATA d, TINF_TREE t)
278	{
279	int sum = `0`, cur = `0`, len = `0`;
280
281	/ get more bits while code value is above sum /
282	do {
283
284	cur = `2`*cur + tinf_getbit(d);
285
286	if (++len == TINF_ARRAY_SIZE(t->table)) {
287	return TINF_DATA_ERROR;
288	}
289
290	sum += t->table[len];
291	cur -= t->table[len];
292
293	} while (cur >= `0`);
294
295	sum += cur;
296	#if UZLIB_CONF_PARANOID_CHECKS
297	if (sum < `0` \|\| sum >= TINF_ARRAY_SIZE(t->trans)) {
298	return TINF_DATA_ERROR;
299	}
300	#endif
301
302	return t->trans[sum];
303	}
304
305	/ given a data stream, decode dynamic trees from it /
306	static int tinf_decode_trees(TINF_DATA d, TINF_TREE lt, TINF_TREE *dt)
307	{
308	/ code lengths for 288 literal/len symbols and 32 dist symbols /
309	unsigned char lengths[`288`+`32`];
310	unsigned int hlit, hdist, hclen, hlimit;
311	unsigned int i, num, length;
312
313	/ get 5 bits HLIT (257-286) /
314	hlit = tinf_read_bits(d, `5`, `257`);
315
316	/ get 5 bits HDIST (1-32) /
317	hdist = tinf_read_bits(d, `5`, `1`);
318
319	/ get 4 bits HCLEN (4-19) /
320	hclen = tinf_read_bits(d, `4`, `4`);
321
322	for (i = `0`; i < `19`; ++i) lengths[i] = `0`;
323
324	/ read code lengths for code length alphabet /
325	for (i = `0`; i < hclen; ++i)
326	{
327	/ get 3 bits code length (0-7) /
328	unsigned int clen = tinf_read_bits(d, `3`, `0`);
329
330	lengths[clcidx[i]] = clen;
331	}
332
333	/ build code length tree, temporarily use length tree /
334	tinf_build_tree(lt, lengths, `19`);
335
336	/ decode code lengths for the dynamic trees /
337	hlimit = hlit + hdist;
338	for (num = `0`; num < hlimit; )
339	{
340	int sym = tinf_decode_symbol(d, lt);
341	unsigned char fill_value = `0`;
342	int lbits, lbase = `3`;
343
344	/ error decoding /
345	if (sym < `0`) return sym;
346
347	switch (sym)
348	{
349	case `16`:
350	/ copy previous code length 3-6 times (read 2 bits) /
351	if (num == `0`) return TINF_DATA_ERROR;
352	fill_value = lengths[num - `1`];
353	lbits = `2`;
354	break;
355	case `17`:
356	/ repeat code length 0 for 3-10 times (read 3 bits) /
357	lbits = `3`;
358	break;
359	case `18`:
360	/ repeat code length 0 for 11-138 times (read 7 bits) /
361	lbits = `7`;
362	lbase = `11`;
363	break;
364	default:
365	/ values 0-15 represent the actual code lengths /
366	lengths[num++] = sym;
367	/ continue the for loop /
368	continue;
369	}
370
371	/ special code length 16-18 are handled here /
372	length = tinf_read_bits(d, lbits, lbase);
373	if (num + length > hlimit) return TINF_DATA_ERROR;
374	for (; length; --length)
375	{
376	lengths[num++] = fill_value;
377	}
378	}
379
380	#if UZLIB_CONF_DEBUG_LOG >= 2
381	printf("lit code lengths (%d):", hlit);
382	UZLIB_DUMP_ARRAY("", lengths, hlit);
383	printf("dist code lengths (%d):", hdist);
384	UZLIB_DUMP_ARRAY("", lengths + hlit, hdist);
385	#endif
386
387	#if UZLIB_CONF_PARANOID_CHECKS
388	/ Check that there's "end of block" symbol /
389	if (lengths[`256`] == `0`) {
390	return TINF_DATA_ERROR;
391	}
392	#endif
393
394	/ build dynamic trees /
395	tinf_build_tree(lt, lengths, hlit);
396	tinf_build_tree(dt, lengths + hlit, hdist);
397
398	return TINF_OK;
399	}
400
401	/* ----------------------------- *
402	* -- block inflate functions -- *
403	* ----------------------------- */
404
405	/ given a stream and two trees, inflate next byte of output /
406	static int tinf_inflate_block_data(TINF_DATA d, TINF_TREE lt, TINF_TREE *dt)
407	{
408	if (d->curlen == `0`) {
409	unsigned int offs;
410	int dist;
411	int sym = tinf_decode_symbol(d, lt);
412	//printf("huff sym: %02x\n", sym);
413
414	if (d->eof) {
415	return TINF_DATA_ERROR;
416	}
417
418	/ literal byte /
419	if (sym < `256`) {
420	TINF_PUT(d, sym);
421	return TINF_OK;
422	}
423
424	/ end of block /
425	if (sym == `256`) {
426	return TINF_DONE;
427	}
428
429	/ substring from sliding dictionary /
430	sym -= `257`;
431	if (sym >= `29`) {
432	return TINF_DATA_ERROR;
433	}
434
435	/ possibly get more bits from length code /
436	d->curlen = tinf_read_bits(d, length_bits[sym], length_base[sym]);
437
438	dist = tinf_decode_symbol(d, dt);
439	if (dist >= `30`) {
440	return TINF_DATA_ERROR;
441	}
442
443	/ possibly get more bits from distance code /
444	offs = tinf_read_bits(d, dist_bits[dist], dist_base[dist]);
445
446	/ calculate and validate actual LZ offset to use /
447	if (d->dict_ring) {
448	if (offs > d->dict_size) {
449	return TINF_DICT_ERROR;
450	}
451	/ Note: unlike full-dest-in-memory case below, we don't*
452	try to catch offset which points to not yet filled
453	part of the dictionary here. Doing so would require
454	keeping another variable to track "filled in" size
455	of the dictionary. Appearance of such an offset cannot
456	lead to accessing memory outside of the dictionary
457	buffer, and clients which don't want to leak unrelated
458	information, should explicitly initialize dictionary
459	buffer passed to uzlib. /*
460
461	d->lzOff = d->dict_idx - offs;
462	if (d->lzOff < `0`) {
463	d->lzOff += d->dict_size;
464	}
465	} else {
466	/ catch trying to point before the start of dest buffer /
467	if (offs > (unsigned int)(d->dest - d->destStart)) {
468	return TINF_DATA_ERROR;
469	}
470	d->lzOff = -offs;
471	}
472	}
473
474	/ copy next byte from dict substring /
475	if (d->dict_ring) {
476	TINF_PUT(d, d->dict_ring[d->lzOff]);
477	if ((unsigned)++d->lzOff == d->dict_size) {
478	d->lzOff = `0`;
479	}
480	} else {
481	d->dest[`0`] = d->dest[d->lzOff];
482	d->dest++;
483	}
484	d->curlen--;
485	return TINF_OK;
486	}
487
488	/ inflate next byte from uncompressed block of data /
489	static int tinf_inflate_uncompressed_block(TINF_DATA *d)
490	{
491	if (d->curlen == `0`) {
492	unsigned int length, invlength;
493
494	/ get length /
495	length = uzlib_get_byte(d);
496	length += `256` * uzlib_get_byte(d);
497	/ get one's complement of length /
498	invlength = uzlib_get_byte(d);
499	invlength += `256` * uzlib_get_byte(d);
500	/ check length /
501	if (length != (~invlength & `0x0000ffff`)) return TINF_DATA_ERROR;
502
503	/ increment length to properly return TINF_DONE below, without*
504	producing data at the same time /*
505	d->curlen = length + `1`;
506
507	/ make sure we start next block on a byte boundary /
508	d->bitcount = `0`;
509	}
510
511	if (--d->curlen == `0`) {
512	return TINF_DONE;
513	}
514
515	unsigned char c = uzlib_get_byte(d);
516	TINF_PUT(d, c);
517	return TINF_OK;
518	}
519
520	/* ---------------------- *
521	* -- public functions -- *
522	* ---------------------- */
523
524	/ initialize global (static) data /
525	void uzlib_init(void)
526	{
527	#ifdef RUNTIME_BITS_TABLES
528	/ build extra bits and base tables /
529	tinf_build_bits_base(length_bits, length_base, `4`, `3`);
530	tinf_build_bits_base(dist_bits, dist_base, `2`, `1`);
531
532	/ fix a special case /
533	length_bits[`28`] = `0`;
534	length_base[`28`] = `258`;
535	#endif
536	}
537
538	/ initialize decompression structure /
539	void uzlib_uncompress_init(TINF_DATA d, void* dict, unsigned* int dictLen)
540	{
541	d->eof = `0`;
542	d->bitcount = `0`;
543	d->bfinal = `0`;
544	d->btype = -`1`;
545	d->dict_size = dictLen;
546	d->dict_ring = dict;
547	d->dict_idx = `0`;
548	d->curlen = `0`;
549	}
550
551	/ inflate next output bytes from compressed stream /
552	int uzlib_uncompress(TINF_DATA *d)
553	{
554	do {
555	int res;
556
557	/ start a new block /
558	if (d->btype == -`1`) {
559	next_blk:
560	/ read final block flag /
561	d->bfinal = tinf_getbit(d);
562	/ read block type (2 bits) /
563	d->btype = tinf_read_bits(d, `2`, `0`);
564
565	#if UZLIB_CONF_DEBUG_LOG >= 1
566	printf("Started new block: type=%d final=%d\n", d->btype, d->bfinal);
567	#endif
568
569	if (d->btype == `1`) {
570	/ build fixed huffman trees /
571	tinf_build_fixed_trees(&d->ltree, &d->dtree);
572	} else if (d->btype == `2`) {
573	/ decode trees from stream /
574	res = tinf_decode_trees(d, &d->ltree, &d->dtree);
575	if (res != TINF_OK) {
576	return res;
577	}
578	}
579	}
580
581	/ process current block /
582	switch (d->btype)
583	{
584	case `0`:
585	/ decompress uncompressed block /
586	res = tinf_inflate_uncompressed_block(d);
587	break;
588	case `1`:
589	case `2`:
590	/ decompress block with fixed/dynamic huffman trees /
591	/ trees were decoded previously, so it's the same routine for both /
592	res = tinf_inflate_block_data(d, &d->ltree, &d->dtree);
593	break;
594	default:
595	return TINF_DATA_ERROR;
596	}
597
598	if (res == TINF_DONE && !d->bfinal) {
599	/ the block has ended (without producing more data), but we*
600	can't return without data, so start procesing next block /*
601	goto next_blk;
602	}
603
604	if (res != TINF_OK) {
605	return res;
606	}
607
608	} while (d->dest < d->dest_limit);
609
610	return TINF_OK;
611	}
612
613	/ inflate next output bytes from compressed stream, updating*
614	checksum, and at the end of stream, verify it /*
615	int uzlib_uncompress_chksum(TINF_DATA *d)
616	{
617	int res;
618	unsigned char *data = d->dest;
619
620	res = uzlib_uncompress(d);
621
622	if (res < `0`) return res;
623
624	switch (d->checksum_type) {
625
626	case TINF_CHKSUM_ADLER:
627	d->checksum = uzlib_adler32(data, d->dest - data, d->checksum);
628	break;
629
630	case TINF_CHKSUM_CRC:
631	d->checksum = uzlib_crc32(data, d->dest - data, d->checksum);
632	break;
633	}
634
635	if (res == TINF_DONE) {
636	unsigned int val;
637
638	switch (d->checksum_type) {
639
640	case TINF_CHKSUM_ADLER:
641	val = tinf_get_be_uint32(d);
642	if (d->checksum != val) {
643	return TINF_CHKSUM_ERROR;
644	}
645	break;
646
647	case TINF_CHKSUM_CRC:
648	val = tinf_get_le_uint32(d);
649	if (~d->checksum != val) {
650	return TINF_CHKSUM_ERROR;
651	}
652	// Uncompressed size. TODO: Check
653	val = tinf_get_le_uint32(d);
654	break;
655	}
656	}
657
658	return res;
659	}
660

Browse the source code of MicroPython/extmod/uzlib/tinflate.c