1/* inffast_chunk.c -- fast decoding
2 * Copyright (C) 1995-2017 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6#include "zutil.h"
7#include "inftrees.h"
8#include "inflate.h"
9#include "contrib/optimizations/inffast_chunk.h"
10#include "contrib/optimizations/chunkcopy.h"
11
12#ifdef ASMINF
13# pragma message("Assembler code may have bugs -- use at your own risk")
14#else
15
16/*
17 Decode literal, length, and distance codes and write out the resulting
18 literal and match bytes until either not enough input or output is
19 available, an end-of-block is encountered, or a data error is encountered.
20 When large enough input and output buffers are supplied to inflate(), for
21 example, a 16K input buffer and a 64K output buffer, more than 95% of the
22 inflate() execution time is spent in this routine.
23
24 Entry assumptions:
25
26 state->mode == LEN
27 strm->avail_in >= INFLATE_FAST_MIN_INPUT (6 or 8 bytes)
28 strm->avail_out >= INFLATE_FAST_MIN_OUTPUT (258 bytes)
29 start >= strm->avail_out
30 state->bits < 8
31 (state->hold >> state->bits) == 0
32 strm->next_out[0..strm->avail_out] does not overlap with
33 strm->next_in[0..strm->avail_in]
34 strm->state->window is allocated with an additional
35 CHUNKCOPY_CHUNK_SIZE-1 bytes of padding beyond strm->state->wsize
36
37 On return, state->mode is one of:
38
39 LEN -- ran out of enough output space or enough available input
40 TYPE -- reached end of block code, inflate() to interpret next block
41 BAD -- error in block data
42
43 Notes:
44
45 INFLATE_FAST_MIN_INPUT: 6 or 8 bytes
46
47 - The maximum input bits used by a length/distance pair is 15 bits for the
48 length code, 5 bits for the length extra, 15 bits for the distance code,
49 and 13 bits for the distance extra. This totals 48 bits, or six bytes.
50 Therefore if strm->avail_in >= 6, then there is enough input to avoid
51 checking for available input while decoding.
52
53 - The wide input data reading option reads 64 input bits at a time. Thus,
54 if strm->avail_in >= 8, then there is enough input to avoid checking for
55 available input while decoding. Reading consumes the input with:
56
57 hold |= read64le(in) << bits;
58 in += 6;
59 bits += 48;
60
61 reporting 6 bytes of new input because |bits| is 0..15 (2 bytes rounded
62 up, worst case) and 6 bytes is enough to decode as noted above. At exit,
63 hold &= (1U << bits) - 1 drops excess input to keep the invariant:
64
65 (state->hold >> state->bits) == 0
66
67 INFLATE_FAST_MIN_OUTPUT: 258 bytes
68
69 - The maximum bytes that a single length/distance pair can output is 258
70 bytes, which is the maximum length that can be coded. inflate_fast()
71 requires strm->avail_out >= 258 for each loop to avoid checking for
72 available output space while decoding.
73 */
74void ZLIB_INTERNAL inflate_fast_chunk_(strm, start)
75z_streamp strm;
76unsigned start; /* inflate()'s starting value for strm->avail_out */
77{
78 struct inflate_state FAR *state;
79 z_const unsigned char FAR *in; /* local strm->next_in */
80 z_const unsigned char FAR *last; /* have enough input while in < last */
81 unsigned char FAR *out; /* local strm->next_out */
82 unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
83 unsigned char FAR *end; /* while out < end, enough space available */
84 unsigned char FAR *limit; /* safety limit for chunky copies */
85#ifdef INFLATE_STRICT
86 unsigned dmax; /* maximum distance from zlib header */
87#endif
88 unsigned wsize; /* window size or zero if not using window */
89 unsigned whave; /* valid bytes in the window */
90 unsigned wnext; /* window write index */
91 unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
92 inflate_holder_t hold; /* local strm->hold */
93 unsigned bits; /* local strm->bits */
94 code const FAR *lcode; /* local strm->lencode */
95 code const FAR *dcode; /* local strm->distcode */
96 unsigned lmask; /* mask for first level of length codes */
97 unsigned dmask; /* mask for first level of distance codes */
98 code here; /* retrieved table entry */
99 unsigned op; /* code bits, operation, extra bits, or */
100 /* window position, window bytes to copy */
101 unsigned len; /* match length, unused bytes */
102 unsigned dist; /* match distance */
103 unsigned char FAR *from; /* where to copy match from */
104
105 /* copy state to local variables */
106 state = (struct inflate_state FAR *)strm->state;
107 in = strm->next_in;
108 last = in + (strm->avail_in - (INFLATE_FAST_MIN_INPUT - 1));
109 out = strm->next_out;
110 beg = out - (start - strm->avail_out);
111 end = out + (strm->avail_out - (INFLATE_FAST_MIN_OUTPUT - 1));
112 limit = out + strm->avail_out;
113#ifdef INFLATE_STRICT
114 dmax = state->dmax;
115#endif
116 wsize = state->wsize;
117 whave = state->whave;
118 wnext = (state->wnext == 0 && whave >= wsize) ? wsize : state->wnext;
119 window = state->window;
120 hold = state->hold;
121 bits = state->bits;
122 lcode = state->lencode;
123 dcode = state->distcode;
124 lmask = (1U << state->lenbits) - 1;
125 dmask = (1U << state->distbits) - 1;
126
127 /* decode literals and length/distances until end-of-block or not enough
128 input data or output space */
129 do {
130 if (bits < 15) {
131#ifdef INFLATE_CHUNK_READ_64LE
132 hold |= read64le(in) << bits;
133 in += 6;
134 bits += 48;
135#else
136 hold += (unsigned long)(*in++) << bits;
137 bits += 8;
138 hold += (unsigned long)(*in++) << bits;
139 bits += 8;
140#endif
141 }
142 here = lcode[hold & lmask];
143 dolen:
144 op = (unsigned)(here.bits);
145 hold >>= op;
146 bits -= op;
147 op = (unsigned)(here.op);
148 if (op == 0) { /* literal */
149 Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
150 "inflate: literal '%c'\n" :
151 "inflate: literal 0x%02x\n", here.val));
152 *out++ = (unsigned char)(here.val);
153 }
154 else if (op & 16) { /* length base */
155 len = (unsigned)(here.val);
156 op &= 15; /* number of extra bits */
157 if (op) {
158 if (bits < op) {
159#ifdef INFLATE_CHUNK_READ_64LE
160 hold |= read64le(in) << bits;
161 in += 6;
162 bits += 48;
163#else
164 hold += (unsigned long)(*in++) << bits;
165 bits += 8;
166#endif
167 }
168 len += (unsigned)hold & ((1U << op) - 1);
169 hold >>= op;
170 bits -= op;
171 }
172 Tracevv((stderr, "inflate: length %u\n", len));
173 if (bits < 15) {
174#ifdef INFLATE_CHUNK_READ_64LE
175 hold |= read64le(in) << bits;
176 in += 6;
177 bits += 48;
178#else
179 hold += (unsigned long)(*in++) << bits;
180 bits += 8;
181 hold += (unsigned long)(*in++) << bits;
182 bits += 8;
183#endif
184 }
185 here = dcode[hold & dmask];
186 dodist:
187 op = (unsigned)(here.bits);
188 hold >>= op;
189 bits -= op;
190 op = (unsigned)(here.op);
191 if (op & 16) { /* distance base */
192 dist = (unsigned)(here.val);
193 op &= 15; /* number of extra bits */
194 if (bits < op) {
195#ifdef INFLATE_CHUNK_READ_64LE
196 hold |= read64le(in) << bits;
197 in += 6;
198 bits += 48;
199#else
200 hold += (unsigned long)(*in++) << bits;
201 bits += 8;
202 if (bits < op) {
203 hold += (unsigned long)(*in++) << bits;
204 bits += 8;
205 }
206#endif
207 }
208 dist += (unsigned)hold & ((1U << op) - 1);
209#ifdef INFLATE_STRICT
210 if (dist > dmax) {
211 strm->msg = (char *)"invalid distance too far back";
212 state->mode = BAD;
213 break;
214 }
215#endif
216 hold >>= op;
217 bits -= op;
218 Tracevv((stderr, "inflate: distance %u\n", dist));
219 op = (unsigned)(out - beg); /* max distance in output */
220 if (dist > op) { /* see if copy from window */
221 op = dist - op; /* distance back in window */
222 if (op > whave) {
223 if (state->sane) {
224 strm->msg =
225 (char *)"invalid distance too far back";
226 state->mode = BAD;
227 break;
228 }
229#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
230 if (len <= op - whave) {
231 do {
232 *out++ = 0;
233 } while (--len);
234 continue;
235 }
236 len -= op - whave;
237 do {
238 *out++ = 0;
239 } while (--op > whave);
240 if (op == 0) {
241 from = out - dist;
242 do {
243 *out++ = *from++;
244 } while (--len);
245 continue;
246 }
247#endif
248 }
249 from = window;
250 if (wnext >= op) { /* contiguous in window */
251 from += wnext - op;
252 }
253 else { /* wrap around window */
254 op -= wnext;
255 from += wsize - op;
256 if (op < len) { /* some from end of window */
257 len -= op;
258 out = chunkcopy_safe(out, from, op, limit);
259 from = window; /* more from start of window */
260 op = wnext;
261 /* This (rare) case can create a situation where
262 the first chunkcopy below must be checked.
263 */
264 }
265 }
266 if (op < len) { /* still need some from output */
267 out = chunkcopy_safe(out, from, op, limit);
268 len -= op;
269 /* When dist is small the amount of data that can be
270 copied from the window is also small, and progress
271 towards the dangerous end of the output buffer is
272 also small. This means that for trivial memsets and
273 for chunkunroll_relaxed() a safety check is
274 unnecessary. However, these conditions may not be
275 entered at all, and in that case it's possible that
276 the main copy is near the end.
277 */
278 out = chunkunroll_relaxed(out, &dist, &len);
279 out = chunkcopy_safe(out, out - dist, len, limit);
280 } else {
281 /* from points to window, so there is no risk of
282 overlapping pointers requiring memset-like behaviour
283 */
284 out = chunkcopy_safe(out, from, len, limit);
285 }
286 }
287 else {
288 /* Whole reference is in range of current output. No
289 range checks are necessary because we start with room
290 for at least 258 bytes of output, so unroll and roundoff
291 operations can write beyond `out+len` so long as they
292 stay within 258 bytes of `out`.
293 */
294 out = chunkcopy_lapped_relaxed(out, dist, len);
295 }
296 }
297 else if ((op & 64) == 0) { /* 2nd level distance code */
298 here = dcode[here.val + (hold & ((1U << op) - 1))];
299 goto dodist;
300 }
301 else {
302 strm->msg = (char *)"invalid distance code";
303 state->mode = BAD;
304 break;
305 }
306 }
307 else if ((op & 64) == 0) { /* 2nd level length code */
308 here = lcode[here.val + (hold & ((1U << op) - 1))];
309 goto dolen;
310 }
311 else if (op & 32) { /* end-of-block */
312 Tracevv((stderr, "inflate: end of block\n"));
313 state->mode = TYPE;
314 break;
315 }
316 else {
317 strm->msg = (char *)"invalid literal/length code";
318 state->mode = BAD;
319 break;
320 }
321 } while (in < last && out < end);
322
323 /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
324 len = bits >> 3;
325 in -= len;
326 bits -= len << 3;
327 hold &= (1U << bits) - 1;
328
329 /* update state and return */
330 strm->next_in = in;
331 strm->next_out = out;
332 strm->avail_in = (unsigned)(in < last ?
333 (INFLATE_FAST_MIN_INPUT - 1) + (last - in) :
334 (INFLATE_FAST_MIN_INPUT - 1) - (in - last));
335 strm->avail_out = (unsigned)(out < end ?
336 (INFLATE_FAST_MIN_OUTPUT - 1) + (end - out) :
337 (INFLATE_FAST_MIN_OUTPUT - 1) - (out - end));
338 state->hold = hold;
339 state->bits = bits;
340
341 Assert((state->hold >> state->bits) == 0, "invalid input data state");
342 return;
343}
344
345/*
346 inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
347 - Using bit fields for code structure
348 - Different op definition to avoid & for extra bits (do & for table bits)
349 - Three separate decoding do-loops for direct, window, and wnext == 0
350 - Special case for distance > 1 copies to do overlapped load and store copy
351 - Explicit branch predictions (based on measured branch probabilities)
352 - Deferring match copy and interspersed it with decoding subsequent codes
353 - Swapping literal/length else
354 - Swapping window/direct else
355 - Larger unrolled copy loops (three is about right)
356 - Moving len -= 3 statement into middle of loop
357 */
358
359#endif /* !ASMINF */
360