1 | /* gzread.c -- zlib functions for reading gzip files |
2 | * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler |
3 | * For conditions of distribution and use, see copyright notice in zlib.h |
4 | */ |
5 | |
6 | #include "zbuild.h" |
7 | #include "zutil_p.h" |
8 | #include "gzguts.h" |
9 | |
10 | /* Local functions */ |
11 | static int gz_load(gz_state *, unsigned char *, unsigned, unsigned *); |
12 | static int gz_avail(gz_state *); |
13 | static int gz_look(gz_state *); |
14 | static int gz_decomp(gz_state *); |
15 | static int gz_fetch(gz_state *); |
16 | static int gz_skip(gz_state *, z_off64_t); |
17 | static size_t gz_read(gz_state *, void *, size_t); |
18 | |
19 | /* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from |
20 | state->fd, and update state->eof, state->err, and state->msg as appropriate. |
21 | This function needs to loop on read(), since read() is not guaranteed to |
22 | read the number of bytes requested, depending on the type of descriptor. */ |
23 | static int gz_load(gz_state *state, unsigned char *buf, unsigned len, unsigned *have) { |
24 | ssize_t ret; |
25 | |
26 | *have = 0; |
27 | do { |
28 | ret = read(fd: state->fd, buf: buf + *have, nbytes: len - *have); |
29 | if (ret <= 0) |
30 | break; |
31 | *have += (unsigned)ret; |
32 | } while (*have < len); |
33 | if (ret < 0) { |
34 | gz_error(state, Z_ERRNO, zstrerror()); |
35 | return -1; |
36 | } |
37 | if (ret == 0) |
38 | state->eof = 1; |
39 | return 0; |
40 | } |
41 | |
42 | /* Load up input buffer and set eof flag if last data loaded -- return -1 on |
43 | error, 0 otherwise. Note that the eof flag is set when the end of the input |
44 | file is reached, even though there may be unused data in the buffer. Once |
45 | that data has been used, no more attempts will be made to read the file. |
46 | If strm->avail_in != 0, then the current data is moved to the beginning of |
47 | the input buffer, and then the remainder of the buffer is loaded with the |
48 | available data from the input file. */ |
49 | static int gz_avail(gz_state *state) { |
50 | unsigned got; |
51 | PREFIX3(stream) *strm = &(state->strm); |
52 | |
53 | if (state->err != Z_OK && state->err != Z_BUF_ERROR) |
54 | return -1; |
55 | if (state->eof == 0) { |
56 | if (strm->avail_in) { /* copy what's there to the start */ |
57 | unsigned char *p = state->in; |
58 | unsigned const char *q = strm->next_in; |
59 | unsigned n = strm->avail_in; |
60 | do { |
61 | *p++ = *q++; |
62 | } while (--n); |
63 | } |
64 | if (gz_load(state, buf: state->in + strm->avail_in, len: state->size - strm->avail_in, have: &got) == -1) |
65 | return -1; |
66 | strm->avail_in += got; |
67 | strm->next_in = state->in; |
68 | } |
69 | return 0; |
70 | } |
71 | |
72 | /* Look for gzip header, set up for inflate or copy. state->x.have must be 0. |
73 | If this is the first time in, allocate required memory. state->how will be |
74 | left unchanged if there is no more input data available, will be set to COPY |
75 | if there is no gzip header and direct copying will be performed, or it will |
76 | be set to GZIP for decompression. If direct copying, then leftover input |
77 | data from the input buffer will be copied to the output buffer. In that |
78 | case, all further file reads will be directly to either the output buffer or |
79 | a user buffer. If decompressing, the inflate state will be initialized. |
80 | gz_look() will return 0 on success or -1 on failure. */ |
81 | static int gz_look(gz_state *state) { |
82 | PREFIX3(stream) *strm = &(state->strm); |
83 | |
84 | /* allocate read buffers and inflate memory */ |
85 | if (state->size == 0) { |
86 | /* allocate buffers */ |
87 | state->in = (unsigned char *)zng_alloc(size: state->want); |
88 | state->out = (unsigned char *)zng_alloc(size: state->want << 1); |
89 | if (state->in == NULL || state->out == NULL) { |
90 | zng_free(ptr: state->out); |
91 | zng_free(ptr: state->in); |
92 | gz_error(state, Z_MEM_ERROR, "out of memory" ); |
93 | return -1; |
94 | } |
95 | state->size = state->want; |
96 | |
97 | /* allocate inflate memory */ |
98 | state->strm.zalloc = NULL; |
99 | state->strm.zfree = NULL; |
100 | state->strm.opaque = NULL; |
101 | state->strm.avail_in = 0; |
102 | state->strm.next_in = NULL; |
103 | if (PREFIX(inflateInit2)(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */ |
104 | zng_free(ptr: state->out); |
105 | zng_free(ptr: state->in); |
106 | state->size = 0; |
107 | gz_error(state, Z_MEM_ERROR, "out of memory" ); |
108 | return -1; |
109 | } |
110 | } |
111 | |
112 | /* get at least the magic bytes in the input buffer */ |
113 | if (strm->avail_in < 2) { |
114 | if (gz_avail(state) == -1) |
115 | return -1; |
116 | if (strm->avail_in == 0) |
117 | return 0; |
118 | } |
119 | |
120 | /* look for gzip magic bytes -- if there, do gzip decoding (note: there is |
121 | a logical dilemma here when considering the case of a partially written |
122 | gzip file, to wit, if a single 31 byte is written, then we cannot tell |
123 | whether this is a single-byte file, or just a partially written gzip |
124 | file -- for here we assume that if a gzip file is being written, then |
125 | the header will be written in a single operation, so that reading a |
126 | single byte is sufficient indication that it is not a gzip file) */ |
127 | if (strm->avail_in > 1 && |
128 | strm->next_in[0] == 31 && strm->next_in[1] == 139) { |
129 | PREFIX(inflateReset)(strm); |
130 | state->how = GZIP; |
131 | state->direct = 0; |
132 | return 0; |
133 | } |
134 | |
135 | /* no gzip header -- if we were decoding gzip before, then this is trailing |
136 | garbage. Ignore the trailing garbage and finish. */ |
137 | if (state->direct == 0) { |
138 | strm->avail_in = 0; |
139 | state->eof = 1; |
140 | state->x.have = 0; |
141 | return 0; |
142 | } |
143 | |
144 | /* doing raw i/o, copy any leftover input to output -- this assumes that |
145 | the output buffer is larger than the input buffer, which also assures |
146 | space for gzungetc() */ |
147 | state->x.next = state->out; |
148 | if (strm->avail_in) { |
149 | memcpy(dest: state->x.next, src: strm->next_in, n: strm->avail_in); |
150 | state->x.have = strm->avail_in; |
151 | strm->avail_in = 0; |
152 | } |
153 | state->how = COPY; |
154 | state->direct = 1; |
155 | return 0; |
156 | } |
157 | |
158 | /* Decompress from input to the provided next_out and avail_out in the state. |
159 | On return, state->x.have and state->x.next point to the just decompressed |
160 | data. If the gzip stream completes, state->how is reset to LOOK to look for |
161 | the next gzip stream or raw data, once state->x.have is depleted. Returns 0 |
162 | on success, -1 on failure. */ |
163 | static int gz_decomp(gz_state *state) { |
164 | int ret = Z_OK; |
165 | unsigned had; |
166 | PREFIX3(stream) *strm = &(state->strm); |
167 | |
168 | /* fill output buffer up to end of deflate stream */ |
169 | had = strm->avail_out; |
170 | do { |
171 | /* get more input for inflate() */ |
172 | if (strm->avail_in == 0 && gz_avail(state) == -1) |
173 | return -1; |
174 | if (strm->avail_in == 0) { |
175 | gz_error(state, Z_BUF_ERROR, "unexpected end of file" ); |
176 | break; |
177 | } |
178 | |
179 | /* decompress and handle errors */ |
180 | ret = PREFIX(inflate)(strm, Z_NO_FLUSH); |
181 | if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { |
182 | gz_error(state, Z_STREAM_ERROR, "internal error: inflate stream corrupt" ); |
183 | return -1; |
184 | } |
185 | if (ret == Z_MEM_ERROR) { |
186 | gz_error(state, Z_MEM_ERROR, "out of memory" ); |
187 | return -1; |
188 | } |
189 | if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ |
190 | gz_error(state, Z_DATA_ERROR, strm->msg == NULL ? "compressed data error" : strm->msg); |
191 | return -1; |
192 | } |
193 | } while (strm->avail_out && ret != Z_STREAM_END); |
194 | |
195 | /* update available output */ |
196 | state->x.have = had - strm->avail_out; |
197 | state->x.next = strm->next_out - state->x.have; |
198 | |
199 | /* if the gzip stream completed successfully, look for another */ |
200 | if (ret == Z_STREAM_END) |
201 | state->how = LOOK; |
202 | |
203 | /* good decompression */ |
204 | return 0; |
205 | } |
206 | |
207 | /* Fetch data and put it in the output buffer. Assumes state->x.have is 0. |
208 | Data is either copied from the input file or decompressed from the input |
209 | file depending on state->how. If state->how is LOOK, then a gzip header is |
210 | looked for to determine whether to copy or decompress. Returns -1 on error, |
211 | otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the |
212 | end of the input file has been reached and all data has been processed. */ |
213 | static int gz_fetch(gz_state *state) { |
214 | PREFIX3(stream) *strm = &(state->strm); |
215 | |
216 | do { |
217 | switch (state->how) { |
218 | case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */ |
219 | if (gz_look(state) == -1) |
220 | return -1; |
221 | if (state->how == LOOK) |
222 | return 0; |
223 | break; |
224 | case COPY: /* -> COPY */ |
225 | if (gz_load(state, buf: state->out, len: state->size << 1, have: &(state->x.have)) |
226 | == -1) |
227 | return -1; |
228 | state->x.next = state->out; |
229 | return 0; |
230 | case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */ |
231 | strm->avail_out = state->size << 1; |
232 | strm->next_out = state->out; |
233 | if (gz_decomp(state) == -1) |
234 | return -1; |
235 | } |
236 | } while (state->x.have == 0 && (!state->eof || strm->avail_in)); |
237 | return 0; |
238 | } |
239 | |
240 | /* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */ |
241 | static int gz_skip(gz_state *state, z_off64_t len) { |
242 | unsigned n; |
243 | |
244 | /* skip over len bytes or reach end-of-file, whichever comes first */ |
245 | while (len) |
246 | /* skip over whatever is in output buffer */ |
247 | if (state->x.have) { |
248 | n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ? |
249 | (unsigned)len : state->x.have; |
250 | state->x.have -= n; |
251 | state->x.next += n; |
252 | state->x.pos += n; |
253 | len -= n; |
254 | } else if (state->eof && state->strm.avail_in == 0) { |
255 | /* output buffer empty -- return if we're at the end of the input */ |
256 | break; |
257 | } else { |
258 | /* need more data to skip -- load up output buffer */ |
259 | /* get more output, looking for header if required */ |
260 | if (gz_fetch(state) == -1) |
261 | return -1; |
262 | } |
263 | return 0; |
264 | } |
265 | |
266 | /* Read len bytes into buf from file, or less than len up to the end of the |
267 | input. Return the number of bytes read. If zero is returned, either the |
268 | end of file was reached, or there was an error. state->err must be |
269 | consulted in that case to determine which. */ |
270 | static size_t gz_read(gz_state *state, void *buf, size_t len) { |
271 | size_t got; |
272 | unsigned n; |
273 | |
274 | /* if len is zero, avoid unnecessary operations */ |
275 | if (len == 0) |
276 | return 0; |
277 | |
278 | /* process a skip request */ |
279 | if (state->seek) { |
280 | state->seek = 0; |
281 | if (gz_skip(state, len: state->skip) == -1) |
282 | return 0; |
283 | } |
284 | |
285 | /* get len bytes to buf, or less than len if at the end */ |
286 | got = 0; |
287 | do { |
288 | /* set n to the maximum amount of len that fits in an unsigned int */ |
289 | n = (unsigned)-1; |
290 | if (n > len) |
291 | n = (unsigned)len; |
292 | |
293 | /* first just try copying data from the output buffer */ |
294 | if (state->x.have) { |
295 | if (state->x.have < n) |
296 | n = state->x.have; |
297 | memcpy(dest: buf, src: state->x.next, n: n); |
298 | state->x.next += n; |
299 | state->x.have -= n; |
300 | } |
301 | |
302 | /* output buffer empty -- return if we're at the end of the input */ |
303 | else if (state->eof && state->strm.avail_in == 0) { |
304 | state->past = 1; /* tried to read past end */ |
305 | break; |
306 | } |
307 | |
308 | /* need output data -- for small len or new stream load up our output |
309 | buffer */ |
310 | else if (state->how == LOOK || n < (state->size << 1)) { |
311 | /* get more output, looking for header if required */ |
312 | if (gz_fetch(state) == -1) |
313 | return 0; |
314 | continue; /* no progress yet -- go back to copy above */ |
315 | /* the copy above assures that we will leave with space in the |
316 | output buffer, allowing at least one gzungetc() to succeed */ |
317 | } |
318 | |
319 | /* large len -- read directly into user buffer */ |
320 | else if (state->how == COPY) { /* read directly */ |
321 | if (gz_load(state, buf: (unsigned char *)buf, len: n, have: &n) == -1) |
322 | return 0; |
323 | } |
324 | |
325 | /* large len -- decompress directly into user buffer */ |
326 | else { /* state->how == GZIP */ |
327 | state->strm.avail_out = n; |
328 | state->strm.next_out = (unsigned char *)buf; |
329 | if (gz_decomp(state) == -1) |
330 | return 0; |
331 | n = state->x.have; |
332 | state->x.have = 0; |
333 | } |
334 | |
335 | /* update progress */ |
336 | len -= n; |
337 | buf = (char *)buf + n; |
338 | got += n; |
339 | state->x.pos += n; |
340 | } while (len); |
341 | |
342 | /* return number of bytes read into user buffer */ |
343 | return got; |
344 | } |
345 | |
346 | /* -- see zlib.h -- */ |
347 | int Z_EXPORT PREFIX(gzread)(gzFile file, void *buf, unsigned len) { |
348 | gz_state *state; |
349 | |
350 | /* get internal structure */ |
351 | if (file == NULL) |
352 | return -1; |
353 | state = (gz_state *)file; |
354 | |
355 | /* check that we're reading and that there's no (serious) error */ |
356 | if (state->mode != GZ_READ || |
357 | (state->err != Z_OK && state->err != Z_BUF_ERROR)) |
358 | return -1; |
359 | |
360 | /* since an int is returned, make sure len fits in one, otherwise return |
361 | with an error (this avoids a flaw in the interface) */ |
362 | if ((int)len < 0) { |
363 | gz_error(state, Z_STREAM_ERROR, "request does not fit in an int" ); |
364 | return -1; |
365 | } |
366 | |
367 | /* read len or fewer bytes to buf */ |
368 | len = (unsigned)gz_read(state, buf, len); |
369 | |
370 | /* check for an error */ |
371 | if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR) |
372 | return -1; |
373 | |
374 | /* return the number of bytes read (this is assured to fit in an int) */ |
375 | return (int)len; |
376 | } |
377 | |
378 | /* -- see zlib.h -- */ |
379 | size_t Z_EXPORT PREFIX(gzfread)(void *buf, size_t size, size_t nitems, gzFile file) { |
380 | size_t len; |
381 | gz_state *state; |
382 | |
383 | /* Exit early if size is zero, also prevents potential division by zero */ |
384 | if (size == 0) |
385 | return 0; |
386 | |
387 | /* get internal structure */ |
388 | if (file == NULL) |
389 | return 0; |
390 | state = (gz_state *)file; |
391 | |
392 | /* check that we're reading and that there's no (serious) error */ |
393 | if (state->mode != GZ_READ || |
394 | (state->err != Z_OK && state->err != Z_BUF_ERROR)) |
395 | return 0; |
396 | |
397 | /* compute bytes to read -- error on overflow */ |
398 | if (size && SIZE_MAX / size < nitems) { |
399 | gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t" ); |
400 | return 0; |
401 | } |
402 | len = nitems * size; |
403 | |
404 | /* read len or fewer bytes to buf, return the number of full items read */ |
405 | return len ? gz_read(state, buf, len) / size : 0; |
406 | } |
407 | |
408 | /* -- see zlib.h -- */ |
409 | #undef gzgetc |
410 | #undef zng_gzgetc |
411 | int Z_EXPORT PREFIX(gzgetc)(gzFile file) { |
412 | unsigned char buf[1]; |
413 | gz_state *state; |
414 | |
415 | /* get internal structure */ |
416 | if (file == NULL) |
417 | return -1; |
418 | state = (gz_state *)file; |
419 | |
420 | /* check that we're reading and that there's no (serious) error */ |
421 | if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR)) |
422 | return -1; |
423 | |
424 | /* try output buffer (no need to check for skip request) */ |
425 | if (state->x.have) { |
426 | state->x.have--; |
427 | state->x.pos++; |
428 | return *(state->x.next)++; |
429 | } |
430 | |
431 | /* nothing there -- try gz_read() */ |
432 | return gz_read(state, buf, len: 1) < 1 ? -1 : buf[0]; |
433 | } |
434 | |
435 | int Z_EXPORT PREFIX(gzgetc_)(gzFile file) { |
436 | return PREFIX(gzgetc)(file); |
437 | } |
438 | |
439 | /* -- see zlib.h -- */ |
440 | int Z_EXPORT PREFIX(gzungetc)(int c, gzFile file) { |
441 | gz_state *state; |
442 | |
443 | /* get internal structure */ |
444 | if (file == NULL) |
445 | return -1; |
446 | state = (gz_state *)file; |
447 | |
448 | /* check that we're reading and that there's no (serious) error */ |
449 | if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR)) |
450 | return -1; |
451 | |
452 | /* process a skip request */ |
453 | if (state->seek) { |
454 | state->seek = 0; |
455 | if (gz_skip(state, len: state->skip) == -1) |
456 | return -1; |
457 | } |
458 | |
459 | /* can't push EOF */ |
460 | if (c < 0) |
461 | return -1; |
462 | |
463 | /* if output buffer empty, put byte at end (allows more pushing) */ |
464 | if (state->x.have == 0) { |
465 | state->x.have = 1; |
466 | state->x.next = state->out + (state->size << 1) - 1; |
467 | state->x.next[0] = (unsigned char)c; |
468 | state->x.pos--; |
469 | state->past = 0; |
470 | return c; |
471 | } |
472 | |
473 | /* if no room, give up (must have already done a gzungetc()) */ |
474 | if (state->x.have == (state->size << 1)) { |
475 | gz_error(state, Z_DATA_ERROR, "out of room to push characters" ); |
476 | return -1; |
477 | } |
478 | |
479 | /* slide output data if needed and insert byte before existing data */ |
480 | if (state->x.next == state->out) { |
481 | unsigned char *src = state->out + state->x.have; |
482 | unsigned char *dest = state->out + (state->size << 1); |
483 | while (src > state->out) |
484 | *--dest = *--src; |
485 | state->x.next = dest; |
486 | } |
487 | state->x.have++; |
488 | state->x.next--; |
489 | state->x.next[0] = (unsigned char)c; |
490 | state->x.pos--; |
491 | state->past = 0; |
492 | return c; |
493 | } |
494 | |
495 | /* -- see zlib.h -- */ |
496 | char * Z_EXPORT PREFIX(gzgets)(gzFile file, char *buf, int len) { |
497 | unsigned left, n; |
498 | char *str; |
499 | unsigned char *eol; |
500 | gz_state *state; |
501 | |
502 | /* check parameters and get internal structure */ |
503 | if (file == NULL || buf == NULL || len < 1) |
504 | return NULL; |
505 | state = (gz_state *)file; |
506 | |
507 | /* check that we're reading and that there's no (serious) error */ |
508 | if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR)) |
509 | return NULL; |
510 | |
511 | /* process a skip request */ |
512 | if (state->seek) { |
513 | state->seek = 0; |
514 | if (gz_skip(state, len: state->skip) == -1) |
515 | return NULL; |
516 | } |
517 | |
518 | /* copy output bytes up to new line or len - 1, whichever comes first -- |
519 | append a terminating zero to the string (we don't check for a zero in |
520 | the contents, let the user worry about that) */ |
521 | str = buf; |
522 | left = (unsigned)len - 1; |
523 | if (left) { |
524 | do { |
525 | /* assure that something is in the output buffer */ |
526 | if (state->x.have == 0 && gz_fetch(state) == -1) |
527 | return NULL; /* error */ |
528 | if (state->x.have == 0) { /* end of file */ |
529 | state->past = 1; /* read past end */ |
530 | break; /* return what we have */ |
531 | } |
532 | |
533 | /* look for end-of-line in current output buffer */ |
534 | n = state->x.have > left ? left : state->x.have; |
535 | eol = (unsigned char *)memchr(s: state->x.next, c: '\n', n: n); |
536 | if (eol != NULL) |
537 | n = (unsigned)(eol - state->x.next) + 1; |
538 | |
539 | /* copy through end-of-line, or remainder if not found */ |
540 | memcpy(dest: buf, src: state->x.next, n: n); |
541 | state->x.have -= n; |
542 | state->x.next += n; |
543 | state->x.pos += n; |
544 | left -= n; |
545 | buf += n; |
546 | } while (left && eol == NULL); |
547 | } |
548 | |
549 | /* return terminated string, or if nothing, end of file */ |
550 | if (buf == str) |
551 | return NULL; |
552 | buf[0] = 0; |
553 | return str; |
554 | } |
555 | |
556 | /* -- see zlib.h -- */ |
557 | int Z_EXPORT PREFIX(gzdirect)(gzFile file) { |
558 | gz_state *state; |
559 | |
560 | /* get internal structure */ |
561 | if (file == NULL) |
562 | return 0; |
563 | |
564 | state = (gz_state *)file; |
565 | |
566 | /* if the state is not known, but we can find out, then do so (this is |
567 | mainly for right after a gzopen() or gzdopen()) */ |
568 | if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) |
569 | (void)gz_look(state); |
570 | |
571 | /* return 1 if transparent, 0 if processing a gzip stream */ |
572 | return state->direct; |
573 | } |
574 | |
575 | /* -- see zlib.h -- */ |
576 | int Z_EXPORT PREFIX(gzclose_r)(gzFile file) { |
577 | int ret, err; |
578 | gz_state *state; |
579 | |
580 | /* get internal structure */ |
581 | if (file == NULL) |
582 | return Z_STREAM_ERROR; |
583 | |
584 | state = (gz_state *)file; |
585 | |
586 | /* check that we're reading */ |
587 | if (state->mode != GZ_READ) |
588 | return Z_STREAM_ERROR; |
589 | |
590 | /* free memory and close file */ |
591 | if (state->size) { |
592 | PREFIX(inflateEnd)(strm: &(state->strm)); |
593 | zng_free(ptr: state->out); |
594 | zng_free(ptr: state->in); |
595 | } |
596 | err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK; |
597 | gz_error(state, Z_OK, NULL); |
598 | free(ptr: state->path); |
599 | ret = close(fd: state->fd); |
600 | zng_free(ptr: state); |
601 | return ret ? Z_ERRNO : err; |
602 | } |
603 | |