1/* gzread.c -- zlib functions for reading gzip files
2 * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6#include "zbuild.h"
7#include "gzguts.h"
8
9/* Local functions */
10static int gz_load(gz_state *, unsigned char *, unsigned, unsigned *);
11static int gz_avail(gz_state *);
12static int gz_look(gz_state *);
13static int gz_decomp(gz_state *);
14static int gz_fetch(gz_state *);
15static int gz_skip(gz_state *, z_off64_t);
16static size_t gz_read(gz_state *, void *, size_t);
17
18/* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from
19 state->fd, and update state->eof, state->err, and state->msg as appropriate.
20 This function needs to loop on read(), since read() is not guaranteed to
21 read the number of bytes requested, depending on the type of descriptor. */
22static int gz_load(gz_state *state, unsigned char *buf, unsigned len, unsigned *have) {
23 ssize_t ret;
24
25 *have = 0;
26 do {
27 ret = read(state->fd, buf + *have, len - *have);
28 if (ret <= 0)
29 break;
30 *have += (unsigned)ret;
31 } while (*have < len);
32 if (ret < 0) {
33 gz_error(state, Z_ERRNO, zstrerror());
34 return -1;
35 }
36 if (ret == 0)
37 state->eof = 1;
38 return 0;
39}
40
41/* Load up input buffer and set eof flag if last data loaded -- return -1 on
42 error, 0 otherwise. Note that the eof flag is set when the end of the input
43 file is reached, even though there may be unused data in the buffer. Once
44 that data has been used, no more attempts will be made to read the file.
45 If strm->avail_in != 0, then the current data is moved to the beginning of
46 the input buffer, and then the remainder of the buffer is loaded with the
47 available data from the input file. */
48static int gz_avail(gz_state *state) {
49 unsigned got;
50 PREFIX3(stream) *strm = &(state->strm);
51
52 if (state->err != Z_OK && state->err != Z_BUF_ERROR)
53 return -1;
54 if (state->eof == 0) {
55 if (strm->avail_in) { /* copy what's there to the start */
56 unsigned char *p = state->in;
57 unsigned const char *q = strm->next_in;
58 unsigned n = strm->avail_in;
59 do {
60 *p++ = *q++;
61 } while (--n);
62 }
63 if (gz_load(state, state->in + strm->avail_in, state->size - strm->avail_in, &got) == -1)
64 return -1;
65 strm->avail_in += got;
66 strm->next_in = state->in;
67 }
68 return 0;
69}
70
71/* Look for gzip header, set up for inflate or copy. state->x.have must be 0.
72 If this is the first time in, allocate required memory. state->how will be
73 left unchanged if there is no more input data available, will be set to COPY
74 if there is no gzip header and direct copying will be performed, or it will
75 be set to GZIP for decompression. If direct copying, then leftover input
76 data from the input buffer will be copied to the output buffer. In that
77 case, all further file reads will be directly to either the output buffer or
78 a user buffer. If decompressing, the inflate state will be initialized.
79 gz_look() will return 0 on success or -1 on failure. */
80static int gz_look(gz_state *state) {
81 PREFIX3(stream) *strm = &(state->strm);
82
83 /* allocate read buffers and inflate memory */
84 if (state->size == 0) {
85 /* allocate buffers */
86 state->in = (unsigned char *)malloc(state->want);
87 state->out = (unsigned char *)malloc(state->want << 1);
88 if (state->in == NULL || state->out == NULL) {
89 free(state->out);
90 free(state->in);
91 gz_error(state, Z_MEM_ERROR, "out of memory");
92 return -1;
93 }
94 state->size = state->want;
95
96 /* allocate inflate memory */
97 state->strm.zalloc = NULL;
98 state->strm.zfree = NULL;
99 state->strm.opaque = NULL;
100 state->strm.avail_in = 0;
101 state->strm.next_in = NULL;
102 if (PREFIX(inflateInit2)(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */
103 free(state->out);
104 free(state->in);
105 state->size = 0;
106 gz_error(state, Z_MEM_ERROR, "out of memory");
107 return -1;
108 }
109 }
110
111 /* get at least the magic bytes in the input buffer */
112 if (strm->avail_in < 2) {
113 if (gz_avail(state) == -1)
114 return -1;
115 if (strm->avail_in == 0)
116 return 0;
117 }
118
119 /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
120 a logical dilemma here when considering the case of a partially written
121 gzip file, to wit, if a single 31 byte is written, then we cannot tell
122 whether this is a single-byte file, or just a partially written gzip
123 file -- for here we assume that if a gzip file is being written, then
124 the header will be written in a single operation, so that reading a
125 single byte is sufficient indication that it is not a gzip file) */
126 if (strm->avail_in > 1 &&
127 strm->next_in[0] == 31 && strm->next_in[1] == 139) {
128 PREFIX(inflateReset)(strm);
129 state->how = GZIP;
130 state->direct = 0;
131 return 0;
132 }
133
134 /* no gzip header -- if we were decoding gzip before, then this is trailing
135 garbage. Ignore the trailing garbage and finish. */
136 if (state->direct == 0) {
137 strm->avail_in = 0;
138 state->eof = 1;
139 state->x.have = 0;
140 return 0;
141 }
142
143 /* doing raw i/o, copy any leftover input to output -- this assumes that
144 the output buffer is larger than the input buffer, which also assures
145 space for gzungetc() */
146 state->x.next = state->out;
147 if (strm->avail_in) {
148 memcpy(state->x.next, strm->next_in, strm->avail_in);
149 state->x.have = strm->avail_in;
150 strm->avail_in = 0;
151 }
152 state->how = COPY;
153 state->direct = 1;
154 return 0;
155}
156
157/* Decompress from input to the provided next_out and avail_out in the state.
158 On return, state->x.have and state->x.next point to the just decompressed
159 data. If the gzip stream completes, state->how is reset to LOOK to look for
160 the next gzip stream or raw data, once state->x.have is depleted. Returns 0
161 on success, -1 on failure. */
162static int gz_decomp(gz_state *state) {
163 int ret = Z_OK;
164 unsigned had;
165 PREFIX3(stream) *strm = &(state->strm);
166
167 /* fill output buffer up to end of deflate stream */
168 had = strm->avail_out;
169 do {
170 /* get more input for inflate() */
171 if (strm->avail_in == 0 && gz_avail(state) == -1)
172 return -1;
173 if (strm->avail_in == 0) {
174 gz_error(state, Z_BUF_ERROR, "unexpected end of file");
175 break;
176 }
177
178 /* decompress and handle errors */
179 ret = PREFIX(inflate)(strm, Z_NO_FLUSH);
180 if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
181 gz_error(state, Z_STREAM_ERROR, "internal error: inflate stream corrupt");
182 return -1;
183 }
184 if (ret == Z_MEM_ERROR) {
185 gz_error(state, Z_MEM_ERROR, "out of memory");
186 return -1;
187 }
188 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
189 gz_error(state, Z_DATA_ERROR, strm->msg == NULL ? "compressed data error" : strm->msg);
190 return -1;
191 }
192 } while (strm->avail_out && ret != Z_STREAM_END);
193
194 /* update available output */
195 state->x.have = had - strm->avail_out;
196 state->x.next = strm->next_out - state->x.have;
197
198 /* if the gzip stream completed successfully, look for another */
199 if (ret == Z_STREAM_END)
200 state->how = LOOK;
201
202 /* good decompression */
203 return 0;
204}
205
206/* Fetch data and put it in the output buffer. Assumes state->x.have is 0.
207 Data is either copied from the input file or decompressed from the input
208 file depending on state->how. If state->how is LOOK, then a gzip header is
209 looked for to determine whether to copy or decompress. Returns -1 on error,
210 otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the
211 end of the input file has been reached and all data has been processed. */
212static int gz_fetch(gz_state *state) {
213 PREFIX3(stream) *strm = &(state->strm);
214
215 do {
216 switch (state->how) {
217 case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */
218 if (gz_look(state) == -1)
219 return -1;
220 if (state->how == LOOK)
221 return 0;
222 break;
223 case COPY: /* -> COPY */
224 if (gz_load(state, state->out, state->size << 1, &(state->x.have))
225 == -1)
226 return -1;
227 state->x.next = state->out;
228 return 0;
229 case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */
230 strm->avail_out = state->size << 1;
231 strm->next_out = state->out;
232 if (gz_decomp(state) == -1)
233 return -1;
234 }
235 } while (state->x.have == 0 && (!state->eof || strm->avail_in));
236 return 0;
237}
238
239/* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */
240static int gz_skip(gz_state *state, z_off64_t len) {
241 unsigned n;
242
243 /* skip over len bytes or reach end-of-file, whichever comes first */
244 while (len)
245 /* skip over whatever is in output buffer */
246 if (state->x.have) {
247 n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
248 (unsigned)len : state->x.have;
249 state->x.have -= n;
250 state->x.next += n;
251 state->x.pos += n;
252 len -= n;
253 } else if (state->eof && state->strm.avail_in == 0) {
254 /* output buffer empty -- return if we're at the end of the input */
255 break;
256 } else {
257 /* need more data to skip -- load up output buffer */
258 /* get more output, looking for header if required */
259 if (gz_fetch(state) == -1)
260 return -1;
261 }
262 return 0;
263}
264
265/* Read len bytes into buf from file, or less than len up to the end of the
266 input. Return the number of bytes read. If zero is returned, either the
267 end of file was reached, or there was an error. state->err must be
268 consulted in that case to determine which. */
269static size_t gz_read(gz_state *state, void *buf, size_t len) {
270 size_t got;
271 unsigned n;
272
273 /* if len is zero, avoid unnecessary operations */
274 if (len == 0)
275 return 0;
276
277 /* process a skip request */
278 if (state->seek) {
279 state->seek = 0;
280 if (gz_skip(state, state->skip) == -1)
281 return 0;
282 }
283
284 /* get len bytes to buf, or less than len if at the end */
285 got = 0;
286 do {
287 /* set n to the maximum amount of len that fits in an unsigned int */
288 n = (unsigned)-1;
289 if (n > len)
290 n = (unsigned)len;
291
292 /* first just try copying data from the output buffer */
293 if (state->x.have) {
294 if (state->x.have < n)
295 n = state->x.have;
296 memcpy(buf, state->x.next, n);
297 state->x.next += n;
298 state->x.have -= n;
299 }
300
301 /* output buffer empty -- return if we're at the end of the input */
302 else if (state->eof && state->strm.avail_in == 0) {
303 state->past = 1; /* tried to read past end */
304 break;
305 }
306
307 /* need output data -- for small len or new stream load up our output
308 buffer */
309 else if (state->how == LOOK || n < (state->size << 1)) {
310 /* get more output, looking for header if required */
311 if (gz_fetch(state) == -1)
312 return 0;
313 continue; /* no progress yet -- go back to copy above */
314 /* the copy above assures that we will leave with space in the
315 output buffer, allowing at least one gzungetc() to succeed */
316 }
317
318 /* large len -- read directly into user buffer */
319 else if (state->how == COPY) { /* read directly */
320 if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
321 return 0;
322 }
323
324 /* large len -- decompress directly into user buffer */
325 else { /* state->how == GZIP */
326 state->strm.avail_out = n;
327 state->strm.next_out = (unsigned char *)buf;
328 if (gz_decomp(state) == -1)
329 return 0;
330 n = state->x.have;
331 state->x.have = 0;
332 }
333
334 /* update progress */
335 len -= n;
336 buf = (char *)buf + n;
337 got += n;
338 state->x.pos += n;
339 } while (len);
340
341 /* return number of bytes read into user buffer */
342 return got;
343}
344
345/* -- see zlib.h -- */
346int ZEXPORT PREFIX(gzread)(gzFile file, void *buf, unsigned len) {
347 gz_state *state;
348
349 /* get internal structure */
350 if (file == NULL)
351 return -1;
352 state = (gz_state *)file;
353
354 /* check that we're reading and that there's no (serious) error */
355 if (state->mode != GZ_READ ||
356 (state->err != Z_OK && state->err != Z_BUF_ERROR))
357 return -1;
358
359 /* since an int is returned, make sure len fits in one, otherwise return
360 with an error (this avoids a flaw in the interface) */
361 if ((int)len < 0) {
362 gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
363 return -1;
364 }
365
366 /* read len or fewer bytes to buf */
367 len = (unsigned)gz_read(state, buf, len);
368
369 /* check for an error */
370 if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
371 return -1;
372
373 /* return the number of bytes read (this is assured to fit in an int) */
374 return (int)len;
375}
376
377/* -- see zlib.h -- */
378size_t ZEXPORT PREFIX(gzfread)(void *buf, size_t size, size_t nitems, gzFile file) {
379 size_t len;
380 gz_state *state;
381
382 /* Exit early if size is zero, also prevents potential division by zero */
383 if (size == 0)
384 return 0;
385
386 /* get internal structure */
387 if (file == NULL)
388 return 0;
389 state = (gz_state *)file;
390
391 /* check that we're reading and that there's no (serious) error */
392 if (state->mode != GZ_READ ||
393 (state->err != Z_OK && state->err != Z_BUF_ERROR))
394 return 0;
395
396 /* compute bytes to read -- error on overflow */
397 len = nitems * size;
398 if (size && len / size != nitems) {
399 gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
400 return 0;
401 }
402
403 /* read len or fewer bytes to buf, return the number of full items read */
404 return len ? gz_read(state, buf, len) / size : 0;
405}
406
407/* -- see zlib.h -- */
408#undef gzgetc
409#undef zng_gzgetc
410int ZEXPORT PREFIX(gzgetc)(gzFile file) {
411 unsigned char buf[1];
412 gz_state *state;
413
414 /* get internal structure */
415 if (file == NULL)
416 return -1;
417 state = (gz_state *)file;
418
419 /* check that we're reading and that there's no (serious) error */
420 if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR))
421 return -1;
422
423 /* try output buffer (no need to check for skip request) */
424 if (state->x.have) {
425 state->x.have--;
426 state->x.pos++;
427 return *(state->x.next)++;
428 }
429
430 /* nothing there -- try gz_read() */
431 return gz_read(state, buf, 1) < 1 ? -1 : buf[0];
432}
433
434int ZEXPORT PREFIX(gzgetc_)(gzFile file) {
435 return PREFIX(gzgetc)(file);
436}
437
438/* -- see zlib.h -- */
439int ZEXPORT PREFIX(gzungetc)(int c, gzFile file) {
440 gz_state *state;
441
442 /* get internal structure */
443 if (file == NULL)
444 return -1;
445 state = (gz_state *)file;
446
447 /* check that we're reading and that there's no (serious) error */
448 if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR))
449 return -1;
450
451 /* process a skip request */
452 if (state->seek) {
453 state->seek = 0;
454 if (gz_skip(state, state->skip) == -1)
455 return -1;
456 }
457
458 /* can't push EOF */
459 if (c < 0)
460 return -1;
461
462 /* if output buffer empty, put byte at end (allows more pushing) */
463 if (state->x.have == 0) {
464 state->x.have = 1;
465 state->x.next = state->out + (state->size << 1) - 1;
466 state->x.next[0] = (unsigned char)c;
467 state->x.pos--;
468 state->past = 0;
469 return c;
470 }
471
472 /* if no room, give up (must have already done a gzungetc()) */
473 if (state->x.have == (state->size << 1)) {
474 gz_error(state, Z_DATA_ERROR, "out of room to push characters");
475 return -1;
476 }
477
478 /* slide output data if needed and insert byte before existing data */
479 if (state->x.next == state->out) {
480 unsigned char *src = state->out + state->x.have;
481 unsigned char *dest = state->out + (state->size << 1);
482 while (src > state->out)
483 *--dest = *--src;
484 state->x.next = dest;
485 }
486 state->x.have++;
487 state->x.next--;
488 state->x.next[0] = (unsigned char)c;
489 state->x.pos--;
490 state->past = 0;
491 return c;
492}
493
494/* -- see zlib.h -- */
495char * ZEXPORT PREFIX(gzgets)(gzFile file, char *buf, int len) {
496 unsigned left, n;
497 char *str;
498 unsigned char *eol;
499 gz_state *state;
500
501 /* check parameters and get internal structure */
502 if (file == NULL || buf == NULL || len < 1)
503 return NULL;
504 state = (gz_state *)file;
505
506 /* check that we're reading and that there's no (serious) error */
507 if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR))
508 return NULL;
509
510 /* process a skip request */
511 if (state->seek) {
512 state->seek = 0;
513 if (gz_skip(state, state->skip) == -1)
514 return NULL;
515 }
516
517 /* copy output bytes up to new line or len - 1, whichever comes first --
518 append a terminating zero to the string (we don't check for a zero in
519 the contents, let the user worry about that) */
520 str = buf;
521 left = (unsigned)len - 1;
522 if (left) do {
523 /* assure that something is in the output buffer */
524 if (state->x.have == 0 && gz_fetch(state) == -1)
525 return NULL; /* error */
526 if (state->x.have == 0) { /* end of file */
527 state->past = 1; /* read past end */
528 break; /* return what we have */
529 }
530
531 /* look for end-of-line in current output buffer */
532 n = state->x.have > left ? left : state->x.have;
533 eol = (unsigned char *)memchr(state->x.next, '\n', n);
534 if (eol != NULL)
535 n = (unsigned)(eol - state->x.next) + 1;
536
537 /* copy through end-of-line, or remainder if not found */
538 memcpy(buf, state->x.next, n);
539 state->x.have -= n;
540 state->x.next += n;
541 state->x.pos += n;
542 left -= n;
543 buf += n;
544 } while (left && eol == NULL);
545
546 /* return terminated string, or if nothing, end of file */
547 if (buf == str)
548 return NULL;
549 buf[0] = 0;
550 return str;
551}
552
553/* -- see zlib.h -- */
554int ZEXPORT PREFIX(gzdirect)(gzFile file) {
555 gz_state *state;
556
557 /* get internal structure */
558 if (file == NULL)
559 return 0;
560
561 state = (gz_state *)file;
562
563 /* if the state is not known, but we can find out, then do so (this is
564 mainly for right after a gzopen() or gzdopen()) */
565 if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
566 (void)gz_look(state);
567
568 /* return 1 if transparent, 0 if processing a gzip stream */
569 return state->direct;
570}
571
572/* -- see zlib.h -- */
573int ZEXPORT PREFIX(gzclose_r)(gzFile file) {
574 int ret, err;
575 gz_state *state;
576
577 /* get internal structure */
578 if (file == NULL)
579 return Z_STREAM_ERROR;
580
581 state = (gz_state *)file;
582
583 /* check that we're reading */
584 if (state->mode != GZ_READ)
585 return Z_STREAM_ERROR;
586
587 /* free memory and close file */
588 if (state->size) {
589 PREFIX(inflateEnd)(&(state->strm));
590 free(state->out);
591 free(state->in);
592 }
593 err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
594 gz_error(state, Z_OK, NULL);
595 free(state->path);
596 ret = close(state->fd);
597 free(state);
598 return ret ? Z_ERRNO : err;
599}
600