/* gzread.c -- zlib functions for reading gzip files
 * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
5
6#include "zbuild.h"
7#include "zutil_p.h"
8#include "gzguts.h"
9
10/* Local functions */
11static int gz_load(gz_state *, unsigned char *, unsigned, unsigned *);
12static int gz_avail(gz_state *);
13static int gz_look(gz_state *);
14static int gz_decomp(gz_state *);
15static int gz_fetch(gz_state *);
16static int gz_skip(gz_state *, z_off64_t);
17static size_t gz_read(gz_state *, void *, size_t);
18
19/* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from
20 state->fd, and update state->eof, state->err, and state->msg as appropriate.
21 This function needs to loop on read(), since read() is not guaranteed to
22 read the number of bytes requested, depending on the type of descriptor. */
23static int gz_load(gz_state *state, unsigned char *buf, unsigned len, unsigned *have) {
24 ssize_t ret;
25
26 *have = 0;
27 do {
28 ret = read(fd: state->fd, buf: buf + *have, nbytes: len - *have);
29 if (ret <= 0)
30 break;
31 *have += (unsigned)ret;
32 } while (*have < len);
33 if (ret < 0) {
34 gz_error(state, Z_ERRNO, zstrerror());
35 return -1;
36 }
37 if (ret == 0)
38 state->eof = 1;
39 return 0;
40}
41
42/* Load up input buffer and set eof flag if last data loaded -- return -1 on
43 error, 0 otherwise. Note that the eof flag is set when the end of the input
44 file is reached, even though there may be unused data in the buffer. Once
45 that data has been used, no more attempts will be made to read the file.
46 If strm->avail_in != 0, then the current data is moved to the beginning of
47 the input buffer, and then the remainder of the buffer is loaded with the
48 available data from the input file. */
49static int gz_avail(gz_state *state) {
50 unsigned got;
51 PREFIX3(stream) *strm = &(state->strm);
52
53 if (state->err != Z_OK && state->err != Z_BUF_ERROR)
54 return -1;
55 if (state->eof == 0) {
56 if (strm->avail_in) { /* copy what's there to the start */
57 unsigned char *p = state->in;
58 unsigned const char *q = strm->next_in;
59 unsigned n = strm->avail_in;
60 do {
61 *p++ = *q++;
62 } while (--n);
63 }
64 if (gz_load(state, buf: state->in + strm->avail_in, len: state->size - strm->avail_in, have: &got) == -1)
65 return -1;
66 strm->avail_in += got;
67 strm->next_in = state->in;
68 }
69 return 0;
70}
71
72/* Look for gzip header, set up for inflate or copy. state->x.have must be 0.
73 If this is the first time in, allocate required memory. state->how will be
74 left unchanged if there is no more input data available, will be set to COPY
75 if there is no gzip header and direct copying will be performed, or it will
76 be set to GZIP for decompression. If direct copying, then leftover input
77 data from the input buffer will be copied to the output buffer. In that
78 case, all further file reads will be directly to either the output buffer or
79 a user buffer. If decompressing, the inflate state will be initialized.
80 gz_look() will return 0 on success or -1 on failure. */
81static int gz_look(gz_state *state) {
82 PREFIX3(stream) *strm = &(state->strm);
83
84 /* allocate read buffers and inflate memory */
85 if (state->size == 0) {
86 /* allocate buffers */
87 state->in = (unsigned char *)zng_alloc(size: state->want);
88 state->out = (unsigned char *)zng_alloc(size: state->want << 1);
89 if (state->in == NULL || state->out == NULL) {
90 zng_free(ptr: state->out);
91 zng_free(ptr: state->in);
92 gz_error(state, Z_MEM_ERROR, "out of memory");
93 return -1;
94 }
95 state->size = state->want;
96
97 /* allocate inflate memory */
98 state->strm.zalloc = NULL;
99 state->strm.zfree = NULL;
100 state->strm.opaque = NULL;
101 state->strm.avail_in = 0;
102 state->strm.next_in = NULL;
103 if (PREFIX(inflateInit2)(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */
104 zng_free(ptr: state->out);
105 zng_free(ptr: state->in);
106 state->size = 0;
107 gz_error(state, Z_MEM_ERROR, "out of memory");
108 return -1;
109 }
110 }
111
112 /* get at least the magic bytes in the input buffer */
113 if (strm->avail_in < 2) {
114 if (gz_avail(state) == -1)
115 return -1;
116 if (strm->avail_in == 0)
117 return 0;
118 }
119
120 /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
121 a logical dilemma here when considering the case of a partially written
122 gzip file, to wit, if a single 31 byte is written, then we cannot tell
123 whether this is a single-byte file, or just a partially written gzip
124 file -- for here we assume that if a gzip file is being written, then
125 the header will be written in a single operation, so that reading a
126 single byte is sufficient indication that it is not a gzip file) */
127 if (strm->avail_in > 1 &&
128 strm->next_in[0] == 31 && strm->next_in[1] == 139) {
129 PREFIX(inflateReset)(strm);
130 state->how = GZIP;
131 state->direct = 0;
132 return 0;
133 }
134
135 /* no gzip header -- if we were decoding gzip before, then this is trailing
136 garbage. Ignore the trailing garbage and finish. */
137 if (state->direct == 0) {
138 strm->avail_in = 0;
139 state->eof = 1;
140 state->x.have = 0;
141 return 0;
142 }
143
144 /* doing raw i/o, copy any leftover input to output -- this assumes that
145 the output buffer is larger than the input buffer, which also assures
146 space for gzungetc() */
147 state->x.next = state->out;
148 if (strm->avail_in) {
149 memcpy(dest: state->x.next, src: strm->next_in, n: strm->avail_in);
150 state->x.have = strm->avail_in;
151 strm->avail_in = 0;
152 }
153 state->how = COPY;
154 state->direct = 1;
155 return 0;
156}
157
158/* Decompress from input to the provided next_out and avail_out in the state.
159 On return, state->x.have and state->x.next point to the just decompressed
160 data. If the gzip stream completes, state->how is reset to LOOK to look for
161 the next gzip stream or raw data, once state->x.have is depleted. Returns 0
162 on success, -1 on failure. */
163static int gz_decomp(gz_state *state) {
164 int ret = Z_OK;
165 unsigned had;
166 PREFIX3(stream) *strm = &(state->strm);
167
168 /* fill output buffer up to end of deflate stream */
169 had = strm->avail_out;
170 do {
171 /* get more input for inflate() */
172 if (strm->avail_in == 0 && gz_avail(state) == -1)
173 return -1;
174 if (strm->avail_in == 0) {
175 gz_error(state, Z_BUF_ERROR, "unexpected end of file");
176 break;
177 }
178
179 /* decompress and handle errors */
180 ret = PREFIX(inflate)(strm, Z_NO_FLUSH);
181 if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
182 gz_error(state, Z_STREAM_ERROR, "internal error: inflate stream corrupt");
183 return -1;
184 }
185 if (ret == Z_MEM_ERROR) {
186 gz_error(state, Z_MEM_ERROR, "out of memory");
187 return -1;
188 }
189 if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
190 gz_error(state, Z_DATA_ERROR, strm->msg == NULL ? "compressed data error" : strm->msg);
191 return -1;
192 }
193 } while (strm->avail_out && ret != Z_STREAM_END);
194
195 /* update available output */
196 state->x.have = had - strm->avail_out;
197 state->x.next = strm->next_out - state->x.have;
198
199 /* if the gzip stream completed successfully, look for another */
200 if (ret == Z_STREAM_END)
201 state->how = LOOK;
202
203 /* good decompression */
204 return 0;
205}
206
207/* Fetch data and put it in the output buffer. Assumes state->x.have is 0.
208 Data is either copied from the input file or decompressed from the input
209 file depending on state->how. If state->how is LOOK, then a gzip header is
210 looked for to determine whether to copy or decompress. Returns -1 on error,
211 otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the
212 end of the input file has been reached and all data has been processed. */
213static int gz_fetch(gz_state *state) {
214 PREFIX3(stream) *strm = &(state->strm);
215
216 do {
217 switch (state->how) {
218 case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */
219 if (gz_look(state) == -1)
220 return -1;
221 if (state->how == LOOK)
222 return 0;
223 break;
224 case COPY: /* -> COPY */
225 if (gz_load(state, buf: state->out, len: state->size << 1, have: &(state->x.have))
226 == -1)
227 return -1;
228 state->x.next = state->out;
229 return 0;
230 case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */
231 strm->avail_out = state->size << 1;
232 strm->next_out = state->out;
233 if (gz_decomp(state) == -1)
234 return -1;
235 }
236 } while (state->x.have == 0 && (!state->eof || strm->avail_in));
237 return 0;
238}
239
240/* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */
241static int gz_skip(gz_state *state, z_off64_t len) {
242 unsigned n;
243
244 /* skip over len bytes or reach end-of-file, whichever comes first */
245 while (len)
246 /* skip over whatever is in output buffer */
247 if (state->x.have) {
248 n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
249 (unsigned)len : state->x.have;
250 state->x.have -= n;
251 state->x.next += n;
252 state->x.pos += n;
253 len -= n;
254 } else if (state->eof && state->strm.avail_in == 0) {
255 /* output buffer empty -- return if we're at the end of the input */
256 break;
257 } else {
258 /* need more data to skip -- load up output buffer */
259 /* get more output, looking for header if required */
260 if (gz_fetch(state) == -1)
261 return -1;
262 }
263 return 0;
264}
265
266/* Read len bytes into buf from file, or less than len up to the end of the
267 input. Return the number of bytes read. If zero is returned, either the
268 end of file was reached, or there was an error. state->err must be
269 consulted in that case to determine which. */
270static size_t gz_read(gz_state *state, void *buf, size_t len) {
271 size_t got;
272 unsigned n;
273
274 /* if len is zero, avoid unnecessary operations */
275 if (len == 0)
276 return 0;
277
278 /* process a skip request */
279 if (state->seek) {
280 state->seek = 0;
281 if (gz_skip(state, len: state->skip) == -1)
282 return 0;
283 }
284
285 /* get len bytes to buf, or less than len if at the end */
286 got = 0;
287 do {
288 /* set n to the maximum amount of len that fits in an unsigned int */
289 n = (unsigned)-1;
290 if (n > len)
291 n = (unsigned)len;
292
293 /* first just try copying data from the output buffer */
294 if (state->x.have) {
295 if (state->x.have < n)
296 n = state->x.have;
297 memcpy(dest: buf, src: state->x.next, n: n);
298 state->x.next += n;
299 state->x.have -= n;
300 }
301
302 /* output buffer empty -- return if we're at the end of the input */
303 else if (state->eof && state->strm.avail_in == 0) {
304 state->past = 1; /* tried to read past end */
305 break;
306 }
307
308 /* need output data -- for small len or new stream load up our output
309 buffer */
310 else if (state->how == LOOK || n < (state->size << 1)) {
311 /* get more output, looking for header if required */
312 if (gz_fetch(state) == -1)
313 return 0;
314 continue; /* no progress yet -- go back to copy above */
315 /* the copy above assures that we will leave with space in the
316 output buffer, allowing at least one gzungetc() to succeed */
317 }
318
319 /* large len -- read directly into user buffer */
320 else if (state->how == COPY) { /* read directly */
321 if (gz_load(state, buf: (unsigned char *)buf, len: n, have: &n) == -1)
322 return 0;
323 }
324
325 /* large len -- decompress directly into user buffer */
326 else { /* state->how == GZIP */
327 state->strm.avail_out = n;
328 state->strm.next_out = (unsigned char *)buf;
329 if (gz_decomp(state) == -1)
330 return 0;
331 n = state->x.have;
332 state->x.have = 0;
333 }
334
335 /* update progress */
336 len -= n;
337 buf = (char *)buf + n;
338 got += n;
339 state->x.pos += n;
340 } while (len);
341
342 /* return number of bytes read into user buffer */
343 return got;
344}
345
346/* -- see zlib.h -- */
347int Z_EXPORT PREFIX(gzread)(gzFile file, void *buf, unsigned len) {
348 gz_state *state;
349
350 /* get internal structure */
351 if (file == NULL)
352 return -1;
353 state = (gz_state *)file;
354
355 /* check that we're reading and that there's no (serious) error */
356 if (state->mode != GZ_READ ||
357 (state->err != Z_OK && state->err != Z_BUF_ERROR))
358 return -1;
359
360 /* since an int is returned, make sure len fits in one, otherwise return
361 with an error (this avoids a flaw in the interface) */
362 if ((int)len < 0) {
363 gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
364 return -1;
365 }
366
367 /* read len or fewer bytes to buf */
368 len = (unsigned)gz_read(state, buf, len);
369
370 /* check for an error */
371 if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
372 return -1;
373
374 /* return the number of bytes read (this is assured to fit in an int) */
375 return (int)len;
376}
377
378/* -- see zlib.h -- */
379size_t Z_EXPORT PREFIX(gzfread)(void *buf, size_t size, size_t nitems, gzFile file) {
380 size_t len;
381 gz_state *state;
382
383 /* Exit early if size is zero, also prevents potential division by zero */
384 if (size == 0)
385 return 0;
386
387 /* get internal structure */
388 if (file == NULL)
389 return 0;
390 state = (gz_state *)file;
391
392 /* check that we're reading and that there's no (serious) error */
393 if (state->mode != GZ_READ ||
394 (state->err != Z_OK && state->err != Z_BUF_ERROR))
395 return 0;
396
397 /* compute bytes to read -- error on overflow */
398 if (size && SIZE_MAX / size < nitems) {
399 gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
400 return 0;
401 }
402 len = nitems * size;
403
404 /* read len or fewer bytes to buf, return the number of full items read */
405 return len ? gz_read(state, buf, len) / size : 0;
406}
407
408/* -- see zlib.h -- */
409#undef gzgetc
410#undef zng_gzgetc
411int Z_EXPORT PREFIX(gzgetc)(gzFile file) {
412 unsigned char buf[1];
413 gz_state *state;
414
415 /* get internal structure */
416 if (file == NULL)
417 return -1;
418 state = (gz_state *)file;
419
420 /* check that we're reading and that there's no (serious) error */
421 if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR))
422 return -1;
423
424 /* try output buffer (no need to check for skip request) */
425 if (state->x.have) {
426 state->x.have--;
427 state->x.pos++;
428 return *(state->x.next)++;
429 }
430
431 /* nothing there -- try gz_read() */
432 return gz_read(state, buf, len: 1) < 1 ? -1 : buf[0];
433}
434
435int Z_EXPORT PREFIX(gzgetc_)(gzFile file) {
436 return PREFIX(gzgetc)(file);
437}
438
439/* -- see zlib.h -- */
440int Z_EXPORT PREFIX(gzungetc)(int c, gzFile file) {
441 gz_state *state;
442
443 /* get internal structure */
444 if (file == NULL)
445 return -1;
446 state = (gz_state *)file;
447
448 /* check that we're reading and that there's no (serious) error */
449 if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR))
450 return -1;
451
452 /* process a skip request */
453 if (state->seek) {
454 state->seek = 0;
455 if (gz_skip(state, len: state->skip) == -1)
456 return -1;
457 }
458
459 /* can't push EOF */
460 if (c < 0)
461 return -1;
462
463 /* if output buffer empty, put byte at end (allows more pushing) */
464 if (state->x.have == 0) {
465 state->x.have = 1;
466 state->x.next = state->out + (state->size << 1) - 1;
467 state->x.next[0] = (unsigned char)c;
468 state->x.pos--;
469 state->past = 0;
470 return c;
471 }
472
473 /* if no room, give up (must have already done a gzungetc()) */
474 if (state->x.have == (state->size << 1)) {
475 gz_error(state, Z_DATA_ERROR, "out of room to push characters");
476 return -1;
477 }
478
479 /* slide output data if needed and insert byte before existing data */
480 if (state->x.next == state->out) {
481 unsigned char *src = state->out + state->x.have;
482 unsigned char *dest = state->out + (state->size << 1);
483 while (src > state->out)
484 *--dest = *--src;
485 state->x.next = dest;
486 }
487 state->x.have++;
488 state->x.next--;
489 state->x.next[0] = (unsigned char)c;
490 state->x.pos--;
491 state->past = 0;
492 return c;
493}
494
495/* -- see zlib.h -- */
496char * Z_EXPORT PREFIX(gzgets)(gzFile file, char *buf, int len) {
497 unsigned left, n;
498 char *str;
499 unsigned char *eol;
500 gz_state *state;
501
502 /* check parameters and get internal structure */
503 if (file == NULL || buf == NULL || len < 1)
504 return NULL;
505 state = (gz_state *)file;
506
507 /* check that we're reading and that there's no (serious) error */
508 if (state->mode != GZ_READ || (state->err != Z_OK && state->err != Z_BUF_ERROR))
509 return NULL;
510
511 /* process a skip request */
512 if (state->seek) {
513 state->seek = 0;
514 if (gz_skip(state, len: state->skip) == -1)
515 return NULL;
516 }
517
518 /* copy output bytes up to new line or len - 1, whichever comes first --
519 append a terminating zero to the string (we don't check for a zero in
520 the contents, let the user worry about that) */
521 str = buf;
522 left = (unsigned)len - 1;
523 if (left) {
524 do {
525 /* assure that something is in the output buffer */
526 if (state->x.have == 0 && gz_fetch(state) == -1)
527 return NULL; /* error */
528 if (state->x.have == 0) { /* end of file */
529 state->past = 1; /* read past end */
530 break; /* return what we have */
531 }
532
533 /* look for end-of-line in current output buffer */
534 n = state->x.have > left ? left : state->x.have;
535 eol = (unsigned char *)memchr(s: state->x.next, c: '\n', n: n);
536 if (eol != NULL)
537 n = (unsigned)(eol - state->x.next) + 1;
538
539 /* copy through end-of-line, or remainder if not found */
540 memcpy(dest: buf, src: state->x.next, n: n);
541 state->x.have -= n;
542 state->x.next += n;
543 state->x.pos += n;
544 left -= n;
545 buf += n;
546 } while (left && eol == NULL);
547 }
548
549 /* return terminated string, or if nothing, end of file */
550 if (buf == str)
551 return NULL;
552 buf[0] = 0;
553 return str;
554}
555
556/* -- see zlib.h -- */
557int Z_EXPORT PREFIX(gzdirect)(gzFile file) {
558 gz_state *state;
559
560 /* get internal structure */
561 if (file == NULL)
562 return 0;
563
564 state = (gz_state *)file;
565
566 /* if the state is not known, but we can find out, then do so (this is
567 mainly for right after a gzopen() or gzdopen()) */
568 if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
569 (void)gz_look(state);
570
571 /* return 1 if transparent, 0 if processing a gzip stream */
572 return state->direct;
573}
574
575/* -- see zlib.h -- */
576int Z_EXPORT PREFIX(gzclose_r)(gzFile file) {
577 int ret, err;
578 gz_state *state;
579
580 /* get internal structure */
581 if (file == NULL)
582 return Z_STREAM_ERROR;
583
584 state = (gz_state *)file;
585
586 /* check that we're reading */
587 if (state->mode != GZ_READ)
588 return Z_STREAM_ERROR;
589
590 /* free memory and close file */
591 if (state->size) {
592 PREFIX(inflateEnd)(strm: &(state->strm));
593 zng_free(ptr: state->out);
594 zng_free(ptr: state->in);
595 }
596 err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
597 gz_error(state, Z_OK, NULL);
598 free(ptr: state->path);
599 ret = close(fd: state->fd);
600 zng_free(ptr: state);
601 return ret ? Z_ERRNO : err;
602}
603