/* gzread.c source code [ClickHouse/contrib/zlib-ng/gzread.c] */

/* gzread.c -- zlib functions for reading gzip files
 * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
5
6	#include "zbuild.h"
7	#include "gzguts.h"
8
9	/ Local functions /
10	static int gz_load(gz_state , unsigned* char , unsigned, unsigned* *);
11	static int gz_avail(gz_state *);
12	static int gz_look(gz_state *);
13	static int gz_decomp(gz_state *);
14	static int gz_fetch(gz_state *);
15	static int gz_skip(gz_state *, z_off64_t);
16	static size_t gz_read(gz_state , void* *, size_t);
17
18	/ Use read() to load a buffer -- return -1 on error, otherwise 0. Read from*
19	state->fd, and update state->eof, state->err, and state->msg as appropriate.
20	This function needs to loop on read(), since read() is not guaranteed to
21	read the number of bytes requested, depending on the type of descriptor. /*
22	static int gz_load(gz_state state, unsigned* char buf, unsigned* len, unsigned *have) {
23	ssize_t ret;
24
25	*have = `0`;
26	do {
27	ret = read(state->fd, buf + have, len - have);
28	if (ret <= `0`)
29	break;
30	have += (unsigned*)ret;
31	} while (*have < len);
32	if (ret < `0`) {
33	gz_error(state, Z_ERRNO, zstrerror());
34	return -`1`;
35	}
36	if (ret == `0`)
37	state->eof = `1`;
38	return `0`;
39	}
40
41	/ Load up input buffer and set eof flag if last data loaded -- return -1 on*
42	error, 0 otherwise. Note that the eof flag is set when the end of the input
43	file is reached, even though there may be unused data in the buffer. Once
44	that data has been used, no more attempts will be made to read the file.
45	If strm->avail_in != 0, then the current data is moved to the beginning of
46	the input buffer, and then the remainder of the buffer is loaded with the
47	available data from the input file. /*
48	static int gz_avail(gz_state *state) {
49	unsigned got;
50	PREFIX3(stream) *strm = &(state->strm);
51
52	if (state->err != Z_OK && state->err != Z_BUF_ERROR)
53	return -`1`;
54	if (state->eof == `0`) {
55	if (strm->avail_in) { / copy what's there to the start /
56	unsigned char *p = state->in;
57	unsigned const char *q = strm->next_in;
58	unsigned n = strm->avail_in;
59	do {
60	p++ = q++;
61	} while (--n);
62	}
63	if (gz_load(state, state->in + strm->avail_in, state->size - strm->avail_in, &got) == -`1`)
64	return -`1`;
65	strm->avail_in += got;
66	strm->next_in = state->in;
67	}
68	return `0`;
69	}
70
71	/ Look for gzip header, set up for inflate or copy. state->x.have must be 0.*
72	If this is the first time in, allocate required memory. state->how will be
73	left unchanged if there is no more input data available, will be set to COPY
74	if there is no gzip header and direct copying will be performed, or it will
75	be set to GZIP for decompression. If direct copying, then leftover input
76	data from the input buffer will be copied to the output buffer. In that
77	case, all further file reads will be directly to either the output buffer or
78	a user buffer. If decompressing, the inflate state will be initialized.
79	gz_look() will return 0 on success or -1 on failure. /*
80	static int gz_look(gz_state *state) {
81	PREFIX3(stream) *strm = &(state->strm);
82
83	/ allocate read buffers and inflate memory /
84	if (state->size == `0`) {
85	/ allocate buffers /
86	state->in = (unsigned char *)malloc(state->want);
87	state->out = (unsigned char *)malloc(state->want << `1`);
88	if (state->in == NULL \|\| state->out == NULL) {
89	free(state->out);
90	free(state->in);
91	gz_error(state, Z_MEM_ERROR, "out of memory");
92	return -`1`;
93	}
94	state->size = state->want;
95
96	/ allocate inflate memory /
97	state->strm.zalloc = NULL;
98	state->strm.zfree = NULL;
99	state->strm.opaque = NULL;
100	state->strm.avail_in = `0`;
101	state->strm.next_in = NULL;
102	if (PREFIX(inflateInit2)(&(state->strm), `15` + `16`) != Z_OK) { / gunzip /
103	free(state->out);
104	free(state->in);
105	state->size = `0`;
106	gz_error(state, Z_MEM_ERROR, "out of memory");
107	return -`1`;
108	}
109	}
110
111	/ get at least the magic bytes in the input buffer /
112	if (strm->avail_in < `2`) {
113	if (gz_avail(state) == -`1`)
114	return -`1`;
115	if (strm->avail_in == `0`)
116	return `0`;
117	}
118
119	/ look for gzip magic bytes -- if there, do gzip decoding (note: there is*
120	a logical dilemma here when considering the case of a partially written
121	gzip file, to wit, if a single 31 byte is written, then we cannot tell
122	whether this is a single-byte file, or just a partially written gzip
123	file -- for here we assume that if a gzip file is being written, then
124	the header will be written in a single operation, so that reading a
125	single byte is sufficient indication that it is not a gzip file) /*
126	if (strm->avail_in > `1` &&
127	strm->next_in[`0`] == `31` && strm->next_in[`1`] == `139`) {
128	PREFIX(inflateReset)(strm);
129	state->how = GZIP;
130	state->direct = `0`;
131	return `0`;
132	}
133
134	/ no gzip header -- if we were decoding gzip before, then this is trailing*
135	garbage. Ignore the trailing garbage and finish. /*
136	if (state->direct == `0`) {
137	strm->avail_in = `0`;
138	state->eof = `1`;
139	state->x.have = `0`;
140	return `0`;
141	}
142
143	/ doing raw i/o, copy any leftover input to output -- this assumes that*
144	the output buffer is larger than the input buffer, which also assures
145	space for gzungetc() /*
146	state->x.next = state->out;
147	if (strm->avail_in) {
148	memcpy(state->x.next, strm->next_in, strm->avail_in);
149	state->x.have = strm->avail_in;
150	strm->avail_in = `0`;
151	}
152	state->how = COPY;
153	state->direct = `1`;
154	return `0`;
155	}
156
157	/ Decompress from input to the provided next_out and avail_out in the state.*
158	On return, state->x.have and state->x.next point to the just decompressed
159	data. If the gzip stream completes, state->how is reset to LOOK to look for
160	the next gzip stream or raw data, once state->x.have is depleted. Returns 0
161	on success, -1 on failure. /*
162	static int gz_decomp(gz_state *state) {
163	int ret = Z_OK;
164	unsigned had;
165	PREFIX3(stream) *strm = &(state->strm);
166
167	/ fill output buffer up to end of deflate stream /
168	had = strm->avail_out;
169	do {
170	/ get more input for inflate() /
171	if (strm->avail_in == `0` && gz_avail(state) == -`1`)
172	return -`1`;
173	if (strm->avail_in == `0`) {
174	gz_error(state, Z_BUF_ERROR, "unexpected end of file");
175	break;
176	}
177
178	/ decompress and handle errors /
179	ret = PREFIX(inflate)(strm, Z_NO_FLUSH);
180	if (ret == Z_STREAM_ERROR \|\| ret == Z_NEED_DICT) {
181	gz_error(state, Z_STREAM_ERROR, "internal error: inflate stream corrupt");
182	return -`1`;
183	}
184	if (ret == Z_MEM_ERROR) {
185	gz_error(state, Z_MEM_ERROR, "out of memory");
186	return -`1`;
187	}
188	if (ret == Z_DATA_ERROR) { / deflate stream invalid /
189	gz_error(state, Z_DATA_ERROR, strm->msg == NULL ? "compressed data error" : strm->msg);
190	return -`1`;
191	}
192	} while (strm->avail_out && ret != Z_STREAM_END);
193
194	/ update available output /
195	state->x.have = had - strm->avail_out;
196	state->x.next = strm->next_out - state->x.have;
197
198	/ if the gzip stream completed successfully, look for another /
199	if (ret == Z_STREAM_END)
200	state->how = LOOK;
201
202	/ good decompression /
203	return `0`;
204	}
205
206	/ Fetch data and put it in the output buffer. Assumes state->x.have is 0.*
207	Data is either copied from the input file or decompressed from the input
208	file depending on state->how. If state->how is LOOK, then a gzip header is
209	looked for to determine whether to copy or decompress. Returns -1 on error,
210	otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the
211	end of the input file has been reached and all data has been processed. /*
212	static int gz_fetch(gz_state *state) {
213	PREFIX3(stream) *strm = &(state->strm);
214
215	do {
216	switch (state->how) {
217	case LOOK: / -> LOOK, COPY (only if never GZIP), or GZIP /
218	if (gz_look(state) == -`1`)
219	return -`1`;
220	if (state->how == LOOK)
221	return `0`;
222	break;
223	case COPY: / -> COPY /
224	if (gz_load(state, state->out, state->size << `1`, &(state->x.have))
225	== -`1`)
226	return -`1`;
227	state->x.next = state->out;
228	return `0`;
229	case GZIP: / -> GZIP or LOOK (if end of gzip stream) /
230	strm->avail_out = state->size << `1`;
231	strm->next_out = state->out;
232	if (gz_decomp(state) == -`1`)
233	return -`1`;
234	}
235	} while (state->x.have == `0` && (!state->eof \|\| strm->avail_in));
236	return `0`;
237	}
238
239	/ Skip len uncompressed bytes of output. Return -1 on error, 0 on success. /
240	static int gz_skip(gz_state *state, z_off64_t len) {
241	unsigned n;
242
243	/ skip over len bytes or reach end-of-file, whichever comes first /
244	while (len)
245	/ skip over whatever is in output buffer /
246	if (state->x.have) {
247	n = GT_OFF(state->x.have) \|\| (z_off64_t)state->x.have > len ?
248	(unsigned)len : state->x.have;
249	state->x.have -= n;
250	state->x.next += n;
251	state->x.pos += n;
252	len -= n;
253	} else if (state->eof && state->strm.avail_in == `0`) {
254	/ output buffer empty -- return if we're at the end of the input /
255	break;
256	} else {
257	/ need more data to skip -- load up output buffer /
258	/ get more output, looking for header if required /
259	if (gz_fetch(state) == -`1`)
260	return -`1`;
261	}
262	return `0`;
263	}
264
265	/ Read len bytes into buf from file, or less than len up to the end of the*
266	input. Return the number of bytes read. If zero is returned, either the
267	end of file was reached, or there was an error. state->err must be
268	consulted in that case to determine which. /*
269	static size_t gz_read(gz_state state, void* *buf, size_t len) {
270	size_t got;
271	unsigned n;
272
273	/ if len is zero, avoid unnecessary operations /
274	if (len == `0`)
275	return `0`;
276
277	/ process a skip request /
278	if (state->seek) {
279	state->seek = `0`;
280	if (gz_skip(state, state->skip) == -`1`)
281	return `0`;
282	}
283
284	/ get len bytes to buf, or less than len if at the end /
285	got = `0`;
286	do {
287	/ set n to the maximum amount of len that fits in an unsigned int /
288	n = (unsigned)-`1`;
289	if (n > len)
290	n = (unsigned)len;
291
292	/ first just try copying data from the output buffer /
293	if (state->x.have) {
294	if (state->x.have < n)
295	n = state->x.have;
296	memcpy(buf, state->x.next, n);
297	state->x.next += n;
298	state->x.have -= n;
299	}
300
301	/ output buffer empty -- return if we're at the end of the input /
302	else if (state->eof && state->strm.avail_in == `0`) {
303	state->past = `1`; / tried to read past end /
304	break;
305	}
306
307	/ need output data -- for small len or new stream load up our output*
308	buffer /*
309	else if (state->how == LOOK \|\| n < (state->size << `1`)) {
310	/ get more output, looking for header if required /
311	if (gz_fetch(state) == -`1`)
312	return `0`;
313	continue; / no progress yet -- go back to copy above /
314	/ the copy above assures that we will leave with space in the*
315	output buffer, allowing at least one gzungetc() to succeed /*
316	}
317
318	/ large len -- read directly into user buffer /
319	else if (state->how == COPY) { / read directly /
320	if (gz_load(state, (unsigned char *)buf, n, &n) == -`1`)
321	return `0`;
322	}
323
324	/ large len -- decompress directly into user buffer /
325	else { / state->how == GZIP /
326	state->strm.avail_out = n;
327	state->strm.next_out = (unsigned char *)buf;
328	if (gz_decomp(state) == -`1`)
329	return `0`;
330	n = state->x.have;
331	state->x.have = `0`;
332	}
333
334	/ update progress /
335	len -= n;
336	buf = (char *)buf + n;
337	got += n;
338	state->x.pos += n;
339	} while (len);
340
341	/ return number of bytes read into user buffer /
342	return got;
343	}
344
345	/ -- see zlib.h -- /
346	int ZEXPORT PREFIX(gzread)(gzFile file, void buf, unsigned* len) {
347	gz_state *state;
348
349	/ get internal structure /
350	if (file == NULL)
351	return -`1`;
352	state = (gz_state *)file;
353
354	/ check that we're reading and that there's no (serious) error /
355	if (state->mode != GZ_READ \|\|
356	(state->err != Z_OK && state->err != Z_BUF_ERROR))
357	return -`1`;
358
359	/ since an int is returned, make sure len fits in one, otherwise return*
360	with an error (this avoids a flaw in the interface) /*
361	if ((int)len < `0`) {
362	gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
363	return -`1`;
364	}
365
366	/ read len or fewer bytes to buf /
367	len = (unsigned)gz_read(state, buf, len);
368
369	/ check for an error /
370	if (len == `0` && state->err != Z_OK && state->err != Z_BUF_ERROR)
371	return -`1`;
372
373	/ return the number of bytes read (this is assured to fit in an int) /
374	return (int)len;
375	}
376
377	/ -- see zlib.h -- /
378	size_t ZEXPORT PREFIX(gzfread)(void *buf, size_t size, size_t nitems, gzFile file) {
379	size_t len;
380	gz_state *state;
381
382	/ Exit early if size is zero, also prevents potential division by zero /
383	if (size == `0`)
384	return `0`;
385
386	/ get internal structure /
387	if (file == NULL)
388	return `0`;
389	state = (gz_state *)file;
390
391	/ check that we're reading and that there's no (serious) error /
392	if (state->mode != GZ_READ \|\|
393	(state->err != Z_OK && state->err != Z_BUF_ERROR))
394	return `0`;
395
396	/ compute bytes to read -- error on overflow /
397	len = nitems * size;
398	if (size && len / size != nitems) {
399	gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
400	return `0`;
401	}
402
403	/ read len or fewer bytes to buf, return the number of full items read /
404	return len ? gz_read(state, buf, len) / size : `0`;
405	}
406
407	/ -- see zlib.h -- /
408	#undef gzgetc
409	#undef zng_gzgetc
410	int ZEXPORT PREFIX(gzgetc)(gzFile file) {
411	unsigned char buf[`1`];
412	gz_state *state;
413
414	/ get internal structure /
415	if (file == NULL)
416	return -`1`;
417	state = (gz_state *)file;
418
419	/ check that we're reading and that there's no (serious) error /
420	if (state->mode != GZ_READ \|\| (state->err != Z_OK && state->err != Z_BUF_ERROR))
421	return -`1`;
422
423	/ try output buffer (no need to check for skip request) /
424	if (state->x.have) {
425	state->x.have--;
426	state->x.pos++;
427	return *(state->x.next)++;
428	}
429
430	/ nothing there -- try gz_read() /
431	return gz_read(state, buf, `1`) < `1` ? -`1` : buf[`0`];
432	}
433
434	int ZEXPORT PREFIX(gzgetc_)(gzFile file) {
435	return PREFIX(gzgetc)(file);
436	}
437
438	/ -- see zlib.h -- /
439	int ZEXPORT PREFIX(gzungetc)(int c, gzFile file) {
440	gz_state *state;
441
442	/ get internal structure /
443	if (file == NULL)
444	return -`1`;
445	state = (gz_state *)file;
446
447	/ check that we're reading and that there's no (serious) error /
448	if (state->mode != GZ_READ \|\| (state->err != Z_OK && state->err != Z_BUF_ERROR))
449	return -`1`;
450
451	/ process a skip request /
452	if (state->seek) {
453	state->seek = `0`;
454	if (gz_skip(state, state->skip) == -`1`)
455	return -`1`;
456	}
457
458	/ can't push EOF /
459	if (c < `0`)
460	return -`1`;
461
462	/ if output buffer empty, put byte at end (allows more pushing) /
463	if (state->x.have == `0`) {
464	state->x.have = `1`;
465	state->x.next = state->out + (state->size << `1`) - `1`;
466	state->x.next[`0`] = (unsigned char)c;
467	state->x.pos--;
468	state->past = `0`;
469	return c;
470	}
471
472	/ if no room, give up (must have already done a gzungetc()) /
473	if (state->x.have == (state->size << `1`)) {
474	gz_error(state, Z_DATA_ERROR, "out of room to push characters");
475	return -`1`;
476	}
477
478	/ slide output data if needed and insert byte before existing data /
479	if (state->x.next == state->out) {
480	unsigned char *src = state->out + state->x.have;
481	unsigned char *dest = state->out + (state->size << `1`);
482	while (src > state->out)
483	--dest = --src;
484	state->x.next = dest;
485	}
486	state->x.have++;
487	state->x.next--;
488	state->x.next[`0`] = (unsigned char)c;
489	state->x.pos--;
490	state->past = `0`;
491	return c;
492	}
493
494	/ -- see zlib.h -- /
495	char * ZEXPORT PREFIX(gzgets)(gzFile file, char buf, int* len) {
496	unsigned left, n;
497	char *str;
498	unsigned char *eol;
499	gz_state *state;
500
501	/ check parameters and get internal structure /
502	if (file == NULL \|\| buf == NULL \|\| len < `1`)
503	return NULL;
504	state = (gz_state *)file;
505
506	/ check that we're reading and that there's no (serious) error /
507	if (state->mode != GZ_READ \|\| (state->err != Z_OK && state->err != Z_BUF_ERROR))
508	return NULL;
509
510	/ process a skip request /
511	if (state->seek) {
512	state->seek = `0`;
513	if (gz_skip(state, state->skip) == -`1`)
514	return NULL;
515	}
516
517	/ copy output bytes up to new line or len - 1, whichever comes first --*
518	append a terminating zero to the string (we don't check for a zero in
519	the contents, let the user worry about that) /*
520	str = buf;
521	left = (unsigned)len - `1`;
522	if (left) do {
523	/ assure that something is in the output buffer /
524	if (state->x.have == `0` && gz_fetch(state) == -`1`)
525	return NULL; / error /
526	if (state->x.have == `0`) { / end of file /
527	state->past = `1`; / read past end /
528	break; / return what we have /
529	}
530
531	/ look for end-of-line in current output buffer /
532	n = state->x.have > left ? left : state->x.have;
533	eol = (unsigned char *)memchr(state->x.next, `'\n'`, n);
534	if (eol != NULL)
535	n = (unsigned)(eol - state->x.next) + `1`;
536
537	/ copy through end-of-line, or remainder if not found /
538	memcpy(buf, state->x.next, n);
539	state->x.have -= n;
540	state->x.next += n;
541	state->x.pos += n;
542	left -= n;
543	buf += n;
544	} while (left && eol == NULL);
545
546	/ return terminated string, or if nothing, end of file /
547	if (buf == str)
548	return NULL;
549	buf[`0`] = `0`;
550	return str;
551	}
552
553	/ -- see zlib.h -- /
554	int ZEXPORT PREFIX(gzdirect)(gzFile file) {
555	gz_state *state;
556
557	/ get internal structure /
558	if (file == NULL)
559	return `0`;
560
561	state = (gz_state *)file;
562
563	/ if the state is not known, but we can find out, then do so (this is*
564	mainly for right after a gzopen() or gzdopen()) /*
565	if (state->mode == GZ_READ && state->how == LOOK && state->x.have == `0`)
566	(void)gz_look(state);
567
568	/ return 1 if transparent, 0 if processing a gzip stream /
569	return state->direct;
570	}
571
572	/ -- see zlib.h -- /
573	int ZEXPORT PREFIX(gzclose_r)(gzFile file) {
574	int ret, err;
575	gz_state *state;
576
577	/ get internal structure /
578	if (file == NULL)
579	return Z_STREAM_ERROR;
580
581	state = (gz_state *)file;
582
583	/ check that we're reading /
584	if (state->mode != GZ_READ)
585	return Z_STREAM_ERROR;
586
587	/ free memory and close file /
588	if (state->size) {
589	PREFIX(inflateEnd)(&(state->strm));
590	free(state->out);
591	free(state->in);
592	}
593	err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
594	gz_error(state, Z_OK, NULL);
595	free(state->path);
596	ret = close(state->fd);
597	free(state);
598	return ret ? Z_ERRNO : err;
599	}
600

/* Browse the source code of ClickHouse/contrib/zlib-ng/gzread.c */