/**
 * xzlib.c: front end for the transparent support of lzma compression
 *          at the I/O layer, based on an example file from lzma project
 *
 * See Copyright for the status of this software.
 *
 * Anders F Bjorklund <afb@users.sourceforge.net>
 */
9 | #define IN_LIBXML |
10 | #include "libxml.h" |
11 | #ifdef LIBXML_LZMA_ENABLED |
12 | |
13 | #include <string.h> |
14 | #ifdef HAVE_ERRNO_H |
15 | #include <errno.h> |
16 | #endif |
17 | |
18 | |
19 | #ifdef HAVE_SYS_TYPES_H |
20 | #include <sys/types.h> |
21 | #endif |
22 | #ifdef HAVE_SYS_STAT_H |
23 | #include <sys/stat.h> |
24 | #endif |
25 | #ifdef HAVE_FCNTL_H |
26 | #include <fcntl.h> |
27 | #endif |
28 | #ifdef HAVE_UNISTD_H |
29 | #include <unistd.h> |
30 | #endif |
31 | #ifdef HAVE_STDLIB_H |
32 | #include <stdlib.h> |
33 | #endif |
34 | #ifdef LIBXML_ZLIB_ENABLED |
35 | #include <zlib.h> |
36 | #endif |
37 | #ifdef LIBXML_LZMA_ENABLED |
38 | #include <lzma.h> |
39 | #endif |
40 | |
41 | #include "xzlib.h" |
42 | #include <libxml/xmlmemory.h> |
43 | |
/*
 * Values for the xz_state "how" field: the strategy currently used to
 * turn bytes read from the file into bytes handed to the caller.
 */
#define LOOK 0 /* look for a gzip/lzma header */
#define COPY 1 /* copy input directly */
#define GZIP 2 /* decompress a gzip stream */
#define LZMA 3 /* decompress a lzma stream */
49 | |
/*
 * Internal state of one open xzFile.  The public xzFile handle is simply a
 * pointer to this structure (see the casts in the __libxml2_xz*() entry
 * points).
 */
typedef struct {
    int mode;                   /* access mode; not referenced by the code
                                   in this file */
    int fd;                     /* file descriptor */
    char *path;                 /* path or fd for error messages */
    uint64_t pos;               /* current position in uncompressed data */
    unsigned int size;          /* buffer size, zero if not allocated yet */
    unsigned int want;          /* requested buffer size, default is BUFSIZ */
    unsigned char *in;          /* input buffer */
    unsigned char *out;         /* output buffer (double-sized when reading) */
    unsigned char *next;        /* next output data to deliver or write */
    unsigned int have;          /* amount of output data unused at next */
    int eof;                    /* true if end of input file reached */
    uint64_t start;             /* where the lzma data started, for rewinding */
    uint64_t raw;               /* where the raw data started, for seeking */
    int how;                    /* one of LOOK, COPY, GZIP or LZMA */
    int direct;                 /* true if last read direct, false if lzma */
    /* seek request */
    uint64_t skip;              /* amount to skip (already rewound if backwards) */
    int seek;                   /* true if seek request pending */
    /* error information */
    int err;                    /* error code */
    char *msg;                  /* error message; a string literal (never
                                   freed) when err is LZMA_MEM_ERROR */
    /* lzma stream */
    int init;                   /* is the inflate stream initialized */
    lzma_stream strm;           /* stream structure in-place (not a pointer) */
    char padding1[32];          /* padding allowing to cope with possible
                                   extensions of above structure without
                                   too much side effect */
#ifdef LIBXML_ZLIB_ENABLED
    /* zlib inflate or deflate stream */
    z_stream zstrm;             /* stream structure in-place (not a pointer) */
#endif
    char padding2[32];          /* padding allowing to cope with possible
                                   extensions of above structure without
                                   too much side effect */
} xz_state, *xz_statep;
87 | |
88 | static void |
89 | xz_error(xz_statep state, int err, const char *msg) |
90 | { |
91 | /* free previously allocated message and clear */ |
92 | if (state->msg != NULL) { |
93 | if (state->err != LZMA_MEM_ERROR) |
94 | xmlFree(state->msg); |
95 | state->msg = NULL; |
96 | } |
97 | |
98 | /* set error code, and if no message, then done */ |
99 | state->err = err; |
100 | if (msg == NULL) |
101 | return; |
102 | |
103 | /* for an out of memory error, save as static string */ |
104 | if (err == LZMA_MEM_ERROR) { |
105 | state->msg = (char *) msg; |
106 | return; |
107 | } |
108 | |
109 | /* construct error message with path */ |
110 | if ((state->msg = |
111 | xmlMalloc(strlen(state->path) + strlen(msg) + 3)) == NULL) { |
112 | state->err = LZMA_MEM_ERROR; |
113 | state->msg = (char *) "out of memory" ; |
114 | return; |
115 | } |
116 | strcpy(state->msg, state->path); |
117 | strcat(state->msg, ": " ); |
118 | strcat(state->msg, msg); |
119 | return; |
120 | } |
121 | |
122 | static void |
123 | xz_reset(xz_statep state) |
124 | { |
125 | state->have = 0; /* no output data available */ |
126 | state->eof = 0; /* not at end of file */ |
127 | state->how = LOOK; /* look for gzip header */ |
128 | state->direct = 1; /* default for empty file */ |
129 | state->seek = 0; /* no seek request pending */ |
130 | xz_error(state, LZMA_OK, NULL); /* clear error */ |
131 | state->pos = 0; /* no uncompressed data yet */ |
132 | state->strm.avail_in = 0; /* no input data yet */ |
133 | #ifdef LIBXML_ZLIB_ENABLED |
134 | state->zstrm.avail_in = 0; /* no input data yet */ |
135 | #endif |
136 | } |
137 | |
138 | static xzFile |
139 | xz_open(const char *path, int fd, const char *mode ATTRIBUTE_UNUSED) |
140 | { |
141 | xz_statep state; |
142 | |
143 | /* allocate xzFile structure to return */ |
144 | state = xmlMalloc(sizeof(xz_state)); |
145 | if (state == NULL) |
146 | return NULL; |
147 | state->size = 0; /* no buffers allocated yet */ |
148 | state->want = BUFSIZ; /* requested buffer size */ |
149 | state->msg = NULL; /* no error message yet */ |
150 | state->init = 0; /* initialization of zlib data */ |
151 | |
152 | /* save the path name for error messages */ |
153 | state->path = xmlMalloc(strlen(path) + 1); |
154 | if (state->path == NULL) { |
155 | xmlFree(state); |
156 | return NULL; |
157 | } |
158 | strcpy(state->path, path); |
159 | |
160 | /* open the file with the appropriate mode (or just use fd) */ |
161 | state->fd = fd != -1 ? fd : open(path, |
162 | #ifdef O_LARGEFILE |
163 | O_LARGEFILE | |
164 | #endif |
165 | #ifdef O_BINARY |
166 | O_BINARY | |
167 | #endif |
168 | O_RDONLY, 0666); |
169 | if (state->fd == -1) { |
170 | xmlFree(state->path); |
171 | xmlFree(state); |
172 | return NULL; |
173 | } |
174 | |
175 | /* save the current position for rewinding (only if reading) */ |
176 | state->start = lseek(state->fd, 0, SEEK_CUR); |
177 | if (state->start == (uint64_t) - 1) |
178 | state->start = 0; |
179 | |
180 | /* initialize stream */ |
181 | xz_reset(state); |
182 | |
183 | /* return stream */ |
184 | return (xzFile) state; |
185 | } |
186 | |
187 | static int |
188 | xz_compressed(xzFile f) { |
189 | xz_statep state; |
190 | |
191 | if (f == NULL) |
192 | return(-1); |
193 | state = (xz_statep) f; |
194 | if (state->init <= 0) |
195 | return(-1); |
196 | |
197 | switch (state->how) { |
198 | case COPY: |
199 | return(0); |
200 | case GZIP: |
201 | case LZMA: |
202 | return(1); |
203 | } |
204 | return(-1); |
205 | } |
206 | |
207 | xzFile |
208 | __libxml2_xzopen(const char *path, const char *mode) |
209 | { |
210 | return xz_open(path, -1, mode); |
211 | } |
212 | |
213 | int |
214 | __libxml2_xzcompressed(xzFile f) { |
215 | return xz_compressed(f); |
216 | } |
217 | |
218 | xzFile |
219 | __libxml2_xzdopen(int fd, const char *mode) |
220 | { |
221 | char *path; /* identifier for error messages */ |
222 | xzFile xz; |
223 | |
224 | if (fd == -1 || (path = xmlMalloc(7 + 3 * sizeof(int))) == NULL) |
225 | return NULL; |
226 | sprintf(path, "<fd:%d>" , fd); /* for debugging */ |
227 | xz = xz_open(path, fd, mode); |
228 | xmlFree(path); |
229 | return xz; |
230 | } |
231 | |
232 | static int |
233 | xz_load(xz_statep state, unsigned char *buf, unsigned int len, |
234 | unsigned int *have) |
235 | { |
236 | int ret; |
237 | |
238 | *have = 0; |
239 | do { |
240 | ret = read(state->fd, buf + *have, len - *have); |
241 | if (ret <= 0) |
242 | break; |
243 | *have += ret; |
244 | } while (*have < len); |
245 | if (ret < 0) { |
246 | xz_error(state, -1, strerror(errno)); |
247 | return -1; |
248 | } |
249 | if (ret == 0) |
250 | state->eof = 1; |
251 | return 0; |
252 | } |
253 | |
254 | static int |
255 | xz_avail(xz_statep state) |
256 | { |
257 | lzma_stream *strm = &(state->strm); |
258 | |
259 | if (state->err != LZMA_OK) |
260 | return -1; |
261 | if (state->eof == 0) { |
262 | /* avail_in is size_t, which is not necessary sizeof(unsigned) */ |
263 | unsigned tmp = strm->avail_in; |
264 | |
265 | if (xz_load(state, state->in, state->size, &tmp) == -1) { |
266 | strm->avail_in = tmp; |
267 | return -1; |
268 | } |
269 | strm->avail_in = tmp; |
270 | strm->next_in = state->in; |
271 | } |
272 | return 0; |
273 | } |
274 | |
#ifdef LIBXML_ZLIB_ENABLED
/*
 * Same as xz_avail(), but for code that consumes input through the zlib
 * stream: the input cursor is copied to the lzma stream, refilled there,
 * and copied back.  Returns what xz_avail() returned.
 */
static int
xz_avail_zstrm(xz_statep state)
{
    lzma_stream *lz = &(state->strm);
    z_streamp gz = &(state->zstrm);
    int ret;

    lz->avail_in = gz->avail_in;
    lz->next_in = gz->next_in;

    ret = xz_avail(state);

    gz->avail_in = (uInt) lz->avail_in;
    gz->next_in = (Bytef *) lz->next_in;
    return ret;
}
#endif
288 | |
289 | static int |
290 | is_format_xz(xz_statep state) |
291 | { |
292 | lzma_stream *strm = &(state->strm); |
293 | |
294 | return strm->avail_in >= 6 && memcmp(state->in, "\3757zXZ" , 6) == 0; |
295 | } |
296 | |
297 | static int |
298 | is_format_lzma(xz_statep state) |
299 | { |
300 | lzma_stream *strm = &(state->strm); |
301 | |
302 | lzma_filter filter; |
303 | lzma_options_lzma *opt; |
304 | uint32_t dict_size; |
305 | uint64_t uncompressed_size; |
306 | size_t i; |
307 | |
308 | if (strm->avail_in < 13) |
309 | return 0; |
310 | |
311 | filter.id = LZMA_FILTER_LZMA1; |
312 | if (lzma_properties_decode(&filter, NULL, state->in, 5) != LZMA_OK) |
313 | return 0; |
314 | |
315 | opt = filter.options; |
316 | dict_size = opt->dict_size; |
317 | free(opt); /* we can't use xmlFree on a string returned by zlib */ |
318 | |
319 | /* A hack to ditch tons of false positives: We allow only dictionary |
320 | * sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone |
321 | * created only files with 2^n, but accepts any dictionary size. |
322 | * If someone complains, this will be reconsidered. |
323 | */ |
324 | if (dict_size != UINT32_MAX) { |
325 | uint32_t d = dict_size - 1; |
326 | |
327 | d |= d >> 2; |
328 | d |= d >> 3; |
329 | d |= d >> 4; |
330 | d |= d >> 8; |
331 | d |= d >> 16; |
332 | ++d; |
333 | if (d != dict_size || dict_size == 0) |
334 | return 0; |
335 | } |
336 | |
337 | /* Another hack to ditch false positives: Assume that if the |
338 | * uncompressed size is known, it must be less than 256 GiB. |
339 | * Again, if someone complains, this will be reconsidered. |
340 | */ |
341 | uncompressed_size = 0; |
342 | for (i = 0; i < 8; ++i) |
343 | uncompressed_size |= (uint64_t) (state->in[5 + i]) << (i * 8); |
344 | |
345 | if (uncompressed_size != UINT64_MAX |
346 | && uncompressed_size > (UINT64_C(1) << 38)) |
347 | return 0; |
348 | |
349 | return 1; |
350 | } |
351 | |
#ifdef LIBXML_ZLIB_ENABLED

/*
 * Both macros expect local variables named `state` and `strm` at the point
 * of use.  They yield the next input byte, refilling the buffer when it is
 * empty, or -1 on end of input or error.
 */
/* Get next byte from input, or -1 if end or error. */
#define NEXT() ((strm->avail_in == 0 && xz_avail(state) == -1) ? -1 : \
                (strm->avail_in == 0 ? -1 : \
                 (strm->avail_in--, *(strm->next_in)++)))
/* Same thing, but from zstrm */
#define NEXTZ() ((strm->avail_in == 0 && xz_avail_zstrm(state) == -1) ? -1 : \
                 (strm->avail_in == 0 ? -1 : \
                  (strm->avail_in--, *(strm->next_in)++)))

/*
 * Read a four-byte little-endian integer from the zlib input.
 *
 * Returns 0 and stores the value in *ret on success.  Returns -1, leaving
 * *ret untouched, when the fourth byte could not be read; only that last
 * read is checked.
 */
static int
gz_next4(xz_statep state, unsigned long *ret)
{
    z_streamp strm = &(state->zstrm);
    unsigned long value;
    int b0, b1, b2, b3;

    /* one byte per statement keeps the read order well defined */
    b0 = NEXTZ();
    b1 = NEXTZ();
    b2 = NEXTZ();
    b3 = NEXTZ();
    if (b3 == -1)
        return -1;

    value = b0;
    value += (unsigned) b1 << 8;
    value += (unsigned long) b2 << 16;
    value += (unsigned long) b3 << 24;
    *ret = value;
    return 0;
}
#endif
383 | |
384 | static int |
385 | xz_head(xz_statep state) |
386 | { |
387 | lzma_stream *strm = &(state->strm); |
388 | lzma_stream init = LZMA_STREAM_INIT; |
389 | int flags; |
390 | unsigned len; |
391 | |
392 | /* allocate read buffers and inflate memory */ |
393 | if (state->size == 0) { |
394 | /* allocate buffers */ |
395 | state->in = xmlMalloc(state->want); |
396 | state->out = xmlMalloc(state->want << 1); |
397 | if (state->in == NULL || state->out == NULL) { |
398 | if (state->out != NULL) |
399 | xmlFree(state->out); |
400 | if (state->in != NULL) |
401 | xmlFree(state->in); |
402 | xz_error(state, LZMA_MEM_ERROR, "out of memory" ); |
403 | return -1; |
404 | } |
405 | state->size = state->want; |
406 | |
407 | /* allocate decoder memory */ |
408 | state->strm = init; |
409 | state->strm.avail_in = 0; |
410 | state->strm.next_in = NULL; |
411 | if (lzma_auto_decoder(&state->strm, 100000000, 0) != LZMA_OK) { |
412 | xmlFree(state->out); |
413 | xmlFree(state->in); |
414 | state->size = 0; |
415 | xz_error(state, LZMA_MEM_ERROR, "out of memory" ); |
416 | return -1; |
417 | } |
418 | #ifdef LIBXML_ZLIB_ENABLED |
419 | /* allocate inflate memory */ |
420 | state->zstrm.zalloc = Z_NULL; |
421 | state->zstrm.zfree = Z_NULL; |
422 | state->zstrm.opaque = Z_NULL; |
423 | state->zstrm.avail_in = 0; |
424 | state->zstrm.next_in = Z_NULL; |
425 | if (state->init == 0) { |
426 | if (inflateInit2(&(state->zstrm), -15) != Z_OK) {/* raw inflate */ |
427 | xmlFree(state->out); |
428 | xmlFree(state->in); |
429 | state->size = 0; |
430 | xz_error(state, LZMA_MEM_ERROR, "out of memory" ); |
431 | return -1; |
432 | } |
433 | state->init = 1; |
434 | } |
435 | #endif |
436 | } |
437 | |
438 | /* get some data in the input buffer */ |
439 | if (strm->avail_in == 0) { |
440 | if (xz_avail(state) == -1) |
441 | return -1; |
442 | if (strm->avail_in == 0) |
443 | return 0; |
444 | } |
445 | |
446 | /* look for the xz magic header bytes */ |
447 | if (is_format_xz(state) || is_format_lzma(state)) { |
448 | state->how = LZMA; |
449 | state->direct = 0; |
450 | return 0; |
451 | } |
452 | #ifdef LIBXML_ZLIB_ENABLED |
453 | /* look for the gzip magic header bytes 31 and 139 */ |
454 | if (strm->next_in[0] == 31) { |
455 | strm->avail_in--; |
456 | strm->next_in++; |
457 | if (strm->avail_in == 0 && xz_avail(state) == -1) |
458 | return -1; |
459 | if (strm->avail_in && strm->next_in[0] == 139) { |
460 | /* we have a gzip header, woo hoo! */ |
461 | strm->avail_in--; |
462 | strm->next_in++; |
463 | |
464 | /* skip rest of header */ |
465 | if (NEXT() != 8) { /* compression method */ |
466 | xz_error(state, LZMA_DATA_ERROR, |
467 | "unknown compression method" ); |
468 | return -1; |
469 | } |
470 | flags = NEXT(); |
471 | if (flags & 0xe0) { /* reserved flag bits */ |
472 | xz_error(state, LZMA_DATA_ERROR, |
473 | "unknown header flags set" ); |
474 | return -1; |
475 | } |
476 | NEXT(); /* modification time */ |
477 | NEXT(); |
478 | NEXT(); |
479 | NEXT(); |
480 | NEXT(); /* extra flags */ |
481 | NEXT(); /* operating system */ |
482 | if (flags & 4) { /* extra field */ |
483 | len = (unsigned) NEXT(); |
484 | len += (unsigned) NEXT() << 8; |
485 | while (len--) |
486 | if (NEXT() < 0) |
487 | break; |
488 | } |
489 | if (flags & 8) /* file name */ |
490 | while (NEXT() > 0) ; |
491 | if (flags & 16) /* comment */ |
492 | while (NEXT() > 0) ; |
493 | if (flags & 2) { /* header crc */ |
494 | NEXT(); |
495 | NEXT(); |
496 | } |
497 | /* an unexpected end of file is not checked for here -- it will be |
498 | * noticed on the first request for uncompressed data */ |
499 | |
500 | /* set up for decompression */ |
501 | inflateReset(&state->zstrm); |
502 | state->zstrm.adler = crc32(0L, Z_NULL, 0); |
503 | state->how = GZIP; |
504 | state->direct = 0; |
505 | return 0; |
506 | } else { |
507 | /* not a gzip file -- save first byte (31) and fall to raw i/o */ |
508 | state->out[0] = 31; |
509 | state->have = 1; |
510 | } |
511 | } |
512 | #endif |
513 | |
514 | /* doing raw i/o, save start of raw data for seeking, copy any leftover |
515 | * input to output -- this assumes that the output buffer is larger than |
516 | * the input buffer, which also assures space for gzungetc() */ |
517 | state->raw = state->pos; |
518 | state->next = state->out; |
519 | if (strm->avail_in) { |
520 | memcpy(state->next + state->have, strm->next_in, strm->avail_in); |
521 | state->have += strm->avail_in; |
522 | strm->avail_in = 0; |
523 | } |
524 | state->how = COPY; |
525 | state->direct = 1; |
526 | return 0; |
527 | } |
528 | |
529 | static int |
530 | xz_decomp(xz_statep state) |
531 | { |
532 | int ret; |
533 | unsigned had; |
534 | unsigned long crc, len; |
535 | lzma_stream *strm = &(state->strm); |
536 | |
537 | lzma_action action = LZMA_RUN; |
538 | |
539 | /* fill output buffer up to end of deflate stream */ |
540 | had = strm->avail_out; |
541 | do { |
542 | /* get more input for inflate() */ |
543 | if (strm->avail_in == 0 && xz_avail(state) == -1) |
544 | return -1; |
545 | if (strm->avail_in == 0) { |
546 | xz_error(state, LZMA_DATA_ERROR, "unexpected end of file" ); |
547 | return -1; |
548 | } |
549 | if (state->eof) |
550 | action = LZMA_FINISH; |
551 | |
552 | /* decompress and handle errors */ |
553 | #ifdef LIBXML_ZLIB_ENABLED |
554 | if (state->how == GZIP) { |
555 | state->zstrm.avail_in = (uInt) state->strm.avail_in; |
556 | state->zstrm.next_in = (Bytef *) state->strm.next_in; |
557 | state->zstrm.avail_out = (uInt) state->strm.avail_out; |
558 | state->zstrm.next_out = (Bytef *) state->strm.next_out; |
559 | ret = inflate(&state->zstrm, Z_NO_FLUSH); |
560 | if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { |
561 | xz_error(state, Z_STREAM_ERROR, |
562 | "internal error: inflate stream corrupt" ); |
563 | return -1; |
564 | } |
565 | if (ret == Z_MEM_ERROR) |
566 | ret = LZMA_MEM_ERROR; |
567 | if (ret == Z_DATA_ERROR) |
568 | ret = LZMA_DATA_ERROR; |
569 | if (ret == Z_STREAM_END) |
570 | ret = LZMA_STREAM_END; |
571 | state->strm.avail_in = state->zstrm.avail_in; |
572 | state->strm.next_in = state->zstrm.next_in; |
573 | state->strm.avail_out = state->zstrm.avail_out; |
574 | state->strm.next_out = state->zstrm.next_out; |
575 | } else /* state->how == LZMA */ |
576 | #endif |
577 | ret = lzma_code(strm, action); |
578 | if (ret == LZMA_MEM_ERROR) { |
579 | xz_error(state, LZMA_MEM_ERROR, "out of memory" ); |
580 | return -1; |
581 | } |
582 | if (ret == LZMA_DATA_ERROR) { |
583 | xz_error(state, LZMA_DATA_ERROR, "compressed data error" ); |
584 | return -1; |
585 | } |
586 | if (ret == LZMA_PROG_ERROR) { |
587 | xz_error(state, LZMA_PROG_ERROR, "compression error" ); |
588 | return -1; |
589 | } |
590 | } while (strm->avail_out && ret != LZMA_STREAM_END); |
591 | |
592 | /* update available output and crc check value */ |
593 | state->have = had - strm->avail_out; |
594 | state->next = strm->next_out - state->have; |
595 | #ifdef LIBXML_ZLIB_ENABLED |
596 | state->zstrm.adler = |
597 | crc32(state->zstrm.adler, state->next, state->have); |
598 | #endif |
599 | |
600 | if (ret == LZMA_STREAM_END) { |
601 | #ifdef LIBXML_ZLIB_ENABLED |
602 | if (state->how == GZIP) { |
603 | if (gz_next4(state, &crc) == -1 || gz_next4(state, &len) == -1) { |
604 | xz_error(state, LZMA_DATA_ERROR, "unexpected end of file" ); |
605 | return -1; |
606 | } |
607 | if (crc != state->zstrm.adler) { |
608 | xz_error(state, LZMA_DATA_ERROR, "incorrect data check" ); |
609 | return -1; |
610 | } |
611 | if (len != (state->zstrm.total_out & 0xffffffffL)) { |
612 | xz_error(state, LZMA_DATA_ERROR, "incorrect length check" ); |
613 | return -1; |
614 | } |
615 | state->strm.avail_in = 0; |
616 | state->strm.next_in = NULL; |
617 | state->strm.avail_out = 0; |
618 | state->strm.next_out = NULL; |
619 | } else |
620 | #endif |
621 | if (strm->avail_in != 0 || !state->eof) { |
622 | xz_error(state, LZMA_DATA_ERROR, "trailing garbage" ); |
623 | return -1; |
624 | } |
625 | state->how = LOOK; /* ready for next stream, once have is 0 (leave |
626 | * state->direct unchanged to remember how) */ |
627 | } |
628 | |
629 | /* good decompression */ |
630 | return 0; |
631 | } |
632 | |
633 | static int |
634 | xz_make(xz_statep state) |
635 | { |
636 | lzma_stream *strm = &(state->strm); |
637 | |
638 | if (state->how == LOOK) { /* look for lzma / gzip header */ |
639 | if (xz_head(state) == -1) |
640 | return -1; |
641 | if (state->have) /* got some data from xz_head() */ |
642 | return 0; |
643 | } |
644 | if (state->how == COPY) { /* straight copy */ |
645 | if (xz_load(state, state->out, state->size << 1, &(state->have)) == |
646 | -1) |
647 | return -1; |
648 | state->next = state->out; |
649 | } else if (state->how == LZMA || state->how == GZIP) { /* decompress */ |
650 | strm->avail_out = state->size << 1; |
651 | strm->next_out = state->out; |
652 | if (xz_decomp(state) == -1) |
653 | return -1; |
654 | } |
655 | return 0; |
656 | } |
657 | |
658 | static int |
659 | xz_skip(xz_statep state, uint64_t len) |
660 | { |
661 | unsigned n; |
662 | |
663 | /* skip over len bytes or reach end-of-file, whichever comes first */ |
664 | while (len) |
665 | /* skip over whatever is in output buffer */ |
666 | if (state->have) { |
667 | n = (uint64_t) state->have > len ? |
668 | (unsigned) len : state->have; |
669 | state->have -= n; |
670 | state->next += n; |
671 | state->pos += n; |
672 | len -= n; |
673 | } |
674 | |
675 | /* output buffer empty -- return if we're at the end of the input */ |
676 | else if (state->eof && state->strm.avail_in == 0) |
677 | break; |
678 | |
679 | /* need more data to skip -- load up output buffer */ |
680 | else { |
681 | /* get more output, looking for header if required */ |
682 | if (xz_make(state) == -1) |
683 | return -1; |
684 | } |
685 | return 0; |
686 | } |
687 | |
688 | int |
689 | __libxml2_xzread(xzFile file, void *buf, unsigned len) |
690 | { |
691 | unsigned got, n; |
692 | xz_statep state; |
693 | lzma_stream *strm; |
694 | |
695 | /* get internal structure */ |
696 | if (file == NULL) |
697 | return -1; |
698 | state = (xz_statep) file; |
699 | strm = &(state->strm); |
700 | |
701 | /* check that we're reading and that there's no error */ |
702 | if (state->err != LZMA_OK) |
703 | return -1; |
704 | |
705 | /* since an int is returned, make sure len fits in one, otherwise return |
706 | * with an error (this avoids the flaw in the interface) */ |
707 | if ((int) len < 0) { |
708 | xz_error(state, LZMA_BUF_ERROR, |
709 | "requested length does not fit in int" ); |
710 | return -1; |
711 | } |
712 | |
713 | /* if len is zero, avoid unnecessary operations */ |
714 | if (len == 0) |
715 | return 0; |
716 | |
717 | /* process a skip request */ |
718 | if (state->seek) { |
719 | state->seek = 0; |
720 | if (xz_skip(state, state->skip) == -1) |
721 | return -1; |
722 | } |
723 | |
724 | /* get len bytes to buf, or less than len if at the end */ |
725 | got = 0; |
726 | do { |
727 | /* first just try copying data from the output buffer */ |
728 | if (state->have) { |
729 | n = state->have > len ? len : state->have; |
730 | memcpy(buf, state->next, n); |
731 | state->next += n; |
732 | state->have -= n; |
733 | } |
734 | |
735 | /* output buffer empty -- return if we're at the end of the input */ |
736 | else if (state->eof && strm->avail_in == 0) |
737 | break; |
738 | |
739 | /* need output data -- for small len or new stream load up our output |
740 | * buffer */ |
741 | else if (state->how == LOOK || len < (state->size << 1)) { |
742 | /* get more output, looking for header if required */ |
743 | if (xz_make(state) == -1) |
744 | return -1; |
745 | continue; /* no progress yet -- go back to memcpy() above */ |
746 | /* the copy above assures that we will leave with space in the |
747 | * output buffer, allowing at least one gzungetc() to succeed */ |
748 | } |
749 | |
750 | /* large len -- read directly into user buffer */ |
751 | else if (state->how == COPY) { /* read directly */ |
752 | if (xz_load(state, buf, len, &n) == -1) |
753 | return -1; |
754 | } |
755 | |
756 | /* large len -- decompress directly into user buffer */ |
757 | else { /* state->how == LZMA */ |
758 | strm->avail_out = len; |
759 | strm->next_out = buf; |
760 | if (xz_decomp(state) == -1) |
761 | return -1; |
762 | n = state->have; |
763 | state->have = 0; |
764 | } |
765 | |
766 | /* update progress */ |
767 | len -= n; |
768 | buf = (char *) buf + n; |
769 | got += n; |
770 | state->pos += n; |
771 | } while (len); |
772 | |
773 | /* return number of bytes read into user buffer (will fit in int) */ |
774 | return (int) got; |
775 | } |
776 | |
777 | int |
778 | __libxml2_xzclose(xzFile file) |
779 | { |
780 | int ret; |
781 | xz_statep state; |
782 | |
783 | /* get internal structure */ |
784 | if (file == NULL) |
785 | return LZMA_DATA_ERROR; |
786 | state = (xz_statep) file; |
787 | |
788 | /* free memory and close file */ |
789 | if (state->size) { |
790 | lzma_end(&(state->strm)); |
791 | #ifdef LIBXML_ZLIB_ENABLED |
792 | if (state->init == 1) |
793 | inflateEnd(&(state->zstrm)); |
794 | state->init = 0; |
795 | #endif |
796 | xmlFree(state->out); |
797 | xmlFree(state->in); |
798 | } |
799 | xmlFree(state->path); |
800 | if ((state->msg != NULL) && (state->err != LZMA_MEM_ERROR)) |
801 | xmlFree(state->msg); |
802 | ret = close(state->fd); |
803 | xmlFree(state); |
804 | return ret ? ret : LZMA_OK; |
805 | } |
806 | #endif /* LIBXML_LZMA_ENABLED */ |
807 | |