1 | #include "mupdf/fitz.h" |
2 | #include "fitz-imp.h" |
3 | |
4 | #include <string.h> |
5 | |
6 | #include <zlib.h> |
7 | |
/* Fallback for toolchains whose headers do not provide INT32_MAX. */
#if !defined (INT32_MAX)
#define INT32_MAX 2147483647L
#endif

/* 32-bit record signatures, as read little-endian from the stream. */
#define ZIP_LOCAL_FILE_SIG 0x04034b50
#define ZIP_DATA_DESC_SIG 0x08074b50
#define ZIP_CENTRAL_DIRECTORY_SIG 0x02014b50
#define ZIP_END_OF_CENTRAL_DIRECTORY_SIG 0x06054b50

#define ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIG 0x07064b50
#define ZIP64_END_OF_CENTRAL_DIRECTORY_SIG 0x06064b50
/* 16-bit type tag of the extra field that carries 64-bit sizes and offsets. */
#define ZIP64_EXTRA_FIELD_SIG 0x0001

/* Bit in the general purpose flags that marks an entry as encrypted. */
#define ZIP_ENCRYPTED_FLAG 0x1
22 | |
/* Short names for the two zip-specific structures used in this file. */
typedef struct fz_zip_archive_s fz_zip_archive;
typedef struct zip_entry_s zip_entry;

/* One entry recorded while parsing the central directory. */
struct zip_entry_s
{
	char *name;		/* heap-allocated, NUL-terminated entry name */
	uint64_t offset;	/* stream offset of the entry's local file header */
	uint64_t csize;		/* compressed size in bytes */
	uint64_t usize;		/* uncompressed size in bytes */
};
31 | |
32 | struct fz_zip_archive_s |
33 | { |
34 | fz_archive super; |
35 | |
36 | uint64_t count; |
37 | zip_entry *entries; |
38 | }; |
39 | |
40 | static void drop_zip_archive(fz_context *ctx, fz_archive *arch) |
41 | { |
42 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
43 | int i; |
44 | for (i = 0; i < zip->count; ++i) |
45 | fz_free(ctx, zip->entries[i].name); |
46 | fz_free(ctx, zip->entries); |
47 | } |
48 | |
49 | static void read_zip_dir_imp(fz_context *ctx, fz_zip_archive *zip, int64_t start_offset) |
50 | { |
51 | fz_stream *file = zip->super.file; |
52 | uint32_t sig; |
53 | int i; |
54 | int namesize, metasize, ; |
55 | uint64_t count, offset; |
56 | uint64_t csize, usize; |
57 | char *name = NULL; |
58 | size_t n; |
59 | |
60 | fz_var(name); |
61 | |
62 | zip->count = 0; |
63 | |
64 | fz_seek(ctx, file, start_offset, 0); |
65 | |
66 | sig = fz_read_uint32_le(ctx, file); |
67 | if (sig != ZIP_END_OF_CENTRAL_DIRECTORY_SIG) |
68 | fz_throw(ctx, FZ_ERROR_GENERIC, "wrong zip end of central directory signature (0x%x)" , sig); |
69 | |
70 | (void) fz_read_uint16_le(ctx, file); /* this disk */ |
71 | (void) fz_read_uint16_le(ctx, file); /* start disk */ |
72 | (void) fz_read_uint16_le(ctx, file); /* entries in this disk */ |
73 | count = fz_read_uint16_le(ctx, file); /* entries in central directory disk */ |
74 | (void) fz_read_uint32_le(ctx, file); /* size of central directory */ |
75 | offset = fz_read_uint32_le(ctx, file); /* offset to central directory */ |
76 | |
77 | /* ZIP64 */ |
78 | if (count == 0xFFFF || offset == 0xFFFFFFFF) |
79 | { |
80 | int64_t offset64, count64; |
81 | |
82 | fz_seek(ctx, file, start_offset - 20, 0); |
83 | |
84 | sig = fz_read_uint32_le(ctx, file); |
85 | if (sig != ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIG) |
86 | fz_throw(ctx, FZ_ERROR_GENERIC, "wrong zip64 end of central directory locator signature (0x%x)" , sig); |
87 | |
88 | (void) fz_read_uint32_le(ctx, file); /* start disk */ |
89 | offset64 = fz_read_uint64_le(ctx, file); /* offset to end of central directory record */ |
90 | |
91 | fz_seek(ctx, file, offset64, 0); |
92 | |
93 | sig = fz_read_uint32_le(ctx, file); |
94 | if (sig != ZIP64_END_OF_CENTRAL_DIRECTORY_SIG) |
95 | fz_throw(ctx, FZ_ERROR_GENERIC, "wrong zip64 end of central directory signature (0x%x)" , sig); |
96 | |
97 | (void) fz_read_uint64_le(ctx, file); /* size of record */ |
98 | (void) fz_read_uint16_le(ctx, file); /* version made by */ |
99 | (void) fz_read_uint16_le(ctx, file); /* version to extract */ |
100 | (void) fz_read_uint32_le(ctx, file); /* disk number */ |
101 | (void) fz_read_uint32_le(ctx, file); /* disk number start */ |
102 | count64 = fz_read_uint64_le(ctx, file); /* entries in central directory disk */ |
103 | (void) fz_read_uint64_le(ctx, file); /* entries in central directory */ |
104 | (void) fz_read_uint64_le(ctx, file); /* size of central directory */ |
105 | offset64 = fz_read_uint64_le(ctx, file); /* offset to central directory */ |
106 | |
107 | if (count == 0xFFFF) |
108 | { |
109 | count = count64; |
110 | } |
111 | if (offset == 0xFFFFFFFF) |
112 | { |
113 | offset = offset64; |
114 | } |
115 | } |
116 | |
117 | fz_seek(ctx, file, offset, 0); |
118 | |
119 | fz_try(ctx) |
120 | { |
121 | for (i = 0; i < count; i++) |
122 | { |
123 | sig = fz_read_uint32_le(ctx, file); |
124 | if (sig != ZIP_CENTRAL_DIRECTORY_SIG) |
125 | fz_throw(ctx, FZ_ERROR_GENERIC, "wrong zip central directory signature (0x%x)" , sig); |
126 | |
127 | (void) fz_read_uint16_le(ctx, file); /* version made by */ |
128 | (void) fz_read_uint16_le(ctx, file); /* version to extract */ |
129 | (void) fz_read_uint16_le(ctx, file); /* general */ |
130 | (void) fz_read_uint16_le(ctx, file); /* method */ |
131 | (void) fz_read_uint16_le(ctx, file); /* last mod file time */ |
132 | (void) fz_read_uint16_le(ctx, file); /* last mod file date */ |
133 | (void) fz_read_uint32_le(ctx, file); /* crc-32 */ |
134 | csize = fz_read_uint32_le(ctx, file); |
135 | usize = fz_read_uint32_le(ctx, file); |
136 | namesize = fz_read_uint16_le(ctx, file); |
137 | metasize = fz_read_uint16_le(ctx, file); |
138 | commentsize = fz_read_uint16_le(ctx, file); |
139 | (void) fz_read_uint16_le(ctx, file); /* disk number start */ |
140 | (void) fz_read_uint16_le(ctx, file); /* int file atts */ |
141 | (void) fz_read_uint32_le(ctx, file); /* ext file atts */ |
142 | offset = fz_read_uint32_le(ctx, file); |
143 | |
144 | if (namesize < 0 || metasize < 0 || commentsize < 0) |
145 | fz_throw(ctx, FZ_ERROR_GENERIC, "invalid size in zip entry" ); |
146 | |
147 | name = fz_malloc(ctx, namesize + 1); |
148 | |
149 | n = fz_read(ctx, file, (unsigned char*)name, namesize); |
150 | if (n < (size_t)namesize) |
151 | fz_throw(ctx, FZ_ERROR_GENERIC, "premature end of data in zip entry name" ); |
152 | name[namesize] = '\0'; |
153 | |
154 | while (metasize > 0) |
155 | { |
156 | int type = fz_read_uint16_le(ctx, file); |
157 | int size = fz_read_uint16_le(ctx, file); |
158 | |
159 | if (type == ZIP64_EXTRA_FIELD_SIG) |
160 | { |
161 | int sizeleft = size; |
162 | if (usize == 0xFFFFFFFF && sizeleft >= 8) |
163 | { |
164 | usize = fz_read_uint64_le(ctx, file); |
165 | sizeleft -= 8; |
166 | } |
167 | if (csize == 0xFFFFFFFF && sizeleft >= 8) |
168 | { |
169 | csize = fz_read_uint64_le(ctx, file); |
170 | sizeleft -= 8; |
171 | } |
172 | if (offset == 0xFFFFFFFF && sizeleft >= 8) |
173 | { |
174 | offset = fz_read_uint64_le(ctx, file); |
175 | sizeleft -= 8; |
176 | } |
177 | fz_seek(ctx, file, sizeleft - size, 1); |
178 | } |
179 | fz_seek(ctx, file, size, 1); |
180 | metasize -= 4 + size; |
181 | } |
182 | |
183 | if (usize > INT32_MAX || csize > INT32_MAX) |
184 | fz_throw(ctx, FZ_ERROR_GENERIC, "zip archive entry larger than 2 GB" ); |
185 | |
186 | fz_seek(ctx, file, commentsize, 1); |
187 | |
188 | zip->entries = fz_realloc_array(ctx, zip->entries, zip->count + 1, zip_entry); |
189 | |
190 | zip->entries[zip->count].offset = offset; |
191 | zip->entries[zip->count].csize = csize; |
192 | zip->entries[zip->count].usize = usize; |
193 | zip->entries[zip->count].name = name; |
194 | name = NULL; |
195 | |
196 | zip->count++; |
197 | } |
198 | } |
199 | fz_always(ctx) |
200 | fz_free(ctx, name); |
201 | fz_catch(ctx) |
202 | fz_rethrow(ctx); |
203 | } |
204 | |
205 | static int (fz_context *ctx, fz_zip_archive *zip, zip_entry *ent) |
206 | { |
207 | fz_stream *file = zip->super.file; |
208 | uint32_t sig; |
209 | int general, method, namelength, ; |
210 | |
211 | fz_seek(ctx, file, ent->offset, 0); |
212 | |
213 | sig = fz_read_uint32_le(ctx, file); |
214 | if (sig != ZIP_LOCAL_FILE_SIG) |
215 | fz_throw(ctx, FZ_ERROR_GENERIC, "wrong zip local file signature (0x%x)" , sig); |
216 | |
217 | (void) fz_read_uint16_le(ctx, file); /* version */ |
218 | general = fz_read_uint16_le(ctx, file); /* general */ |
219 | if (general & ZIP_ENCRYPTED_FLAG) |
220 | fz_throw(ctx, FZ_ERROR_GENERIC, "zip content is encrypted" ); |
221 | |
222 | method = fz_read_uint16_le(ctx, file); |
223 | (void) fz_read_uint16_le(ctx, file); /* file time */ |
224 | (void) fz_read_uint16_le(ctx, file); /* file date */ |
225 | (void) fz_read_uint32_le(ctx, file); /* crc-32 */ |
226 | (void) fz_read_uint32_le(ctx, file); /* csize */ |
227 | (void) fz_read_uint32_le(ctx, file); /* usize */ |
228 | namelength = fz_read_uint16_le(ctx, file); |
229 | extralength = fz_read_uint16_le(ctx, file); |
230 | |
231 | fz_seek(ctx, file, namelength + extralength, 1); |
232 | |
233 | return method; |
234 | } |
235 | |
236 | static void ensure_zip_entries(fz_context *ctx, fz_zip_archive *zip) |
237 | { |
238 | fz_stream *file = zip->super.file; |
239 | unsigned char buf[512]; |
240 | size_t size, back, maxback; |
241 | size_t i, n; |
242 | |
243 | fz_seek(ctx, file, 0, SEEK_END); |
244 | size = fz_tell(ctx, file); |
245 | |
246 | maxback = fz_minz(size, 0xFFFF + sizeof buf); |
247 | back = fz_minz(maxback, sizeof buf); |
248 | |
249 | while (back <= maxback) |
250 | { |
251 | fz_seek(ctx, file, (int64_t)(size - back), 0); |
252 | n = fz_read(ctx, file, buf, sizeof buf); |
253 | if (n < 4) |
254 | break; |
255 | for (i = n - 4; i > 0; i--) |
256 | if (!memcmp(buf + i, "PK\5\6" , 4)) |
257 | { |
258 | read_zip_dir_imp(ctx, zip, size - back + i); |
259 | return; |
260 | } |
261 | back += sizeof buf - 4; |
262 | } |
263 | |
264 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find end of central directory" ); |
265 | } |
266 | |
267 | static zip_entry *lookup_zip_entry(fz_context *ctx, fz_zip_archive *zip, const char *name) |
268 | { |
269 | int i; |
270 | if (name[0] == '/') |
271 | ++name; |
272 | for (i = 0; i < zip->count; i++) |
273 | if (!fz_strcasecmp(name, zip->entries[i].name)) |
274 | return &zip->entries[i]; |
275 | return NULL; |
276 | } |
277 | |
278 | static fz_stream *open_zip_entry(fz_context *ctx, fz_archive *arch, const char *name) |
279 | { |
280 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
281 | fz_stream *file = zip->super.file; |
282 | int method; |
283 | zip_entry *ent; |
284 | |
285 | ent = lookup_zip_entry(ctx, zip, name); |
286 | if (!ent) |
287 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find named zip archive entry" ); |
288 | |
289 | method = read_zip_entry_header(ctx, zip, ent); |
290 | if (method == 0) |
291 | return fz_open_null_filter(ctx, file, ent->usize, fz_tell(ctx, file)); |
292 | if (method == 8) |
293 | return fz_open_flated(ctx, file, -15); |
294 | fz_throw(ctx, FZ_ERROR_GENERIC, "unknown zip method: %d" , method); |
295 | } |
296 | |
297 | static fz_buffer *read_zip_entry(fz_context *ctx, fz_archive *arch, const char *name) |
298 | { |
299 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
300 | fz_stream *file = zip->super.file; |
301 | fz_buffer *ubuf; |
302 | unsigned char *cbuf; |
303 | int method; |
304 | z_stream z; |
305 | int code; |
306 | int len; |
307 | zip_entry *ent; |
308 | |
309 | fz_var(cbuf); |
310 | |
311 | ent = lookup_zip_entry(ctx, zip, name); |
312 | if (!ent) |
313 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find named zip archive entry" ); |
314 | |
315 | method = read_zip_entry_header(ctx, zip, ent); |
316 | ubuf = fz_new_buffer(ctx, ent->usize + 1); /* +1 because many callers will add a terminating zero */ |
317 | |
318 | if (method == 0) |
319 | { |
320 | fz_try(ctx) |
321 | { |
322 | ubuf->len = fz_read(ctx, file, ubuf->data, ent->usize); |
323 | if (ubuf->len < (size_t)ent->usize) |
324 | fz_warn(ctx, "premature end of data in stored zip archive entry" ); |
325 | } |
326 | fz_catch(ctx) |
327 | { |
328 | fz_drop_buffer(ctx, ubuf); |
329 | fz_rethrow(ctx); |
330 | } |
331 | return ubuf; |
332 | } |
333 | else if (method == 8) |
334 | { |
335 | fz_try(ctx) |
336 | { |
337 | cbuf = fz_malloc(ctx, ent->csize); |
338 | |
339 | fz_read(ctx, file, cbuf, ent->csize); |
340 | |
341 | z.zalloc = fz_zlib_alloc; |
342 | z.zfree = fz_zlib_free; |
343 | z.opaque = ctx; |
344 | z.next_in = cbuf; |
345 | z.avail_in = ent->csize; |
346 | z.next_out = ubuf->data; |
347 | z.avail_out = ent->usize; |
348 | |
349 | code = inflateInit2(&z, -15); |
350 | if (code != Z_OK) |
351 | { |
352 | fz_throw(ctx, FZ_ERROR_GENERIC, "zlib inflateInit2 error: %s" , z.msg); |
353 | } |
354 | code = inflate(&z, Z_FINISH); |
355 | if (code != Z_STREAM_END) |
356 | { |
357 | inflateEnd(&z); |
358 | fz_throw(ctx, FZ_ERROR_GENERIC, "zlib inflate error: %s" , z.msg); |
359 | } |
360 | code = inflateEnd(&z); |
361 | if (code != Z_OK) |
362 | { |
363 | fz_throw(ctx, FZ_ERROR_GENERIC, "zlib inflateEnd error: %s" , z.msg); |
364 | } |
365 | |
366 | len = ent->usize - z.avail_out; |
367 | if (len < ent->usize) |
368 | fz_warn(ctx, "premature end of data in compressed archive entry" ); |
369 | ubuf->len = len; |
370 | } |
371 | fz_always(ctx) |
372 | { |
373 | fz_free(ctx, cbuf); |
374 | } |
375 | fz_catch(ctx) |
376 | { |
377 | fz_drop_buffer(ctx, ubuf); |
378 | fz_rethrow(ctx); |
379 | } |
380 | return ubuf; |
381 | } |
382 | |
383 | fz_drop_buffer(ctx, ubuf); |
384 | fz_throw(ctx, FZ_ERROR_GENERIC, "unknown zip method: %d" , method); |
385 | } |
386 | |
387 | static int has_zip_entry(fz_context *ctx, fz_archive *arch, const char *name) |
388 | { |
389 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
390 | zip_entry *ent = lookup_zip_entry(ctx, zip, name); |
391 | return ent != NULL; |
392 | } |
393 | |
394 | static const char *list_zip_entry(fz_context *ctx, fz_archive *arch, int idx) |
395 | { |
396 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
397 | if (idx < 0 || idx >= zip->count) |
398 | return NULL; |
399 | return zip->entries[idx].name; |
400 | } |
401 | |
402 | static int count_zip_entries(fz_context *ctx, fz_archive *arch) |
403 | { |
404 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
405 | return zip->count; |
406 | } |
407 | |
408 | /* |
409 | Detect if stream object is a zip archive. |
410 | |
411 | Assumes that the stream object is seekable. |
412 | */ |
413 | int |
414 | fz_is_zip_archive(fz_context *ctx, fz_stream *file) |
415 | { |
416 | const unsigned char signature[4] = { 'P', 'K', 0x03, 0x04 }; |
417 | unsigned char data[4]; |
418 | size_t n; |
419 | |
420 | fz_seek(ctx, file, 0, 0); |
421 | n = fz_read(ctx, file, data, nelem(data)); |
422 | if (n != nelem(signature)) |
423 | return 0; |
424 | if (memcmp(data, signature, nelem(signature))) |
425 | return 0; |
426 | |
427 | return 1; |
428 | } |
429 | |
430 | /* |
431 | Open a zip archive stream. |
432 | |
433 | Open an archive using a seekable stream object rather than |
434 | opening a file or directory on disk. |
435 | |
436 | An exception is throw if the stream is not a zip archive as |
437 | indicated by the presence of a zip signature. |
438 | |
439 | */ |
440 | fz_archive * |
441 | fz_open_zip_archive_with_stream(fz_context *ctx, fz_stream *file) |
442 | { |
443 | fz_zip_archive *zip; |
444 | |
445 | if (!fz_is_zip_archive(ctx, file)) |
446 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot recognize zip archive" ); |
447 | |
448 | zip = fz_new_derived_archive(ctx, file, fz_zip_archive); |
449 | zip->super.format = "zip" ; |
450 | zip->super.count_entries = count_zip_entries; |
451 | zip->super.list_entry = list_zip_entry; |
452 | zip->super.has_entry = has_zip_entry; |
453 | zip->super.read_entry = read_zip_entry; |
454 | zip->super.open_entry = open_zip_entry; |
455 | zip->super.drop_archive = drop_zip_archive; |
456 | |
457 | fz_try(ctx) |
458 | { |
459 | ensure_zip_entries(ctx, zip); |
460 | } |
461 | fz_catch(ctx) |
462 | { |
463 | fz_drop_archive(ctx, &zip->super); |
464 | fz_rethrow(ctx); |
465 | } |
466 | |
467 | return &zip->super; |
468 | } |
469 | |
470 | /* |
471 | Open a zip archive file. |
472 | |
473 | An exception is throw if the file is not a zip archive as |
474 | indicated by the presence of a zip signature. |
475 | |
476 | filename: a path to a zip archive file as it would be given to |
477 | open(2). |
478 | */ |
479 | fz_archive * |
480 | fz_open_zip_archive(fz_context *ctx, const char *filename) |
481 | { |
482 | fz_archive *zip = NULL; |
483 | fz_stream *file; |
484 | |
485 | file = fz_open_file(ctx, filename); |
486 | |
487 | fz_var(zip); |
488 | |
489 | fz_try(ctx) |
490 | zip = fz_open_zip_archive_with_stream(ctx, file); |
491 | fz_always(ctx) |
492 | fz_drop_stream(ctx, file); |
493 | fz_catch(ctx) |
494 | fz_rethrow(ctx); |
495 | |
496 | return zip; |
497 | } |
498 | |