1#include "mupdf/fitz.h"
2#include "fitz-imp.h"
3
4#include <string.h>
5
6#include <zlib.h>
7
/* Fallback definition for toolchains whose headers do not provide INT32_MAX. */
#if !defined (INT32_MAX)
#define INT32_MAX 2147483647L
#endif

/*
	Record signatures. They are compared against values obtained with
	fz_read_uint32_le, so each constant is the little-endian reading of
	the bytes 'P', 'K' followed by two record-specific bytes.
*/
#define ZIP_LOCAL_FILE_SIG 0x04034b50
#define ZIP_DATA_DESC_SIG 0x08074b50 /* not referenced by the code in this file */
#define ZIP_CENTRAL_DIRECTORY_SIG 0x02014b50
#define ZIP_END_OF_CENTRAL_DIRECTORY_SIG 0x06054b50

#define ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIG 0x07064b50
#define ZIP64_END_OF_CENTRAL_DIRECTORY_SIG 0x06064b50
/* 16-bit type id of the extra field record that carries 64-bit sizes/offsets. */
#define ZIP64_EXTRA_FIELD_SIG 0x0001

/* Bit tested in the "general" field of a local file header; set means encrypted. */
#define ZIP_ENCRYPTED_FLAG 0x1
22
typedef struct zip_entry_s zip_entry;
typedef struct fz_zip_archive_s fz_zip_archive;

/* One file listed in the archive's central directory. */
struct zip_entry_s
{
	/* Heap-allocated, NUL-terminated entry name. */
	char *name;
	/* Position in the archive stream of the entry's local file header. */
	uint64_t offset;
	/* Compressed size in bytes. */
	uint64_t csize;
	/* Uncompressed size in bytes. */
	uint64_t usize;
};
31
32struct fz_zip_archive_s
33{
34 fz_archive super;
35
36 uint64_t count;
37 zip_entry *entries;
38};
39
40static void drop_zip_archive(fz_context *ctx, fz_archive *arch)
41{
42 fz_zip_archive *zip = (fz_zip_archive *) arch;
43 int i;
44 for (i = 0; i < zip->count; ++i)
45 fz_free(ctx, zip->entries[i].name);
46 fz_free(ctx, zip->entries);
47}
48
49static void read_zip_dir_imp(fz_context *ctx, fz_zip_archive *zip, int64_t start_offset)
50{
51 fz_stream *file = zip->super.file;
52 uint32_t sig;
53 int i;
54 int namesize, metasize, commentsize;
55 uint64_t count, offset;
56 uint64_t csize, usize;
57 char *name = NULL;
58 size_t n;
59
60 fz_var(name);
61
62 zip->count = 0;
63
64 fz_seek(ctx, file, start_offset, 0);
65
66 sig = fz_read_uint32_le(ctx, file);
67 if (sig != ZIP_END_OF_CENTRAL_DIRECTORY_SIG)
68 fz_throw(ctx, FZ_ERROR_GENERIC, "wrong zip end of central directory signature (0x%x)", sig);
69
70 (void) fz_read_uint16_le(ctx, file); /* this disk */
71 (void) fz_read_uint16_le(ctx, file); /* start disk */
72 (void) fz_read_uint16_le(ctx, file); /* entries in this disk */
73 count = fz_read_uint16_le(ctx, file); /* entries in central directory disk */
74 (void) fz_read_uint32_le(ctx, file); /* size of central directory */
75 offset = fz_read_uint32_le(ctx, file); /* offset to central directory */
76
77 /* ZIP64 */
78 if (count == 0xFFFF || offset == 0xFFFFFFFF)
79 {
80 int64_t offset64, count64;
81
82 fz_seek(ctx, file, start_offset - 20, 0);
83
84 sig = fz_read_uint32_le(ctx, file);
85 if (sig != ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIG)
86 fz_throw(ctx, FZ_ERROR_GENERIC, "wrong zip64 end of central directory locator signature (0x%x)", sig);
87
88 (void) fz_read_uint32_le(ctx, file); /* start disk */
89 offset64 = fz_read_uint64_le(ctx, file); /* offset to end of central directory record */
90
91 fz_seek(ctx, file, offset64, 0);
92
93 sig = fz_read_uint32_le(ctx, file);
94 if (sig != ZIP64_END_OF_CENTRAL_DIRECTORY_SIG)
95 fz_throw(ctx, FZ_ERROR_GENERIC, "wrong zip64 end of central directory signature (0x%x)", sig);
96
97 (void) fz_read_uint64_le(ctx, file); /* size of record */
98 (void) fz_read_uint16_le(ctx, file); /* version made by */
99 (void) fz_read_uint16_le(ctx, file); /* version to extract */
100 (void) fz_read_uint32_le(ctx, file); /* disk number */
101 (void) fz_read_uint32_le(ctx, file); /* disk number start */
102 count64 = fz_read_uint64_le(ctx, file); /* entries in central directory disk */
103 (void) fz_read_uint64_le(ctx, file); /* entries in central directory */
104 (void) fz_read_uint64_le(ctx, file); /* size of central directory */
105 offset64 = fz_read_uint64_le(ctx, file); /* offset to central directory */
106
107 if (count == 0xFFFF)
108 {
109 count = count64;
110 }
111 if (offset == 0xFFFFFFFF)
112 {
113 offset = offset64;
114 }
115 }
116
117 fz_seek(ctx, file, offset, 0);
118
119 fz_try(ctx)
120 {
121 for (i = 0; i < count; i++)
122 {
123 sig = fz_read_uint32_le(ctx, file);
124 if (sig != ZIP_CENTRAL_DIRECTORY_SIG)
125 fz_throw(ctx, FZ_ERROR_GENERIC, "wrong zip central directory signature (0x%x)", sig);
126
127 (void) fz_read_uint16_le(ctx, file); /* version made by */
128 (void) fz_read_uint16_le(ctx, file); /* version to extract */
129 (void) fz_read_uint16_le(ctx, file); /* general */
130 (void) fz_read_uint16_le(ctx, file); /* method */
131 (void) fz_read_uint16_le(ctx, file); /* last mod file time */
132 (void) fz_read_uint16_le(ctx, file); /* last mod file date */
133 (void) fz_read_uint32_le(ctx, file); /* crc-32 */
134 csize = fz_read_uint32_le(ctx, file);
135 usize = fz_read_uint32_le(ctx, file);
136 namesize = fz_read_uint16_le(ctx, file);
137 metasize = fz_read_uint16_le(ctx, file);
138 commentsize = fz_read_uint16_le(ctx, file);
139 (void) fz_read_uint16_le(ctx, file); /* disk number start */
140 (void) fz_read_uint16_le(ctx, file); /* int file atts */
141 (void) fz_read_uint32_le(ctx, file); /* ext file atts */
142 offset = fz_read_uint32_le(ctx, file);
143
144 if (namesize < 0 || metasize < 0 || commentsize < 0)
145 fz_throw(ctx, FZ_ERROR_GENERIC, "invalid size in zip entry");
146
147 name = fz_malloc(ctx, namesize + 1);
148
149 n = fz_read(ctx, file, (unsigned char*)name, namesize);
150 if (n < (size_t)namesize)
151 fz_throw(ctx, FZ_ERROR_GENERIC, "premature end of data in zip entry name");
152 name[namesize] = '\0';
153
154 while (metasize > 0)
155 {
156 int type = fz_read_uint16_le(ctx, file);
157 int size = fz_read_uint16_le(ctx, file);
158
159 if (type == ZIP64_EXTRA_FIELD_SIG)
160 {
161 int sizeleft = size;
162 if (usize == 0xFFFFFFFF && sizeleft >= 8)
163 {
164 usize = fz_read_uint64_le(ctx, file);
165 sizeleft -= 8;
166 }
167 if (csize == 0xFFFFFFFF && sizeleft >= 8)
168 {
169 csize = fz_read_uint64_le(ctx, file);
170 sizeleft -= 8;
171 }
172 if (offset == 0xFFFFFFFF && sizeleft >= 8)
173 {
174 offset = fz_read_uint64_le(ctx, file);
175 sizeleft -= 8;
176 }
177 fz_seek(ctx, file, sizeleft - size, 1);
178 }
179 fz_seek(ctx, file, size, 1);
180 metasize -= 4 + size;
181 }
182
183 if (usize > INT32_MAX || csize > INT32_MAX)
184 fz_throw(ctx, FZ_ERROR_GENERIC, "zip archive entry larger than 2 GB");
185
186 fz_seek(ctx, file, commentsize, 1);
187
188 zip->entries = fz_realloc_array(ctx, zip->entries, zip->count + 1, zip_entry);
189
190 zip->entries[zip->count].offset = offset;
191 zip->entries[zip->count].csize = csize;
192 zip->entries[zip->count].usize = usize;
193 zip->entries[zip->count].name = name;
194 name = NULL;
195
196 zip->count++;
197 }
198 }
199 fz_always(ctx)
200 fz_free(ctx, name);
201 fz_catch(ctx)
202 fz_rethrow(ctx);
203}
204
205static int read_zip_entry_header(fz_context *ctx, fz_zip_archive *zip, zip_entry *ent)
206{
207 fz_stream *file = zip->super.file;
208 uint32_t sig;
209 int general, method, namelength, extralength;
210
211 fz_seek(ctx, file, ent->offset, 0);
212
213 sig = fz_read_uint32_le(ctx, file);
214 if (sig != ZIP_LOCAL_FILE_SIG)
215 fz_throw(ctx, FZ_ERROR_GENERIC, "wrong zip local file signature (0x%x)", sig);
216
217 (void) fz_read_uint16_le(ctx, file); /* version */
218 general = fz_read_uint16_le(ctx, file); /* general */
219 if (general & ZIP_ENCRYPTED_FLAG)
220 fz_throw(ctx, FZ_ERROR_GENERIC, "zip content is encrypted");
221
222 method = fz_read_uint16_le(ctx, file);
223 (void) fz_read_uint16_le(ctx, file); /* file time */
224 (void) fz_read_uint16_le(ctx, file); /* file date */
225 (void) fz_read_uint32_le(ctx, file); /* crc-32 */
226 (void) fz_read_uint32_le(ctx, file); /* csize */
227 (void) fz_read_uint32_le(ctx, file); /* usize */
228 namelength = fz_read_uint16_le(ctx, file);
229 extralength = fz_read_uint16_le(ctx, file);
230
231 fz_seek(ctx, file, namelength + extralength, 1);
232
233 return method;
234}
235
236static void ensure_zip_entries(fz_context *ctx, fz_zip_archive *zip)
237{
238 fz_stream *file = zip->super.file;
239 unsigned char buf[512];
240 size_t size, back, maxback;
241 size_t i, n;
242
243 fz_seek(ctx, file, 0, SEEK_END);
244 size = fz_tell(ctx, file);
245
246 maxback = fz_minz(size, 0xFFFF + sizeof buf);
247 back = fz_minz(maxback, sizeof buf);
248
249 while (back <= maxback)
250 {
251 fz_seek(ctx, file, (int64_t)(size - back), 0);
252 n = fz_read(ctx, file, buf, sizeof buf);
253 if (n < 4)
254 break;
255 for (i = n - 4; i > 0; i--)
256 if (!memcmp(buf + i, "PK\5\6", 4))
257 {
258 read_zip_dir_imp(ctx, zip, size - back + i);
259 return;
260 }
261 back += sizeof buf - 4;
262 }
263
264 fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find end of central directory");
265}
266
267static zip_entry *lookup_zip_entry(fz_context *ctx, fz_zip_archive *zip, const char *name)
268{
269 int i;
270 if (name[0] == '/')
271 ++name;
272 for (i = 0; i < zip->count; i++)
273 if (!fz_strcasecmp(name, zip->entries[i].name))
274 return &zip->entries[i];
275 return NULL;
276}
277
278static fz_stream *open_zip_entry(fz_context *ctx, fz_archive *arch, const char *name)
279{
280 fz_zip_archive *zip = (fz_zip_archive *) arch;
281 fz_stream *file = zip->super.file;
282 int method;
283 zip_entry *ent;
284
285 ent = lookup_zip_entry(ctx, zip, name);
286 if (!ent)
287 fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find named zip archive entry");
288
289 method = read_zip_entry_header(ctx, zip, ent);
290 if (method == 0)
291 return fz_open_null_filter(ctx, file, ent->usize, fz_tell(ctx, file));
292 if (method == 8)
293 return fz_open_flated(ctx, file, -15);
294 fz_throw(ctx, FZ_ERROR_GENERIC, "unknown zip method: %d", method);
295}
296
297static fz_buffer *read_zip_entry(fz_context *ctx, fz_archive *arch, const char *name)
298{
299 fz_zip_archive *zip = (fz_zip_archive *) arch;
300 fz_stream *file = zip->super.file;
301 fz_buffer *ubuf;
302 unsigned char *cbuf;
303 int method;
304 z_stream z;
305 int code;
306 int len;
307 zip_entry *ent;
308
309 fz_var(cbuf);
310
311 ent = lookup_zip_entry(ctx, zip, name);
312 if (!ent)
313 fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find named zip archive entry");
314
315 method = read_zip_entry_header(ctx, zip, ent);
316 ubuf = fz_new_buffer(ctx, ent->usize + 1); /* +1 because many callers will add a terminating zero */
317
318 if (method == 0)
319 {
320 fz_try(ctx)
321 {
322 ubuf->len = fz_read(ctx, file, ubuf->data, ent->usize);
323 if (ubuf->len < (size_t)ent->usize)
324 fz_warn(ctx, "premature end of data in stored zip archive entry");
325 }
326 fz_catch(ctx)
327 {
328 fz_drop_buffer(ctx, ubuf);
329 fz_rethrow(ctx);
330 }
331 return ubuf;
332 }
333 else if (method == 8)
334 {
335 fz_try(ctx)
336 {
337 cbuf = fz_malloc(ctx, ent->csize);
338
339 fz_read(ctx, file, cbuf, ent->csize);
340
341 z.zalloc = fz_zlib_alloc;
342 z.zfree = fz_zlib_free;
343 z.opaque = ctx;
344 z.next_in = cbuf;
345 z.avail_in = ent->csize;
346 z.next_out = ubuf->data;
347 z.avail_out = ent->usize;
348
349 code = inflateInit2(&z, -15);
350 if (code != Z_OK)
351 {
352 fz_throw(ctx, FZ_ERROR_GENERIC, "zlib inflateInit2 error: %s", z.msg);
353 }
354 code = inflate(&z, Z_FINISH);
355 if (code != Z_STREAM_END)
356 {
357 inflateEnd(&z);
358 fz_throw(ctx, FZ_ERROR_GENERIC, "zlib inflate error: %s", z.msg);
359 }
360 code = inflateEnd(&z);
361 if (code != Z_OK)
362 {
363 fz_throw(ctx, FZ_ERROR_GENERIC, "zlib inflateEnd error: %s", z.msg);
364 }
365
366 len = ent->usize - z.avail_out;
367 if (len < ent->usize)
368 fz_warn(ctx, "premature end of data in compressed archive entry");
369 ubuf->len = len;
370 }
371 fz_always(ctx)
372 {
373 fz_free(ctx, cbuf);
374 }
375 fz_catch(ctx)
376 {
377 fz_drop_buffer(ctx, ubuf);
378 fz_rethrow(ctx);
379 }
380 return ubuf;
381 }
382
383 fz_drop_buffer(ctx, ubuf);
384 fz_throw(ctx, FZ_ERROR_GENERIC, "unknown zip method: %d", method);
385}
386
387static int has_zip_entry(fz_context *ctx, fz_archive *arch, const char *name)
388{
389 fz_zip_archive *zip = (fz_zip_archive *) arch;
390 zip_entry *ent = lookup_zip_entry(ctx, zip, name);
391 return ent != NULL;
392}
393
394static const char *list_zip_entry(fz_context *ctx, fz_archive *arch, int idx)
395{
396 fz_zip_archive *zip = (fz_zip_archive *) arch;
397 if (idx < 0 || idx >= zip->count)
398 return NULL;
399 return zip->entries[idx].name;
400}
401
402static int count_zip_entries(fz_context *ctx, fz_archive *arch)
403{
404 fz_zip_archive *zip = (fz_zip_archive *) arch;
405 return zip->count;
406}
407
408/*
409 Detect if stream object is a zip archive.
410
411 Assumes that the stream object is seekable.
412*/
413int
414fz_is_zip_archive(fz_context *ctx, fz_stream *file)
415{
416 const unsigned char signature[4] = { 'P', 'K', 0x03, 0x04 };
417 unsigned char data[4];
418 size_t n;
419
420 fz_seek(ctx, file, 0, 0);
421 n = fz_read(ctx, file, data, nelem(data));
422 if (n != nelem(signature))
423 return 0;
424 if (memcmp(data, signature, nelem(signature)))
425 return 0;
426
427 return 1;
428}
429
430/*
431 Open a zip archive stream.
432
433 Open an archive using a seekable stream object rather than
434 opening a file or directory on disk.
435
436 An exception is throw if the stream is not a zip archive as
437 indicated by the presence of a zip signature.
438
439*/
440fz_archive *
441fz_open_zip_archive_with_stream(fz_context *ctx, fz_stream *file)
442{
443 fz_zip_archive *zip;
444
445 if (!fz_is_zip_archive(ctx, file))
446 fz_throw(ctx, FZ_ERROR_GENERIC, "cannot recognize zip archive");
447
448 zip = fz_new_derived_archive(ctx, file, fz_zip_archive);
449 zip->super.format = "zip";
450 zip->super.count_entries = count_zip_entries;
451 zip->super.list_entry = list_zip_entry;
452 zip->super.has_entry = has_zip_entry;
453 zip->super.read_entry = read_zip_entry;
454 zip->super.open_entry = open_zip_entry;
455 zip->super.drop_archive = drop_zip_archive;
456
457 fz_try(ctx)
458 {
459 ensure_zip_entries(ctx, zip);
460 }
461 fz_catch(ctx)
462 {
463 fz_drop_archive(ctx, &zip->super);
464 fz_rethrow(ctx);
465 }
466
467 return &zip->super;
468}
469
470/*
471 Open a zip archive file.
472
473 An exception is throw if the file is not a zip archive as
474 indicated by the presence of a zip signature.
475
476 filename: a path to a zip archive file as it would be given to
477 open(2).
478*/
479fz_archive *
480fz_open_zip_archive(fz_context *ctx, const char *filename)
481{
482 fz_archive *zip = NULL;
483 fz_stream *file;
484
485 file = fz_open_file(ctx, filename);
486
487 fz_var(zip);
488
489 fz_try(ctx)
490 zip = fz_open_zip_archive_with_stream(ctx, file);
491 fz_always(ctx)
492 fz_drop_stream(ctx, file);
493 fz_catch(ctx)
494 fz_rethrow(ctx);
495
496 return zip;
497}
498