| 1 | #include "mupdf/fitz.h" |
| 2 | #include "fitz-imp.h" |
| 3 | |
| 4 | #include <string.h> |
| 5 | |
| 6 | #include <zlib.h> |
| 7 | |
#if !defined (INT32_MAX)
#define INT32_MAX 2147483647L
#endif

/* 32-bit record signatures, as read with fz_read_uint32_le. */
#define ZIP_LOCAL_FILE_SIG 0x04034b50
#define ZIP_DATA_DESC_SIG 0x08074b50
#define ZIP_CENTRAL_DIRECTORY_SIG 0x02014b50
#define ZIP_END_OF_CENTRAL_DIRECTORY_SIG 0x06054b50

#define ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIG 0x07064b50
#define ZIP64_END_OF_CENTRAL_DIRECTORY_SIG 0x06064b50
/* 16-bit type tag of the extra-field record holding 64-bit sizes/offset. */
#define ZIP64_EXTRA_FIELD_SIG 0x0001

/* Bit in the local header's general purpose flags marking encrypted content. */
#define ZIP_ENCRYPTED_FLAG 0x1
| 22 | |
| 23 | typedef struct zip_entry_s zip_entry; |
| 24 | typedef struct fz_zip_archive_s fz_zip_archive; |
| 25 | |
| 26 | struct zip_entry_s |
| 27 | { |
| 28 | char *name; |
| 29 | uint64_t offset, csize, usize; |
| 30 | }; |
| 31 | |
| 32 | struct fz_zip_archive_s |
| 33 | { |
| 34 | fz_archive super; |
| 35 | |
| 36 | uint64_t count; |
| 37 | zip_entry *entries; |
| 38 | }; |
| 39 | |
| 40 | static void drop_zip_archive(fz_context *ctx, fz_archive *arch) |
| 41 | { |
| 42 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
| 43 | int i; |
| 44 | for (i = 0; i < zip->count; ++i) |
| 45 | fz_free(ctx, zip->entries[i].name); |
| 46 | fz_free(ctx, zip->entries); |
| 47 | } |
| 48 | |
| 49 | static void read_zip_dir_imp(fz_context *ctx, fz_zip_archive *zip, int64_t start_offset) |
| 50 | { |
| 51 | fz_stream *file = zip->super.file; |
| 52 | uint32_t sig; |
| 53 | int i; |
| 54 | int namesize, metasize, ; |
| 55 | uint64_t count, offset; |
| 56 | uint64_t csize, usize; |
| 57 | char *name = NULL; |
| 58 | size_t n; |
| 59 | |
| 60 | fz_var(name); |
| 61 | |
| 62 | zip->count = 0; |
| 63 | |
| 64 | fz_seek(ctx, file, start_offset, 0); |
| 65 | |
| 66 | sig = fz_read_uint32_le(ctx, file); |
| 67 | if (sig != ZIP_END_OF_CENTRAL_DIRECTORY_SIG) |
| 68 | fz_throw(ctx, FZ_ERROR_GENERIC, "wrong zip end of central directory signature (0x%x)" , sig); |
| 69 | |
| 70 | (void) fz_read_uint16_le(ctx, file); /* this disk */ |
| 71 | (void) fz_read_uint16_le(ctx, file); /* start disk */ |
| 72 | (void) fz_read_uint16_le(ctx, file); /* entries in this disk */ |
| 73 | count = fz_read_uint16_le(ctx, file); /* entries in central directory disk */ |
| 74 | (void) fz_read_uint32_le(ctx, file); /* size of central directory */ |
| 75 | offset = fz_read_uint32_le(ctx, file); /* offset to central directory */ |
| 76 | |
| 77 | /* ZIP64 */ |
| 78 | if (count == 0xFFFF || offset == 0xFFFFFFFF) |
| 79 | { |
| 80 | int64_t offset64, count64; |
| 81 | |
| 82 | fz_seek(ctx, file, start_offset - 20, 0); |
| 83 | |
| 84 | sig = fz_read_uint32_le(ctx, file); |
| 85 | if (sig != ZIP64_END_OF_CENTRAL_DIRECTORY_LOCATOR_SIG) |
| 86 | fz_throw(ctx, FZ_ERROR_GENERIC, "wrong zip64 end of central directory locator signature (0x%x)" , sig); |
| 87 | |
| 88 | (void) fz_read_uint32_le(ctx, file); /* start disk */ |
| 89 | offset64 = fz_read_uint64_le(ctx, file); /* offset to end of central directory record */ |
| 90 | |
| 91 | fz_seek(ctx, file, offset64, 0); |
| 92 | |
| 93 | sig = fz_read_uint32_le(ctx, file); |
| 94 | if (sig != ZIP64_END_OF_CENTRAL_DIRECTORY_SIG) |
| 95 | fz_throw(ctx, FZ_ERROR_GENERIC, "wrong zip64 end of central directory signature (0x%x)" , sig); |
| 96 | |
| 97 | (void) fz_read_uint64_le(ctx, file); /* size of record */ |
| 98 | (void) fz_read_uint16_le(ctx, file); /* version made by */ |
| 99 | (void) fz_read_uint16_le(ctx, file); /* version to extract */ |
| 100 | (void) fz_read_uint32_le(ctx, file); /* disk number */ |
| 101 | (void) fz_read_uint32_le(ctx, file); /* disk number start */ |
| 102 | count64 = fz_read_uint64_le(ctx, file); /* entries in central directory disk */ |
| 103 | (void) fz_read_uint64_le(ctx, file); /* entries in central directory */ |
| 104 | (void) fz_read_uint64_le(ctx, file); /* size of central directory */ |
| 105 | offset64 = fz_read_uint64_le(ctx, file); /* offset to central directory */ |
| 106 | |
| 107 | if (count == 0xFFFF) |
| 108 | { |
| 109 | count = count64; |
| 110 | } |
| 111 | if (offset == 0xFFFFFFFF) |
| 112 | { |
| 113 | offset = offset64; |
| 114 | } |
| 115 | } |
| 116 | |
| 117 | fz_seek(ctx, file, offset, 0); |
| 118 | |
| 119 | fz_try(ctx) |
| 120 | { |
| 121 | for (i = 0; i < count; i++) |
| 122 | { |
| 123 | sig = fz_read_uint32_le(ctx, file); |
| 124 | if (sig != ZIP_CENTRAL_DIRECTORY_SIG) |
| 125 | fz_throw(ctx, FZ_ERROR_GENERIC, "wrong zip central directory signature (0x%x)" , sig); |
| 126 | |
| 127 | (void) fz_read_uint16_le(ctx, file); /* version made by */ |
| 128 | (void) fz_read_uint16_le(ctx, file); /* version to extract */ |
| 129 | (void) fz_read_uint16_le(ctx, file); /* general */ |
| 130 | (void) fz_read_uint16_le(ctx, file); /* method */ |
| 131 | (void) fz_read_uint16_le(ctx, file); /* last mod file time */ |
| 132 | (void) fz_read_uint16_le(ctx, file); /* last mod file date */ |
| 133 | (void) fz_read_uint32_le(ctx, file); /* crc-32 */ |
| 134 | csize = fz_read_uint32_le(ctx, file); |
| 135 | usize = fz_read_uint32_le(ctx, file); |
| 136 | namesize = fz_read_uint16_le(ctx, file); |
| 137 | metasize = fz_read_uint16_le(ctx, file); |
| 138 | commentsize = fz_read_uint16_le(ctx, file); |
| 139 | (void) fz_read_uint16_le(ctx, file); /* disk number start */ |
| 140 | (void) fz_read_uint16_le(ctx, file); /* int file atts */ |
| 141 | (void) fz_read_uint32_le(ctx, file); /* ext file atts */ |
| 142 | offset = fz_read_uint32_le(ctx, file); |
| 143 | |
| 144 | if (namesize < 0 || metasize < 0 || commentsize < 0) |
| 145 | fz_throw(ctx, FZ_ERROR_GENERIC, "invalid size in zip entry" ); |
| 146 | |
| 147 | name = fz_malloc(ctx, namesize + 1); |
| 148 | |
| 149 | n = fz_read(ctx, file, (unsigned char*)name, namesize); |
| 150 | if (n < (size_t)namesize) |
| 151 | fz_throw(ctx, FZ_ERROR_GENERIC, "premature end of data in zip entry name" ); |
| 152 | name[namesize] = '\0'; |
| 153 | |
| 154 | while (metasize > 0) |
| 155 | { |
| 156 | int type = fz_read_uint16_le(ctx, file); |
| 157 | int size = fz_read_uint16_le(ctx, file); |
| 158 | |
| 159 | if (type == ZIP64_EXTRA_FIELD_SIG) |
| 160 | { |
| 161 | int sizeleft = size; |
| 162 | if (usize == 0xFFFFFFFF && sizeleft >= 8) |
| 163 | { |
| 164 | usize = fz_read_uint64_le(ctx, file); |
| 165 | sizeleft -= 8; |
| 166 | } |
| 167 | if (csize == 0xFFFFFFFF && sizeleft >= 8) |
| 168 | { |
| 169 | csize = fz_read_uint64_le(ctx, file); |
| 170 | sizeleft -= 8; |
| 171 | } |
| 172 | if (offset == 0xFFFFFFFF && sizeleft >= 8) |
| 173 | { |
| 174 | offset = fz_read_uint64_le(ctx, file); |
| 175 | sizeleft -= 8; |
| 176 | } |
| 177 | fz_seek(ctx, file, sizeleft - size, 1); |
| 178 | } |
| 179 | fz_seek(ctx, file, size, 1); |
| 180 | metasize -= 4 + size; |
| 181 | } |
| 182 | |
| 183 | if (usize > INT32_MAX || csize > INT32_MAX) |
| 184 | fz_throw(ctx, FZ_ERROR_GENERIC, "zip archive entry larger than 2 GB" ); |
| 185 | |
| 186 | fz_seek(ctx, file, commentsize, 1); |
| 187 | |
| 188 | zip->entries = fz_realloc_array(ctx, zip->entries, zip->count + 1, zip_entry); |
| 189 | |
| 190 | zip->entries[zip->count].offset = offset; |
| 191 | zip->entries[zip->count].csize = csize; |
| 192 | zip->entries[zip->count].usize = usize; |
| 193 | zip->entries[zip->count].name = name; |
| 194 | name = NULL; |
| 195 | |
| 196 | zip->count++; |
| 197 | } |
| 198 | } |
| 199 | fz_always(ctx) |
| 200 | fz_free(ctx, name); |
| 201 | fz_catch(ctx) |
| 202 | fz_rethrow(ctx); |
| 203 | } |
| 204 | |
| 205 | static int (fz_context *ctx, fz_zip_archive *zip, zip_entry *ent) |
| 206 | { |
| 207 | fz_stream *file = zip->super.file; |
| 208 | uint32_t sig; |
| 209 | int general, method, namelength, ; |
| 210 | |
| 211 | fz_seek(ctx, file, ent->offset, 0); |
| 212 | |
| 213 | sig = fz_read_uint32_le(ctx, file); |
| 214 | if (sig != ZIP_LOCAL_FILE_SIG) |
| 215 | fz_throw(ctx, FZ_ERROR_GENERIC, "wrong zip local file signature (0x%x)" , sig); |
| 216 | |
| 217 | (void) fz_read_uint16_le(ctx, file); /* version */ |
| 218 | general = fz_read_uint16_le(ctx, file); /* general */ |
| 219 | if (general & ZIP_ENCRYPTED_FLAG) |
| 220 | fz_throw(ctx, FZ_ERROR_GENERIC, "zip content is encrypted" ); |
| 221 | |
| 222 | method = fz_read_uint16_le(ctx, file); |
| 223 | (void) fz_read_uint16_le(ctx, file); /* file time */ |
| 224 | (void) fz_read_uint16_le(ctx, file); /* file date */ |
| 225 | (void) fz_read_uint32_le(ctx, file); /* crc-32 */ |
| 226 | (void) fz_read_uint32_le(ctx, file); /* csize */ |
| 227 | (void) fz_read_uint32_le(ctx, file); /* usize */ |
| 228 | namelength = fz_read_uint16_le(ctx, file); |
| 229 | extralength = fz_read_uint16_le(ctx, file); |
| 230 | |
| 231 | fz_seek(ctx, file, namelength + extralength, 1); |
| 232 | |
| 233 | return method; |
| 234 | } |
| 235 | |
| 236 | static void ensure_zip_entries(fz_context *ctx, fz_zip_archive *zip) |
| 237 | { |
| 238 | fz_stream *file = zip->super.file; |
| 239 | unsigned char buf[512]; |
| 240 | size_t size, back, maxback; |
| 241 | size_t i, n; |
| 242 | |
| 243 | fz_seek(ctx, file, 0, SEEK_END); |
| 244 | size = fz_tell(ctx, file); |
| 245 | |
| 246 | maxback = fz_minz(size, 0xFFFF + sizeof buf); |
| 247 | back = fz_minz(maxback, sizeof buf); |
| 248 | |
| 249 | while (back <= maxback) |
| 250 | { |
| 251 | fz_seek(ctx, file, (int64_t)(size - back), 0); |
| 252 | n = fz_read(ctx, file, buf, sizeof buf); |
| 253 | if (n < 4) |
| 254 | break; |
| 255 | for (i = n - 4; i > 0; i--) |
| 256 | if (!memcmp(buf + i, "PK\5\6" , 4)) |
| 257 | { |
| 258 | read_zip_dir_imp(ctx, zip, size - back + i); |
| 259 | return; |
| 260 | } |
| 261 | back += sizeof buf - 4; |
| 262 | } |
| 263 | |
| 264 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find end of central directory" ); |
| 265 | } |
| 266 | |
| 267 | static zip_entry *lookup_zip_entry(fz_context *ctx, fz_zip_archive *zip, const char *name) |
| 268 | { |
| 269 | int i; |
| 270 | if (name[0] == '/') |
| 271 | ++name; |
| 272 | for (i = 0; i < zip->count; i++) |
| 273 | if (!fz_strcasecmp(name, zip->entries[i].name)) |
| 274 | return &zip->entries[i]; |
| 275 | return NULL; |
| 276 | } |
| 277 | |
| 278 | static fz_stream *open_zip_entry(fz_context *ctx, fz_archive *arch, const char *name) |
| 279 | { |
| 280 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
| 281 | fz_stream *file = zip->super.file; |
| 282 | int method; |
| 283 | zip_entry *ent; |
| 284 | |
| 285 | ent = lookup_zip_entry(ctx, zip, name); |
| 286 | if (!ent) |
| 287 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find named zip archive entry" ); |
| 288 | |
| 289 | method = read_zip_entry_header(ctx, zip, ent); |
| 290 | if (method == 0) |
| 291 | return fz_open_null_filter(ctx, file, ent->usize, fz_tell(ctx, file)); |
| 292 | if (method == 8) |
| 293 | return fz_open_flated(ctx, file, -15); |
| 294 | fz_throw(ctx, FZ_ERROR_GENERIC, "unknown zip method: %d" , method); |
| 295 | } |
| 296 | |
| 297 | static fz_buffer *read_zip_entry(fz_context *ctx, fz_archive *arch, const char *name) |
| 298 | { |
| 299 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
| 300 | fz_stream *file = zip->super.file; |
| 301 | fz_buffer *ubuf; |
| 302 | unsigned char *cbuf; |
| 303 | int method; |
| 304 | z_stream z; |
| 305 | int code; |
| 306 | int len; |
| 307 | zip_entry *ent; |
| 308 | |
| 309 | fz_var(cbuf); |
| 310 | |
| 311 | ent = lookup_zip_entry(ctx, zip, name); |
| 312 | if (!ent) |
| 313 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find named zip archive entry" ); |
| 314 | |
| 315 | method = read_zip_entry_header(ctx, zip, ent); |
| 316 | ubuf = fz_new_buffer(ctx, ent->usize + 1); /* +1 because many callers will add a terminating zero */ |
| 317 | |
| 318 | if (method == 0) |
| 319 | { |
| 320 | fz_try(ctx) |
| 321 | { |
| 322 | ubuf->len = fz_read(ctx, file, ubuf->data, ent->usize); |
| 323 | if (ubuf->len < (size_t)ent->usize) |
| 324 | fz_warn(ctx, "premature end of data in stored zip archive entry" ); |
| 325 | } |
| 326 | fz_catch(ctx) |
| 327 | { |
| 328 | fz_drop_buffer(ctx, ubuf); |
| 329 | fz_rethrow(ctx); |
| 330 | } |
| 331 | return ubuf; |
| 332 | } |
| 333 | else if (method == 8) |
| 334 | { |
| 335 | fz_try(ctx) |
| 336 | { |
| 337 | cbuf = fz_malloc(ctx, ent->csize); |
| 338 | |
| 339 | fz_read(ctx, file, cbuf, ent->csize); |
| 340 | |
| 341 | z.zalloc = fz_zlib_alloc; |
| 342 | z.zfree = fz_zlib_free; |
| 343 | z.opaque = ctx; |
| 344 | z.next_in = cbuf; |
| 345 | z.avail_in = ent->csize; |
| 346 | z.next_out = ubuf->data; |
| 347 | z.avail_out = ent->usize; |
| 348 | |
| 349 | code = inflateInit2(&z, -15); |
| 350 | if (code != Z_OK) |
| 351 | { |
| 352 | fz_throw(ctx, FZ_ERROR_GENERIC, "zlib inflateInit2 error: %s" , z.msg); |
| 353 | } |
| 354 | code = inflate(&z, Z_FINISH); |
| 355 | if (code != Z_STREAM_END) |
| 356 | { |
| 357 | inflateEnd(&z); |
| 358 | fz_throw(ctx, FZ_ERROR_GENERIC, "zlib inflate error: %s" , z.msg); |
| 359 | } |
| 360 | code = inflateEnd(&z); |
| 361 | if (code != Z_OK) |
| 362 | { |
| 363 | fz_throw(ctx, FZ_ERROR_GENERIC, "zlib inflateEnd error: %s" , z.msg); |
| 364 | } |
| 365 | |
| 366 | len = ent->usize - z.avail_out; |
| 367 | if (len < ent->usize) |
| 368 | fz_warn(ctx, "premature end of data in compressed archive entry" ); |
| 369 | ubuf->len = len; |
| 370 | } |
| 371 | fz_always(ctx) |
| 372 | { |
| 373 | fz_free(ctx, cbuf); |
| 374 | } |
| 375 | fz_catch(ctx) |
| 376 | { |
| 377 | fz_drop_buffer(ctx, ubuf); |
| 378 | fz_rethrow(ctx); |
| 379 | } |
| 380 | return ubuf; |
| 381 | } |
| 382 | |
| 383 | fz_drop_buffer(ctx, ubuf); |
| 384 | fz_throw(ctx, FZ_ERROR_GENERIC, "unknown zip method: %d" , method); |
| 385 | } |
| 386 | |
| 387 | static int has_zip_entry(fz_context *ctx, fz_archive *arch, const char *name) |
| 388 | { |
| 389 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
| 390 | zip_entry *ent = lookup_zip_entry(ctx, zip, name); |
| 391 | return ent != NULL; |
| 392 | } |
| 393 | |
| 394 | static const char *list_zip_entry(fz_context *ctx, fz_archive *arch, int idx) |
| 395 | { |
| 396 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
| 397 | if (idx < 0 || idx >= zip->count) |
| 398 | return NULL; |
| 399 | return zip->entries[idx].name; |
| 400 | } |
| 401 | |
| 402 | static int count_zip_entries(fz_context *ctx, fz_archive *arch) |
| 403 | { |
| 404 | fz_zip_archive *zip = (fz_zip_archive *) arch; |
| 405 | return zip->count; |
| 406 | } |
| 407 | |
| 408 | /* |
| 409 | Detect if stream object is a zip archive. |
| 410 | |
| 411 | Assumes that the stream object is seekable. |
| 412 | */ |
| 413 | int |
| 414 | fz_is_zip_archive(fz_context *ctx, fz_stream *file) |
| 415 | { |
| 416 | const unsigned char signature[4] = { 'P', 'K', 0x03, 0x04 }; |
| 417 | unsigned char data[4]; |
| 418 | size_t n; |
| 419 | |
| 420 | fz_seek(ctx, file, 0, 0); |
| 421 | n = fz_read(ctx, file, data, nelem(data)); |
| 422 | if (n != nelem(signature)) |
| 423 | return 0; |
| 424 | if (memcmp(data, signature, nelem(signature))) |
| 425 | return 0; |
| 426 | |
| 427 | return 1; |
| 428 | } |
| 429 | |
| 430 | /* |
| 431 | Open a zip archive stream. |
| 432 | |
| 433 | Open an archive using a seekable stream object rather than |
| 434 | opening a file or directory on disk. |
| 435 | |
| 436 | An exception is throw if the stream is not a zip archive as |
| 437 | indicated by the presence of a zip signature. |
| 438 | |
| 439 | */ |
| 440 | fz_archive * |
| 441 | fz_open_zip_archive_with_stream(fz_context *ctx, fz_stream *file) |
| 442 | { |
| 443 | fz_zip_archive *zip; |
| 444 | |
| 445 | if (!fz_is_zip_archive(ctx, file)) |
| 446 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot recognize zip archive" ); |
| 447 | |
| 448 | zip = fz_new_derived_archive(ctx, file, fz_zip_archive); |
| 449 | zip->super.format = "zip" ; |
| 450 | zip->super.count_entries = count_zip_entries; |
| 451 | zip->super.list_entry = list_zip_entry; |
| 452 | zip->super.has_entry = has_zip_entry; |
| 453 | zip->super.read_entry = read_zip_entry; |
| 454 | zip->super.open_entry = open_zip_entry; |
| 455 | zip->super.drop_archive = drop_zip_archive; |
| 456 | |
| 457 | fz_try(ctx) |
| 458 | { |
| 459 | ensure_zip_entries(ctx, zip); |
| 460 | } |
| 461 | fz_catch(ctx) |
| 462 | { |
| 463 | fz_drop_archive(ctx, &zip->super); |
| 464 | fz_rethrow(ctx); |
| 465 | } |
| 466 | |
| 467 | return &zip->super; |
| 468 | } |
| 469 | |
| 470 | /* |
| 471 | Open a zip archive file. |
| 472 | |
| 473 | An exception is throw if the file is not a zip archive as |
| 474 | indicated by the presence of a zip signature. |
| 475 | |
| 476 | filename: a path to a zip archive file as it would be given to |
| 477 | open(2). |
| 478 | */ |
| 479 | fz_archive * |
| 480 | fz_open_zip_archive(fz_context *ctx, const char *filename) |
| 481 | { |
| 482 | fz_archive *zip = NULL; |
| 483 | fz_stream *file; |
| 484 | |
| 485 | file = fz_open_file(ctx, filename); |
| 486 | |
| 487 | fz_var(zip); |
| 488 | |
| 489 | fz_try(ctx) |
| 490 | zip = fz_open_zip_archive_with_stream(ctx, file); |
| 491 | fz_always(ctx) |
| 492 | fz_drop_stream(ctx, file); |
| 493 | fz_catch(ctx) |
| 494 | fz_rethrow(ctx); |
| 495 | |
| 496 | return zip; |
| 497 | } |
| 498 | |