1 | #include "mupdf/fitz.h" |
2 | #include "mupdf/pdf.h" |
3 | |
4 | #include <string.h> |
5 | |
6 | /* Scan file for objects and reconstruct xref table */ |
7 | |
/* One object header located by the repair scan, kept until the xref is rebuilt. */
struct entry
{
	int num;		/* object number taken from the "<num> <gen> obj" header */
	int gen;		/* generation number (the scanner clamps it to 0..65535) */
	int64_t ofs;		/* file offset at which the object number token starts */
	int64_t stm_ofs;	/* file offset of the stream data, or 0 when no stream was seen */
	int stm_len;		/* stream length measured by scanning, or -1 when not measured */
};
16 | |
17 | static void add_root(fz_context *ctx, pdf_obj *obj, pdf_obj ***roots, int *num_roots, int *max_roots) |
18 | { |
19 | if (*num_roots == *max_roots) |
20 | { |
21 | int new_max_roots = *max_roots * 2; |
22 | if (new_max_roots == 0) |
23 | new_max_roots = 4; |
24 | *roots = fz_realloc_array(ctx, *roots, new_max_roots, pdf_obj*); |
25 | *max_roots = new_max_roots; |
26 | } |
27 | (*roots)[(*num_roots)++] = pdf_keep_obj(ctx, obj); |
28 | } |
29 | |
30 | int |
31 | pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, int64_t *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, int64_t *tmpofs, pdf_obj **root) |
32 | { |
33 | fz_stream *file = doc->file; |
34 | pdf_token tok; |
35 | int stm_len; |
36 | |
37 | *stmofsp = 0; |
38 | if (stmlenp) |
39 | *stmlenp = -1; |
40 | |
41 | stm_len = 0; |
42 | |
43 | /* On entry to this function, we know that we've just seen |
44 | * '<int> <int> obj'. We expect the next thing we see to be a |
45 | * pdf object. Regardless of the type of thing we meet next |
46 | * we only need to fully parse it if it is a dictionary. */ |
47 | tok = pdf_lex(ctx, file, buf); |
48 | |
49 | if (tok == PDF_TOK_OPEN_DICT) |
50 | { |
51 | pdf_obj *obj, *dict = NULL; |
52 | |
53 | fz_try(ctx) |
54 | { |
55 | dict = pdf_parse_dict(ctx, doc, file, buf); |
56 | } |
57 | fz_catch(ctx) |
58 | { |
59 | fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); |
60 | /* Don't let a broken object at EOF overwrite a good one */ |
61 | if (file->eof) |
62 | fz_rethrow(ctx); |
63 | /* Silently swallow the error */ |
64 | dict = pdf_new_dict(ctx, NULL, 2); |
65 | } |
66 | |
67 | /* We must be careful not to try to resolve any indirections |
68 | * here. We have just read dict, so we know it to be a non |
69 | * indirected dictionary. Before we look at any values that |
70 | * we get back from looking up in it, we need to check they |
71 | * aren't indirected. */ |
72 | |
73 | if (encrypt || id || root) |
74 | { |
75 | obj = pdf_dict_get(ctx, dict, PDF_NAME(Type)); |
76 | if (!pdf_is_indirect(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME(XRef))) |
77 | { |
78 | if (encrypt) |
79 | { |
80 | obj = pdf_dict_get(ctx, dict, PDF_NAME(Encrypt)); |
81 | if (obj) |
82 | { |
83 | pdf_drop_obj(ctx, *encrypt); |
84 | *encrypt = pdf_keep_obj(ctx, obj); |
85 | } |
86 | } |
87 | |
88 | if (id) |
89 | { |
90 | obj = pdf_dict_get(ctx, dict, PDF_NAME(ID)); |
91 | if (obj) |
92 | { |
93 | pdf_drop_obj(ctx, *id); |
94 | *id = pdf_keep_obj(ctx, obj); |
95 | } |
96 | } |
97 | |
98 | if (root) |
99 | *root = pdf_keep_obj(ctx, pdf_dict_get(ctx, dict, PDF_NAME(Root))); |
100 | } |
101 | } |
102 | |
103 | obj = pdf_dict_get(ctx, dict, PDF_NAME(Length)); |
104 | if (!pdf_is_indirect(ctx, obj) && pdf_is_int(ctx, obj)) |
105 | stm_len = pdf_to_int(ctx, obj); |
106 | |
107 | if (doc->file_reading_linearly && page) |
108 | { |
109 | obj = pdf_dict_get(ctx, dict, PDF_NAME(Type)); |
110 | if (!pdf_is_indirect(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME(Page))) |
111 | { |
112 | pdf_drop_obj(ctx, *page); |
113 | *page = pdf_keep_obj(ctx, dict); |
114 | } |
115 | } |
116 | |
117 | pdf_drop_obj(ctx, dict); |
118 | } |
119 | |
120 | while ( tok != PDF_TOK_STREAM && |
121 | tok != PDF_TOK_ENDOBJ && |
122 | tok != PDF_TOK_ERROR && |
123 | tok != PDF_TOK_EOF && |
124 | tok != PDF_TOK_INT ) |
125 | { |
126 | *tmpofs = fz_tell(ctx, file); |
127 | if (*tmpofs < 0) |
128 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file" ); |
129 | tok = pdf_lex(ctx, file, buf); |
130 | } |
131 | |
132 | if (tok == PDF_TOK_STREAM) |
133 | { |
134 | int c = fz_read_byte(ctx, file); |
135 | if (c == '\r') { |
136 | c = fz_peek_byte(ctx, file); |
137 | if (c == '\n') |
138 | fz_read_byte(ctx, file); |
139 | } |
140 | |
141 | *stmofsp = fz_tell(ctx, file); |
142 | if (*stmofsp < 0) |
143 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot seek in file" ); |
144 | |
145 | if (stm_len > 0) |
146 | { |
147 | fz_seek(ctx, file, *stmofsp + stm_len, 0); |
148 | fz_try(ctx) |
149 | { |
150 | tok = pdf_lex(ctx, file, buf); |
151 | } |
152 | fz_catch(ctx) |
153 | { |
154 | fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); |
155 | fz_warn(ctx, "cannot find endstream token, falling back to scanning" ); |
156 | } |
157 | if (tok == PDF_TOK_ENDSTREAM) |
158 | goto atobjend; |
159 | fz_seek(ctx, file, *stmofsp, 0); |
160 | } |
161 | |
162 | (void)fz_read(ctx, file, (unsigned char *) buf->scratch, 9); |
163 | |
164 | while (memcmp(buf->scratch, "endstream" , 9) != 0) |
165 | { |
166 | c = fz_read_byte(ctx, file); |
167 | if (c == EOF) |
168 | break; |
169 | memmove(&buf->scratch[0], &buf->scratch[1], 8); |
170 | buf->scratch[8] = c; |
171 | } |
172 | |
173 | if (stmlenp) |
174 | *stmlenp = fz_tell(ctx, file) - *stmofsp - 9; |
175 | |
176 | atobjend: |
177 | *tmpofs = fz_tell(ctx, file); |
178 | if (*tmpofs < 0) |
179 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file" ); |
180 | tok = pdf_lex(ctx, file, buf); |
181 | if (tok != PDF_TOK_ENDOBJ) |
182 | fz_warn(ctx, "object missing 'endobj' token" ); |
183 | else |
184 | { |
185 | /* Read another token as we always return the next one */ |
186 | *tmpofs = fz_tell(ctx, file); |
187 | if (*tmpofs < 0) |
188 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file" ); |
189 | tok = pdf_lex(ctx, file, buf); |
190 | } |
191 | } |
192 | return tok; |
193 | } |
194 | |
195 | static void |
196 | pdf_repair_obj_stm(fz_context *ctx, pdf_document *doc, int stm_num) |
197 | { |
198 | pdf_obj *obj; |
199 | fz_stream *stm = NULL; |
200 | pdf_token tok; |
201 | int i, n, count; |
202 | pdf_lexbuf buf; |
203 | |
204 | fz_var(stm); |
205 | |
206 | pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL); |
207 | |
208 | fz_try(ctx) |
209 | { |
210 | obj = pdf_load_object(ctx, doc, stm_num); |
211 | |
212 | count = pdf_dict_get_int(ctx, obj, PDF_NAME(N)); |
213 | |
214 | pdf_drop_obj(ctx, obj); |
215 | |
216 | stm = pdf_open_stream_number(ctx, doc, stm_num); |
217 | |
218 | for (i = 0; i < count; i++) |
219 | { |
220 | pdf_xref_entry *entry; |
221 | |
222 | tok = pdf_lex(ctx, stm, &buf); |
223 | if (tok != PDF_TOK_INT) |
224 | fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d 0 R)" , stm_num); |
225 | |
226 | n = buf.i; |
227 | if (n < 0) |
228 | { |
229 | fz_warn(ctx, "ignoring object with invalid object number (%d %d R)" , n, i); |
230 | continue; |
231 | } |
232 | else if (n >= pdf_xref_len(ctx, doc)) |
233 | { |
234 | fz_warn(ctx, "ignoring object with invalid object number (%d %d R)" , n, i); |
235 | continue; |
236 | } |
237 | |
238 | entry = pdf_get_populating_xref_entry(ctx, doc, n); |
239 | entry->ofs = stm_num; |
240 | entry->gen = i; |
241 | entry->num = n; |
242 | entry->stm_ofs = 0; |
243 | pdf_drop_obj(ctx, entry->obj); |
244 | entry->obj = NULL; |
245 | entry->type = 'o'; |
246 | |
247 | tok = pdf_lex(ctx, stm, &buf); |
248 | if (tok != PDF_TOK_INT) |
249 | fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d 0 R)" , stm_num); |
250 | } |
251 | } |
252 | fz_always(ctx) |
253 | { |
254 | fz_drop_stream(ctx, stm); |
255 | pdf_lexbuf_fin(ctx, &buf); |
256 | } |
257 | fz_catch(ctx) |
258 | { |
259 | fz_rethrow(ctx); |
260 | } |
261 | } |
262 | |
263 | static void |
264 | orphan_object(fz_context *ctx, pdf_document *doc, pdf_obj *obj) |
265 | { |
266 | if (doc->orphans_count == doc->orphans_max) |
267 | { |
268 | int new_max = (doc->orphans_max ? doc->orphans_max*2 : 32); |
269 | |
270 | fz_try(ctx) |
271 | { |
272 | doc->orphans = fz_realloc_array(ctx, doc->orphans, new_max, pdf_obj*); |
273 | doc->orphans_max = new_max; |
274 | } |
275 | fz_catch(ctx) |
276 | { |
277 | pdf_drop_obj(ctx, obj); |
278 | fz_rethrow(ctx); |
279 | } |
280 | } |
281 | doc->orphans[doc->orphans_count++] = obj; |
282 | } |
283 | |
/* Return 1 if c is NUL, TAB, LF, FF, CR or SPACE, and 0 for any other value. */
static int is_white(int c)
{
	switch (c)
	{
	case 0x00:
	case 0x09:
	case 0x0a:
	case 0x0c:
	case 0x0d:
	case 0x20:
		return 1;
	default:
		return 0;
	}
}
288 | |
289 | void |
290 | pdf_repair_xref(fz_context *ctx, pdf_document *doc) |
291 | { |
292 | pdf_obj *dict, *obj = NULL; |
293 | pdf_obj *length; |
294 | |
295 | pdf_obj *encrypt = NULL; |
296 | pdf_obj *id = NULL; |
297 | pdf_obj **roots = NULL; |
298 | pdf_obj *info = NULL; |
299 | |
300 | struct entry *list = NULL; |
301 | int listlen; |
302 | int listcap; |
303 | int maxnum = 0; |
304 | |
305 | int num = 0; |
306 | int gen = 0; |
307 | int64_t tmpofs, stm_ofs, numofs = 0, genofs = 0; |
308 | int stm_len; |
309 | pdf_token tok; |
310 | int next; |
311 | int i; |
312 | size_t j, n; |
313 | int c; |
314 | pdf_lexbuf *buf = &doc->lexbuf.base; |
315 | int num_roots = 0; |
316 | int max_roots = 0; |
317 | |
318 | fz_var(encrypt); |
319 | fz_var(id); |
320 | fz_var(roots); |
321 | fz_var(num_roots); |
322 | fz_var(max_roots); |
323 | fz_var(info); |
324 | fz_var(list); |
325 | fz_var(obj); |
326 | |
327 | fz_warn(ctx, "repairing PDF document" ); |
328 | |
329 | if (doc->repair_attempted) |
330 | fz_throw(ctx, FZ_ERROR_GENERIC, "Repair failed already - not trying again" ); |
331 | doc->repair_attempted = 1; |
332 | |
333 | doc->dirty = 1; |
334 | doc->freeze_updates = 1; /* Can't support incremental update after repair */ |
335 | |
336 | pdf_forget_xref(ctx, doc); |
337 | |
338 | fz_seek(ctx, doc->file, 0, 0); |
339 | |
340 | fz_try(ctx) |
341 | { |
342 | pdf_xref_entry *entry; |
343 | listlen = 0; |
344 | listcap = 1024; |
345 | list = fz_malloc_array(ctx, listcap, struct entry); |
346 | |
347 | /* look for '%PDF' version marker within first kilobyte of file */ |
348 | n = fz_read(ctx, doc->file, (unsigned char *)buf->scratch, fz_mini(buf->size, 1024)); |
349 | |
350 | fz_seek(ctx, doc->file, 0, 0); |
351 | if (n >= 4) |
352 | { |
353 | for (j = 0; j < n - 4; j++) |
354 | { |
355 | if (memcmp(&buf->scratch[j], "%PDF" , 4) == 0) |
356 | { |
357 | fz_seek(ctx, doc->file, (int64_t)(j + 8), 0); /* skip "%PDF-X.Y" */ |
358 | break; |
359 | } |
360 | } |
361 | } |
362 | |
363 | /* skip comment line after version marker since some generators |
364 | * forget to terminate the comment with a newline */ |
365 | c = fz_read_byte(ctx, doc->file); |
366 | while (c >= 0 && (c == ' ' || c == '%')) |
367 | c = fz_read_byte(ctx, doc->file); |
368 | fz_unread_byte(ctx, doc->file); |
369 | |
370 | while (1) |
371 | { |
372 | tmpofs = fz_tell(ctx, doc->file); |
373 | if (tmpofs < 0) |
374 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file" ); |
375 | |
376 | fz_try(ctx) |
377 | tok = pdf_lex_no_string(ctx, doc->file, buf); |
378 | fz_catch(ctx) |
379 | { |
380 | fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); |
381 | fz_warn(ctx, "skipping ahead to next token" ); |
382 | do |
383 | c = fz_read_byte(ctx, doc->file); |
384 | while (c != EOF && !is_white(c)); |
385 | if (c == EOF) |
386 | tok = PDF_TOK_EOF; |
387 | else |
388 | continue; |
389 | } |
390 | |
391 | /* If we have the next token already, then we'll jump |
392 | * back here, rather than going through the top of |
393 | * the loop. */ |
394 | have_next_token: |
395 | |
396 | if (tok == PDF_TOK_INT) |
397 | { |
398 | if (buf->i < 0) |
399 | { |
400 | num = 0; |
401 | gen = 0; |
402 | continue; |
403 | } |
404 | numofs = genofs; |
405 | num = gen; |
406 | genofs = tmpofs; |
407 | gen = buf->i; |
408 | } |
409 | |
410 | else if (tok == PDF_TOK_OBJ) |
411 | { |
412 | pdf_obj *root = NULL; |
413 | |
414 | fz_try(ctx) |
415 | { |
416 | stm_len = 0; |
417 | stm_ofs = 0; |
418 | tok = pdf_repair_obj(ctx, doc, buf, &stm_ofs, &stm_len, &encrypt, &id, NULL, &tmpofs, &root); |
419 | if (root) |
420 | add_root(ctx, root, &roots, &num_roots, &max_roots); |
421 | } |
422 | fz_always(ctx) |
423 | { |
424 | pdf_drop_obj(ctx, root); |
425 | } |
426 | fz_catch(ctx) |
427 | { |
428 | fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); |
429 | /* If we haven't seen a root yet, there is nothing |
430 | * we can do, but give up. Otherwise, we'll make |
431 | * do. */ |
432 | if (!roots) |
433 | fz_rethrow(ctx); |
434 | fz_warn(ctx, "cannot parse object (%d %d R) - ignoring rest of file" , num, gen); |
435 | break; |
436 | } |
437 | |
438 | if (num <= 0 || num > PDF_MAX_OBJECT_NUMBER) |
439 | { |
440 | fz_warn(ctx, "ignoring object with invalid object number (%d %d R)" , num, gen); |
441 | goto have_next_token; |
442 | } |
443 | |
444 | gen = fz_clampi(gen, 0, 65535); |
445 | |
446 | if (listlen + 1 == listcap) |
447 | { |
448 | listcap = (listcap * 3) / 2; |
449 | list = fz_realloc_array(ctx, list, listcap, struct entry); |
450 | } |
451 | |
452 | list[listlen].num = num; |
453 | list[listlen].gen = gen; |
454 | list[listlen].ofs = numofs; |
455 | list[listlen].stm_ofs = stm_ofs; |
456 | list[listlen].stm_len = stm_len; |
457 | listlen ++; |
458 | |
459 | if (num > maxnum) |
460 | maxnum = num; |
461 | |
462 | goto have_next_token; |
463 | } |
464 | |
465 | /* If we find a dictionary it is probably the trailer, |
466 | * but could be a stream (or bogus) dictionary caused |
467 | * by a corrupt file. */ |
468 | else if (tok == PDF_TOK_OPEN_DICT) |
469 | { |
470 | pdf_obj *dictobj; |
471 | |
472 | fz_try(ctx) |
473 | { |
474 | dict = pdf_parse_dict(ctx, doc, doc->file, buf); |
475 | } |
476 | fz_catch(ctx) |
477 | { |
478 | fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); |
479 | /* If this was the real trailer dict |
480 | * it was broken, in which case we are |
481 | * in trouble. Keep going though in |
482 | * case this was just a bogus dict. */ |
483 | continue; |
484 | } |
485 | |
486 | fz_try(ctx) |
487 | { |
488 | dictobj = pdf_dict_get(ctx, dict, PDF_NAME(Encrypt)); |
489 | if (dictobj) |
490 | { |
491 | pdf_drop_obj(ctx, encrypt); |
492 | encrypt = pdf_keep_obj(ctx, dictobj); |
493 | } |
494 | |
495 | dictobj = pdf_dict_get(ctx, dict, PDF_NAME(ID)); |
496 | if (dictobj && (!id || !encrypt || pdf_dict_get(ctx, dict, PDF_NAME(Encrypt)))) |
497 | { |
498 | pdf_drop_obj(ctx, id); |
499 | id = pdf_keep_obj(ctx, dictobj); |
500 | } |
501 | |
502 | dictobj = pdf_dict_get(ctx, dict, PDF_NAME(Root)); |
503 | if (dictobj) |
504 | add_root(ctx, dictobj, &roots, &num_roots, &max_roots); |
505 | |
506 | dictobj = pdf_dict_get(ctx, dict, PDF_NAME(Info)); |
507 | if (dictobj) |
508 | { |
509 | pdf_drop_obj(ctx, info); |
510 | info = pdf_keep_obj(ctx, dictobj); |
511 | } |
512 | } |
513 | fz_always(ctx) |
514 | pdf_drop_obj(ctx, dict); |
515 | fz_catch(ctx) |
516 | fz_rethrow(ctx); |
517 | } |
518 | |
519 | else if (tok == PDF_TOK_EOF) |
520 | { |
521 | break; |
522 | } |
523 | |
524 | else |
525 | { |
526 | num = 0; |
527 | gen = 0; |
528 | } |
529 | } |
530 | |
531 | if (listlen == 0) |
532 | fz_throw(ctx, FZ_ERROR_GENERIC, "no objects found" ); |
533 | |
534 | /* make xref reasonable */ |
535 | |
536 | /* |
537 | Dummy access to entry to assure sufficient space in the xref table |
538 | and avoid repeated reallocs in the loop |
539 | */ |
540 | /* Ensure that the first xref table is a 'solid' one from |
541 | * 0 to maxnum. */ |
542 | pdf_ensure_solid_xref(ctx, doc, maxnum); |
543 | |
544 | for (i = 1; i < maxnum; i++) |
545 | { |
546 | entry = pdf_get_populating_xref_entry(ctx, doc, i); |
547 | if (entry->obj != NULL) |
548 | continue; |
549 | entry->type = 'f'; |
550 | entry->ofs = 0; |
551 | entry->gen = 0; |
552 | entry->num = 0; |
553 | |
554 | entry->stm_ofs = 0; |
555 | } |
556 | |
557 | for (i = 0; i < listlen; i++) |
558 | { |
559 | entry = pdf_get_populating_xref_entry(ctx, doc, list[i].num); |
560 | entry->type = 'n'; |
561 | entry->ofs = list[i].ofs; |
562 | entry->gen = list[i].gen; |
563 | entry->num = list[i].num; |
564 | |
565 | entry->stm_ofs = list[i].stm_ofs; |
566 | |
567 | /* correct stream length for unencrypted documents */ |
568 | if (!encrypt && list[i].stm_len >= 0) |
569 | { |
570 | pdf_obj *old_obj = NULL; |
571 | dict = pdf_load_object(ctx, doc, list[i].num); |
572 | |
573 | fz_try(ctx) |
574 | { |
575 | length = pdf_new_int(ctx, list[i].stm_len); |
576 | pdf_dict_get_put_drop(ctx, dict, PDF_NAME(Length), length, &old_obj); |
577 | if (old_obj) |
578 | orphan_object(ctx, doc, old_obj); |
579 | } |
580 | fz_always(ctx) |
581 | pdf_drop_obj(ctx, dict); |
582 | fz_catch(ctx) |
583 | fz_rethrow(ctx); |
584 | } |
585 | } |
586 | |
587 | entry = pdf_get_populating_xref_entry(ctx, doc, 0); |
588 | entry->type = 'f'; |
589 | entry->ofs = 0; |
590 | entry->gen = 65535; |
591 | entry->num = 0; |
592 | entry->stm_ofs = 0; |
593 | |
594 | next = 0; |
595 | for (i = pdf_xref_len(ctx, doc) - 1; i >= 0; i--) |
596 | { |
597 | entry = pdf_get_populating_xref_entry(ctx, doc, i); |
598 | if (entry->type == 'f') |
599 | { |
600 | entry->ofs = next; |
601 | if (entry->gen < 65535) |
602 | entry->gen ++; |
603 | next = i; |
604 | } |
605 | } |
606 | |
607 | /* create a repaired trailer, Root will be added later */ |
608 | |
609 | obj = pdf_new_dict(ctx, doc, 5); |
610 | /* During repair there is only a single xref section */ |
611 | pdf_set_populating_xref_trailer(ctx, doc, obj); |
612 | pdf_drop_obj(ctx, obj); |
613 | obj = NULL; |
614 | |
615 | obj = pdf_new_int(ctx, maxnum + 1); |
616 | pdf_dict_put(ctx, pdf_trailer(ctx, doc), PDF_NAME(Size), obj); |
617 | pdf_drop_obj(ctx, obj); |
618 | obj = NULL; |
619 | |
620 | if (roots) |
621 | { |
622 | for (i = num_roots-1; i > 0; i--) |
623 | { |
624 | if (pdf_is_dict(ctx, roots[i])) |
625 | break; |
626 | } |
627 | if (i >= 0) |
628 | { |
629 | pdf_dict_put(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), roots[i]); |
630 | } |
631 | } |
632 | if (info) |
633 | { |
634 | pdf_dict_put(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info), info); |
635 | pdf_drop_obj(ctx, info); |
636 | info = NULL; |
637 | } |
638 | |
639 | if (encrypt) |
640 | { |
641 | if (pdf_is_indirect(ctx, encrypt)) |
642 | { |
643 | /* create new reference with non-NULL xref pointer */ |
644 | obj = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, encrypt), pdf_to_gen(ctx, encrypt)); |
645 | pdf_drop_obj(ctx, encrypt); |
646 | encrypt = obj; |
647 | obj = NULL; |
648 | } |
649 | pdf_dict_put(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt), encrypt); |
650 | pdf_drop_obj(ctx, encrypt); |
651 | encrypt = NULL; |
652 | } |
653 | |
654 | if (id) |
655 | { |
656 | if (pdf_is_indirect(ctx, id)) |
657 | { |
658 | /* create new reference with non-NULL xref pointer */ |
659 | obj = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, id), pdf_to_gen(ctx, id)); |
660 | pdf_drop_obj(ctx, id); |
661 | id = obj; |
662 | obj = NULL; |
663 | } |
664 | pdf_dict_put(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID), id); |
665 | pdf_drop_obj(ctx, id); |
666 | id = NULL; |
667 | } |
668 | |
669 | fz_free(ctx, list); |
670 | } |
671 | fz_always(ctx) |
672 | { |
673 | for (i = 0; i < num_roots; i++) |
674 | pdf_drop_obj(ctx, roots[i]); |
675 | fz_free(ctx, roots); |
676 | } |
677 | fz_catch(ctx) |
678 | { |
679 | pdf_drop_obj(ctx, encrypt); |
680 | pdf_drop_obj(ctx, id); |
681 | pdf_drop_obj(ctx, obj); |
682 | pdf_drop_obj(ctx, info); |
683 | fz_free(ctx, list); |
684 | fz_rethrow(ctx); |
685 | } |
686 | } |
687 | |
688 | void |
689 | pdf_repair_obj_stms(fz_context *ctx, pdf_document *doc) |
690 | { |
691 | pdf_obj *dict; |
692 | int i; |
693 | int xref_len = pdf_xref_len(ctx, doc); |
694 | |
695 | for (i = 0; i < xref_len; i++) |
696 | { |
697 | pdf_xref_entry *entry = pdf_get_populating_xref_entry(ctx, doc, i); |
698 | |
699 | if (entry->stm_ofs) |
700 | { |
701 | dict = pdf_load_object(ctx, doc, i); |
702 | fz_try(ctx) |
703 | { |
704 | if (pdf_name_eq(ctx, pdf_dict_get(ctx, dict, PDF_NAME(Type)), PDF_NAME(ObjStm))) |
705 | pdf_repair_obj_stm(ctx, doc, i); |
706 | } |
707 | fz_catch(ctx) |
708 | { |
709 | fz_warn(ctx, "ignoring broken object stream (%d 0 R)" , i); |
710 | } |
711 | pdf_drop_obj(ctx, dict); |
712 | } |
713 | } |
714 | |
715 | /* Ensure that streamed objects reside inside a known non-streamed object */ |
716 | for (i = 0; i < xref_len; i++) |
717 | { |
718 | pdf_xref_entry *entry = pdf_get_populating_xref_entry(ctx, doc, i); |
719 | |
720 | if (entry->type == 'o' && pdf_get_populating_xref_entry(ctx, doc, entry->ofs)->type != 'n') |
721 | fz_throw(ctx, FZ_ERROR_GENERIC, "invalid reference to non-object-stream: %d (%d 0 R)" , (int)entry->ofs, i); |
722 | } |
723 | } |
724 | |