1#include "mupdf/fitz.h"
2#include "mupdf/pdf.h"
3
4#include <zlib.h>
5
6#include <assert.h>
7#include <limits.h>
8#include <string.h>
9
10#include <stdio.h> /* for debug printing */
11/* #define DEBUG_LINEARIZATION */
12/* #define DEBUG_HEAP_SORT */
13/* #define DEBUG_WRITING */
14
/* Size constant for signature "extras". The code that consumes it is not
 * visible near this definition; presumably a byte count -- confirm at the
 * point of use. */
#define SIG_EXTRAS_SIZE (1024)

/* Literal spellings of three PDF dictionary keys, leading slash included. */
#define SLASH_BYTE_RANGE ("/ByteRange")
#define SLASH_CONTENTS ("/Contents")
#define SLASH_FILTER ("/Filter")
20
21
22typedef struct pdf_write_state_s pdf_write_state;
23
24/*
25 As part of linearization, we need to keep a list of what objects are used
26 by what page. We do this by recording the objects used in a given page
27 in a page_objects structure. We have a list of these structures (one per
28 page) in the page_objects_list structure.
29
30 The page_objects structure maintains a heap in the object array, so
31 insertion takes log n time, and we can heapsort and dedupe at the end for
	a total worst case n log n time.
33
34 The magic heap invariant is that:
35 entry[n] >= entry[(n+1)*2-1] & entry[n] >= entry[(n+1)*2]
36 or equivalently:
37 entry[(n-1)>>1] >= entry[n]
38
39 For a discussion of the heap data structure (and heapsort) see Kingston,
40 "Algorithms and Data Structures".
41*/
42
/*
 * Record of the objects used by one page.
 *
 * The object numbers are stored in the trailing 'object' array. The code
 * that creates and grows a record allocates extra room after the struct for
 * it, so 'object' must remain the last member and must keep its declared
 * length of one.
 */
typedef struct {
	int num_shared; /* printed as "Number of shared objects" by the debug dump */
	int page_object_number; /* object number recorded for the page itself */
	int num_objects; /* printed as "Number of objects" by the debug dump */
	int min_ofs; /* printed as the start of the "Byte range" by the debug dump */
	int max_ofs; /* printed as the end of the "Byte range" by the debug dump */
	/* Growable list of the object numbers used on this page */
	int cap; /* number of slots allocated in object[] */
	int len; /* number of slots of object[] in use */
	int object[1];
} page_objects;
54
55typedef struct {
56 int cap;
57 int len;
58 page_objects *page[1];
59} page_objects_list;
60
61struct pdf_write_state_s
62{
63 fz_output *out;
64
65 int do_incremental;
66 int do_tight;
67 int do_ascii;
68 int do_expand;
69 int do_compress;
70 int do_compress_images;
71 int do_compress_fonts;
72 int do_garbage;
73 int do_linear;
74 int do_clean;
75 int do_encrypt;
76
77 int list_len;
78 int *use_list;
79 int64_t *ofs_list;
80 int *gen_list;
81 int *renumber_map;
82
83 /* The following extras are required for linearization */
84 int *rev_renumber_map;
85 int start;
86 int64_t first_xref_offset;
87 int64_t main_xref_offset;
88 int64_t first_xref_entry_offset;
89 int64_t file_len;
90 int hints_shared_offset;
91 int hintstream_len;
92 pdf_obj *linear_l;
93 pdf_obj *linear_h0;
94 pdf_obj *linear_h1;
95 pdf_obj *linear_o;
96 pdf_obj *linear_e;
97 pdf_obj *linear_n;
98 pdf_obj *linear_t;
99 pdf_obj *hints_s;
100 pdf_obj *hints_length;
101 int page_count;
102 page_objects_list *page_object_lists;
103 int crypt_object_number;
104 char opwd_utf8[128];
105 char upwd_utf8[128];
106 int permissions;
107 pdf_crypt *crypt;
108};
109
110/*
111 * Constants for use with use_list.
112 *
113 * If use_list[num] = 0, then object num is unused.
114 * If use_list[num] & PARAMS, then object num is the linearisation params obj.
115 * If use_list[num] & CATALOGUE, then object num is used by the catalogue.
116 * If use_list[num] & PAGE1, then object num is used by page 1.
117 * If use_list[num] & SHARED, then object num is shared between pages.
118 * If use_list[num] & PAGE_OBJECT then this must be the first object in a page.
 * If use_list[num] & OTHER_OBJECTS then this should appear in section 9.
120 * Otherwise object num is used by page (use_list[num]>>USE_PAGE_SHIFT).
121 */
/*
 * Bit layout of a use_list entry. The low eight bits are flags and the
 * remaining bits hold a page number. Bit 0 (the plain value 1) has no name
 * here; markref() stores it directly to mean "reachable".
 */
enum
{
	USE_CATALOGUE = 1 << 1,
	USE_PAGE1 = 1 << 2,
	USE_SHARED = 1 << 3,
	USE_PARAMS = 1 << 4,
	USE_HINTS = 1 << 5,
	USE_PAGE_OBJECT = 1 << 6,
	USE_OTHER_OBJECTS = 1 << 7,

	/* Everything above the flag bits is the page number */
	USE_PAGE_SHIFT = 8,
	USE_PAGE_MASK = ~0xFF
};
134
135static void
136expand_lists(fz_context *ctx, pdf_write_state *opts, int num)
137{
138 int i;
139
140 /* objects are numbered 0..num and maybe two additional objects for linearization */
141 num += 3;
142 opts->use_list = fz_realloc_array(ctx, opts->use_list, num, int);
143 opts->ofs_list = fz_realloc_array(ctx, opts->ofs_list, num, int64_t);
144 opts->gen_list = fz_realloc_array(ctx, opts->gen_list, num, int);
145 opts->renumber_map = fz_realloc_array(ctx, opts->renumber_map, num, int);
146 opts->rev_renumber_map = fz_realloc_array(ctx, opts->rev_renumber_map, num, int);
147
148 for (i = opts->list_len; i < num; i++)
149 {
150 opts->use_list[i] = 0;
151 opts->ofs_list[i] = 0;
152 opts->gen_list[i] = 0;
153 opts->renumber_map[i] = i;
154 opts->rev_renumber_map[i] = i;
155 }
156 opts->list_len = num;
157}
158
159/*
160 * page_objects and page_object_list handling functions
161 */
162static page_objects_list *
163page_objects_list_create(fz_context *ctx)
164{
165 page_objects_list *pol = fz_calloc(ctx, 1, sizeof(*pol));
166
167 pol->cap = 1;
168 pol->len = 0;
169 return pol;
170}
171
172static void
173page_objects_list_destroy(fz_context *ctx, page_objects_list *pol)
174{
175 int i;
176
177 if (!pol)
178 return;
179 for (i = 0; i < pol->len; i++)
180 {
181 fz_free(ctx, pol->page[i]);
182 }
183 fz_free(ctx, pol);
184}
185
186static void
187page_objects_list_ensure(fz_context *ctx, page_objects_list **pol, int newcap)
188{
189 int oldcap = (*pol)->cap;
190 if (newcap <= oldcap)
191 return;
192 *pol = fz_realloc(ctx, *pol, sizeof(page_objects_list) + (newcap-1)*sizeof(page_objects *));
193 memset(&(*pol)->page[oldcap], 0, (newcap-oldcap)*sizeof(page_objects *));
194 (*pol)->cap = newcap;
195}
196
197static page_objects *
198page_objects_create(fz_context *ctx)
199{
200 int initial_cap = 8;
201 page_objects *po = fz_calloc(ctx, 1, sizeof(*po) + (initial_cap-1) * sizeof(int));
202
203 po->cap = initial_cap;
204 po->len = 0;
205 return po;
206}
207
208static void
209page_objects_insert(fz_context *ctx, page_objects **ppo, int i)
210{
211 page_objects *po;
212
213 /* Make a page_objects if we don't have one */
214 if (*ppo == NULL)
215 *ppo = page_objects_create(ctx);
216
217 po = *ppo;
218 /* page_objects insertion: extend the page_objects by 1, and put us on the end */
219 if (po->len == po->cap)
220 {
221 po = fz_realloc(ctx, po, sizeof(page_objects) + (po->cap*2 - 1)*sizeof(int));
222 po->cap *= 2;
223 *ppo = po;
224 }
225 po->object[po->len++] = i;
226}
227
228static void
229page_objects_list_insert(fz_context *ctx, pdf_write_state *opts, int page, int object)
230{
231 page_objects_list_ensure(ctx, &opts->page_object_lists, page+1);
232 if (object >= opts->list_len)
233 expand_lists(ctx, opts, object);
234 if (opts->page_object_lists->len < page+1)
235 opts->page_object_lists->len = page+1;
236 page_objects_insert(ctx, &opts->page_object_lists->page[page], object);
237}
238
239static void
240page_objects_list_set_page_object(fz_context *ctx, pdf_write_state *opts, int page, int object)
241{
242 page_objects_list_ensure(ctx, &opts->page_object_lists, page+1);
243 if (object >= opts->list_len)
244 expand_lists(ctx, opts, object);
245 opts->page_object_lists->page[page]->page_object_number = object;
246}
247
248static void
249page_objects_sort(fz_context *ctx, page_objects *po)
250{
251 int i, j;
252 int n = po->len;
253
254 /* Step 1: Make a heap */
255 /* Invariant: Valid heap in [0..i), unsorted elements in [i..n) */
256 for (i = 1; i < n; i++)
257 {
258 /* Now bubble backwards to maintain heap invariant */
259 j = i;
260 while (j != 0)
261 {
262 int tmp;
263 int k = (j-1)>>1;
264 if (po->object[k] >= po->object[j])
265 break;
266 tmp = po->object[k];
267 po->object[k] = po->object[j];
268 po->object[j] = tmp;
269 j = k;
270 }
271 }
272
273 /* Step 2: Heap sort */
274 /* Invariant: valid heap in [0..i), sorted list in [i..n) */
275 /* Initially: i = n */
276 for (i = n-1; i > 0; i--)
277 {
278 /* Swap the maximum (0th) element from the page_objects into its place
279 * in the sorted list (position i). */
280 int tmp = po->object[0];
281 po->object[0] = po->object[i];
282 po->object[i] = tmp;
283 /* Now, the page_objects is invalid because the 0th element is out
284 * of place. Bubble it until the page_objects is valid. */
285 j = 0;
286 while (1)
287 {
288 /* Children are k and k+1 */
289 int k = (j+1)*2-1;
290 /* If both children out of the page_objects, we're done */
291 if (k > i-1)
292 break;
293 /* If both are in the page_objects, pick the larger one */
294 if (k < i-1 && po->object[k] < po->object[k+1])
295 k++;
296 /* If j is bigger than k (i.e. both of its children),
297 * we're done */
298 if (po->object[j] > po->object[k])
299 break;
300 tmp = po->object[k];
301 po->object[k] = po->object[j];
302 po->object[j] = tmp;
303 j = k;
304 }
305 }
306}
307
308static int
309order_ge(int ui, int uj)
310{
311 /*
312 For linearization, we need to order the sections as follows:
313
314 Remaining pages (Part 7)
315 Shared objects (Part 8)
316 Objects not associated with any page (Part 9)
317 Any "other" objects
318 (Header)(Part 1)
319 (Linearization params) (Part 2)
320 (1st page Xref/Trailer) (Part 3)
321 Catalogue (and other document level objects) (Part 4)
322 First page (Part 6)
323 (Primary Hint stream) (*) (Part 5)
324 Any free objects
325
326 Note, this is NOT the same order they appear in
327 the final file!
328
329 (*) The PDF reference gives us the option of putting the hint stream
330 after the first page, and we take it, for simplicity.
331 */
332
333 /* If the 2 objects are in the same section, then page object comes first. */
334 if (((ui ^ uj) & ~USE_PAGE_OBJECT) == 0)
335 return ((ui & USE_PAGE_OBJECT) == 0);
336 /* Put unused objects last */
337 else if (ui == 0)
338 return 1;
339 else if (uj == 0)
340 return 0;
341 /* Put the hint stream before that... */
342 else if (ui & USE_HINTS)
343 return 1;
344 else if (uj & USE_HINTS)
345 return 0;
346 /* Put page 1 before that... */
347 else if (ui & USE_PAGE1)
348 return 1;
349 else if (uj & USE_PAGE1)
350 return 0;
351 /* Put the catalogue before that... */
352 else if (ui & USE_CATALOGUE)
353 return 1;
354 else if (uj & USE_CATALOGUE)
355 return 0;
356 /* Put the linearization params before that... */
357 else if (ui & USE_PARAMS)
358 return 1;
359 else if (uj & USE_PARAMS)
360 return 0;
361 /* Put other objects before that */
362 else if (ui & USE_OTHER_OBJECTS)
363 return 1;
364 else if (uj & USE_OTHER_OBJECTS)
365 return 0;
366 /* Put shared objects before that... */
367 else if (ui & USE_SHARED)
368 return 1;
369 else if (uj & USE_SHARED)
370 return 0;
371 /* And otherwise, order by the page number on which
372 * they are used. */
373 return (ui>>USE_PAGE_SHIFT) >= (uj>>USE_PAGE_SHIFT);
374}
375
/*
 * Heapsort an array of indices by the values they refer to.
 *
 * list: the n indices to reorder in place.
 * val:  lookup table; list[x] is ordered according to val[list[x]].
 * ge:   comparison; ge(a, b) is non-zero when a sorts at or after b.
 *
 * On return the entries of list are arranged so that ge(val[list[x+1]],
 * val[list[x]]) holds for neighbours. ge is never called when n < 2.
 */
static void
heap_sort(int *list, int n, const int *val, int (*ge)(int, int))
{
	int end, parent, child, hold;

#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Initially:\n");
	for (end = 0; end < n; end++)
		fprintf(stderr, "%d: %d %x\n", end, list[end], val[list[end]]);
#endif

	/* Phase 1: build a max-heap.
	 * list[0..end) is always a valid heap; each pass adds list[end] to
	 * it by sifting the new element up towards the root. */
	for (end = 1; end < n; end++)
	{
		for (child = end; child != 0; child = parent)
		{
			parent = (child-1)>>1;
			if (ge(val[list[parent]], val[list[child]]))
				break;
			hold = list[parent];
			list[parent] = list[child];
			list[child] = hold;
		}
	}

#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Valid heap:\n");
	for (end = 0; end < n; end++)
	{
		int kid = (end+1)*2-1;

		fprintf(stderr, "%d: %d %x ", end, list[end], val[list[end]]);
		if (kid < n)
			fprintf(stderr, "%s", ge(val[list[end]], val[list[kid]]) ? "OK " : "BAD ");
		if (kid+1 < n)
			fprintf(stderr, "%s", ge(val[list[end]], val[list[kid+1]]) ? "OK\n" : "BAD\n");
		else
			fprintf(stderr, "\n");
	}
#endif

	/* Phase 2: repeatedly move the maximum to the end.
	 * list[0..end) is a valid heap and list[end..n) is sorted. */
	for (end = n-1; end > 0; end--)
	{
		/* The root is the largest remaining entry: park it at 'end' */
		hold = list[0];
		list[0] = list[end];
		list[end] = hold;

		/* The entry now at the root is out of place; sift it down
		 * until the heap (which has shrunk to list[0..end)) is valid. */
		parent = 0;
		for (;;)
		{
			/* Children of 'parent' are 'child' and 'child+1' */
			child = 2*parent + 1;
			/* No children inside the heap: done */
			if (child >= end)
				break;
			/* Two children inside the heap: take the larger */
			if (child+1 < end && ge(val[list[child+1]], val[list[child]]))
				child++;
			/* Parent at least as large as its larger child: done */
			if (ge(val[list[parent]], val[list[child]]))
				break;
			hold = list[child];
			list[child] = list[parent];
			list[parent] = hold;
			parent = child;
		}
	}

#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Sorted:\n");
	for (end = 0; end < n; end++)
	{
		fprintf(stderr, "%d: %d %x ", end, list[end], val[list[end]]);
		if (end+1 < n)
			fprintf(stderr, "%s", ge(val[list[end+1]], val[list[end]]) ? "OK" : "BAD");
		fprintf(stderr, "\n");
	}
#endif
}
481
482static void
483page_objects_dedupe(fz_context *ctx, page_objects *po)
484{
485 int i, j;
486 int n = po->len-1;
487
488 for (i = 0; i < n; i++)
489 {
490 if (po->object[i] == po->object[i+1])
491 break;
492 }
493 j = i; /* j points to the last valid one */
494 i++; /* i points to the first one we haven't looked at */
495 for (; i < n; i++)
496 {
497 if (po->object[j] != po->object[i])
498 po->object[++j] = po->object[i];
499 }
500 po->len = j+1;
501}
502
503static void
504page_objects_list_sort_and_dedupe(fz_context *ctx, page_objects_list *pol)
505{
506 int i;
507 int n = pol->len;
508
509 for (i = 0; i < n; i++)
510 {
511 page_objects_sort(ctx, pol->page[i]);
512 page_objects_dedupe(ctx, pol->page[i]);
513 }
514}
515
516#ifdef DEBUG_LINEARIZATION
517static void
518page_objects_dump(pdf_write_state *opts)
519{
520 page_objects_list *pol = opts->page_object_lists;
521 int i, j;
522
523 for (i = 0; i < pol->len; i++)
524 {
525 page_objects *p = pol->page[i];
526 fprintf(stderr, "Page %d\n", i+1);
527 for (j = 0; j < p->len; j++)
528 {
529 int o = p->object[j];
530 fprintf(stderr, "\tObject %d: use=%x\n", o, opts->use_list[o]);
531 }
532 fprintf(stderr, "Byte range=%d->%d\n", p->min_ofs, p->max_ofs);
533 fprintf(stderr, "Number of objects=%d, Number of shared objects=%d\n", p->num_objects, p->num_shared);
534 fprintf(stderr, "Page object number=%d\n", p->page_object_number);
535 }
536}
537
538static void
539objects_dump(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
540{
541 int i;
542
543 for (i=0; i < pdf_xref_len(ctx, doc); i++)
544 {
545 fprintf(stderr, "Object %d use=%x offset=%d\n", i, opts->use_list[i], (int)opts->ofs_list[i]);
546 }
547}
548#endif
549
550/*
551 * Garbage collect objects not reachable from the trailer.
552 */
553
554/* Mark a reference. If it's been marked already, return NULL (as no further
555 * processing is required). If it's not, return the resolved object so
556 * that we can continue our recursive marking. If it's a duff reference
557 * return the fact so that we can remove the reference at source.
558 */
559static pdf_obj *markref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj, int *duff)
560{
561 int num = pdf_to_num(ctx, obj);
562
563 if (num <= 0 || num >= pdf_xref_len(ctx, doc))
564 {
565 *duff = 1;
566 return NULL;
567 }
568 *duff = 0;
569 if (opts->use_list[num])
570 return NULL;
571
572 opts->use_list[num] = 1;
573
574 /* Bake in /Length in stream objects */
575 fz_try(ctx)
576 {
577 if (pdf_obj_num_is_stream(ctx, doc, num))
578 {
579 pdf_obj *len = pdf_dict_get(ctx, obj, PDF_NAME(Length));
580 if (pdf_is_indirect(ctx, len))
581 {
582 opts->use_list[pdf_to_num(ctx, len)] = 0;
583 len = pdf_resolve_indirect(ctx, len);
584 pdf_dict_put(ctx, obj, PDF_NAME(Length), len);
585 }
586 }
587 }
588 fz_catch(ctx)
589 {
590 /* Leave broken */
591 }
592
593 obj = pdf_resolve_indirect(ctx, obj);
594 if (obj == NULL || pdf_is_null(ctx, obj))
595 {
596 *duff = 1;
597 opts->use_list[num] = 0;
598 }
599
600 return obj;
601}
602
/*
 * Tracing support for the mark phase. With DEBUG_MARK_AND_SWEEP defined,
 * DEBUGGING_MARKING(A) executes A; otherwise it expands to nothing.
 */
#ifdef DEBUG_MARK_AND_SWEEP
/* Current nesting depth of markobj(), maintained by markobj() itself */
static int depth = 0;

/*
 * Print one space per level of the current nesting depth, in chunks of up
 * to 16. Works on a copy of 'depth': consuming the global itself would
 * reset the nesting level on every call and drive it negative as the
 * callers unwind.
 */
static
void indent(void)
{
	/* Exactly 16 spaces, so that &pad[16-d] yields d of them */
	static const char pad[] = "                ";
	int remaining = depth;

	while (remaining > 0)
	{
		int d = remaining;
		if (d > 16)
			d = 16;
		printf("%s", &pad[16-d]);
		remaining -= d;
	}
}
#define DEBUGGING_MARKING(A) do { A; } while (0)
#else
#define DEBUGGING_MARKING(A) do { } while (0)
#endif
622
623/* Recursively mark an object. If any references found are duff, then
624 * replace them with nulls. */
625static int markobj(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj)
626{
627 int i;
628
629 DEBUGGING_MARKING(depth++);
630
631 while (pdf_is_indirect(ctx, obj))
632 {
633 int duff;
634 DEBUGGING_MARKING(indent(); printf("Marking object %d\n", pdf_to_num(ctx, obj)));
635 obj = markref(ctx, doc, opts, obj, &duff);
636 if (duff)
637 {
638 DEBUGGING_MARKING(depth--);
639 return 1;
640 }
641 }
642
643 if (pdf_is_dict(ctx, obj))
644 {
645 int n = pdf_dict_len(ctx, obj);
646 for (i = 0; i < n; i++)
647 {
648 DEBUGGING_MARKING(indent(); printf("DICT[%d/%d] = %s\n", i, n, pdf_to_name(ctx, pdf_dict_get_key(ctx, obj, i))));
649 if (markobj(ctx, doc, opts, pdf_dict_get_val(ctx, obj, i)))
650 pdf_dict_put_val_null(ctx, obj, i);
651 }
652 }
653
654 else if (pdf_is_array(ctx, obj))
655 {
656 int n = pdf_array_len(ctx, obj);
657 for (i = 0; i < n; i++)
658 {
659 DEBUGGING_MARKING(indent(); printf("ARRAY[%d/%d]\n", i, n));
660 if (markobj(ctx, doc, opts, pdf_array_get(ctx, obj, i)))
661 pdf_array_put(ctx, obj, i, PDF_NULL);
662 }
663 }
664
665 DEBUGGING_MARKING(depth--);
666
667 return 0;
668}
669
670/*
671 * Scan for and remove duplicate objects (slow)
672 */
673
674static void removeduplicateobjs(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
675{
676 int num, other, max_num;
677 int xref_len = pdf_xref_len(ctx, doc);
678
679 for (num = 1; num < xref_len; num++)
680 {
681 /* Only compare an object to objects preceding it */
682 for (other = 1; other < num; other++)
683 {
684 pdf_obj *a, *b;
685 int newnum, streama = 0, streamb = 0, differ = 0;
686
687 if (num == other || !opts->use_list[num] || !opts->use_list[other])
688 continue;
689
690 /* TODO: resolve indirect references to see if we can omit them */
691
692 /*
693 * Comparing stream objects data contents would take too long.
694 *
695 * pdf_obj_num_is_stream calls pdf_cache_object and ensures
696 * that the xref table has the objects loaded.
697 */
698 fz_try(ctx)
699 {
700 streama = pdf_obj_num_is_stream(ctx, doc, num);
701 streamb = pdf_obj_num_is_stream(ctx, doc, other);
702 differ = streama || streamb;
703 if (streama && streamb && opts->do_garbage >= 4)
704 differ = 0;
705 }
706 fz_catch(ctx)
707 {
708 /* Assume different */
709 differ = 1;
710 }
711 if (differ)
712 continue;
713
714 a = pdf_get_xref_entry(ctx, doc, num)->obj;
715 b = pdf_get_xref_entry(ctx, doc, other)->obj;
716
717 if (pdf_objcmp(ctx, a, b))
718 continue;
719
720 if (streama && streamb)
721 {
722 /* Check to see if streams match too. */
723 fz_buffer *sa = NULL;
724 fz_buffer *sb = NULL;
725
726 fz_var(sa);
727 fz_var(sb);
728
729 differ = 1;
730 fz_try(ctx)
731 {
732 unsigned char *dataa, *datab;
733 size_t lena, lenb;
734 sa = pdf_load_raw_stream_number(ctx, doc, num);
735 sb = pdf_load_raw_stream_number(ctx, doc, other);
736 lena = fz_buffer_storage(ctx, sa, &dataa);
737 lenb = fz_buffer_storage(ctx, sb, &datab);
738 if (lena == lenb && memcmp(dataa, datab, lena) == 0)
739 differ = 0;
740 }
741 fz_always(ctx)
742 {
743 fz_drop_buffer(ctx, sa);
744 fz_drop_buffer(ctx, sb);
745 }
746 fz_catch(ctx)
747 {
748 fz_rethrow(ctx);
749 }
750 if (differ)
751 continue;
752 }
753
754 /* Keep the lowest numbered object */
755 newnum = fz_mini(num, other);
756 max_num = fz_maxi(num, other);
757 if (max_num >= opts->list_len)
758 expand_lists(ctx, opts, max_num);
759 opts->renumber_map[num] = newnum;
760 opts->renumber_map[other] = newnum;
761 opts->rev_renumber_map[newnum] = num; /* Either will do */
762 opts->use_list[fz_maxi(num, other)] = 0;
763
764 /* One duplicate was found, do not look for another */
765 break;
766 }
767 }
768}
769
770/*
771 * Renumber objects sequentially so the xref is more compact
772 *
773 * This code assumes that any opts->renumber_map[n] <= n for all n.
774 */
775
776static void compactxref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
777{
778 int num, newnum;
779 int xref_len = pdf_xref_len(ctx, doc);
780
781 /*
782 * Update renumber_map in-place, clustering all used
783 * objects together at low object ids. Objects that
784 * already should be renumbered will have their new
785 * object ids be updated to reflect the compaction.
786 */
787
788 if (xref_len > opts->list_len)
789 expand_lists(ctx, opts, xref_len-1);
790
791 newnum = 1;
792 for (num = 1; num < xref_len; num++)
793 {
794 /* If it's not used, map it to zero */
795 if (!opts->use_list[opts->renumber_map[num]])
796 {
797 opts->renumber_map[num] = 0;
798 }
799 /* If it's not moved, compact it. */
800 else if (opts->renumber_map[num] == num)
801 {
802 opts->rev_renumber_map[newnum] = opts->rev_renumber_map[num];
803 opts->renumber_map[num] = newnum++;
804 }
805 /* Otherwise it's used, and moved. We know that it must have
806 * moved down, so the place it's moved to will be in the right
807 * place already. */
808 else
809 {
810 opts->renumber_map[num] = opts->renumber_map[opts->renumber_map[num]];
811 }
812 }
813}
814
815/*
816 * Update indirect objects according to renumbering established when
817 * removing duplicate objects and compacting the xref.
818 */
819
820static void renumberobj(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj)
821{
822 int i;
823 int xref_len = pdf_xref_len(ctx, doc);
824
825 if (pdf_is_dict(ctx, obj))
826 {
827 int n = pdf_dict_len(ctx, obj);
828 for (i = 0; i < n; i++)
829 {
830 pdf_obj *key = pdf_dict_get_key(ctx, obj, i);
831 pdf_obj *val = pdf_dict_get_val(ctx, obj, i);
832 if (pdf_is_indirect(ctx, val))
833 {
834 int o = pdf_to_num(ctx, val);
835 if (o >= xref_len || o <= 0 || opts->renumber_map[o] == 0)
836 val = PDF_NULL;
837 else
838 val = pdf_new_indirect(ctx, doc, opts->renumber_map[o], 0);
839 pdf_dict_put_drop(ctx, obj, key, val);
840 }
841 else
842 {
843 renumberobj(ctx, doc, opts, val);
844 }
845 }
846 }
847
848 else if (pdf_is_array(ctx, obj))
849 {
850 int n = pdf_array_len(ctx, obj);
851 for (i = 0; i < n; i++)
852 {
853 pdf_obj *val = pdf_array_get(ctx, obj, i);
854 if (pdf_is_indirect(ctx, val))
855 {
856 int o = pdf_to_num(ctx, val);
857 if (o >= xref_len || o <= 0 || opts->renumber_map[o] == 0)
858 val = PDF_NULL;
859 else
860 val = pdf_new_indirect(ctx, doc, opts->renumber_map[o], 0);
861 pdf_array_put_drop(ctx, obj, i, val);
862 }
863 else
864 {
865 renumberobj(ctx, doc, opts, val);
866 }
867 }
868 }
869}
870
871static void renumberobjs(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
872{
873 pdf_xref_entry *newxref = NULL;
874 int newlen;
875 int num;
876 int *new_use_list;
877 int xref_len = pdf_xref_len(ctx, doc);
878
879 new_use_list = fz_calloc(ctx, pdf_xref_len(ctx, doc)+3, sizeof(int));
880
881 fz_var(newxref);
882 fz_try(ctx)
883 {
884 /* Apply renumber map to indirect references in all objects in xref */
885 renumberobj(ctx, doc, opts, pdf_trailer(ctx, doc));
886 for (num = 0; num < xref_len; num++)
887 {
888 pdf_obj *obj;
889 int to = opts->renumber_map[num];
890
891 /* If object is going to be dropped, don't bother renumbering */
892 if (to == 0)
893 continue;
894
895 obj = pdf_get_xref_entry(ctx, doc, num)->obj;
896
897 if (pdf_is_indirect(ctx, obj))
898 {
899 obj = pdf_new_indirect(ctx, doc, to, 0);
900 fz_try(ctx)
901 pdf_update_object(ctx, doc, num, obj);
902 fz_always(ctx)
903 pdf_drop_obj(ctx, obj);
904 fz_catch(ctx)
905 fz_rethrow(ctx);
906 }
907 else
908 {
909 renumberobj(ctx, doc, opts, obj);
910 }
911 }
912
913 /* Create new table for the reordered, compacted xref */
914 newxref = fz_malloc_array(ctx, xref_len + 3, pdf_xref_entry);
915 newxref[0] = *pdf_get_xref_entry(ctx, doc, 0);
916
917 /* Move used objects into the new compacted xref */
918 newlen = 0;
919 for (num = 1; num < xref_len; num++)
920 {
921 if (opts->use_list[num])
922 {
923 pdf_xref_entry *e;
924 if (newlen < opts->renumber_map[num])
925 newlen = opts->renumber_map[num];
926 e = pdf_get_xref_entry(ctx, doc, num);
927 newxref[opts->renumber_map[num]] = *e;
928 if (e->obj)
929 {
930 pdf_set_obj_parent(ctx, e->obj, opts->renumber_map[num]);
931 e->obj = NULL;
932 }
933 new_use_list[opts->renumber_map[num]] = opts->use_list[num];
934 }
935 else
936 {
937 pdf_xref_entry *e = pdf_get_xref_entry(ctx, doc, num);
938 pdf_drop_obj(ctx, e->obj);
939 e->obj = NULL;
940 fz_drop_buffer(ctx, e->stm_buf);
941 e->stm_buf = NULL;
942 }
943 }
944
945 pdf_replace_xref(ctx, doc, newxref, newlen + 1);
946 newxref = NULL;
947 }
948 fz_catch(ctx)
949 {
950 fz_free(ctx, newxref);
951 fz_free(ctx, new_use_list);
952 fz_rethrow(ctx);
953 }
954 fz_free(ctx, opts->use_list);
955 opts->use_list = new_use_list;
956
957 for (num = 1; num < xref_len; num++)
958 {
959 opts->renumber_map[num] = num;
960 }
961}
962
963static void page_objects_list_renumber(pdf_write_state *opts)
964{
965 int i, j;
966
967 for (i = 0; i < opts->page_object_lists->len; i++)
968 {
969 page_objects *po = opts->page_object_lists->page[i];
970 for (j = 0; j < po->len; j++)
971 {
972 po->object[j] = opts->renumber_map[po->object[j]];
973 }
974 po->page_object_number = opts->renumber_map[po->page_object_number];
975 }
976}
977
978static void
979mark_all(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *val, int flag, int page)
980{
981 if (pdf_mark_obj(ctx, val))
982 return;
983
984 fz_try(ctx)
985 {
986 if (pdf_is_indirect(ctx, val))
987 {
988 int num = pdf_to_num(ctx, val);
989 if (num >= opts->list_len)
990 expand_lists(ctx, opts, num);
991 if (opts->use_list[num] & USE_PAGE_MASK)
992 /* Already used */
993 opts->use_list[num] |= USE_SHARED;
994 else
995 opts->use_list[num] |= flag;
996 if (page >= 0)
997 page_objects_list_insert(ctx, opts, page, num);
998 }
999
1000 if (pdf_is_dict(ctx, val))
1001 {
1002 int i, n = pdf_dict_len(ctx, val);
1003
1004 for (i = 0; i < n; i++)
1005 {
1006 mark_all(ctx, doc, opts, pdf_dict_get_val(ctx, val, i), flag, page);
1007 }
1008 }
1009 else if (pdf_is_array(ctx, val))
1010 {
1011 int i, n = pdf_array_len(ctx, val);
1012
1013 for (i = 0; i < n; i++)
1014 {
1015 mark_all(ctx, doc, opts, pdf_array_get(ctx, val, i), flag, page);
1016 }
1017 }
1018 }
1019 fz_always(ctx)
1020 {
1021 pdf_unmark_obj(ctx, val);
1022 }
1023 fz_catch(ctx)
1024 {
1025 fz_rethrow(ctx);
1026 }
1027}
1028
1029static int
1030mark_pages(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *val, int pagenum)
1031{
1032 if (pdf_mark_obj(ctx, val))
1033 return pagenum;
1034
1035 fz_try(ctx)
1036 {
1037 if (pdf_is_dict(ctx, val))
1038 {
1039 if (pdf_name_eq(ctx, PDF_NAME(Page), pdf_dict_get(ctx, val, PDF_NAME(Type))))
1040 {
1041 int num = pdf_to_num(ctx, val);
1042 pdf_unmark_obj(ctx, val);
1043 mark_all(ctx, doc, opts, val, pagenum == 0 ? USE_PAGE1 : (pagenum<<USE_PAGE_SHIFT), pagenum);
1044 page_objects_list_set_page_object(ctx, opts, pagenum, num);
1045 pagenum++;
1046 opts->use_list[num] |= USE_PAGE_OBJECT;
1047 }
1048 else
1049 {
1050 int i, n = pdf_dict_len(ctx, val);
1051
1052 for (i = 0; i < n; i++)
1053 {
1054 pdf_obj *key = pdf_dict_get_key(ctx, val, i);
1055 pdf_obj *obj = pdf_dict_get_val(ctx, val, i);
1056
1057 if (pdf_name_eq(ctx, PDF_NAME(Kids), key))
1058 pagenum = mark_pages(ctx, doc, opts, obj, pagenum);
1059 else
1060 mark_all(ctx, doc, opts, obj, USE_CATALOGUE, -1);
1061 }
1062
1063 if (pdf_is_indirect(ctx, val))
1064 {
1065 int num = pdf_to_num(ctx, val);
1066 opts->use_list[num] |= USE_CATALOGUE;
1067 }
1068 }
1069 }
1070 else if (pdf_is_array(ctx, val))
1071 {
1072 int i, n = pdf_array_len(ctx, val);
1073
1074 for (i = 0; i < n; i++)
1075 {
1076 pagenum = mark_pages(ctx, doc, opts, pdf_array_get(ctx, val, i), pagenum);
1077 }
1078 if (pdf_is_indirect(ctx, val))
1079 {
1080 int num = pdf_to_num(ctx, val);
1081 opts->use_list[num] |= USE_CATALOGUE;
1082 }
1083 }
1084 }
1085 fz_always(ctx)
1086 {
1087 pdf_unmark_obj(ctx, val);
1088 }
1089 fz_catch(ctx)
1090 {
1091 fz_rethrow(ctx);
1092 }
1093 return pagenum;
1094}
1095
1096static void
1097mark_root(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *dict)
1098{
1099 int i, n = pdf_dict_len(ctx, dict);
1100
1101 if (pdf_mark_obj(ctx, dict))
1102 return;
1103
1104 fz_try(ctx)
1105 {
1106 if (pdf_is_indirect(ctx, dict))
1107 {
1108 int num = pdf_to_num(ctx, dict);
1109 opts->use_list[num] |= USE_CATALOGUE;
1110 }
1111
1112 for (i = 0; i < n; i++)
1113 {
1114 pdf_obj *key = pdf_dict_get_key(ctx, dict, i);
1115 pdf_obj *val = pdf_dict_get_val(ctx, dict, i);
1116
1117 if (pdf_name_eq(ctx, PDF_NAME(Pages), key))
1118 opts->page_count = mark_pages(ctx, doc, opts, val, 0);
1119 else if (pdf_name_eq(ctx, PDF_NAME(Names), key))
1120 mark_all(ctx, doc, opts, val, USE_OTHER_OBJECTS, -1);
1121 else if (pdf_name_eq(ctx, PDF_NAME(Dests), key))
1122 mark_all(ctx, doc, opts, val, USE_OTHER_OBJECTS, -1);
1123 else if (pdf_name_eq(ctx, PDF_NAME(Outlines), key))
1124 {
1125 int section;
1126 /* Look at PageMode to decide whether to
1127 * USE_OTHER_OBJECTS or USE_PAGE1 here. */
1128 if (pdf_name_eq(ctx, pdf_dict_get(ctx, dict, PDF_NAME(PageMode)), PDF_NAME(UseOutlines)))
1129 section = USE_PAGE1;
1130 else
1131 section = USE_OTHER_OBJECTS;
1132 mark_all(ctx, doc, opts, val, section, -1);
1133 }
1134 else
1135 mark_all(ctx, doc, opts, val, USE_CATALOGUE, -1);
1136 }
1137 }
1138 fz_always(ctx)
1139 {
1140 pdf_unmark_obj(ctx, dict);
1141 }
1142 fz_catch(ctx)
1143 {
1144 fz_rethrow(ctx);
1145 }
1146}
1147
1148static void
1149mark_trailer(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *dict)
1150{
1151 int i, n = pdf_dict_len(ctx, dict);
1152
1153 if (pdf_mark_obj(ctx, dict))
1154 return;
1155
1156 fz_try(ctx)
1157 {
1158 for (i = 0; i < n; i++)
1159 {
1160 pdf_obj *key = pdf_dict_get_key(ctx, dict, i);
1161 pdf_obj *val = pdf_dict_get_val(ctx, dict, i);
1162
1163 if (pdf_name_eq(ctx, PDF_NAME(Root), key))
1164 mark_root(ctx, doc, opts, val);
1165 else
1166 mark_all(ctx, doc, opts, val, USE_CATALOGUE, -1);
1167 }
1168 }
1169 fz_always(ctx)
1170 {
1171 pdf_unmark_obj(ctx, dict);
1172 }
1173 fz_catch(ctx)
1174 {
1175 fz_rethrow(ctx);
1176 }
1177}
1178
1179static void
1180add_linearization_objs(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
1181{
1182 pdf_obj *params_obj = NULL;
1183 pdf_obj *params_ref = NULL;
1184 pdf_obj *hint_obj = NULL;
1185 pdf_obj *hint_ref = NULL;
1186 pdf_obj *o;
1187 int params_num, hint_num;
1188
1189 fz_var(params_obj);
1190 fz_var(params_ref);
1191 fz_var(hint_obj);
1192 fz_var(hint_ref);
1193
1194 fz_try(ctx)
1195 {
1196 /* Linearization params */
1197 params_obj = pdf_new_dict(ctx, doc, 10);
1198 params_ref = pdf_add_object(ctx, doc, params_obj);
1199 params_num = pdf_to_num(ctx, params_ref);
1200
1201 opts->use_list[params_num] = USE_PARAMS;
1202 opts->renumber_map[params_num] = params_num;
1203 opts->rev_renumber_map[params_num] = params_num;
1204 opts->gen_list[params_num] = 0;
1205 pdf_dict_put_real(ctx, params_obj, PDF_NAME(Linearized), 1.0f);
1206 opts->linear_l = pdf_new_int(ctx, INT_MIN);
1207 pdf_dict_put(ctx, params_obj, PDF_NAME(L), opts->linear_l);
1208 opts->linear_h0 = pdf_new_int(ctx, INT_MIN);
1209 o = pdf_new_array(ctx, doc, 2);
1210 pdf_dict_put_drop(ctx, params_obj, PDF_NAME(H), o);
1211 pdf_array_push(ctx, o, opts->linear_h0);
1212 opts->linear_h1 = pdf_new_int(ctx, INT_MIN);
1213 pdf_array_push(ctx, o, opts->linear_h1);
1214 opts->linear_o = pdf_new_int(ctx, INT_MIN);
1215 pdf_dict_put(ctx, params_obj, PDF_NAME(O), opts->linear_o);
1216 opts->linear_e = pdf_new_int(ctx, INT_MIN);
1217 pdf_dict_put(ctx, params_obj, PDF_NAME(E), opts->linear_e);
1218 opts->linear_n = pdf_new_int(ctx, INT_MIN);
1219 pdf_dict_put(ctx, params_obj, PDF_NAME(N), opts->linear_n);
1220 opts->linear_t = pdf_new_int(ctx, INT_MIN);
1221 pdf_dict_put(ctx, params_obj, PDF_NAME(T), opts->linear_t);
1222
1223 /* Primary hint stream */
1224 hint_obj = pdf_new_dict(ctx, doc, 10);
1225 hint_ref = pdf_add_object(ctx, doc, hint_obj);
1226 hint_num = pdf_to_num(ctx, hint_ref);
1227
1228 opts->use_list[hint_num] = USE_HINTS;
1229 opts->renumber_map[hint_num] = hint_num;
1230 opts->rev_renumber_map[hint_num] = hint_num;
1231 opts->gen_list[hint_num] = 0;
1232 pdf_dict_put_int(ctx, hint_obj, PDF_NAME(P), 0);
1233 opts->hints_s = pdf_new_int(ctx, INT_MIN);
1234 pdf_dict_put(ctx, hint_obj, PDF_NAME(S), opts->hints_s);
1235 /* FIXME: Do we have thumbnails? Do a T entry */
1236 /* FIXME: Do we have outlines? Do an O entry */
1237 /* FIXME: Do we have article threads? Do an A entry */
1238 /* FIXME: Do we have named destinations? Do a E entry */
1239 /* FIXME: Do we have interactive forms? Do a V entry */
1240 /* FIXME: Do we have document information? Do an I entry */
1241 /* FIXME: Do we have logical structure hierarchy? Do a C entry */
1242 /* FIXME: Do L, Page Label hint table */
1243 pdf_dict_put(ctx, hint_obj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
1244 opts->hints_length = pdf_new_int(ctx, INT_MIN);
1245 pdf_dict_put(ctx, hint_obj, PDF_NAME(Length), opts->hints_length);
1246 pdf_get_xref_entry(ctx, doc, hint_num)->stm_ofs = 0;
1247 }
1248 fz_always(ctx)
1249 {
1250 pdf_drop_obj(ctx, params_obj);
1251 pdf_drop_obj(ctx, params_ref);
1252 pdf_drop_obj(ctx, hint_ref);
1253 pdf_drop_obj(ctx, hint_obj);
1254 }
1255 fz_catch(ctx)
1256 {
1257 fz_rethrow(ctx);
1258 }
1259}
1260
1261static void
1262lpr_inherit_res_contents(fz_context *ctx, pdf_obj *res, pdf_obj *dict, pdf_obj *text)
1263{
1264 pdf_obj *o, *r;
1265 int i, n;
1266
1267 /* If the parent node doesn't have an entry of this type, give up. */
1268 o = pdf_dict_get(ctx, dict, text);
1269 if (!o)
1270 return;
1271
1272 /* If the resources dict we are building doesn't have an entry of this
1273 * type yet, then just copy it (ensuring it's not a reference) */
1274 r = pdf_dict_get(ctx, res, text);
1275 if (r == NULL)
1276 {
1277 o = pdf_resolve_indirect(ctx, o);
1278 if (pdf_is_dict(ctx, o))
1279 o = pdf_copy_dict(ctx, o);
1280 else if (pdf_is_array(ctx, o))
1281 o = pdf_copy_array(ctx, o);
1282 else
1283 o = NULL;
1284 if (o)
1285 pdf_dict_put_drop(ctx, res, text, o);
1286 return;
1287 }
1288
1289 /* Otherwise we need to merge o into r */
1290 if (pdf_is_dict(ctx, o))
1291 {
1292 n = pdf_dict_len(ctx, o);
1293 for (i = 0; i < n; i++)
1294 {
1295 pdf_obj *key = pdf_dict_get_key(ctx, o, i);
1296 pdf_obj *val = pdf_dict_get_val(ctx, o, i);
1297
1298 if (pdf_dict_get(ctx, res, key))
1299 continue;
1300 pdf_dict_put(ctx, res, key, val);
1301 }
1302 }
1303}
1304
1305static void
1306lpr_inherit_res(fz_context *ctx, pdf_obj *node, int depth, pdf_obj *dict)
1307{
1308 while (1)
1309 {
1310 pdf_obj *o;
1311
1312 node = pdf_dict_get(ctx, node, PDF_NAME(Parent));
1313 depth--;
1314 if (!node || depth < 0)
1315 break;
1316
1317 o = pdf_dict_get(ctx, node, PDF_NAME(Resources));
1318 if (o)
1319 {
1320 lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(ExtGState));
1321 lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(ColorSpace));
1322 lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(Pattern));
1323 lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(Shading));
1324 lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(XObject));
1325 lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(Font));
1326 lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(ProcSet));
1327 lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(Properties));
1328 }
1329 }
1330}
1331
1332static pdf_obj *
1333lpr_inherit(fz_context *ctx, pdf_obj *node, char *text, int depth)
1334{
1335 do
1336 {
1337 pdf_obj *o = pdf_dict_gets(ctx, node, text);
1338
1339 if (o)
1340 return pdf_resolve_indirect(ctx, o);
1341 node = pdf_dict_get(ctx, node, PDF_NAME(Parent));
1342 depth--;
1343 }
1344 while (depth >= 0 && node);
1345
1346 return NULL;
1347}
1348
1349static int
1350lpr(fz_context *ctx, pdf_document *doc, pdf_obj *node, int depth, int page)
1351{
1352 pdf_obj *kids;
1353 pdf_obj *o = NULL;
1354 int i, n;
1355
1356 if (pdf_mark_obj(ctx, node))
1357 return page;
1358
1359 fz_var(o);
1360
1361 fz_try(ctx)
1362 {
1363 if (pdf_name_eq(ctx, PDF_NAME(Page), pdf_dict_get(ctx, node, PDF_NAME(Type))))
1364 {
1365 pdf_obj *r; /* r is deliberately not cleaned up */
1366
1367 /* Copy resources down to the child */
1368 o = pdf_keep_obj(ctx, pdf_dict_get(ctx, node, PDF_NAME(Resources)));
1369 if (!o)
1370 {
1371 o = pdf_keep_obj(ctx, pdf_new_dict(ctx, doc, 2));
1372 pdf_dict_put(ctx, node, PDF_NAME(Resources), o);
1373 }
1374 lpr_inherit_res(ctx, node, depth, o);
1375 r = lpr_inherit(ctx, node, "MediaBox", depth);
1376 if (r)
1377 pdf_dict_put(ctx, node, PDF_NAME(MediaBox), r);
1378 r = lpr_inherit(ctx, node, "CropBox", depth);
1379 if (r)
1380 pdf_dict_put(ctx, node, PDF_NAME(CropBox), r);
1381 r = lpr_inherit(ctx, node, "BleedBox", depth);
1382 if (r)
1383 pdf_dict_put(ctx, node, PDF_NAME(BleedBox), r);
1384 r = lpr_inherit(ctx, node, "TrimBox", depth);
1385 if (r)
1386 pdf_dict_put(ctx, node, PDF_NAME(TrimBox), r);
1387 r = lpr_inherit(ctx, node, "ArtBox", depth);
1388 if (r)
1389 pdf_dict_put(ctx, node, PDF_NAME(ArtBox), r);
1390 r = lpr_inherit(ctx, node, "Rotate", depth);
1391 if (r)
1392 pdf_dict_put(ctx, node, PDF_NAME(Rotate), r);
1393 page++;
1394 }
1395 else
1396 {
1397 kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
1398 n = pdf_array_len(ctx, kids);
1399 for(i = 0; i < n; i++)
1400 {
1401 page = lpr(ctx, doc, pdf_array_get(ctx, kids, i), depth+1, page);
1402 }
1403 pdf_dict_del(ctx, node, PDF_NAME(Resources));
1404 pdf_dict_del(ctx, node, PDF_NAME(MediaBox));
1405 pdf_dict_del(ctx, node, PDF_NAME(CropBox));
1406 pdf_dict_del(ctx, node, PDF_NAME(BleedBox));
1407 pdf_dict_del(ctx, node, PDF_NAME(TrimBox));
1408 pdf_dict_del(ctx, node, PDF_NAME(ArtBox));
1409 pdf_dict_del(ctx, node, PDF_NAME(Rotate));
1410 }
1411 }
1412 fz_always(ctx)
1413 {
1414 pdf_drop_obj(ctx, o);
1415 }
1416 fz_catch(ctx)
1417 {
1418 fz_rethrow(ctx);
1419 }
1420
1421 pdf_unmark_obj(ctx, node);
1422
1423 return page;
1424}
1425
1426void
1427pdf_localise_page_resources(fz_context *ctx, pdf_document *doc)
1428{
1429 if (doc->resources_localised)
1430 return;
1431
1432 lpr(ctx, doc, pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), PDF_NAME(Pages), NULL), 0, 0);
1433
1434 doc->resources_localised = 1;
1435}
1436
1437static void
1438linearize(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
1439{
1440 int i;
1441 int n = pdf_xref_len(ctx, doc) + 2;
1442 int *reorder;
1443 int *rev_renumber_map;
1444
1445 opts->page_object_lists = page_objects_list_create(ctx);
1446
1447 /* Ensure that every page has local references of its resources */
1448 /* FIXME: We could 'thin' the resources according to what is actually
1449 * required for each page, but this would require us to run the page
1450 * content streams. */
1451 pdf_localise_page_resources(ctx, doc);
1452
1453 /* Walk the objects for each page, marking which ones are used, where */
1454 memset(opts->use_list, 0, n * sizeof(int));
1455 mark_trailer(ctx, doc, opts, pdf_trailer(ctx, doc));
1456
1457 /* Add new objects required for linearization */
1458 add_linearization_objs(ctx, doc, opts);
1459
1460#ifdef DEBUG_WRITING
1461 fprintf(stderr, "Usage calculated:\n");
1462 for (i=0; i < pdf_xref_len(ctx, doc); i++)
1463 {
1464 fprintf(stderr, "%d: use=%d\n", i, opts->use_list[i]);
1465 }
1466#endif
1467
1468 /* Allocate/init the structures used for renumbering the objects */
1469 reorder = fz_calloc(ctx, n, sizeof(int));
1470 rev_renumber_map = fz_calloc(ctx, n, sizeof(int));
1471 for (i = 0; i < n; i++)
1472 {
1473 reorder[i] = i;
1474 }
1475
1476 /* Heap sort the reordering */
1477 heap_sort(reorder+1, n-1, opts->use_list, &order_ge);
1478
1479#ifdef DEBUG_WRITING
1480 fprintf(stderr, "Reordered:\n");
1481 for (i=1; i < pdf_xref_len(ctx, doc); i++)
1482 {
1483 fprintf(stderr, "%d: use=%d\n", i, opts->use_list[reorder[i]]);
1484 }
1485#endif
1486
1487 /* Find the split point */
1488 for (i = 1; (opts->use_list[reorder[i]] & USE_PARAMS) == 0; i++) {}
1489 opts->start = i;
1490
1491 /* Roll the reordering into the renumber_map */
1492 for (i = 0; i < n; i++)
1493 {
1494 opts->renumber_map[reorder[i]] = i;
1495 rev_renumber_map[i] = opts->rev_renumber_map[reorder[i]];
1496 }
1497 fz_free(ctx, opts->rev_renumber_map);
1498 opts->rev_renumber_map = rev_renumber_map;
1499 fz_free(ctx, reorder);
1500
1501 /* Apply the renumber_map */
1502 page_objects_list_renumber(opts);
1503 renumberobjs(ctx, doc, opts);
1504
1505 page_objects_list_sort_and_dedupe(ctx, opts->page_object_lists);
1506}
1507
1508static void
1509update_linearization_params(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
1510{
1511 int64_t offset;
1512 pdf_set_int(ctx, opts->linear_l, opts->file_len);
1513 /* Primary hint stream offset (of object, not stream!) */
1514 pdf_set_int(ctx, opts->linear_h0, opts->ofs_list[pdf_xref_len(ctx, doc)-1]);
1515 /* Primary hint stream length (of object, not stream!) */
1516 offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
1517 pdf_set_int(ctx, opts->linear_h1, offset - opts->ofs_list[pdf_xref_len(ctx, doc)-1]);
1518 /* Object number of first pages page object (the first object of page 0) */
1519 pdf_set_int(ctx, opts->linear_o, opts->page_object_lists->page[0]->object[0]);
1520 /* Offset of end of first page (first page is followed by primary
1521 * hint stream (object n-1) then remaining pages (object 1...). The
1522 * primary hint stream counts as part of the first pages data, I think.
1523 */
1524 offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
1525 pdf_set_int(ctx, opts->linear_e, offset);
1526 /* Number of pages in document */
1527 pdf_set_int(ctx, opts->linear_n, opts->page_count);
1528 /* Offset of first entry in main xref table */
1529 pdf_set_int(ctx, opts->linear_t, opts->first_xref_entry_offset + opts->hintstream_len);
1530 /* Offset of shared objects hint table in the primary hint stream */
1531 pdf_set_int(ctx, opts->hints_s, opts->hints_shared_offset);
1532 /* Primary hint stream length */
1533 pdf_set_int(ctx, opts->hints_length, opts->hintstream_len);
1534}
1535
1536/*
1537 * Make sure we have loaded objects from object streams.
1538 */
1539
1540static void preloadobjstms(fz_context *ctx, pdf_document *doc)
1541{
1542 pdf_obj *obj;
1543 int num;
1544
1545 /* xref_len may change due to repair, so check it every iteration */
1546 for (num = 0; num < pdf_xref_len(ctx, doc); num++)
1547 {
1548 if (pdf_get_xref_entry(ctx, doc, num)->type == 'o')
1549 {
1550 obj = pdf_load_object(ctx, doc, num);
1551 pdf_drop_obj(ctx, obj);
1552 }
1553 }
1554}
1555
1556/*
1557 * Save streams and objects to the output
1558 */
1559
/*
	Return 1 if byte value c is outside the range 32..127 and is not a
	newline, carriage return or tab; return 0 otherwise.
*/
static inline int isbinary(int c)
{
	switch (c)
	{
	case '\n':
	case '\r':
	case '\t':
		return 0;
	default:
		return !(c >= 32 && c <= 127);
	}
}
1566
1567static int isbinarystream(fz_context *ctx, const unsigned char *data, size_t len)
1568{
1569 size_t i;
1570 for (i = 0; i < len; i++)
1571 if (isbinary(data[i]))
1572 return 1;
1573 return 0;
1574}
1575
1576static fz_buffer *hexbuf(fz_context *ctx, const unsigned char *p, size_t n)
1577{
1578 static const char hex[17] = "0123456789abcdef";
1579 int x = 0;
1580 size_t len = n * 2 + (n / 32) + 1;
1581 unsigned char *data = fz_malloc(ctx, len);
1582 fz_buffer *buf = fz_new_buffer_from_data(ctx, data, len);
1583
1584 while (n--)
1585 {
1586 *data++ = hex[*p >> 4];
1587 *data++ = hex[*p & 15];
1588 if (++x == 32)
1589 {
1590 *data++ = '\n';
1591 x = 0;
1592 }
1593 p++;
1594 }
1595
1596 *data++ = '>';
1597
1598 return buf;
1599}
1600
1601static void addhexfilter(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
1602{
1603 pdf_obj *f, *dp, *newf, *newdp;
1604
1605 newf = newdp = NULL;
1606 f = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
1607 dp = pdf_dict_get(ctx, dict, PDF_NAME(DecodeParms));
1608
1609 fz_var(newf);
1610 fz_var(newdp);
1611
1612 fz_try(ctx)
1613 {
1614 if (pdf_is_name(ctx, f))
1615 {
1616 newf = pdf_new_array(ctx, doc, 2);
1617 pdf_array_push(ctx, newf, PDF_NAME(ASCIIHexDecode));
1618 pdf_array_push(ctx, newf, f);
1619 f = newf;
1620 if (pdf_is_dict(ctx, dp))
1621 {
1622 newdp = pdf_new_array(ctx, doc, 2);
1623 pdf_array_push(ctx, newdp, PDF_NULL);
1624 pdf_array_push(ctx, newdp, dp);
1625 dp = newdp;
1626 }
1627 }
1628 else if (pdf_is_array(ctx, f))
1629 {
1630 pdf_array_insert(ctx, f, PDF_NAME(ASCIIHexDecode), 0);
1631 if (pdf_is_array(ctx, dp))
1632 pdf_array_insert(ctx, dp, PDF_NULL, 0);
1633 }
1634 else
1635 f = PDF_NAME(ASCIIHexDecode);
1636
1637 pdf_dict_put(ctx, dict, PDF_NAME(Filter), f);
1638 if (dp)
1639 pdf_dict_put(ctx, dict, PDF_NAME(DecodeParms), dp);
1640 }
1641 fz_always(ctx)
1642 {
1643 pdf_drop_obj(ctx, newf);
1644 pdf_drop_obj(ctx, newdp);
1645 }
1646 fz_catch(ctx)
1647 fz_rethrow(ctx);
1648}
1649
1650static fz_buffer *deflatebuf(fz_context *ctx, const unsigned char *p, size_t n)
1651{
1652 fz_buffer *buf;
1653 uLongf csize;
1654 int t;
1655 uLong longN = (uLong)n;
1656 unsigned char *data;
1657 size_t cap;
1658
1659 if (n != (size_t)longN)
1660 fz_throw(ctx, FZ_ERROR_GENERIC, "Buffer too large to deflate");
1661
1662 cap = compressBound(longN);
1663 data = fz_malloc(ctx, cap);
1664 buf = fz_new_buffer_from_data(ctx, data, cap);
1665 csize = (uLongf)cap;
1666 t = compress(data, &csize, p, longN);
1667 if (t != Z_OK)
1668 {
1669 fz_drop_buffer(ctx, buf);
1670 fz_throw(ctx, FZ_ERROR_GENERIC, "cannot deflate buffer");
1671 }
1672 fz_resize_buffer(ctx, buf, csize);
1673 return buf;
1674}
1675
1676static int striphexfilter(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
1677{
1678 pdf_obj *f, *dp;
1679 int is_hex = 0;
1680
1681 f = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
1682 dp = pdf_dict_get(ctx, dict, PDF_NAME(DecodeParms));
1683
1684 if (pdf_is_array(ctx, f))
1685 {
1686 /* Remove ASCIIHexDecode from head of filter list */
1687 if (pdf_array_get(ctx, f, 0) == PDF_NAME(ASCIIHexDecode))
1688 {
1689 is_hex = 1;
1690 pdf_array_delete(ctx, f, 0);
1691 if (pdf_is_array(ctx, dp))
1692 pdf_array_delete(ctx, dp, 0);
1693 }
1694 /* Unpack array if only one filter remains */
1695 if (pdf_array_len(ctx, f) == 1)
1696 {
1697 f = pdf_array_get(ctx, f, 0);
1698 pdf_dict_put(ctx, dict, PDF_NAME(Filter), f);
1699 if (dp)
1700 {
1701 dp = pdf_array_get(ctx, dp, 0);
1702 pdf_dict_put(ctx, dict, PDF_NAME(DecodeParms), dp);
1703 }
1704 }
1705 /* Remove array if no filters remain */
1706 else if (pdf_array_len(ctx, f) == 0)
1707 {
1708 pdf_dict_del(ctx, dict, PDF_NAME(Filter));
1709 pdf_dict_del(ctx, dict, PDF_NAME(DecodeParms));
1710 }
1711 }
1712 else if (f == PDF_NAME(ASCIIHexDecode))
1713 {
1714 is_hex = 1;
1715 pdf_dict_del(ctx, dict, PDF_NAME(Filter));
1716 pdf_dict_del(ctx, dict, PDF_NAME(DecodeParms));
1717 }
1718
1719 return is_hex;
1720}
1721
1722static fz_buffer *unhexbuf(fz_context *ctx, const unsigned char *p, size_t n)
1723{
1724 fz_stream *mstm = NULL;
1725 fz_stream *xstm = NULL;
1726 fz_buffer *out = NULL;
1727 fz_var(mstm);
1728 fz_var(xstm);
1729 fz_try(ctx)
1730 {
1731 mstm = fz_open_memory(ctx, p, n);
1732 xstm = fz_open_ahxd(ctx, mstm);
1733 out = fz_read_all(ctx, xstm, n/2);
1734 }
1735 fz_always(ctx)
1736 {
1737 fz_drop_stream(ctx, xstm);
1738 fz_drop_stream(ctx, mstm);
1739 }
1740 fz_catch(ctx)
1741 fz_rethrow(ctx);
1742 return out;
1743}
1744
1745static void write_data(fz_context *ctx, void *arg, const unsigned char *data, int len)
1746{
1747 fz_write_data(ctx, (fz_output *)arg, data, len);
1748}
1749
1750static void copystream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen, int do_deflate, int unenc)
1751{
1752 fz_buffer *tmp_unhex = NULL, *tmp_flate = NULL, *tmp_hex = NULL, *buf = NULL;
1753 pdf_obj *obj = NULL;
1754 size_t len;
1755 unsigned char *data;
1756
1757 fz_var(buf);
1758 fz_var(tmp_flate);
1759 fz_var(tmp_hex);
1760 fz_var(obj);
1761
1762 fz_try(ctx)
1763 {
1764 buf = pdf_load_raw_stream_number(ctx, doc, num);
1765 obj = pdf_copy_dict(ctx, obj_orig);
1766
1767 len = fz_buffer_storage(ctx, buf, &data);
1768
1769 if (do_deflate && striphexfilter(ctx, doc, obj))
1770 {
1771 tmp_unhex = unhexbuf(ctx, data, len);
1772 len = fz_buffer_storage(ctx, tmp_unhex, &data);
1773 }
1774
1775 if (do_deflate && !pdf_dict_get(ctx, obj, PDF_NAME(Filter)))
1776 {
1777 size_t clen;
1778 unsigned char *cdata;
1779 tmp_flate = deflatebuf(ctx, data, len);
1780 clen = fz_buffer_storage(ctx, tmp_flate, &cdata);
1781 if (clen < len)
1782 {
1783 len = clen;
1784 data = cdata;
1785 pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
1786 }
1787 }
1788
1789 if (opts->do_ascii && isbinarystream(ctx, data, len))
1790 {
1791 tmp_hex = hexbuf(ctx, data, len);
1792 len = fz_buffer_storage(ctx, tmp_hex, &data);
1793 addhexfilter(ctx, doc, obj);
1794 }
1795
1796 fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
1797
1798 if (unenc)
1799 {
1800 pdf_dict_put_int(ctx, obj, PDF_NAME(Length), len);
1801 pdf_print_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii);
1802 fz_write_string(ctx, opts->out, "\nstream\n");
1803 fz_write_data(ctx, opts->out, data, len);
1804 }
1805 else
1806 {
1807 pdf_dict_put_int(ctx, obj, PDF_NAME(Length), pdf_encrypted_len(ctx, opts->crypt, num, gen, (int)len));
1808 pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, opts->crypt, num, gen);
1809 fz_write_string(ctx, opts->out, "\nstream\n");
1810 pdf_encrypt_data(ctx, opts->crypt, num, gen, write_data, opts->out, data, len);
1811 }
1812
1813 fz_write_string(ctx, opts->out, "\nendstream\nendobj\n\n");
1814 }
1815 fz_always(ctx)
1816 {
1817 fz_drop_buffer(ctx, tmp_unhex);
1818 fz_drop_buffer(ctx, tmp_hex);
1819 fz_drop_buffer(ctx, tmp_flate);
1820 fz_drop_buffer(ctx, buf);
1821 pdf_drop_obj(ctx, obj);
1822 }
1823 fz_catch(ctx)
1824 {
1825 fz_rethrow(ctx);
1826 }
1827}
1828
1829static void expandstream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *obj_orig, int num, int gen, int do_deflate, int unenc)
1830{
1831 fz_buffer *buf = NULL, *tmp_flate = NULL, *tmp_hex = NULL;
1832 pdf_obj *obj = NULL;
1833 size_t len;
1834 unsigned char *data;
1835
1836 fz_var(buf);
1837 fz_var(tmp_flate);
1838 fz_var(tmp_hex);
1839 fz_var(obj);
1840
1841 fz_try(ctx)
1842 {
1843 buf = pdf_load_stream_number(ctx, doc, num);
1844 obj = pdf_copy_dict(ctx, obj_orig);
1845 pdf_dict_del(ctx, obj, PDF_NAME(Filter));
1846 pdf_dict_del(ctx, obj, PDF_NAME(DecodeParms));
1847
1848 len = fz_buffer_storage(ctx, buf, &data);
1849 if (do_deflate)
1850 {
1851 unsigned char *cdata;
1852 size_t clen;
1853 tmp_flate = deflatebuf(ctx, data, len);
1854 clen = fz_buffer_storage(ctx, tmp_flate, &cdata);
1855 if (clen < len)
1856 {
1857 len = clen;
1858 data = cdata;
1859 pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
1860 }
1861 }
1862
1863 if (opts->do_ascii && isbinarystream(ctx, data, len))
1864 {
1865 tmp_hex = hexbuf(ctx, data, len);
1866 len = fz_buffer_storage(ctx, tmp_hex, &data);
1867 addhexfilter(ctx, doc, obj);
1868 }
1869
1870 fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
1871
1872 if (unenc)
1873 {
1874 pdf_dict_put_int(ctx, obj, PDF_NAME(Length), len);
1875 pdf_print_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii);
1876 fz_write_string(ctx, opts->out, "\nstream\n");
1877 fz_write_data(ctx, opts->out, data, len);
1878 }
1879 else
1880 {
1881 pdf_dict_put_int(ctx, obj, PDF_NAME(Length), pdf_encrypted_len(ctx, opts->crypt, num, gen, (int)len));
1882 pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, opts->crypt, num, gen);
1883 fz_write_string(ctx, opts->out, "\nstream\n");
1884 pdf_encrypt_data(ctx, opts->crypt, num, gen, write_data, opts->out, data, len);
1885 }
1886
1887 fz_write_string(ctx, opts->out, "\nendstream\nendobj\n\n");
1888 }
1889 fz_always(ctx)
1890 {
1891 fz_drop_buffer(ctx, tmp_hex);
1892 fz_drop_buffer(ctx, tmp_flate);
1893 fz_drop_buffer(ctx, buf);
1894 pdf_drop_obj(ctx, obj);
1895 }
1896 fz_catch(ctx)
1897 {
1898 fz_rethrow(ctx);
1899 }
1900}
1901
1902static int is_image_filter(pdf_obj *s)
1903{
1904 return
1905 s == PDF_NAME(CCITTFaxDecode) || s == PDF_NAME(CCF) ||
1906 s == PDF_NAME(DCTDecode) || s == PDF_NAME(DCT) ||
1907 s == PDF_NAME(RunLengthDecode) || s == PDF_NAME(RL) ||
1908 s == PDF_NAME(JBIG2Decode) ||
1909 s == PDF_NAME(JPXDecode);
1910}
1911
1912static int filter_implies_image(fz_context *ctx, pdf_obj *o)
1913{
1914 if (pdf_is_name(ctx, o))
1915 return is_image_filter(o);
1916 if (pdf_is_array(ctx, o))
1917 {
1918 int i, len;
1919 len = pdf_array_len(ctx, o);
1920 for (i = 0; i < len; i++)
1921 if (is_image_filter(pdf_array_get(ctx, o, i)))
1922 return 1;
1923 }
1924 return 0;
1925}
1926
1927static int is_jpx_filter(fz_context *ctx, pdf_obj *o)
1928{
1929 if (o == PDF_NAME(JPXDecode))
1930 return 1;
1931 if (pdf_is_array(ctx, o))
1932 {
1933 int i, len;
1934 len = pdf_array_len(ctx, o);
1935 for (i = 0; i < len; i++)
1936 if (pdf_array_get(ctx, o, i) == PDF_NAME(JPXDecode))
1937 return 1;
1938 }
1939 return 0;
1940}
1941
1942static int is_image_stream(fz_context *ctx, pdf_obj *obj)
1943{
1944 pdf_obj *o;
1945 if ((o = pdf_dict_get(ctx, obj, PDF_NAME(Type)), pdf_name_eq(ctx, o, PDF_NAME(XObject))))
1946 if ((o = pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), pdf_name_eq(ctx, o, PDF_NAME(Image))))
1947 return 1;
1948 if (o = pdf_dict_get(ctx, obj, PDF_NAME(Filter)), filter_implies_image(ctx, o))
1949 return 1;
1950 if (pdf_dict_get(ctx, obj, PDF_NAME(Width)) != NULL && pdf_dict_get(ctx, obj, PDF_NAME(Height)) != NULL)
1951 return 1;
1952 return 0;
1953}
1954
1955static int is_font_stream(fz_context *ctx, pdf_obj *obj)
1956{
1957 pdf_obj *o;
1958 if (o = pdf_dict_get(ctx, obj, PDF_NAME(Type)), pdf_name_eq(ctx, o, PDF_NAME(Font)))
1959 return 1;
1960 if (o = pdf_dict_get(ctx, obj, PDF_NAME(Type)), pdf_name_eq(ctx, o, PDF_NAME(FontDescriptor)))
1961 return 1;
1962 if (pdf_dict_get(ctx, obj, PDF_NAME(Length1)) != NULL)
1963 return 1;
1964 if (pdf_dict_get(ctx, obj, PDF_NAME(Length2)) != NULL)
1965 return 1;
1966 if (pdf_dict_get(ctx, obj, PDF_NAME(Length3)) != NULL)
1967 return 1;
1968 if (o = pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), pdf_name_eq(ctx, o, PDF_NAME(Type1C)))
1969 return 1;
1970 if (o = pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), pdf_name_eq(ctx, o, PDF_NAME(CIDFontType0C)))
1971 return 1;
1972 return 0;
1973}
1974
1975static int is_jpx_stream(fz_context *ctx, pdf_obj *obj)
1976{
1977 pdf_obj *o;
1978 if (o = pdf_dict_get(ctx, obj, PDF_NAME(Filter)), is_jpx_filter(ctx, o))
1979 return 1;
1980 return 0;
1981}
1982
1983
1984static int is_xml_metadata(fz_context *ctx, pdf_obj *obj)
1985{
1986 if (pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Type)), PDF_NAME(Metadata)))
1987 if (pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), PDF_NAME(XML)))
1988 return 1;
1989 return 0;
1990}
1991
1992static void writeobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int num, int gen, int skip_xrefs, int unenc)
1993{
1994 pdf_obj *obj = NULL;
1995 fz_buffer *buf = NULL;
1996 int do_deflate = 0;
1997 int do_expand = 0;
1998 int skip = 0;
1999
2000 fz_var(obj);
2001 fz_var(buf);
2002
2003 if (opts->do_encrypt == PDF_ENCRYPT_NONE)
2004 unenc = 1;
2005
2006 fz_try(ctx)
2007 {
2008 obj = pdf_load_object(ctx, doc, num);
2009
2010 /* skip ObjStm and XRef objects */
2011 if (pdf_is_dict(ctx, obj))
2012 {
2013 pdf_obj *type = pdf_dict_get(ctx, obj, PDF_NAME(Type));
2014 if (type == PDF_NAME(ObjStm))
2015 {
2016 opts->use_list[num] = 0;
2017 skip = 1;
2018 }
2019 if (skip_xrefs && type == PDF_NAME(XRef))
2020 {
2021 opts->use_list[num] = 0;
2022 skip = 1;
2023 }
2024 }
2025
2026 if (!skip)
2027 {
2028 if (pdf_obj_num_is_stream(ctx, doc, num))
2029 {
2030 do_deflate = opts->do_compress;
2031 do_expand = opts->do_expand;
2032 if (opts->do_compress_images && is_image_stream(ctx, obj))
2033 do_deflate = 1, do_expand = 0;
2034 if (opts->do_compress_fonts && is_font_stream(ctx, obj))
2035 do_deflate = 1, do_expand = 0;
2036 if (is_xml_metadata(ctx, obj))
2037 do_deflate = 0, do_expand = 0;
2038 if (is_jpx_stream(ctx, obj))
2039 do_deflate = 0, do_expand = 0;
2040
2041 if (do_expand)
2042 expandstream(ctx, doc, opts, obj, num, gen, do_deflate, unenc);
2043 else
2044 copystream(ctx, doc, opts, obj, num, gen, do_deflate, unenc);
2045 }
2046 else
2047 {
2048 fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
2049 pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, unenc ? NULL : opts->crypt, num, gen);
2050 fz_write_string(ctx, opts->out, "\nendobj\n\n");
2051 }
2052 }
2053 }
2054 fz_always(ctx)
2055 {
2056 fz_drop_buffer(ctx, buf);
2057 pdf_drop_obj(ctx, obj);
2058 }
2059 fz_catch(ctx)
2060 {
2061 fz_rethrow(ctx);
2062 }
2063}
2064
2065static void writexrefsubsect(fz_context *ctx, pdf_write_state *opts, int from, int to)
2066{
2067 int num;
2068
2069 fz_write_printf(ctx, opts->out, "%d %d\n", from, to - from);
2070 for (num = from; num < to; num++)
2071 {
2072 if (opts->use_list[num])
2073 fz_write_printf(ctx, opts->out, "%010lu %05d n \n", opts->ofs_list[num], opts->gen_list[num]);
2074 else
2075 fz_write_printf(ctx, opts->out, "%010lu %05d f \n", opts->ofs_list[num], opts->gen_list[num]);
2076 }
2077}
2078
2079static void writexref(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int from, int to, int first, int64_t main_xref_offset, int64_t startxref)
2080{
2081 pdf_obj *trailer = NULL;
2082 pdf_obj *obj;
2083 pdf_obj *nobj = NULL;
2084
2085 fz_write_string(ctx, opts->out, "xref\n");
2086 opts->first_xref_entry_offset = fz_tell_output(ctx, opts->out);
2087
2088 if (opts->do_incremental)
2089 {
2090 int subfrom = from;
2091 int subto;
2092
2093 while (subfrom < to)
2094 {
2095 while (subfrom < to && !pdf_xref_is_incremental(ctx, doc, subfrom))
2096 subfrom++;
2097
2098 subto = subfrom;
2099 while (subto < to && pdf_xref_is_incremental(ctx, doc, subto))
2100 subto++;
2101
2102 if (subfrom < subto)
2103 writexrefsubsect(ctx, opts, subfrom, subto);
2104
2105 subfrom = subto;
2106 }
2107 }
2108 else
2109 {
2110 writexrefsubsect(ctx, opts, from, to);
2111 }
2112
2113 fz_write_string(ctx, opts->out, "\n");
2114
2115 fz_var(trailer);
2116
2117 if (opts->do_incremental)
2118 {
2119 trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));
2120 pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), pdf_xref_len(ctx, doc));
2121 pdf_dict_put_int(ctx, trailer, PDF_NAME(Prev), doc->startxref);
2122 doc->startxref = startxref;
2123 }
2124 else
2125 {
2126 trailer = pdf_new_dict(ctx, doc, 5);
2127
2128 nobj = pdf_new_int(ctx, to);
2129 pdf_dict_put_drop(ctx, trailer, PDF_NAME(Size), nobj);
2130
2131 if (first)
2132 {
2133 obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
2134 if (obj)
2135 pdf_dict_put(ctx, trailer, PDF_NAME(Info), obj);
2136
2137 obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
2138 if (obj)
2139 pdf_dict_put(ctx, trailer, PDF_NAME(Root), obj);
2140
2141 obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
2142 if (obj)
2143 pdf_dict_put(ctx, trailer, PDF_NAME(ID), obj);
2144
2145 obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
2146 if (obj)
2147 pdf_dict_put(ctx, trailer, PDF_NAME(Encrypt), obj);
2148 }
2149 if (main_xref_offset != 0)
2150 {
2151 nobj = pdf_new_int(ctx, main_xref_offset);
2152 pdf_dict_put_drop(ctx, trailer, PDF_NAME(Prev), nobj);
2153 }
2154 }
2155
2156 fz_write_string(ctx, opts->out, "trailer\n");
2157 /* Trailer is NOT encrypted */
2158 pdf_print_obj(ctx, opts->out, trailer, opts->do_tight, opts->do_ascii);
2159 fz_write_string(ctx, opts->out, "\n");
2160
2161 pdf_drop_obj(ctx, trailer);
2162
2163 fz_write_printf(ctx, opts->out, "startxref\n%lu\n%%%%EOF\n", startxref);
2164
2165 doc->has_xref_streams = 0;
2166}
2167
2168static void writexrefstreamsubsect(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_obj *index, fz_buffer *fzbuf, int from, int to)
2169{
2170 int num;
2171
2172 pdf_array_push_int(ctx, index, from);
2173 pdf_array_push_int(ctx, index, to - from);
2174 for (num = from; num < to; num++)
2175 {
2176 fz_append_byte(ctx, fzbuf, opts->use_list[num] ? 1 : 0);
2177 fz_append_byte(ctx, fzbuf, opts->ofs_list[num]>>24);
2178 fz_append_byte(ctx, fzbuf, opts->ofs_list[num]>>16);
2179 fz_append_byte(ctx, fzbuf, opts->ofs_list[num]>>8);
2180 fz_append_byte(ctx, fzbuf, opts->ofs_list[num]);
2181 fz_append_byte(ctx, fzbuf, opts->gen_list[num]);
2182 }
2183}
2184
2185static void writexrefstream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int from, int to, int first, int64_t main_xref_offset, int64_t startxref)
2186{
2187 int num;
2188 pdf_obj *dict = NULL;
2189 pdf_obj *obj;
2190 pdf_obj *w = NULL;
2191 pdf_obj *index;
2192 fz_buffer *fzbuf = NULL;
2193
2194 fz_var(dict);
2195 fz_var(w);
2196 fz_var(fzbuf);
2197 fz_try(ctx)
2198 {
2199 num = pdf_create_object(ctx, doc);
2200 dict = pdf_new_dict(ctx, doc, 6);
2201 pdf_update_object(ctx, doc, num, dict);
2202
2203 opts->first_xref_entry_offset = fz_tell_output(ctx, opts->out);
2204
2205 to++;
2206
2207 if (first)
2208 {
2209 obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
2210 if (obj)
2211 pdf_dict_put(ctx, dict, PDF_NAME(Info), obj);
2212
2213 obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
2214 if (obj)
2215 pdf_dict_put(ctx, dict, PDF_NAME(Root), obj);
2216
2217 obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
2218 if (obj)
2219 pdf_dict_put(ctx, dict, PDF_NAME(ID), obj);
2220
2221 if (opts->do_incremental)
2222 {
2223 obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
2224 if (obj)
2225 pdf_dict_put(ctx, dict, PDF_NAME(Encrypt), obj);
2226 }
2227 }
2228
2229 pdf_dict_put_int(ctx, dict, PDF_NAME(Size), to);
2230
2231 if (opts->do_incremental)
2232 {
2233 pdf_dict_put_int(ctx, dict, PDF_NAME(Prev), doc->startxref);
2234 doc->startxref = startxref;
2235 }
2236 else
2237 {
2238 if (main_xref_offset != 0)
2239 pdf_dict_put_int(ctx, dict, PDF_NAME(Prev), main_xref_offset);
2240 }
2241
2242 pdf_dict_put(ctx, dict, PDF_NAME(Type), PDF_NAME(XRef));
2243
2244 w = pdf_new_array(ctx, doc, 3);
2245 pdf_dict_put(ctx, dict, PDF_NAME(W), w);
2246 pdf_array_push_int(ctx, w, 1);
2247 pdf_array_push_int(ctx, w, 4);
2248 pdf_array_push_int(ctx, w, 1);
2249
2250 index = pdf_new_array(ctx, doc, 2);
2251 pdf_dict_put_drop(ctx, dict, PDF_NAME(Index), index);
2252
2253 /* opts->gen_list[num] is already initialized by fz_calloc. */
2254 opts->use_list[num] = 1;
2255 opts->ofs_list[num] = opts->first_xref_entry_offset;
2256
2257 fzbuf = fz_new_buffer(ctx, (1 + 4 + 1) * (to-from));
2258
2259 if (opts->do_incremental)
2260 {
2261 int subfrom = from;
2262 int subto;
2263
2264 while (subfrom < to)
2265 {
2266 while (subfrom < to && !pdf_xref_is_incremental(ctx, doc, subfrom))
2267 subfrom++;
2268
2269 subto = subfrom;
2270 while (subto < to && pdf_xref_is_incremental(ctx, doc, subto))
2271 subto++;
2272
2273 if (subfrom < subto)
2274 writexrefstreamsubsect(ctx, doc, opts, index, fzbuf, subfrom, subto);
2275
2276 subfrom = subto;
2277 }
2278 }
2279 else
2280 {
2281 writexrefstreamsubsect(ctx, doc, opts, index, fzbuf, from, to);
2282 }
2283
2284 pdf_update_stream(ctx, doc, dict, fzbuf, 0);
2285
2286 writeobject(ctx, doc, opts, num, 0, 0, 1);
2287 fz_write_printf(ctx, opts->out, "startxref\n%lu\n%%%%EOF\n", startxref);
2288 }
2289 fz_always(ctx)
2290 {
2291 pdf_drop_obj(ctx, dict);
2292 pdf_drop_obj(ctx, w);
2293 fz_drop_buffer(ctx, fzbuf);
2294 }
2295 fz_catch(ctx)
2296 {
2297 fz_rethrow(ctx);
2298 }
2299
2300 doc->has_old_style_xrefs = 0;
2301}
2302
2303static void
2304padto(fz_context *ctx, fz_output *out, int64_t target)
2305{
2306 int64_t pos = fz_tell_output(ctx, out);
2307
2308 assert(pos <= target);
2309 while (pos < target)
2310 {
2311 fz_write_byte(ctx, out, '\n');
2312 pos++;
2313 }
2314}
2315
2316static void
2317dowriteobject(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int num, int pass)
2318{
2319 pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, num);
2320 if (entry->type == 'f')
2321 opts->gen_list[num] = entry->gen;
2322 if (entry->type == 'n')
2323 opts->gen_list[num] = entry->gen;
2324 if (entry->type == 'o')
2325 opts->gen_list[num] = 0;
2326
2327 /* If we are renumbering, then make sure all generation numbers are
2328 * zero (except object 0 which must be free, and have a gen number of
2329 * 65535). Changing the generation numbers (and indeed object numbers)
2330 * will break encryption - so only do this if we are renumbering
2331 * anyway. */
2332 if (opts->do_garbage >= 2)
2333 opts->gen_list[num] = (num == 0 ? 65535 : 0);
2334
2335 if (opts->do_garbage && !opts->use_list[num])
2336 return;
2337
2338 if (entry->type == 'n' || entry->type == 'o')
2339 {
2340 if (pass > 0)
2341 padto(ctx, opts->out, opts->ofs_list[num]);
2342 if (!opts->do_incremental || pdf_xref_is_incremental(ctx, doc, num))
2343 {
2344 opts->ofs_list[num] = fz_tell_output(ctx, opts->out);
2345 writeobject(ctx, doc, opts, num, opts->gen_list[num], 1, num == opts->crypt_object_number);
2346 }
2347 }
2348 else
2349 opts->use_list[num] = 0;
2350}
2351
2352static void
2353writeobjects(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, int pass)
2354{
2355 int num;
2356 int xref_len = pdf_xref_len(ctx, doc);
2357
2358 if (!opts->do_incremental)
2359 {
2360 fz_write_printf(ctx, opts->out, "%%PDF-%d.%d\n", doc->version / 10, doc->version % 10);
2361 fz_write_string(ctx, opts->out, "%\xC2\xB5\xC2\xB6\n\n");
2362 }
2363
2364 dowriteobject(ctx, doc, opts, opts->start, pass);
2365
2366 if (opts->do_linear)
2367 {
2368 /* Write first xref */
2369 if (pass == 0)
2370 opts->first_xref_offset = fz_tell_output(ctx, opts->out);
2371 else
2372 padto(ctx, opts->out, opts->first_xref_offset);
2373 writexref(ctx, doc, opts, opts->start, pdf_xref_len(ctx, doc), 1, opts->main_xref_offset, 0);
2374 }
2375
2376 for (num = opts->start+1; num < xref_len; num++)
2377 dowriteobject(ctx, doc, opts, num, pass);
2378 if (opts->do_linear && pass == 1)
2379 {
2380 int64_t offset = (opts->start == 1 ? opts->main_xref_offset : opts->ofs_list[1] + opts->hintstream_len);
2381 padto(ctx, opts->out, offset);
2382 }
2383 for (num = 1; num < opts->start; num++)
2384 {
2385 if (pass == 1)
2386 opts->ofs_list[num] += opts->hintstream_len;
2387 dowriteobject(ctx, doc, opts, num, pass);
2388 }
2389}
2390
/*
	Return the number of bits needed to represent x, i.e. the position of
	its highest set bit counted from 1 (1 -> 1, 2 -> 2, 3 -> 2, 4 -> 3).
	Zero and negative inputs yield 0.

	The count is computed with unsigned shifts. The previous form
	evaluated 1<<31 on a signed int for inputs of 2^30 and above, which
	is undefined behaviour, and then reported 0 bits for such values
	rather than the 31 bits they need.
*/
static int
my_log2(int x)
{
	unsigned int v;
	int bits = 0;

	if (x <= 0)
		return 0;

	for (v = (unsigned int)x; v != 0; v >>= 1)
		bits++;

	return bits;
}
2407
/*
	Append the linearization hint tables to 'buf': first the page offset
	hint table (header per Table F.3, per-page entries per Table F.4),
	then the shared object hint table (header per Table F.5, entries per
	Table F.6).

	Along the way this fills in num_objects/min_ofs/max_ofs and
	num_shared for every page_objects entry, and stores in
	opts->hints_shared_offset the number of bytes in 'buf' at the point
	where the shared object hint table starts.
*/
static void
make_page_offset_hints(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, fz_buffer *buf)
{
	int i, j;
	int min_objs_per_page, max_objs_per_page;
	int min_page_length, max_page_length;
	int objs_per_page_bits;
	int min_shared_object, max_shared_object;
	int max_shared_object_refs = 0;
	int min_shared_length, max_shared_length;
	page_objects **pop = &opts->page_object_lists->page[0];
	int page_len_bits, shared_object_bits, shared_object_id_bits;
	int shared_length_bits;
	int xref_len = pdf_xref_len(ctx, doc);

	/* Start the min/max trackers at the opposite extremes so the first
	 * qualifying object replaces them. */
	min_shared_object = pdf_xref_len(ctx, doc);
	max_shared_object = 1;
	min_shared_length = opts->file_len;
	max_shared_length = 0;
	/* Pass over every object: work out the byte range [min, max) it
	 * occupies (max being the offset of whatever follows it in the file)
	 * and attribute it either to a page or to the shared statistics. */
	for (i=1; i < xref_len; i++)
	{
		int min, max, page;

		min = opts->ofs_list[i];
		if (i == opts->start-1 || (opts->start == 1 && i == xref_len-1))
			max = opts->main_xref_offset;
		else if (i == xref_len-1)
			max = opts->ofs_list[1];
		else
			max = opts->ofs_list[i+1];

		assert(max > min);

		if (opts->use_list[i] & USE_SHARED)
		{
			page = -1;
			if (i < min_shared_object)
				min_shared_object = i;
			if (i > max_shared_object)
				max_shared_object = i;
			if (min_shared_length > max - min)
				min_shared_length = max - min;
			if (max_shared_length < max - min)
				max_shared_length = max - min;
		}
		else if (opts->use_list[i] & (USE_CATALOGUE | USE_HINTS | USE_PARAMS))
			page = -1;
		else if (opts->use_list[i] & USE_PAGE1)
		{
			/* Page 1 objects count towards page 0 and also towards the
			 * shared length range. */
			page = 0;
			if (min_shared_length > max - min)
				min_shared_length = max - min;
			if (max_shared_length < max - min)
				max_shared_length = max - min;
		}
		else if (opts->use_list[i] == 0)
			page = -1;
		else
			page = opts->use_list[i]>>USE_PAGE_SHIFT;

		if (page >= 0)
		{
			pop[page]->num_objects++;
			if (pop[page]->min_ofs > min)
				pop[page]->min_ofs = min;
			if (pop[page]->max_ofs < max)
				pop[page]->max_ofs = max;
		}
	}

	/* Find the least and greatest object count and byte length over all pages. */
	min_objs_per_page = max_objs_per_page = pop[0]->num_objects;
	min_page_length = max_page_length = pop[0]->max_ofs - pop[0]->min_ofs;
	for (i=1; i < opts->page_count; i++)
	{
		int tmp;
		if (min_objs_per_page > pop[i]->num_objects)
			min_objs_per_page = pop[i]->num_objects;
		if (max_objs_per_page < pop[i]->num_objects)
			max_objs_per_page = pop[i]->num_objects;
		tmp = pop[i]->max_ofs - pop[i]->min_ofs;
		if (tmp < min_page_length)
			min_page_length = tmp;
		if (tmp > max_page_length)
			max_page_length = tmp;
	}

	/* Count, per page, the entries in its object list that are flagged
	 * USE_PAGE1 (for the first page) or USE_SHARED (for the others), and
	 * track the largest such count. */
	for (i=0; i < opts->page_count; i++)
	{
		int count = 0;
		page_objects *po = opts->page_object_lists->page[i];
		for (j = 0; j < po->len; j++)
		{
			if (i == 0 && opts->use_list[po->object[j]] & USE_PAGE1)
				count++;
			else if (i != 0 && opts->use_list[po->object[j]] & USE_SHARED)
				count++;
		}
		po->num_shared = count;
		if (i == 0 || count > max_shared_object_refs)
			max_shared_object_refs = count;
	}
	/* No USE_SHARED object was seen in the first loop. */
	if (min_shared_object > max_shared_object)
		min_shared_object = max_shared_object = 0;

	/* Table F.3 - Header */
	/* Header Item 1: Least number of objects in a page */
	fz_append_bits(ctx, buf, min_objs_per_page, 32);
	/* Header Item 2: Location of first pages page object */
	fz_append_bits(ctx, buf, opts->ofs_list[pop[0]->page_object_number], 32);
	/* Header Item 3: Number of bits required to represent the difference
	 * between the greatest and least number of objects in a page. */
	objs_per_page_bits = my_log2(max_objs_per_page - min_objs_per_page);
	fz_append_bits(ctx, buf, objs_per_page_bits, 16);
	/* Header Item 4: Least length of a page. */
	fz_append_bits(ctx, buf, min_page_length, 32);
	/* Header Item 5: Number of bits needed to represent the difference
	 * between the greatest and least length of a page. */
	page_len_bits = my_log2(max_page_length - min_page_length);
	fz_append_bits(ctx, buf, page_len_bits, 16);
	/* Header Item 6: Least offset to start of content stream (Acrobat
	 * sets this to always be 0) */
	fz_append_bits(ctx, buf, 0, 32);
	/* Header Item 7: Number of bits needed to represent the difference
	 * between the greatest and least offset to content stream (Acrobat
	 * sets this to always be 0) */
	fz_append_bits(ctx, buf, 0, 16);
	/* Header Item 8: Least content stream length. (Acrobat
	 * sets this to always be 0) */
	fz_append_bits(ctx, buf, 0, 32);
	/* Header Item 9: Number of bits needed to represent the difference
	 * between the greatest and least content stream length (Acrobat
	 * sets this to always be the same as item 5) */
	fz_append_bits(ctx, buf, page_len_bits, 16);
	/* Header Item 10: Number of bits needed to represent the greatest
	 * number of shared object references. */
	shared_object_bits = my_log2(max_shared_object_refs);
	fz_append_bits(ctx, buf, shared_object_bits, 16);
	/* Header Item 11: Number of bits needed to represent the greatest
	 * shared object identifier. */
	shared_object_id_bits = my_log2(max_shared_object - min_shared_object + pop[0]->num_shared);
	fz_append_bits(ctx, buf, shared_object_id_bits, 16);
	/* Header Item 12: Number of bits needed to represent the numerator
	 * of the fractions. We always send 0. */
	fz_append_bits(ctx, buf, 0, 16);
	/* Header Item 13: Number of bits needed to represent the denominator
	 * of the fractions. We always send 0. */
	fz_append_bits(ctx, buf, 0, 16);

	/* Table F.4 - Page offset hint table (per page) */
	/* Item 1: A number that, when added to the least number of objects
	 * on a page, gives the number of objects in the page. */
	for (i = 0; i < opts->page_count; i++)
	{
		fz_append_bits(ctx, buf, pop[i]->num_objects - min_objs_per_page, objs_per_page_bits);
	}
	fz_append_bits_pad(ctx, buf);
	/* Item 2: A number that, when added to the least page length, gives
	 * the length of the page in bytes. */
	for (i = 0; i < opts->page_count; i++)
	{
		fz_append_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits);
	}
	fz_append_bits_pad(ctx, buf);
	/* Item 3: The number of shared objects referenced from the page. */
	for (i = 0; i < opts->page_count; i++)
	{
		fz_append_bits(ctx, buf, pop[i]->num_shared, shared_object_bits);
	}
	fz_append_bits_pad(ctx, buf);
	/* Item 4: Shared object id for each shared object ref in every page.
	 * Spec says "not for page 1", but acrobat does send page 1's - all
	 * as zeros. */
	for (i = 0; i < opts->page_count; i++)
	{
		for (j = 0; j < pop[i]->len; j++)
		{
			int o = pop[i]->object[j];
			if (i == 0 && opts->use_list[o] & USE_PAGE1)
				fz_append_bits(ctx, buf, 0 /* o - pop[0]->page_object_number */, shared_object_id_bits);
			if (i != 0 && opts->use_list[o] & USE_SHARED)
				fz_append_bits(ctx, buf, o - min_shared_object + pop[0]->num_shared, shared_object_id_bits);
		}
	}
	fz_append_bits_pad(ctx, buf);
	/* Item 5: Numerator of fractional position for each shared object reference. */
	/* We always send 0 in 0 bits */
	/* Item 6: A number that, when added to the least offset to the start
	 * of the content stream (F.3 Item 6), gives the offset in bytes of
	 * start of the pages content stream object relative to the beginning
	 * of the page. Always 0 in 0 bits. */
	/* Item 7: A number that, when added to the least content stream length
	 * (F.3 Item 8), gives the length of the pages content stream object.
	 * Always == Item 2 as least content stream length = least page stream
	 * length.
	 */
	for (i = 0; i < opts->page_count; i++)
	{
		fz_append_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits);
	}

	/* Pad, and then do shared object hint table */
	fz_append_bits_pad(ctx, buf);
	/* Remember how many bytes of 'buf' precede the shared object hint table. */
	opts->hints_shared_offset = (int)fz_buffer_storage(ctx, buf, NULL);

	/* Table F.5: */
	/* Header Item 1: Object number of the first object in the shared
	 * objects section. */
	fz_append_bits(ctx, buf, min_shared_object, 32);
	/* Header Item 2: Location of first object in the shared objects
	 * section. */
	fz_append_bits(ctx, buf, opts->ofs_list[min_shared_object], 32);
	/* Header Item 3: The number of shared object entries for the first
	 * page. */
	fz_append_bits(ctx, buf, pop[0]->num_shared, 32);
	/* Header Item 4: The number of shared object entries for the shared
	 * objects section + first page. */
	fz_append_bits(ctx, buf, max_shared_object - min_shared_object + pop[0]->num_shared, 32);
	/* Header Item 5: The number of bits needed to represent the greatest
	 * number of objects in a shared object group (Always 0). */
	fz_append_bits(ctx, buf, 0, 16);
	/* Header Item 6: The least length of a shared object group in bytes. */
	fz_append_bits(ctx, buf, min_shared_length, 32);
	/* Header Item 7: The number of bits required to represent the
	 * difference between the greatest and least length of a shared object
	 * group. */
	shared_length_bits = my_log2(max_shared_length - min_shared_length);
	fz_append_bits(ctx, buf, shared_length_bits, 16);

	/* Table F.6 */
	/* Item 1: Shared object group length (page 1 objects) */
	for (j = 0; j < pop[0]->len; j++)
	{
		int o = pop[0]->object[j];
		int64_t min, max;
		min = opts->ofs_list[o];
		if (o == opts->start-1)
			max = opts->main_xref_offset;
		else if (o < xref_len-1)
			max = opts->ofs_list[o+1];
		else
			max = opts->ofs_list[1];
		if (opts->use_list[o] & USE_PAGE1)
			fz_append_bits(ctx, buf, max - min - min_shared_length, shared_length_bits);
	}
	/* Item 1: Shared object group length (shared objects) */
	/* NOTE(review): this emits an entry for every object number in
	 * [min_shared_object, max_shared_object], without re-checking the
	 * USE_SHARED flag for each one. */
	for (i = min_shared_object; i <= max_shared_object; i++)
	{
		int min, max;
		min = opts->ofs_list[i];
		if (i == opts->start-1)
			max = opts->main_xref_offset;
		else if (i < xref_len-1)
			max = opts->ofs_list[i+1];
		else
			max = opts->ofs_list[1];
		fz_append_bits(ctx, buf, max - min - min_shared_length, shared_length_bits);
	}
	fz_append_bits_pad(ctx, buf);

	/* Item 2: MD5 presence flags */
	for (i = max_shared_object - min_shared_object + pop[0]->num_shared; i > 0; i--)
	{
		fz_append_bits(ctx, buf, 0, 1);
	}
	fz_append_bits_pad(ctx, buf);
	/* Item 3: MD5 sums (not present) */
	fz_append_bits_pad(ctx, buf);
	/* Item 4: Number of objects in the group (not present) */
}
2677
2678static void
2679make_hint_stream(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
2680{
2681 fz_buffer *buf;
2682 pdf_obj *obj = NULL;
2683
2684 fz_var(obj);
2685
2686 buf = fz_new_buffer(ctx, 100);
2687 fz_try(ctx)
2688 {
2689 make_page_offset_hints(ctx, doc, opts, buf);
2690 obj = pdf_load_object(ctx, doc, pdf_xref_len(ctx, doc)-1);
2691 pdf_update_stream(ctx, doc, obj, buf, 0);
2692 opts->hintstream_len = (int)fz_buffer_storage(ctx, buf, NULL);
2693 }
2694 fz_always(ctx)
2695 {
2696 pdf_drop_obj(ctx, obj);
2697 fz_drop_buffer(ctx, buf);
2698 }
2699 fz_catch(ctx)
2700 fz_rethrow(ctx);
2701}
2702
#ifdef DEBUG_WRITING
/*
	Debug aid: print, for every object number in the xref, its recorded
	file offset and its use flags to stderr as "num@offset: use=flags".
*/
static void dump_object_details(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
{
	int i;
	int xref_len = pdf_xref_len(ctx, doc);

	for (i = 0; i < xref_len; i++)
	{
		/* ofs_list holds int64_t values, so it must not be passed for a
		 * plain %d conversion; widen explicitly and print as long long. */
		fprintf(stderr, "%d@%lld: use=%d\n", i, (long long)opts->ofs_list[i], opts->use_list[i]);
	}
}
#endif
2714
2715static void presize_unsaved_signature_byteranges(fz_context *ctx, pdf_document *doc)
2716{
2717 int s;
2718
2719 for (s = 0; s < doc->num_incremental_sections; s++)
2720 {
2721 pdf_xref *xref = &doc->xref_sections[s];
2722
2723 if (xref->unsaved_sigs)
2724 {
2725 /* The ByteRange objects of signatures are initially written out with
2726 * dummy values, and then overwritten later. We need to make sure their
2727 * initial form at least takes enough sufficient file space */
2728 pdf_unsaved_sig *usig;
2729 int n = 0;
2730
2731 for (usig = xref->unsaved_sigs; usig; usig = usig->next)
2732 n++;
2733
2734 for (usig = xref->unsaved_sigs; usig; usig = usig->next)
2735 {
2736 /* There will be segments of bytes at the beginning, at
2737 * the end and between each consecutive pair of signatures,
2738 * hence n + 1 */
2739 int i;
2740 pdf_obj *byte_range = pdf_dict_getl(ctx, usig->field, PDF_NAME(V), PDF_NAME(ByteRange), NULL);
2741
2742 for (i = 0; i < n+1; i++)
2743 {
2744 pdf_array_push_int(ctx, byte_range, INT_MAX);
2745 pdf_array_push_int(ctx, byte_range, INT_MAX);
2746 }
2747 }
2748 }
2749 }
2750}
2751
2752static void complete_signatures(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
2753{
2754 pdf_unsaved_sig *usig;
2755 char *buf = NULL, *ptr;
2756 int buf_size;
2757 int s;
2758 int i;
2759 int last_end;
2760 fz_stream *stm = NULL;
2761 fz_var(stm);
2762 fz_var(buf);
2763
2764 fz_try(ctx)
2765 {
2766 for (s = 0; s < doc->num_incremental_sections; s++)
2767 {
2768 pdf_xref *xref = &doc->xref_sections[doc->num_incremental_sections - s - 1];
2769
2770 if (xref->unsaved_sigs)
2771 {
2772 pdf_obj *byte_range;
2773 buf_size = 0;
2774
2775 for (usig = xref->unsaved_sigs; usig; usig = usig->next)
2776 {
2777 int size = usig->signer->max_digest_size(usig->signer);
2778
2779 buf_size = fz_maxi(buf_size, size);
2780 }
2781
2782 buf_size = buf_size * 2 + SIG_EXTRAS_SIZE;
2783
2784 buf = fz_calloc(ctx, buf_size, 1);
2785
2786 stm = fz_stream_from_output(ctx, opts->out);
2787 /* Locate the byte ranges and contents in the saved file */
2788 for (usig = xref->unsaved_sigs; usig; usig = usig->next)
2789 {
2790 char *bstr, *cstr, *fstr;
2791 int bytes_read;
2792 int pnum = pdf_obj_parent_num(ctx, pdf_dict_getl(ctx, usig->field, PDF_NAME(V), PDF_NAME(ByteRange), NULL));
2793 fz_seek(ctx, stm, opts->ofs_list[pnum], SEEK_SET);
2794 /* SIG_EXTRAS_SIZE is an arbitrary value and its addition above to buf_size
2795 * could cause an attempt to read off the end of the file. That's not an
2796 * error, but we need to keep track of how many bytes are read and search
2797 * for markers only in defined data */
2798 bytes_read = fz_read(ctx, stm, (unsigned char *)buf, buf_size);
2799 assert(bytes_read <= buf_size);
2800
2801 bstr = fz_memmem(buf, bytes_read, SLASH_BYTE_RANGE, sizeof(SLASH_BYTE_RANGE)-1);
2802 cstr = fz_memmem(buf, bytes_read, SLASH_CONTENTS, sizeof(SLASH_CONTENTS)-1);
2803 fstr = fz_memmem(buf, bytes_read, SLASH_FILTER, sizeof(SLASH_FILTER)-1);
2804
2805 if (!(bstr && cstr && fstr && bstr < cstr && cstr < fstr))
2806 fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to determine byte ranges while writing signature");
2807
2808 usig->byte_range_start = bstr - buf + sizeof(SLASH_BYTE_RANGE)-1 + opts->ofs_list[pnum];
2809 usig->byte_range_end = cstr - buf + opts->ofs_list[pnum];
2810 usig->contents_start = cstr - buf + sizeof(SLASH_CONTENTS)-1 + opts->ofs_list[pnum];
2811 usig->contents_end = fstr - buf + opts->ofs_list[pnum];
2812 }
2813
2814 fz_drop_stream(ctx, stm);
2815 stm = NULL;
2816
2817 /* Recreate ByteRange with correct values. Initially store the
2818 * recreated object in the first of the unsaved signatures */
2819 byte_range = pdf_new_array(ctx, doc, 4);
2820 pdf_dict_putl_drop(ctx, xref->unsaved_sigs->field, byte_range, PDF_NAME(V), PDF_NAME(ByteRange), NULL);
2821
2822 last_end = 0;
2823 for (usig = xref->unsaved_sigs; usig; usig = usig->next)
2824 {
2825 pdf_array_push_int(ctx, byte_range, last_end);
2826 pdf_array_push_int(ctx, byte_range, usig->contents_start - last_end);
2827 last_end = usig->contents_end;
2828 }
2829 pdf_array_push_int(ctx, byte_range, last_end);
2830 pdf_array_push_int(ctx, byte_range, xref->end_ofs - last_end);
2831
2832 /* Copy the new ByteRange to the other unsaved signatures */
2833 for (usig = xref->unsaved_sigs->next; usig; usig = usig->next)
2834 pdf_dict_putl_drop(ctx, usig->field, pdf_copy_array(ctx, byte_range), PDF_NAME(V), PDF_NAME(ByteRange), NULL);
2835
2836 /* Write the byte range into buf, padding with spaces*/
2837 ptr = pdf_sprint_obj(ctx, buf, buf_size, &i, byte_range, 1, 0);
2838 if (ptr != buf) /* should never happen, since data should fit in buf_size */
2839 fz_free(ctx, ptr);
2840 memset(buf+i, ' ', buf_size-i);
2841
2842 /* Write the byte range to the file */
2843 for (usig = xref->unsaved_sigs; usig; usig = usig->next)
2844 {
2845 fz_seek_output(ctx, opts->out, usig->byte_range_start, SEEK_SET);
2846 fz_write_data(ctx, opts->out, buf, usig->byte_range_end - usig->byte_range_start);
2847 }
2848
2849 /* Write the digests into the file */
2850 for (usig = xref->unsaved_sigs; usig; usig = usig->next)
2851 pdf_write_digest(ctx, opts->out, byte_range, usig->contents_start, usig->contents_end - usig->contents_start, usig->signer);
2852
2853 /* delete the unsaved_sigs records */
2854 while ((usig = xref->unsaved_sigs) != NULL)
2855 {
2856 xref->unsaved_sigs = usig->next;
2857 pdf_drop_obj(ctx, usig->field);
2858 usig->signer->drop(usig->signer);
2859 fz_free(ctx, usig);
2860 }
2861
2862 xref->unsaved_sigs_end = NULL;
2863
2864 fz_free(ctx, buf);
2865 buf = NULL;
2866 }
2867 }
2868 }
2869 fz_catch(ctx)
2870 {
2871 fz_drop_stream(ctx, stm);
2872 fz_free(ctx, buf);
2873 fz_rethrow(ctx);
2874 }
2875}
2876
2877static void clean_content_streams(fz_context *ctx, pdf_document *doc, int sanitize, int ascii)
2878{
2879 int n = pdf_count_pages(ctx, doc);
2880 int i;
2881
2882 for (i = 0; i < n; i++)
2883 {
2884 pdf_annot *annot;
2885 pdf_page *page = pdf_load_page(ctx, doc, i);
2886
2887 fz_try(ctx)
2888 {
2889 pdf_clean_page_contents(ctx, doc, page, NULL, NULL, NULL, sanitize, ascii);
2890
2891 for (annot = pdf_first_annot(ctx, page); annot != NULL; annot = pdf_next_annot(ctx, annot))
2892 {
2893 pdf_clean_annot_contents(ctx, doc, annot, NULL, NULL, NULL, sanitize, ascii);
2894 }
2895 }
2896 fz_always(ctx)
2897 fz_drop_page(ctx, &page->super);
2898 fz_catch(ctx)
2899 fz_rethrow(ctx);
2900 }
2901}
2902
2903/* Initialise the pdf_write_state, used dynamically during the write, from the static
2904 * pdf_write_options, passed into pdf_save_document */
2905static void initialise_write_state(fz_context *ctx, pdf_document *doc, const pdf_write_options *in_opts, pdf_write_state *opts)
2906{
2907 int xref_len = pdf_xref_len(ctx, doc);
2908
2909 opts->do_incremental = in_opts->do_incremental;
2910 opts->do_ascii = in_opts->do_ascii;
2911 opts->do_tight = !in_opts->do_pretty;
2912 opts->do_expand = in_opts->do_decompress;
2913 opts->do_compress = in_opts->do_compress;
2914 opts->do_compress_images = in_opts->do_compress_images;
2915 opts->do_compress_fonts = in_opts->do_compress_fonts;
2916
2917 opts->do_garbage = in_opts->do_garbage;
2918 opts->do_linear = in_opts->do_linear;
2919 opts->do_clean = in_opts->do_clean;
2920 opts->do_encrypt = in_opts->do_encrypt;
2921 opts->start = 0;
2922 opts->main_xref_offset = INT_MIN;
2923
2924 opts->permissions = in_opts->permissions;
2925 memcpy(opts->opwd_utf8, in_opts->opwd_utf8, nelem(opts->opwd_utf8));
2926 memcpy(opts->upwd_utf8, in_opts->upwd_utf8, nelem(opts->upwd_utf8));
2927
2928 /* We deliberately make these arrays long enough to cope with
2929 * 1 to n access rather than 0..n-1, and add space for 2 new
2930 * extra entries that may be required for linearization. */
2931 opts->list_len = 0;
2932 opts->use_list = NULL;
2933 opts->ofs_list = NULL;
2934 opts->gen_list = NULL;
2935 opts->renumber_map = NULL;
2936 opts->rev_renumber_map = NULL;
2937
2938 expand_lists(ctx, opts, xref_len);
2939}
2940
2941/* Free the resources held by the dynamic write options */
2942static void finalise_write_state(fz_context *ctx, pdf_write_state *opts)
2943{
2944 fz_free(ctx, opts->use_list);
2945 fz_free(ctx, opts->ofs_list);
2946 fz_free(ctx, opts->gen_list);
2947 fz_free(ctx, opts->renumber_map);
2948 fz_free(ctx, opts->rev_renumber_map);
2949 pdf_drop_obj(ctx, opts->linear_l);
2950 pdf_drop_obj(ctx, opts->linear_h0);
2951 pdf_drop_obj(ctx, opts->linear_h1);
2952 pdf_drop_obj(ctx, opts->linear_o);
2953 pdf_drop_obj(ctx, opts->linear_e);
2954 pdf_drop_obj(ctx, opts->linear_n);
2955 pdf_drop_obj(ctx, opts->linear_t);
2956 pdf_drop_obj(ctx, opts->hints_s);
2957 pdf_drop_obj(ctx, opts->hints_length);
2958 page_objects_list_destroy(ctx, opts->page_object_lists);
2959}
2960
/*
	Default write options: every do_* switch is off, all permission bits
	are set (~0), and both passwords are empty strings.

	The initializer is positional, so the entries must stay in the same
	order as the fields of pdf_write_options (declared elsewhere).
*/
const pdf_write_options pdf_default_write_options = {
	0, /* do_incremental */
	0, /* do_pretty */
	0, /* do_ascii */
	0, /* do_compress */
	0, /* do_compress_images */
	0, /* do_compress_fonts */
	0, /* do_decompress */
	0, /* do_garbage */
	0, /* do_linear */
	0, /* do_clean */
	0, /* do_sanitize */
	0, /* do_appearance */
	0, /* do_encrypt */
	~0, /* permissions */
	"", /* opwd_utf8[128] */
	"", /* upwd_utf8[128] */
};
2979
/*
	Human readable summary of the PDF output option string.

	The "or garbage=..." lines are continuations of the "garbage:" entry,
	so they are kept directly beneath it; otherwise they read as if they
	continued whichever option happens to precede them.
*/
const char *fz_pdf_write_options_usage =
	"PDF output options:\n"
	"\tdecompress: decompress all streams (except compress-fonts/images)\n"
	"\tcompress: compress all streams\n"
	"\tcompress-fonts: compress embedded fonts\n"
	"\tcompress-images: compress images\n"
	"\tascii: ASCII hex encode binary streams\n"
	"\tpretty: pretty-print objects with indentation\n"
	"\tlinearize: optimize for web browsers\n"
	"\tclean: pretty-print graphics commands in content streams\n"
	"\tsanitize: sanitize graphics commands in content streams\n"
	"\tgarbage: garbage collect unused objects\n"
	"\tor garbage=compact: ... and compact cross reference table\n"
	"\tor garbage=deduplicate: ... and remove duplicate objects\n"
	"\tincremental: write changes as incremental update\n"
	"\tcontinue-on-error: continue saving the document even if there is an error\n"
	"\tdecrypt: write unencrypted document\n"
	"\tencrypt=rc4-40|rc4-128|aes-128|aes-256: write encrypted document\n"
	"\tpermissions=NUMBER: document permissions to grant when encrypting\n"
	"\tuser-password=PASSWORD: password required to read document\n"
	"\towner-password=PASSWORD: password required to edit document\n"
	"\n";
3002
3003/*
3004 Parse option string into a pdf_write_options struct.
3005 Matches the command line options to 'mutool clean':
3006 g: garbage collect
3007 d, i, f: expand all, fonts, images
3008 l: linearize
3009 a: ascii hex encode
3010 z: deflate
3011 c: clean content streams
3012 s: sanitize content streams
3013*/
3014pdf_write_options *
3015pdf_parse_write_options(fz_context *ctx, pdf_write_options *opts, const char *args)
3016{
3017 const char *val;
3018
3019 memset(opts, 0, sizeof *opts);
3020
3021 if (fz_has_option(ctx, args, "decompress", &val))
3022 opts->do_decompress = fz_option_eq(val, "yes");
3023 if (fz_has_option(ctx, args, "compress", &val))
3024 opts->do_compress = fz_option_eq(val, "yes");
3025 if (fz_has_option(ctx, args, "compress-fonts", &val))
3026 opts->do_compress_fonts = fz_option_eq(val, "yes");
3027 if (fz_has_option(ctx, args, "compress-images", &val))
3028 opts->do_compress_images = fz_option_eq(val, "yes");
3029 if (fz_has_option(ctx, args, "ascii", &val))
3030 opts->do_ascii = fz_option_eq(val, "yes");
3031 if (fz_has_option(ctx, args, "pretty", &val))
3032 opts->do_pretty = fz_option_eq(val, "yes");
3033 if (fz_has_option(ctx, args, "linearize", &val))
3034 opts->do_linear = fz_option_eq(val, "yes");
3035 if (fz_has_option(ctx, args, "clean", &val))
3036 opts->do_clean = fz_option_eq(val, "yes");
3037 if (fz_has_option(ctx, args, "sanitize", &val))
3038 opts->do_sanitize = fz_option_eq(val, "yes");
3039 if (fz_has_option(ctx, args, "incremental", &val))
3040 opts->do_incremental = fz_option_eq(val, "yes");
3041 if (fz_has_option(ctx, args, "decrypt", &val))
3042 opts->do_encrypt = fz_option_eq(val, "yes") ? PDF_ENCRYPT_NONE : PDF_ENCRYPT_KEEP;
3043 if (fz_has_option(ctx, args, "encrypt", &val))
3044 {
3045 opts->do_encrypt = PDF_ENCRYPT_UNKNOWN;
3046 if (fz_option_eq(val, "none") || fz_option_eq(val, "no"))
3047 opts->do_encrypt = PDF_ENCRYPT_NONE;
3048 if (fz_option_eq(val, "keep"))
3049 opts->do_encrypt = PDF_ENCRYPT_KEEP;
3050 if (fz_option_eq(val, "rc4-40") || fz_option_eq(val, "yes"))
3051 opts->do_encrypt = PDF_ENCRYPT_RC4_40;
3052 if (fz_option_eq(val, "rc4-128"))
3053 opts->do_encrypt = PDF_ENCRYPT_RC4_128;
3054 if (fz_option_eq(val, "aes-128"))
3055 opts->do_encrypt = PDF_ENCRYPT_AES_128;
3056 if (fz_option_eq(val, "aes-256"))
3057 opts->do_encrypt = PDF_ENCRYPT_AES_256;
3058 }
3059 if (fz_has_option(ctx, args, "owner-password", &val))
3060 fz_copy_option(ctx, val, opts->opwd_utf8, nelem(opts->opwd_utf8));
3061 if (fz_has_option(ctx, args, "user-password", &val))
3062 fz_copy_option(ctx, val, opts->upwd_utf8, nelem(opts->upwd_utf8));
3063 if (fz_has_option(ctx, args, "permissions", &val))
3064 opts->permissions = fz_atoi(val);
3065 else
3066 opts->permissions = ~0;
3067 if (fz_has_option(ctx, args, "garbage", &val))
3068 {
3069 if (fz_option_eq(val, "yes"))
3070 opts->do_garbage = 1;
3071 else if (fz_option_eq(val, "compact"))
3072 opts->do_garbage = 2;
3073 else if (fz_option_eq(val, "deduplicate"))
3074 opts->do_garbage = 3;
3075 else
3076 opts->do_garbage = fz_atoi(val);
3077 }
3078 if (fz_has_option(ctx, args, "appearance", &val))
3079 {
3080 if (fz_option_eq(val, "yes"))
3081 opts->do_appearance = 1;
3082 else if (fz_option_eq(val, "all"))
3083 opts->do_appearance = 2;
3084 }
3085
3086 return opts;
3087}
3088
3089/*
3090 Return true if the document can be saved incrementally. Applying
3091 redactions or having a repaired document make incremental saving
3092 impossible.
3093*/
3094int pdf_can_be_saved_incrementally(fz_context *ctx, pdf_document *doc)
3095{
3096 if (doc->repair_attempted)
3097 return 0;
3098 if (doc->redacted)
3099 return 0;
3100 if (doc->has_xref_streams && doc->has_old_style_xrefs)
3101 return 0;
3102 return 1;
3103}
3104
3105static void
3106prepare_for_save(fz_context *ctx, pdf_document *doc, pdf_write_options *in_opts)
3107{
3108 doc->freeze_updates = 1;
3109
3110 /* Rewrite (and possibly sanitize) the operator streams */
3111 if (in_opts->do_clean || in_opts->do_sanitize)
3112 clean_content_streams(ctx, doc, in_opts->do_sanitize, in_opts->do_ascii);
3113
3114 presize_unsaved_signature_byteranges(ctx, doc);
3115}
3116
3117static pdf_obj *
3118new_identity(fz_context *ctx, pdf_document *doc)
3119{
3120 unsigned char rnd[32];
3121 pdf_obj *id;
3122
3123 fz_memrnd(ctx, rnd, nelem(rnd));
3124
3125 id = pdf_dict_put_array(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID), 2);
3126 pdf_array_push_drop(ctx, id, pdf_new_string(ctx, (char *) rnd + 0, nelem(rnd) / 2));
3127 pdf_array_push_drop(ctx, id, pdf_new_string(ctx, (char *) rnd + 16, nelem(rnd) / 2));
3128
3129 return id;
3130}
3131
3132static void
3133change_identity(fz_context *ctx, pdf_document *doc, pdf_obj *id)
3134{
3135 unsigned char rnd[16];
3136 if (pdf_array_len(ctx, id) >= 2)
3137 {
3138 /* Update second half of ID array with new random data. */
3139 fz_memrnd(ctx, rnd, 16);
3140 pdf_array_put_drop(ctx, id, 1, pdf_new_string(ctx, (char *)rnd, 16));
3141 }
3142}
3143
3144static void
3145create_encryption_dictionary(fz_context *ctx, pdf_document *doc, pdf_crypt *crypt)
3146{
3147 unsigned char *o, *u;
3148 pdf_obj *encrypt;
3149 int r;
3150
3151 r = pdf_crypt_revision(ctx, crypt);
3152
3153 encrypt = pdf_dict_put_dict(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt), 10);
3154
3155 pdf_dict_put_name(ctx, encrypt, PDF_NAME(Filter), "Standard");
3156 pdf_dict_put_int(ctx, encrypt, PDF_NAME(R), r);
3157 pdf_dict_put_int(ctx, encrypt, PDF_NAME(V), pdf_crypt_version(ctx, crypt));
3158 pdf_dict_put_int(ctx, encrypt, PDF_NAME(Length), pdf_crypt_length(ctx, crypt));
3159 pdf_dict_put_int(ctx, encrypt, PDF_NAME(P), pdf_crypt_permissions(ctx, crypt));
3160 pdf_dict_put_bool(ctx, encrypt, PDF_NAME(EncryptMetadata), pdf_crypt_encrypt_metadata(ctx, crypt));
3161
3162 o = pdf_crypt_owner_password(ctx, crypt);
3163 u = pdf_crypt_user_password(ctx, crypt);
3164
3165 if (r < 4)
3166 {
3167 pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 32);
3168 pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 32);
3169 }
3170 else if (r == 4)
3171 {
3172 pdf_obj *cf;
3173
3174 pdf_dict_put_name(ctx, encrypt, PDF_NAME(StmF), "StdCF");
3175 pdf_dict_put_name(ctx, encrypt, PDF_NAME(StrF), "StdCF");
3176
3177 cf = pdf_dict_put_dict(ctx, encrypt, PDF_NAME(CF), 1);
3178 cf = pdf_dict_put_dict(ctx, cf, PDF_NAME(StdCF), 3);
3179 pdf_dict_put_name(ctx, cf, PDF_NAME(AuthEvent), "DocOpen");
3180 pdf_dict_put_name(ctx, cf, PDF_NAME(CFM), "AESV2");
3181 pdf_dict_put_int(ctx, cf, PDF_NAME(Length), 16);
3182 pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 32);
3183 pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 32);
3184 }
3185 else if (r == 6)
3186 {
3187 unsigned char *oe = pdf_crypt_owner_encryption(ctx, crypt);
3188 unsigned char *ue = pdf_crypt_user_encryption(ctx, crypt);
3189 pdf_obj *cf;
3190
3191 pdf_dict_put_name(ctx, encrypt, PDF_NAME(StmF), "StdCF");
3192 pdf_dict_put_name(ctx, encrypt, PDF_NAME(StrF), "StdCF");
3193
3194 cf = pdf_dict_put_dict(ctx, encrypt, PDF_NAME(CF), 1);
3195 cf = pdf_dict_put_dict(ctx, cf, PDF_NAME(StdCF), 3);
3196 pdf_dict_put_name(ctx, cf, PDF_NAME(AuthEvent), "DocOpen");
3197 pdf_dict_put_name(ctx, cf, PDF_NAME(CFM), "AESV3");
3198 pdf_dict_put_int(ctx, cf, PDF_NAME(Length), 32);
3199 pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, 48);
3200 pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, 48);
3201 pdf_dict_put_string(ctx, encrypt, PDF_NAME(OE), (char *) oe, 32);
3202 pdf_dict_put_string(ctx, encrypt, PDF_NAME(UE), (char *) ue, 32);
3203 pdf_dict_put_string(ctx, encrypt, PDF_NAME(Perms), (char *) pdf_crypt_permissions_encryption(ctx, crypt), 16);
3204 }
3205}
3206
3207static void
3208do_pdf_save_document(fz_context *ctx, pdf_document *doc, pdf_write_state *opts, pdf_write_options *in_opts)
3209{
3210 int lastfree;
3211 int num;
3212 int xref_len;
3213 pdf_obj *id, *id1;
3214
3215 if (in_opts->do_incremental)
3216 {
3217 /* If no changes, nothing to write */
3218 if (doc->num_incremental_sections == 0)
3219 return;
3220 if (opts->out)
3221 {
3222 fz_seek_output(ctx, opts->out, 0, SEEK_END);
3223 fz_write_string(ctx, opts->out, "\n");
3224 }
3225 }
3226
3227 xref_len = pdf_xref_len(ctx, doc);
3228
3229 fz_try(ctx)
3230 {
3231 initialise_write_state(ctx, doc, in_opts, opts);
3232
3233 /* Update second half of ID array if it exists. */
3234 id = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
3235 if (id)
3236 change_identity(ctx, doc, id);
3237
3238 /* Remove encryption dictionary if saving without encryption. */
3239 if (opts->do_encrypt == PDF_ENCRYPT_NONE)
3240 {
3241 pdf_dict_del(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
3242 }
3243
3244 /* Keep encryption dictionary if saving with old encryption. */
3245 else if (opts->do_encrypt == PDF_ENCRYPT_KEEP)
3246 {
3247 opts->crypt = doc->crypt;
3248 }
3249
3250 /* Create encryption dictionary if saving with new encryption. */
3251 else
3252 {
3253 if (!id)
3254 id = new_identity(ctx, doc);
3255 id1 = pdf_array_get(ctx, id, 0);
3256 opts->crypt = pdf_new_encrypt(ctx, opts->opwd_utf8, opts->upwd_utf8, id1, opts->permissions, opts->do_encrypt);
3257 create_encryption_dictionary(ctx, doc, opts->crypt);
3258 }
3259
3260 /* Make sure any objects hidden in compressed streams have been loaded */
3261 if (!opts->do_incremental)
3262 {
3263 pdf_ensure_solid_xref(ctx, doc, xref_len);
3264 preloadobjstms(ctx, doc);
3265 xref_len = pdf_xref_len(ctx, doc); /* May have changed due to repair */
3266 expand_lists(ctx, opts, xref_len);
3267 }
3268
3269 /* Sweep & mark objects from the trailer */
3270 if (opts->do_garbage >= 1 || opts->do_linear)
3271 (void)markobj(ctx, doc, opts, pdf_trailer(ctx, doc));
3272 else
3273 {
3274 xref_len = pdf_xref_len(ctx, doc); /* May have changed due to repair */
3275 expand_lists(ctx, opts, xref_len);
3276 for (num = 0; num < xref_len; num++)
3277 opts->use_list[num] = 1;
3278 }
3279
3280 /* Coalesce and renumber duplicate objects */
3281 if (opts->do_garbage >= 3)
3282 removeduplicateobjs(ctx, doc, opts);
3283
3284 /* Compact xref by renumbering and removing unused objects */
3285 if (opts->do_garbage >= 2 || opts->do_linear)
3286 compactxref(ctx, doc, opts);
3287
3288 opts->crypt_object_number = 0;
3289 if (opts->crypt)
3290 {
3291 pdf_obj *crypt = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
3292 int crypt_num = pdf_to_num(ctx, crypt);
3293 opts->crypt_object_number = opts->renumber_map[crypt_num];
3294 }
3295
3296 /* Make renumbering affect all indirect references and update xref */
3297 if (opts->do_garbage >= 2 || opts->do_linear)
3298 renumberobjs(ctx, doc, opts);
3299
3300 /* Truncate the xref after compacting and renumbering */
3301 if ((opts->do_garbage >= 2 || opts->do_linear) && !opts->do_incremental)
3302 {
3303 xref_len = pdf_xref_len(ctx, doc); /* May have changed due to repair */
3304 expand_lists(ctx, opts, xref_len);
3305 while (xref_len > 0 && !opts->use_list[xref_len-1])
3306 xref_len--;
3307 }
3308
3309 if (opts->do_linear)
3310 linearize(ctx, doc, opts);
3311
3312 if (opts->do_incremental)
3313 {
3314 int i;
3315
3316 doc->disallow_new_increments = 1;
3317
3318 for (i = 0; i < doc->num_incremental_sections; i++)
3319 {
3320 doc->xref_base = doc->num_incremental_sections - i - 1;
3321
3322 writeobjects(ctx, doc, opts, 0);
3323
3324#ifdef DEBUG_WRITING
3325 dump_object_details(ctx, doc, opts);
3326#endif
3327
3328 for (num = 0; num < xref_len; num++)
3329 {
3330 if (!opts->use_list[num] && pdf_xref_is_incremental(ctx, doc, num))
3331 {
3332 /* Make unreusable. FIXME: would be better to link to existing free list */
3333 opts->gen_list[num] = 65535;
3334 opts->ofs_list[num] = 0;
3335 }
3336 }
3337
3338 opts->first_xref_offset = fz_tell_output(ctx, opts->out);
3339 if (doc->has_xref_streams)
3340 writexrefstream(ctx, doc, opts, 0, xref_len, 1, 0, opts->first_xref_offset);
3341 else
3342 writexref(ctx, doc, opts, 0, xref_len, 1, 0, opts->first_xref_offset);
3343
3344 doc->xref_sections[doc->xref_base].end_ofs = fz_tell_output(ctx, opts->out);
3345 }
3346
3347 doc->xref_base = 0;
3348 doc->disallow_new_increments = 0;
3349 }
3350 else
3351 {
3352 writeobjects(ctx, doc, opts, 0);
3353
3354#ifdef DEBUG_WRITING
3355 dump_object_details(ctx, doc, opts);
3356#endif
3357
3358 /* Construct linked list of free object slots */
3359 lastfree = 0;
3360 for (num = 0; num < xref_len; num++)
3361 {
3362 if (!opts->use_list[num])
3363 {
3364 opts->gen_list[num]++;
3365 opts->ofs_list[lastfree] = num;
3366 lastfree = num;
3367 }
3368 }
3369
3370 if (opts->do_linear && opts->page_count > 0)
3371 {
3372 opts->main_xref_offset = fz_tell_output(ctx, opts->out);
3373 writexref(ctx, doc, opts, 0, opts->start, 0, 0, opts->first_xref_offset);
3374 opts->file_len = fz_tell_output(ctx, opts->out);
3375
3376 make_hint_stream(ctx, doc, opts);
3377 if (opts->do_ascii)
3378 {
3379 opts->hintstream_len *= 2;
3380 opts->hintstream_len += 1 + ((opts->hintstream_len+63)>>6);
3381 }
3382 opts->file_len += opts->hintstream_len;
3383 opts->main_xref_offset += opts->hintstream_len;
3384 update_linearization_params(ctx, doc, opts);
3385 fz_seek_output(ctx, opts->out, 0, 0);
3386 writeobjects(ctx, doc, opts, 1);
3387
3388 padto(ctx, opts->out, opts->main_xref_offset);
3389 writexref(ctx, doc, opts, 0, opts->start, 0, 0, opts->first_xref_offset);
3390 }
3391 else
3392 {
3393 opts->first_xref_offset = fz_tell_output(ctx, opts->out);
3394 writexref(ctx, doc, opts, 0, xref_len, 1, 0, opts->first_xref_offset);
3395 }
3396
3397 doc->xref_sections[0].end_ofs = fz_tell_output(ctx, opts->out);
3398 }
3399
3400 complete_signatures(ctx, doc, opts);
3401
3402 doc->dirty = 0;
3403 }
3404 fz_always(ctx)
3405 {
3406#ifdef DEBUG_LINEARIZATION
3407 page_objects_dump(opts);
3408 objects_dump(ctx, doc, opts);
3409#endif
3410 finalise_write_state(ctx, opts);
3411 if (opts->crypt != doc->crypt)
3412 pdf_drop_crypt(ctx, opts->crypt);
3413 doc->freeze_updates = 0;
3414 }
3415 fz_catch(ctx)
3416 {
3417 fz_rethrow(ctx);
3418 }
3419}
3420
3421/*
3422 Returns true if there are digital signatures waiting to
3423 to updated on save.
3424*/
3425int pdf_has_unsaved_sigs(fz_context *ctx, pdf_document *doc)
3426{
3427 int s;
3428 for (s = 0; s < doc->num_incremental_sections; s++)
3429 {
3430 pdf_xref *xref = &doc->xref_sections[doc->num_incremental_sections - s - 1];
3431
3432 if (xref->unsaved_sigs)
3433 return 1;
3434 }
3435 return 0;
3436}
3437
3438/*
3439 Write out the document to an output stream with all changes finalised.
3440*/
3441void pdf_write_document(fz_context *ctx, pdf_document *doc, fz_output *out, pdf_write_options *in_opts)
3442{
3443 pdf_write_options opts_defaults = pdf_default_write_options;
3444 pdf_write_state opts = { 0 };
3445
3446 if (!doc)
3447 return;
3448
3449 if (!in_opts)
3450 in_opts = &opts_defaults;
3451
3452 if (in_opts->do_incremental && doc->repair_attempted)
3453 fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes on a repaired file");
3454 if (in_opts->do_incremental && in_opts->do_garbage)
3455 fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with garbage collection");
3456 if (in_opts->do_incremental && in_opts->do_linear)
3457 fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with linearisation");
3458 if (in_opts->do_incremental && in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
3459 fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes when changing encryption");
3460 if (pdf_has_unsaved_sigs(ctx, doc) && !out->as_stream)
3461 fz_throw(ctx, FZ_ERROR_GENERIC, "Can't write pdf that has unsaved sigs to a fz_output unless it supports fz_stream_from_output!");
3462
3463 prepare_for_save(ctx, doc, in_opts);
3464
3465 opts.out = out;
3466
3467 do_pdf_save_document(ctx, doc, &opts, in_opts);
3468}
3469
3470/*
3471 Write out the document to a file with all changes finalised.
3472*/
3473void pdf_save_document(fz_context *ctx, pdf_document *doc, const char *filename, pdf_write_options *in_opts)
3474{
3475 pdf_write_options opts_defaults = pdf_default_write_options;
3476 pdf_write_state opts = { 0 };
3477
3478 if (!doc)
3479 return;
3480
3481 if (!in_opts)
3482 in_opts = &opts_defaults;
3483
3484 if (in_opts->do_incremental && !doc->file)
3485 fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes on a new document");
3486 if (in_opts->do_incremental && doc->repair_attempted)
3487 fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes on a repaired file");
3488 if (in_opts->do_incremental && in_opts->do_garbage)
3489 fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with garbage collection");
3490 if (in_opts->do_incremental && in_opts->do_linear)
3491 fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with linearisation");
3492 if (in_opts->do_incremental && in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
3493 fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes when changing encryption");
3494
3495 if (in_opts->do_appearance > 0)
3496 {
3497 int i, n = pdf_count_pages(ctx, doc);
3498 for (i = 0; i < n; ++i)
3499 {
3500 pdf_page *page = pdf_load_page(ctx, doc, i);
3501 fz_try(ctx)
3502 {
3503 if (in_opts->do_appearance > 1)
3504 {
3505 pdf_annot *annot;
3506 for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
3507 annot->needs_new_ap = 1;
3508 for (annot = pdf_first_widget(ctx, page); annot; annot = pdf_next_widget(ctx, annot))
3509 annot->needs_new_ap = 1;
3510 }
3511 pdf_update_page(ctx, page);
3512 }
3513 fz_always(ctx)
3514 fz_drop_page(ctx, &page->super);
3515 fz_catch(ctx)
3516 fz_warn(ctx, "could not create annotation appearances");
3517 }
3518 }
3519
3520 prepare_for_save(ctx, doc, in_opts);
3521
3522 if (in_opts->do_incremental)
3523 {
3524 /* If no changes, nothing to write */
3525 if (doc->num_incremental_sections == 0)
3526 return;
3527 opts.out = fz_new_output_with_path(ctx, filename, 1);
3528 }
3529 else
3530 {
3531 opts.out = fz_new_output_with_path(ctx, filename, 0);
3532 }
3533 fz_try(ctx)
3534 {
3535 do_pdf_save_document(ctx, doc, &opts, in_opts);
3536 fz_close_output(ctx, opts.out);
3537 }
3538 fz_always(ctx)
3539 {
3540 fz_drop_output(ctx, opts.out);
3541 opts.out = NULL;
3542 }
3543 fz_catch(ctx)
3544 {
3545 fz_rethrow(ctx);
3546 }
3547}
3548
3549typedef struct pdf_writer_s pdf_writer;
3550
3551struct pdf_writer_s
3552{
3553 fz_document_writer super;
3554 pdf_document *pdf;
3555 pdf_write_options opts;
3556 char *filename;
3557
3558 fz_rect mediabox;
3559 pdf_obj *resources;
3560 fz_buffer *contents;
3561};
3562
3563static fz_device *
3564pdf_writer_begin_page(fz_context *ctx, fz_document_writer *wri_, fz_rect mediabox)
3565{
3566 pdf_writer *wri = (pdf_writer*)wri_;
3567 wri->mediabox = mediabox;
3568 return pdf_page_write(ctx, wri->pdf, wri->mediabox, &wri->resources, &wri->contents);
3569}
3570
3571static void
3572pdf_writer_end_page(fz_context *ctx, fz_document_writer *wri_, fz_device *dev)
3573{
3574 pdf_writer *wri = (pdf_writer*)wri_;
3575 pdf_obj *obj = NULL;
3576
3577 fz_var(obj);
3578
3579 fz_try(ctx)
3580 {
3581 fz_close_device(ctx, dev);
3582 obj = pdf_add_page(ctx, wri->pdf, wri->mediabox, 0, wri->resources, wri->contents);
3583 pdf_insert_page(ctx, wri->pdf, -1, obj);
3584 }
3585 fz_always(ctx)
3586 {
3587 fz_drop_device(ctx, dev);
3588 pdf_drop_obj(ctx, obj);
3589 fz_drop_buffer(ctx, wri->contents);
3590 wri->contents = NULL;
3591 pdf_drop_obj(ctx, wri->resources);
3592 wri->resources = NULL;
3593 }
3594 fz_catch(ctx)
3595 fz_rethrow(ctx);
3596}
3597
3598static void
3599pdf_writer_close_writer(fz_context *ctx, fz_document_writer *wri_)
3600{
3601 pdf_writer *wri = (pdf_writer*)wri_;
3602 pdf_save_document(ctx, wri->pdf, wri->filename, &wri->opts);
3603}
3604
3605static void
3606pdf_writer_drop_writer(fz_context *ctx, fz_document_writer *wri_)
3607{
3608 pdf_writer *wri = (pdf_writer*)wri_;
3609 fz_drop_buffer(ctx, wri->contents);
3610 pdf_drop_obj(ctx, wri->resources);
3611 pdf_drop_document(ctx, wri->pdf);
3612 fz_free(ctx, wri->filename);
3613}
3614
3615fz_document_writer *
3616fz_new_pdf_writer(fz_context *ctx, const char *path, const char *options)
3617{
3618 pdf_writer *wri = fz_new_derived_document_writer(ctx, pdf_writer, pdf_writer_begin_page, pdf_writer_end_page, pdf_writer_close_writer, pdf_writer_drop_writer);
3619
3620 fz_try(ctx)
3621 {
3622 pdf_parse_write_options(ctx, &wri->opts, options);
3623 wri->filename = fz_strdup(ctx, path ? path : "out.pdf");
3624 wri->pdf = pdf_create_document(ctx);
3625 }
3626 fz_catch(ctx)
3627 {
3628 pdf_drop_document(ctx, wri->pdf);
3629 fz_free(ctx, wri->filename);
3630 fz_free(ctx, wri);
3631 fz_rethrow(ctx);
3632 }
3633
3634 return (fz_document_writer*)wri;
3635}
3636