/* pdf-write.c source code [MuPDF/source/pdf/pdf-write.c] */

1	#include "mupdf/fitz.h"
2	#include "mupdf/pdf.h"
3
4	#include <zlib.h>
5
6	#include <assert.h>
7	#include <limits.h>
8	#include <string.h>
9
10	#include <stdio.h> /* for debug printing */
/* Debug switches: uncomment one to compile in the matching #ifdef'd code. */
/* #define DEBUG_LINEARIZATION */
/* #define DEBUG_HEAP_SORT */
/* #define DEBUG_WRITING */

#define SIG_EXTRAS_SIZE (1024)

/* Dictionary key spellings, including the leading slash. */
#define SLASH_BYTE_RANGE ("/ByteRange")
#define SLASH_CONTENTS ("/Contents")
#define SLASH_FILTER ("/Filter")


typedef struct pdf_write_state_s pdf_write_state;
23
/*
	As part of linearization, we need to keep a list of what objects are used
	by what page. We do this by recording the objects used in a given page
	in a page_objects structure. We have a list of these structures (one per
	page) in the page_objects_list structure.

	The page_objects structure maintains a heap in the object array, so
	insertion takes log n time, and we can heapsort and dedupe at the end for
	a total worst case n log n time.

	The magic heap invariant is that:
		entry[n] >= entry[(n+1)*2-1] & entry[n] >= entry[(n+1)*2]
	or equivalently:
		entry[(n-1)>>1] >= entry[n]

	For a discussion of the heap data structure (and heapsort) see Kingston,
	"Algorithms and Data Structures".
*/
42
/*
 * Per-page record of the object numbers a page uses.
 *
 * The struct is allocated with extra trailing space so that object[]
 * really holds 'cap' entries; 'len' of them are in use.
 */
typedef struct {
	int num_shared;
	int page_object_number;
	int num_objects;
	int min_ofs;
	int max_ofs;
	/* Extensible list of objects used on this page */
	int cap;
	int len;
	int object[1];
} page_objects;
54
55	typedef struct {
56	int cap;
57	int len;
58	page_objects *page[`1`];
59	} page_objects_list;
60
61	struct pdf_write_state_s
62	{
63	fz_output *out;
64
65	int do_incremental;
66	int do_tight;
67	int do_ascii;
68	int do_expand;
69	int do_compress;
70	int do_compress_images;
71	int do_compress_fonts;
72	int do_garbage;
73	int do_linear;
74	int do_clean;
75	int do_encrypt;
76
77	int list_len;
78	int *use_list;
79	int64_t *ofs_list;
80	int *gen_list;
81	int *renumber_map;
82
83	/ The following extras are required for linearization /
84	int *rev_renumber_map;
85	int start;
86	int64_t first_xref_offset;
87	int64_t main_xref_offset;
88	int64_t first_xref_entry_offset;
89	int64_t file_len;
90	int hints_shared_offset;
91	int hintstream_len;
92	pdf_obj *linear_l;
93	pdf_obj *linear_h0;
94	pdf_obj *linear_h1;
95	pdf_obj *linear_o;
96	pdf_obj *linear_e;
97	pdf_obj *linear_n;
98	pdf_obj *linear_t;
99	pdf_obj *hints_s;
100	pdf_obj *hints_length;
101	int page_count;
102	page_objects_list *page_object_lists;
103	int crypt_object_number;
104	char opwd_utf8[`128`];
105	char upwd_utf8[`128`];
106	int permissions;
107	pdf_crypt *crypt;
108	};
109
/*
 * Constants for use with use_list.
 *
 * If use_list[num] = 0, then object num is unused.
 * If use_list[num] & PARAMS, then object num is the linearisation params obj.
 * If use_list[num] & CATALOGUE, then object num is used by the catalogue.
 * If use_list[num] & PAGE1, then object num is used by page 1.
 * If use_list[num] & SHARED, then object num is shared between pages.
 * If use_list[num] & PAGE_OBJECT then this must be the first object in a page.
 * If use_list[num] & OTHER_OBJECTS then this should appear in section 9.
 * Otherwise object num is used by page (use_list[num]>>USE_PAGE_SHIFT).
 */
/* Bit flags stored in use_list[]; bits above the low 8 hold a page number. */
enum
{
	USE_CATALOGUE = 2,
	USE_PAGE1 = 4,
	USE_SHARED = 8,
	USE_PARAMS = 16,
	USE_HINTS = 32,
	USE_PAGE_OBJECT = 64,
	USE_OTHER_OBJECTS = 128,
	USE_PAGE_MASK = ~255,
	USE_PAGE_SHIFT = 8
};
134
135	static void
136	expand_lists(fz_context ctx, pdf_write_state opts, int num)
137	{
138	int i;
139
140	/ objects are numbered 0..num and maybe two additional objects for linearization /
141	num += `3`;
142	opts->use_list = fz_realloc_array(ctx, opts->use_list, num, int);
143	opts->ofs_list = fz_realloc_array(ctx, opts->ofs_list, num, int64_t);
144	opts->gen_list = fz_realloc_array(ctx, opts->gen_list, num, int);
145	opts->renumber_map = fz_realloc_array(ctx, opts->renumber_map, num, int);
146	opts->rev_renumber_map = fz_realloc_array(ctx, opts->rev_renumber_map, num, int);
147
148	for (i = opts->list_len; i < num; i++)
149	{
150	opts->use_list[i] = `0`;
151	opts->ofs_list[i] = `0`;
152	opts->gen_list[i] = `0`;
153	opts->renumber_map[i] = i;
154	opts->rev_renumber_map[i] = i;
155	}
156	opts->list_len = num;
157	}
158
/*
 * page_objects and page_object_list handling functions
 */
162	static page_objects_list *
163	page_objects_list_create(fz_context *ctx)
164	{
165	page_objects_list pol = fz_calloc(ctx, `1`, sizeof(pol));
166
167	pol->cap = `1`;
168	pol->len = `0`;
169	return pol;
170	}
171
172	static void
173	page_objects_list_destroy(fz_context ctx, page_objects_list pol)
174	{
175	int i;
176
177	if (!pol)
178	return;
179	for (i = `0`; i < pol->len; i++)
180	{
181	fz_free(ctx, pol->page[i]);
182	}
183	fz_free(ctx, pol);
184	}
185
186	static void
187	page_objects_list_ensure(fz_context ctx, page_objects_list pol, int* newcap)
188	{
189	int oldcap = (*pol)->cap;
190	if (newcap <= oldcap)
191	return;
192	pol = fz_realloc(ctx, pol, sizeof(page_objects_list) + (newcap-`1`)*sizeof(page_objects *));
193	memset(&(pol)->page[oldcap], `0`, (newcap-oldcap)sizeof(page_objects *));
194	(*pol)->cap = newcap;
195	}
196
197	static page_objects *
198	page_objects_create(fz_context *ctx)
199	{
200	int initial_cap = `8`;
201	page_objects po = fz_calloc(ctx, `1`, sizeof(po) + (initial_cap-`1`) * sizeof(int));
202
203	po->cap = initial_cap;
204	po->len = `0`;
205	return po;
206	}
207
208	static void
209	page_objects_insert(fz_context ctx, page_objects ppo, int* i)
210	{
211	page_objects *po;
212
213	/ Make a page_objects if we don't have one /
214	if (*ppo == NULL)
215	*ppo = page_objects_create(ctx);
216
217	po = *ppo;
218	/ page_objects insertion: extend the page_objects by 1, and put us on the end /
219	if (po->len == po->cap)
220	{
221	po = fz_realloc(ctx, po, sizeof(page_objects) + (po->cap`2` - `1`)sizeof(int));
222	po->cap *= `2`;
223	*ppo = po;
224	}
225	po->object[po->len++] = i;
226	}
227
228	static void
229	page_objects_list_insert(fz_context ctx, pdf_write_state opts, int page, int object)
230	{
231	page_objects_list_ensure(ctx, &opts->page_object_lists, page+`1`);
232	if (object >= opts->list_len)
233	expand_lists(ctx, opts, object);
234	if (opts->page_object_lists->len < page+`1`)
235	opts->page_object_lists->len = page+`1`;
236	page_objects_insert(ctx, &opts->page_object_lists->page[page], object);
237	}
238
239	static void
240	page_objects_list_set_page_object(fz_context ctx, pdf_write_state opts, int page, int object)
241	{
242	page_objects_list_ensure(ctx, &opts->page_object_lists, page+`1`);
243	if (object >= opts->list_len)
244	expand_lists(ctx, opts, object);
245	opts->page_object_lists->page[page]->page_object_number = object;
246	}
247
248	static void
249	page_objects_sort(fz_context ctx, page_objects po)
250	{
251	int i, j;
252	int n = po->len;
253
254	/ Step 1: Make a heap /
255	/ Invariant: Valid heap in [0..i), unsorted elements in [i..n) /
256	for (i = `1`; i < n; i++)
257	{
258	/ Now bubble backwards to maintain heap invariant /
259	j = i;
260	while (j != `0`)
261	{
262	int tmp;
263	int k = (j-`1`)>>`1`;
264	if (po->object[k] >= po->object[j])
265	break;
266	tmp = po->object[k];
267	po->object[k] = po->object[j];
268	po->object[j] = tmp;
269	j = k;
270	}
271	}
272
273	/ Step 2: Heap sort /
274	/ Invariant: valid heap in [0..i), sorted list in [i..n) /
275	/ Initially: i = n /
276	for (i = n-`1`; i > `0`; i--)
277	{
278	/ Swap the maximum (0th) element from the page_objects into its place*
279	* in the sorted list (position i). */
280	int tmp = po->object[`0`];
281	po->object[`0`] = po->object[i];
282	po->object[i] = tmp;
283	/ Now, the page_objects is invalid because the 0th element is out*
284	* of place. Bubble it until the page_objects is valid. */
285	j = `0`;
286	while (`1`)
287	{
288	/ Children are k and k+1 /
289	int k = (j+`1`)*`2`-`1`;
290	/ If both children out of the page_objects, we're done /
291	if (k > i-`1`)
292	break;
293	/ If both are in the page_objects, pick the larger one /
294	if (k < i-`1` && po->object[k] < po->object[k+`1`])
295	k++;
296	/ If j is bigger than k (i.e. both of its children),*
297	* we're done */
298	if (po->object[j] > po->object[k])
299	break;
300	tmp = po->object[k];
301	po->object[k] = po->object[j];
302	po->object[j] = tmp;
303	j = k;
304	}
305	}
306	}
307
308	static int
309	order_ge(int ui, int uj)
310	{
311	/*
312	For linearization, we need to order the sections as follows:
313
314	Remaining pages (Part 7)
315	Shared objects (Part 8)
316	Objects not associated with any page (Part 9)
317	Any "other" objects
318	(Header)(Part 1)
319	(Linearization params) (Part 2)
320	(1st page Xref/Trailer) (Part 3)
321	Catalogue (and other document level objects) (Part 4)
322	First page (Part 6)
323	(Primary Hint stream) () (Part 5)*
324	Any free objects
325
326	Note, this is NOT the same order they appear in
327	the final file!
328
329	() The PDF reference gives us the option of putting the hint stream*
330	after the first page, and we take it, for simplicity.
331	*/
332
333	/ If the 2 objects are in the same section, then page object comes first. /
334	if (((ui ^ uj) & ~USE_PAGE_OBJECT) == `0`)
335	return ((ui & USE_PAGE_OBJECT) == `0`);
336	/ Put unused objects last /
337	else if (ui == `0`)
338	return `1`;
339	else if (uj == `0`)
340	return `0`;
341	/ Put the hint stream before that... /
342	else if (ui & USE_HINTS)
343	return `1`;
344	else if (uj & USE_HINTS)
345	return `0`;
346	/ Put page 1 before that... /
347	else if (ui & USE_PAGE1)
348	return `1`;
349	else if (uj & USE_PAGE1)
350	return `0`;
351	/ Put the catalogue before that... /
352	else if (ui & USE_CATALOGUE)
353	return `1`;
354	else if (uj & USE_CATALOGUE)
355	return `0`;
356	/ Put the linearization params before that... /
357	else if (ui & USE_PARAMS)
358	return `1`;
359	else if (uj & USE_PARAMS)
360	return `0`;
361	/ Put other objects before that /
362	else if (ui & USE_OTHER_OBJECTS)
363	return `1`;
364	else if (uj & USE_OTHER_OBJECTS)
365	return `0`;
366	/ Put shared objects before that... /
367	else if (ui & USE_SHARED)
368	return `1`;
369	else if (uj & USE_SHARED)
370	return `0`;
371	/ And otherwise, order by the page number on which*
372	* they are used. */
373	return (ui>>USE_PAGE_SHIFT) >= (uj>>USE_PAGE_SHIFT);
374	}
375
/*
 * Indirect heapsort.
 *
 * list: n indices into val[]; permuted in place.
 * val:  keys, never modified.
 * ge:   ordering predicate; ge(a, b) non-zero means a sorts at or after b.
 *
 * On return, ge(val[list[i+1]], val[list[i]]) holds for consecutive
 * entries. ge is never called when n <= 1.
 */
static void
heap_sort(int *list, int n, const int *val, int (*ge)(int, int))
{
	int i, j;

#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Initially:\n");
	for (i=0; i < n; i++)
	{
		fprintf(stderr, "%d: %d %x\n", i, list[i], val[list[i]]);
	}
#endif
	/* Step 1: Make a heap */
	/* Invariant: Valid heap in [0..i), unsorted elements in [i..n) */
	for (i = 1; i < n; i++)
	{
		/* Now bubble backwards to maintain heap invariant */
		j = i;
		while (j != 0)
		{
			int tmp;
			int k = (j-1)>>1;
			if (ge(val[list[k]], val[list[j]]))
				break;
			tmp = list[k];
			list[k] = list[j];
			list[j] = tmp;
			j = k;
		}
	}
#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Valid heap:\n");
	for (i=0; i < n; i++)
	{
		int k;
		fprintf(stderr, "%d: %d %x ", i, list[i], val[list[i]]);
		k = (i+1)*2-1;
		if (k < n)
		{
			if (ge(val[list[i]], val[list[k]]))
				fprintf(stderr, "OK ");
			else
				fprintf(stderr, "BAD ");
		}
		if (k+1 < n)
		{
			if (ge(val[list[i]], val[list[k+1]]))
				fprintf(stderr, "OK\n");
			else
				fprintf(stderr, "BAD\n");
		}
		else
			fprintf(stderr, "\n");
	}
#endif

	/* Step 2: Heap sort */
	/* Invariant: valid heap in [0..i), sorted list in [i..n) */
	/* Initially: i = n */
	for (i = n-1; i > 0; i--)
	{
		/* Swap the maximum (0th) element from the heap into its place
		 * in the sorted list (position i). */
		int tmp = list[0];
		list[0] = list[i];
		list[i] = tmp;
		/* Now, the heap is invalid because the 0th element is out
		 * of place. Bubble it until the heap is valid. */
		j = 0;
		while (1)
		{
			/* Children are k and k+1 */
			int k = (j+1)*2-1;
			/* If both children out of the heap, we're done */
			if (k > i-1)
				break;
			/* If both are in the heap, pick the larger one */
			if (k < i-1 && ge(val[list[k+1]], val[list[k]]))
				k++;
			/* If j is bigger than k (i.e. both of its children),
			 * we're done */
			if (ge(val[list[j]], val[list[k]]))
				break;
			tmp = list[k];
			list[k] = list[j];
			list[j] = tmp;
			j = k;
		}
	}
#ifdef DEBUG_HEAP_SORT
	fprintf(stderr, "Sorted:\n");
	for (i=0; i < n; i++)
	{
		fprintf(stderr, "%d: %d %x ", i, list[i], val[list[i]]);
		if (i+1 < n)
		{
			if (ge(val[list[i+1]], val[list[i]]))
				fprintf(stderr, "OK");
			else
				fprintf(stderr, "BAD");
		}
		fprintf(stderr, "\n");
	}
#endif
}
481
482	static void
483	page_objects_dedupe(fz_context ctx, page_objects po)
484	{
485	int i, j;
486	int n = po->len-`1`;
487
488	for (i = `0`; i < n; i++)
489	{
490	if (po->object[i] == po->object[i+`1`])
491	break;
492	}
493	j = i; / j points to the last valid one /
494	i++; / i points to the first one we haven't looked at /
495	for (; i < n; i++)
496	{
497	if (po->object[j] != po->object[i])
498	po->object[++j] = po->object[i];
499	}
500	po->len = j+`1`;
501	}
502
503	static void
504	page_objects_list_sort_and_dedupe(fz_context ctx, page_objects_list pol)
505	{
506	int i;
507	int n = pol->len;
508
509	for (i = `0`; i < n; i++)
510	{
511	page_objects_sort(ctx, pol->page[i]);
512	page_objects_dedupe(ctx, pol->page[i]);
513	}
514	}
515
#ifdef DEBUG_LINEARIZATION
/* Debug aid: print every page's object list and summary fields to stderr. */
static void
page_objects_dump(pdf_write_state *opts)
{
	page_objects_list *pol = opts->page_object_lists;
	int i, j;

	for (i = 0; i < pol->len; i++)
	{
		page_objects *p = pol->page[i];
		fprintf(stderr, "Page %d\n", i+1);
		for (j = 0; j < p->len; j++)
		{
			int o = p->object[j];
			fprintf(stderr, "\tObject %d: use=%x\n", o, opts->use_list[o]);
		}
		fprintf(stderr, "Byte range=%d->%d\n", p->min_ofs, p->max_ofs);
		fprintf(stderr, "Number of objects=%d, Number of shared objects=%d\n", p->num_objects, p->num_shared);
		fprintf(stderr, "Page object number=%d\n", p->page_object_number);
	}
}

/* Debug aid: print the use flags and offset of every xref object to stderr. */
static void
objects_dump(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
{
	int i;

	for (i=0; i < pdf_xref_len(ctx, doc); i++)
	{
		fprintf(stderr, "Object %d use=%x offset=%d\n", i, opts->use_list[i], (int)opts->ofs_list[i]);
	}
}
#endif
549
/*
 * Garbage collect objects not reachable from the trailer.
 */
553
554	/ Mark a reference. If it's been marked already, return NULL (as no further*
555	* processing is required). If it's not, return the resolved object so
556	* that we can continue our recursive marking. If it's a duff reference
557	* return the fact so that we can remove the reference at source.
558	*/
559	static pdf_obj markref(fz_context ctx, pdf_document doc, pdf_write_state opts, pdf_obj obj, int* *duff)
560	{
561	int num = pdf_to_num(ctx, obj);
562
563	if (num <= `0` \|\| num >= pdf_xref_len(ctx, doc))
564	{
565	*duff = `1`;
566	return NULL;
567	}
568	*duff = `0`;
569	if (opts->use_list[num])
570	return NULL;
571
572	opts->use_list[num] = `1`;
573
574	/ Bake in /Length in stream objects /
575	fz_try(ctx)
576	{
577	if (pdf_obj_num_is_stream(ctx, doc, num))
578	{
579	pdf_obj *len = pdf_dict_get(ctx, obj, PDF_NAME(Length));
580	if (pdf_is_indirect(ctx, len))
581	{
582	opts->use_list[pdf_to_num(ctx, len)] = `0`;
583	len = pdf_resolve_indirect(ctx, len);
584	pdf_dict_put(ctx, obj, PDF_NAME(Length), len);
585	}
586	}
587	}
588	fz_catch(ctx)
589	{
590	/ Leave broken /
591	}
592
593	obj = pdf_resolve_indirect(ctx, obj);
594	if (obj == NULL \|\| pdf_is_null(ctx, obj))
595	{
596	*duff = `1`;
597	opts->use_list[num] = `0`;
598	}
599
600	return obj;
601	}
602
/*
 * Tracing support for the mark phase. DEBUGGING_MARKING(A) executes A
 * only when DEBUG_MARK_AND_SWEEP is defined; otherwise it is a no-op.
 */
#ifdef DEBUG_MARK_AND_SWEEP
static int depth = 0;

/* Print 'depth' spaces, at most 16 at a time, without altering depth. */
static
void indent()
{
	int remaining = depth;

	while (remaining > 0)
	{
		int d = remaining;
		if (d > 16)
			d = 16;
		printf("%s", &"                "[16-d]);
		remaining -= d;
	}
}
#define DEBUGGING_MARKING(A) do { A; } while (0)
#else
#define DEBUGGING_MARKING(A) do { } while (0)
#endif
622
623	/ Recursively mark an object. If any references found are duff, then*
624	* replace them with nulls. */
625	static int markobj(fz_context ctx, pdf_document doc, pdf_write_state opts, pdf_obj obj)
626	{
627	int i;
628
629	DEBUGGING_MARKING(depth++);
630
631	while (pdf_is_indirect(ctx, obj))
632	{
633	int duff;
634	DEBUGGING_MARKING(indent(); printf("Marking object %d\n", pdf_to_num(ctx, obj)));
635	obj = markref(ctx, doc, opts, obj, &duff);
636	if (duff)
637	{
638	DEBUGGING_MARKING(depth--);
639	return `1`;
640	}
641	}
642
643	if (pdf_is_dict(ctx, obj))
644	{
645	int n = pdf_dict_len(ctx, obj);
646	for (i = `0`; i < n; i++)
647	{
648	DEBUGGING_MARKING(indent(); printf("DICT[%d/%d] = %s\n", i, n, pdf_to_name(ctx, pdf_dict_get_key(ctx, obj, i))));
649	if (markobj(ctx, doc, opts, pdf_dict_get_val(ctx, obj, i)))
650	pdf_dict_put_val_null(ctx, obj, i);
651	}
652	}
653
654	else if (pdf_is_array(ctx, obj))
655	{
656	int n = pdf_array_len(ctx, obj);
657	for (i = `0`; i < n; i++)
658	{
659	DEBUGGING_MARKING(indent(); printf("ARRAY[%d/%d]\n", i, n));
660	if (markobj(ctx, doc, opts, pdf_array_get(ctx, obj, i)))
661	pdf_array_put(ctx, obj, i, PDF_NULL);
662	}
663	}
664
665	DEBUGGING_MARKING(depth--);
666
667	return `0`;
668	}
669
670	/*
671	* Scan for and remove duplicate objects (slow)
672	*/
673
674	static void removeduplicateobjs(fz_context ctx, pdf_document doc, pdf_write_state *opts)
675	{
676	int num, other, max_num;
677	int xref_len = pdf_xref_len(ctx, doc);
678
679	for (num = `1`; num < xref_len; num++)
680	{
681	/ Only compare an object to objects preceding it /
682	for (other = `1`; other < num; other++)
683	{
684	pdf_obj a, b;
685	int newnum, streama = `0`, streamb = `0`, differ = `0`;
686
687	if (num == other \|\| !opts->use_list[num] \|\| !opts->use_list[other])
688	continue;
689
690	/ TODO: resolve indirect references to see if we can omit them /
691
692	/*
693	* Comparing stream objects data contents would take too long.
694	*
695	* pdf_obj_num_is_stream calls pdf_cache_object and ensures
696	* that the xref table has the objects loaded.
697	*/
698	fz_try(ctx)
699	{
700	streama = pdf_obj_num_is_stream(ctx, doc, num);
701	streamb = pdf_obj_num_is_stream(ctx, doc, other);
702	differ = streama \|\| streamb;
703	if (streama && streamb && opts->do_garbage >= `4`)
704	differ = `0`;
705	}
706	fz_catch(ctx)
707	{
708	/ Assume different /
709	differ = `1`;
710	}
711	if (differ)
712	continue;
713
714	a = pdf_get_xref_entry(ctx, doc, num)->obj;
715	b = pdf_get_xref_entry(ctx, doc, other)->obj;
716
717	if (pdf_objcmp(ctx, a, b))
718	continue;
719
720	if (streama && streamb)
721	{
722	/ Check to see if streams match too. /
723	fz_buffer *sa = NULL;
724	fz_buffer *sb = NULL;
725
726	fz_var(sa);
727	fz_var(sb);
728
729	differ = `1`;
730	fz_try(ctx)
731	{
732	unsigned char dataa, datab;
733	size_t lena, lenb;
734	sa = pdf_load_raw_stream_number(ctx, doc, num);
735	sb = pdf_load_raw_stream_number(ctx, doc, other);
736	lena = fz_buffer_storage(ctx, sa, &dataa);
737	lenb = fz_buffer_storage(ctx, sb, &datab);
738	if (lena == lenb && memcmp(dataa, datab, lena) == `0`)
739	differ = `0`;
740	}
741	fz_always(ctx)
742	{
743	fz_drop_buffer(ctx, sa);
744	fz_drop_buffer(ctx, sb);
745	}
746	fz_catch(ctx)
747	{
748	fz_rethrow(ctx);
749	}
750	if (differ)
751	continue;
752	}
753
754	/ Keep the lowest numbered object /
755	newnum = fz_mini(num, other);
756	max_num = fz_maxi(num, other);
757	if (max_num >= opts->list_len)
758	expand_lists(ctx, opts, max_num);
759	opts->renumber_map[num] = newnum;
760	opts->renumber_map[other] = newnum;
761	opts->rev_renumber_map[newnum] = num; / Either will do /
762	opts->use_list[fz_maxi(num, other)] = `0`;
763
764	/ One duplicate was found, do not look for another /
765	break;
766	}
767	}
768	}
769
770	/*
771	* Renumber objects sequentially so the xref is more compact
772	*
773	* This code assumes that any opts->renumber_map[n] <= n for all n.
774	*/
775
776	static void compactxref(fz_context ctx, pdf_document doc, pdf_write_state *opts)
777	{
778	int num, newnum;
779	int xref_len = pdf_xref_len(ctx, doc);
780
781	/*
782	* Update renumber_map in-place, clustering all used
783	* objects together at low object ids. Objects that
784	* already should be renumbered will have their new
785	* object ids be updated to reflect the compaction.
786	*/
787
788	if (xref_len > opts->list_len)
789	expand_lists(ctx, opts, xref_len-`1`);
790
791	newnum = `1`;
792	for (num = `1`; num < xref_len; num++)
793	{
794	/ If it's not used, map it to zero /
795	if (!opts->use_list[opts->renumber_map[num]])
796	{
797	opts->renumber_map[num] = `0`;
798	}
799	/ If it's not moved, compact it. /
800	else if (opts->renumber_map[num] == num)
801	{
802	opts->rev_renumber_map[newnum] = opts->rev_renumber_map[num];
803	opts->renumber_map[num] = newnum++;
804	}
805	/ Otherwise it's used, and moved. We know that it must have*
806	* moved down, so the place it's moved to will be in the right
807	* place already. */
808	else
809	{
810	opts->renumber_map[num] = opts->renumber_map[opts->renumber_map[num]];
811	}
812	}
813	}
814
815	/*
816	* Update indirect objects according to renumbering established when
817	* removing duplicate objects and compacting the xref.
818	*/
819
820	static void renumberobj(fz_context ctx, pdf_document doc, pdf_write_state opts, pdf_obj obj)
821	{
822	int i;
823	int xref_len = pdf_xref_len(ctx, doc);
824
825	if (pdf_is_dict(ctx, obj))
826	{
827	int n = pdf_dict_len(ctx, obj);
828	for (i = `0`; i < n; i++)
829	{
830	pdf_obj *key = pdf_dict_get_key(ctx, obj, i);
831	pdf_obj *val = pdf_dict_get_val(ctx, obj, i);
832	if (pdf_is_indirect(ctx, val))
833	{
834	int o = pdf_to_num(ctx, val);
835	if (o >= xref_len \|\| o <= `0` \|\| opts->renumber_map[o] == `0`)
836	val = PDF_NULL;
837	else
838	val = pdf_new_indirect(ctx, doc, opts->renumber_map[o], `0`);
839	pdf_dict_put_drop(ctx, obj, key, val);
840	}
841	else
842	{
843	renumberobj(ctx, doc, opts, val);
844	}
845	}
846	}
847
848	else if (pdf_is_array(ctx, obj))
849	{
850	int n = pdf_array_len(ctx, obj);
851	for (i = `0`; i < n; i++)
852	{
853	pdf_obj *val = pdf_array_get(ctx, obj, i);
854	if (pdf_is_indirect(ctx, val))
855	{
856	int o = pdf_to_num(ctx, val);
857	if (o >= xref_len \|\| o <= `0` \|\| opts->renumber_map[o] == `0`)
858	val = PDF_NULL;
859	else
860	val = pdf_new_indirect(ctx, doc, opts->renumber_map[o], `0`);
861	pdf_array_put_drop(ctx, obj, i, val);
862	}
863	else
864	{
865	renumberobj(ctx, doc, opts, val);
866	}
867	}
868	}
869	}
870
871	static void renumberobjs(fz_context ctx, pdf_document doc, pdf_write_state *opts)
872	{
873	pdf_xref_entry *newxref = NULL;
874	int newlen;
875	int num;
876	int *new_use_list;
877	int xref_len = pdf_xref_len(ctx, doc);
878
879	new_use_list = fz_calloc(ctx, pdf_xref_len(ctx, doc)+`3`, sizeof(int));
880
881	fz_var(newxref);
882	fz_try(ctx)
883	{
884	/ Apply renumber map to indirect references in all objects in xref /
885	renumberobj(ctx, doc, opts, pdf_trailer(ctx, doc));
886	for (num = `0`; num < xref_len; num++)
887	{
888	pdf_obj *obj;
889	int to = opts->renumber_map[num];
890
891	/ If object is going to be dropped, don't bother renumbering /
892	if (to == `0`)
893	continue;
894
895	obj = pdf_get_xref_entry(ctx, doc, num)->obj;
896
897	if (pdf_is_indirect(ctx, obj))
898	{
899	obj = pdf_new_indirect(ctx, doc, to, `0`);
900	fz_try(ctx)
901	pdf_update_object(ctx, doc, num, obj);
902	fz_always(ctx)
903	pdf_drop_obj(ctx, obj);
904	fz_catch(ctx)
905	fz_rethrow(ctx);
906	}
907	else
908	{
909	renumberobj(ctx, doc, opts, obj);
910	}
911	}
912
913	/ Create new table for the reordered, compacted xref /
914	newxref = fz_malloc_array(ctx, xref_len + `3`, pdf_xref_entry);
915	newxref[`0`] = *pdf_get_xref_entry(ctx, doc, `0`);
916
917	/ Move used objects into the new compacted xref /
918	newlen = `0`;
919	for (num = `1`; num < xref_len; num++)
920	{
921	if (opts->use_list[num])
922	{
923	pdf_xref_entry *e;
924	if (newlen < opts->renumber_map[num])
925	newlen = opts->renumber_map[num];
926	e = pdf_get_xref_entry(ctx, doc, num);
927	newxref[opts->renumber_map[num]] = *e;
928	if (e->obj)
929	{
930	pdf_set_obj_parent(ctx, e->obj, opts->renumber_map[num]);
931	e->obj = NULL;
932	}
933	new_use_list[opts->renumber_map[num]] = opts->use_list[num];
934	}
935	else
936	{
937	pdf_xref_entry *e = pdf_get_xref_entry(ctx, doc, num);
938	pdf_drop_obj(ctx, e->obj);
939	e->obj = NULL;
940	fz_drop_buffer(ctx, e->stm_buf);
941	e->stm_buf = NULL;
942	}
943	}
944
945	pdf_replace_xref(ctx, doc, newxref, newlen + `1`);
946	newxref = NULL;
947	}
948	fz_catch(ctx)
949	{
950	fz_free(ctx, newxref);
951	fz_free(ctx, new_use_list);
952	fz_rethrow(ctx);
953	}
954	fz_free(ctx, opts->use_list);
955	opts->use_list = new_use_list;
956
957	for (num = `1`; num < xref_len; num++)
958	{
959	opts->renumber_map[num] = num;
960	}
961	}
962
963	static void page_objects_list_renumber(pdf_write_state *opts)
964	{
965	int i, j;
966
967	for (i = `0`; i < opts->page_object_lists->len; i++)
968	{
969	page_objects *po = opts->page_object_lists->page[i];
970	for (j = `0`; j < po->len; j++)
971	{
972	po->object[j] = opts->renumber_map[po->object[j]];
973	}
974	po->page_object_number = opts->renumber_map[po->page_object_number];
975	}
976	}
977
978	static void
979	mark_all(fz_context ctx, pdf_document doc, pdf_write_state opts, pdf_obj val, int flag, int page)
980	{
981	if (pdf_mark_obj(ctx, val))
982	return;
983
984	fz_try(ctx)
985	{
986	if (pdf_is_indirect(ctx, val))
987	{
988	int num = pdf_to_num(ctx, val);
989	if (num >= opts->list_len)
990	expand_lists(ctx, opts, num);
991	if (opts->use_list[num] & USE_PAGE_MASK)
992	/ Already used /
993	opts->use_list[num] \|= USE_SHARED;
994	else
995	opts->use_list[num] \|= flag;
996	if (page >= `0`)
997	page_objects_list_insert(ctx, opts, page, num);
998	}
999
1000	if (pdf_is_dict(ctx, val))
1001	{
1002	int i, n = pdf_dict_len(ctx, val);
1003
1004	for (i = `0`; i < n; i++)
1005	{
1006	mark_all(ctx, doc, opts, pdf_dict_get_val(ctx, val, i), flag, page);
1007	}
1008	}
1009	else if (pdf_is_array(ctx, val))
1010	{
1011	int i, n = pdf_array_len(ctx, val);
1012
1013	for (i = `0`; i < n; i++)
1014	{
1015	mark_all(ctx, doc, opts, pdf_array_get(ctx, val, i), flag, page);
1016	}
1017	}
1018	}
1019	fz_always(ctx)
1020	{
1021	pdf_unmark_obj(ctx, val);
1022	}
1023	fz_catch(ctx)
1024	{
1025	fz_rethrow(ctx);
1026	}
1027	}
1028
1029	static int
1030	mark_pages(fz_context ctx, pdf_document doc, pdf_write_state opts, pdf_obj val, int pagenum)
1031	{
1032	if (pdf_mark_obj(ctx, val))
1033	return pagenum;
1034
1035	fz_try(ctx)
1036	{
1037	if (pdf_is_dict(ctx, val))
1038	{
1039	if (pdf_name_eq(ctx, PDF_NAME(Page), pdf_dict_get(ctx, val, PDF_NAME(Type))))
1040	{
1041	int num = pdf_to_num(ctx, val);
1042	pdf_unmark_obj(ctx, val);
1043	mark_all(ctx, doc, opts, val, pagenum == `0` ? USE_PAGE1 : (pagenum<<USE_PAGE_SHIFT), pagenum);
1044	page_objects_list_set_page_object(ctx, opts, pagenum, num);
1045	pagenum++;
1046	opts->use_list[num] \|= USE_PAGE_OBJECT;
1047	}
1048	else
1049	{
1050	int i, n = pdf_dict_len(ctx, val);
1051
1052	for (i = `0`; i < n; i++)
1053	{
1054	pdf_obj *key = pdf_dict_get_key(ctx, val, i);
1055	pdf_obj *obj = pdf_dict_get_val(ctx, val, i);
1056
1057	if (pdf_name_eq(ctx, PDF_NAME(Kids), key))
1058	pagenum = mark_pages(ctx, doc, opts, obj, pagenum);
1059	else
1060	mark_all(ctx, doc, opts, obj, USE_CATALOGUE, -`1`);
1061	}
1062
1063	if (pdf_is_indirect(ctx, val))
1064	{
1065	int num = pdf_to_num(ctx, val);
1066	opts->use_list[num] \|= USE_CATALOGUE;
1067	}
1068	}
1069	}
1070	else if (pdf_is_array(ctx, val))
1071	{
1072	int i, n = pdf_array_len(ctx, val);
1073
1074	for (i = `0`; i < n; i++)
1075	{
1076	pagenum = mark_pages(ctx, doc, opts, pdf_array_get(ctx, val, i), pagenum);
1077	}
1078	if (pdf_is_indirect(ctx, val))
1079	{
1080	int num = pdf_to_num(ctx, val);
1081	opts->use_list[num] \|= USE_CATALOGUE;
1082	}
1083	}
1084	}
1085	fz_always(ctx)
1086	{
1087	pdf_unmark_obj(ctx, val);
1088	}
1089	fz_catch(ctx)
1090	{
1091	fz_rethrow(ctx);
1092	}
1093	return pagenum;
1094	}
1095
1096	static void
1097	mark_root(fz_context ctx, pdf_document doc, pdf_write_state opts, pdf_obj dict)
1098	{
1099	int i, n = pdf_dict_len(ctx, dict);
1100
1101	if (pdf_mark_obj(ctx, dict))
1102	return;
1103
1104	fz_try(ctx)
1105	{
1106	if (pdf_is_indirect(ctx, dict))
1107	{
1108	int num = pdf_to_num(ctx, dict);
1109	opts->use_list[num] \|= USE_CATALOGUE;
1110	}
1111
1112	for (i = `0`; i < n; i++)
1113	{
1114	pdf_obj *key = pdf_dict_get_key(ctx, dict, i);
1115	pdf_obj *val = pdf_dict_get_val(ctx, dict, i);
1116
1117	if (pdf_name_eq(ctx, PDF_NAME(Pages), key))
1118	opts->page_count = mark_pages(ctx, doc, opts, val, `0`);
1119	else if (pdf_name_eq(ctx, PDF_NAME(Names), key))
1120	mark_all(ctx, doc, opts, val, USE_OTHER_OBJECTS, -`1`);
1121	else if (pdf_name_eq(ctx, PDF_NAME(Dests), key))
1122	mark_all(ctx, doc, opts, val, USE_OTHER_OBJECTS, -`1`);
1123	else if (pdf_name_eq(ctx, PDF_NAME(Outlines), key))
1124	{
1125	int section;
1126	/ Look at PageMode to decide whether to*
1127	* USE_OTHER_OBJECTS or USE_PAGE1 here. */
1128	if (pdf_name_eq(ctx, pdf_dict_get(ctx, dict, PDF_NAME(PageMode)), PDF_NAME(UseOutlines)))
1129	section = USE_PAGE1;
1130	else
1131	section = USE_OTHER_OBJECTS;
1132	mark_all(ctx, doc, opts, val, section, -`1`);
1133	}
1134	else
1135	mark_all(ctx, doc, opts, val, USE_CATALOGUE, -`1`);
1136	}
1137	}
1138	fz_always(ctx)
1139	{
1140	pdf_unmark_obj(ctx, dict);
1141	}
1142	fz_catch(ctx)
1143	{
1144	fz_rethrow(ctx);
1145	}
1146	}
1147
1148	static void
1149	mark_trailer(fz_context ctx, pdf_document doc, pdf_write_state opts, pdf_obj dict)
1150	{
1151	int i, n = pdf_dict_len(ctx, dict);
1152
1153	if (pdf_mark_obj(ctx, dict))
1154	return;
1155
1156	fz_try(ctx)
1157	{
1158	for (i = `0`; i < n; i++)
1159	{
1160	pdf_obj *key = pdf_dict_get_key(ctx, dict, i);
1161	pdf_obj *val = pdf_dict_get_val(ctx, dict, i);
1162
1163	if (pdf_name_eq(ctx, PDF_NAME(Root), key))
1164	mark_root(ctx, doc, opts, val);
1165	else
1166	mark_all(ctx, doc, opts, val, USE_CATALOGUE, -`1`);
1167	}
1168	}
1169	fz_always(ctx)
1170	{
1171	pdf_unmark_obj(ctx, dict);
1172	}
1173	fz_catch(ctx)
1174	{
1175	fz_rethrow(ctx);
1176	}
1177	}
1178
1179	static void
1180	add_linearization_objs(fz_context ctx, pdf_document doc, pdf_write_state *opts)
1181	{
1182	pdf_obj *params_obj = NULL;
1183	pdf_obj *params_ref = NULL;
1184	pdf_obj *hint_obj = NULL;
1185	pdf_obj *hint_ref = NULL;
1186	pdf_obj *o;
1187	int params_num, hint_num;
1188
1189	fz_var(params_obj);
1190	fz_var(params_ref);
1191	fz_var(hint_obj);
1192	fz_var(hint_ref);
1193
1194	fz_try(ctx)
1195	{
1196	/ Linearization params /
1197	params_obj = pdf_new_dict(ctx, doc, `10`);
1198	params_ref = pdf_add_object(ctx, doc, params_obj);
1199	params_num = pdf_to_num(ctx, params_ref);
1200
1201	opts->use_list[params_num] = USE_PARAMS;
1202	opts->renumber_map[params_num] = params_num;
1203	opts->rev_renumber_map[params_num] = params_num;
1204	opts->gen_list[params_num] = `0`;
1205	pdf_dict_put_real(ctx, params_obj, PDF_NAME(Linearized), `1.0f`);
1206	opts->linear_l = pdf_new_int(ctx, INT_MIN);
1207	pdf_dict_put(ctx, params_obj, PDF_NAME(L), opts->linear_l);
1208	opts->linear_h0 = pdf_new_int(ctx, INT_MIN);
1209	o = pdf_new_array(ctx, doc, `2`);
1210	pdf_dict_put_drop(ctx, params_obj, PDF_NAME(H), o);
1211	pdf_array_push(ctx, o, opts->linear_h0);
1212	opts->linear_h1 = pdf_new_int(ctx, INT_MIN);
1213	pdf_array_push(ctx, o, opts->linear_h1);
1214	opts->linear_o = pdf_new_int(ctx, INT_MIN);
1215	pdf_dict_put(ctx, params_obj, PDF_NAME(O), opts->linear_o);
1216	opts->linear_e = pdf_new_int(ctx, INT_MIN);
1217	pdf_dict_put(ctx, params_obj, PDF_NAME(E), opts->linear_e);
1218	opts->linear_n = pdf_new_int(ctx, INT_MIN);
1219	pdf_dict_put(ctx, params_obj, PDF_NAME(N), opts->linear_n);
1220	opts->linear_t = pdf_new_int(ctx, INT_MIN);
1221	pdf_dict_put(ctx, params_obj, PDF_NAME(T), opts->linear_t);
1222
1223	/ Primary hint stream /
1224	hint_obj = pdf_new_dict(ctx, doc, `10`);
1225	hint_ref = pdf_add_object(ctx, doc, hint_obj);
1226	hint_num = pdf_to_num(ctx, hint_ref);
1227
1228	opts->use_list[hint_num] = USE_HINTS;
1229	opts->renumber_map[hint_num] = hint_num;
1230	opts->rev_renumber_map[hint_num] = hint_num;
1231	opts->gen_list[hint_num] = `0`;
1232	pdf_dict_put_int(ctx, hint_obj, PDF_NAME(P), `0`);
1233	opts->hints_s = pdf_new_int(ctx, INT_MIN);
1234	pdf_dict_put(ctx, hint_obj, PDF_NAME(S), opts->hints_s);
1235	/ FIXME: Do we have thumbnails? Do a T entry /
1236	/ FIXME: Do we have outlines? Do an O entry /
1237	/ FIXME: Do we have article threads? Do an A entry /
1238	/ FIXME: Do we have named destinations? Do a E entry /
1239	/ FIXME: Do we have interactive forms? Do a V entry /
1240	/ FIXME: Do we have document information? Do an I entry /
1241	/ FIXME: Do we have logical structure hierarchy? Do a C entry /
1242	/ FIXME: Do L, Page Label hint table /
1243	pdf_dict_put(ctx, hint_obj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
1244	opts->hints_length = pdf_new_int(ctx, INT_MIN);
1245	pdf_dict_put(ctx, hint_obj, PDF_NAME(Length), opts->hints_length);
1246	pdf_get_xref_entry(ctx, doc, hint_num)->stm_ofs = `0`;
1247	}
1248	fz_always(ctx)
1249	{
1250	pdf_drop_obj(ctx, params_obj);
1251	pdf_drop_obj(ctx, params_ref);
1252	pdf_drop_obj(ctx, hint_ref);
1253	pdf_drop_obj(ctx, hint_obj);
1254	}
1255	fz_catch(ctx)
1256	{
1257	fz_rethrow(ctx);
1258	}
1259	}
1260
1261	static void
1262	lpr_inherit_res_contents(fz_context ctx, pdf_obj res, pdf_obj dict, pdf_obj text)
1263	{
1264	pdf_obj o, r;
1265	int i, n;
1266
1267	/ If the parent node doesn't have an entry of this type, give up. /
1268	o = pdf_dict_get(ctx, dict, text);
1269	if (!o)
1270	return;
1271
1272	/ If the resources dict we are building doesn't have an entry of this*
1273	* type yet, then just copy it (ensuring it's not a reference) */
1274	r = pdf_dict_get(ctx, res, text);
1275	if (r == NULL)
1276	{
1277	o = pdf_resolve_indirect(ctx, o);
1278	if (pdf_is_dict(ctx, o))
1279	o = pdf_copy_dict(ctx, o);
1280	else if (pdf_is_array(ctx, o))
1281	o = pdf_copy_array(ctx, o);
1282	else
1283	o = NULL;
1284	if (o)
1285	pdf_dict_put_drop(ctx, res, text, o);
1286	return;
1287	}
1288
1289	/ Otherwise we need to merge o into r /
1290	if (pdf_is_dict(ctx, o))
1291	{
1292	n = pdf_dict_len(ctx, o);
1293	for (i = `0`; i < n; i++)
1294	{
1295	pdf_obj *key = pdf_dict_get_key(ctx, o, i);
1296	pdf_obj *val = pdf_dict_get_val(ctx, o, i);
1297
1298	if (pdf_dict_get(ctx, res, key))
1299	continue;
1300	pdf_dict_put(ctx, res, key, val);
1301	}
1302	}
1303	}
1304
1305	static void
1306	lpr_inherit_res(fz_context ctx, pdf_obj node, int depth, pdf_obj *dict)
1307	{
1308	while (`1`)
1309	{
1310	pdf_obj *o;
1311
1312	node = pdf_dict_get(ctx, node, PDF_NAME(Parent));
1313	depth--;
1314	if (!node \|\| depth < `0`)
1315	break;
1316
1317	o = pdf_dict_get(ctx, node, PDF_NAME(Resources));
1318	if (o)
1319	{
1320	lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(ExtGState));
1321	lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(ColorSpace));
1322	lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(Pattern));
1323	lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(Shading));
1324	lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(XObject));
1325	lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(Font));
1326	lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(ProcSet));
1327	lpr_inherit_res_contents(ctx, dict, o, PDF_NAME(Properties));
1328	}
1329	}
1330	}
1331
1332	static pdf_obj *
1333	lpr_inherit(fz_context ctx, pdf_obj node, char text, int* depth)
1334	{
1335	do
1336	{
1337	pdf_obj *o = pdf_dict_gets(ctx, node, text);
1338
1339	if (o)
1340	return pdf_resolve_indirect(ctx, o);
1341	node = pdf_dict_get(ctx, node, PDF_NAME(Parent));
1342	depth--;
1343	}
1344	while (depth >= `0` && node);
1345
1346	return NULL;
1347	}
1348
1349	static int
1350	lpr(fz_context ctx, pdf_document doc, pdf_obj node, int* depth, int page)
1351	{
1352	pdf_obj *kids;
1353	pdf_obj *o = NULL;
1354	int i, n;
1355
1356	if (pdf_mark_obj(ctx, node))
1357	return page;
1358
1359	fz_var(o);
1360
1361	fz_try(ctx)
1362	{
1363	if (pdf_name_eq(ctx, PDF_NAME(Page), pdf_dict_get(ctx, node, PDF_NAME(Type))))
1364	{
1365	pdf_obj r; /* r is deliberately not cleaned up /
1366
1367	/ Copy resources down to the child /
1368	o = pdf_keep_obj(ctx, pdf_dict_get(ctx, node, PDF_NAME(Resources)));
1369	if (!o)
1370	{
1371	o = pdf_keep_obj(ctx, pdf_new_dict(ctx, doc, `2`));
1372	pdf_dict_put(ctx, node, PDF_NAME(Resources), o);
1373	}
1374	lpr_inherit_res(ctx, node, depth, o);
1375	r = lpr_inherit(ctx, node, "MediaBox", depth);
1376	if (r)
1377	pdf_dict_put(ctx, node, PDF_NAME(MediaBox), r);
1378	r = lpr_inherit(ctx, node, "CropBox", depth);
1379	if (r)
1380	pdf_dict_put(ctx, node, PDF_NAME(CropBox), r);
1381	r = lpr_inherit(ctx, node, "BleedBox", depth);
1382	if (r)
1383	pdf_dict_put(ctx, node, PDF_NAME(BleedBox), r);
1384	r = lpr_inherit(ctx, node, "TrimBox", depth);
1385	if (r)
1386	pdf_dict_put(ctx, node, PDF_NAME(TrimBox), r);
1387	r = lpr_inherit(ctx, node, "ArtBox", depth);
1388	if (r)
1389	pdf_dict_put(ctx, node, PDF_NAME(ArtBox), r);
1390	r = lpr_inherit(ctx, node, "Rotate", depth);
1391	if (r)
1392	pdf_dict_put(ctx, node, PDF_NAME(Rotate), r);
1393	page++;
1394	}
1395	else
1396	{
1397	kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
1398	n = pdf_array_len(ctx, kids);
1399	for(i = `0`; i < n; i++)
1400	{
1401	page = lpr(ctx, doc, pdf_array_get(ctx, kids, i), depth+`1`, page);
1402	}
1403	pdf_dict_del(ctx, node, PDF_NAME(Resources));
1404	pdf_dict_del(ctx, node, PDF_NAME(MediaBox));
1405	pdf_dict_del(ctx, node, PDF_NAME(CropBox));
1406	pdf_dict_del(ctx, node, PDF_NAME(BleedBox));
1407	pdf_dict_del(ctx, node, PDF_NAME(TrimBox));
1408	pdf_dict_del(ctx, node, PDF_NAME(ArtBox));
1409	pdf_dict_del(ctx, node, PDF_NAME(Rotate));
1410	}
1411	}
1412	fz_always(ctx)
1413	{
1414	pdf_drop_obj(ctx, o);
1415	}
1416	fz_catch(ctx)
1417	{
1418	fz_rethrow(ctx);
1419	}
1420
1421	pdf_unmark_obj(ctx, node);
1422
1423	return page;
1424	}
1425
1426	void
1427	pdf_localise_page_resources(fz_context ctx, pdf_document doc)
1428	{
1429	if (doc->resources_localised)
1430	return;
1431
1432	lpr(ctx, doc, pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), PDF_NAME(Pages), NULL), `0`, `0`);
1433
1434	doc->resources_localised = `1`;
1435	}
1436
1437	static void
1438	linearize(fz_context ctx, pdf_document doc, pdf_write_state *opts)
1439	{
1440	int i;
1441	int n = pdf_xref_len(ctx, doc) + `2`;
1442	int *reorder;
1443	int *rev_renumber_map;
1444
1445	opts->page_object_lists = page_objects_list_create(ctx);
1446
1447	/ Ensure that every page has local references of its resources /
1448	/ FIXME: We could 'thin' the resources according to what is actually*
1449	* required for each page, but this would require us to run the page
1450	* content streams. */
1451	pdf_localise_page_resources(ctx, doc);
1452
1453	/ Walk the objects for each page, marking which ones are used, where /
1454	memset(opts->use_list, `0`, n * sizeof(int));
1455	mark_trailer(ctx, doc, opts, pdf_trailer(ctx, doc));
1456
1457	/ Add new objects required for linearization /
1458	add_linearization_objs(ctx, doc, opts);
1459
1460	#ifdef DEBUG_WRITING
1461	fprintf(stderr, "Usage calculated:\n");
1462	for (i=`0`; i < pdf_xref_len(ctx, doc); i++)
1463	{
1464	fprintf(stderr, "%d: use=%d\n", i, opts->use_list[i]);
1465	}
1466	#endif
1467
1468	/ Allocate/init the structures used for renumbering the objects /
1469	reorder = fz_calloc(ctx, n, sizeof(int));
1470	rev_renumber_map = fz_calloc(ctx, n, sizeof(int));
1471	for (i = `0`; i < n; i++)
1472	{
1473	reorder[i] = i;
1474	}
1475
1476	/ Heap sort the reordering /
1477	heap_sort(reorder+`1`, n-`1`, opts->use_list, &order_ge);
1478
1479	#ifdef DEBUG_WRITING
1480	fprintf(stderr, "Reordered:\n");
1481	for (i=`1`; i < pdf_xref_len(ctx, doc); i++)
1482	{
1483	fprintf(stderr, "%d: use=%d\n", i, opts->use_list[reorder[i]]);
1484	}
1485	#endif
1486
1487	/ Find the split point /
1488	for (i = `1`; (opts->use_list[reorder[i]] & USE_PARAMS) == `0`; i++) {}
1489	opts->start = i;
1490
1491	/ Roll the reordering into the renumber_map /
1492	for (i = `0`; i < n; i++)
1493	{
1494	opts->renumber_map[reorder[i]] = i;
1495	rev_renumber_map[i] = opts->rev_renumber_map[reorder[i]];
1496	}
1497	fz_free(ctx, opts->rev_renumber_map);
1498	opts->rev_renumber_map = rev_renumber_map;
1499	fz_free(ctx, reorder);
1500
1501	/ Apply the renumber_map /
1502	page_objects_list_renumber(opts);
1503	renumberobjs(ctx, doc, opts);
1504
1505	page_objects_list_sort_and_dedupe(ctx, opts->page_object_lists);
1506	}
1507
1508	static void
1509	update_linearization_params(fz_context ctx, pdf_document doc, pdf_write_state *opts)
1510	{
1511	int64_t offset;
1512	pdf_set_int(ctx, opts->linear_l, opts->file_len);
1513	/ Primary hint stream offset (of object, not stream!) /
1514	pdf_set_int(ctx, opts->linear_h0, opts->ofs_list[pdf_xref_len(ctx, doc)-`1`]);
1515	/ Primary hint stream length (of object, not stream!) /
1516	offset = (opts->start == `1` ? opts->main_xref_offset : opts->ofs_list[`1`] + opts->hintstream_len);
1517	pdf_set_int(ctx, opts->linear_h1, offset - opts->ofs_list[pdf_xref_len(ctx, doc)-`1`]);
1518	/ Object number of first pages page object (the first object of page 0) /
1519	pdf_set_int(ctx, opts->linear_o, opts->page_object_lists->page[`0`]->object[`0`]);
1520	/ Offset of end of first page (first page is followed by primary*
1521	* hint stream (object n-1) then remaining pages (object 1...). The
1522	* primary hint stream counts as part of the first pages data, I think.
1523	*/
1524	offset = (opts->start == `1` ? opts->main_xref_offset : opts->ofs_list[`1`] + opts->hintstream_len);
1525	pdf_set_int(ctx, opts->linear_e, offset);
1526	/ Number of pages in document /
1527	pdf_set_int(ctx, opts->linear_n, opts->page_count);
1528	/ Offset of first entry in main xref table /
1529	pdf_set_int(ctx, opts->linear_t, opts->first_xref_entry_offset + opts->hintstream_len);
1530	/ Offset of shared objects hint table in the primary hint stream /
1531	pdf_set_int(ctx, opts->hints_s, opts->hints_shared_offset);
1532	/ Primary hint stream length /
1533	pdf_set_int(ctx, opts->hints_length, opts->hintstream_len);
1534	}
1535
1536	/*
1537	* Make sure we have loaded objects from object streams.
1538	*/
1539
1540	static void preloadobjstms(fz_context ctx, pdf_document doc)
1541	{
1542	pdf_obj *obj;
1543	int num;
1544
1545	/ xref_len may change due to repair, so check it every iteration /
1546	for (num = `0`; num < pdf_xref_len(ctx, doc); num++)
1547	{
1548	if (pdf_get_xref_entry(ctx, doc, num)->type == `'o'`)
1549	{
1550	obj = pdf_load_object(ctx, doc, num);
1551	pdf_drop_obj(ctx, obj);
1552	}
1553	}
1554	}
1555
1556	/*
1557	* Save streams and objects to the output
1558	*/
1559
/*
 * Classify a byte value as binary (returns 1) or text (returns 0).
 *
 * Newline, carriage return and tab are text. Any other value below 32 or
 * above 127 is binary. Note that 127 itself is classified as text.
 */
static inline int isbinary(int c)
{
	if (c == '\n' || c == '\r' || c == '\t')
		return 0;
	return c < 32 || c > 127;
}
1566
1567	static int isbinarystream(fz_context ctx, const* unsigned char *data, size_t len)
1568	{
1569	size_t i;
1570	for (i = `0`; i < len; i++)
1571	if (isbinary(data[i]))
1572	return `1`;
1573	return `0`;
1574	}
1575
1576	static fz_buffer hexbuf(fz_context ctx, const unsigned char *p, size_t n)
1577	{
1578	static const char hex[`17`] = "0123456789abcdef";
1579	int x = `0`;
1580	size_t len = n * `2` + (n / `32`) + `1`;
1581	unsigned char *data = fz_malloc(ctx, len);
1582	fz_buffer *buf = fz_new_buffer_from_data(ctx, data, len);
1583
1584	while (n--)
1585	{
1586	data++ = hex[p >> `4`];
1587	data++ = hex[p & `15`];
1588	if (++x == `32`)
1589	{
1590	*data++ = `'\n'`;
1591	x = `0`;
1592	}
1593	p++;
1594	}
1595
1596	*data++ = `'>'`;
1597
1598	return buf;
1599	}
1600
1601	static void addhexfilter(fz_context ctx, pdf_document doc, pdf_obj *dict)
1602	{
1603	pdf_obj f, dp, newf, newdp;
1604
1605	newf = newdp = NULL;
1606	f = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
1607	dp = pdf_dict_get(ctx, dict, PDF_NAME(DecodeParms));
1608
1609	fz_var(newf);
1610	fz_var(newdp);
1611
1612	fz_try(ctx)
1613	{
1614	if (pdf_is_name(ctx, f))
1615	{
1616	newf = pdf_new_array(ctx, doc, `2`);
1617	pdf_array_push(ctx, newf, PDF_NAME(ASCIIHexDecode));
1618	pdf_array_push(ctx, newf, f);
1619	f = newf;
1620	if (pdf_is_dict(ctx, dp))
1621	{
1622	newdp = pdf_new_array(ctx, doc, `2`);
1623	pdf_array_push(ctx, newdp, PDF_NULL);
1624	pdf_array_push(ctx, newdp, dp);
1625	dp = newdp;
1626	}
1627	}
1628	else if (pdf_is_array(ctx, f))
1629	{
1630	pdf_array_insert(ctx, f, PDF_NAME(ASCIIHexDecode), `0`);
1631	if (pdf_is_array(ctx, dp))
1632	pdf_array_insert(ctx, dp, PDF_NULL, `0`);
1633	}
1634	else
1635	f = PDF_NAME(ASCIIHexDecode);
1636
1637	pdf_dict_put(ctx, dict, PDF_NAME(Filter), f);
1638	if (dp)
1639	pdf_dict_put(ctx, dict, PDF_NAME(DecodeParms), dp);
1640	}
1641	fz_always(ctx)
1642	{
1643	pdf_drop_obj(ctx, newf);
1644	pdf_drop_obj(ctx, newdp);
1645	}
1646	fz_catch(ctx)
1647	fz_rethrow(ctx);
1648	}
1649
1650	static fz_buffer deflatebuf(fz_context ctx, const unsigned char *p, size_t n)
1651	{
1652	fz_buffer *buf;
1653	uLongf csize;
1654	int t;
1655	uLong longN = (uLong)n;
1656	unsigned char *data;
1657	size_t cap;
1658
1659	if (n != (size_t)longN)
1660	fz_throw(ctx, FZ_ERROR_GENERIC, "Buffer too large to deflate");
1661
1662	cap = compressBound(longN);
1663	data = fz_malloc(ctx, cap);
1664	buf = fz_new_buffer_from_data(ctx, data, cap);
1665	csize = (uLongf)cap;
1666	t = compress(data, &csize, p, longN);
1667	if (t != Z_OK)
1668	{
1669	fz_drop_buffer(ctx, buf);
1670	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot deflate buffer");
1671	}
1672	fz_resize_buffer(ctx, buf, csize);
1673	return buf;
1674	}
1675
1676	static int striphexfilter(fz_context ctx, pdf_document doc, pdf_obj *dict)
1677	{
1678	pdf_obj f, dp;
1679	int is_hex = `0`;
1680
1681	f = pdf_dict_get(ctx, dict, PDF_NAME(Filter));
1682	dp = pdf_dict_get(ctx, dict, PDF_NAME(DecodeParms));
1683
1684	if (pdf_is_array(ctx, f))
1685	{
1686	/ Remove ASCIIHexDecode from head of filter list /
1687	if (pdf_array_get(ctx, f, `0`) == PDF_NAME(ASCIIHexDecode))
1688	{
1689	is_hex = `1`;
1690	pdf_array_delete(ctx, f, `0`);
1691	if (pdf_is_array(ctx, dp))
1692	pdf_array_delete(ctx, dp, `0`);
1693	}
1694	/ Unpack array if only one filter remains /
1695	if (pdf_array_len(ctx, f) == `1`)
1696	{
1697	f = pdf_array_get(ctx, f, `0`);
1698	pdf_dict_put(ctx, dict, PDF_NAME(Filter), f);
1699	if (dp)
1700	{
1701	dp = pdf_array_get(ctx, dp, `0`);
1702	pdf_dict_put(ctx, dict, PDF_NAME(DecodeParms), dp);
1703	}
1704	}
1705	/ Remove array if no filters remain /
1706	else if (pdf_array_len(ctx, f) == `0`)
1707	{
1708	pdf_dict_del(ctx, dict, PDF_NAME(Filter));
1709	pdf_dict_del(ctx, dict, PDF_NAME(DecodeParms));
1710	}
1711	}
1712	else if (f == PDF_NAME(ASCIIHexDecode))
1713	{
1714	is_hex = `1`;
1715	pdf_dict_del(ctx, dict, PDF_NAME(Filter));
1716	pdf_dict_del(ctx, dict, PDF_NAME(DecodeParms));
1717	}
1718
1719	return is_hex;
1720	}
1721
1722	static fz_buffer unhexbuf(fz_context ctx, const unsigned char *p, size_t n)
1723	{
1724	fz_stream *mstm = NULL;
1725	fz_stream *xstm = NULL;
1726	fz_buffer *out = NULL;
1727	fz_var(mstm);
1728	fz_var(xstm);
1729	fz_try(ctx)
1730	{
1731	mstm = fz_open_memory(ctx, p, n);
1732	xstm = fz_open_ahxd(ctx, mstm);
1733	out = fz_read_all(ctx, xstm, n/`2`);
1734	}
1735	fz_always(ctx)
1736	{
1737	fz_drop_stream(ctx, xstm);
1738	fz_drop_stream(ctx, mstm);
1739	}
1740	fz_catch(ctx)
1741	fz_rethrow(ctx);
1742	return out;
1743	}
1744
1745	static void write_data(fz_context ctx, void* arg, const* unsigned char data, int* len)
1746	{
1747	fz_write_data(ctx, (fz_output *)arg, data, len);
1748	}
1749
1750	static void copystream(fz_context ctx, pdf_document doc, pdf_write_state opts, pdf_obj obj_orig, int num, int gen, int do_deflate, int unenc)
1751	{
1752	fz_buffer tmp_unhex = NULL, tmp_flate = NULL, tmp_hex = NULL, buf = NULL;
1753	pdf_obj *obj = NULL;
1754	size_t len;
1755	unsigned char *data;
1756
1757	fz_var(buf);
1758	fz_var(tmp_flate);
1759	fz_var(tmp_hex);
1760	fz_var(obj);
1761
1762	fz_try(ctx)
1763	{
1764	buf = pdf_load_raw_stream_number(ctx, doc, num);
1765	obj = pdf_copy_dict(ctx, obj_orig);
1766
1767	len = fz_buffer_storage(ctx, buf, &data);
1768
1769	if (do_deflate && striphexfilter(ctx, doc, obj))
1770	{
1771	tmp_unhex = unhexbuf(ctx, data, len);
1772	len = fz_buffer_storage(ctx, tmp_unhex, &data);
1773	}
1774
1775	if (do_deflate && !pdf_dict_get(ctx, obj, PDF_NAME(Filter)))
1776	{
1777	size_t clen;
1778	unsigned char *cdata;
1779	tmp_flate = deflatebuf(ctx, data, len);
1780	clen = fz_buffer_storage(ctx, tmp_flate, &cdata);
1781	if (clen < len)
1782	{
1783	len = clen;
1784	data = cdata;
1785	pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
1786	}
1787	}
1788
1789	if (opts->do_ascii && isbinarystream(ctx, data, len))
1790	{
1791	tmp_hex = hexbuf(ctx, data, len);
1792	len = fz_buffer_storage(ctx, tmp_hex, &data);
1793	addhexfilter(ctx, doc, obj);
1794	}
1795
1796	fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
1797
1798	if (unenc)
1799	{
1800	pdf_dict_put_int(ctx, obj, PDF_NAME(Length), len);
1801	pdf_print_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii);
1802	fz_write_string(ctx, opts->out, "\nstream\n");
1803	fz_write_data(ctx, opts->out, data, len);
1804	}
1805	else
1806	{
1807	pdf_dict_put_int(ctx, obj, PDF_NAME(Length), pdf_encrypted_len(ctx, opts->crypt, num, gen, (int)len));
1808	pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, opts->crypt, num, gen);
1809	fz_write_string(ctx, opts->out, "\nstream\n");
1810	pdf_encrypt_data(ctx, opts->crypt, num, gen, write_data, opts->out, data, len);
1811	}
1812
1813	fz_write_string(ctx, opts->out, "\nendstream\nendobj\n\n");
1814	}
1815	fz_always(ctx)
1816	{
1817	fz_drop_buffer(ctx, tmp_unhex);
1818	fz_drop_buffer(ctx, tmp_hex);
1819	fz_drop_buffer(ctx, tmp_flate);
1820	fz_drop_buffer(ctx, buf);
1821	pdf_drop_obj(ctx, obj);
1822	}
1823	fz_catch(ctx)
1824	{
1825	fz_rethrow(ctx);
1826	}
1827	}
1828
1829	static void expandstream(fz_context ctx, pdf_document doc, pdf_write_state opts, pdf_obj obj_orig, int num, int gen, int do_deflate, int unenc)
1830	{
1831	fz_buffer buf = NULL, tmp_flate = NULL, *tmp_hex = NULL;
1832	pdf_obj *obj = NULL;
1833	size_t len;
1834	unsigned char *data;
1835
1836	fz_var(buf);
1837	fz_var(tmp_flate);
1838	fz_var(tmp_hex);
1839	fz_var(obj);
1840
1841	fz_try(ctx)
1842	{
1843	buf = pdf_load_stream_number(ctx, doc, num);
1844	obj = pdf_copy_dict(ctx, obj_orig);
1845	pdf_dict_del(ctx, obj, PDF_NAME(Filter));
1846	pdf_dict_del(ctx, obj, PDF_NAME(DecodeParms));
1847
1848	len = fz_buffer_storage(ctx, buf, &data);
1849	if (do_deflate)
1850	{
1851	unsigned char *cdata;
1852	size_t clen;
1853	tmp_flate = deflatebuf(ctx, data, len);
1854	clen = fz_buffer_storage(ctx, tmp_flate, &cdata);
1855	if (clen < len)
1856	{
1857	len = clen;
1858	data = cdata;
1859	pdf_dict_put(ctx, obj, PDF_NAME(Filter), PDF_NAME(FlateDecode));
1860	}
1861	}
1862
1863	if (opts->do_ascii && isbinarystream(ctx, data, len))
1864	{
1865	tmp_hex = hexbuf(ctx, data, len);
1866	len = fz_buffer_storage(ctx, tmp_hex, &data);
1867	addhexfilter(ctx, doc, obj);
1868	}
1869
1870	fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
1871
1872	if (unenc)
1873	{
1874	pdf_dict_put_int(ctx, obj, PDF_NAME(Length), len);
1875	pdf_print_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii);
1876	fz_write_string(ctx, opts->out, "\nstream\n");
1877	fz_write_data(ctx, opts->out, data, len);
1878	}
1879	else
1880	{
1881	pdf_dict_put_int(ctx, obj, PDF_NAME(Length), pdf_encrypted_len(ctx, opts->crypt, num, gen, (int)len));
1882	pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, opts->crypt, num, gen);
1883	fz_write_string(ctx, opts->out, "\nstream\n");
1884	pdf_encrypt_data(ctx, opts->crypt, num, gen, write_data, opts->out, data, len);
1885	}
1886
1887	fz_write_string(ctx, opts->out, "\nendstream\nendobj\n\n");
1888	}
1889	fz_always(ctx)
1890	{
1891	fz_drop_buffer(ctx, tmp_hex);
1892	fz_drop_buffer(ctx, tmp_flate);
1893	fz_drop_buffer(ctx, buf);
1894	pdf_drop_obj(ctx, obj);
1895	}
1896	fz_catch(ctx)
1897	{
1898	fz_rethrow(ctx);
1899	}
1900	}
1901
1902	static int is_image_filter(pdf_obj *s)
1903	{
1904	return
1905	s == PDF_NAME(CCITTFaxDecode) \|\| s == PDF_NAME(CCF) \|\|
1906	s == PDF_NAME(DCTDecode) \|\| s == PDF_NAME(DCT) \|\|
1907	s == PDF_NAME(RunLengthDecode) \|\| s == PDF_NAME(RL) \|\|
1908	s == PDF_NAME(JBIG2Decode) \|\|
1909	s == PDF_NAME(JPXDecode);
1910	}
1911
1912	static int filter_implies_image(fz_context ctx, pdf_obj o)
1913	{
1914	if (pdf_is_name(ctx, o))
1915	return is_image_filter(o);
1916	if (pdf_is_array(ctx, o))
1917	{
1918	int i, len;
1919	len = pdf_array_len(ctx, o);
1920	for (i = `0`; i < len; i++)
1921	if (is_image_filter(pdf_array_get(ctx, o, i)))
1922	return `1`;
1923	}
1924	return `0`;
1925	}
1926
1927	static int is_jpx_filter(fz_context ctx, pdf_obj o)
1928	{
1929	if (o == PDF_NAME(JPXDecode))
1930	return `1`;
1931	if (pdf_is_array(ctx, o))
1932	{
1933	int i, len;
1934	len = pdf_array_len(ctx, o);
1935	for (i = `0`; i < len; i++)
1936	if (pdf_array_get(ctx, o, i) == PDF_NAME(JPXDecode))
1937	return `1`;
1938	}
1939	return `0`;
1940	}
1941
1942	static int is_image_stream(fz_context ctx, pdf_obj obj)
1943	{
1944	pdf_obj *o;
1945	if ((o = pdf_dict_get(ctx, obj, PDF_NAME(Type)), pdf_name_eq(ctx, o, PDF_NAME(XObject))))
1946	if ((o = pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), pdf_name_eq(ctx, o, PDF_NAME(Image))))
1947	return `1`;
1948	if (o = pdf_dict_get(ctx, obj, PDF_NAME(Filter)), filter_implies_image(ctx, o))
1949	return `1`;
1950	if (pdf_dict_get(ctx, obj, PDF_NAME(Width)) != NULL && pdf_dict_get(ctx, obj, PDF_NAME(Height)) != NULL)
1951	return `1`;
1952	return `0`;
1953	}
1954
1955	static int is_font_stream(fz_context ctx, pdf_obj obj)
1956	{
1957	pdf_obj *o;
1958	if (o = pdf_dict_get(ctx, obj, PDF_NAME(Type)), pdf_name_eq(ctx, o, PDF_NAME(Font)))
1959	return `1`;
1960	if (o = pdf_dict_get(ctx, obj, PDF_NAME(Type)), pdf_name_eq(ctx, o, PDF_NAME(FontDescriptor)))
1961	return `1`;
1962	if (pdf_dict_get(ctx, obj, PDF_NAME(Length1)) != NULL)
1963	return `1`;
1964	if (pdf_dict_get(ctx, obj, PDF_NAME(Length2)) != NULL)
1965	return `1`;
1966	if (pdf_dict_get(ctx, obj, PDF_NAME(Length3)) != NULL)
1967	return `1`;
1968	if (o = pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), pdf_name_eq(ctx, o, PDF_NAME(Type1C)))
1969	return `1`;
1970	if (o = pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), pdf_name_eq(ctx, o, PDF_NAME(CIDFontType0C)))
1971	return `1`;
1972	return `0`;
1973	}
1974
1975	static int is_jpx_stream(fz_context ctx, pdf_obj obj)
1976	{
1977	pdf_obj *o;
1978	if (o = pdf_dict_get(ctx, obj, PDF_NAME(Filter)), is_jpx_filter(ctx, o))
1979	return `1`;
1980	return `0`;
1981	}
1982
1983
1984	static int is_xml_metadata(fz_context ctx, pdf_obj obj)
1985	{
1986	if (pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Type)), PDF_NAME(Metadata)))
1987	if (pdf_name_eq(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Subtype)), PDF_NAME(XML)))
1988	return `1`;
1989	return `0`;
1990	}
1991
1992	static void writeobject(fz_context ctx, pdf_document doc, pdf_write_state opts, int* num, int gen, int skip_xrefs, int unenc)
1993	{
1994	pdf_obj *obj = NULL;
1995	fz_buffer *buf = NULL;
1996	int do_deflate = `0`;
1997	int do_expand = `0`;
1998	int skip = `0`;
1999
2000	fz_var(obj);
2001	fz_var(buf);
2002
2003	if (opts->do_encrypt == PDF_ENCRYPT_NONE)
2004	unenc = `1`;
2005
2006	fz_try(ctx)
2007	{
2008	obj = pdf_load_object(ctx, doc, num);
2009
2010	/ skip ObjStm and XRef objects /
2011	if (pdf_is_dict(ctx, obj))
2012	{
2013	pdf_obj *type = pdf_dict_get(ctx, obj, PDF_NAME(Type));
2014	if (type == PDF_NAME(ObjStm))
2015	{
2016	opts->use_list[num] = `0`;
2017	skip = `1`;
2018	}
2019	if (skip_xrefs && type == PDF_NAME(XRef))
2020	{
2021	opts->use_list[num] = `0`;
2022	skip = `1`;
2023	}
2024	}
2025
2026	if (!skip)
2027	{
2028	if (pdf_obj_num_is_stream(ctx, doc, num))
2029	{
2030	do_deflate = opts->do_compress;
2031	do_expand = opts->do_expand;
2032	if (opts->do_compress_images && is_image_stream(ctx, obj))
2033	do_deflate = `1`, do_expand = `0`;
2034	if (opts->do_compress_fonts && is_font_stream(ctx, obj))
2035	do_deflate = `1`, do_expand = `0`;
2036	if (is_xml_metadata(ctx, obj))
2037	do_deflate = `0`, do_expand = `0`;
2038	if (is_jpx_stream(ctx, obj))
2039	do_deflate = `0`, do_expand = `0`;
2040
2041	if (do_expand)
2042	expandstream(ctx, doc, opts, obj, num, gen, do_deflate, unenc);
2043	else
2044	copystream(ctx, doc, opts, obj, num, gen, do_deflate, unenc);
2045	}
2046	else
2047	{
2048	fz_write_printf(ctx, opts->out, "%d %d obj\n", num, gen);
2049	pdf_print_encrypted_obj(ctx, opts->out, obj, opts->do_tight, opts->do_ascii, unenc ? NULL : opts->crypt, num, gen);
2050	fz_write_string(ctx, opts->out, "\nendobj\n\n");
2051	}
2052	}
2053	}
2054	fz_always(ctx)
2055	{
2056	fz_drop_buffer(ctx, buf);
2057	pdf_drop_obj(ctx, obj);
2058	}
2059	fz_catch(ctx)
2060	{
2061	fz_rethrow(ctx);
2062	}
2063	}
2064
2065	static void writexrefsubsect(fz_context ctx, pdf_write_state opts, int from, int to)
2066	{
2067	int num;
2068
2069	fz_write_printf(ctx, opts->out, "%d %d\n", from, to - from);
2070	for (num = from; num < to; num++)
2071	{
2072	if (opts->use_list[num])
2073	fz_write_printf(ctx, opts->out, "%010lu %05d n \n", opts->ofs_list[num], opts->gen_list[num]);
2074	else
2075	fz_write_printf(ctx, opts->out, "%010lu %05d f \n", opts->ofs_list[num], opts->gen_list[num]);
2076	}
2077	}
2078
2079	static void writexref(fz_context ctx, pdf_document doc, pdf_write_state opts, int* from, int to, int first, int64_t main_xref_offset, int64_t startxref)
2080	{
2081	pdf_obj *trailer = NULL;
2082	pdf_obj *obj;
2083	pdf_obj *nobj = NULL;
2084
2085	fz_write_string(ctx, opts->out, "xref\n");
2086	opts->first_xref_entry_offset = fz_tell_output(ctx, opts->out);
2087
2088	if (opts->do_incremental)
2089	{
2090	int subfrom = from;
2091	int subto;
2092
2093	while (subfrom < to)
2094	{
2095	while (subfrom < to && !pdf_xref_is_incremental(ctx, doc, subfrom))
2096	subfrom++;
2097
2098	subto = subfrom;
2099	while (subto < to && pdf_xref_is_incremental(ctx, doc, subto))
2100	subto++;
2101
2102	if (subfrom < subto)
2103	writexrefsubsect(ctx, opts, subfrom, subto);
2104
2105	subfrom = subto;
2106	}
2107	}
2108	else
2109	{
2110	writexrefsubsect(ctx, opts, from, to);
2111	}
2112
2113	fz_write_string(ctx, opts->out, "\n");
2114
2115	fz_var(trailer);
2116
2117	if (opts->do_incremental)
2118	{
2119	trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));
2120	pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), pdf_xref_len(ctx, doc));
2121	pdf_dict_put_int(ctx, trailer, PDF_NAME(Prev), doc->startxref);
2122	doc->startxref = startxref;
2123	}
2124	else
2125	{
2126	trailer = pdf_new_dict(ctx, doc, `5`);
2127
2128	nobj = pdf_new_int(ctx, to);
2129	pdf_dict_put_drop(ctx, trailer, PDF_NAME(Size), nobj);
2130
2131	if (first)
2132	{
2133	obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
2134	if (obj)
2135	pdf_dict_put(ctx, trailer, PDF_NAME(Info), obj);
2136
2137	obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
2138	if (obj)
2139	pdf_dict_put(ctx, trailer, PDF_NAME(Root), obj);
2140
2141	obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
2142	if (obj)
2143	pdf_dict_put(ctx, trailer, PDF_NAME(ID), obj);
2144
2145	obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
2146	if (obj)
2147	pdf_dict_put(ctx, trailer, PDF_NAME(Encrypt), obj);
2148	}
2149	if (main_xref_offset != `0`)
2150	{
2151	nobj = pdf_new_int(ctx, main_xref_offset);
2152	pdf_dict_put_drop(ctx, trailer, PDF_NAME(Prev), nobj);
2153	}
2154	}
2155
2156	fz_write_string(ctx, opts->out, "trailer\n");
2157	/ Trailer is NOT encrypted /
2158	pdf_print_obj(ctx, opts->out, trailer, opts->do_tight, opts->do_ascii);
2159	fz_write_string(ctx, opts->out, "\n");
2160
2161	pdf_drop_obj(ctx, trailer);
2162
2163	fz_write_printf(ctx, opts->out, "startxref\n%lu\n%%%%EOF\n", startxref);
2164
2165	doc->has_xref_streams = `0`;
2166	}
2167
2168	static void writexrefstreamsubsect(fz_context ctx, pdf_document doc, pdf_write_state opts, pdf_obj index, fz_buffer fzbuf, int* from, int to)
2169	{
2170	int num;
2171
2172	pdf_array_push_int(ctx, index, from);
2173	pdf_array_push_int(ctx, index, to - from);
2174	for (num = from; num < to; num++)
2175	{
2176	fz_append_byte(ctx, fzbuf, opts->use_list[num] ? `1` : `0`);
2177	fz_append_byte(ctx, fzbuf, opts->ofs_list[num]>>`24`);
2178	fz_append_byte(ctx, fzbuf, opts->ofs_list[num]>>`16`);
2179	fz_append_byte(ctx, fzbuf, opts->ofs_list[num]>>`8`);
2180	fz_append_byte(ctx, fzbuf, opts->ofs_list[num]);
2181	fz_append_byte(ctx, fzbuf, opts->gen_list[num]);
2182	}
2183	}
2184
2185	static void writexrefstream(fz_context ctx, pdf_document doc, pdf_write_state opts, int* from, int to, int first, int64_t main_xref_offset, int64_t startxref)
2186	{
2187	int num;
2188	pdf_obj *dict = NULL;
2189	pdf_obj *obj;
2190	pdf_obj *w = NULL;
2191	pdf_obj *index;
2192	fz_buffer *fzbuf = NULL;
2193
2194	fz_var(dict);
2195	fz_var(w);
2196	fz_var(fzbuf);
2197	fz_try(ctx)
2198	{
2199	num = pdf_create_object(ctx, doc);
2200	dict = pdf_new_dict(ctx, doc, `6`);
2201	pdf_update_object(ctx, doc, num, dict);
2202
2203	opts->first_xref_entry_offset = fz_tell_output(ctx, opts->out);
2204
2205	to++;
2206
2207	if (first)
2208	{
2209	obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
2210	if (obj)
2211	pdf_dict_put(ctx, dict, PDF_NAME(Info), obj);
2212
2213	obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
2214	if (obj)
2215	pdf_dict_put(ctx, dict, PDF_NAME(Root), obj);
2216
2217	obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
2218	if (obj)
2219	pdf_dict_put(ctx, dict, PDF_NAME(ID), obj);
2220
2221	if (opts->do_incremental)
2222	{
2223	obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
2224	if (obj)
2225	pdf_dict_put(ctx, dict, PDF_NAME(Encrypt), obj);
2226	}
2227	}
2228
2229	pdf_dict_put_int(ctx, dict, PDF_NAME(Size), to);
2230
2231	if (opts->do_incremental)
2232	{
2233	pdf_dict_put_int(ctx, dict, PDF_NAME(Prev), doc->startxref);
2234	doc->startxref = startxref;
2235	}
2236	else
2237	{
2238	if (main_xref_offset != `0`)
2239	pdf_dict_put_int(ctx, dict, PDF_NAME(Prev), main_xref_offset);
2240	}
2241
2242	pdf_dict_put(ctx, dict, PDF_NAME(Type), PDF_NAME(XRef));
2243
2244	w = pdf_new_array(ctx, doc, `3`);
2245	pdf_dict_put(ctx, dict, PDF_NAME(W), w);
2246	pdf_array_push_int(ctx, w, `1`);
2247	pdf_array_push_int(ctx, w, `4`);
2248	pdf_array_push_int(ctx, w, `1`);
2249
2250	index = pdf_new_array(ctx, doc, `2`);
2251	pdf_dict_put_drop(ctx, dict, PDF_NAME(Index), index);
2252
2253	/ opts->gen_list[num] is already initialized by fz_calloc. /
2254	opts->use_list[num] = `1`;
2255	opts->ofs_list[num] = opts->first_xref_entry_offset;
2256
2257	fzbuf = fz_new_buffer(ctx, (`1` + `4` + `1`) * (to-from));
2258
2259	if (opts->do_incremental)
2260	{
2261	int subfrom = from;
2262	int subto;
2263
2264	while (subfrom < to)
2265	{
2266	while (subfrom < to && !pdf_xref_is_incremental(ctx, doc, subfrom))
2267	subfrom++;
2268
2269	subto = subfrom;
2270	while (subto < to && pdf_xref_is_incremental(ctx, doc, subto))
2271	subto++;
2272
2273	if (subfrom < subto)
2274	writexrefstreamsubsect(ctx, doc, opts, index, fzbuf, subfrom, subto);
2275
2276	subfrom = subto;
2277	}
2278	}
2279	else
2280	{
2281	writexrefstreamsubsect(ctx, doc, opts, index, fzbuf, from, to);
2282	}
2283
2284	pdf_update_stream(ctx, doc, dict, fzbuf, `0`);
2285
2286	writeobject(ctx, doc, opts, num, `0`, `0`, `1`);
2287	fz_write_printf(ctx, opts->out, "startxref\n%lu\n%%%%EOF\n", startxref);
2288	}
2289	fz_always(ctx)
2290	{
2291	pdf_drop_obj(ctx, dict);
2292	pdf_drop_obj(ctx, w);
2293	fz_drop_buffer(ctx, fzbuf);
2294	}
2295	fz_catch(ctx)
2296	{
2297	fz_rethrow(ctx);
2298	}
2299
2300	doc->has_old_style_xrefs = `0`;
2301	}
2302
2303	static void
2304	padto(fz_context ctx, fz_output out, int64_t target)
2305	{
2306	int64_t pos = fz_tell_output(ctx, out);
2307
2308	assert(pos <= target);
2309	while (pos < target)
2310	{
2311	fz_write_byte(ctx, out, `'\n'`);
2312	pos++;
2313	}
2314	}
2315
2316	static void
2317	dowriteobject(fz_context ctx, pdf_document doc, pdf_write_state opts, int* num, int pass)
2318	{
2319	pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, num);
2320	if (entry->type == `'f'`)
2321	opts->gen_list[num] = entry->gen;
2322	if (entry->type == `'n'`)
2323	opts->gen_list[num] = entry->gen;
2324	if (entry->type == `'o'`)
2325	opts->gen_list[num] = `0`;
2326
2327	/ If we are renumbering, then make sure all generation numbers are*
2328	* zero (except object 0 which must be free, and have a gen number of
2329	* 65535). Changing the generation numbers (and indeed object numbers)
2330	* will break encryption - so only do this if we are renumbering
2331	* anyway. */
2332	if (opts->do_garbage >= `2`)
2333	opts->gen_list[num] = (num == `0` ? `65535` : `0`);
2334
2335	if (opts->do_garbage && !opts->use_list[num])
2336	return;
2337
2338	if (entry->type == `'n'` \|\| entry->type == `'o'`)
2339	{
2340	if (pass > `0`)
2341	padto(ctx, opts->out, opts->ofs_list[num]);
2342	if (!opts->do_incremental \|\| pdf_xref_is_incremental(ctx, doc, num))
2343	{
2344	opts->ofs_list[num] = fz_tell_output(ctx, opts->out);
2345	writeobject(ctx, doc, opts, num, opts->gen_list[num], `1`, num == opts->crypt_object_number);
2346	}
2347	}
2348	else
2349	opts->use_list[num] = `0`;
2350	}
2351
2352	static void
2353	writeobjects(fz_context ctx, pdf_document doc, pdf_write_state opts, int* pass)
2354	{
2355	int num;
2356	int xref_len = pdf_xref_len(ctx, doc);
2357
2358	if (!opts->do_incremental)
2359	{
2360	fz_write_printf(ctx, opts->out, "%%PDF-%d.%d\n", doc->version / `10`, doc->version % `10`);
2361	fz_write_string(ctx, opts->out, "%\xC2\xB5\xC2\xB6\n\n");
2362	}
2363
2364	dowriteobject(ctx, doc, opts, opts->start, pass);
2365
2366	if (opts->do_linear)
2367	{
2368	/ Write first xref /
2369	if (pass == `0`)
2370	opts->first_xref_offset = fz_tell_output(ctx, opts->out);
2371	else
2372	padto(ctx, opts->out, opts->first_xref_offset);
2373	writexref(ctx, doc, opts, opts->start, pdf_xref_len(ctx, doc), `1`, opts->main_xref_offset, `0`);
2374	}
2375
2376	for (num = opts->start+`1`; num < xref_len; num++)
2377	dowriteobject(ctx, doc, opts, num, pass);
2378	if (opts->do_linear && pass == `1`)
2379	{
2380	int64_t offset = (opts->start == `1` ? opts->main_xref_offset : opts->ofs_list[`1`] + opts->hintstream_len);
2381	padto(ctx, opts->out, offset);
2382	}
2383	for (num = `1`; num < opts->start; num++)
2384	{
2385	if (pass == `1`)
2386	opts->ofs_list[num] += opts->hintstream_len;
2387	dowriteobject(ctx, doc, opts, num, pass);
2388	}
2389	}
2390
/*
	Return the number of bits needed to represent x, i.e. the position
	of its highest set bit plus one. Returns 0 for x <= 0.

	The count is done on an unsigned copy so that no shift ever reaches
	the sign bit of an int (which is undefined behaviour). Values of
	2^30 and above therefore report 31 bits rather than collapsing to 0.
*/
static int
my_log2(int x)
{
	unsigned int v;
	int bits = 0;

	if (x <= 0)
		return 0;

	for (v = (unsigned int)x; v != 0; v >>= 1)
		bits++;

	return bits;
}
2407
2408	static void
2409	make_page_offset_hints(fz_context ctx, pdf_document doc, pdf_write_state opts, fz_buffer buf)
2410	{
2411	int i, j;
2412	int min_objs_per_page, max_objs_per_page;
2413	int min_page_length, max_page_length;
2414	int objs_per_page_bits;
2415	int min_shared_object, max_shared_object;
2416	int max_shared_object_refs = `0`;
2417	int min_shared_length, max_shared_length;
2418	page_objects **pop = &opts->page_object_lists->page[`0`];
2419	int page_len_bits, shared_object_bits, shared_object_id_bits;
2420	int shared_length_bits;
2421	int xref_len = pdf_xref_len(ctx, doc);
2422
2423	min_shared_object = pdf_xref_len(ctx, doc);
2424	max_shared_object = `1`;
2425	min_shared_length = opts->file_len;
2426	max_shared_length = `0`;
2427	for (i=`1`; i < xref_len; i++)
2428	{
2429	int min, max, page;
2430
2431	min = opts->ofs_list[i];
2432	if (i == opts->start-`1` \|\| (opts->start == `1` && i == xref_len-`1`))
2433	max = opts->main_xref_offset;
2434	else if (i == xref_len-`1`)
2435	max = opts->ofs_list[`1`];
2436	else
2437	max = opts->ofs_list[i+`1`];
2438
2439	assert(max > min);
2440
2441	if (opts->use_list[i] & USE_SHARED)
2442	{
2443	page = -`1`;
2444	if (i < min_shared_object)
2445	min_shared_object = i;
2446	if (i > max_shared_object)
2447	max_shared_object = i;
2448	if (min_shared_length > max - min)
2449	min_shared_length = max - min;
2450	if (max_shared_length < max - min)
2451	max_shared_length = max - min;
2452	}
2453	else if (opts->use_list[i] & (USE_CATALOGUE \| USE_HINTS \| USE_PARAMS))
2454	page = -`1`;
2455	else if (opts->use_list[i] & USE_PAGE1)
2456	{
2457	page = `0`;
2458	if (min_shared_length > max - min)
2459	min_shared_length = max - min;
2460	if (max_shared_length < max - min)
2461	max_shared_length = max - min;
2462	}
2463	else if (opts->use_list[i] == `0`)
2464	page = -`1`;
2465	else
2466	page = opts->use_list[i]>>USE_PAGE_SHIFT;
2467
2468	if (page >= `0`)
2469	{
2470	pop[page]->num_objects++;
2471	if (pop[page]->min_ofs > min)
2472	pop[page]->min_ofs = min;
2473	if (pop[page]->max_ofs < max)
2474	pop[page]->max_ofs = max;
2475	}
2476	}
2477
2478	min_objs_per_page = max_objs_per_page = pop[`0`]->num_objects;
2479	min_page_length = max_page_length = pop[`0`]->max_ofs - pop[`0`]->min_ofs;
2480	for (i=`1`; i < opts->page_count; i++)
2481	{
2482	int tmp;
2483	if (min_objs_per_page > pop[i]->num_objects)
2484	min_objs_per_page = pop[i]->num_objects;
2485	if (max_objs_per_page < pop[i]->num_objects)
2486	max_objs_per_page = pop[i]->num_objects;
2487	tmp = pop[i]->max_ofs - pop[i]->min_ofs;
2488	if (tmp < min_page_length)
2489	min_page_length = tmp;
2490	if (tmp > max_page_length)
2491	max_page_length = tmp;
2492	}
2493
2494	for (i=`0`; i < opts->page_count; i++)
2495	{
2496	int count = `0`;
2497	page_objects *po = opts->page_object_lists->page[i];
2498	for (j = `0`; j < po->len; j++)
2499	{
2500	if (i == `0` && opts->use_list[po->object[j]] & USE_PAGE1)
2501	count++;
2502	else if (i != `0` && opts->use_list[po->object[j]] & USE_SHARED)
2503	count++;
2504	}
2505	po->num_shared = count;
2506	if (i == `0` \|\| count > max_shared_object_refs)
2507	max_shared_object_refs = count;
2508	}
2509	if (min_shared_object > max_shared_object)
2510	min_shared_object = max_shared_object = `0`;
2511
2512	/ Table F.3 - Header /
2513	/ Header Item 1: Least number of objects in a page /
2514	fz_append_bits(ctx, buf, min_objs_per_page, `32`);
2515	/ Header Item 2: Location of first pages page object /
2516	fz_append_bits(ctx, buf, opts->ofs_list[pop[`0`]->page_object_number], `32`);
2517	/ Header Item 3: Number of bits required to represent the difference*
2518	* between the greatest and least number of objects in a page. */
2519	objs_per_page_bits = my_log2(max_objs_per_page - min_objs_per_page);
2520	fz_append_bits(ctx, buf, objs_per_page_bits, `16`);
2521	/ Header Item 4: Least length of a page. /
2522	fz_append_bits(ctx, buf, min_page_length, `32`);
2523	/ Header Item 5: Number of bits needed to represent the difference*
2524	* between the greatest and least length of a page. */
2525	page_len_bits = my_log2(max_page_length - min_page_length);
2526	fz_append_bits(ctx, buf, page_len_bits, `16`);
2527	/ Header Item 6: Least offset to start of content stream (Acrobat*
2528	* sets this to always be 0) */
2529	fz_append_bits(ctx, buf, `0`, `32`);
2530	/ Header Item 7: Number of bits needed to represent the difference*
2531	* between the greatest and least offset to content stream (Acrobat
2532	* sets this to always be 0) */
2533	fz_append_bits(ctx, buf, `0`, `16`);
2534	/ Header Item 8: Least content stream length. (Acrobat*
2535	* sets this to always be 0) */
2536	fz_append_bits(ctx, buf, `0`, `32`);
2537	/ Header Item 9: Number of bits needed to represent the difference*
2538	* between the greatest and least content stream length (Acrobat
2539	* sets this to always be the same as item 5) */
2540	fz_append_bits(ctx, buf, page_len_bits, `16`);
2541	/ Header Item 10: Number of bits needed to represent the greatest*
2542	* number of shared object references. */
2543	shared_object_bits = my_log2(max_shared_object_refs);
2544	fz_append_bits(ctx, buf, shared_object_bits, `16`);
2545	/ Header Item 11: Number of bits needed to represent the greatest*
2546	* shared object identifier. */
2547	shared_object_id_bits = my_log2(max_shared_object - min_shared_object + pop[`0`]->num_shared);
2548	fz_append_bits(ctx, buf, shared_object_id_bits, `16`);
2549	/ Header Item 12: Number of bits needed to represent the numerator*
2550	* of the fractions. We always send 0. */
2551	fz_append_bits(ctx, buf, `0`, `16`);
2552	/ Header Item 13: Number of bits needed to represent the denominator*
2553	* of the fractions. We always send 0. */
2554	fz_append_bits(ctx, buf, `0`, `16`);
2555
2556	/ Table F.4 - Page offset hint table (per page) /
2557	/ Item 1: A number that, when added to the least number of objects*
2558	* on a page, gives the number of objects in the page. */
2559	for (i = `0`; i < opts->page_count; i++)
2560	{
2561	fz_append_bits(ctx, buf, pop[i]->num_objects - min_objs_per_page, objs_per_page_bits);
2562	}
2563	fz_append_bits_pad(ctx, buf);
2564	/ Item 2: A number that, when added to the least page length, gives*
2565	* the length of the page in bytes. */
2566	for (i = `0`; i < opts->page_count; i++)
2567	{
2568	fz_append_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits);
2569	}
2570	fz_append_bits_pad(ctx, buf);
2571	/ Item 3: The number of shared objects referenced from the page. /
2572	for (i = `0`; i < opts->page_count; i++)
2573	{
2574	fz_append_bits(ctx, buf, pop[i]->num_shared, shared_object_bits);
2575	}
2576	fz_append_bits_pad(ctx, buf);
2577	/ Item 4: Shared object id for each shared object ref in every page.*
2578	* Spec says "not for page 1", but acrobat does send page 1's - all
2579	* as zeros. */
2580	for (i = `0`; i < opts->page_count; i++)
2581	{
2582	for (j = `0`; j < pop[i]->len; j++)
2583	{
2584	int o = pop[i]->object[j];
2585	if (i == `0` && opts->use_list[o] & USE_PAGE1)
2586	fz_append_bits(ctx, buf, `0` / o - pop[0]->page_object_number /, shared_object_id_bits);
2587	if (i != `0` && opts->use_list[o] & USE_SHARED)
2588	fz_append_bits(ctx, buf, o - min_shared_object + pop[`0`]->num_shared, shared_object_id_bits);
2589	}
2590	}
2591	fz_append_bits_pad(ctx, buf);
2592	/ Item 5: Numerator of fractional position for each shared object reference. /
2593	/ We always send 0 in 0 bits /
2594	/ Item 6: A number that, when added to the least offset to the start*
2595	* of the content stream (F.3 Item 6), gives the offset in bytes of
2596	* start of the pages content stream object relative to the beginning
2597	* of the page. Always 0 in 0 bits. */
2598	/ Item 7: A number that, when added to the least content stream length*
2599	* (F.3 Item 8), gives the length of the pages content stream object.
2600	* Always == Item 2 as least content stream length = least page stream
2601	* length.
2602	*/
2603	for (i = `0`; i < opts->page_count; i++)
2604	{
2605	fz_append_bits(ctx, buf, pop[i]->max_ofs - pop[i]->min_ofs - min_page_length, page_len_bits);
2606	}
2607
2608	/ Pad, and then do shared object hint table /
2609	fz_append_bits_pad(ctx, buf);
2610	opts->hints_shared_offset = (int)fz_buffer_storage(ctx, buf, NULL);
2611
2612	/ Table F.5: /
2613	/ Header Item 1: Object number of the first object in the shared*
2614	* objects section. */
2615	fz_append_bits(ctx, buf, min_shared_object, `32`);
2616	/ Header Item 2: Location of first object in the shared objects*
2617	* section. */
2618	fz_append_bits(ctx, buf, opts->ofs_list[min_shared_object], `32`);
2619	/ Header Item 3: The number of shared object entries for the first*
2620	* page. */
2621	fz_append_bits(ctx, buf, pop[`0`]->num_shared, `32`);
2622	/ Header Item 4: The number of shared object entries for the shared*
2623	* objects section + first page. */
2624	fz_append_bits(ctx, buf, max_shared_object - min_shared_object + pop[`0`]->num_shared, `32`);
2625	/ Header Item 5: The number of bits needed to represent the greatest*
2626	* number of objects in a shared object group (Always 0). */
2627	fz_append_bits(ctx, buf, `0`, `16`);
2628	/ Header Item 6: The least length of a shared object group in bytes. /
2629	fz_append_bits(ctx, buf, min_shared_length, `32`);
2630	/ Header Item 7: The number of bits required to represent the*
2631	* difference between the greatest and least length of a shared object
2632	* group. */
2633	shared_length_bits = my_log2(max_shared_length - min_shared_length);
2634	fz_append_bits(ctx, buf, shared_length_bits, `16`);
2635
2636	/ Table F.6 /
2637	/ Item 1: Shared object group length (page 1 objects) /
2638	for (j = `0`; j < pop[`0`]->len; j++)
2639	{
2640	int o = pop[`0`]->object[j];
2641	int64_t min, max;
2642	min = opts->ofs_list[o];
2643	if (o == opts->start-`1`)
2644	max = opts->main_xref_offset;
2645	else if (o < xref_len-`1`)
2646	max = opts->ofs_list[o+`1`];
2647	else
2648	max = opts->ofs_list[`1`];
2649	if (opts->use_list[o] & USE_PAGE1)
2650	fz_append_bits(ctx, buf, max - min - min_shared_length, shared_length_bits);
2651	}
2652	/ Item 1: Shared object group length (shared objects) /
2653	for (i = min_shared_object; i <= max_shared_object; i++)
2654	{
2655	int min, max;
2656	min = opts->ofs_list[i];
2657	if (i == opts->start-`1`)
2658	max = opts->main_xref_offset;
2659	else if (i < xref_len-`1`)
2660	max = opts->ofs_list[i+`1`];
2661	else
2662	max = opts->ofs_list[`1`];
2663	fz_append_bits(ctx, buf, max - min - min_shared_length, shared_length_bits);
2664	}
2665	fz_append_bits_pad(ctx, buf);
2666
2667	/ Item 2: MD5 presence flags /
2668	for (i = max_shared_object - min_shared_object + pop[`0`]->num_shared; i > `0`; i--)
2669	{
2670	fz_append_bits(ctx, buf, `0`, `1`);
2671	}
2672	fz_append_bits_pad(ctx, buf);
2673	/ Item 3: MD5 sums (not present) /
2674	fz_append_bits_pad(ctx, buf);
2675	/ Item 4: Number of objects in the group (not present) /
2676	}
2677
2678	static void
2679	make_hint_stream(fz_context ctx, pdf_document doc, pdf_write_state *opts)
2680	{
2681	fz_buffer *buf;
2682	pdf_obj *obj = NULL;
2683
2684	fz_var(obj);
2685
2686	buf = fz_new_buffer(ctx, `100`);
2687	fz_try(ctx)
2688	{
2689	make_page_offset_hints(ctx, doc, opts, buf);
2690	obj = pdf_load_object(ctx, doc, pdf_xref_len(ctx, doc)-`1`);
2691	pdf_update_stream(ctx, doc, obj, buf, `0`);
2692	opts->hintstream_len = (int)fz_buffer_storage(ctx, buf, NULL);
2693	}
2694	fz_always(ctx)
2695	{
2696	pdf_drop_obj(ctx, obj);
2697	fz_drop_buffer(ctx, buf);
2698	}
2699	fz_catch(ctx)
2700	fz_rethrow(ctx);
2701	}
2702
#ifdef DEBUG_WRITING
/*
	Debug helper: print, for every xref entry, its object number, the
	file offset recorded for it and its usage flags to stderr.
*/
static void dump_object_details(fz_context *ctx, pdf_document *doc, pdf_write_state *opts)
{
	int i;
	int xref_len = pdf_xref_len(ctx, doc);

	for (i = 0; i < xref_len; i++)
	{
		/* ofs_list holds int64_t offsets, so they cannot be passed
		 * to a %d conversion; widen explicitly and print as %lld. */
		fprintf(stderr, "%d@%lld: use=%d\n", i, (long long)opts->ofs_list[i], opts->use_list[i]);
	}
}
#endif
2714
2715	static void presize_unsaved_signature_byteranges(fz_context ctx, pdf_document doc)
2716	{
2717	int s;
2718
2719	for (s = `0`; s < doc->num_incremental_sections; s++)
2720	{
2721	pdf_xref *xref = &doc->xref_sections[s];
2722
2723	if (xref->unsaved_sigs)
2724	{
2725	/ The ByteRange objects of signatures are initially written out with*
2726	* dummy values, and then overwritten later. We need to make sure their
2727	* initial form at least takes enough sufficient file space */
2728	pdf_unsaved_sig *usig;
2729	int n = `0`;
2730
2731	for (usig = xref->unsaved_sigs; usig; usig = usig->next)
2732	n++;
2733
2734	for (usig = xref->unsaved_sigs; usig; usig = usig->next)
2735	{
2736	/ There will be segments of bytes at the beginning, at*
2737	* the end and between each consecutive pair of signatures,
2738	* hence n + 1 */
2739	int i;
2740	pdf_obj *byte_range = pdf_dict_getl(ctx, usig->field, PDF_NAME(V), PDF_NAME(ByteRange), NULL);
2741
2742	for (i = `0`; i < n+`1`; i++)
2743	{
2744	pdf_array_push_int(ctx, byte_range, INT_MAX);
2745	pdf_array_push_int(ctx, byte_range, INT_MAX);
2746	}
2747	}
2748	}
2749	}
2750	}
2751
2752	static void complete_signatures(fz_context ctx, pdf_document doc, pdf_write_state *opts)
2753	{
2754	pdf_unsaved_sig *usig;
2755	char buf = NULL, ptr;
2756	int buf_size;
2757	int s;
2758	int i;
2759	int last_end;
2760	fz_stream *stm = NULL;
2761	fz_var(stm);
2762	fz_var(buf);
2763
2764	fz_try(ctx)
2765	{
2766	for (s = `0`; s < doc->num_incremental_sections; s++)
2767	{
2768	pdf_xref *xref = &doc->xref_sections[doc->num_incremental_sections - s - `1`];
2769
2770	if (xref->unsaved_sigs)
2771	{
2772	pdf_obj *byte_range;
2773	buf_size = `0`;
2774
2775	for (usig = xref->unsaved_sigs; usig; usig = usig->next)
2776	{
2777	int size = usig->signer->max_digest_size(usig->signer);
2778
2779	buf_size = fz_maxi(buf_size, size);
2780	}
2781
2782	buf_size = buf_size * `2` + SIG_EXTRAS_SIZE;
2783
2784	buf = fz_calloc(ctx, buf_size, `1`);
2785
2786	stm = fz_stream_from_output(ctx, opts->out);
2787	/ Locate the byte ranges and contents in the saved file /
2788	for (usig = xref->unsaved_sigs; usig; usig = usig->next)
2789	{
2790	char bstr, cstr, *fstr;
2791	int bytes_read;
2792	int pnum = pdf_obj_parent_num(ctx, pdf_dict_getl(ctx, usig->field, PDF_NAME(V), PDF_NAME(ByteRange), NULL));
2793	fz_seek(ctx, stm, opts->ofs_list[pnum], SEEK_SET);
2794	/ SIG_EXTRAS_SIZE is an arbitrary value and its addition above to buf_size*
2795	* could cause an attempt to read off the end of the file. That's not an
2796	* error, but we need to keep track of how many bytes are read and search
2797	* for markers only in defined data */
2798	bytes_read = fz_read(ctx, stm, (unsigned char *)buf, buf_size);
2799	assert(bytes_read <= buf_size);
2800
2801	bstr = fz_memmem(buf, bytes_read, SLASH_BYTE_RANGE, sizeof(SLASH_BYTE_RANGE)-`1`);
2802	cstr = fz_memmem(buf, bytes_read, SLASH_CONTENTS, sizeof(SLASH_CONTENTS)-`1`);
2803	fstr = fz_memmem(buf, bytes_read, SLASH_FILTER, sizeof(SLASH_FILTER)-`1`);
2804
2805	if (!(bstr && cstr && fstr && bstr < cstr && cstr < fstr))
2806	fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to determine byte ranges while writing signature");
2807
2808	usig->byte_range_start = bstr - buf + sizeof(SLASH_BYTE_RANGE)-`1` + opts->ofs_list[pnum];
2809	usig->byte_range_end = cstr - buf + opts->ofs_list[pnum];
2810	usig->contents_start = cstr - buf + sizeof(SLASH_CONTENTS)-`1` + opts->ofs_list[pnum];
2811	usig->contents_end = fstr - buf + opts->ofs_list[pnum];
2812	}
2813
2814	fz_drop_stream(ctx, stm);
2815	stm = NULL;
2816
2817	/ Recreate ByteRange with correct values. Initially store the*
2818	* recreated object in the first of the unsaved signatures */
2819	byte_range = pdf_new_array(ctx, doc, `4`);
2820	pdf_dict_putl_drop(ctx, xref->unsaved_sigs->field, byte_range, PDF_NAME(V), PDF_NAME(ByteRange), NULL);
2821
2822	last_end = `0`;
2823	for (usig = xref->unsaved_sigs; usig; usig = usig->next)
2824	{
2825	pdf_array_push_int(ctx, byte_range, last_end);
2826	pdf_array_push_int(ctx, byte_range, usig->contents_start - last_end);
2827	last_end = usig->contents_end;
2828	}
2829	pdf_array_push_int(ctx, byte_range, last_end);
2830	pdf_array_push_int(ctx, byte_range, xref->end_ofs - last_end);
2831
2832	/ Copy the new ByteRange to the other unsaved signatures /
2833	for (usig = xref->unsaved_sigs->next; usig; usig = usig->next)
2834	pdf_dict_putl_drop(ctx, usig->field, pdf_copy_array(ctx, byte_range), PDF_NAME(V), PDF_NAME(ByteRange), NULL);
2835
2836	/ Write the byte range into buf, padding with spaces/
2837	ptr = pdf_sprint_obj(ctx, buf, buf_size, &i, byte_range, `1`, `0`);
2838	if (ptr != buf) / should never happen, since data should fit in buf_size /
2839	fz_free(ctx, ptr);
2840	memset(buf+i, `' '`, buf_size-i);
2841
2842	/ Write the byte range to the file /
2843	for (usig = xref->unsaved_sigs; usig; usig = usig->next)
2844	{
2845	fz_seek_output(ctx, opts->out, usig->byte_range_start, SEEK_SET);
2846	fz_write_data(ctx, opts->out, buf, usig->byte_range_end - usig->byte_range_start);
2847	}
2848
2849	/ Write the digests into the file /
2850	for (usig = xref->unsaved_sigs; usig; usig = usig->next)
2851	pdf_write_digest(ctx, opts->out, byte_range, usig->contents_start, usig->contents_end - usig->contents_start, usig->signer);
2852
2853	/ delete the unsaved_sigs records /
2854	while ((usig = xref->unsaved_sigs) != NULL)
2855	{
2856	xref->unsaved_sigs = usig->next;
2857	pdf_drop_obj(ctx, usig->field);
2858	usig->signer->drop(usig->signer);
2859	fz_free(ctx, usig);
2860	}
2861
2862	xref->unsaved_sigs_end = NULL;
2863
2864	fz_free(ctx, buf);
2865	buf = NULL;
2866	}
2867	}
2868	}
2869	fz_catch(ctx)
2870	{
2871	fz_drop_stream(ctx, stm);
2872	fz_free(ctx, buf);
2873	fz_rethrow(ctx);
2874	}
2875	}
2876
2877	static void clean_content_streams(fz_context ctx, pdf_document doc, int sanitize, int ascii)
2878	{
2879	int n = pdf_count_pages(ctx, doc);
2880	int i;
2881
2882	for (i = `0`; i < n; i++)
2883	{
2884	pdf_annot *annot;
2885	pdf_page *page = pdf_load_page(ctx, doc, i);
2886
2887	fz_try(ctx)
2888	{
2889	pdf_clean_page_contents(ctx, doc, page, NULL, NULL, NULL, sanitize, ascii);
2890
2891	for (annot = pdf_first_annot(ctx, page); annot != NULL; annot = pdf_next_annot(ctx, annot))
2892	{
2893	pdf_clean_annot_contents(ctx, doc, annot, NULL, NULL, NULL, sanitize, ascii);
2894	}
2895	}
2896	fz_always(ctx)
2897	fz_drop_page(ctx, &page->super);
2898	fz_catch(ctx)
2899	fz_rethrow(ctx);
2900	}
2901	}
2902
2903	/ Initialise the pdf_write_state, used dynamically during the write, from the static*
2904	* pdf_write_options, passed into pdf_save_document */
2905	static void initialise_write_state(fz_context ctx, pdf_document doc, const pdf_write_options in_opts, pdf_write_state opts)
2906	{
2907	int xref_len = pdf_xref_len(ctx, doc);
2908
2909	opts->do_incremental = in_opts->do_incremental;
2910	opts->do_ascii = in_opts->do_ascii;
2911	opts->do_tight = !in_opts->do_pretty;
2912	opts->do_expand = in_opts->do_decompress;
2913	opts->do_compress = in_opts->do_compress;
2914	opts->do_compress_images = in_opts->do_compress_images;
2915	opts->do_compress_fonts = in_opts->do_compress_fonts;
2916
2917	opts->do_garbage = in_opts->do_garbage;
2918	opts->do_linear = in_opts->do_linear;
2919	opts->do_clean = in_opts->do_clean;
2920	opts->do_encrypt = in_opts->do_encrypt;
2921	opts->start = `0`;
2922	opts->main_xref_offset = INT_MIN;
2923
2924	opts->permissions = in_opts->permissions;
2925	memcpy(opts->opwd_utf8, in_opts->opwd_utf8, nelem(opts->opwd_utf8));
2926	memcpy(opts->upwd_utf8, in_opts->upwd_utf8, nelem(opts->upwd_utf8));
2927
2928	/ We deliberately make these arrays long enough to cope with*
2929	* 1 to n access rather than 0..n-1, and add space for 2 new
2930	* extra entries that may be required for linearization. */
2931	opts->list_len = `0`;
2932	opts->use_list = NULL;
2933	opts->ofs_list = NULL;
2934	opts->gen_list = NULL;
2935	opts->renumber_map = NULL;
2936	opts->rev_renumber_map = NULL;
2937
2938	expand_lists(ctx, opts, xref_len);
2939	}
2940
2941	/ Free the resources held by the dynamic write options /
2942	static void finalise_write_state(fz_context ctx, pdf_write_state opts)
2943	{
2944	fz_free(ctx, opts->use_list);
2945	fz_free(ctx, opts->ofs_list);
2946	fz_free(ctx, opts->gen_list);
2947	fz_free(ctx, opts->renumber_map);
2948	fz_free(ctx, opts->rev_renumber_map);
2949	pdf_drop_obj(ctx, opts->linear_l);
2950	pdf_drop_obj(ctx, opts->linear_h0);
2951	pdf_drop_obj(ctx, opts->linear_h1);
2952	pdf_drop_obj(ctx, opts->linear_o);
2953	pdf_drop_obj(ctx, opts->linear_e);
2954	pdf_drop_obj(ctx, opts->linear_n);
2955	pdf_drop_obj(ctx, opts->linear_t);
2956	pdf_drop_obj(ctx, opts->hints_s);
2957	pdf_drop_obj(ctx, opts->hints_length);
2958	page_objects_list_destroy(ctx, opts->page_object_lists);
2959	}
2960
2961	const pdf_write_options pdf_default_write_options = {
2962	`0`, / do_incremental /
2963	`0`, / do_pretty /
2964	`0`, / do_ascii /
2965	`0`, / do_compress /
2966	`0`, / do_compress_images /
2967	`0`, / do_compress_fonts /
2968	`0`, / do_decompress /
2969	`0`, / do_garbage /
2970	`0`, / do_linear /
2971	`0`, / do_clean /
2972	`0`, / do_sanitize /
2973	`0`, / do_appearance /
2974	`0`, / do_encrypt /
2975	~`0`, / permissions /
2976	"", / opwd_utf8[128] /
2977	"", / upwd_utf8[128] /
2978	};
2979
/*
	Help text describing the option string accepted when writing PDF
	output.

	The "or garbage=..." lines are continuations of the "garbage" entry
	(their "..." refers to its description), so they are kept directly
	beneath it.
*/
const char *fz_pdf_write_options_usage =
	"PDF output options:\n"
	"\tdecompress: decompress all streams (except compress-fonts/images)\n"
	"\tcompress: compress all streams\n"
	"\tcompress-fonts: compress embedded fonts\n"
	"\tcompress-images: compress images\n"
	"\tascii: ASCII hex encode binary streams\n"
	"\tpretty: pretty-print objects with indentation\n"
	"\tlinearize: optimize for web browsers\n"
	"\tclean: pretty-print graphics commands in content streams\n"
	"\tsanitize: sanitize graphics commands in content streams\n"
	"\tgarbage: garbage collect unused objects\n"
	"\tor garbage=compact: ... and compact cross reference table\n"
	"\tor garbage=deduplicate: ... and remove duplicate objects\n"
	"\tincremental: write changes as incremental update\n"
	"\tcontinue-on-error: continue saving the document even if there is an error\n"
	"\tdecrypt: write unencrypted document\n"
	"\tencrypt=rc4-40|rc4-128|aes-128|aes-256: write encrypted document\n"
	"\tpermissions=NUMBER: document permissions to grant when encrypting\n"
	"\tuser-password=PASSWORD: password required to read document\n"
	"\towner-password=PASSWORD: password required to edit document\n"
	"\n";
3002
3003	/*
3004	Parse option string into a pdf_write_options struct.
3005	Matches the command line options to 'mutool clean':
3006	g: garbage collect
3007	d, i, f: expand all, fonts, images
3008	l: linearize
3009	a: ascii hex encode
3010	z: deflate
3011	c: clean content streams
3012	s: sanitize content streams
3013	*/
3014	pdf_write_options *
3015	pdf_parse_write_options(fz_context ctx, pdf_write_options opts, const char *args)
3016	{
3017	const char *val;
3018
3019	memset(opts, `0`, sizeof *opts);
3020
3021	if (fz_has_option(ctx, args, "decompress", &val))
3022	opts->do_decompress = fz_option_eq(val, "yes");
3023	if (fz_has_option(ctx, args, "compress", &val))
3024	opts->do_compress = fz_option_eq(val, "yes");
3025	if (fz_has_option(ctx, args, "compress-fonts", &val))
3026	opts->do_compress_fonts = fz_option_eq(val, "yes");
3027	if (fz_has_option(ctx, args, "compress-images", &val))
3028	opts->do_compress_images = fz_option_eq(val, "yes");
3029	if (fz_has_option(ctx, args, "ascii", &val))
3030	opts->do_ascii = fz_option_eq(val, "yes");
3031	if (fz_has_option(ctx, args, "pretty", &val))
3032	opts->do_pretty = fz_option_eq(val, "yes");
3033	if (fz_has_option(ctx, args, "linearize", &val))
3034	opts->do_linear = fz_option_eq(val, "yes");
3035	if (fz_has_option(ctx, args, "clean", &val))
3036	opts->do_clean = fz_option_eq(val, "yes");
3037	if (fz_has_option(ctx, args, "sanitize", &val))
3038	opts->do_sanitize = fz_option_eq(val, "yes");
3039	if (fz_has_option(ctx, args, "incremental", &val))
3040	opts->do_incremental = fz_option_eq(val, "yes");
3041	if (fz_has_option(ctx, args, "decrypt", &val))
3042	opts->do_encrypt = fz_option_eq(val, "yes") ? PDF_ENCRYPT_NONE : PDF_ENCRYPT_KEEP;
3043	if (fz_has_option(ctx, args, "encrypt", &val))
3044	{
3045	opts->do_encrypt = PDF_ENCRYPT_UNKNOWN;
3046	if (fz_option_eq(val, "none") \|\| fz_option_eq(val, "no"))
3047	opts->do_encrypt = PDF_ENCRYPT_NONE;
3048	if (fz_option_eq(val, "keep"))
3049	opts->do_encrypt = PDF_ENCRYPT_KEEP;
3050	if (fz_option_eq(val, "rc4-40") \|\| fz_option_eq(val, "yes"))
3051	opts->do_encrypt = PDF_ENCRYPT_RC4_40;
3052	if (fz_option_eq(val, "rc4-128"))
3053	opts->do_encrypt = PDF_ENCRYPT_RC4_128;
3054	if (fz_option_eq(val, "aes-128"))
3055	opts->do_encrypt = PDF_ENCRYPT_AES_128;
3056	if (fz_option_eq(val, "aes-256"))
3057	opts->do_encrypt = PDF_ENCRYPT_AES_256;
3058	}
3059	if (fz_has_option(ctx, args, "owner-password", &val))
3060	fz_copy_option(ctx, val, opts->opwd_utf8, nelem(opts->opwd_utf8));
3061	if (fz_has_option(ctx, args, "user-password", &val))
3062	fz_copy_option(ctx, val, opts->upwd_utf8, nelem(opts->upwd_utf8));
3063	if (fz_has_option(ctx, args, "permissions", &val))
3064	opts->permissions = fz_atoi(val);
3065	else
3066	opts->permissions = ~`0`;
3067	if (fz_has_option(ctx, args, "garbage", &val))
3068	{
3069	if (fz_option_eq(val, "yes"))
3070	opts->do_garbage = `1`;
3071	else if (fz_option_eq(val, "compact"))
3072	opts->do_garbage = `2`;
3073	else if (fz_option_eq(val, "deduplicate"))
3074	opts->do_garbage = `3`;
3075	else
3076	opts->do_garbage = fz_atoi(val);
3077	}
3078	if (fz_has_option(ctx, args, "appearance", &val))
3079	{
3080	if (fz_option_eq(val, "yes"))
3081	opts->do_appearance = `1`;
3082	else if (fz_option_eq(val, "all"))
3083	opts->do_appearance = `2`;
3084	}
3085
3086	return opts;
3087	}
3088
3089	/*
3090	Return true if the document can be saved incrementally. Applying
3091	redactions or having a repaired document make incremental saving
3092	impossible.
3093	*/
3094	int pdf_can_be_saved_incrementally(fz_context ctx, pdf_document doc)
3095	{
3096	if (doc->repair_attempted)
3097	return `0`;
3098	if (doc->redacted)
3099	return `0`;
3100	if (doc->has_xref_streams && doc->has_old_style_xrefs)
3101	return `0`;
3102	return `1`;
3103	}
3104
3105	static void
3106	prepare_for_save(fz_context ctx, pdf_document doc, pdf_write_options *in_opts)
3107	{
3108	doc->freeze_updates = `1`;
3109
3110	/ Rewrite (and possibly sanitize) the operator streams /
3111	if (in_opts->do_clean \|\| in_opts->do_sanitize)
3112	clean_content_streams(ctx, doc, in_opts->do_sanitize, in_opts->do_ascii);
3113
3114	presize_unsaved_signature_byteranges(ctx, doc);
3115	}
3116
3117	static pdf_obj *
3118	new_identity(fz_context ctx, pdf_document doc)
3119	{
3120	unsigned char rnd[`32`];
3121	pdf_obj *id;
3122
3123	fz_memrnd(ctx, rnd, nelem(rnd));
3124
3125	id = pdf_dict_put_array(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID), `2`);
3126	pdf_array_push_drop(ctx, id, pdf_new_string(ctx, (char *) rnd + `0`, nelem(rnd) / `2`));
3127	pdf_array_push_drop(ctx, id, pdf_new_string(ctx, (char *) rnd + `16`, nelem(rnd) / `2`));
3128
3129	return id;
3130	}
3131
3132	static void
3133	change_identity(fz_context ctx, pdf_document doc, pdf_obj *id)
3134	{
3135	unsigned char rnd[`16`];
3136	if (pdf_array_len(ctx, id) >= `2`)
3137	{
3138	/ Update second half of ID array with new random data. /
3139	fz_memrnd(ctx, rnd, `16`);
3140	pdf_array_put_drop(ctx, id, `1`, pdf_new_string(ctx, (char *)rnd, `16`));
3141	}
3142	}
3143
3144	static void
3145	create_encryption_dictionary(fz_context ctx, pdf_document doc, pdf_crypt *crypt)
3146	{
3147	unsigned char o, u;
3148	pdf_obj *encrypt;
3149	int r;
3150
3151	r = pdf_crypt_revision(ctx, crypt);
3152
3153	encrypt = pdf_dict_put_dict(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt), `10`);
3154
3155	pdf_dict_put_name(ctx, encrypt, PDF_NAME(Filter), "Standard");
3156	pdf_dict_put_int(ctx, encrypt, PDF_NAME(R), r);
3157	pdf_dict_put_int(ctx, encrypt, PDF_NAME(V), pdf_crypt_version(ctx, crypt));
3158	pdf_dict_put_int(ctx, encrypt, PDF_NAME(Length), pdf_crypt_length(ctx, crypt));
3159	pdf_dict_put_int(ctx, encrypt, PDF_NAME(P), pdf_crypt_permissions(ctx, crypt));
3160	pdf_dict_put_bool(ctx, encrypt, PDF_NAME(EncryptMetadata), pdf_crypt_encrypt_metadata(ctx, crypt));
3161
3162	o = pdf_crypt_owner_password(ctx, crypt);
3163	u = pdf_crypt_user_password(ctx, crypt);
3164
3165	if (r < `4`)
3166	{
3167	pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, `32`);
3168	pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, `32`);
3169	}
3170	else if (r == `4`)
3171	{
3172	pdf_obj *cf;
3173
3174	pdf_dict_put_name(ctx, encrypt, PDF_NAME(StmF), "StdCF");
3175	pdf_dict_put_name(ctx, encrypt, PDF_NAME(StrF), "StdCF");
3176
3177	cf = pdf_dict_put_dict(ctx, encrypt, PDF_NAME(CF), `1`);
3178	cf = pdf_dict_put_dict(ctx, cf, PDF_NAME(StdCF), `3`);
3179	pdf_dict_put_name(ctx, cf, PDF_NAME(AuthEvent), "DocOpen");
3180	pdf_dict_put_name(ctx, cf, PDF_NAME(CFM), "AESV2");
3181	pdf_dict_put_int(ctx, cf, PDF_NAME(Length), `16`);
3182	pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, `32`);
3183	pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, `32`);
3184	}
3185	else if (r == `6`)
3186	{
3187	unsigned char *oe = pdf_crypt_owner_encryption(ctx, crypt);
3188	unsigned char *ue = pdf_crypt_user_encryption(ctx, crypt);
3189	pdf_obj *cf;
3190
3191	pdf_dict_put_name(ctx, encrypt, PDF_NAME(StmF), "StdCF");
3192	pdf_dict_put_name(ctx, encrypt, PDF_NAME(StrF), "StdCF");
3193
3194	cf = pdf_dict_put_dict(ctx, encrypt, PDF_NAME(CF), `1`);
3195	cf = pdf_dict_put_dict(ctx, cf, PDF_NAME(StdCF), `3`);
3196	pdf_dict_put_name(ctx, cf, PDF_NAME(AuthEvent), "DocOpen");
3197	pdf_dict_put_name(ctx, cf, PDF_NAME(CFM), "AESV3");
3198	pdf_dict_put_int(ctx, cf, PDF_NAME(Length), `32`);
3199	pdf_dict_put_string(ctx, encrypt, PDF_NAME(O), (char *) o, `48`);
3200	pdf_dict_put_string(ctx, encrypt, PDF_NAME(U), (char *) u, `48`);
3201	pdf_dict_put_string(ctx, encrypt, PDF_NAME(OE), (char *) oe, `32`);
3202	pdf_dict_put_string(ctx, encrypt, PDF_NAME(UE), (char *) ue, `32`);
3203	pdf_dict_put_string(ctx, encrypt, PDF_NAME(Perms), (char *) pdf_crypt_permissions_encryption(ctx, crypt), `16`);
3204	}
3205	}
3206
3207	static void
3208	do_pdf_save_document(fz_context ctx, pdf_document doc, pdf_write_state opts, pdf_write_options in_opts)
3209	{
3210	int lastfree;
3211	int num;
3212	int xref_len;
3213	pdf_obj id, id1;
3214
3215	if (in_opts->do_incremental)
3216	{
3217	/ If no changes, nothing to write /
3218	if (doc->num_incremental_sections == `0`)
3219	return;
3220	if (opts->out)
3221	{
3222	fz_seek_output(ctx, opts->out, `0`, SEEK_END);
3223	fz_write_string(ctx, opts->out, "\n");
3224	}
3225	}
3226
3227	xref_len = pdf_xref_len(ctx, doc);
3228
3229	fz_try(ctx)
3230	{
3231	initialise_write_state(ctx, doc, in_opts, opts);
3232
3233	/ Update second half of ID array if it exists. /
3234	id = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
3235	if (id)
3236	change_identity(ctx, doc, id);
3237
3238	/ Remove encryption dictionary if saving without encryption. /
3239	if (opts->do_encrypt == PDF_ENCRYPT_NONE)
3240	{
3241	pdf_dict_del(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
3242	}
3243
3244	/ Keep encryption dictionary if saving with old encryption. /
3245	else if (opts->do_encrypt == PDF_ENCRYPT_KEEP)
3246	{
3247	opts->crypt = doc->crypt;
3248	}
3249
3250	/ Create encryption dictionary if saving with new encryption. /
3251	else
3252	{
3253	if (!id)
3254	id = new_identity(ctx, doc);
3255	id1 = pdf_array_get(ctx, id, `0`);
3256	opts->crypt = pdf_new_encrypt(ctx, opts->opwd_utf8, opts->upwd_utf8, id1, opts->permissions, opts->do_encrypt);
3257	create_encryption_dictionary(ctx, doc, opts->crypt);
3258	}
3259
3260	/ Make sure any objects hidden in compressed streams have been loaded /
3261	if (!opts->do_incremental)
3262	{
3263	pdf_ensure_solid_xref(ctx, doc, xref_len);
3264	preloadobjstms(ctx, doc);
3265	xref_len = pdf_xref_len(ctx, doc); / May have changed due to repair /
3266	expand_lists(ctx, opts, xref_len);
3267	}
3268
3269	/ Sweep & mark objects from the trailer /
3270	if (opts->do_garbage >= `1` \|\| opts->do_linear)
3271	(void)markobj(ctx, doc, opts, pdf_trailer(ctx, doc));
3272	else
3273	{
3274	xref_len = pdf_xref_len(ctx, doc); / May have changed due to repair /
3275	expand_lists(ctx, opts, xref_len);
3276	for (num = `0`; num < xref_len; num++)
3277	opts->use_list[num] = `1`;
3278	}
3279
3280	/ Coalesce and renumber duplicate objects /
3281	if (opts->do_garbage >= `3`)
3282	removeduplicateobjs(ctx, doc, opts);
3283
3284	/ Compact xref by renumbering and removing unused objects /
3285	if (opts->do_garbage >= `2` \|\| opts->do_linear)
3286	compactxref(ctx, doc, opts);
3287
3288	opts->crypt_object_number = `0`;
3289	if (opts->crypt)
3290	{
3291	pdf_obj *crypt = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
3292	int crypt_num = pdf_to_num(ctx, crypt);
3293	opts->crypt_object_number = opts->renumber_map[crypt_num];
3294	}
3295
3296	/ Make renumbering affect all indirect references and update xref /
3297	if (opts->do_garbage >= `2` \|\| opts->do_linear)
3298	renumberobjs(ctx, doc, opts);
3299
3300	/ Truncate the xref after compacting and renumbering /
3301	if ((opts->do_garbage >= `2` \|\| opts->do_linear) && !opts->do_incremental)
3302	{
3303	xref_len = pdf_xref_len(ctx, doc); / May have changed due to repair /
3304	expand_lists(ctx, opts, xref_len);
3305	while (xref_len > `0` && !opts->use_list[xref_len-`1`])
3306	xref_len--;
3307	}
3308
3309	if (opts->do_linear)
3310	linearize(ctx, doc, opts);
3311
3312	if (opts->do_incremental)
3313	{
3314	int i;
3315
3316	doc->disallow_new_increments = `1`;
3317
3318	for (i = `0`; i < doc->num_incremental_sections; i++)
3319	{
3320	doc->xref_base = doc->num_incremental_sections - i - `1`;
3321
3322	writeobjects(ctx, doc, opts, `0`);
3323
3324	#ifdef DEBUG_WRITING
3325	dump_object_details(ctx, doc, opts);
3326	#endif
3327
3328	for (num = `0`; num < xref_len; num++)
3329	{
3330	if (!opts->use_list[num] && pdf_xref_is_incremental(ctx, doc, num))
3331	{
3332	/ Make unreusable. FIXME: would be better to link to existing free list /
3333	opts->gen_list[num] = `65535`;
3334	opts->ofs_list[num] = `0`;
3335	}
3336	}
3337
3338	opts->first_xref_offset = fz_tell_output(ctx, opts->out);
3339	if (doc->has_xref_streams)
3340	writexrefstream(ctx, doc, opts, `0`, xref_len, `1`, `0`, opts->first_xref_offset);
3341	else
3342	writexref(ctx, doc, opts, `0`, xref_len, `1`, `0`, opts->first_xref_offset);
3343
3344	doc->xref_sections[doc->xref_base].end_ofs = fz_tell_output(ctx, opts->out);
3345	}
3346
3347	doc->xref_base = `0`;
3348	doc->disallow_new_increments = `0`;
3349	}
3350	else
3351	{
3352	writeobjects(ctx, doc, opts, `0`);
3353
3354	#ifdef DEBUG_WRITING
3355	dump_object_details(ctx, doc, opts);
3356	#endif
3357
3358	/ Construct linked list of free object slots /
3359	lastfree = `0`;
3360	for (num = `0`; num < xref_len; num++)
3361	{
3362	if (!opts->use_list[num])
3363	{
3364	opts->gen_list[num]++;
3365	opts->ofs_list[lastfree] = num;
3366	lastfree = num;
3367	}
3368	}
3369
3370	if (opts->do_linear && opts->page_count > `0`)
3371	{
3372	opts->main_xref_offset = fz_tell_output(ctx, opts->out);
3373	writexref(ctx, doc, opts, `0`, opts->start, `0`, `0`, opts->first_xref_offset);
3374	opts->file_len = fz_tell_output(ctx, opts->out);
3375
3376	make_hint_stream(ctx, doc, opts);
3377	if (opts->do_ascii)
3378	{
3379	opts->hintstream_len *= `2`;
3380	opts->hintstream_len += `1` + ((opts->hintstream_len+`63`)>>`6`);
3381	}
3382	opts->file_len += opts->hintstream_len;
3383	opts->main_xref_offset += opts->hintstream_len;
3384	update_linearization_params(ctx, doc, opts);
3385	fz_seek_output(ctx, opts->out, `0`, `0`);
3386	writeobjects(ctx, doc, opts, `1`);
3387
3388	padto(ctx, opts->out, opts->main_xref_offset);
3389	writexref(ctx, doc, opts, `0`, opts->start, `0`, `0`, opts->first_xref_offset);
3390	}
3391	else
3392	{
3393	opts->first_xref_offset = fz_tell_output(ctx, opts->out);
3394	writexref(ctx, doc, opts, `0`, xref_len, `1`, `0`, opts->first_xref_offset);
3395	}
3396
3397	doc->xref_sections[`0`].end_ofs = fz_tell_output(ctx, opts->out);
3398	}
3399
3400	complete_signatures(ctx, doc, opts);
3401
3402	doc->dirty = `0`;
3403	}
3404	fz_always(ctx)
3405	{
3406	#ifdef DEBUG_LINEARIZATION
3407	page_objects_dump(opts);
3408	objects_dump(ctx, doc, opts);
3409	#endif
3410	finalise_write_state(ctx, opts);
3411	if (opts->crypt != doc->crypt)
3412	pdf_drop_crypt(ctx, opts->crypt);
3413	doc->freeze_updates = `0`;
3414	}
3415	fz_catch(ctx)
3416	{
3417	fz_rethrow(ctx);
3418	}
3419	}
3420
3421	/*
3422	Returns true if there are digital signatures waiting to
3423	to updated on save.
3424	*/
3425	int pdf_has_unsaved_sigs(fz_context ctx, pdf_document doc)
3426	{
3427	int s;
3428	for (s = `0`; s < doc->num_incremental_sections; s++)
3429	{
3430	pdf_xref *xref = &doc->xref_sections[doc->num_incremental_sections - s - `1`];
3431
3432	if (xref->unsaved_sigs)
3433	return `1`;
3434	}
3435	return `0`;
3436	}
3437
3438	/*
3439	Write out the document to an output stream with all changes finalised.
3440	*/
3441	void pdf_write_document(fz_context ctx, pdf_document doc, fz_output out, pdf_write_options in_opts)
3442	{
3443	pdf_write_options opts_defaults = pdf_default_write_options;
3444	pdf_write_state opts = { `0` };
3445
3446	if (!doc)
3447	return;
3448
3449	if (!in_opts)
3450	in_opts = &opts_defaults;
3451
3452	if (in_opts->do_incremental && doc->repair_attempted)
3453	fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes on a repaired file");
3454	if (in_opts->do_incremental && in_opts->do_garbage)
3455	fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with garbage collection");
3456	if (in_opts->do_incremental && in_opts->do_linear)
3457	fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with linearisation");
3458	if (in_opts->do_incremental && in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
3459	fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes when changing encryption");
3460	if (pdf_has_unsaved_sigs(ctx, doc) && !out->as_stream)
3461	fz_throw(ctx, FZ_ERROR_GENERIC, "Can't write pdf that has unsaved sigs to a fz_output unless it supports fz_stream_from_output!");
3462
3463	prepare_for_save(ctx, doc, in_opts);
3464
3465	opts.out = out;
3466
3467	do_pdf_save_document(ctx, doc, &opts, in_opts);
3468	}
3469
3470	/*
3471	Write out the document to a file with all changes finalised.
3472	*/
3473	void pdf_save_document(fz_context ctx, pdf_document doc, const char filename, pdf_write_options in_opts)
3474	{
3475	pdf_write_options opts_defaults = pdf_default_write_options;
3476	pdf_write_state opts = { `0` };
3477
3478	if (!doc)
3479	return;
3480
3481	if (!in_opts)
3482	in_opts = &opts_defaults;
3483
3484	if (in_opts->do_incremental && !doc->file)
3485	fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes on a new document");
3486	if (in_opts->do_incremental && doc->repair_attempted)
3487	fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes on a repaired file");
3488	if (in_opts->do_incremental && in_opts->do_garbage)
3489	fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with garbage collection");
3490	if (in_opts->do_incremental && in_opts->do_linear)
3491	fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes with linearisation");
3492	if (in_opts->do_incremental && in_opts->do_encrypt != PDF_ENCRYPT_KEEP)
3493	fz_throw(ctx, FZ_ERROR_GENERIC, "Can't do incremental writes when changing encryption");
3494
3495	if (in_opts->do_appearance > `0`)
3496	{
3497	int i, n = pdf_count_pages(ctx, doc);
3498	for (i = `0`; i < n; ++i)
3499	{
3500	pdf_page *page = pdf_load_page(ctx, doc, i);
3501	fz_try(ctx)
3502	{
3503	if (in_opts->do_appearance > `1`)
3504	{
3505	pdf_annot *annot;
3506	for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot))
3507	annot->needs_new_ap = `1`;
3508	for (annot = pdf_first_widget(ctx, page); annot; annot = pdf_next_widget(ctx, annot))
3509	annot->needs_new_ap = `1`;
3510	}
3511	pdf_update_page(ctx, page);
3512	}
3513	fz_always(ctx)
3514	fz_drop_page(ctx, &page->super);
3515	fz_catch(ctx)
3516	fz_warn(ctx, "could not create annotation appearances");
3517	}
3518	}
3519
3520	prepare_for_save(ctx, doc, in_opts);
3521
3522	if (in_opts->do_incremental)
3523	{
3524	/ If no changes, nothing to write /
3525	if (doc->num_incremental_sections == `0`)
3526	return;
3527	opts.out = fz_new_output_with_path(ctx, filename, `1`);
3528	}
3529	else
3530	{
3531	opts.out = fz_new_output_with_path(ctx, filename, `0`);
3532	}
3533	fz_try(ctx)
3534	{
3535	do_pdf_save_document(ctx, doc, &opts, in_opts);
3536	fz_close_output(ctx, opts.out);
3537	}
3538	fz_always(ctx)
3539	{
3540	fz_drop_output(ctx, opts.out);
3541	opts.out = NULL;
3542	}
3543	fz_catch(ctx)
3544	{
3545	fz_rethrow(ctx);
3546	}
3547	}
3548
3549	typedef struct pdf_writer_s pdf_writer;
3550
3551	struct pdf_writer_s
3552	{
3553	fz_document_writer super;
3554	pdf_document *pdf;
3555	pdf_write_options opts;
3556	char *filename;
3557
3558	fz_rect mediabox;
3559	pdf_obj *resources;
3560	fz_buffer *contents;
3561	};
3562
3563	static fz_device *
3564	pdf_writer_begin_page(fz_context ctx, fz_document_writer wri_, fz_rect mediabox)
3565	{
3566	pdf_writer wri = (pdf_writer)wri_;
3567	wri->mediabox = mediabox;
3568	return pdf_page_write(ctx, wri->pdf, wri->mediabox, &wri->resources, &wri->contents);
3569	}
3570
3571	static void
3572	pdf_writer_end_page(fz_context ctx, fz_document_writer wri_, fz_device *dev)
3573	{
3574	pdf_writer wri = (pdf_writer)wri_;
3575	pdf_obj *obj = NULL;
3576
3577	fz_var(obj);
3578
3579	fz_try(ctx)
3580	{
3581	fz_close_device(ctx, dev);
3582	obj = pdf_add_page(ctx, wri->pdf, wri->mediabox, `0`, wri->resources, wri->contents);
3583	pdf_insert_page(ctx, wri->pdf, -`1`, obj);
3584	}
3585	fz_always(ctx)
3586	{
3587	fz_drop_device(ctx, dev);
3588	pdf_drop_obj(ctx, obj);
3589	fz_drop_buffer(ctx, wri->contents);
3590	wri->contents = NULL;
3591	pdf_drop_obj(ctx, wri->resources);
3592	wri->resources = NULL;
3593	}
3594	fz_catch(ctx)
3595	fz_rethrow(ctx);
3596	}
3597
3598	static void
3599	pdf_writer_close_writer(fz_context ctx, fz_document_writer wri_)
3600	{
3601	pdf_writer wri = (pdf_writer)wri_;
3602	pdf_save_document(ctx, wri->pdf, wri->filename, &wri->opts);
3603	}
3604
3605	static void
3606	pdf_writer_drop_writer(fz_context ctx, fz_document_writer wri_)
3607	{
3608	pdf_writer wri = (pdf_writer)wri_;
3609	fz_drop_buffer(ctx, wri->contents);
3610	pdf_drop_obj(ctx, wri->resources);
3611	pdf_drop_document(ctx, wri->pdf);
3612	fz_free(ctx, wri->filename);
3613	}
3614
3615	fz_document_writer *
3616	fz_new_pdf_writer(fz_context ctx, const* char path, const* char *options)
3617	{
3618	pdf_writer *wri = fz_new_derived_document_writer(ctx, pdf_writer, pdf_writer_begin_page, pdf_writer_end_page, pdf_writer_close_writer, pdf_writer_drop_writer);
3619
3620	fz_try(ctx)
3621	{
3622	pdf_parse_write_options(ctx, &wri->opts, options);
3623	wri->filename = fz_strdup(ctx, path ? path : "out.pdf");
3624	wri->pdf = pdf_create_document(ctx);
3625	}
3626	fz_catch(ctx)
3627	{
3628	pdf_drop_document(ctx, wri->pdf);
3629	fz_free(ctx, wri->filename);
3630	fz_free(ctx, wri);
3631	fz_rethrow(ctx);
3632	}
3633
3634	return (fz_document_writer*)wri;
3635	}
3636

Browse the source code of MuPDF/source/pdf/pdf-write.c