pdf-xref.c source code [MuPDF/source/pdf/pdf-xref.c]

1	#include "mupdf/fitz.h"
2	#include "mupdf/pdf.h"
3
4	#include <assert.h>
5	#include <limits.h>
6	#include <string.h>
7
/* Debug tracing switch. It is explicitly undefined here, so DEBUGMESS()
 * expands to an empty statement; define it instead to route the messages
 * through fz_warn. */
#undef DEBUG_PROGESSIVE_ADVANCE

#ifdef DEBUG_PROGESSIVE_ADVANCE
/* A is pasted directly after fz_warn, so it must be a complete,
 * parenthesised argument list. */
#define DEBUGMESS(A) do { fz_warn A; } while (0)
#else
#define DEBUGMESS(A) do { } while (0)
#endif
15
/* ASCII-only decimal digit test. The argument is parenthesised so that
 * compound expressions (conditional, bitwise, ...) are classified as a
 * whole; it is still evaluated twice, so avoid arguments with side effects. */
#define isdigit(c) ((c) >= '0' && (c) <= '9')
17
/* Return non-zero if ch is one of the six bytes treated as white space
 * here: NUL, TAB, LF, FF, CR or SPACE. */
static inline int iswhite(int ch)
{
	return
		ch == '\000' || ch == '\011' || ch == '\012' ||
		ch == '\014' || ch == '\015' || ch == '\040';
}
24
25	/*
26	* xref tables
27	*/
28
29	static void pdf_drop_xref_sections_imp(fz_context ctx, pdf_document doc, pdf_xref xref_sections, int* num_xref_sections)
30	{
31	pdf_unsaved_sig *usig;
32	int x, e;
33
34	for (x = `0`; x < num_xref_sections; x++)
35	{
36	pdf_xref *xref = &xref_sections[x];
37	pdf_xref_subsec *sub = xref->subsec;
38
39	while (sub != NULL)
40	{
41	pdf_xref_subsec *next_sub = sub->next;
42	for (e = `0`; e < sub->len; e++)
43	{
44	pdf_xref_entry *entry = &sub->table[e];
45	if (entry->obj)
46	{
47	pdf_drop_obj(ctx, entry->obj);
48	fz_drop_buffer(ctx, entry->stm_buf);
49	}
50	}
51	fz_free(ctx, sub->table);
52	fz_free(ctx, sub);
53	sub = next_sub;
54	}
55
56	pdf_drop_obj(ctx, xref->pre_repair_trailer);
57	pdf_drop_obj(ctx, xref->trailer);
58
59	while ((usig = xref->unsaved_sigs) != NULL)
60	{
61	xref->unsaved_sigs = usig->next;
62	pdf_drop_obj(ctx, usig->field);
63	usig->signer->drop(usig->signer);
64	fz_free(ctx, usig);
65	}
66	}
67
68	fz_free(ctx, xref_sections);
69	}
70
71	static void pdf_drop_xref_sections(fz_context ctx, pdf_document doc)
72	{
73	pdf_drop_xref_sections_imp(ctx, doc, doc->saved_xref_sections, doc->saved_num_xref_sections);
74	pdf_drop_xref_sections_imp(ctx, doc, doc->xref_sections, doc->num_xref_sections);
75
76	doc->saved_xref_sections = NULL;
77	doc->saved_num_xref_sections = `0`;
78	doc->xref_sections = NULL;
79	doc->num_xref_sections = `0`;
80	doc->num_incremental_sections = `0`;
81	}
82
83	static void
84	extend_xref_index(fz_context ctx, pdf_document doc, int newlen)
85	{
86	int i;
87
88	doc->xref_index = fz_realloc_array(ctx, doc->xref_index, newlen, int);
89	for (i = doc->max_xref_len; i < newlen; i++)
90	{
91	doc->xref_index[i] = `0`;
92	}
93	doc->max_xref_len = newlen;
94	}
95
96	/ This is only ever called when we already have an incremental*
97	* xref. This means there will only be 1 subsec, and it will be
98	* a complete subsec. */
99	static void pdf_resize_xref(fz_context ctx, pdf_document doc, int newlen)
100	{
101	int i;
102	pdf_xref *xref = &doc->xref_sections[doc->xref_base];
103	pdf_xref_subsec *sub;
104
105	assert(xref != NULL);
106	sub = xref->subsec;
107	assert(sub->next == NULL && sub->start == `0` && sub->len == xref->num_objects);
108	assert(newlen > xref->num_objects);
109
110	sub->table = fz_realloc_array(ctx, sub->table, newlen, pdf_xref_entry);
111	for (i = xref->num_objects; i < newlen; i++)
112	{
113	sub->table[i].type = `0`;
114	sub->table[i].ofs = `0`;
115	sub->table[i].gen = `0`;
116	sub->table[i].num = `0`;
117	sub->table[i].stm_ofs = `0`;
118	sub->table[i].stm_buf = NULL;
119	sub->table[i].obj = NULL;
120	}
121	xref->num_objects = newlen;
122	sub->len = newlen;
123	if (doc->max_xref_len < newlen)
124	extend_xref_index(ctx, doc, newlen);
125	}
126
127	static void pdf_populate_next_xref_level(fz_context ctx, pdf_document doc)
128	{
129	pdf_xref *xref;
130	doc->xref_sections = fz_realloc_array(ctx, doc->xref_sections, doc->num_xref_sections + `1`, pdf_xref);
131	doc->num_xref_sections++;
132
133	xref = &doc->xref_sections[doc->num_xref_sections - `1`];
134	xref->subsec = NULL;
135	xref->num_objects = `0`;
136	xref->trailer = NULL;
137	xref->pre_repair_trailer = NULL;
138	xref->unsaved_sigs = NULL;
139	xref->unsaved_sigs_end = NULL;
140	}
141
142	pdf_obj pdf_trailer(fz_context ctx, pdf_document *doc)
143	{
144	/ Return the document's final trailer /
145	pdf_xref *xref = &doc->xref_sections[`0`];
146
147	return xref ? xref->trailer : NULL;
148	}
149
150	void pdf_set_populating_xref_trailer(fz_context ctx, pdf_document doc, pdf_obj *trailer)
151	{
152	/ Update the trailer of the xref section being populated /
153	pdf_xref *xref = &doc->xref_sections[doc->num_xref_sections - `1`];
154	if (xref->trailer)
155	{
156	pdf_drop_obj(ctx, xref->pre_repair_trailer);
157	xref->pre_repair_trailer = xref->trailer;
158	}
159	xref->trailer = pdf_keep_obj(ctx, trailer);
160	}
161
162	int pdf_xref_len(fz_context ctx, pdf_document doc)
163	{
164	return doc->max_xref_len;
165	}
166
167	/ Ensure that the given xref has a single subsection*
168	* that covers the entire range. */
169	static void
170	ensure_solid_xref(fz_context ctx, pdf_document doc, int num, int which)
171	{
172	pdf_xref *xref = &doc->xref_sections[which];
173	pdf_xref_subsec *sub = xref->subsec;
174	pdf_xref_subsec *new_sub;
175
176	if (num < xref->num_objects)
177	num = xref->num_objects;
178
179	if (sub != NULL && sub->next == NULL && sub->start == `0` && sub->len >= num)
180	return;
181
182	new_sub = fz_malloc_struct(ctx, pdf_xref_subsec);
183	fz_try(ctx)
184	{
185	new_sub->table = fz_calloc(ctx, num, sizeof(pdf_xref_entry));
186	new_sub->start = `0`;
187	new_sub->len = num;
188	new_sub->next = NULL;
189	}
190	fz_catch(ctx)
191	{
192	fz_free(ctx, new_sub);
193	fz_rethrow(ctx);
194	}
195
196	/ Move objects over to the new subsection and destroy the old*
197	* ones */
198	sub = xref->subsec;
199	while (sub != NULL)
200	{
201	pdf_xref_subsec *next = sub->next;
202	int i;
203
204	for (i = `0`; i < sub->len; i++)
205	{
206	new_sub->table[i+sub->start] = sub->table[i];
207	}
208	fz_free(ctx, sub->table);
209	fz_free(ctx, sub);
210	sub = next;
211	}
212	xref->num_objects = num;
213	xref->subsec = new_sub;
214	if (doc->max_xref_len < num)
215	extend_xref_index(ctx, doc, num);
216	}
217
218	/ Used while reading the individual xref sections from a file /
219	pdf_xref_entry pdf_get_populating_xref_entry(fz_context ctx, pdf_document doc, int* num)
220	{
221	/ Return an entry within the xref currently being populated /
222	pdf_xref *xref;
223	pdf_xref_subsec *sub;
224
225	if (doc->num_xref_sections == `0`)
226	{
227	doc->xref_sections = fz_malloc_struct(ctx, pdf_xref);
228	doc->num_xref_sections = `1`;
229	}
230
231	/ Prevent accidental heap underflow /
232	if (num < `0` \|\| num > PDF_MAX_OBJECT_NUMBER)
233	fz_throw(ctx, FZ_ERROR_GENERIC, "object number out of range (%d)", num);
234
235	/ Return the pointer to the entry in the last section. /
236	xref = &doc->xref_sections[doc->num_xref_sections-`1`];
237
238	for (sub = xref->subsec; sub != NULL; sub = sub->next)
239	{
240	if (num >= sub->start && num < sub->start + sub->len)
241	return &sub->table[num-sub->start];
242	}
243
244	/ We've been asked for an object that's not in a subsec. /
245	ensure_solid_xref(ctx, doc, num+`1`, doc->num_xref_sections-`1`);
246	xref = &doc->xref_sections[doc->num_xref_sections-`1`];
247	sub = xref->subsec;
248
249	return &sub->table[num-sub->start];
250	}
251
252	/ Used after loading a document to access entries /
253	/ This will never throw anything, or return NULL if it is*
254	* only asked to return objects in range within a 'solid'
255	* xref. */
256	pdf_xref_entry pdf_get_xref_entry(fz_context ctx, pdf_document doc, int* i)
257	{
258	pdf_xref *xref = NULL;
259	pdf_xref_subsec *sub;
260	int j;
261
262	if (i < `0`)
263	fz_throw(ctx, FZ_ERROR_GENERIC, "Negative object number requested");
264
265	if (i <= doc->max_xref_len)
266	j = doc->xref_index[i];
267	else
268	j = `0`;
269
270	/ We may be accessing an earlier version of the document using xref_base*
271	* and j may be an index into a later xref section */
272	if (doc->xref_base > j)
273	j = doc->xref_base;
274
275	/ Find the first xref section where the entry is defined. /
276	for (; j < doc->num_xref_sections; j++)
277	{
278	xref = &doc->xref_sections[j];
279
280	if (i < xref->num_objects)
281	{
282	for (sub = xref->subsec; sub != NULL; sub = sub->next)
283	{
284	pdf_xref_entry *entry;
285
286	if (i < sub->start \|\| i >= sub->start + sub->len)
287	continue;
288
289	entry = &sub->table[i - sub->start];
290	if (entry->type)
291	{
292	/ Don't update xref_index if xref_base may have*
293	* influenced the value of j */
294	if (doc->xref_base == `0`)
295	doc->xref_index[i] = j;
296	return entry;
297	}
298	}
299	}
300	}
301
302	/ Didn't find the entry in any section. Return the entry from*
303	* the final section. */
304	doc->xref_index[i] = `0`;
305	if (xref == NULL \|\| i < xref->num_objects)
306	{
307	xref = &doc->xref_sections[doc->xref_base];
308	for (sub = xref->subsec; sub != NULL; sub = sub->next)
309	{
310	if (i >= sub->start && i < sub->start + sub->len)
311	return &sub->table[i - sub->start];
312	}
313	}
314
315	/ At this point, we solidify the xref. This ensures that we*
316	* can return a pointer. This is the only case where this function
317	* might throw an exception, and it will never happen when we are
318	* working within a 'solid' xref. */
319	ensure_solid_xref(ctx, doc, i+`1`, `0`);
320	xref = &doc->xref_sections[`0`];
321	sub = xref->subsec;
322	return &sub->table[i - sub->start];
323	}
324
325	/*
326	Ensure we have an incremental xref section where we can store
327	updated versions of indirect objects. This is a new xref section
328	consisting of a single xref subsection.
329	*/
330	static void ensure_incremental_xref(fz_context ctx, pdf_document doc)
331	{
332	/ If there are as yet no incremental sections, or if the most recent*
333	* one has been used to sign a signature field, then we need a new one.
334	* After a signing, any further document changes require a new increment */
335	if ((doc->num_incremental_sections == `0` \|\| doc->xref_sections[`0`].unsaved_sigs != NULL)
336	&& !doc->disallow_new_increments)
337	{
338	pdf_xref *xref = &doc->xref_sections[`0`];
339	pdf_xref *pxref;
340	pdf_xref_entry new_table = fz_calloc(ctx, xref->num_objects, sizeof*(pdf_xref_entry));
341	pdf_xref_subsec *sub = NULL;
342	pdf_obj *trailer = NULL;
343	int i;
344
345	fz_var(trailer);
346	fz_var(sub);
347	fz_try(ctx)
348	{
349	sub = fz_malloc_struct(ctx, pdf_xref_subsec);
350	trailer = xref->trailer ? pdf_copy_dict(ctx, xref->trailer) : NULL;
351	doc->xref_sections = fz_realloc_array(ctx, doc->xref_sections, doc->num_xref_sections + `1`, pdf_xref);
352	xref = &doc->xref_sections[`0`];
353	pxref = &doc->xref_sections[`1`];
354	memmove(pxref, xref, doc->num_xref_sections * sizeof(pdf_xref));
355	/ xref->num_objects is already correct /
356	xref->subsec = sub;
357	sub = NULL;
358	xref->trailer = trailer;
359	xref->pre_repair_trailer = NULL;
360	xref->unsaved_sigs = NULL;
361	xref->unsaved_sigs_end = NULL;
362	xref->subsec->next = NULL;
363	xref->subsec->len = xref->num_objects;
364	xref->subsec->start = `0`;
365	xref->subsec->table = new_table;
366	doc->num_xref_sections++;
367	doc->num_incremental_sections++;
368	}
369	fz_catch(ctx)
370	{
371	fz_free(ctx, sub);
372	fz_free(ctx, new_table);
373	pdf_drop_obj(ctx, trailer);
374	fz_rethrow(ctx);
375	}
376
377	/ Update the xref_index /
378	for (i = `0`; i < doc->max_xref_len; i++)
379	{
380	doc->xref_index[i]++;
381	}
382	}
383	}
384
385	/ Used when altering a document /
386	static pdf_xref_entry pdf_get_incremental_xref_entry(fz_context ctx, pdf_document doc, int* i)
387	{
388	pdf_xref *xref;
389	pdf_xref_subsec *sub;
390
391	/ Make a new final xref section if we haven't already /
392	ensure_incremental_xref(ctx, doc);
393
394	xref = &doc->xref_sections[doc->xref_base];
395	if (i >= xref->num_objects)
396	pdf_resize_xref(ctx, doc, i + `1`);
397
398	sub = xref->subsec;
399	assert(sub != NULL && sub->next == NULL);
400	assert(i >= sub->start && i < sub->start + sub->len);
401	doc->xref_index[i] = `0`;
402	return &sub->table[i - sub->start];
403	}
404
405	int pdf_xref_is_incremental(fz_context ctx, pdf_document doc, int num)
406	{
407	pdf_xref *xref = &doc->xref_sections[doc->xref_base];
408	pdf_xref_subsec *sub = xref->subsec;
409
410	assert(sub != NULL && sub->next == NULL && sub->len == xref->num_objects && sub->start == `0`);
411
412	return num < xref->num_objects && sub->table[num].type;
413	}
414
415	void pdf_xref_store_unsaved_signature(fz_context ctx, pdf_document doc, pdf_obj field, pdf_pkcs7_signer signer)
416	{
417	pdf_xref *xref = &doc->xref_sections[`0`];
418	pdf_unsaved_sig *unsaved_sig;
419
420	/ Record details within the document structure so that contents*
421	* and byte_range can be updated with their correct values at
422	* saving time */
423	unsaved_sig = fz_malloc_struct(ctx, pdf_unsaved_sig);
424	unsaved_sig->field = pdf_keep_obj(ctx, field);
425	unsaved_sig->signer = signer->keep(signer);
426	unsaved_sig->next = NULL;
427	if (xref->unsaved_sigs_end == NULL)
428	xref->unsaved_sigs_end = &xref->unsaved_sigs;
429
430	*xref->unsaved_sigs_end = unsaved_sig;
431	xref->unsaved_sigs_end = &unsaved_sig->next;
432	}
433
434	int pdf_xref_obj_is_unsaved_signature(pdf_document doc, pdf_obj obj)
435	{
436	int i;
437	for (i = `0`; i < doc->num_incremental_sections; i++)
438	{
439	pdf_xref *xref = &doc->xref_sections[i];
440	pdf_unsaved_sig *usig;
441
442	for (usig = xref->unsaved_sigs; usig; usig = usig->next)
443	{
444	if (usig->field == obj)
445	return `1`;
446	}
447	}
448
449	return `0`;
450	}
451
452	/ Ensure that the current populating xref has a single subsection*
453	* that covers the entire range. */
454	void pdf_ensure_solid_xref(fz_context ctx, pdf_document doc, int num)
455	{
456	if (doc->num_xref_sections == `0`)
457	pdf_populate_next_xref_level(ctx, doc);
458
459	ensure_solid_xref(ctx, doc, num, doc->num_xref_sections-`1`);
460	}
461
462	/ Ensure that an object has been cloned into the incremental xref section /
463	void pdf_xref_ensure_incremental_object(fz_context ctx, pdf_document doc, int num)
464	{
465	pdf_xref_entry new_entry, old_entry;
466	pdf_xref_subsec *sub = NULL;
467	int i;
468
469	/ Make sure we have created an xref section for incremental updates /
470	ensure_incremental_xref(ctx, doc);
471
472	/ Search for the section that contains this object /
473	for (i = doc->xref_index[num]; i < doc->num_xref_sections; i++)
474	{
475	pdf_xref *xref = &doc->xref_sections[i];
476
477	if (num < `0` && num >= xref->num_objects)
478	break;
479	for (sub = xref->subsec; sub != NULL; sub = sub->next)
480	{
481	if (sub->start <= num && num < sub->start + sub->len && sub->table[num - sub->start].type)
482	break;
483	}
484	if (sub != NULL)
485	break;
486	}
487	/ sub == NULL implies we did not find it /
488
489	/ If we don't find it, or it's already in the incremental section, return /
490	if (i == `0` \|\| sub == NULL)
491	return;
492
493	/ Move the object to the incremental section /
494	doc->xref_index[num] = `0`;
495	old_entry = &sub->table[num - sub->start];
496	new_entry = pdf_get_incremental_xref_entry(ctx, doc, num);
497	new_entry = old_entry;
498	if (i < doc->num_incremental_sections)
499	{
500	/ old entry is incremental and may have changes.*
501	* Better keep a copy. We must override the old entry with
502	* the copy because the caller may be holding a reference to
503	* the original and expect it to end up in the new entry */
504	old_entry->obj = pdf_deep_copy_obj(ctx, old_entry->obj);
505	}
506	else
507	{
508	old_entry->obj = NULL;
509	}
510	old_entry->stm_buf = NULL;
511	}
512
513	void pdf_replace_xref(fz_context ctx, pdf_document doc, pdf_xref_entry entries, int* n)
514	{
515	int *xref_index = NULL;
516	pdf_xref *xref = NULL;
517	pdf_xref_subsec *sub;
518
519	fz_var(xref_index);
520	fz_var(xref);
521
522	fz_try(ctx)
523	{
524	xref_index = fz_calloc(ctx, n, sizeof(int));
525	xref = fz_malloc_struct(ctx, pdf_xref);
526	sub = fz_malloc_struct(ctx, pdf_xref_subsec);
527	}
528	fz_catch(ctx)
529	{
530	fz_free(ctx, xref);
531	fz_free(ctx, xref_index);
532	fz_rethrow(ctx);
533	}
534
535	sub->table = entries;
536	sub->start = `0`;
537	sub->len = n;
538
539	xref->subsec = sub;
540	xref->num_objects = n;
541	xref->trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));
542
543	/ The new table completely replaces the previous separate sections /
544	pdf_drop_xref_sections(ctx, doc);
545
546	doc->xref_sections = xref;
547	doc->num_xref_sections = `1`;
548	doc->num_incremental_sections = `0`;
549	doc->xref_base = `0`;
550	doc->disallow_new_increments = `0`;
551	doc->max_xref_len = n;
552
553	fz_free(ctx, doc->xref_index);
554	doc->xref_index = xref_index;
555	}
556
557	void pdf_forget_xref(fz_context ctx, pdf_document doc)
558	{
559	pdf_obj *trailer = pdf_keep_obj(ctx, pdf_trailer(ctx, doc));
560
561	if (doc->saved_xref_sections)
562	pdf_drop_xref_sections_imp(ctx, doc, doc->saved_xref_sections, doc->saved_num_xref_sections);
563
564	doc->saved_xref_sections = doc->xref_sections;
565	doc->saved_num_xref_sections = doc->num_xref_sections;
566
567	doc->startxref = `0`;
568	doc->num_xref_sections = `0`;
569	doc->num_incremental_sections = `0`;
570	doc->xref_base = `0`;
571	doc->disallow_new_increments = `0`;
572
573	fz_try(ctx)
574	{
575	pdf_get_populating_xref_entry(ctx, doc, `0`);
576	}
577	fz_catch(ctx)
578	{
579	pdf_drop_obj(ctx, trailer);
580	fz_rethrow(ctx);
581	}
582
583	/ Set the trailer of the final xref section. /
584	doc->xref_sections[`0`].trailer = trailer;
585	}
586
587	/*
588	* magic version tag and startxref
589	*/
590
591	static void
592	pdf_load_version(fz_context ctx, pdf_document doc)
593	{
594	char buf[`20`];
595
596	fz_seek(ctx, doc->file, `0`, SEEK_SET);
597	fz_read_line(ctx, doc->file, buf, sizeof buf);
598	if (strlen(buf) < `5` \|\| memcmp(buf, "%PDF-", `5`) != `0`)
599	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot recognize version marker");
600
601	doc->version = `10` * (fz_atof(buf+`5`) + `0.05f`);
602	if (doc->version < `10` \|\| doc->version > `17`)
603	if (doc->version != `20`)
604	fz_warn(ctx, "unknown PDF version: %d.%d", doc->version / `10`, doc->version % `10`);
605	}
606
607	static void
608	pdf_read_start_xref(fz_context ctx, pdf_document doc)
609	{
610	unsigned char buf[`1024`];
611	size_t i, n;
612	int64_t t;
613
614	fz_seek(ctx, doc->file, `0`, SEEK_END);
615
616	doc->file_size = fz_tell(ctx, doc->file);
617
618	t = fz_maxi64(`0`, doc->file_size - (int64_t)sizeof buf);
619	fz_seek(ctx, doc->file, t, SEEK_SET);
620
621	n = fz_read(ctx, doc->file, buf, sizeof buf);
622	if (n < `9`)
623	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find startxref");
624
625	i = n - `9`;
626	do
627	{
628	if (memcmp(buf + i, "startxref", `9`) == `0`)
629	{
630	i += `9`;
631	while (i < n && iswhite(buf[i]))
632	i ++;
633	doc->startxref = `0`;
634	while (i < n && isdigit(buf[i]))
635	{
636	if (doc->startxref >= INT64_MAX/`10`)
637	fz_throw(ctx, FZ_ERROR_GENERIC, "startxref too large");
638	doc->startxref = doc->startxref * `10` + (buf[i++] - `'0'`);
639	}
640	if (doc->startxref != `0`)
641	return;
642	break;
643	}
644	} while (i-- > `0`);
645
646	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find startxref");
647	}
648
649	static void
650	fz_skip_space(fz_context ctx, fz_stream stm)
651	{
652	do
653	{
654	int c = fz_peek_byte(ctx, stm);
655	if (c == EOF \|\| c > `32`)
656	return;
657	(void)fz_read_byte(ctx, stm);
658	}
659	while (`1`);
660	}
661
662	static int fz_skip_string(fz_context ctx, fz_stream stm, const char *str)
663	{
664	while (*str)
665	{
666	int c = fz_peek_byte(ctx, stm);
667	if (c == EOF \|\| c != *str++)
668	return `1`;
669	(void)fz_read_byte(ctx, stm);
670	}
671	return `0`;
672	}
673
674	/*
675	* trailer dictionary
676	*/
677
678	static int
679	pdf_xref_size_from_old_trailer(fz_context ctx, pdf_document doc, pdf_lexbuf *buf)
680	{
681	int len;
682	char *s;
683	int64_t t;
684	pdf_token tok;
685	int c;
686	int size = `0`;
687	int64_t ofs;
688	pdf_obj *trailer = NULL;
689	size_t n;
690
691	fz_var(trailer);
692
693	/ Record the current file read offset so that we can reinstate it /
694	ofs = fz_tell(ctx, doc->file);
695
696	fz_skip_space(ctx, doc->file);
697	if (fz_skip_string(ctx, doc->file, "xref"))
698	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find xref marker");
699	fz_skip_space(ctx, doc->file);
700
701	while (`1`)
702	{
703	c = fz_peek_byte(ctx, doc->file);
704	if (!isdigit(c))
705	break;
706
707	fz_read_line(ctx, doc->file, buf->scratch, buf->size);
708	s = buf->scratch;
709	fz_strsep(&s, " "); / ignore start /
710	if (!s)
711	fz_throw(ctx, FZ_ERROR_GENERIC, "xref subsection length missing");
712	len = fz_atoi(fz_strsep(&s, " "));
713	if (len < `0`)
714	fz_throw(ctx, FZ_ERROR_GENERIC, "xref subsection length must be positive");
715
716	/ broken pdfs where the section is not on a separate line /
717	if (s && *s != `'\0'`)
718	fz_seek(ctx, doc->file, -(`2` + (int)strlen(s)), SEEK_CUR);
719
720	t = fz_tell(ctx, doc->file);
721	if (t < `0`)
722	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file");
723
724	/ Spec says xref entries should be 20 bytes, but it's not infrequent*
725	* to see 19, in particular for some PCLm drivers. Cope. */
726	if (len > `0`)
727	{
728	n = fz_read(ctx, doc->file, (unsigned char *)buf->scratch, `20`);
729	if (n < `19`)
730	fz_throw(ctx, FZ_ERROR_GENERIC, "malformed xref table");
731	if (n == `20` && buf->scratch[`19`] > `32`)
732	n = `19`;
733	}
734	else
735	n = `20`;
736
737	if (len > (int64_t)((INT64_MAX - t) / n))
738	fz_throw(ctx, FZ_ERROR_GENERIC, "xref has too many entries");
739
740	fz_seek(ctx, doc->file, t + n * len, SEEK_SET);
741	}
742
743	fz_try(ctx)
744	{
745	tok = pdf_lex(ctx, doc->file, buf);
746	if (tok != PDF_TOK_TRAILER)
747	fz_throw(ctx, FZ_ERROR_GENERIC, "expected trailer marker");
748
749	tok = pdf_lex(ctx, doc->file, buf);
750	if (tok != PDF_TOK_OPEN_DICT)
751	fz_throw(ctx, FZ_ERROR_GENERIC, "expected trailer dictionary");
752
753	trailer = pdf_parse_dict(ctx, doc, doc->file, buf);
754
755	size = pdf_dict_get_int(ctx, trailer, PDF_NAME(Size));
756	if (size < `0` \|\| size > PDF_MAX_OBJECT_NUMBER + `1`)
757	fz_throw(ctx, FZ_ERROR_GENERIC, "trailer Size entry out of range");
758	}
759	fz_always(ctx)
760	{
761	pdf_drop_obj(ctx, trailer);
762	}
763	fz_catch(ctx)
764	{
765	fz_rethrow(ctx);
766	}
767
768	fz_seek(ctx, doc->file, ofs, SEEK_SET);
769
770	return size;
771	}
772
773	static pdf_xref_entry *
774	pdf_xref_find_subsection(fz_context ctx, pdf_document doc, int start, int len)
775	{
776	pdf_xref *xref = &doc->xref_sections[doc->num_xref_sections-`1`];
777	pdf_xref_subsec *sub;
778	int num_objects;
779
780	/ Different cases here. Case 1) We might be asking for a*
781	* subsection (or a subset of a subsection) that we already
782	* have - Just return it. Case 2) We might be asking for a
783	* completely new subsection - Create it and return it.
784	* Case 3) We might have an overlapping one - Create a 'solid'
785	* subsection and return that. */
786
787	/ Sanity check /
788	for (sub = xref->subsec; sub != NULL; sub = sub->next)
789	{
790	if (start >= sub->start && start + len <= sub->start + sub->len)
791	return &sub->table[start-sub->start]; / Case 1 /
792	if (start + len > sub->start && start <= sub->start + sub->len)
793	break; / Case 3 /
794	}
795
796	num_objects = xref->num_objects;
797	if (num_objects < start + len)
798	num_objects = start + len;
799
800	if (sub == NULL)
801	{
802	/ Case 2 /
803	sub = fz_malloc_struct(ctx, pdf_xref_subsec);
804	fz_try(ctx)
805	{
806	sub->table = fz_calloc(ctx, len, sizeof(pdf_xref_entry));
807	sub->start = start;
808	sub->len = len;
809	sub->next = xref->subsec;
810	xref->subsec = sub;
811	}
812	fz_catch(ctx)
813	{
814	fz_free(ctx, sub);
815	fz_rethrow(ctx);
816	}
817	xref->num_objects = num_objects;
818	if (doc->max_xref_len < num_objects)
819	extend_xref_index(ctx, doc, num_objects);
820	}
821	else
822	{
823	/ Case 3 /
824	ensure_solid_xref(ctx, doc, num_objects, doc->num_xref_sections-`1`);
825	xref = &doc->xref_sections[doc->num_xref_sections-`1`];
826	sub = xref->subsec;
827	}
828	return &sub->table[start-sub->start];
829	}
830
831	static pdf_obj *
832	pdf_read_old_xref(fz_context ctx, pdf_document doc, pdf_lexbuf *buf)
833	{
834	int start, len, c, i, xref_len, carried;
835	fz_stream *file = doc->file;
836	pdf_xref_entry *table;
837	pdf_token tok;
838	size_t n;
839	char s, e;
840
841	xref_len = pdf_xref_size_from_old_trailer(ctx, doc, buf);
842
843	fz_skip_space(ctx, doc->file);
844	if (fz_skip_string(ctx, doc->file, "xref"))
845	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find xref marker");
846	fz_skip_space(ctx, doc->file);
847
848	while (`1`)
849	{
850	c = fz_peek_byte(ctx, file);
851	if (!isdigit(c))
852	break;
853
854	fz_read_line(ctx, file, buf->scratch, buf->size);
855	s = buf->scratch;
856	start = fz_atoi(fz_strsep(&s, " "));
857	len = fz_atoi(fz_strsep(&s, " "));
858
859	/ broken pdfs where the section is not on a separate line /
860	if (s && *s != `'\0'`)
861	{
862	fz_warn(ctx, "broken xref subsection. proceeding anyway.");
863	fz_seek(ctx, file, -(`2` + (int)strlen(s)), SEEK_CUR);
864	}
865
866	if (start < `0` \|\| start > PDF_MAX_OBJECT_NUMBER
867	\|\| len < `0` \|\| len > PDF_MAX_OBJECT_NUMBER
868	\|\| start + len - `1` > PDF_MAX_OBJECT_NUMBER)
869	{
870	fz_throw(ctx, FZ_ERROR_GENERIC, "xref subsection object numbers are out of range");
871	}
872	/ broken pdfs where size in trailer undershoots entries in xref sections /
873	if (start + len > xref_len)
874	{
875	fz_warn(ctx, "broken xref subsection, proceeding anyway.");
876	}
877
878	table = pdf_xref_find_subsection(ctx, doc, start, len);
879
880	/ Xref entries SHOULD be 20 bytes long, but we see 19 byte*
881	* ones more frequently than we'd like (e.g. PCLm drivers).
882	* Cope with this by 'carrying' data forward. */
883	carried = `0`;
884	for (i = `0`; i < len; i++)
885	{
886	pdf_xref_entry *entry = &table[i];
887	n = fz_read(ctx, file, (unsigned char *) buf->scratch + carried, `20`-carried);
888	if (n != `20`-carried)
889	fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected EOF in xref table");
890	n += carried;
891	buf->scratch[n] = `'\0'`;
892	if (!entry->type)
893	{
894	s = buf->scratch;
895	e = s + n;
896
897	entry->num = start + i;
898
899	/ broken pdfs where line start with white space /
900	while (s < e && iswhite(*s))
901	s++;
902
903	if (s == e \|\| !isdigit(*s))
904	fz_throw(ctx, FZ_ERROR_GENERIC, "xref offset missing");
905	while (s < e && isdigit(*s))
906	entry->ofs = entry->ofs * `10` + *s++ - `'0'`;
907
908	while (s < e && iswhite(*s))
909	s++;
910	if (s == e \|\| !isdigit(*s))
911	fz_throw(ctx, FZ_ERROR_GENERIC, "xref generation number missing");
912	while (s < e && isdigit(*s))
913	entry->gen = entry->gen * `10` + *s++ - `'0'`;
914
915	while (s < e && iswhite(*s))
916	s++;
917	if (s == e \|\| (s != `'f'` && s != `'n'` && *s != `'o'`))
918	fz_throw(ctx, FZ_ERROR_GENERIC, "unexpected xref type: 0x%x (%d %d R)", s == e ? `0` : *s, entry->num, entry->gen);
919	entry->type = *s++;
920
921	/ If the last byte of our buffer isn't an EOL (or space), carry one byte forward /
922	carried = buf->scratch[`19`] > `32`;
923	if (carried)
924	buf->scratch[`0`] = buf->scratch[`19`];
925	}
926	}
927	if (carried)
928	fz_unread_byte(ctx, file);
929	}
930
931	tok = pdf_lex(ctx, file, buf);
932	if (tok != PDF_TOK_TRAILER)
933	fz_throw(ctx, FZ_ERROR_GENERIC, "expected trailer marker");
934
935	tok = pdf_lex(ctx, file, buf);
936	if (tok != PDF_TOK_OPEN_DICT)
937	fz_throw(ctx, FZ_ERROR_GENERIC, "expected trailer dictionary");
938
939	doc->has_old_style_xrefs = `1`;
940
941	return pdf_parse_dict(ctx, doc, file, buf);
942	}
943
944	static void
945	pdf_read_new_xref_section(fz_context ctx, pdf_document doc, fz_stream stm, int* i0, int i1, int w0, int w1, int w2)
946	{
947	pdf_xref_entry *table;
948	int i, n;
949
950	if (i0 < `0` \|\| i0 > PDF_MAX_OBJECT_NUMBER \|\| i1 < `0` \|\| i1 > PDF_MAX_OBJECT_NUMBER \|\| i0 + i1 - `1` > PDF_MAX_OBJECT_NUMBER)
951	fz_throw(ctx, FZ_ERROR_GENERIC, "xref subsection object numbers are out of range");
952
953	table = pdf_xref_find_subsection(ctx, doc, i0, i1);
954	for (i = i0; i < i0 + i1; i++)
955	{
956	pdf_xref_entry *entry = &table[i-i0];
957	int a = `0`;
958	int64_t b = `0`;
959	int c = `0`;
960
961	if (fz_is_eof(ctx, stm))
962	fz_throw(ctx, FZ_ERROR_GENERIC, "truncated xref stream");
963
964	for (n = `0`; n < w0; n++)
965	a = (a << `8`) + fz_read_byte(ctx, stm);
966	for (n = `0`; n < w1; n++)
967	b = (b << `8`) + fz_read_byte(ctx, stm);
968	for (n = `0`; n < w2; n++)
969	c = (c << `8`) + fz_read_byte(ctx, stm);
970
971	if (!entry->type)
972	{
973	int t = w0 ? a : `1`;
974	entry->type = t == `0` ? `'f'` : t == `1` ? `'n'` : t == `2` ? `'o'` : `0`;
975	entry->ofs = w1 ? b : `0`;
976	entry->gen = w2 ? c : `0`;
977	entry->num = i;
978	}
979	}
980
981	doc->has_xref_streams = `1`;
982	}
983
984	/ Entered with file locked, remains locked throughout. /
985	static pdf_obj *
986	pdf_read_new_xref(fz_context ctx, pdf_document doc, pdf_lexbuf *buf)
987	{
988	fz_stream *stm = NULL;
989	pdf_obj *trailer = NULL;
990	pdf_obj *index = NULL;
991	pdf_obj *obj = NULL;
992	int gen, num = `0`;
993	int64_t ofs, stm_ofs;
994	int size, w0, w1, w2;
995	int t;
996
997	fz_var(trailer);
998	fz_var(stm);
999
1000	fz_try(ctx)
1001	{
1002	ofs = fz_tell(ctx, doc->file);
1003	trailer = pdf_parse_ind_obj(ctx, doc, doc->file, buf, &num, &gen, &stm_ofs, NULL);
1004	}
1005	fz_catch(ctx)
1006	{
1007	pdf_drop_obj(ctx, trailer);
1008	fz_rethrow(ctx);
1009	}
1010
1011	fz_try(ctx)
1012	{
1013	pdf_xref_entry *entry;
1014
1015	obj = pdf_dict_get(ctx, trailer, PDF_NAME(Size));
1016	if (!obj)
1017	fz_throw(ctx, FZ_ERROR_GENERIC, "xref stream missing Size entry (%d 0 R)", num);
1018
1019	size = pdf_to_int(ctx, obj);
1020
1021	obj = pdf_dict_get(ctx, trailer, PDF_NAME(W));
1022	if (!obj)
1023	fz_throw(ctx, FZ_ERROR_GENERIC, "xref stream missing W entry (%d R)", num);
1024	w0 = pdf_array_get_int(ctx, obj, `0`);
1025	w1 = pdf_array_get_int(ctx, obj, `1`);
1026	w2 = pdf_array_get_int(ctx, obj, `2`);
1027
1028	if (w0 < `0`)
1029	fz_warn(ctx, "xref stream objects have corrupt type");
1030	if (w1 < `0`)
1031	fz_warn(ctx, "xref stream objects have corrupt offset");
1032	if (w2 < `0`)
1033	fz_warn(ctx, "xref stream objects have corrupt generation");
1034
1035	w0 = w0 < `0` ? `0` : w0;
1036	w1 = w1 < `0` ? `0` : w1;
1037	w2 = w2 < `0` ? `0` : w2;
1038
1039	index = pdf_dict_get(ctx, trailer, PDF_NAME(Index));
1040
1041	stm = pdf_open_stream_with_offset(ctx, doc, num, trailer, stm_ofs);
1042
1043	if (!index)
1044	{
1045	pdf_read_new_xref_section(ctx, doc, stm, `0`, size, w0, w1, w2);
1046	}
1047	else
1048	{
1049	int n = pdf_array_len(ctx, index);
1050	for (t = `0`; t < n; t += `2`)
1051	{
1052	int i0 = pdf_array_get_int(ctx, index, t + `0`);
1053	int i1 = pdf_array_get_int(ctx, index, t + `1`);
1054	pdf_read_new_xref_section(ctx, doc, stm, i0, i1, w0, w1, w2);
1055	}
1056	}
1057	entry = pdf_get_populating_xref_entry(ctx, doc, num);
1058	entry->ofs = ofs;
1059	entry->gen = gen;
1060	entry->num = num;
1061	entry->stm_ofs = stm_ofs;
1062	pdf_drop_obj(ctx, entry->obj);
1063	entry->obj = pdf_keep_obj(ctx, trailer);
1064	entry->type = `'n'`;
1065	}
1066	fz_always(ctx)
1067	{
1068	fz_drop_stream(ctx, stm);
1069	}
1070	fz_catch(ctx)
1071	{
1072	pdf_drop_obj(ctx, trailer);
1073	fz_rethrow(ctx);
1074	}
1075
1076	return trailer;
1077	}
1078
1079	static pdf_obj *
1080	pdf_read_xref(fz_context ctx, pdf_document doc, int64_t ofs, pdf_lexbuf *buf)
1081	{
1082	pdf_obj *trailer;
1083	int c;
1084
1085	fz_seek(ctx, doc->file, ofs, SEEK_SET);
1086
1087	while (iswhite(fz_peek_byte(ctx, doc->file)))
1088	fz_read_byte(ctx, doc->file);
1089
1090	c = fz_peek_byte(ctx, doc->file);
1091	if (c == `'x'`)
1092	trailer = pdf_read_old_xref(ctx, doc, buf);
1093	else if (isdigit(c))
1094	trailer = pdf_read_new_xref(ctx, doc, buf);
1095	else
1096	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot recognize xref format");
1097
1098	return trailer;
1099	}
1100
1101	static int64_t
1102	read_xref_section(fz_context ctx, pdf_document doc, int64_t ofs, pdf_lexbuf *buf)
1103	{
1104	pdf_obj *trailer = NULL;
1105	pdf_obj *prevobj;
1106	int64_t xrefstmofs = `0`;
1107	int64_t prevofs = `0`;
1108
1109	trailer = pdf_read_xref(ctx, doc, ofs, buf);
1110	fz_try(ctx)
1111	{
1112	pdf_set_populating_xref_trailer(ctx, doc, trailer);
1113
1114	/ FIXME: do we overwrite free entries properly? /
1115	/ FIXME: Does this work properly with progression? /
1116	xrefstmofs = pdf_to_int64(ctx, pdf_dict_get(ctx, trailer, PDF_NAME(XRefStm)));
1117	if (xrefstmofs)
1118	{
1119	if (xrefstmofs < `0`)
1120	fz_throw(ctx, FZ_ERROR_GENERIC, "negative xref stream offset");
1121
1122	/*
1123	Read the XRefStm stream, but throw away the resulting trailer. We do not
1124	follow any Prev tag therein, as specified on Page 108 of the PDF reference
1125	1.7
1126	*/
1127	pdf_drop_obj(ctx, pdf_read_xref(ctx, doc, xrefstmofs, buf));
1128	}
1129
1130	prevobj = pdf_dict_get(ctx, trailer, PDF_NAME(Prev));
1131	if (pdf_is_int(ctx, prevobj))
1132	{
1133	prevofs = pdf_to_int64(ctx, prevobj);
1134	if (prevofs <= `0`)
1135	fz_throw(ctx, FZ_ERROR_GENERIC, "invalid offset for previous xref section");
1136	}
1137	}
1138	fz_always(ctx)
1139	pdf_drop_obj(ctx, trailer);
1140	fz_catch(ctx)
1141	fz_rethrow(ctx);
1142
1143	return prevofs;
1144	}
1145
1146	static void
1147	pdf_read_xref_sections(fz_context ctx, pdf_document doc, int64_t ofs, pdf_lexbuf buf, int* read_previous)
1148	{
1149	int i, len, cap;
1150	int64_t *offsets;
1151
1152	len = `0`;
1153	cap = `10`;
1154	offsets = fz_malloc_array(ctx, cap, int64_t);
1155
1156	fz_try(ctx)
1157	{
1158	while(ofs)
1159	{
1160	for (i = `0`; i < len; i ++)
1161	{
1162	if (offsets[i] == ofs)
1163	break;
1164	}
1165	if (i < len)
1166	{
1167	fz_warn(ctx, "ignoring xref section recursion at offset %d", (int)ofs);
1168	break;
1169	}
1170	if (len == cap)
1171	{
1172	cap *= `2`;
1173	offsets = fz_realloc_array(ctx, offsets, cap, int64_t);
1174	}
1175	offsets[len++] = ofs;
1176
1177	pdf_populate_next_xref_level(ctx, doc);
1178	ofs = read_xref_section(ctx, doc, ofs, buf);
1179	if (!read_previous)
1180	break;
1181	}
1182	}
1183	fz_always(ctx)
1184	{
1185	fz_free(ctx, offsets);
1186	}
1187	fz_catch(ctx)
1188	{
1189	fz_rethrow(ctx);
1190	}
1191	}
1192
1193	static void
1194	pdf_prime_xref_index(fz_context ctx, pdf_document doc)
1195	{
1196	int i, j;
1197	int *idx = doc->xref_index;
1198
1199	for (i = doc->num_xref_sections-`1`; i >= `0`; i--)
1200	{
1201	pdf_xref *xref = &doc->xref_sections[i];
1202	pdf_xref_subsec *subsec = xref->subsec;
1203	while (subsec != NULL)
1204	{
1205	int start = subsec->start;
1206	int end = subsec->start + subsec->len;
1207	for (j = start; j < end; j++)
1208	{
1209	char t = subsec->table[j-start].type;
1210	if (t != `0` && t != `'f'`)
1211	idx[j] = i;
1212	}
1213
1214	subsec = subsec->next;
1215	}
1216	}
1217	}
1218
1219	/*
1220	* load xref tables from pdf
1221	*
1222	* File locked on entry, throughout and on exit.
1223	*/
1224
1225	static void
1226	pdf_load_xref(fz_context ctx, pdf_document doc, pdf_lexbuf *buf)
1227	{
1228	int i;
1229	int xref_len;
1230	pdf_xref_entry *entry;
1231
1232	pdf_read_start_xref(ctx, doc);
1233
1234	pdf_read_xref_sections(ctx, doc, doc->startxref, buf, `1`);
1235
1236	if (pdf_xref_len(ctx, doc) == `0`)
1237	fz_throw(ctx, FZ_ERROR_GENERIC, "found xref was empty");
1238
1239	pdf_prime_xref_index(ctx, doc);
1240
1241	entry = pdf_get_xref_entry(ctx, doc, `0`);
1242	/ broken pdfs where first object is missing /
1243	if (!entry->type)
1244	{
1245	entry->type = `'f'`;
1246	entry->gen = `65535`;
1247	entry->num = `0`;
1248	}
1249	/ broken pdfs where first object is not free /
1250	else if (entry->type != `'f'`)
1251	fz_warn(ctx, "first object in xref is not free");
1252
1253	/ broken pdfs where object offsets are out of range /
1254	xref_len = pdf_xref_len(ctx, doc);
1255	for (i = `0`; i < xref_len; i++)
1256	{
1257	entry = pdf_get_xref_entry(ctx, doc, i);
1258	if (entry->type == `'n'`)
1259	{
1260	/ Special case code: "0000000000 * n" means free,*
1261	* according to some producers (inc Quartz) */
1262	if (entry->ofs == `0`)
1263	entry->type = `'f'`;
1264	else if (entry->ofs <= `0` \|\| entry->ofs >= doc->file_size)
1265	fz_throw(ctx, FZ_ERROR_GENERIC, "object offset out of range: %d (%d 0 R)", (int)entry->ofs, i);
1266	}
1267	if (entry->type == `'o'`)
1268	{
1269	/ Read this into a local variable here, because pdf_get_xref_entry*
1270	* may solidify the xref, hence invalidating "entry", meaning we
1271	* need a stashed value for the throw. */
1272	int64_t ofs = entry->ofs;
1273	if (ofs <= `0` \|\| ofs >= xref_len \|\| pdf_get_xref_entry(ctx, doc, ofs)->type != `'n'`)
1274	fz_throw(ctx, FZ_ERROR_GENERIC, "invalid reference to an objstm that does not exist: %d (%d 0 R)", (int)ofs, i);
1275	}
1276	}
1277	}
1278
1279	static void
1280	pdf_load_linear(fz_context ctx, pdf_document doc)
1281	{
1282	pdf_obj *dict = NULL;
1283	pdf_obj *hint = NULL;
1284	pdf_obj *o;
1285	int num, gen, lin, len;
1286	int64_t stmofs;
1287
1288	fz_var(dict);
1289	fz_var(hint);
1290
1291	fz_try(ctx)
1292	{
1293	pdf_xref_entry *entry;
1294
1295	dict = pdf_parse_ind_obj(ctx, doc, doc->file, &doc->lexbuf.base, &num, &gen, &stmofs, NULL);
1296	if (!pdf_is_dict(ctx, dict))
1297	fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read linearized dictionary");
1298	o = pdf_dict_get(ctx, dict, PDF_NAME(Linearized));
1299	if (o == NULL)
1300	fz_throw(ctx, FZ_ERROR_GENERIC, "Failed to read linearized dictionary");
1301	lin = pdf_to_int(ctx, o);
1302	if (lin != `1`)
1303	fz_throw(ctx, FZ_ERROR_GENERIC, "Unexpected version of Linearized tag (%d)", lin);
1304	len = pdf_dict_get_int(ctx, dict, PDF_NAME(L));
1305	if (len != doc->file_length)
1306	fz_throw(ctx, FZ_ERROR_GENERIC, "File has been updated since linearization");
1307
1308	pdf_read_xref_sections(ctx, doc, fz_tell(ctx, doc->file), &doc->lexbuf.base, `0`);
1309
1310	doc->linear_page_count = pdf_dict_get_int(ctx, dict, PDF_NAME(N));
1311	doc->linear_page_refs = fz_realloc_array(ctx, doc->linear_page_refs, doc->linear_page_count, pdf_obj *);
1312	memset(doc->linear_page_refs, `0`, doc->linear_page_count * sizeof(pdf_obj*));
1313	doc->linear_obj = dict;
1314	doc->linear_pos = fz_tell(ctx, doc->file);
1315	doc->linear_page1_obj_num = pdf_dict_get_int(ctx, dict, PDF_NAME(O));
1316	doc->linear_page_refs[`0`] = pdf_new_indirect(ctx, doc, doc->linear_page1_obj_num, `0`);
1317	doc->linear_page_num = `0`;
1318	hint = pdf_dict_get(ctx, dict, PDF_NAME(H));
1319	doc->hint_object_offset = pdf_array_get_int(ctx, hint, `0`);
1320	doc->hint_object_length = pdf_array_get_int(ctx, hint, `1`);
1321
1322	entry = pdf_get_populating_xref_entry(ctx, doc, `0`);
1323	entry->type = `'f'`;
1324	}
1325	fz_catch(ctx)
1326	{
1327	pdf_drop_obj(ctx, dict);
1328	fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1329	/ Drop back to non linearized reading mode /
1330	doc->file_reading_linearly = `0`;
1331	}
1332	}
1333
1334	/*
1335	* Initialize and load xref tables.
1336	* If password is not null, try to decrypt.
1337	*/
1338
1339	static void
1340	pdf_init_document(fz_context ctx, pdf_document doc)
1341	{
1342	pdf_obj encrypt, id;
1343	pdf_obj *dict = NULL;
1344	pdf_obj *obj;
1345	pdf_obj *nobj = NULL;
1346	int i, repaired = `0`;
1347
1348	fz_var(dict);
1349	fz_var(nobj);
1350
1351	fz_try(ctx)
1352	{
1353	/ Check to see if we should work in progressive mode /
1354	if (doc->file->progressive)
1355	{
1356	doc->file_reading_linearly = `1`;
1357	fz_seek(ctx, doc->file, `0`, SEEK_END);
1358	doc->file_length = fz_tell(ctx, doc->file);
1359	if (doc->file_length < `0`)
1360	doc->file_length = `0`;
1361	fz_seek(ctx, doc->file, `0`, SEEK_SET);
1362	}
1363
1364	pdf_load_version(ctx, doc);
1365
1366	/ Try to load the linearized file if we are in progressive*
1367	* mode. */
1368	if (doc->file_reading_linearly)
1369	pdf_load_linear(ctx, doc);
1370
1371	/ If we aren't in progressive mode (or the linear load failed*
1372	* and has set us back to non-progressive mode), load normally.
1373	*/
1374	if (!doc->file_reading_linearly)
1375	pdf_load_xref(ctx, doc, &doc->lexbuf.base);
1376	}
1377	fz_catch(ctx)
1378	{
1379	pdf_drop_xref_sections(ctx, doc);
1380	fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1381	fz_warn(ctx, "trying to repair broken xref");
1382	repaired = `1`;
1383	}
1384
1385	fz_try(ctx)
1386	{
1387	int hasroot, hasinfo;
1388
1389	if (repaired)
1390	{
1391	/ pdf_repair_xref may access xref_index, so reset it properly /
1392	if (doc->xref_index)
1393	memset(doc->xref_index, `0`, sizeof(int) * doc->max_xref_len);
1394	pdf_repair_xref(ctx, doc);
1395	pdf_prime_xref_index(ctx, doc);
1396	}
1397
1398	encrypt = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
1399	id = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID));
1400	if (pdf_is_dict(ctx, encrypt))
1401	doc->crypt = pdf_new_crypt(ctx, encrypt, id);
1402
1403	/ Allow lazy clients to read encrypted files with a blank password /
1404	pdf_authenticate_password(ctx, doc, "");
1405
1406	if (repaired)
1407	{
1408	int xref_len = pdf_xref_len(ctx, doc);
1409	pdf_repair_obj_stms(ctx, doc);
1410
1411	hasroot = (pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root)) != NULL);
1412	hasinfo = (pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info)) != NULL);
1413
1414	for (i = `1`; i < xref_len && !hasinfo && !hasroot; ++i)
1415	{
1416	pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, i);
1417	if (entry->type == `0` \|\| entry->type == `'f'`)
1418	continue;
1419
1420	fz_try(ctx)
1421	{
1422	dict = pdf_load_object(ctx, doc, i);
1423	}
1424	fz_catch(ctx)
1425	{
1426	fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1427	fz_warn(ctx, "ignoring broken object (%d 0 R)", i);
1428	continue;
1429	}
1430
1431	if (!hasroot)
1432	{
1433	obj = pdf_dict_get(ctx, dict, PDF_NAME(Type));
1434	if (pdf_name_eq(ctx, obj, PDF_NAME(Catalog)))
1435	{
1436	nobj = pdf_new_indirect(ctx, doc, i, `0`);
1437	pdf_dict_put_drop(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), nobj);
1438	hasroot = `1`;
1439	}
1440	}
1441
1442	if (!hasinfo)
1443	{
1444	if (pdf_dict_get(ctx, dict, PDF_NAME(Creator)) \|\| pdf_dict_get(ctx, dict, PDF_NAME(Producer)))
1445	{
1446	nobj = pdf_new_indirect(ctx, doc, i, `0`);
1447	pdf_dict_put_drop(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info), nobj);
1448	hasinfo = `1`;
1449	}
1450	}
1451
1452	pdf_drop_obj(ctx, dict);
1453	dict = NULL;
1454	}
1455
1456	/ ensure that strings are not used in their repaired, non-decrypted form /
1457	if (doc->crypt)
1458	pdf_clear_xref(ctx, doc);
1459	}
1460	}
1461	fz_catch(ctx)
1462	{
1463	pdf_drop_obj(ctx, dict);
1464	fz_rethrow(ctx);
1465	}
1466
1467	fz_try(ctx)
1468	{
1469	pdf_read_ocg(ctx, doc);
1470	}
1471	fz_catch(ctx)
1472	{
1473	fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1474	fz_warn(ctx, "Ignoring broken Optional Content configuration");
1475	}
1476
1477	fz_try(ctx)
1478	{
1479	const char *version_str;
1480	obj = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), PDF_NAME(Version), NULL);
1481	version_str = pdf_to_name(ctx, obj);
1482	if (*version_str)
1483	{
1484	int version = `10` * (fz_atof(version_str) + `0.05f`);
1485	if (version > doc->version)
1486	doc->version = version;
1487	}
1488	}
1489	fz_catch(ctx) { }
1490	}
1491
1492	static void
1493	pdf_drop_document_imp(fz_context ctx, pdf_document doc)
1494	{
1495	int i;
1496
1497	fz_defer_reap_start(ctx);
1498
1499	/ Type3 glyphs in the glyph cache can contain pdf_obj pointers*
1500	* that we are about to destroy. Simplest solution is to bin the
1501	* glyph cache at this point. */
1502	fz_try(ctx)
1503	fz_purge_glyph_cache(ctx);
1504	fz_catch(ctx)
1505	{
1506	/ Swallow error, but continue dropping /
1507	}
1508
1509	pdf_drop_js(ctx, doc->js);
1510
1511	pdf_drop_xref_sections(ctx, doc);
1512	fz_free(ctx, doc->xref_index);
1513
1514	fz_drop_stream(ctx, doc->file);
1515	pdf_drop_crypt(ctx, doc->crypt);
1516
1517	pdf_drop_obj(ctx, doc->linear_obj);
1518	if (doc->linear_page_refs)
1519	{
1520	for (i=`0`; i < doc->linear_page_count; i++)
1521	pdf_drop_obj(ctx, doc->linear_page_refs[i]);
1522
1523	fz_free(ctx, doc->linear_page_refs);
1524	}
1525
1526	fz_free(ctx, doc->hint_page);
1527	fz_free(ctx, doc->hint_shared_ref);
1528	fz_free(ctx, doc->hint_shared);
1529	fz_free(ctx, doc->hint_obj_offsets);
1530
1531	for (i=`0`; i < doc->num_type3_fonts; i++)
1532	{
1533	fz_try(ctx)
1534	fz_decouple_type3_font(ctx, doc->type3_fonts[i], (void *)doc);
1535	fz_always(ctx)
1536	fz_drop_font(ctx, doc->type3_fonts[i]);
1537	fz_catch(ctx)
1538	{
1539	/ Swallow error, but continue dropping /
1540	}
1541	}
1542
1543	fz_free(ctx, doc->type3_fonts);
1544
1545	pdf_drop_ocg(ctx, doc);
1546
1547	pdf_empty_store(ctx, doc);
1548
1549	pdf_lexbuf_fin(ctx, &doc->lexbuf.base);
1550
1551	pdf_drop_resource_tables(ctx, doc);
1552
1553	fz_drop_colorspace(ctx, doc->oi);
1554
1555	for (i = `0`; i < doc->orphans_count; i++)
1556	pdf_drop_obj(ctx, doc->orphans[i]);
1557
1558	fz_free(ctx, doc->orphans);
1559
1560	fz_free(ctx, doc->rev_page_map);
1561
1562	fz_defer_reap_end(ctx);
1563	}
1564
1565	/*
1566	Closes and frees an opened PDF document.
1567
1568	The resource store in the context associated with pdf_document
1569	is emptied.
1570	*/
1571	void
1572	pdf_drop_document(fz_context ctx, pdf_document doc)
1573	{
1574	fz_drop_document(ctx, &doc->super);
1575	}
1576
1577	pdf_document *
1578	pdf_keep_document(fz_context ctx, pdf_document doc)
1579	{
1580	return (pdf_document *)fz_keep_document(ctx, &doc->super);
1581	}
1582
1583	/*
1584	* compressed object streams
1585	*/
1586
1587	static pdf_xref_entry *
1588	pdf_load_obj_stm(fz_context ctx, pdf_document doc, int num, pdf_lexbuf buf, int* target)
1589	{
1590	fz_stream *stm = NULL;
1591	pdf_obj *objstm = NULL;
1592	int *numbuf = NULL;
1593	int64_t *ofsbuf = NULL;
1594
1595	pdf_obj *obj;
1596	int64_t first;
1597	int count;
1598	int i;
1599	pdf_token tok;
1600	pdf_xref_entry *ret_entry = NULL;
1601	int xref_len;
1602	int found;
1603
1604	fz_var(numbuf);
1605	fz_var(ofsbuf);
1606	fz_var(objstm);
1607	fz_var(stm);
1608
1609	fz_try(ctx)
1610	{
1611	objstm = pdf_load_object(ctx, doc, num);
1612
1613	if (pdf_obj_marked(ctx, objstm))
1614	fz_throw(ctx, FZ_ERROR_GENERIC, "recursive object stream lookup");
1615	}
1616	fz_catch(ctx)
1617	{
1618	pdf_drop_obj(ctx, objstm);
1619	fz_rethrow(ctx);
1620	}
1621
1622	fz_try(ctx)
1623	{
1624	pdf_mark_obj(ctx, objstm);
1625
1626	count = pdf_dict_get_int(ctx, objstm, PDF_NAME(N));
1627	first = pdf_dict_get_int(ctx, objstm, PDF_NAME(First));
1628
1629	if (count < `0` \|\| count > PDF_MAX_OBJECT_NUMBER)
1630	fz_throw(ctx, FZ_ERROR_GENERIC, "number of objects in object stream out of range");
1631	if (first < `0` \|\| first > PDF_MAX_OBJECT_NUMBER
1632	\|\| count < `0` \|\| count > PDF_MAX_OBJECT_NUMBER
1633	\|\| first + count - `1` > PDF_MAX_OBJECT_NUMBER)
1634	fz_throw(ctx, FZ_ERROR_GENERIC, "object stream object numbers are out of range");
1635
1636	numbuf = fz_calloc(ctx, count, sizeof(*numbuf));
1637	ofsbuf = fz_calloc(ctx, count, sizeof(*ofsbuf));
1638
1639	xref_len = pdf_xref_len(ctx, doc);
1640
1641	found = `0`;
1642
1643	stm = pdf_open_stream_number(ctx, doc, num);
1644	for (i = `0`; i < count; i++)
1645	{
1646	tok = pdf_lex(ctx, stm, buf);
1647	if (tok != PDF_TOK_INT)
1648	fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d 0 R)", num);
1649	numbuf[found] = buf->i;
1650
1651	tok = pdf_lex(ctx, stm, buf);
1652	if (tok != PDF_TOK_INT)
1653	fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d 0 R)", num);
1654	ofsbuf[found] = buf->i;
1655
1656	if (numbuf[found] <= `0` \|\| numbuf[found] >= xref_len)
1657	fz_warn(ctx, "object stream object out of range, skipping");
1658	else
1659	found++;
1660	}
1661
1662	for (i = `0`; i < found; i++)
1663	{
1664	pdf_xref_entry *entry;
1665
1666	fz_seek(ctx, stm, first + ofsbuf[i], SEEK_SET);
1667
1668	obj = pdf_parse_stm_obj(ctx, doc, stm, buf);
1669
1670	entry = pdf_get_xref_entry(ctx, doc, numbuf[i]);
1671
1672	pdf_set_obj_parent(ctx, obj, numbuf[i]);
1673
1674	if (entry->type == `'o'` && entry->ofs == num)
1675	{
1676	/ If we already have an entry for this object,*
1677	* we'd like to drop it and use the new one -
1678	* but this means that anyone currently holding
1679	* a pointer to the old one will be left with a
1680	* stale pointer. Instead, we drop the new one
1681	* and trust that the old one is correct. */
1682	if (entry->obj)
1683	{
1684	if (pdf_objcmp(ctx, entry->obj, obj))
1685	fz_warn(ctx, "Encountered new definition for object %d - keeping the original one", numbuf[i]);
1686	pdf_drop_obj(ctx, obj);
1687	}
1688	else
1689	{
1690	entry->obj = obj;
1691	fz_drop_buffer(ctx, entry->stm_buf);
1692	entry->stm_buf = NULL;
1693	}
1694	if (numbuf[i] == target)
1695	ret_entry = entry;
1696	}
1697	else
1698	{
1699	pdf_drop_obj(ctx, obj);
1700	}
1701	}
1702	}
1703	fz_always(ctx)
1704	{
1705	fz_drop_stream(ctx, stm);
1706	fz_free(ctx, ofsbuf);
1707	fz_free(ctx, numbuf);
1708	pdf_unmark_obj(ctx, objstm);
1709	pdf_drop_obj(ctx, objstm);
1710	}
1711	fz_catch(ctx)
1712	{
1713	fz_rethrow(ctx);
1714	}
1715	return ret_entry;
1716	}
1717
1718	/*
1719	* object loading
1720	*/
1721	static int
1722	pdf_obj_read(fz_context ctx, pdf_document doc, int64_t offset, int* nump, pdf_obj *page)
1723	{
1724	pdf_lexbuf *buf = &doc->lexbuf.base;
1725	int num, gen, tok;
1726	int64_t numofs, genofs, stmofs, tmpofs, newtmpofs;
1727	int xref_len;
1728	pdf_xref_entry *entry;
1729
1730	numofs = *offset;
1731	fz_seek(ctx, doc->file, numofs, SEEK_SET);
1732
1733	/ We expect to read 'num' here /
1734	tok = pdf_lex(ctx, doc->file, buf);
1735	genofs = fz_tell(ctx, doc->file);
1736	if (tok != PDF_TOK_INT)
1737	{
1738	/ Failed! /
1739	DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, *offset));
1740	*offset = genofs;
1741	return tok == PDF_TOK_EOF;
1742	}
1743	*nump = num = buf->i;
1744
1745	/ We expect to read 'gen' here /
1746	tok = pdf_lex(ctx, doc->file, buf);
1747	tmpofs = fz_tell(ctx, doc->file);
1748	if (tok != PDF_TOK_INT)
1749	{
1750	/ Failed! /
1751	DEBUGMESS((ctx, "skipping unexpected data after \"%d\" (tok=%d) at %d", num, tok, *offset));
1752	*offset = tmpofs;
1753	return tok == PDF_TOK_EOF;
1754	}
1755	gen = buf->i;
1756
1757	/ We expect to read 'obj' here /
1758	do
1759	{
1760	tmpofs = fz_tell(ctx, doc->file);
1761	tok = pdf_lex(ctx, doc->file, buf);
1762	if (tok == PDF_TOK_OBJ)
1763	break;
1764	if (tok != PDF_TOK_INT)
1765	{
1766	DEBUGMESS((ctx, "skipping unexpected data (tok=%d) at %d", tok, tmpofs));
1767	*offset = fz_tell(ctx, doc->file);
1768	return tok == PDF_TOK_EOF;
1769	}
1770	DEBUGMESS((ctx, "skipping unexpected int %d at %d", num, numofs));
1771	*nump = num = gen;
1772	numofs = genofs;
1773	gen = buf->i;
1774	genofs = tmpofs;
1775	}
1776	while (`1`);
1777
1778	/ Now we read the actual object /
1779	xref_len = pdf_xref_len(ctx, doc);
1780
1781	/ When we are reading a progressive file, we typically see:*
1782	* File Header
1783	* obj m (Linearization params)
1784	* xref #1 (refers to objects m-n)
1785	* obj m+1
1786	* ...
1787	* obj n
1788	* obj 1
1789	* ...
1790	* obj n-1
1791	* xref #2
1792	*
1793	* The linearisation params are read elsewhere, hence
1794	* whenever we read an object it should just go into the
1795	* previous xref.
1796	*/
1797	tok = pdf_repair_obj(ctx, doc, buf, &stmofs, NULL, NULL, NULL, page, &newtmpofs, NULL);
1798
1799	do / So we can break out of it /
1800	{
1801	if (num <= `0` \|\| num >= xref_len)
1802	{
1803	fz_warn(ctx, "Not a valid object number (%d %d obj)", num, gen);
1804	break;
1805	}
1806	if (gen != `0`)
1807	{
1808	fz_warn(ctx, "Unexpected non zero generation number in linearized file");
1809	}
1810	entry = pdf_get_populating_xref_entry(ctx, doc, num);
1811	if (entry->type != `0`)
1812	{
1813	DEBUGMESS((ctx, "Duplicate object found (%d %d obj)", num, gen));
1814	break;
1815	}
1816	if (page && *page)
1817	{
1818	DEBUGMESS((ctx, "Successfully read object %d @ %d - and found page %d!", num, numofs, doc->linear_page_num));
1819	if (!entry->obj)
1820	entry->obj = pdf_keep_obj(ctx, *page);
1821
1822	if (doc->linear_page_refs[doc->linear_page_num] == NULL)
1823	doc->linear_page_refs[doc->linear_page_num] = pdf_new_indirect(ctx, doc, num, gen);
1824	}
1825	else
1826	{
1827	DEBUGMESS((ctx, "Successfully read object %d @ %d", num, numofs));
1828	}
1829	entry->type = `'n'`;
1830	entry->gen = gen; // XXX: was 0
1831	entry->num = num;
1832	entry->ofs = numofs;
1833	entry->stm_ofs = stmofs;
1834	}
1835	while (`0`);
1836	if (page && *page)
1837	doc->linear_page_num++;
1838
1839	if (tok == PDF_TOK_ENDOBJ)
1840	{
1841	*offset = fz_tell(ctx, doc->file);
1842	}
1843	else
1844	{
1845	*offset = newtmpofs;
1846	}
1847	return `0`;
1848	}
1849
1850	static void
1851	pdf_load_hinted_page(fz_context ctx, pdf_document doc, int pagenum)
1852	{
1853	pdf_obj *page = NULL;
1854
1855	if (!doc->hints_loaded \|\| !doc->linear_page_refs)
1856	return;
1857
1858	if (doc->linear_page_refs[pagenum])
1859	return;
1860
1861	fz_var(page);
1862
1863	fz_try(ctx)
1864	{
1865	int num = doc->hint_page[pagenum].number;
1866	page = pdf_load_object(ctx, doc, num);
1867	if (pdf_name_eq(ctx, PDF_NAME(Page), pdf_dict_get(ctx, page, PDF_NAME(Type))))
1868	{
1869	/ We have found the page object! /
1870	DEBUGMESS((ctx, "LoadHintedPage pagenum=%d num=%d", pagenum, num));
1871	doc->linear_page_refs[pagenum] = pdf_new_indirect(ctx, doc, num, `0`);
1872	}
1873	}
1874	fz_always(ctx)
1875	pdf_drop_obj(ctx, page);
1876	fz_catch(ctx)
1877	{
1878	fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1879	/ Silently swallow the error and proceed as normal /
1880	}
1881	}
1882
1883	static int
1884	read_hinted_object(fz_context ctx, pdf_document doc, int num)
1885	{
1886	/ Try to find the object using our hint table. Find the closest*
1887	* object <= the one we want that has a hint and read forward from
1888	* there. */
1889	int expected = num;
1890	int curr_pos;
1891	int64_t start, offset;
1892
1893	while (doc->hint_obj_offsets[expected] == `0` && expected > `0`)
1894	expected--;
1895	if (expected != num)
1896	DEBUGMESS((ctx, "object %d is unhinted, will search forward from %d", expected, num));
1897	if (expected == `0`) / No hints found, just bail /
1898	return `0`;
1899
1900	curr_pos = fz_tell(ctx, doc->file);
1901	offset = doc->hint_obj_offsets[expected];
1902
1903	fz_var(expected);
1904
1905	fz_try(ctx)
1906	{
1907	int found;
1908
1909	/ Try to read forward from there /
1910	do
1911	{
1912	start = offset;
1913	DEBUGMESS((ctx, "Searching for object %d @ %d", expected, offset));
1914	pdf_obj_read(ctx, doc, &offset, &found, `0`);
1915	DEBUGMESS((ctx, "Found object %d - next will be @ %d", found, offset));
1916	if (found <= expected)
1917	{
1918	/ We found the right one (or one earlier than*
1919	* we expected). Update the hints. */
1920	doc->hint_obj_offsets[expected] = offset;
1921	doc->hint_obj_offsets[found] = start;
1922	doc->hint_obj_offsets[found+`1`] = offset;
1923	/ Retry with the next one /
1924	expected = found+`1`;
1925	}
1926	else
1927	{
1928	/ We found one later than we expected. /
1929	doc->hint_obj_offsets[expected] = `0`;
1930	doc->hint_obj_offsets[found] = start;
1931	doc->hint_obj_offsets[found+`1`] = offset;
1932	while (doc->hint_obj_offsets[expected] == `0` && expected > `0`)
1933	expected--;
1934	if (expected == `0`) / No hints found, we give up /
1935	break;
1936	}
1937	}
1938	while (found != num);
1939	}
1940	fz_always(ctx)
1941	{
1942	fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
1943	}
1944	fz_catch(ctx)
1945	{
1946	fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1947	/ FIXME: Currently we ignore the hint. Perhaps we should*
1948	* drop back to non-hinted operation here. */
1949	doc->hint_obj_offsets[expected] = `0`;
1950	fz_rethrow(ctx);
1951	}
1952	return expected != `0`;
1953	}
1954
1955	pdf_obj *
1956	pdf_load_unencrypted_object(fz_context ctx, pdf_document doc, int num)
1957	{
1958	pdf_xref_entry *x;
1959
1960	if (num <= `0` \|\| num >= pdf_xref_len(ctx, doc))
1961	fz_throw(ctx, FZ_ERROR_GENERIC, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
1962
1963	x = pdf_get_xref_entry(ctx, doc, num);
1964	if (x->type == `'n'`)
1965	{
1966	fz_seek(ctx, doc->file, x->ofs, SEEK_SET);
1967	return pdf_parse_ind_obj(ctx, doc, doc->file, &doc->lexbuf.base, NULL, NULL, NULL, NULL);
1968	}
1969	return NULL;
1970	}
1971
1972	pdf_xref_entry *
1973	pdf_cache_object(fz_context ctx, pdf_document doc, int num)
1974	{
1975	pdf_xref_entry *x;
1976	int rnum, rgen, try_repair;
1977
1978	fz_var(try_repair);
1979
1980	if (num <= `0` \|\| num >= pdf_xref_len(ctx, doc))
1981	fz_throw(ctx, FZ_ERROR_GENERIC, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
1982
1983	object_updated:
1984	try_repair = `0`;
1985	rnum = num;
1986
1987	x = pdf_get_xref_entry(ctx, doc, num);
1988
1989	if (x->obj != NULL)
1990	return x;
1991
1992	if (x->type == `'f'`)
1993	{
1994	x->obj = PDF_NULL;
1995	}
1996	else if (x->type == `'n'`)
1997	{
1998	fz_seek(ctx, doc->file, x->ofs, SEEK_SET);
1999
2000	fz_try(ctx)
2001	{
2002	x->obj = pdf_parse_ind_obj(ctx, doc, doc->file, &doc->lexbuf.base,
2003	&rnum, &rgen, &x->stm_ofs, &try_repair);
2004	}
2005	fz_catch(ctx)
2006	{
2007	if (!try_repair \|\| fz_caught(ctx) == FZ_ERROR_TRYLATER)
2008	fz_rethrow(ctx);
2009	}
2010
2011	if (!try_repair && rnum != num)
2012	{
2013	pdf_drop_obj(ctx, x->obj);
2014	x->type = `'f'`;
2015	x->ofs = -`1`;
2016	x->gen = `0`;
2017	x->num = `0`;
2018	x->stm_ofs = `0`;
2019	x->obj = NULL;
2020	try_repair = (doc->repair_attempted == `0`);
2021	}
2022
2023	if (try_repair)
2024	{
2025	fz_try(ctx)
2026	{
2027	pdf_repair_xref(ctx, doc);
2028	pdf_prime_xref_index(ctx, doc);
2029	pdf_repair_obj_stms(ctx, doc);
2030	}
2031	fz_catch(ctx)
2032	{
2033	fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2034	if (rnum == num)
2035	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot parse object (%d 0 R)", num);
2036	else
2037	fz_throw(ctx, FZ_ERROR_GENERIC, "found object (%d 0 R) instead of (%d 0 R)", rnum, num);
2038	}
2039	goto object_updated;
2040	}
2041
2042	if (doc->crypt)
2043	pdf_crypt_obj(ctx, doc->crypt, x->obj, x->num, x->gen);
2044	}
2045	else if (x->type == `'o'`)
2046	{
2047	if (!x->obj)
2048	{
2049	x = pdf_load_obj_stm(ctx, doc, x->ofs, &doc->lexbuf.base, num);
2050	if (x == NULL)
2051	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot load object stream containing object (%d 0 R)", num);
2052	if (!x->obj)
2053	fz_throw(ctx, FZ_ERROR_GENERIC, "object (%d 0 R) was not found in its object stream", num);
2054	}
2055	}
2056	else if (doc->hint_obj_offsets && read_hinted_object(ctx, doc, num))
2057	{
2058	goto object_updated;
2059	}
2060	else if (doc->file_length && doc->linear_pos < doc->file_length)
2061	{
2062	fz_throw(ctx, FZ_ERROR_TRYLATER, "cannot find object in xref (%d 0 R) - not loaded yet?", num);
2063	}
2064	else
2065	{
2066	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find object in xref (%d 0 R)", num);
2067	}
2068
2069	pdf_set_obj_parent(ctx, x->obj, num);
2070	return x;
2071	}
2072
2073	pdf_obj *
2074	pdf_load_object(fz_context ctx, pdf_document doc, int num)
2075	{
2076	pdf_xref_entry *entry = pdf_cache_object(ctx, doc, num);
2077	return pdf_keep_obj(ctx, entry->obj);
2078	}
2079
2080	pdf_obj *
2081	pdf_resolve_indirect(fz_context ctx, pdf_obj ref)
2082	{
2083	if (pdf_is_indirect(ctx, ref))
2084	{
2085	pdf_document *doc = pdf_get_indirect_document(ctx, ref);
2086	int num = pdf_to_num(ctx, ref);
2087	pdf_xref_entry *entry;
2088
2089	if (!doc)
2090	return NULL;
2091	if (num <= `0`)
2092	{
2093	fz_warn(ctx, "invalid indirect reference (%d 0 R)", num);
2094	return NULL;
2095	}
2096
2097	fz_try(ctx)
2098	entry = pdf_cache_object(ctx, doc, num);
2099	fz_catch(ctx)
2100	{
2101	fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2102	fz_warn(ctx, "cannot load object (%d 0 R) into cache", num);
2103	return NULL;
2104	}
2105
2106	ref = entry->obj;
2107	}
2108	return ref;
2109	}
2110
2111	pdf_obj *
2112	pdf_resolve_indirect_chain(fz_context ctx, pdf_obj ref)
2113	{
2114	int sanity = `10`;
2115
2116	while (pdf_is_indirect(ctx, ref))
2117	{
2118	if (--sanity == `0`)
2119	{
2120	fz_warn(ctx, "too many indirections (possible indirection cycle involving %d 0 R)", pdf_to_num(ctx, ref));
2121	return NULL;
2122	}
2123
2124	ref = pdf_resolve_indirect(ctx, ref);
2125	}
2126
2127	return ref;
2128	}
2129
2130	int
2131	pdf_count_objects(fz_context ctx, pdf_document doc)
2132	{
2133	return pdf_xref_len(ctx, doc);
2134	}
2135
2136	/*
2137	Allocate a slot in the xref table and return a fresh unused object number.
2138	*/
2139	int
2140	pdf_create_object(fz_context ctx, pdf_document doc)
2141	{
2142	/ TODO: reuse free object slots by properly linking free object chains in the ofs field /
2143	pdf_xref_entry *entry;
2144	int num = pdf_xref_len(ctx, doc);
2145
2146	if (num > PDF_MAX_OBJECT_NUMBER)
2147	fz_throw(ctx, FZ_ERROR_GENERIC, "too many objects stored in pdf");
2148
2149	entry = pdf_get_incremental_xref_entry(ctx, doc, num);
2150	entry->type = `'f'`;
2151	entry->ofs = -`1`;
2152	entry->gen = `0`;
2153	entry->num = num;
2154	entry->stm_ofs = `0`;
2155	entry->stm_buf = NULL;
2156	entry->obj = NULL;
2157	return num;
2158	}
2159
2160	/*
2161	Remove object from xref table, marking the slot as free.
2162	*/
2163	void
2164	pdf_delete_object(fz_context ctx, pdf_document doc, int num)
2165	{
2166	pdf_xref_entry *x;
2167
2168	if (num <= `0` \|\| num >= pdf_xref_len(ctx, doc))
2169	{
2170	fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2171	return;
2172	}
2173
2174	x = pdf_get_incremental_xref_entry(ctx, doc, num);
2175
2176	fz_drop_buffer(ctx, x->stm_buf);
2177	pdf_drop_obj(ctx, x->obj);
2178
2179	x->type = `'f'`;
2180	x->ofs = `0`;
2181	x->gen += `1`;
2182	x->num = `0`;
2183	x->stm_ofs = `0`;
2184	x->stm_buf = NULL;
2185	x->obj = NULL;
2186	}
2187
2188	/*
2189	Replace object in xref table with the passed in object.
2190	*/
2191	void
2192	pdf_update_object(fz_context ctx, pdf_document doc, int num, pdf_obj *newobj)
2193	{
2194	pdf_xref_entry *x;
2195
2196	if (num <= `0` \|\| num >= pdf_xref_len(ctx, doc))
2197	{
2198	fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2199	return;
2200	}
2201
2202	if (!newobj)
2203	{
2204	pdf_delete_object(ctx, doc, num);
2205	return;
2206	}
2207
2208	x = pdf_get_incremental_xref_entry(ctx, doc, num);
2209
2210	pdf_drop_obj(ctx, x->obj);
2211
2212	x->type = `'n'`;
2213	x->ofs = `0`;
2214	x->obj = pdf_keep_obj(ctx, newobj);
2215
2216	pdf_set_obj_parent(ctx, newobj, num);
2217	}
2218
2219	/*
2220	Replace stream contents for object in xref table with the passed in buffer.
2221
2222	The buffer contents must match the /Filter setting if 'compressed' is true.
2223	If 'compressed' is false, the /Filter and /DecodeParms entries are deleted.
2224	The /Length entry is updated.
2225	*/
2226	void
2227	pdf_update_stream(fz_context ctx, pdf_document doc, pdf_obj obj, fz_buffer newbuf, int compressed)
2228	{
2229	int num;
2230	pdf_xref_entry *x;
2231
2232	if (pdf_is_indirect(ctx, obj))
2233	num = pdf_to_num(ctx, obj);
2234	else
2235	num = pdf_obj_parent_num(ctx, obj);
2236	if (num <= `0` \|\| num >= pdf_xref_len(ctx, doc))
2237	{
2238	fz_warn(ctx, "object out of range (%d 0 R); xref size %d", num, pdf_xref_len(ctx, doc));
2239	return;
2240	}
2241
2242	x = pdf_get_xref_entry(ctx, doc, num);
2243
2244	fz_drop_buffer(ctx, x->stm_buf);
2245	x->stm_buf = fz_keep_buffer(ctx, newbuf);
2246
2247	pdf_dict_put_int(ctx, obj, PDF_NAME(Length), (int)fz_buffer_storage(ctx, newbuf, NULL));
2248	if (!compressed)
2249	{
2250	pdf_dict_del(ctx, obj, PDF_NAME(Filter));
2251	pdf_dict_del(ctx, obj, PDF_NAME(DecodeParms));
2252	}
2253	}
2254
2255	int
2256	pdf_lookup_metadata(fz_context ctx, pdf_document doc, const char key, char* buf, int* size)
2257	{
2258	if (!strcmp(key, "format"))
2259	return (int)fz_snprintf(buf, size, "PDF %d.%d", doc->version/`10`, doc->version % `10`);
2260
2261	if (!strcmp(key, "encryption"))
2262	{
2263	if (doc->crypt)
2264	return (int)fz_snprintf(buf, size, "Standard V%d R%d %d-bit %s",
2265	pdf_crypt_version(ctx, doc->crypt),
2266	pdf_crypt_revision(ctx, doc->crypt),
2267	pdf_crypt_length(ctx, doc->crypt),
2268	pdf_crypt_method(ctx, doc->crypt));
2269	else
2270	return (int)fz_strlcpy(buf, "None", size);
2271	}
2272
2273	if (strstr(key, "info:") == key)
2274	{
2275	pdf_obj *info;
2276	const char *s;
2277	int n;
2278
2279	info = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
2280	if (!info)
2281	return -`1`;
2282
2283	info = pdf_dict_gets(ctx, info, key + `5`);
2284	if (!info)
2285	return -`1`;
2286
2287	s = pdf_to_text_string(ctx, info);
2288	n = (int)fz_strlcpy(buf, s, size);
2289	return n;
2290	}
2291
2292	return -`1`;
2293	}
2294
2295	/*
2296	Initializers for the fz_document interface.
2297
2298	The functions are split across two files to allow calls to a
2299	version of the constructor that does not link in the interpreter.
2300	The interpreter references the built-in font and cmap resources
2301	which are quite big. Not linking those into the mutool binary
2302	saves roughly 6MB of space.
2303	*/
2304
2305	static pdf_document *
2306	pdf_new_document(fz_context ctx, fz_stream file)
2307	{
2308	pdf_document *doc = fz_new_derived_document(ctx, pdf_document);
2309
2310	doc->super.drop_document = (fz_document_drop_fn*)pdf_drop_document_imp;
2311	doc->super.get_output_intent = (fz_document_output_intent_fn*)pdf_document_output_intent;
2312	doc->super.needs_password = (fz_document_needs_password_fn*)pdf_needs_password;
2313	doc->super.authenticate_password = (fz_document_authenticate_password_fn*)pdf_authenticate_password;
2314	doc->super.has_permission = (fz_document_has_permission_fn*)pdf_has_permission;
2315	doc->super.load_outline = (fz_document_load_outline_fn*)pdf_load_outline;
2316	doc->super.resolve_link = (fz_document_resolve_link_fn*)pdf_resolve_link;
2317	doc->super.count_pages = (fz_document_count_pages_fn*)pdf_count_pages;
2318	doc->super.load_page = (fz_document_load_page_fn*)pdf_load_page;
2319	doc->super.lookup_metadata = (fz_document_lookup_metadata_fn*)pdf_lookup_metadata;
2320
2321	pdf_lexbuf_init(ctx, &doc->lexbuf.base, PDF_LEXBUF_LARGE);
2322	doc->file = fz_keep_stream(ctx, file);
2323
2324	return doc;
2325	}
2326
2327	/*
2328	Opens a PDF document.
2329
2330	Same as pdf_open_document, but takes a stream instead of a
2331	filename to locate the PDF document to open. Increments the
2332	reference count of the stream. See fz_open_file,
2333	fz_open_file_w or fz_open_fd for opening a stream, and
2334	fz_drop_stream for closing an open stream.
2335	*/
2336	pdf_document *
2337	pdf_open_document_with_stream(fz_context ctx, fz_stream file)
2338	{
2339	pdf_document *doc = pdf_new_document(ctx, file);
2340	fz_try(ctx)
2341	{
2342	pdf_init_document(ctx, doc);
2343	}
2344	fz_catch(ctx)
2345	{
2346	int caught = fz_caught(ctx);
2347	fz_drop_document(ctx, &doc->super);
2348	fz_throw(ctx, caught, "Failed to open doc from stream");
2349	}
2350	return doc;
2351	}
2352
2353	/*
2354	Open a PDF document.
2355
2356	Open a PDF document by reading its cross reference table, so
2357	MuPDF can locate PDF objects inside the file. Upon an broken
2358	cross reference table or other parse errors MuPDF will restart
2359	parsing the file from the beginning to try to rebuild a
2360	(hopefully correct) cross reference table to allow further
2361	processing of the file.
2362
2363	The returned pdf_document should be used when calling most
2364	other PDF functions. Note that it wraps the context, so those
2365	functions implicitly get access to the global state in
2366	context.
2367
2368	filename: a path to a file as it would be given to open(2).
2369	*/
2370	pdf_document *
2371	pdf_open_document(fz_context ctx, const* char *filename)
2372	{
2373	fz_stream *file = NULL;
2374	pdf_document *doc = NULL;
2375
2376	fz_var(file);
2377	fz_var(doc);
2378
2379	fz_try(ctx)
2380	{
2381	file = fz_open_file(ctx, filename);
2382	doc = pdf_new_document(ctx, file);
2383	pdf_init_document(ctx, doc);
2384	}
2385	fz_always(ctx)
2386	{
2387	fz_drop_stream(ctx, file);
2388	}
2389	fz_catch(ctx)
2390	{
2391	fz_drop_document(ctx, &doc->super);
2392	fz_rethrow(ctx);
2393	}
2394	return doc;
2395	}
2396
2397	static void
2398	pdf_load_hints(fz_context ctx, pdf_document doc, int objnum)
2399	{
2400	fz_stream *stream = NULL;
2401	pdf_obj *dict;
2402
2403	fz_var(stream);
2404	fz_var(dict);
2405
2406	fz_try(ctx)
2407	{
2408	int i, j, least_num_page_objs, page_obj_num_bits;
2409	int least_page_len, page_len_num_bits, shared_hint_offset;
2410	/ int least_page_offset, page_offset_num_bits; /
2411	/ int least_content_stream_len, content_stream_len_num_bits; /
2412	int num_shared_obj_num_bits, shared_obj_num_bits;
2413	/ int numerator_bits, denominator_bits; /
2414	int shared;
2415	int shared_obj_num, shared_obj_offset, shared_obj_count_page1;
2416	int shared_obj_count_total;
2417	int least_shared_group_len, shared_group_len_num_bits;
2418	int max_object_num = pdf_xref_len(ctx, doc);
2419
2420	stream = pdf_open_stream_number(ctx, doc, objnum);
2421	dict = pdf_get_xref_entry(ctx, doc, objnum)->obj;
2422	if (dict == NULL \|\| !pdf_is_dict(ctx, dict))
2423	fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint object");
2424
2425	shared_hint_offset = pdf_dict_get_int(ctx, dict, PDF_NAME(S));
2426
2427	/ Malloc the structures (use realloc to cope with the fact we*
2428	* may try this several times before enough data is loaded) */
2429	doc->hint_page = fz_realloc_array(ctx, doc->hint_page, doc->linear_page_count+`1`, pdf_hint_page);
2430	memset(doc->hint_page, `0`, sizeof(doc->hint_page) (doc->linear_page_count+`1`));
2431	doc->hint_obj_offsets = fz_realloc_array(ctx, doc->hint_obj_offsets, max_object_num, int64_t);
2432	memset(doc->hint_obj_offsets, `0`, sizeof(doc->hint_obj_offsets) max_object_num);
2433	doc->hint_obj_offsets_max = max_object_num;
2434
2435	/ Read the page object hints table: Header first /
2436	least_num_page_objs = fz_read_bits(ctx, stream, `32`);
2437	/ The following is sometimes a lie, but we read this version,*
2438	* as other table values are built from it. In
2439	* pdf_reference17.pdf, this points to 2 objects before the
2440	* first pages page object. */
2441	doc->hint_page[`0`].offset = fz_read_bits(ctx, stream, `32`);
2442	if (doc->hint_page[`0`].offset > doc->hint_object_offset)
2443	doc->hint_page[`0`].offset += doc->hint_object_length;
2444	page_obj_num_bits = fz_read_bits(ctx, stream, `16`);
2445	least_page_len = fz_read_bits(ctx, stream, `32`);
2446	page_len_num_bits = fz_read_bits(ctx, stream, `16`);
2447	/ least_page_offset = / (void) fz_read_bits(ctx, stream, `32`);
2448	/ page_offset_num_bits = / (void) fz_read_bits(ctx, stream, `16`);
2449	/ least_content_stream_len = / (void) fz_read_bits(ctx, stream, `32`);
2450	/ content_stream_len_num_bits = / (void) fz_read_bits(ctx, stream, `16`);
2451	num_shared_obj_num_bits = fz_read_bits(ctx, stream, `16`);
2452	shared_obj_num_bits = fz_read_bits(ctx, stream, `16`);
2453	/ numerator_bits = / (void) fz_read_bits(ctx, stream, `16`);
2454	/ denominator_bits = / (void) fz_read_bits(ctx, stream, `16`);
2455
2456	/ Item 1: Page object numbers /
2457	doc->hint_page[`0`].number = doc->linear_page1_obj_num;
2458	/ We don't care about the number of objects in the first page /
2459	(void)fz_read_bits(ctx, stream, page_obj_num_bits);
2460	j = `1`;
2461	for (i = `1`; i < doc->linear_page_count; i++)
2462	{
2463	int delta_page_objs = fz_read_bits(ctx, stream, page_obj_num_bits);
2464
2465	doc->hint_page[i].number = j;
2466	j += least_num_page_objs + delta_page_objs;
2467	}
2468	doc->hint_page[i].number = j; / Not a real page object /
2469	fz_sync_bits(ctx, stream);
2470	/ Item 2: Page lengths /
2471	j = doc->hint_page[`0`].offset;
2472	for (i = `0`; i < doc->linear_page_count; i++)
2473	{
2474	int delta_page_len = fz_read_bits(ctx, stream, page_len_num_bits);
2475	int old = j;
2476
2477	doc->hint_page[i].offset = j;
2478	j += least_page_len + delta_page_len;
2479	if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
2480	j += doc->hint_object_length;
2481	}
2482	doc->hint_page[i].offset = j;
2483	fz_sync_bits(ctx, stream);
2484	/ Item 3: Shared references /
2485	shared = `0`;
2486	for (i = `0`; i < doc->linear_page_count; i++)
2487	{
2488	int num_shared_objs = fz_read_bits(ctx, stream, num_shared_obj_num_bits);
2489	doc->hint_page[i].index = shared;
2490	shared += num_shared_objs;
2491	}
2492	doc->hint_page[i].index = shared;
2493	doc->hint_shared_ref = fz_realloc_array(ctx, doc->hint_shared_ref, shared, int);
2494	memset(doc->hint_shared_ref, `0`, sizeof(doc->hint_shared_ref) shared);
2495	fz_sync_bits(ctx, stream);
2496	/ Item 4: Shared references /
2497	for (i = `0`; i < shared; i++)
2498	{
2499	int ref = fz_read_bits(ctx, stream, shared_obj_num_bits);
2500	doc->hint_shared_ref[i] = ref;
2501	}
2502	/ Skip items 5,6,7 as we don't use them /
2503
2504	fz_seek(ctx, stream, shared_hint_offset, SEEK_SET);
2505
2506	/ Read the shared object hints table: Header first /
2507	shared_obj_num = fz_read_bits(ctx, stream, `32`);
2508	shared_obj_offset = fz_read_bits(ctx, stream, `32`);
2509	if (shared_obj_offset > doc->hint_object_offset)
2510	shared_obj_offset += doc->hint_object_length;
2511	shared_obj_count_page1 = fz_read_bits(ctx, stream, `32`);
2512	shared_obj_count_total = fz_read_bits(ctx, stream, `32`);
2513	shared_obj_num_bits = fz_read_bits(ctx, stream, `16`);
2514	least_shared_group_len = fz_read_bits(ctx, stream, `32`);
2515	shared_group_len_num_bits = fz_read_bits(ctx, stream, `16`);
2516
2517	/ Sanity check the references in Item 4 above to ensure we*
2518	* don't access out of range with malicious files. */
2519	for (i = `0`; i < shared; i++)
2520	{
2521	if (doc->hint_shared_ref[i] >= shared_obj_count_total)
2522	{
2523	fz_throw(ctx, FZ_ERROR_GENERIC, "malformed hint stream (shared refs)");
2524	}
2525	}
2526
2527	doc->hint_shared = fz_realloc_array(ctx, doc->hint_shared, shared_obj_count_total+`1`, pdf_hint_shared);
2528	memset(doc->hint_shared, `0`, sizeof(doc->hint_shared) (shared_obj_count_total+`1`));
2529
2530	/ Item 1: Shared references /
2531	j = doc->hint_page[`0`].offset;
2532	for (i = `0`; i < shared_obj_count_page1; i++)
2533	{
2534	int off = fz_read_bits(ctx, stream, shared_group_len_num_bits);
2535	int old = j;
2536	doc->hint_shared[i].offset = j;
2537	j += off + least_shared_group_len;
2538	if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
2539	j += doc->hint_object_length;
2540	}
2541	/ FIXME: We would have problems recreating the length of the*
2542	* last page 1 shared reference group. But we'll never need
2543	* to, so ignore it. */
2544	j = shared_obj_offset;
2545	for (; i < shared_obj_count_total; i++)
2546	{
2547	int off = fz_read_bits(ctx, stream, shared_group_len_num_bits);
2548	int old = j;
2549	doc->hint_shared[i].offset = j;
2550	j += off + least_shared_group_len;
2551	if (old <= doc->hint_object_offset && j > doc->hint_object_offset)
2552	j += doc->hint_object_length;
2553	}
2554	doc->hint_shared[i].offset = j;
2555	fz_sync_bits(ctx, stream);
2556	/ Item 2: Signature flags: read these just so we can skip /
2557	for (i = `0`; i < shared_obj_count_total; i++)
2558	{
2559	doc->hint_shared[i].number = fz_read_bits(ctx, stream, `1`);
2560	}
2561	fz_sync_bits(ctx, stream);
2562	/ Item 3: Signatures: just skip /
2563	for (i = `0`; i < shared_obj_count_total; i++)
2564	{
2565	if (doc->hint_shared[i].number)
2566	{
2567	(void) fz_read_bits(ctx, stream, `128`);
2568	}
2569	}
2570	fz_sync_bits(ctx, stream);
2571	/ Item 4: Shared object object numbers /
2572	j = doc->linear_page1_obj_num; / FIXME: This is a lie! /
2573	for (i = `0`; i < shared_obj_count_page1; i++)
2574	{
2575	doc->hint_shared[i].number = j;
2576	j += fz_read_bits(ctx, stream, shared_obj_num_bits) + `1`;
2577	}
2578	j = shared_obj_num;
2579	for (; i < shared_obj_count_total; i++)
2580	{
2581	doc->hint_shared[i].number = j;
2582	j += fz_read_bits(ctx, stream, shared_obj_num_bits) + `1`;
2583	}
2584	doc->hint_shared[i].number = j;
2585
2586	/ Now, actually use the data we have gathered. /
2587	for (i = `0` /shared_obj_count_page1/; i < shared_obj_count_total; i++)
2588	{
2589	doc->hint_obj_offsets[doc->hint_shared[i].number] = doc->hint_shared[i].offset;
2590	}
2591	for (i = `0`; i < doc->linear_page_count; i++)
2592	{
2593	doc->hint_obj_offsets[doc->hint_page[i].number] = doc->hint_page[i].offset;
2594	}
2595	}
2596	fz_always(ctx)
2597	{
2598	fz_drop_stream(ctx, stream);
2599	}
2600	fz_catch(ctx)
2601	{
2602	fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
2603	/ Don't try to load hints again /
2604	doc->hints_loaded = `1`;
2605	/ We won't use the linearized object anymore. /
2606	doc->file_reading_linearly = `0`;
2607	/ Any other error becomes a TRYLATER /
2608	fz_throw(ctx, FZ_ERROR_TRYLATER, "malformed hints object");
2609	}
2610	doc->hints_loaded = `1`;
2611	}
2612
2613	static void
2614	pdf_load_hint_object(fz_context ctx, pdf_document doc)
2615	{
2616	pdf_lexbuf *buf = &doc->lexbuf.base;
2617	int64_t curr_pos;
2618
2619	curr_pos = fz_tell(ctx, doc->file);
2620	fz_seek(ctx, doc->file, doc->hint_object_offset, SEEK_SET);
2621	fz_try(ctx)
2622	{
2623	while (`1`)
2624	{
2625	pdf_obj *page = NULL;
2626	int64_t tmpofs;
2627	int num, tok;
2628
2629	tok = pdf_lex(ctx, doc->file, buf);
2630	if (tok != PDF_TOK_INT)
2631	break;
2632	num = buf->i;
2633	tok = pdf_lex(ctx, doc->file, buf);
2634	if (tok != PDF_TOK_INT)
2635	break;
2636	/ Ignore gen = buf->i /
2637	tok = pdf_lex(ctx, doc->file, buf);
2638	if (tok != PDF_TOK_OBJ)
2639	break;
2640	(void)pdf_repair_obj(ctx, doc, buf, &tmpofs, NULL, NULL, NULL, &page, &tmpofs, NULL);
2641	pdf_load_hints(ctx, doc, num);
2642	}
2643	}
2644	fz_always(ctx)
2645	{
2646	fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
2647	}
2648	fz_catch(ctx)
2649	{
2650	fz_rethrow(ctx);
2651	}
2652	}
2653
2654	pdf_obj pdf_progressive_advance(fz_context ctx, pdf_document doc, int* pagenum)
2655	{
2656	pdf_lexbuf *buf = &doc->lexbuf.base;
2657	int curr_pos;
2658	pdf_obj *page = NULL;
2659
2660	pdf_load_hinted_page(ctx, doc, pagenum);
2661
2662	if (pagenum < `0` \|\| pagenum >= doc->linear_page_count)
2663	fz_throw(ctx, FZ_ERROR_GENERIC, "page load out of range (%d of %d)", pagenum, doc->linear_page_count);
2664
2665	if (doc->linear_pos == doc->file_length)
2666	return doc->linear_page_refs[pagenum];
2667
2668	/ Only load hints once, and then only after we have got page 0 /
2669	if (pagenum > `0` && !doc->hints_loaded && doc->hint_object_offset > `0` && doc->linear_pos >= doc->hint_object_offset)
2670	{
2671	/ Found hint object /
2672	pdf_load_hint_object(ctx, doc);
2673	}
2674
2675	DEBUGMESS((ctx, "continuing to try to advance from %d", doc->linear_pos));
2676	curr_pos = fz_tell(ctx, doc->file);
2677
2678	fz_var(page);
2679
2680	fz_try(ctx)
2681	{
2682	int eof;
2683	do
2684	{
2685	int num;
2686	eof = pdf_obj_read(ctx, doc, &doc->linear_pos, &num, &page);
2687	pdf_drop_obj(ctx, page);
2688	page = NULL;
2689	}
2690	while (!eof);
2691
2692	{
2693	pdf_obj *catalog;
2694	pdf_obj *pages;
2695	doc->linear_pos = doc->file_length;
2696	pdf_load_xref(ctx, doc, buf);
2697	catalog = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
2698	pages = pdf_dict_get(ctx, catalog, PDF_NAME(Pages));
2699
2700	if (!pdf_is_dict(ctx, pages))
2701	fz_throw(ctx, FZ_ERROR_GENERIC, "missing page tree");
2702	break;
2703	}
2704	}
2705	fz_always(ctx)
2706	{
2707	fz_seek(ctx, doc->file, curr_pos, SEEK_SET);
2708	}
2709	fz_catch(ctx)
2710	{
2711	pdf_drop_obj(ctx, page);
2712	if (fz_caught(ctx) == FZ_ERROR_TRYLATER)
2713	{
2714	if (doc->linear_page_refs[pagenum] == NULL)
2715	{
2716	/ Still not got a page /
2717	fz_rethrow(ctx);
2718	}
2719	}
2720	else
2721	fz_rethrow(ctx);
2722	}
2723
2724	return doc->linear_page_refs[pagenum];
2725	}
2726
2727	/*
2728	Down-cast generic fitz objects into pdf specific variants.
2729	Returns NULL if the objects are not from a PDF document.
2730	*/
2731	pdf_document pdf_document_from_fz_document(fz_context ctx, fz_document *ptr)
2732	{
2733	return (pdf_document )((ptr && ptr->count_pages == (fz_document_count_pages_fn)pdf_count_pages) ? ptr : NULL);
2734	}
2735
2736	pdf_page pdf_page_from_fz_page(fz_context ctx, fz_page *ptr)
2737	{
2738	return (pdf_page )((ptr && ptr->bound_page == (fz_page_bound_page_fn)pdf_bound_page) ? ptr : NULL);
2739	}
2740
2741	/*
2742	down-cast a fz_document to a pdf_document.
2743	Returns NULL if underlying document is not PDF
2744	*/
2745	pdf_document pdf_specifics(fz_context ctx, fz_document *doc)
2746	{
2747	return pdf_document_from_fz_document(ctx, doc);
2748	}
2749
2750	pdf_obj *
2751	pdf_add_object(fz_context ctx, pdf_document doc, pdf_obj *obj)
2752	{
2753	pdf_document *orig_doc;
2754	int num;
2755
2756	orig_doc = pdf_get_bound_document(ctx, obj);
2757	if (orig_doc && orig_doc != doc)
2758	fz_throw(ctx, FZ_ERROR_GENERIC, "tried to add an object belonging to a different document");
2759	if (pdf_is_indirect(ctx, obj))
2760	return pdf_keep_obj(ctx, obj);
2761	num = pdf_create_object(ctx, doc);
2762	pdf_update_object(ctx, doc, num, obj);
2763	return pdf_new_indirect(ctx, doc, num, `0`);
2764	}
2765
2766	pdf_obj *
2767	pdf_add_object_drop(fz_context ctx, pdf_document doc, pdf_obj *obj)
2768	{
2769	pdf_obj *ind = NULL;
2770	fz_try(ctx)
2771	ind = pdf_add_object(ctx, doc, obj);
2772	fz_always(ctx)
2773	pdf_drop_obj(ctx, obj);
2774	fz_catch(ctx)
2775	fz_rethrow(ctx);
2776	return ind;
2777	}
2778
2779	pdf_obj *
2780	pdf_add_new_dict(fz_context ctx, pdf_document doc, int initial)
2781	{
2782	return pdf_add_object_drop(ctx, doc, pdf_new_dict(ctx, doc, initial));
2783	}
2784
2785	pdf_obj *
2786	pdf_add_new_array(fz_context ctx, pdf_document doc, int initial)
2787	{
2788	return pdf_add_object_drop(ctx, doc, pdf_new_array(ctx, doc, initial));
2789	}
2790
2791	pdf_obj *
2792	pdf_add_stream(fz_context ctx, pdf_document doc, fz_buffer buf, pdf_obj obj, int compressed)
2793	{
2794	pdf_obj *ind;
2795	if (!obj)
2796	ind = pdf_add_new_dict(ctx, doc, `4`);
2797	else
2798	ind = pdf_add_object(ctx, doc, obj);
2799	fz_try(ctx)
2800	pdf_update_stream(ctx, doc, ind, buf, compressed);
2801	fz_catch(ctx)
2802	{
2803	pdf_drop_obj(ctx, ind);
2804	fz_rethrow(ctx);
2805	}
2806	return ind;
2807	}
2808
2809	pdf_document pdf_create_document(fz_context ctx)
2810	{
2811	pdf_document *doc;
2812	pdf_obj *root;
2813	pdf_obj *pages;
2814	pdf_obj *trailer = NULL;
2815
2816	fz_var(trailer);
2817
2818	doc = pdf_new_document(ctx, NULL);
2819	fz_try(ctx)
2820	{
2821	doc->version = `14`;
2822	doc->file_size = `0`;
2823	doc->startxref = `0`;
2824	doc->num_xref_sections = `0`;
2825	doc->num_incremental_sections = `0`;
2826	doc->xref_base = `0`;
2827	doc->disallow_new_increments = `0`;
2828	pdf_get_populating_xref_entry(ctx, doc, `0`);
2829
2830	trailer = pdf_new_dict(ctx, doc, `2`);
2831	pdf_dict_put_int(ctx, trailer, PDF_NAME(Size), `3`);
2832	pdf_dict_put_drop(ctx, trailer, PDF_NAME(Root), root = pdf_add_new_dict(ctx, doc, `2`));
2833	pdf_dict_put(ctx, root, PDF_NAME(Type), PDF_NAME(Catalog));
2834	pdf_dict_put_drop(ctx, root, PDF_NAME(Pages), pages = pdf_add_new_dict(ctx, doc, `3`));
2835	pdf_dict_put(ctx, pages, PDF_NAME(Type), PDF_NAME(Pages));
2836	pdf_dict_put_int(ctx, pages, PDF_NAME(Count), `0`);
2837	pdf_dict_put_array(ctx, pages, PDF_NAME(Kids), `1`);
2838
2839	/ Set the trailer of the final xref section. /
2840	doc->xref_sections[`0`].trailer = trailer;
2841	}
2842	fz_catch(ctx)
2843	{
2844	pdf_drop_obj(ctx, trailer);
2845	fz_drop_document(ctx, &doc->super);
2846	fz_rethrow(ctx);
2847	}
2848	return doc;
2849	}
2850
/* NULL-terminated list of file extensions referenced by pdf_document_handler. */
static const char *pdf_extensions[] =
{
	"pdf",
	"pclm",
	"ai",
	NULL
};
2858
/* NULL-terminated list of MIME types referenced by pdf_document_handler. */
static const char *pdf_mimetypes[] =
{
	"application/pdf",
	"application/PCLm",
	NULL
};
2865
2866	fz_document_handler pdf_document_handler =
2867	{
2868	NULL,
2869	(fz_document_open_fn*)pdf_open_document,
2870	(fz_document_open_with_stream_fn*)pdf_open_document_with_stream,
2871	pdf_extensions,
2872	pdf_mimetypes
2873	};
2874
2875	void pdf_mark_xref(fz_context ctx, pdf_document doc)
2876	{
2877	int x, e;
2878
2879	for (x = `0`; x < doc->num_xref_sections; x++)
2880	{
2881	pdf_xref *xref = &doc->xref_sections[x];
2882	pdf_xref_subsec *sub;
2883
2884	for (sub = xref->subsec; sub != NULL; sub = sub->next)
2885	{
2886	for (e = `0`; e < sub->len; e++)
2887	{
2888	pdf_xref_entry *entry = &sub->table[e];
2889	if (entry->obj)
2890	{
2891	entry->marked = `1`;
2892	}
2893	}
2894	}
2895	}
2896	}
2897
2898	void pdf_clear_xref(fz_context ctx, pdf_document doc)
2899	{
2900	int x, e;
2901
2902	for (x = `0`; x < doc->num_xref_sections; x++)
2903	{
2904	pdf_xref *xref = &doc->xref_sections[x];
2905	pdf_xref_subsec *sub;
2906
2907	for (sub = xref->subsec; sub != NULL; sub = sub->next)
2908	{
2909	for (e = `0`; e < sub->len; e++)
2910	{
2911	pdf_xref_entry *entry = &sub->table[e];
2912	/ We cannot drop objects if the stream*
2913	* buffer has been updated */
2914	if (entry->obj != NULL && entry->stm_buf == NULL)
2915	{
2916	if (pdf_obj_refs(ctx, entry->obj) == `1`)
2917	{
2918	pdf_drop_obj(ctx, entry->obj);
2919	entry->obj = NULL;
2920	}
2921	}
2922	}
2923	}
2924	}
2925	}
2926
2927	void pdf_clear_xref_to_mark(fz_context ctx, pdf_document doc)
2928	{
2929	int x, e;
2930
2931	for (x = `0`; x < doc->num_xref_sections; x++)
2932	{
2933	pdf_xref *xref = &doc->xref_sections[x];
2934	pdf_xref_subsec *sub;
2935
2936	for (sub = xref->subsec; sub != NULL; sub = sub->next)
2937	{
2938	for (e = `0`; e < sub->len; e++)
2939	{
2940	pdf_xref_entry *entry = &sub->table[e];
2941
2942	/ We cannot drop objects if the stream buffer has*
2943	* been updated */
2944	if (entry->obj != NULL && entry->stm_buf == NULL)
2945	{
2946	if (!entry->marked && pdf_obj_refs(ctx, entry->obj) == `1`)
2947	{
2948	pdf_drop_obj(ctx, entry->obj);
2949	entry->obj = NULL;
2950	}
2951	}
2952	}
2953	}
2954	}
2955	}
2956

Browse the source code of MuPDF/source/pdf/pdf-xref.c