pdf-repair.c source code [MuPDF/source/pdf/pdf-repair.c]

1	#include "mupdf/fitz.h"
2	#include "mupdf/pdf.h"
3
4	#include <string.h>
5
/* Scan file for objects and reconstruct xref table */
7
/*
 * One object located while scanning the file during repair.
 * pdf_repair_xref collects these in a list and later copies them
 * into the rebuilt xref table.
 */
struct entry
{
	int num;		/* object number from the '<num> <gen> obj' header */
	int gen;		/* generation number from the same header */
	int64_t ofs;		/* file offset recorded for the object-number token */
	int64_t stm_ofs;	/* file offset of the stream data; 0 if no stream was seen */
	int stm_len;		/* stream length found by scanning for 'endstream';
				 * negative when no length correction is needed */
};
16
17	static void add_root(fz_context ctx, pdf_obj obj, pdf_obj **roots, int* num_roots, int* *max_roots)
18	{
19	if (num_roots == max_roots)
20	{
21	int new_max_roots = max_roots `2`;
22	if (new_max_roots == `0`)
23	new_max_roots = `4`;
24	roots = fz_realloc_array(ctx, roots, new_max_roots, pdf_obj*);
25	*max_roots = new_max_roots;
26	}
27	(roots)[(num_roots)++] = pdf_keep_obj(ctx, obj);
28	}
29
30	int
31	pdf_repair_obj(fz_context ctx, pdf_document doc, pdf_lexbuf buf, int64_t stmofsp, int stmlenp, pdf_obj encrypt, pdf_obj id, pdf_obj page, int64_t tmpofs, pdf_obj **root)
32	{
33	fz_stream *file = doc->file;
34	pdf_token tok;
35	int stm_len;
36
37	*stmofsp = `0`;
38	if (stmlenp)
39	*stmlenp = -`1`;
40
41	stm_len = `0`;
42
43	/ On entry to this function, we know that we've just seen*
44	* '<int> <int> obj'. We expect the next thing we see to be a
45	* pdf object. Regardless of the type of thing we meet next
46	* we only need to fully parse it if it is a dictionary. */
47	tok = pdf_lex(ctx, file, buf);
48
49	if (tok == PDF_TOK_OPEN_DICT)
50	{
51	pdf_obj obj, dict = NULL;
52
53	fz_try(ctx)
54	{
55	dict = pdf_parse_dict(ctx, doc, file, buf);
56	}
57	fz_catch(ctx)
58	{
59	fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
60	/ Don't let a broken object at EOF overwrite a good one /
61	if (file->eof)
62	fz_rethrow(ctx);
63	/ Silently swallow the error /
64	dict = pdf_new_dict(ctx, NULL, `2`);
65	}
66
67	/ We must be careful not to try to resolve any indirections*
68	* here. We have just read dict, so we know it to be a non
69	* indirected dictionary. Before we look at any values that
70	* we get back from looking up in it, we need to check they
71	* aren't indirected. */
72
73	if (encrypt \|\| id \|\| root)
74	{
75	obj = pdf_dict_get(ctx, dict, PDF_NAME(Type));
76	if (!pdf_is_indirect(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME(XRef)))
77	{
78	if (encrypt)
79	{
80	obj = pdf_dict_get(ctx, dict, PDF_NAME(Encrypt));
81	if (obj)
82	{
83	pdf_drop_obj(ctx, *encrypt);
84	*encrypt = pdf_keep_obj(ctx, obj);
85	}
86	}
87
88	if (id)
89	{
90	obj = pdf_dict_get(ctx, dict, PDF_NAME(ID));
91	if (obj)
92	{
93	pdf_drop_obj(ctx, *id);
94	*id = pdf_keep_obj(ctx, obj);
95	}
96	}
97
98	if (root)
99	*root = pdf_keep_obj(ctx, pdf_dict_get(ctx, dict, PDF_NAME(Root)));
100	}
101	}
102
103	obj = pdf_dict_get(ctx, dict, PDF_NAME(Length));
104	if (!pdf_is_indirect(ctx, obj) && pdf_is_int(ctx, obj))
105	stm_len = pdf_to_int(ctx, obj);
106
107	if (doc->file_reading_linearly && page)
108	{
109	obj = pdf_dict_get(ctx, dict, PDF_NAME(Type));
110	if (!pdf_is_indirect(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME(Page)))
111	{
112	pdf_drop_obj(ctx, *page);
113	*page = pdf_keep_obj(ctx, dict);
114	}
115	}
116
117	pdf_drop_obj(ctx, dict);
118	}
119
120	while ( tok != PDF_TOK_STREAM &&
121	tok != PDF_TOK_ENDOBJ &&
122	tok != PDF_TOK_ERROR &&
123	tok != PDF_TOK_EOF &&
124	tok != PDF_TOK_INT )
125	{
126	*tmpofs = fz_tell(ctx, file);
127	if (*tmpofs < `0`)
128	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file");
129	tok = pdf_lex(ctx, file, buf);
130	}
131
132	if (tok == PDF_TOK_STREAM)
133	{
134	int c = fz_read_byte(ctx, file);
135	if (c == `'\r'`) {
136	c = fz_peek_byte(ctx, file);
137	if (c == `'\n'`)
138	fz_read_byte(ctx, file);
139	}
140
141	*stmofsp = fz_tell(ctx, file);
142	if (*stmofsp < `0`)
143	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot seek in file");
144
145	if (stm_len > `0`)
146	{
147	fz_seek(ctx, file, *stmofsp + stm_len, `0`);
148	fz_try(ctx)
149	{
150	tok = pdf_lex(ctx, file, buf);
151	}
152	fz_catch(ctx)
153	{
154	fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
155	fz_warn(ctx, "cannot find endstream token, falling back to scanning");
156	}
157	if (tok == PDF_TOK_ENDSTREAM)
158	goto atobjend;
159	fz_seek(ctx, file, *stmofsp, `0`);
160	}
161
162	(void)fz_read(ctx, file, (unsigned char *) buf->scratch, `9`);
163
164	while (memcmp(buf->scratch, "endstream", `9`) != `0`)
165	{
166	c = fz_read_byte(ctx, file);
167	if (c == EOF)
168	break;
169	memmove(&buf->scratch[`0`], &buf->scratch[`1`], `8`);
170	buf->scratch[`8`] = c;
171	}
172
173	if (stmlenp)
174	stmlenp = fz_tell(ctx, file) - stmofsp - `9`;
175
176	atobjend:
177	*tmpofs = fz_tell(ctx, file);
178	if (*tmpofs < `0`)
179	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file");
180	tok = pdf_lex(ctx, file, buf);
181	if (tok != PDF_TOK_ENDOBJ)
182	fz_warn(ctx, "object missing 'endobj' token");
183	else
184	{
185	/ Read another token as we always return the next one /
186	*tmpofs = fz_tell(ctx, file);
187	if (*tmpofs < `0`)
188	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file");
189	tok = pdf_lex(ctx, file, buf);
190	}
191	}
192	return tok;
193	}
194
195	static void
196	pdf_repair_obj_stm(fz_context ctx, pdf_document doc, int stm_num)
197	{
198	pdf_obj *obj;
199	fz_stream *stm = NULL;
200	pdf_token tok;
201	int i, n, count;
202	pdf_lexbuf buf;
203
204	fz_var(stm);
205
206	pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL);
207
208	fz_try(ctx)
209	{
210	obj = pdf_load_object(ctx, doc, stm_num);
211
212	count = pdf_dict_get_int(ctx, obj, PDF_NAME(N));
213
214	pdf_drop_obj(ctx, obj);
215
216	stm = pdf_open_stream_number(ctx, doc, stm_num);
217
218	for (i = `0`; i < count; i++)
219	{
220	pdf_xref_entry *entry;
221
222	tok = pdf_lex(ctx, stm, &buf);
223	if (tok != PDF_TOK_INT)
224	fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d 0 R)", stm_num);
225
226	n = buf.i;
227	if (n < `0`)
228	{
229	fz_warn(ctx, "ignoring object with invalid object number (%d %d R)", n, i);
230	continue;
231	}
232	else if (n >= pdf_xref_len(ctx, doc))
233	{
234	fz_warn(ctx, "ignoring object with invalid object number (%d %d R)", n, i);
235	continue;
236	}
237
238	entry = pdf_get_populating_xref_entry(ctx, doc, n);
239	entry->ofs = stm_num;
240	entry->gen = i;
241	entry->num = n;
242	entry->stm_ofs = `0`;
243	pdf_drop_obj(ctx, entry->obj);
244	entry->obj = NULL;
245	entry->type = `'o'`;
246
247	tok = pdf_lex(ctx, stm, &buf);
248	if (tok != PDF_TOK_INT)
249	fz_throw(ctx, FZ_ERROR_GENERIC, "corrupt object stream (%d 0 R)", stm_num);
250	}
251	}
252	fz_always(ctx)
253	{
254	fz_drop_stream(ctx, stm);
255	pdf_lexbuf_fin(ctx, &buf);
256	}
257	fz_catch(ctx)
258	{
259	fz_rethrow(ctx);
260	}
261	}
262
263	static void
264	orphan_object(fz_context ctx, pdf_document doc, pdf_obj *obj)
265	{
266	if (doc->orphans_count == doc->orphans_max)
267	{
268	int new_max = (doc->orphans_max ? doc->orphans_max*`2` : `32`);
269
270	fz_try(ctx)
271	{
272	doc->orphans = fz_realloc_array(ctx, doc->orphans, new_max, pdf_obj*);
273	doc->orphans_max = new_max;
274	}
275	fz_catch(ctx)
276	{
277	pdf_drop_obj(ctx, obj);
278	fz_rethrow(ctx);
279	}
280	}
281	doc->orphans[doc->orphans_count++] = obj;
282	}
283
/*
 * Return 1 if c is one of the six whitespace bytes recognised here
 * (NUL, TAB, LF, FF, CR, SPACE), otherwise 0.
 */
static int is_white(int c)
{
	switch (c)
	{
	case '\x00':
	case '\x09':
	case '\x0a':
	case '\x0c':
	case '\x0d':
	case '\x20':
		return 1;
	default:
		return 0;
	}
}
288
289	void
290	pdf_repair_xref(fz_context ctx, pdf_document doc)
291	{
292	pdf_obj dict, obj = NULL;
293	pdf_obj *length;
294
295	pdf_obj *encrypt = NULL;
296	pdf_obj *id = NULL;
297	pdf_obj **roots = NULL;
298	pdf_obj *info = NULL;
299
300	struct entry *list = NULL;
301	int listlen;
302	int listcap;
303	int maxnum = `0`;
304
305	int num = `0`;
306	int gen = `0`;
307	int64_t tmpofs, stm_ofs, numofs = `0`, genofs = `0`;
308	int stm_len;
309	pdf_token tok;
310	int next;
311	int i;
312	size_t j, n;
313	int c;
314	pdf_lexbuf *buf = &doc->lexbuf.base;
315	int num_roots = `0`;
316	int max_roots = `0`;
317
318	fz_var(encrypt);
319	fz_var(id);
320	fz_var(roots);
321	fz_var(num_roots);
322	fz_var(max_roots);
323	fz_var(info);
324	fz_var(list);
325	fz_var(obj);
326
327	fz_warn(ctx, "repairing PDF document");
328
329	if (doc->repair_attempted)
330	fz_throw(ctx, FZ_ERROR_GENERIC, "Repair failed already - not trying again");
331	doc->repair_attempted = `1`;
332
333	doc->dirty = `1`;
334	doc->freeze_updates = `1`; / Can't support incremental update after repair /
335
336	pdf_forget_xref(ctx, doc);
337
338	fz_seek(ctx, doc->file, `0`, `0`);
339
340	fz_try(ctx)
341	{
342	pdf_xref_entry *entry;
343	listlen = `0`;
344	listcap = `1024`;
345	list = fz_malloc_array(ctx, listcap, struct entry);
346
347	/ look for '%PDF' version marker within first kilobyte of file /
348	n = fz_read(ctx, doc->file, (unsigned char *)buf->scratch, fz_mini(buf->size, `1024`));
349
350	fz_seek(ctx, doc->file, `0`, `0`);
351	if (n >= `4`)
352	{
353	for (j = `0`; j < n - `4`; j++)
354	{
355	if (memcmp(&buf->scratch[j], "%PDF", `4`) == `0`)
356	{
357	fz_seek(ctx, doc->file, (int64_t)(j + `8`), `0`); / skip "%PDF-X.Y" /
358	break;
359	}
360	}
361	}
362
363	/ skip comment line after version marker since some generators*
364	* forget to terminate the comment with a newline */
365	c = fz_read_byte(ctx, doc->file);
366	while (c >= `0` && (c == `' '` \|\| c == `'%'`))
367	c = fz_read_byte(ctx, doc->file);
368	fz_unread_byte(ctx, doc->file);
369
370	while (`1`)
371	{
372	tmpofs = fz_tell(ctx, doc->file);
373	if (tmpofs < `0`)
374	fz_throw(ctx, FZ_ERROR_GENERIC, "cannot tell in file");
375
376	fz_try(ctx)
377	tok = pdf_lex_no_string(ctx, doc->file, buf);
378	fz_catch(ctx)
379	{
380	fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
381	fz_warn(ctx, "skipping ahead to next token");
382	do
383	c = fz_read_byte(ctx, doc->file);
384	while (c != EOF && !is_white(c));
385	if (c == EOF)
386	tok = PDF_TOK_EOF;
387	else
388	continue;
389	}
390
391	/ If we have the next token already, then we'll jump*
392	* back here, rather than going through the top of
393	* the loop. */
394	have_next_token:
395
396	if (tok == PDF_TOK_INT)
397	{
398	if (buf->i < `0`)
399	{
400	num = `0`;
401	gen = `0`;
402	continue;
403	}
404	numofs = genofs;
405	num = gen;
406	genofs = tmpofs;
407	gen = buf->i;
408	}
409
410	else if (tok == PDF_TOK_OBJ)
411	{
412	pdf_obj *root = NULL;
413
414	fz_try(ctx)
415	{
416	stm_len = `0`;
417	stm_ofs = `0`;
418	tok = pdf_repair_obj(ctx, doc, buf, &stm_ofs, &stm_len, &encrypt, &id, NULL, &tmpofs, &root);
419	if (root)
420	add_root(ctx, root, &roots, &num_roots, &max_roots);
421	}
422	fz_always(ctx)
423	{
424	pdf_drop_obj(ctx, root);
425	}
426	fz_catch(ctx)
427	{
428	fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
429	/ If we haven't seen a root yet, there is nothing*
430	* we can do, but give up. Otherwise, we'll make
431	* do. */
432	if (!roots)
433	fz_rethrow(ctx);
434	fz_warn(ctx, "cannot parse object (%d %d R) - ignoring rest of file", num, gen);
435	break;
436	}
437
438	if (num <= `0` \|\| num > PDF_MAX_OBJECT_NUMBER)
439	{
440	fz_warn(ctx, "ignoring object with invalid object number (%d %d R)", num, gen);
441	goto have_next_token;
442	}
443
444	gen = fz_clampi(gen, `0`, `65535`);
445
446	if (listlen + `1` == listcap)
447	{
448	listcap = (listcap * `3`) / `2`;
449	list = fz_realloc_array(ctx, list, listcap, struct entry);
450	}
451
452	list[listlen].num = num;
453	list[listlen].gen = gen;
454	list[listlen].ofs = numofs;
455	list[listlen].stm_ofs = stm_ofs;
456	list[listlen].stm_len = stm_len;
457	listlen ++;
458
459	if (num > maxnum)
460	maxnum = num;
461
462	goto have_next_token;
463	}
464
465	/ If we find a dictionary it is probably the trailer,*
466	* but could be a stream (or bogus) dictionary caused
467	* by a corrupt file. */
468	else if (tok == PDF_TOK_OPEN_DICT)
469	{
470	pdf_obj *dictobj;
471
472	fz_try(ctx)
473	{
474	dict = pdf_parse_dict(ctx, doc, doc->file, buf);
475	}
476	fz_catch(ctx)
477	{
478	fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
479	/ If this was the real trailer dict*
480	* it was broken, in which case we are
481	* in trouble. Keep going though in
482	* case this was just a bogus dict. */
483	continue;
484	}
485
486	fz_try(ctx)
487	{
488	dictobj = pdf_dict_get(ctx, dict, PDF_NAME(Encrypt));
489	if (dictobj)
490	{
491	pdf_drop_obj(ctx, encrypt);
492	encrypt = pdf_keep_obj(ctx, dictobj);
493	}
494
495	dictobj = pdf_dict_get(ctx, dict, PDF_NAME(ID));
496	if (dictobj && (!id \|\| !encrypt \|\| pdf_dict_get(ctx, dict, PDF_NAME(Encrypt))))
497	{
498	pdf_drop_obj(ctx, id);
499	id = pdf_keep_obj(ctx, dictobj);
500	}
501
502	dictobj = pdf_dict_get(ctx, dict, PDF_NAME(Root));
503	if (dictobj)
504	add_root(ctx, dictobj, &roots, &num_roots, &max_roots);
505
506	dictobj = pdf_dict_get(ctx, dict, PDF_NAME(Info));
507	if (dictobj)
508	{
509	pdf_drop_obj(ctx, info);
510	info = pdf_keep_obj(ctx, dictobj);
511	}
512	}
513	fz_always(ctx)
514	pdf_drop_obj(ctx, dict);
515	fz_catch(ctx)
516	fz_rethrow(ctx);
517	}
518
519	else if (tok == PDF_TOK_EOF)
520	{
521	break;
522	}
523
524	else
525	{
526	num = `0`;
527	gen = `0`;
528	}
529	}
530
531	if (listlen == `0`)
532	fz_throw(ctx, FZ_ERROR_GENERIC, "no objects found");
533
534	/ make xref reasonable /
535
536	/*
537	Dummy access to entry to assure sufficient space in the xref table
538	and avoid repeated reallocs in the loop
539	*/
540	/ Ensure that the first xref table is a 'solid' one from*
541	* 0 to maxnum. */
542	pdf_ensure_solid_xref(ctx, doc, maxnum);
543
544	for (i = `1`; i < maxnum; i++)
545	{
546	entry = pdf_get_populating_xref_entry(ctx, doc, i);
547	if (entry->obj != NULL)
548	continue;
549	entry->type = `'f'`;
550	entry->ofs = `0`;
551	entry->gen = `0`;
552	entry->num = `0`;
553
554	entry->stm_ofs = `0`;
555	}
556
557	for (i = `0`; i < listlen; i++)
558	{
559	entry = pdf_get_populating_xref_entry(ctx, doc, list[i].num);
560	entry->type = `'n'`;
561	entry->ofs = list[i].ofs;
562	entry->gen = list[i].gen;
563	entry->num = list[i].num;
564
565	entry->stm_ofs = list[i].stm_ofs;
566
567	/ correct stream length for unencrypted documents /
568	if (!encrypt && list[i].stm_len >= `0`)
569	{
570	pdf_obj *old_obj = NULL;
571	dict = pdf_load_object(ctx, doc, list[i].num);
572
573	fz_try(ctx)
574	{
575	length = pdf_new_int(ctx, list[i].stm_len);
576	pdf_dict_get_put_drop(ctx, dict, PDF_NAME(Length), length, &old_obj);
577	if (old_obj)
578	orphan_object(ctx, doc, old_obj);
579	}
580	fz_always(ctx)
581	pdf_drop_obj(ctx, dict);
582	fz_catch(ctx)
583	fz_rethrow(ctx);
584	}
585	}
586
587	entry = pdf_get_populating_xref_entry(ctx, doc, `0`);
588	entry->type = `'f'`;
589	entry->ofs = `0`;
590	entry->gen = `65535`;
591	entry->num = `0`;
592	entry->stm_ofs = `0`;
593
594	next = `0`;
595	for (i = pdf_xref_len(ctx, doc) - `1`; i >= `0`; i--)
596	{
597	entry = pdf_get_populating_xref_entry(ctx, doc, i);
598	if (entry->type == `'f'`)
599	{
600	entry->ofs = next;
601	if (entry->gen < `65535`)
602	entry->gen ++;
603	next = i;
604	}
605	}
606
607	/ create a repaired trailer, Root will be added later /
608
609	obj = pdf_new_dict(ctx, doc, `5`);
610	/ During repair there is only a single xref section /
611	pdf_set_populating_xref_trailer(ctx, doc, obj);
612	pdf_drop_obj(ctx, obj);
613	obj = NULL;
614
615	obj = pdf_new_int(ctx, maxnum + `1`);
616	pdf_dict_put(ctx, pdf_trailer(ctx, doc), PDF_NAME(Size), obj);
617	pdf_drop_obj(ctx, obj);
618	obj = NULL;
619
620	if (roots)
621	{
622	for (i = num_roots-`1`; i > `0`; i--)
623	{
624	if (pdf_is_dict(ctx, roots[i]))
625	break;
626	}
627	if (i >= `0`)
628	{
629	pdf_dict_put(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), roots[i]);
630	}
631	}
632	if (info)
633	{
634	pdf_dict_put(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info), info);
635	pdf_drop_obj(ctx, info);
636	info = NULL;
637	}
638
639	if (encrypt)
640	{
641	if (pdf_is_indirect(ctx, encrypt))
642	{
643	/ create new reference with non-NULL xref pointer /
644	obj = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, encrypt), pdf_to_gen(ctx, encrypt));
645	pdf_drop_obj(ctx, encrypt);
646	encrypt = obj;
647	obj = NULL;
648	}
649	pdf_dict_put(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt), encrypt);
650	pdf_drop_obj(ctx, encrypt);
651	encrypt = NULL;
652	}
653
654	if (id)
655	{
656	if (pdf_is_indirect(ctx, id))
657	{
658	/ create new reference with non-NULL xref pointer /
659	obj = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, id), pdf_to_gen(ctx, id));
660	pdf_drop_obj(ctx, id);
661	id = obj;
662	obj = NULL;
663	}
664	pdf_dict_put(ctx, pdf_trailer(ctx, doc), PDF_NAME(ID), id);
665	pdf_drop_obj(ctx, id);
666	id = NULL;
667	}
668
669	fz_free(ctx, list);
670	}
671	fz_always(ctx)
672	{
673	for (i = `0`; i < num_roots; i++)
674	pdf_drop_obj(ctx, roots[i]);
675	fz_free(ctx, roots);
676	}
677	fz_catch(ctx)
678	{
679	pdf_drop_obj(ctx, encrypt);
680	pdf_drop_obj(ctx, id);
681	pdf_drop_obj(ctx, obj);
682	pdf_drop_obj(ctx, info);
683	fz_free(ctx, list);
684	fz_rethrow(ctx);
685	}
686	}
687
688	void
689	pdf_repair_obj_stms(fz_context ctx, pdf_document doc)
690	{
691	pdf_obj *dict;
692	int i;
693	int xref_len = pdf_xref_len(ctx, doc);
694
695	for (i = `0`; i < xref_len; i++)
696	{
697	pdf_xref_entry *entry = pdf_get_populating_xref_entry(ctx, doc, i);
698
699	if (entry->stm_ofs)
700	{
701	dict = pdf_load_object(ctx, doc, i);
702	fz_try(ctx)
703	{
704	if (pdf_name_eq(ctx, pdf_dict_get(ctx, dict, PDF_NAME(Type)), PDF_NAME(ObjStm)))
705	pdf_repair_obj_stm(ctx, doc, i);
706	}
707	fz_catch(ctx)
708	{
709	fz_warn(ctx, "ignoring broken object stream (%d 0 R)", i);
710	}
711	pdf_drop_obj(ctx, dict);
712	}
713	}
714
715	/ Ensure that streamed objects reside inside a known non-streamed object /
716	for (i = `0`; i < xref_len; i++)
717	{
718	pdf_xref_entry *entry = pdf_get_populating_xref_entry(ctx, doc, i);
719
720	if (entry->type == `'o'` && pdf_get_populating_xref_entry(ctx, doc, entry->ofs)->type != `'n'`)
721	fz_throw(ctx, FZ_ERROR_GENERIC, "invalid reference to non-object-stream: %d (%d 0 R)", (int)entry->ofs, i);
722	}
723	}
724

Browse the source code of MuPDF/source/pdf/pdf-repair.c