1/*
2 * Information tool.
3 * Print information about the input pdf.
4 */
5
6#include "mupdf/fitz.h"
7#include "mupdf/pdf.h"
8
9#include <string.h>
10#include <stdlib.h>
11#include <stdio.h>
12
/* Bit flags selecting which categories of information are gathered and printed. */
enum
{
	DIMENSIONS = 1 << 0,	/* page media boxes */
	FONTS = 1 << 1,
	IMAGES = 1 << 2,
	SHADINGS = 1 << 3,
	PATTERNS = 1 << 4,
	XOBJS = 1 << 5,		/* form and postscript xobjects */

	/* every category at once */
	ALL = DIMENSIONS | FONTS | IMAGES | SHADINGS | PATTERNS | XOBJS
};
23
24struct info
25{
26 int page;
27 pdf_obj *pageref;
28 union {
29 struct {
30 pdf_obj *obj;
31 } info;
32 struct {
33 pdf_obj *obj;
34 } crypt;
35 struct {
36 pdf_obj *obj;
37 fz_rect *bbox;
38 } dim;
39 struct {
40 pdf_obj *obj;
41 pdf_obj *subtype;
42 pdf_obj *name;
43 pdf_obj *encoding;
44 } font;
45 struct {
46 pdf_obj *obj;
47 pdf_obj *width;
48 pdf_obj *height;
49 pdf_obj *bpc;
50 pdf_obj *filter;
51 pdf_obj *cs;
52 pdf_obj *altcs;
53 } image;
54 struct {
55 pdf_obj *obj;
56 pdf_obj *type;
57 } shading;
58 struct {
59 pdf_obj *obj;
60 pdf_obj *type;
61 pdf_obj *paint;
62 pdf_obj *tiling;
63 pdf_obj *shading;
64 } pattern;
65 struct {
66 pdf_obj *obj;
67 pdf_obj *groupsubtype;
68 pdf_obj *reference;
69 } form;
70 } u;
71};
72
73typedef struct globals_s
74{
75 pdf_document *doc;
76 fz_context *ctx;
77 fz_output *out;
78 int pagecount;
79 struct info *dim;
80 int dims;
81 struct info *font;
82 int fonts;
83 struct info *image;
84 int images;
85 struct info *shading;
86 int shadings;
87 struct info *pattern;
88 int patterns;
89 struct info *form;
90 int forms;
91 struct info *psobj;
92 int psobjs;
93} globals;
94
95static void clearinfo(fz_context *ctx, globals *glo)
96{
97 int i;
98
99 if (glo->dim)
100 {
101 for (i = 0; i < glo->dims; i++)
102 fz_free(ctx, glo->dim[i].u.dim.bbox);
103 fz_free(ctx, glo->dim);
104 glo->dim = NULL;
105 glo->dims = 0;
106 }
107
108 if (glo->font)
109 {
110 fz_free(ctx, glo->font);
111 glo->font = NULL;
112 glo->fonts = 0;
113 }
114
115 if (glo->image)
116 {
117 fz_free(ctx, glo->image);
118 glo->image = NULL;
119 glo->images = 0;
120 }
121
122 if (glo->shading)
123 {
124 fz_free(ctx, glo->shading);
125 glo->shading = NULL;
126 glo->shadings = 0;
127 }
128
129 if (glo->pattern)
130 {
131 fz_free(ctx, glo->pattern);
132 glo->pattern = NULL;
133 glo->patterns = 0;
134 }
135
136 if (glo->form)
137 {
138 fz_free(ctx, glo->form);
139 glo->form = NULL;
140 glo->forms = 0;
141 }
142
143 if (glo->psobj)
144 {
145 fz_free(ctx, glo->psobj);
146 glo->psobj = NULL;
147 glo->psobjs = 0;
148 }
149}
150
151static void closexref(fz_context *ctx, globals *glo)
152{
153 if (glo->doc)
154 {
155 pdf_drop_document(ctx, glo->doc);
156 glo->doc = NULL;
157 }
158
159 clearinfo(ctx, glo);
160}
161
/* Print the command line synopsis on stderr and exit with status 1. */
static void
infousage(void)
{
	static const char usage[] =
		"usage: mutool info [options] file.pdf [pages]\n"
		"\t-p -\tpassword for decryption\n"
		"\t-F\tlist fonts\n"
		"\t-I\tlist images\n"
		"\t-M\tlist dimensions\n"
		"\t-P\tlist patterns\n"
		"\t-S\tlist shadings\n"
		"\t-X\tlist form and postscript xobjects\n"
		"\tpages\tcomma separated list of page numbers and ranges\n";

	fputs(usage, stderr);
	exit(1);
}
178
179static void
180showglobalinfo(fz_context *ctx, globals *glo)
181{
182 pdf_obj *obj;
183 fz_output *out = glo->out;
184 pdf_document *doc = glo->doc;
185
186 fz_write_printf(ctx, out, "\nPDF-%d.%d\n", doc->version / 10, doc->version % 10);
187
188 obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
189 if (obj)
190 {
191 fz_write_printf(ctx, out, "Info object (%d 0 R):\n", pdf_to_num(ctx, obj));
192 pdf_print_obj(ctx, out, pdf_resolve_indirect(ctx, obj), 1, 1);
193 }
194
195 obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
196 if (obj)
197 {
198 fz_write_printf(ctx, out, "\nEncryption object (%d 0 R):\n", pdf_to_num(ctx, obj));
199 pdf_print_obj(ctx, out, pdf_resolve_indirect(ctx, obj), 1, 1);
200 }
201
202 fz_write_printf(ctx, out, "\nPages: %d\n\n", glo->pagecount);
203}
204
205static void
206gatherdimensions(fz_context *ctx, globals *glo, int page, pdf_obj *pageref)
207{
208 fz_rect bbox;
209 pdf_obj *obj;
210 int j;
211
212 obj = pdf_dict_get(ctx, pageref, PDF_NAME(MediaBox));
213 if (!pdf_is_array(ctx, obj))
214 return;
215
216 bbox = pdf_to_rect(ctx, obj);
217
218 obj = pdf_dict_get(ctx, pageref, PDF_NAME(UserUnit));
219 if (pdf_is_real(ctx, obj))
220 {
221 float unit = pdf_to_real(ctx, obj);
222 bbox.x0 *= unit;
223 bbox.y0 *= unit;
224 bbox.x1 *= unit;
225 bbox.y1 *= unit;
226 }
227
228 for (j = 0; j < glo->dims; j++)
229 if (!memcmp(glo->dim[j].u.dim.bbox, &bbox, sizeof (fz_rect)))
230 break;
231
232 if (j < glo->dims)
233 return;
234
235 glo->dim = fz_realloc_array(ctx, glo->dim, glo->dims+1, struct info);
236 glo->dims++;
237
238 glo->dim[glo->dims - 1].page = page;
239 glo->dim[glo->dims - 1].pageref = pageref;
240 glo->dim[glo->dims - 1].u.dim.bbox = NULL;
241 glo->dim[glo->dims - 1].u.dim.bbox = fz_malloc(ctx, sizeof(fz_rect));
242 memcpy(glo->dim[glo->dims - 1].u.dim.bbox, &bbox, sizeof (fz_rect));
243
244 return;
245}
246
247static void
248gatherfonts(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
249{
250 int i, n;
251
252 n = pdf_dict_len(ctx, dict);
253 for (i = 0; i < n; i++)
254 {
255 pdf_obj *fontdict = NULL;
256 pdf_obj *subtype = NULL;
257 pdf_obj *basefont = NULL;
258 pdf_obj *name = NULL;
259 pdf_obj *encoding = NULL;
260 int k;
261
262 fontdict = pdf_dict_get_val(ctx, dict, i);
263 if (!pdf_is_dict(ctx, fontdict))
264 {
265 fz_warn(ctx, "not a font dict (%d 0 R)", pdf_to_num(ctx, fontdict));
266 continue;
267 }
268
269 subtype = pdf_dict_get(ctx, fontdict, PDF_NAME(Subtype));
270 basefont = pdf_dict_get(ctx, fontdict, PDF_NAME(BaseFont));
271 if (!basefont || pdf_is_null(ctx, basefont))
272 name = pdf_dict_get(ctx, fontdict, PDF_NAME(Name));
273 encoding = pdf_dict_get(ctx, fontdict, PDF_NAME(Encoding));
274 if (pdf_is_dict(ctx, encoding))
275 encoding = pdf_dict_get(ctx, encoding, PDF_NAME(BaseEncoding));
276
277 for (k = 0; k < glo->fonts; k++)
278 if (!pdf_objcmp(ctx, glo->font[k].u.font.obj, fontdict))
279 break;
280
281 if (k < glo->fonts)
282 continue;
283
284 glo->font = fz_realloc_array(ctx, glo->font, glo->fonts+1, struct info);
285 glo->fonts++;
286
287 glo->font[glo->fonts - 1].page = page;
288 glo->font[glo->fonts - 1].pageref = pageref;
289 glo->font[glo->fonts - 1].u.font.obj = fontdict;
290 glo->font[glo->fonts - 1].u.font.subtype = subtype;
291 glo->font[glo->fonts - 1].u.font.name = basefont ? basefont : name;
292 glo->font[glo->fonts - 1].u.font.encoding = encoding;
293 }
294}
295
296static void
297gatherimages(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
298{
299 int i, n;
300
301 n = pdf_dict_len(ctx, dict);
302 for (i = 0; i < n; i++)
303 {
304 pdf_obj *imagedict;
305 pdf_obj *type;
306 pdf_obj *width;
307 pdf_obj *height;
308 pdf_obj *bpc = NULL;
309 pdf_obj *filter = NULL;
310 pdf_obj *cs = NULL;
311 pdf_obj *altcs;
312 int k;
313
314 imagedict = pdf_dict_get_val(ctx, dict, i);
315 if (!pdf_is_dict(ctx, imagedict))
316 {
317 fz_warn(ctx, "not an image dict (%d 0 R)", pdf_to_num(ctx, imagedict));
318 continue;
319 }
320
321 type = pdf_dict_get(ctx, imagedict, PDF_NAME(Subtype));
322 if (!pdf_name_eq(ctx, type, PDF_NAME(Image)))
323 continue;
324
325 filter = pdf_dict_get(ctx, imagedict, PDF_NAME(Filter));
326
327 altcs = NULL;
328 cs = pdf_dict_get(ctx, imagedict, PDF_NAME(ColorSpace));
329 if (pdf_is_array(ctx, cs))
330 {
331 pdf_obj *cses = cs;
332
333 cs = pdf_array_get(ctx, cses, 0);
334 if (pdf_name_eq(ctx, cs, PDF_NAME(DeviceN)) || pdf_name_eq(ctx, cs, PDF_NAME(Separation)))
335 {
336 altcs = pdf_array_get(ctx, cses, 2);
337 if (pdf_is_array(ctx, altcs))
338 altcs = pdf_array_get(ctx, altcs, 0);
339 }
340 }
341
342 width = pdf_dict_get(ctx, imagedict, PDF_NAME(Width));
343 height = pdf_dict_get(ctx, imagedict, PDF_NAME(Height));
344 bpc = pdf_dict_get(ctx, imagedict, PDF_NAME(BitsPerComponent));
345
346 for (k = 0; k < glo->images; k++)
347 if (!pdf_objcmp(ctx, glo->image[k].u.image.obj, imagedict))
348 break;
349
350 if (k < glo->images)
351 continue;
352
353 glo->image = fz_realloc_array(ctx, glo->image, glo->images+1, struct info);
354 glo->images++;
355
356 glo->image[glo->images - 1].page = page;
357 glo->image[glo->images - 1].pageref = pageref;
358 glo->image[glo->images - 1].u.image.obj = imagedict;
359 glo->image[glo->images - 1].u.image.width = width;
360 glo->image[glo->images - 1].u.image.height = height;
361 glo->image[glo->images - 1].u.image.bpc = bpc;
362 glo->image[glo->images - 1].u.image.filter = filter;
363 glo->image[glo->images - 1].u.image.cs = cs;
364 glo->image[glo->images - 1].u.image.altcs = altcs;
365 }
366}
367
368static void
369gatherforms(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
370{
371 int i, n;
372
373 n = pdf_dict_len(ctx, dict);
374 for (i = 0; i < n; i++)
375 {
376 pdf_obj *xobjdict;
377 pdf_obj *type;
378 pdf_obj *subtype;
379 pdf_obj *group;
380 pdf_obj *groupsubtype;
381 pdf_obj *reference;
382 int k;
383
384 xobjdict = pdf_dict_get_val(ctx, dict, i);
385 if (!pdf_is_dict(ctx, xobjdict))
386 {
387 fz_warn(ctx, "not a xobject dict (%d 0 R)", pdf_to_num(ctx, xobjdict));
388 continue;
389 }
390
391 type = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype));
392 if (!pdf_name_eq(ctx, type, PDF_NAME(Form)))
393 continue;
394
395 subtype = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype2));
396 if (!pdf_name_eq(ctx, subtype, PDF_NAME(PS)))
397 continue;
398
399 group = pdf_dict_get(ctx, xobjdict, PDF_NAME(Group));
400 groupsubtype = pdf_dict_get(ctx, group, PDF_NAME(S));
401 reference = pdf_dict_get(ctx, xobjdict, PDF_NAME(Ref));
402
403 for (k = 0; k < glo->forms; k++)
404 if (!pdf_objcmp(ctx, glo->form[k].u.form.obj, xobjdict))
405 break;
406
407 if (k < glo->forms)
408 continue;
409
410 glo->form = fz_realloc_array(ctx, glo->form, glo->forms+1, struct info);
411 glo->forms++;
412
413 glo->form[glo->forms - 1].page = page;
414 glo->form[glo->forms - 1].pageref = pageref;
415 glo->form[glo->forms - 1].u.form.obj = xobjdict;
416 glo->form[glo->forms - 1].u.form.groupsubtype = groupsubtype;
417 glo->form[glo->forms - 1].u.form.reference = reference;
418 }
419}
420
421static void
422gatherpsobjs(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
423{
424 int i, n;
425
426 n = pdf_dict_len(ctx, dict);
427 for (i = 0; i < n; i++)
428 {
429 pdf_obj *xobjdict;
430 pdf_obj *type;
431 pdf_obj *subtype;
432 int k;
433
434 xobjdict = pdf_dict_get_val(ctx, dict, i);
435 if (!pdf_is_dict(ctx, xobjdict))
436 {
437 fz_warn(ctx, "not a xobject dict (%d 0 R)", pdf_to_num(ctx, xobjdict));
438 continue;
439 }
440
441 type = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype));
442 subtype = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype2));
443 if (!pdf_name_eq(ctx, type, PDF_NAME(PS)) &&
444 (!pdf_name_eq(ctx, type, PDF_NAME(Form)) || !pdf_name_eq(ctx, subtype, PDF_NAME(PS))))
445 continue;
446
447 for (k = 0; k < glo->psobjs; k++)
448 if (!pdf_objcmp(ctx, glo->psobj[k].u.form.obj, xobjdict))
449 break;
450
451 if (k < glo->psobjs)
452 continue;
453
454 glo->psobj = fz_realloc_array(ctx, glo->psobj, glo->psobjs+1, struct info);
455 glo->psobjs++;
456
457 glo->psobj[glo->psobjs - 1].page = page;
458 glo->psobj[glo->psobjs - 1].pageref = pageref;
459 glo->psobj[glo->psobjs - 1].u.form.obj = xobjdict;
460 }
461}
462
463static void
464gathershadings(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
465{
466 int i, n;
467
468 n = pdf_dict_len(ctx, dict);
469 for (i = 0; i < n; i++)
470 {
471 pdf_obj *shade;
472 pdf_obj *type;
473 int k;
474
475 shade = pdf_dict_get_val(ctx, dict, i);
476 if (!pdf_is_dict(ctx, shade))
477 {
478 fz_warn(ctx, "not a shading dict (%d 0 R)", pdf_to_num(ctx, shade));
479 continue;
480 }
481
482 type = pdf_dict_get(ctx, shade, PDF_NAME(ShadingType));
483 if (!pdf_is_int(ctx, type) || pdf_to_int(ctx, type) < 1 || pdf_to_int(ctx, type) > 7)
484 {
485 fz_warn(ctx, "not a shading type (%d 0 R)", pdf_to_num(ctx, shade));
486 type = NULL;
487 }
488
489 for (k = 0; k < glo->shadings; k++)
490 if (!pdf_objcmp(ctx, glo->shading[k].u.shading.obj, shade))
491 break;
492
493 if (k < glo->shadings)
494 continue;
495
496 glo->shading = fz_realloc_array(ctx, glo->shading, glo->shadings+1, struct info);
497 glo->shadings++;
498
499 glo->shading[glo->shadings - 1].page = page;
500 glo->shading[glo->shadings - 1].pageref = pageref;
501 glo->shading[glo->shadings - 1].u.shading.obj = shade;
502 glo->shading[glo->shadings - 1].u.shading.type = type;
503 }
504}
505
506static void
507gatherpatterns(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
508{
509 int i, n;
510
511 n = pdf_dict_len(ctx, dict);
512 for (i = 0; i < n; i++)
513 {
514 pdf_obj *patterndict;
515 pdf_obj *type;
516 pdf_obj *paint = NULL;
517 pdf_obj *tiling = NULL;
518 pdf_obj *shading = NULL;
519 int k;
520
521 patterndict = pdf_dict_get_val(ctx, dict, i);
522 if (!pdf_is_dict(ctx, patterndict))
523 {
524 fz_warn(ctx, "not a pattern dict (%d 0 R)", pdf_to_num(ctx, patterndict));
525 continue;
526 }
527
528 type = pdf_dict_get(ctx, patterndict, PDF_NAME(PatternType));
529 if (!pdf_is_int(ctx, type) || pdf_to_int(ctx, type) < 1 || pdf_to_int(ctx, type) > 2)
530 {
531 fz_warn(ctx, "not a pattern type (%d 0 R)", pdf_to_num(ctx, patterndict));
532 type = NULL;
533 }
534
535 if (pdf_to_int(ctx, type) == 1)
536 {
537 paint = pdf_dict_get(ctx, patterndict, PDF_NAME(PaintType));
538 if (!pdf_is_int(ctx, paint) || pdf_to_int(ctx, paint) < 1 || pdf_to_int(ctx, paint) > 2)
539 {
540 fz_warn(ctx, "not a pattern paint type (%d 0 R)", pdf_to_num(ctx, patterndict));
541 paint = NULL;
542 }
543
544 tiling = pdf_dict_get(ctx, patterndict, PDF_NAME(TilingType));
545 if (!pdf_is_int(ctx, tiling) || pdf_to_int(ctx, tiling) < 1 || pdf_to_int(ctx, tiling) > 3)
546 {
547 fz_warn(ctx, "not a pattern tiling type (%d 0 R)", pdf_to_num(ctx, patterndict));
548 tiling = NULL;
549 }
550 }
551 else
552 {
553 shading = pdf_dict_get(ctx, patterndict, PDF_NAME(Shading));
554 }
555
556 for (k = 0; k < glo->patterns; k++)
557 if (!pdf_objcmp(ctx, glo->pattern[k].u.pattern.obj, patterndict))
558 break;
559
560 if (k < glo->patterns)
561 continue;
562
563 glo->pattern = fz_realloc_array(ctx, glo->pattern, glo->patterns+1, struct info);
564 glo->patterns++;
565
566 glo->pattern[glo->patterns - 1].page = page;
567 glo->pattern[glo->patterns - 1].pageref = pageref;
568 glo->pattern[glo->patterns - 1].u.pattern.obj = patterndict;
569 glo->pattern[glo->patterns - 1].u.pattern.type = type;
570 glo->pattern[glo->patterns - 1].u.pattern.paint = paint;
571 glo->pattern[glo->patterns - 1].u.pattern.tiling = tiling;
572 glo->pattern[glo->patterns - 1].u.pattern.shading = shading;
573 }
574}
575
576static void
577gatherresourceinfo(fz_context *ctx, globals *glo, int page, pdf_obj *rsrc, int show)
578{
579 pdf_obj *pageref;
580 pdf_obj *font;
581 pdf_obj *xobj;
582 pdf_obj *shade;
583 pdf_obj *pattern;
584 pdf_obj *subrsrc;
585 int i;
586
587 pageref = pdf_lookup_page_obj(ctx, glo->doc, page-1);
588 if (!pageref)
589 fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d", page);
590
591 /* stop on cyclic resource dependencies */
592 if (pdf_mark_obj(ctx, rsrc))
593 return;
594
595 fz_try(ctx)
596 {
597 font = pdf_dict_get(ctx, rsrc, PDF_NAME(Font));
598 if (show & FONTS && font)
599 {
600 int n;
601
602 gatherfonts(ctx, glo, page, pageref, font);
603 n = pdf_dict_len(ctx, font);
604 for (i = 0; i < n; i++)
605 {
606 pdf_obj *obj = pdf_dict_get_val(ctx, font, i);
607
608 subrsrc = pdf_dict_get(ctx, obj, PDF_NAME(Resources));
609 if (subrsrc && pdf_objcmp(ctx, rsrc, subrsrc))
610 gatherresourceinfo(ctx, glo, page, subrsrc, show);
611 }
612 }
613
614 xobj = pdf_dict_get(ctx, rsrc, PDF_NAME(XObject));
615 if (show & (IMAGES|XOBJS) && xobj)
616 {
617 int n;
618
619 if (show & IMAGES)
620 gatherimages(ctx, glo, page, pageref, xobj);
621 if (show & XOBJS)
622 {
623 gatherforms(ctx, glo, page, pageref, xobj);
624 gatherpsobjs(ctx, glo, page, pageref, xobj);
625 }
626 n = pdf_dict_len(ctx, xobj);
627 for (i = 0; i < n; i++)
628 {
629 pdf_obj *obj = pdf_dict_get_val(ctx, xobj, i);
630 subrsrc = pdf_dict_get(ctx, obj, PDF_NAME(Resources));
631 if (subrsrc && pdf_objcmp(ctx, rsrc, subrsrc))
632 gatherresourceinfo(ctx, glo, page, subrsrc, show);
633 }
634 }
635
636 shade = pdf_dict_get(ctx, rsrc, PDF_NAME(Shading));
637 if (show & SHADINGS && shade)
638 gathershadings(ctx, glo, page, pageref, shade);
639
640 pattern = pdf_dict_get(ctx, rsrc, PDF_NAME(Pattern));
641 if (show & PATTERNS && pattern)
642 {
643 int n;
644 gatherpatterns(ctx, glo, page, pageref, pattern);
645 n = pdf_dict_len(ctx, pattern);
646 for (i = 0; i < n; i++)
647 {
648 pdf_obj *obj = pdf_dict_get_val(ctx, pattern, i);
649 subrsrc = pdf_dict_get(ctx, obj, PDF_NAME(Resources));
650 if (subrsrc && pdf_objcmp(ctx, rsrc, subrsrc))
651 gatherresourceinfo(ctx, glo, page, subrsrc, show);
652 }
653 }
654 }
655 fz_always(ctx)
656 pdf_unmark_obj(ctx, rsrc);
657 fz_catch(ctx)
658 fz_rethrow(ctx);
659}
660
661static void
662gatherpageinfo(fz_context *ctx, globals *glo, int page, int show)
663{
664 pdf_obj *pageref;
665 pdf_obj *rsrc;
666
667 pageref = pdf_lookup_page_obj(ctx, glo->doc, page-1);
668
669 if (!pageref)
670 fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d", page);
671
672 gatherdimensions(ctx, glo, page, pageref);
673
674 rsrc = pdf_dict_get(ctx, pageref, PDF_NAME(Resources));
675 gatherresourceinfo(ctx, glo, page, rsrc, show);
676}
677
678static void
679printinfo(fz_context *ctx, globals *glo, char *filename, int show, int page)
680{
681 int i;
682 int j;
683 fz_output *out = glo->out;
684
685#define PAGE_FMT_zu "\t%d\t(%d 0 R):\t"
686
687 if (show & DIMENSIONS && glo->dims > 0)
688 {
689 fz_write_printf(ctx, out, "Mediaboxes (%d):\n", glo->dims);
690 for (i = 0; i < glo->dims; i++)
691 {
692 fz_write_printf(ctx, out, PAGE_FMT_zu "[ %g %g %g %g ]\n",
693 glo->dim[i].page,
694 pdf_to_num(ctx, glo->dim[i].pageref),
695 glo->dim[i].u.dim.bbox->x0,
696 glo->dim[i].u.dim.bbox->y0,
697 glo->dim[i].u.dim.bbox->x1,
698 glo->dim[i].u.dim.bbox->y1);
699 }
700 fz_write_printf(ctx, out, "\n");
701 }
702
703 if (show & FONTS && glo->fonts > 0)
704 {
705 fz_write_printf(ctx, out, "Fonts (%d):\n", glo->fonts);
706 for (i = 0; i < glo->fonts; i++)
707 {
708 fz_write_printf(ctx, out, PAGE_FMT_zu "%s '%s' %s%s(%d 0 R)\n",
709 glo->font[i].page,
710 pdf_to_num(ctx, glo->font[i].pageref),
711 pdf_to_name(ctx, glo->font[i].u.font.subtype),
712 pdf_to_name(ctx, glo->font[i].u.font.name),
713 glo->font[i].u.font.encoding ? pdf_to_name(ctx, glo->font[i].u.font.encoding) : "",
714 glo->font[i].u.font.encoding ? " " : "",
715 pdf_to_num(ctx, glo->font[i].u.font.obj));
716 }
717 fz_write_printf(ctx, out, "\n");
718 }
719
720 if (show & IMAGES && glo->images > 0)
721 {
722 fz_write_printf(ctx, out, "Images (%d):\n", glo->images);
723 for (i = 0; i < glo->images; i++)
724 {
725 char *cs = NULL;
726 char *altcs = NULL;
727
728 fz_write_printf(ctx, out, PAGE_FMT_zu "[ ",
729 glo->image[i].page,
730 pdf_to_num(ctx, glo->image[i].pageref));
731
732 if (pdf_is_array(ctx, glo->image[i].u.image.filter))
733 {
734 int n = pdf_array_len(ctx, glo->image[i].u.image.filter);
735 for (j = 0; j < n; j++)
736 {
737 pdf_obj *obj = pdf_array_get(ctx, glo->image[i].u.image.filter, j);
738 char *filter = fz_strdup(ctx, pdf_to_name(ctx, obj));
739
740 if (strstr(filter, "Decode"))
741 *(strstr(filter, "Decode")) = '\0';
742
743 fz_write_printf(ctx, out, "%s%s",
744 filter,
745 j == pdf_array_len(ctx, glo->image[i].u.image.filter) - 1 ? "" : " ");
746 fz_free(ctx, filter);
747 }
748 }
749 else if (glo->image[i].u.image.filter)
750 {
751 pdf_obj *obj = glo->image[i].u.image.filter;
752 char *filter = fz_strdup(ctx, pdf_to_name(ctx, obj));
753
754 if (strstr(filter, "Decode"))
755 *(strstr(filter, "Decode")) = '\0';
756
757 fz_write_printf(ctx, out, "%s", filter);
758 fz_free(ctx, filter);
759 }
760 else
761 fz_write_printf(ctx, out, "Raw");
762
763 if (glo->image[i].u.image.cs)
764 {
765 cs = fz_strdup(ctx, pdf_to_name(ctx, glo->image[i].u.image.cs));
766
767 if (!strncmp(cs, "Device", 6))
768 {
769 size_t len = strlen(cs + 6);
770 memmove(cs + 3, cs + 6, len + 1);
771 cs[3 + len + 1] = '\0';
772 }
773 if (strstr(cs, "ICC"))
774 fz_strlcpy(cs, "ICC", 4);
775 if (strstr(cs, "Indexed"))
776 fz_strlcpy(cs, "Idx", 4);
777 if (strstr(cs, "Pattern"))
778 fz_strlcpy(cs, "Pat", 4);
779 if (strstr(cs, "Separation"))
780 fz_strlcpy(cs, "Sep", 4);
781 }
782 if (glo->image[i].u.image.altcs)
783 {
784 altcs = fz_strdup(ctx, pdf_to_name(ctx, glo->image[i].u.image.altcs));
785
786 if (!strncmp(altcs, "Device", 6))
787 {
788 size_t len = strlen(altcs + 6);
789 memmove(altcs + 3, altcs + 6, len + 1);
790 altcs[3 + len + 1] = '\0';
791 }
792 if (strstr(altcs, "ICC"))
793 fz_strlcpy(altcs, "ICC", 4);
794 if (strstr(altcs, "Indexed"))
795 fz_strlcpy(altcs, "Idx", 4);
796 if (strstr(altcs, "Pattern"))
797 fz_strlcpy(altcs, "Pat", 4);
798 if (strstr(altcs, "Separation"))
799 fz_strlcpy(altcs, "Sep", 4);
800 }
801
802 fz_write_printf(ctx, out, " ] %dx%d %dbpc %s%s%s (%d 0 R)\n",
803 pdf_to_int(ctx, glo->image[i].u.image.width),
804 pdf_to_int(ctx, glo->image[i].u.image.height),
805 glo->image[i].u.image.bpc ? pdf_to_int(ctx, glo->image[i].u.image.bpc) : 1,
806 glo->image[i].u.image.cs ? cs : "ImageMask",
807 glo->image[i].u.image.altcs ? " " : "",
808 glo->image[i].u.image.altcs ? altcs : "",
809 pdf_to_num(ctx, glo->image[i].u.image.obj));
810
811 fz_free(ctx, cs);
812 fz_free(ctx, altcs);
813 }
814 fz_write_printf(ctx, out, "\n");
815 }
816
817 if (show & SHADINGS && glo->shadings > 0)
818 {
819 fz_write_printf(ctx, out, "Shading patterns (%d):\n", glo->shadings);
820 for (i = 0; i < glo->shadings; i++)
821 {
822 char *shadingtype[] =
823 {
824 "",
825 "Function",
826 "Axial",
827 "Radial",
828 "Triangle mesh",
829 "Lattice",
830 "Coons patch",
831 "Tensor patch",
832 };
833
834 fz_write_printf(ctx, out, PAGE_FMT_zu "%s (%d 0 R)\n",
835 glo->shading[i].page,
836 pdf_to_num(ctx, glo->shading[i].pageref),
837 shadingtype[pdf_to_int(ctx, glo->shading[i].u.shading.type)],
838 pdf_to_num(ctx, glo->shading[i].u.shading.obj));
839 }
840 fz_write_printf(ctx, out, "\n");
841 }
842
843 if (show & PATTERNS && glo->patterns > 0)
844 {
845 fz_write_printf(ctx, out, "Patterns (%d):\n", glo->patterns);
846 for (i = 0; i < glo->patterns; i++)
847 {
848 if (pdf_to_int(ctx, glo->pattern[i].u.pattern.type) == 1)
849 {
850 char *painttype[] =
851 {
852 "",
853 "Colored",
854 "Uncolored",
855 };
856 char *tilingtype[] =
857 {
858 "",
859 "Constant",
860 "No distortion",
861 "Constant/fast tiling",
862 };
863
864 fz_write_printf(ctx, out, PAGE_FMT_zu "Tiling %s %s (%d 0 R)\n",
865 glo->pattern[i].page,
866 pdf_to_num(ctx, glo->pattern[i].pageref),
867 painttype[pdf_to_int(ctx, glo->pattern[i].u.pattern.paint)],
868 tilingtype[pdf_to_int(ctx, glo->pattern[i].u.pattern.tiling)],
869 pdf_to_num(ctx, glo->pattern[i].u.pattern.obj));
870 }
871 else
872 {
873 fz_write_printf(ctx, out, PAGE_FMT_zu "Shading %d 0 R (%d 0 R)\n",
874 glo->pattern[i].page,
875 pdf_to_num(ctx, glo->pattern[i].pageref),
876 pdf_to_num(ctx, glo->pattern[i].u.pattern.shading),
877 pdf_to_num(ctx, glo->pattern[i].u.pattern.obj));
878 }
879 }
880 fz_write_printf(ctx, out, "\n");
881 }
882
883 if (show & XOBJS && glo->forms > 0)
884 {
885 fz_write_printf(ctx, out, "Form xobjects (%d):\n", glo->forms);
886 for (i = 0; i < glo->forms; i++)
887 {
888 fz_write_printf(ctx, out, PAGE_FMT_zu "Form%s%s%s%s (%d 0 R)\n",
889 glo->form[i].page,
890 pdf_to_num(ctx, glo->form[i].pageref),
891 glo->form[i].u.form.groupsubtype ? " " : "",
892 glo->form[i].u.form.groupsubtype ? pdf_to_name(ctx, glo->form[i].u.form.groupsubtype) : "",
893 glo->form[i].u.form.groupsubtype ? " Group" : "",
894 glo->form[i].u.form.reference ? " Reference" : "",
895 pdf_to_num(ctx, glo->form[i].u.form.obj));
896 }
897 fz_write_printf(ctx, out, "\n");
898 }
899
900 if (show & XOBJS && glo->psobjs > 0)
901 {
902 fz_write_printf(ctx, out, "Postscript xobjects (%d):\n", glo->psobjs);
903 for (i = 0; i < glo->psobjs; i++)
904 {
905 fz_write_printf(ctx, out, PAGE_FMT_zu "(%d 0 R)\n",
906 glo->psobj[i].page,
907 pdf_to_num(ctx, glo->psobj[i].pageref),
908 pdf_to_num(ctx, glo->psobj[i].u.form.obj));
909 }
910 fz_write_printf(ctx, out, "\n");
911 }
912}
913
914static void
915showinfo(fz_context *ctx, globals *glo, char *filename, int show, const char *pagelist)
916{
917 int page, spage, epage;
918 int allpages;
919 int pagecount;
920 fz_output *out = glo->out;
921
922 if (!glo->doc)
923 infousage();
924
925 allpages = !strcmp(pagelist, "1-N");
926
927 pagecount = pdf_count_pages(ctx, glo->doc);
928
929 while ((pagelist = fz_parse_page_range(ctx, pagelist, &spage, &epage, pagecount)))
930 {
931 if (allpages)
932 fz_write_printf(ctx, out, "Retrieving info from pages %d-%d...\n", spage, epage);
933 for (page = spage; page <= epage; page++)
934 {
935 gatherpageinfo(ctx, glo, page, show);
936 if (!allpages)
937 {
938 fz_write_printf(ctx, out, "Page %d:\n", page);
939 printinfo(ctx, glo, filename, show, page);
940 fz_write_printf(ctx, out, "\n");
941 clearinfo(ctx, glo);
942 }
943 }
944 }
945
946 if (allpages)
947 printinfo(ctx, glo, filename, show, -1);
948}
949
950static void
951pdfinfo_info(fz_context *ctx, fz_output *out, char *filename, char *password, int show, char *argv[], int argc)
952{
953 enum { NO_FILE_OPENED, NO_INFO_GATHERED, INFO_SHOWN } state;
954 int argidx = 0;
955 globals glo = { 0 };
956
957 glo.out = out;
958 glo.ctx = ctx;
959
960 state = NO_FILE_OPENED;
961
962 fz_try(ctx)
963 {
964 while (argidx < argc)
965 {
966 if (state == NO_FILE_OPENED || !fz_is_page_range(ctx, argv[argidx]))
967 {
968 if (state == NO_INFO_GATHERED)
969 {
970 showinfo(ctx, &glo, filename, show, "1-N");
971 }
972
973 closexref(ctx, &glo);
974
975 filename = argv[argidx];
976 fz_write_printf(ctx, out, "%s:\n", filename);
977 glo.doc = pdf_open_document(glo.ctx, filename);
978 if (pdf_needs_password(ctx, glo.doc))
979 if (!pdf_authenticate_password(ctx, glo.doc, password))
980 fz_throw(glo.ctx, FZ_ERROR_GENERIC, "cannot authenticate password: %s", filename);
981 glo.pagecount = pdf_count_pages(ctx, glo.doc);
982
983 showglobalinfo(ctx, &glo);
984 state = NO_INFO_GATHERED;
985 }
986 else
987 {
988 showinfo(ctx, &glo, filename, show, argv[argidx]);
989 state = INFO_SHOWN;
990 }
991
992 argidx++;
993 }
994
995 if (state == NO_INFO_GATHERED)
996 showinfo(ctx, &glo, filename, show, "1-N");
997 }
998 fz_always(ctx)
999 closexref(ctx, &glo);
1000 fz_catch(ctx)
1001 fz_rethrow(ctx);
1002}
1003
1004int pdfinfo_main(int argc, char **argv)
1005{
1006 char *filename = "";
1007 char *password = "";
1008 int show = ALL;
1009 int c;
1010 int ret;
1011 fz_context *ctx;
1012
1013 while ((c = fz_getopt(argc, argv, "FISPXMp:")) != -1)
1014 {
1015 switch (c)
1016 {
1017 case 'F': if (show == ALL) show = FONTS; else show |= FONTS; break;
1018 case 'I': if (show == ALL) show = IMAGES; else show |= IMAGES; break;
1019 case 'S': if (show == ALL) show = SHADINGS; else show |= SHADINGS; break;
1020 case 'P': if (show == ALL) show = PATTERNS; else show |= PATTERNS; break;
1021 case 'X': if (show == ALL) show = XOBJS; else show |= XOBJS; break;
1022 case 'M': if (show == ALL) show = DIMENSIONS; else show |= DIMENSIONS; break;
1023 case 'p': password = fz_optarg; break;
1024 default:
1025 infousage();
1026 break;
1027 }
1028 }
1029
1030 if (fz_optind == argc)
1031 infousage();
1032
1033 ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
1034 if (!ctx)
1035 {
1036 fprintf(stderr, "cannot initialise context\n");
1037 exit(1);
1038 }
1039
1040 ret = 0;
1041 fz_try(ctx)
1042 pdfinfo_info(ctx, fz_stdout(ctx), filename, password, show, &argv[fz_optind], argc-fz_optind);
1043 fz_catch(ctx)
1044 ret = 1;
1045 fz_drop_context(ctx);
1046 return ret;
1047}
1048