1 | /* |
2 | * Information tool. |
3 | * Print information about the input pdf. |
4 | */ |
5 | |
6 | #include "mupdf/fitz.h" |
7 | #include "mupdf/pdf.h" |
8 | |
9 | #include <string.h> |
10 | #include <stdlib.h> |
11 | #include <stdio.h> |
12 | |
/*
 * Bit flags selecting which categories of information are gathered
 * and printed. They are combined with bitwise OR into a "show" mask.
 */
enum
{
	DIMENSIONS = 1 << 0,	/* page media boxes */
	FONTS = 1 << 1,		/* font resources */
	IMAGES = 1 << 2,	/* image xobjects */
	SHADINGS = 1 << 3,	/* shading resources */
	PATTERNS = 1 << 4,	/* pattern resources */
	XOBJS = 1 << 5,		/* form and postscript xobjects */

	/* Every category at once. */
	ALL = DIMENSIONS | FONTS | IMAGES | SHADINGS | PATTERNS | XOBJS
};
23 | |
24 | struct info |
25 | { |
26 | int page; |
27 | pdf_obj *; |
28 | union { |
29 | struct { |
30 | pdf_obj *obj; |
31 | } info; |
32 | struct { |
33 | pdf_obj *obj; |
34 | } crypt; |
35 | struct { |
36 | pdf_obj *obj; |
37 | fz_rect *bbox; |
38 | } dim; |
39 | struct { |
40 | pdf_obj *obj; |
41 | pdf_obj *subtype; |
42 | pdf_obj *name; |
43 | pdf_obj *encoding; |
44 | } font; |
45 | struct { |
46 | pdf_obj *obj; |
47 | pdf_obj *width; |
48 | pdf_obj *height; |
49 | pdf_obj *bpc; |
50 | pdf_obj *filter; |
51 | pdf_obj *cs; |
52 | pdf_obj *altcs; |
53 | } image; |
54 | struct { |
55 | pdf_obj *obj; |
56 | pdf_obj *type; |
57 | } shading; |
58 | struct { |
59 | pdf_obj *obj; |
60 | pdf_obj *type; |
61 | pdf_obj *paint; |
62 | pdf_obj *tiling; |
63 | pdf_obj *shading; |
64 | } pattern; |
65 | struct { |
66 | pdf_obj *obj; |
67 | pdf_obj *groupsubtype; |
68 | pdf_obj *reference; |
69 | } form; |
70 | } u; |
71 | }; |
72 | |
73 | typedef struct globals_s |
74 | { |
75 | pdf_document *doc; |
76 | fz_context *ctx; |
77 | fz_output *out; |
78 | int pagecount; |
79 | struct info *dim; |
80 | int dims; |
81 | struct info *font; |
82 | int fonts; |
83 | struct info *image; |
84 | int images; |
85 | struct info *shading; |
86 | int shadings; |
87 | struct info *pattern; |
88 | int patterns; |
89 | struct info *form; |
90 | int forms; |
91 | struct info *psobj; |
92 | int psobjs; |
93 | } globals; |
94 | |
95 | static void clearinfo(fz_context *ctx, globals *glo) |
96 | { |
97 | int i; |
98 | |
99 | if (glo->dim) |
100 | { |
101 | for (i = 0; i < glo->dims; i++) |
102 | fz_free(ctx, glo->dim[i].u.dim.bbox); |
103 | fz_free(ctx, glo->dim); |
104 | glo->dim = NULL; |
105 | glo->dims = 0; |
106 | } |
107 | |
108 | if (glo->font) |
109 | { |
110 | fz_free(ctx, glo->font); |
111 | glo->font = NULL; |
112 | glo->fonts = 0; |
113 | } |
114 | |
115 | if (glo->image) |
116 | { |
117 | fz_free(ctx, glo->image); |
118 | glo->image = NULL; |
119 | glo->images = 0; |
120 | } |
121 | |
122 | if (glo->shading) |
123 | { |
124 | fz_free(ctx, glo->shading); |
125 | glo->shading = NULL; |
126 | glo->shadings = 0; |
127 | } |
128 | |
129 | if (glo->pattern) |
130 | { |
131 | fz_free(ctx, glo->pattern); |
132 | glo->pattern = NULL; |
133 | glo->patterns = 0; |
134 | } |
135 | |
136 | if (glo->form) |
137 | { |
138 | fz_free(ctx, glo->form); |
139 | glo->form = NULL; |
140 | glo->forms = 0; |
141 | } |
142 | |
143 | if (glo->psobj) |
144 | { |
145 | fz_free(ctx, glo->psobj); |
146 | glo->psobj = NULL; |
147 | glo->psobjs = 0; |
148 | } |
149 | } |
150 | |
151 | static void closexref(fz_context *ctx, globals *glo) |
152 | { |
153 | if (glo->doc) |
154 | { |
155 | pdf_drop_document(ctx, glo->doc); |
156 | glo->doc = NULL; |
157 | } |
158 | |
159 | clearinfo(ctx, glo); |
160 | } |
161 | |
/* Print the command line help to stderr and terminate with exit status 1. */
static void
infousage(void)
{
	static const char usage[] =
		"usage: mutool info [options] file.pdf [pages]\n"
		"\t-p -\tpassword for decryption\n"
		"\t-F\tlist fonts\n"
		"\t-I\tlist images\n"
		"\t-M\tlist dimensions\n"
		"\t-P\tlist patterns\n"
		"\t-S\tlist shadings\n"
		"\t-X\tlist form and postscript xobjects\n"
		"\tpages\tcomma separated list of page numbers and ranges\n";

	fputs(usage, stderr);
	exit(1);
}
178 | |
179 | static void |
180 | showglobalinfo(fz_context *ctx, globals *glo) |
181 | { |
182 | pdf_obj *obj; |
183 | fz_output *out = glo->out; |
184 | pdf_document *doc = glo->doc; |
185 | |
186 | fz_write_printf(ctx, out, "\nPDF-%d.%d\n" , doc->version / 10, doc->version % 10); |
187 | |
188 | obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info)); |
189 | if (obj) |
190 | { |
191 | fz_write_printf(ctx, out, "Info object (%d 0 R):\n" , pdf_to_num(ctx, obj)); |
192 | pdf_print_obj(ctx, out, pdf_resolve_indirect(ctx, obj), 1, 1); |
193 | } |
194 | |
195 | obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt)); |
196 | if (obj) |
197 | { |
198 | fz_write_printf(ctx, out, "\nEncryption object (%d 0 R):\n" , pdf_to_num(ctx, obj)); |
199 | pdf_print_obj(ctx, out, pdf_resolve_indirect(ctx, obj), 1, 1); |
200 | } |
201 | |
202 | fz_write_printf(ctx, out, "\nPages: %d\n\n" , glo->pagecount); |
203 | } |
204 | |
205 | static void |
206 | gatherdimensions(fz_context *ctx, globals *glo, int page, pdf_obj *) |
207 | { |
208 | fz_rect bbox; |
209 | pdf_obj *obj; |
210 | int j; |
211 | |
212 | obj = pdf_dict_get(ctx, pageref, PDF_NAME(MediaBox)); |
213 | if (!pdf_is_array(ctx, obj)) |
214 | return; |
215 | |
216 | bbox = pdf_to_rect(ctx, obj); |
217 | |
218 | obj = pdf_dict_get(ctx, pageref, PDF_NAME(UserUnit)); |
219 | if (pdf_is_real(ctx, obj)) |
220 | { |
221 | float unit = pdf_to_real(ctx, obj); |
222 | bbox.x0 *= unit; |
223 | bbox.y0 *= unit; |
224 | bbox.x1 *= unit; |
225 | bbox.y1 *= unit; |
226 | } |
227 | |
228 | for (j = 0; j < glo->dims; j++) |
229 | if (!memcmp(glo->dim[j].u.dim.bbox, &bbox, sizeof (fz_rect))) |
230 | break; |
231 | |
232 | if (j < glo->dims) |
233 | return; |
234 | |
235 | glo->dim = fz_realloc_array(ctx, glo->dim, glo->dims+1, struct info); |
236 | glo->dims++; |
237 | |
238 | glo->dim[glo->dims - 1].page = page; |
239 | glo->dim[glo->dims - 1].pageref = pageref; |
240 | glo->dim[glo->dims - 1].u.dim.bbox = NULL; |
241 | glo->dim[glo->dims - 1].u.dim.bbox = fz_malloc(ctx, sizeof(fz_rect)); |
242 | memcpy(glo->dim[glo->dims - 1].u.dim.bbox, &bbox, sizeof (fz_rect)); |
243 | |
244 | return; |
245 | } |
246 | |
247 | static void |
248 | gatherfonts(fz_context *ctx, globals *glo, int page, pdf_obj *, pdf_obj *dict) |
249 | { |
250 | int i, n; |
251 | |
252 | n = pdf_dict_len(ctx, dict); |
253 | for (i = 0; i < n; i++) |
254 | { |
255 | pdf_obj *fontdict = NULL; |
256 | pdf_obj *subtype = NULL; |
257 | pdf_obj *basefont = NULL; |
258 | pdf_obj *name = NULL; |
259 | pdf_obj *encoding = NULL; |
260 | int k; |
261 | |
262 | fontdict = pdf_dict_get_val(ctx, dict, i); |
263 | if (!pdf_is_dict(ctx, fontdict)) |
264 | { |
265 | fz_warn(ctx, "not a font dict (%d 0 R)" , pdf_to_num(ctx, fontdict)); |
266 | continue; |
267 | } |
268 | |
269 | subtype = pdf_dict_get(ctx, fontdict, PDF_NAME(Subtype)); |
270 | basefont = pdf_dict_get(ctx, fontdict, PDF_NAME(BaseFont)); |
271 | if (!basefont || pdf_is_null(ctx, basefont)) |
272 | name = pdf_dict_get(ctx, fontdict, PDF_NAME(Name)); |
273 | encoding = pdf_dict_get(ctx, fontdict, PDF_NAME(Encoding)); |
274 | if (pdf_is_dict(ctx, encoding)) |
275 | encoding = pdf_dict_get(ctx, encoding, PDF_NAME(BaseEncoding)); |
276 | |
277 | for (k = 0; k < glo->fonts; k++) |
278 | if (!pdf_objcmp(ctx, glo->font[k].u.font.obj, fontdict)) |
279 | break; |
280 | |
281 | if (k < glo->fonts) |
282 | continue; |
283 | |
284 | glo->font = fz_realloc_array(ctx, glo->font, glo->fonts+1, struct info); |
285 | glo->fonts++; |
286 | |
287 | glo->font[glo->fonts - 1].page = page; |
288 | glo->font[glo->fonts - 1].pageref = pageref; |
289 | glo->font[glo->fonts - 1].u.font.obj = fontdict; |
290 | glo->font[glo->fonts - 1].u.font.subtype = subtype; |
291 | glo->font[glo->fonts - 1].u.font.name = basefont ? basefont : name; |
292 | glo->font[glo->fonts - 1].u.font.encoding = encoding; |
293 | } |
294 | } |
295 | |
296 | static void |
297 | gatherimages(fz_context *ctx, globals *glo, int page, pdf_obj *, pdf_obj *dict) |
298 | { |
299 | int i, n; |
300 | |
301 | n = pdf_dict_len(ctx, dict); |
302 | for (i = 0; i < n; i++) |
303 | { |
304 | pdf_obj *imagedict; |
305 | pdf_obj *type; |
306 | pdf_obj *width; |
307 | pdf_obj *height; |
308 | pdf_obj *bpc = NULL; |
309 | pdf_obj *filter = NULL; |
310 | pdf_obj *cs = NULL; |
311 | pdf_obj *altcs; |
312 | int k; |
313 | |
314 | imagedict = pdf_dict_get_val(ctx, dict, i); |
315 | if (!pdf_is_dict(ctx, imagedict)) |
316 | { |
317 | fz_warn(ctx, "not an image dict (%d 0 R)" , pdf_to_num(ctx, imagedict)); |
318 | continue; |
319 | } |
320 | |
321 | type = pdf_dict_get(ctx, imagedict, PDF_NAME(Subtype)); |
322 | if (!pdf_name_eq(ctx, type, PDF_NAME(Image))) |
323 | continue; |
324 | |
325 | filter = pdf_dict_get(ctx, imagedict, PDF_NAME(Filter)); |
326 | |
327 | altcs = NULL; |
328 | cs = pdf_dict_get(ctx, imagedict, PDF_NAME(ColorSpace)); |
329 | if (pdf_is_array(ctx, cs)) |
330 | { |
331 | pdf_obj *cses = cs; |
332 | |
333 | cs = pdf_array_get(ctx, cses, 0); |
334 | if (pdf_name_eq(ctx, cs, PDF_NAME(DeviceN)) || pdf_name_eq(ctx, cs, PDF_NAME(Separation))) |
335 | { |
336 | altcs = pdf_array_get(ctx, cses, 2); |
337 | if (pdf_is_array(ctx, altcs)) |
338 | altcs = pdf_array_get(ctx, altcs, 0); |
339 | } |
340 | } |
341 | |
342 | width = pdf_dict_get(ctx, imagedict, PDF_NAME(Width)); |
343 | height = pdf_dict_get(ctx, imagedict, PDF_NAME(Height)); |
344 | bpc = pdf_dict_get(ctx, imagedict, PDF_NAME(BitsPerComponent)); |
345 | |
346 | for (k = 0; k < glo->images; k++) |
347 | if (!pdf_objcmp(ctx, glo->image[k].u.image.obj, imagedict)) |
348 | break; |
349 | |
350 | if (k < glo->images) |
351 | continue; |
352 | |
353 | glo->image = fz_realloc_array(ctx, glo->image, glo->images+1, struct info); |
354 | glo->images++; |
355 | |
356 | glo->image[glo->images - 1].page = page; |
357 | glo->image[glo->images - 1].pageref = pageref; |
358 | glo->image[glo->images - 1].u.image.obj = imagedict; |
359 | glo->image[glo->images - 1].u.image.width = width; |
360 | glo->image[glo->images - 1].u.image.height = height; |
361 | glo->image[glo->images - 1].u.image.bpc = bpc; |
362 | glo->image[glo->images - 1].u.image.filter = filter; |
363 | glo->image[glo->images - 1].u.image.cs = cs; |
364 | glo->image[glo->images - 1].u.image.altcs = altcs; |
365 | } |
366 | } |
367 | |
368 | static void |
369 | gatherforms(fz_context *ctx, globals *glo, int page, pdf_obj *, pdf_obj *dict) |
370 | { |
371 | int i, n; |
372 | |
373 | n = pdf_dict_len(ctx, dict); |
374 | for (i = 0; i < n; i++) |
375 | { |
376 | pdf_obj *xobjdict; |
377 | pdf_obj *type; |
378 | pdf_obj *subtype; |
379 | pdf_obj *group; |
380 | pdf_obj *groupsubtype; |
381 | pdf_obj *reference; |
382 | int k; |
383 | |
384 | xobjdict = pdf_dict_get_val(ctx, dict, i); |
385 | if (!pdf_is_dict(ctx, xobjdict)) |
386 | { |
387 | fz_warn(ctx, "not a xobject dict (%d 0 R)" , pdf_to_num(ctx, xobjdict)); |
388 | continue; |
389 | } |
390 | |
391 | type = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype)); |
392 | if (!pdf_name_eq(ctx, type, PDF_NAME(Form))) |
393 | continue; |
394 | |
395 | subtype = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype2)); |
396 | if (!pdf_name_eq(ctx, subtype, PDF_NAME(PS))) |
397 | continue; |
398 | |
399 | group = pdf_dict_get(ctx, xobjdict, PDF_NAME(Group)); |
400 | groupsubtype = pdf_dict_get(ctx, group, PDF_NAME(S)); |
401 | reference = pdf_dict_get(ctx, xobjdict, PDF_NAME(Ref)); |
402 | |
403 | for (k = 0; k < glo->forms; k++) |
404 | if (!pdf_objcmp(ctx, glo->form[k].u.form.obj, xobjdict)) |
405 | break; |
406 | |
407 | if (k < glo->forms) |
408 | continue; |
409 | |
410 | glo->form = fz_realloc_array(ctx, glo->form, glo->forms+1, struct info); |
411 | glo->forms++; |
412 | |
413 | glo->form[glo->forms - 1].page = page; |
414 | glo->form[glo->forms - 1].pageref = pageref; |
415 | glo->form[glo->forms - 1].u.form.obj = xobjdict; |
416 | glo->form[glo->forms - 1].u.form.groupsubtype = groupsubtype; |
417 | glo->form[glo->forms - 1].u.form.reference = reference; |
418 | } |
419 | } |
420 | |
421 | static void |
422 | gatherpsobjs(fz_context *ctx, globals *glo, int page, pdf_obj *, pdf_obj *dict) |
423 | { |
424 | int i, n; |
425 | |
426 | n = pdf_dict_len(ctx, dict); |
427 | for (i = 0; i < n; i++) |
428 | { |
429 | pdf_obj *xobjdict; |
430 | pdf_obj *type; |
431 | pdf_obj *subtype; |
432 | int k; |
433 | |
434 | xobjdict = pdf_dict_get_val(ctx, dict, i); |
435 | if (!pdf_is_dict(ctx, xobjdict)) |
436 | { |
437 | fz_warn(ctx, "not a xobject dict (%d 0 R)" , pdf_to_num(ctx, xobjdict)); |
438 | continue; |
439 | } |
440 | |
441 | type = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype)); |
442 | subtype = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype2)); |
443 | if (!pdf_name_eq(ctx, type, PDF_NAME(PS)) && |
444 | (!pdf_name_eq(ctx, type, PDF_NAME(Form)) || !pdf_name_eq(ctx, subtype, PDF_NAME(PS)))) |
445 | continue; |
446 | |
447 | for (k = 0; k < glo->psobjs; k++) |
448 | if (!pdf_objcmp(ctx, glo->psobj[k].u.form.obj, xobjdict)) |
449 | break; |
450 | |
451 | if (k < glo->psobjs) |
452 | continue; |
453 | |
454 | glo->psobj = fz_realloc_array(ctx, glo->psobj, glo->psobjs+1, struct info); |
455 | glo->psobjs++; |
456 | |
457 | glo->psobj[glo->psobjs - 1].page = page; |
458 | glo->psobj[glo->psobjs - 1].pageref = pageref; |
459 | glo->psobj[glo->psobjs - 1].u.form.obj = xobjdict; |
460 | } |
461 | } |
462 | |
463 | static void |
464 | gathershadings(fz_context *ctx, globals *glo, int page, pdf_obj *, pdf_obj *dict) |
465 | { |
466 | int i, n; |
467 | |
468 | n = pdf_dict_len(ctx, dict); |
469 | for (i = 0; i < n; i++) |
470 | { |
471 | pdf_obj *shade; |
472 | pdf_obj *type; |
473 | int k; |
474 | |
475 | shade = pdf_dict_get_val(ctx, dict, i); |
476 | if (!pdf_is_dict(ctx, shade)) |
477 | { |
478 | fz_warn(ctx, "not a shading dict (%d 0 R)" , pdf_to_num(ctx, shade)); |
479 | continue; |
480 | } |
481 | |
482 | type = pdf_dict_get(ctx, shade, PDF_NAME(ShadingType)); |
483 | if (!pdf_is_int(ctx, type) || pdf_to_int(ctx, type) < 1 || pdf_to_int(ctx, type) > 7) |
484 | { |
485 | fz_warn(ctx, "not a shading type (%d 0 R)" , pdf_to_num(ctx, shade)); |
486 | type = NULL; |
487 | } |
488 | |
489 | for (k = 0; k < glo->shadings; k++) |
490 | if (!pdf_objcmp(ctx, glo->shading[k].u.shading.obj, shade)) |
491 | break; |
492 | |
493 | if (k < glo->shadings) |
494 | continue; |
495 | |
496 | glo->shading = fz_realloc_array(ctx, glo->shading, glo->shadings+1, struct info); |
497 | glo->shadings++; |
498 | |
499 | glo->shading[glo->shadings - 1].page = page; |
500 | glo->shading[glo->shadings - 1].pageref = pageref; |
501 | glo->shading[glo->shadings - 1].u.shading.obj = shade; |
502 | glo->shading[glo->shadings - 1].u.shading.type = type; |
503 | } |
504 | } |
505 | |
506 | static void |
507 | gatherpatterns(fz_context *ctx, globals *glo, int page, pdf_obj *, pdf_obj *dict) |
508 | { |
509 | int i, n; |
510 | |
511 | n = pdf_dict_len(ctx, dict); |
512 | for (i = 0; i < n; i++) |
513 | { |
514 | pdf_obj *patterndict; |
515 | pdf_obj *type; |
516 | pdf_obj *paint = NULL; |
517 | pdf_obj *tiling = NULL; |
518 | pdf_obj *shading = NULL; |
519 | int k; |
520 | |
521 | patterndict = pdf_dict_get_val(ctx, dict, i); |
522 | if (!pdf_is_dict(ctx, patterndict)) |
523 | { |
524 | fz_warn(ctx, "not a pattern dict (%d 0 R)" , pdf_to_num(ctx, patterndict)); |
525 | continue; |
526 | } |
527 | |
528 | type = pdf_dict_get(ctx, patterndict, PDF_NAME(PatternType)); |
529 | if (!pdf_is_int(ctx, type) || pdf_to_int(ctx, type) < 1 || pdf_to_int(ctx, type) > 2) |
530 | { |
531 | fz_warn(ctx, "not a pattern type (%d 0 R)" , pdf_to_num(ctx, patterndict)); |
532 | type = NULL; |
533 | } |
534 | |
535 | if (pdf_to_int(ctx, type) == 1) |
536 | { |
537 | paint = pdf_dict_get(ctx, patterndict, PDF_NAME(PaintType)); |
538 | if (!pdf_is_int(ctx, paint) || pdf_to_int(ctx, paint) < 1 || pdf_to_int(ctx, paint) > 2) |
539 | { |
540 | fz_warn(ctx, "not a pattern paint type (%d 0 R)" , pdf_to_num(ctx, patterndict)); |
541 | paint = NULL; |
542 | } |
543 | |
544 | tiling = pdf_dict_get(ctx, patterndict, PDF_NAME(TilingType)); |
545 | if (!pdf_is_int(ctx, tiling) || pdf_to_int(ctx, tiling) < 1 || pdf_to_int(ctx, tiling) > 3) |
546 | { |
547 | fz_warn(ctx, "not a pattern tiling type (%d 0 R)" , pdf_to_num(ctx, patterndict)); |
548 | tiling = NULL; |
549 | } |
550 | } |
551 | else |
552 | { |
553 | shading = pdf_dict_get(ctx, patterndict, PDF_NAME(Shading)); |
554 | } |
555 | |
556 | for (k = 0; k < glo->patterns; k++) |
557 | if (!pdf_objcmp(ctx, glo->pattern[k].u.pattern.obj, patterndict)) |
558 | break; |
559 | |
560 | if (k < glo->patterns) |
561 | continue; |
562 | |
563 | glo->pattern = fz_realloc_array(ctx, glo->pattern, glo->patterns+1, struct info); |
564 | glo->patterns++; |
565 | |
566 | glo->pattern[glo->patterns - 1].page = page; |
567 | glo->pattern[glo->patterns - 1].pageref = pageref; |
568 | glo->pattern[glo->patterns - 1].u.pattern.obj = patterndict; |
569 | glo->pattern[glo->patterns - 1].u.pattern.type = type; |
570 | glo->pattern[glo->patterns - 1].u.pattern.paint = paint; |
571 | glo->pattern[glo->patterns - 1].u.pattern.tiling = tiling; |
572 | glo->pattern[glo->patterns - 1].u.pattern.shading = shading; |
573 | } |
574 | } |
575 | |
576 | static void |
577 | gatherresourceinfo(fz_context *ctx, globals *glo, int page, pdf_obj *rsrc, int show) |
578 | { |
579 | pdf_obj *; |
580 | pdf_obj *font; |
581 | pdf_obj *xobj; |
582 | pdf_obj *shade; |
583 | pdf_obj *pattern; |
584 | pdf_obj *subrsrc; |
585 | int i; |
586 | |
587 | pageref = pdf_lookup_page_obj(ctx, glo->doc, page-1); |
588 | if (!pageref) |
589 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d" , page); |
590 | |
591 | /* stop on cyclic resource dependencies */ |
592 | if (pdf_mark_obj(ctx, rsrc)) |
593 | return; |
594 | |
595 | fz_try(ctx) |
596 | { |
597 | font = pdf_dict_get(ctx, rsrc, PDF_NAME(Font)); |
598 | if (show & FONTS && font) |
599 | { |
600 | int n; |
601 | |
602 | gatherfonts(ctx, glo, page, pageref, font); |
603 | n = pdf_dict_len(ctx, font); |
604 | for (i = 0; i < n; i++) |
605 | { |
606 | pdf_obj *obj = pdf_dict_get_val(ctx, font, i); |
607 | |
608 | subrsrc = pdf_dict_get(ctx, obj, PDF_NAME(Resources)); |
609 | if (subrsrc && pdf_objcmp(ctx, rsrc, subrsrc)) |
610 | gatherresourceinfo(ctx, glo, page, subrsrc, show); |
611 | } |
612 | } |
613 | |
614 | xobj = pdf_dict_get(ctx, rsrc, PDF_NAME(XObject)); |
615 | if (show & (IMAGES|XOBJS) && xobj) |
616 | { |
617 | int n; |
618 | |
619 | if (show & IMAGES) |
620 | gatherimages(ctx, glo, page, pageref, xobj); |
621 | if (show & XOBJS) |
622 | { |
623 | gatherforms(ctx, glo, page, pageref, xobj); |
624 | gatherpsobjs(ctx, glo, page, pageref, xobj); |
625 | } |
626 | n = pdf_dict_len(ctx, xobj); |
627 | for (i = 0; i < n; i++) |
628 | { |
629 | pdf_obj *obj = pdf_dict_get_val(ctx, xobj, i); |
630 | subrsrc = pdf_dict_get(ctx, obj, PDF_NAME(Resources)); |
631 | if (subrsrc && pdf_objcmp(ctx, rsrc, subrsrc)) |
632 | gatherresourceinfo(ctx, glo, page, subrsrc, show); |
633 | } |
634 | } |
635 | |
636 | shade = pdf_dict_get(ctx, rsrc, PDF_NAME(Shading)); |
637 | if (show & SHADINGS && shade) |
638 | gathershadings(ctx, glo, page, pageref, shade); |
639 | |
640 | pattern = pdf_dict_get(ctx, rsrc, PDF_NAME(Pattern)); |
641 | if (show & PATTERNS && pattern) |
642 | { |
643 | int n; |
644 | gatherpatterns(ctx, glo, page, pageref, pattern); |
645 | n = pdf_dict_len(ctx, pattern); |
646 | for (i = 0; i < n; i++) |
647 | { |
648 | pdf_obj *obj = pdf_dict_get_val(ctx, pattern, i); |
649 | subrsrc = pdf_dict_get(ctx, obj, PDF_NAME(Resources)); |
650 | if (subrsrc && pdf_objcmp(ctx, rsrc, subrsrc)) |
651 | gatherresourceinfo(ctx, glo, page, subrsrc, show); |
652 | } |
653 | } |
654 | } |
655 | fz_always(ctx) |
656 | pdf_unmark_obj(ctx, rsrc); |
657 | fz_catch(ctx) |
658 | fz_rethrow(ctx); |
659 | } |
660 | |
661 | static void |
662 | gatherpageinfo(fz_context *ctx, globals *glo, int page, int show) |
663 | { |
664 | pdf_obj *; |
665 | pdf_obj *rsrc; |
666 | |
667 | pageref = pdf_lookup_page_obj(ctx, glo->doc, page-1); |
668 | |
669 | if (!pageref) |
670 | fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d" , page); |
671 | |
672 | gatherdimensions(ctx, glo, page, pageref); |
673 | |
674 | rsrc = pdf_dict_get(ctx, pageref, PDF_NAME(Resources)); |
675 | gatherresourceinfo(ctx, glo, page, rsrc, show); |
676 | } |
677 | |
678 | static void |
679 | printinfo(fz_context *ctx, globals *glo, char *filename, int show, int page) |
680 | { |
681 | int i; |
682 | int j; |
683 | fz_output *out = glo->out; |
684 | |
685 | #define PAGE_FMT_zu "\t%d\t(%d 0 R):\t" |
686 | |
687 | if (show & DIMENSIONS && glo->dims > 0) |
688 | { |
689 | fz_write_printf(ctx, out, "Mediaboxes (%d):\n" , glo->dims); |
690 | for (i = 0; i < glo->dims; i++) |
691 | { |
692 | fz_write_printf(ctx, out, PAGE_FMT_zu "[ %g %g %g %g ]\n" , |
693 | glo->dim[i].page, |
694 | pdf_to_num(ctx, glo->dim[i].pageref), |
695 | glo->dim[i].u.dim.bbox->x0, |
696 | glo->dim[i].u.dim.bbox->y0, |
697 | glo->dim[i].u.dim.bbox->x1, |
698 | glo->dim[i].u.dim.bbox->y1); |
699 | } |
700 | fz_write_printf(ctx, out, "\n" ); |
701 | } |
702 | |
703 | if (show & FONTS && glo->fonts > 0) |
704 | { |
705 | fz_write_printf(ctx, out, "Fonts (%d):\n" , glo->fonts); |
706 | for (i = 0; i < glo->fonts; i++) |
707 | { |
708 | fz_write_printf(ctx, out, PAGE_FMT_zu "%s '%s' %s%s(%d 0 R)\n" , |
709 | glo->font[i].page, |
710 | pdf_to_num(ctx, glo->font[i].pageref), |
711 | pdf_to_name(ctx, glo->font[i].u.font.subtype), |
712 | pdf_to_name(ctx, glo->font[i].u.font.name), |
713 | glo->font[i].u.font.encoding ? pdf_to_name(ctx, glo->font[i].u.font.encoding) : "" , |
714 | glo->font[i].u.font.encoding ? " " : "" , |
715 | pdf_to_num(ctx, glo->font[i].u.font.obj)); |
716 | } |
717 | fz_write_printf(ctx, out, "\n" ); |
718 | } |
719 | |
720 | if (show & IMAGES && glo->images > 0) |
721 | { |
722 | fz_write_printf(ctx, out, "Images (%d):\n" , glo->images); |
723 | for (i = 0; i < glo->images; i++) |
724 | { |
725 | char *cs = NULL; |
726 | char *altcs = NULL; |
727 | |
728 | fz_write_printf(ctx, out, PAGE_FMT_zu "[ " , |
729 | glo->image[i].page, |
730 | pdf_to_num(ctx, glo->image[i].pageref)); |
731 | |
732 | if (pdf_is_array(ctx, glo->image[i].u.image.filter)) |
733 | { |
734 | int n = pdf_array_len(ctx, glo->image[i].u.image.filter); |
735 | for (j = 0; j < n; j++) |
736 | { |
737 | pdf_obj *obj = pdf_array_get(ctx, glo->image[i].u.image.filter, j); |
738 | char *filter = fz_strdup(ctx, pdf_to_name(ctx, obj)); |
739 | |
740 | if (strstr(filter, "Decode" )) |
741 | *(strstr(filter, "Decode" )) = '\0'; |
742 | |
743 | fz_write_printf(ctx, out, "%s%s" , |
744 | filter, |
745 | j == pdf_array_len(ctx, glo->image[i].u.image.filter) - 1 ? "" : " " ); |
746 | fz_free(ctx, filter); |
747 | } |
748 | } |
749 | else if (glo->image[i].u.image.filter) |
750 | { |
751 | pdf_obj *obj = glo->image[i].u.image.filter; |
752 | char *filter = fz_strdup(ctx, pdf_to_name(ctx, obj)); |
753 | |
754 | if (strstr(filter, "Decode" )) |
755 | *(strstr(filter, "Decode" )) = '\0'; |
756 | |
757 | fz_write_printf(ctx, out, "%s" , filter); |
758 | fz_free(ctx, filter); |
759 | } |
760 | else |
761 | fz_write_printf(ctx, out, "Raw" ); |
762 | |
763 | if (glo->image[i].u.image.cs) |
764 | { |
765 | cs = fz_strdup(ctx, pdf_to_name(ctx, glo->image[i].u.image.cs)); |
766 | |
767 | if (!strncmp(cs, "Device" , 6)) |
768 | { |
769 | size_t len = strlen(cs + 6); |
770 | memmove(cs + 3, cs + 6, len + 1); |
771 | cs[3 + len + 1] = '\0'; |
772 | } |
773 | if (strstr(cs, "ICC" )) |
774 | fz_strlcpy(cs, "ICC" , 4); |
775 | if (strstr(cs, "Indexed" )) |
776 | fz_strlcpy(cs, "Idx" , 4); |
777 | if (strstr(cs, "Pattern" )) |
778 | fz_strlcpy(cs, "Pat" , 4); |
779 | if (strstr(cs, "Separation" )) |
780 | fz_strlcpy(cs, "Sep" , 4); |
781 | } |
782 | if (glo->image[i].u.image.altcs) |
783 | { |
784 | altcs = fz_strdup(ctx, pdf_to_name(ctx, glo->image[i].u.image.altcs)); |
785 | |
786 | if (!strncmp(altcs, "Device" , 6)) |
787 | { |
788 | size_t len = strlen(altcs + 6); |
789 | memmove(altcs + 3, altcs + 6, len + 1); |
790 | altcs[3 + len + 1] = '\0'; |
791 | } |
792 | if (strstr(altcs, "ICC" )) |
793 | fz_strlcpy(altcs, "ICC" , 4); |
794 | if (strstr(altcs, "Indexed" )) |
795 | fz_strlcpy(altcs, "Idx" , 4); |
796 | if (strstr(altcs, "Pattern" )) |
797 | fz_strlcpy(altcs, "Pat" , 4); |
798 | if (strstr(altcs, "Separation" )) |
799 | fz_strlcpy(altcs, "Sep" , 4); |
800 | } |
801 | |
802 | fz_write_printf(ctx, out, " ] %dx%d %dbpc %s%s%s (%d 0 R)\n" , |
803 | pdf_to_int(ctx, glo->image[i].u.image.width), |
804 | pdf_to_int(ctx, glo->image[i].u.image.height), |
805 | glo->image[i].u.image.bpc ? pdf_to_int(ctx, glo->image[i].u.image.bpc) : 1, |
806 | glo->image[i].u.image.cs ? cs : "ImageMask" , |
807 | glo->image[i].u.image.altcs ? " " : "" , |
808 | glo->image[i].u.image.altcs ? altcs : "" , |
809 | pdf_to_num(ctx, glo->image[i].u.image.obj)); |
810 | |
811 | fz_free(ctx, cs); |
812 | fz_free(ctx, altcs); |
813 | } |
814 | fz_write_printf(ctx, out, "\n" ); |
815 | } |
816 | |
817 | if (show & SHADINGS && glo->shadings > 0) |
818 | { |
819 | fz_write_printf(ctx, out, "Shading patterns (%d):\n" , glo->shadings); |
820 | for (i = 0; i < glo->shadings; i++) |
821 | { |
822 | char *shadingtype[] = |
823 | { |
824 | "" , |
825 | "Function" , |
826 | "Axial" , |
827 | "Radial" , |
828 | "Triangle mesh" , |
829 | "Lattice" , |
830 | "Coons patch" , |
831 | "Tensor patch" , |
832 | }; |
833 | |
834 | fz_write_printf(ctx, out, PAGE_FMT_zu "%s (%d 0 R)\n" , |
835 | glo->shading[i].page, |
836 | pdf_to_num(ctx, glo->shading[i].pageref), |
837 | shadingtype[pdf_to_int(ctx, glo->shading[i].u.shading.type)], |
838 | pdf_to_num(ctx, glo->shading[i].u.shading.obj)); |
839 | } |
840 | fz_write_printf(ctx, out, "\n" ); |
841 | } |
842 | |
843 | if (show & PATTERNS && glo->patterns > 0) |
844 | { |
845 | fz_write_printf(ctx, out, "Patterns (%d):\n" , glo->patterns); |
846 | for (i = 0; i < glo->patterns; i++) |
847 | { |
848 | if (pdf_to_int(ctx, glo->pattern[i].u.pattern.type) == 1) |
849 | { |
850 | char *painttype[] = |
851 | { |
852 | "" , |
853 | "Colored" , |
854 | "Uncolored" , |
855 | }; |
856 | char *tilingtype[] = |
857 | { |
858 | "" , |
859 | "Constant" , |
860 | "No distortion" , |
861 | "Constant/fast tiling" , |
862 | }; |
863 | |
864 | fz_write_printf(ctx, out, PAGE_FMT_zu "Tiling %s %s (%d 0 R)\n" , |
865 | glo->pattern[i].page, |
866 | pdf_to_num(ctx, glo->pattern[i].pageref), |
867 | painttype[pdf_to_int(ctx, glo->pattern[i].u.pattern.paint)], |
868 | tilingtype[pdf_to_int(ctx, glo->pattern[i].u.pattern.tiling)], |
869 | pdf_to_num(ctx, glo->pattern[i].u.pattern.obj)); |
870 | } |
871 | else |
872 | { |
873 | fz_write_printf(ctx, out, PAGE_FMT_zu "Shading %d 0 R (%d 0 R)\n" , |
874 | glo->pattern[i].page, |
875 | pdf_to_num(ctx, glo->pattern[i].pageref), |
876 | pdf_to_num(ctx, glo->pattern[i].u.pattern.shading), |
877 | pdf_to_num(ctx, glo->pattern[i].u.pattern.obj)); |
878 | } |
879 | } |
880 | fz_write_printf(ctx, out, "\n" ); |
881 | } |
882 | |
883 | if (show & XOBJS && glo->forms > 0) |
884 | { |
885 | fz_write_printf(ctx, out, "Form xobjects (%d):\n" , glo->forms); |
886 | for (i = 0; i < glo->forms; i++) |
887 | { |
888 | fz_write_printf(ctx, out, PAGE_FMT_zu "Form%s%s%s%s (%d 0 R)\n" , |
889 | glo->form[i].page, |
890 | pdf_to_num(ctx, glo->form[i].pageref), |
891 | glo->form[i].u.form.groupsubtype ? " " : "" , |
892 | glo->form[i].u.form.groupsubtype ? pdf_to_name(ctx, glo->form[i].u.form.groupsubtype) : "" , |
893 | glo->form[i].u.form.groupsubtype ? " Group" : "" , |
894 | glo->form[i].u.form.reference ? " Reference" : "" , |
895 | pdf_to_num(ctx, glo->form[i].u.form.obj)); |
896 | } |
897 | fz_write_printf(ctx, out, "\n" ); |
898 | } |
899 | |
900 | if (show & XOBJS && glo->psobjs > 0) |
901 | { |
902 | fz_write_printf(ctx, out, "Postscript xobjects (%d):\n" , glo->psobjs); |
903 | for (i = 0; i < glo->psobjs; i++) |
904 | { |
905 | fz_write_printf(ctx, out, PAGE_FMT_zu "(%d 0 R)\n" , |
906 | glo->psobj[i].page, |
907 | pdf_to_num(ctx, glo->psobj[i].pageref), |
908 | pdf_to_num(ctx, glo->psobj[i].u.form.obj)); |
909 | } |
910 | fz_write_printf(ctx, out, "\n" ); |
911 | } |
912 | } |
913 | |
914 | static void |
915 | showinfo(fz_context *ctx, globals *glo, char *filename, int show, const char *pagelist) |
916 | { |
917 | int page, spage, epage; |
918 | int allpages; |
919 | int pagecount; |
920 | fz_output *out = glo->out; |
921 | |
922 | if (!glo->doc) |
923 | infousage(); |
924 | |
925 | allpages = !strcmp(pagelist, "1-N" ); |
926 | |
927 | pagecount = pdf_count_pages(ctx, glo->doc); |
928 | |
929 | while ((pagelist = fz_parse_page_range(ctx, pagelist, &spage, &epage, pagecount))) |
930 | { |
931 | if (allpages) |
932 | fz_write_printf(ctx, out, "Retrieving info from pages %d-%d...\n" , spage, epage); |
933 | for (page = spage; page <= epage; page++) |
934 | { |
935 | gatherpageinfo(ctx, glo, page, show); |
936 | if (!allpages) |
937 | { |
938 | fz_write_printf(ctx, out, "Page %d:\n" , page); |
939 | printinfo(ctx, glo, filename, show, page); |
940 | fz_write_printf(ctx, out, "\n" ); |
941 | clearinfo(ctx, glo); |
942 | } |
943 | } |
944 | } |
945 | |
946 | if (allpages) |
947 | printinfo(ctx, glo, filename, show, -1); |
948 | } |
949 | |
950 | static void |
951 | pdfinfo_info(fz_context *ctx, fz_output *out, char *filename, char *password, int show, char *argv[], int argc) |
952 | { |
953 | enum { NO_FILE_OPENED, NO_INFO_GATHERED, INFO_SHOWN } state; |
954 | int argidx = 0; |
955 | globals glo = { 0 }; |
956 | |
957 | glo.out = out; |
958 | glo.ctx = ctx; |
959 | |
960 | state = NO_FILE_OPENED; |
961 | |
962 | fz_try(ctx) |
963 | { |
964 | while (argidx < argc) |
965 | { |
966 | if (state == NO_FILE_OPENED || !fz_is_page_range(ctx, argv[argidx])) |
967 | { |
968 | if (state == NO_INFO_GATHERED) |
969 | { |
970 | showinfo(ctx, &glo, filename, show, "1-N" ); |
971 | } |
972 | |
973 | closexref(ctx, &glo); |
974 | |
975 | filename = argv[argidx]; |
976 | fz_write_printf(ctx, out, "%s:\n" , filename); |
977 | glo.doc = pdf_open_document(glo.ctx, filename); |
978 | if (pdf_needs_password(ctx, glo.doc)) |
979 | if (!pdf_authenticate_password(ctx, glo.doc, password)) |
980 | fz_throw(glo.ctx, FZ_ERROR_GENERIC, "cannot authenticate password: %s" , filename); |
981 | glo.pagecount = pdf_count_pages(ctx, glo.doc); |
982 | |
983 | showglobalinfo(ctx, &glo); |
984 | state = NO_INFO_GATHERED; |
985 | } |
986 | else |
987 | { |
988 | showinfo(ctx, &glo, filename, show, argv[argidx]); |
989 | state = INFO_SHOWN; |
990 | } |
991 | |
992 | argidx++; |
993 | } |
994 | |
995 | if (state == NO_INFO_GATHERED) |
996 | showinfo(ctx, &glo, filename, show, "1-N" ); |
997 | } |
998 | fz_always(ctx) |
999 | closexref(ctx, &glo); |
1000 | fz_catch(ctx) |
1001 | fz_rethrow(ctx); |
1002 | } |
1003 | |
1004 | int pdfinfo_main(int argc, char **argv) |
1005 | { |
1006 | char *filename = "" ; |
1007 | char *password = "" ; |
1008 | int show = ALL; |
1009 | int c; |
1010 | int ret; |
1011 | fz_context *ctx; |
1012 | |
1013 | while ((c = fz_getopt(argc, argv, "FISPXMp:" )) != -1) |
1014 | { |
1015 | switch (c) |
1016 | { |
1017 | case 'F': if (show == ALL) show = FONTS; else show |= FONTS; break; |
1018 | case 'I': if (show == ALL) show = IMAGES; else show |= IMAGES; break; |
1019 | case 'S': if (show == ALL) show = SHADINGS; else show |= SHADINGS; break; |
1020 | case 'P': if (show == ALL) show = PATTERNS; else show |= PATTERNS; break; |
1021 | case 'X': if (show == ALL) show = XOBJS; else show |= XOBJS; break; |
1022 | case 'M': if (show == ALL) show = DIMENSIONS; else show |= DIMENSIONS; break; |
1023 | case 'p': password = fz_optarg; break; |
1024 | default: |
1025 | infousage(); |
1026 | break; |
1027 | } |
1028 | } |
1029 | |
1030 | if (fz_optind == argc) |
1031 | infousage(); |
1032 | |
1033 | ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); |
1034 | if (!ctx) |
1035 | { |
1036 | fprintf(stderr, "cannot initialise context\n" ); |
1037 | exit(1); |
1038 | } |
1039 | |
1040 | ret = 0; |
1041 | fz_try(ctx) |
1042 | pdfinfo_info(ctx, fz_stdout(ctx), filename, password, show, &argv[fz_optind], argc-fz_optind); |
1043 | fz_catch(ctx) |
1044 | ret = 1; |
1045 | fz_drop_context(ctx); |
1046 | return ret; |
1047 | } |
1048 | |