1/*
2 * pdfshow -- the ultimate pdf debugging tool
3 */
4
5#include "mupdf/fitz.h"
6#include "mupdf/pdf.h"
7
8#include <stdlib.h>
9#include <stdio.h>
10#include <string.h>
11
12static pdf_document *doc = NULL;
13static fz_context *ctx = NULL;
14static fz_output *out = NULL;
15static int showbinary = 0;
16static int showdecode = 1;
17static int tight = 0;
18static int showcolumn;
19
/*
 * Print the command synopsis to stderr and terminate with exit status 1.
 * Does not return.
 *
 * The selector list names every keyword handled by show(): trailer, xref,
 * pages, grep, outline, js and form; anything else is treated as a path.
 */
static void usage(void)
{
	fprintf(stderr,
		"usage: mutool show [options] file.pdf ( trailer | xref | pages | grep | outline | js | form | <path> ) *\n"
		"\t-p -\tpassword\n"
		"\t-o -\toutput file\n"
		"\t-e\tleave stream contents in their original form\n"
		"\t-b\tprint only stream contents, as raw binary data\n"
		"\t-g\tprint only object, one line per object, suitable for grep\n"
		"\tpath: path to an object, starting with either an object number,\n"
		"\t\t'pages', 'trailer', or a property in the trailer;\n"
		"\t\tpath elements separated by '.' or '/'. Path elements must be\n"
		"\t\tarray index numbers, dictionary property names, or '*'.\n"
		);
	exit(1);
}
36
37static void showtrailer(void)
38{
39 if (tight)
40 fz_write_printf(ctx, out, "trailer ");
41 else
42 fz_write_printf(ctx, out, "trailer\n");
43 pdf_print_obj(ctx, out, pdf_trailer(ctx, doc), tight, 1);
44 fz_write_printf(ctx, out, "\n");
45}
46
47static void showxref(void)
48{
49 int i;
50 int xref_len = pdf_xref_len(ctx, doc);
51 fz_write_printf(ctx, out, "xref\n0 %d\n", xref_len);
52 for (i = 0; i < xref_len; i++)
53 {
54 pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, i);
55 fz_write_printf(ctx, out, "%05d: %010d %05d %c \n",
56 i,
57 (int)entry->ofs,
58 entry->gen,
59 entry->type ? entry->type : '-');
60 }
61}
62
63static void showpages(void)
64{
65 pdf_obj *ref;
66 int i, n = pdf_count_pages(ctx, doc);
67 for (i = 0; i < n; ++i)
68 {
69 ref = pdf_lookup_page_obj(ctx, doc, i);
70 fz_write_printf(ctx, out, "page %d = %d 0 R\n", i + 1, pdf_to_num(ctx, ref));
71 }
72}
73
74static void showsafe(unsigned char *buf, size_t n)
75{
76 size_t i;
77 for (i = 0; i < n; i++) {
78 if (buf[i] == '\r' || buf[i] == '\n') {
79 putchar('\n');
80 showcolumn = 0;
81 }
82 else if (buf[i] < 32 || buf[i] > 126) {
83 putchar('.');
84 showcolumn ++;
85 }
86 else {
87 putchar(buf[i]);
88 showcolumn ++;
89 }
90 if (showcolumn == 79) {
91 putchar('\n');
92 showcolumn = 0;
93 }
94 }
95}
96
97static void showstream(int num)
98{
99 fz_stream *stm;
100 unsigned char buf[2048];
101 size_t n;
102
103 showcolumn = 0;
104
105 if (showdecode)
106 stm = pdf_open_stream_number(ctx, doc, num);
107 else
108 stm = pdf_open_raw_stream_number(ctx, doc, num);
109
110 while (1)
111 {
112 n = fz_read(ctx, stm, buf, sizeof buf);
113 if (n == 0)
114 break;
115 if (showbinary)
116 fz_write_data(ctx, out, buf, n);
117 else
118 showsafe(buf, n);
119 }
120
121 fz_drop_stream(ctx, stm);
122}
123
124static void showobject(pdf_obj *ref)
125{
126 pdf_obj *obj = pdf_resolve_indirect(ctx, ref);
127 int num = pdf_to_num(ctx, ref);
128 if (pdf_is_stream(ctx, ref))
129 {
130 if (showbinary)
131 {
132 showstream(num);
133 }
134 else
135 {
136 if (tight)
137 {
138 fz_write_printf(ctx, out, "%d 0 obj ", num);
139 pdf_print_obj(ctx, out, obj, 1, 1);
140 fz_write_printf(ctx, out, " stream\n");
141 }
142 else
143 {
144 fz_write_printf(ctx, out, "%d 0 obj\n", num);
145 pdf_print_obj(ctx, out, obj, 0, 1);
146 fz_write_printf(ctx, out, "\nstream\n");
147 showstream(num);
148 fz_write_printf(ctx, out, "endstream\n");
149 fz_write_printf(ctx, out, "endobj\n");
150 }
151 }
152 }
153 else
154 {
155 if (tight)
156 {
157 fz_write_printf(ctx, out, "%d 0 obj ", num);
158 pdf_print_obj(ctx, out, obj, 1, 1);
159 fz_write_printf(ctx, out, "\n");
160 }
161 else
162 {
163 fz_write_printf(ctx, out, "%d 0 obj\n", num);
164 pdf_print_obj(ctx, out, obj, 0, 1);
165 fz_write_printf(ctx, out, "\nendobj\n");
166 }
167 }
168}
169
170static void showgrep(void)
171{
172 pdf_obj *ref, *obj;
173 int i, len;
174
175 len = pdf_count_objects(ctx, doc);
176 for (i = 0; i < len; i++)
177 {
178 pdf_xref_entry *entry = pdf_get_xref_entry(ctx, doc, i);
179 if (entry->type == 'n' || entry->type == 'o')
180 {
181 fz_try(ctx)
182 {
183 ref = pdf_new_indirect(ctx, doc, i, 0);
184 obj = pdf_resolve_indirect(ctx, ref);
185 }
186 fz_catch(ctx)
187 {
188 pdf_drop_obj(ctx, ref);
189 fz_warn(ctx, "skipping object (%d 0 R)", i);
190 continue;
191 }
192
193 pdf_sort_dict(ctx, obj);
194
195 fz_write_printf(ctx, out, "%d 0 obj ", i);
196 pdf_print_obj(ctx, out, obj, 1, 1);
197 if (pdf_is_stream(ctx, ref))
198 fz_write_printf(ctx, out, " stream");
199 fz_write_printf(ctx, out, "\n");
200
201 pdf_drop_obj(ctx, ref);
202 }
203 }
204
205 fz_write_printf(ctx, out, "trailer ");
206 pdf_print_obj(ctx, out, pdf_trailer(ctx, doc), 1, 1);
207 fz_write_printf(ctx, out, "\n");
208}
209
210static void
211fz_print_outline(fz_context *ctx, fz_output *out, fz_outline *outline, int level)
212{
213 int i;
214 while (outline)
215 {
216 if (outline->down)
217 fz_write_byte(ctx, out, outline->is_open ? '-' : '+');
218 else
219 fz_write_byte(ctx, out, '|');
220
221 for (i = 0; i < level; i++)
222 fz_write_byte(ctx, out, '\t');
223 fz_write_printf(ctx, out, "%q\t%s\n", outline->title, outline->uri);
224 if (outline->down)
225 fz_print_outline(ctx, out, outline->down, level + 1);
226 outline = outline->next;
227 }
228}
229
230static void showoutline(void)
231{
232 fz_outline *outline = fz_load_outline(ctx, (fz_document*)doc);
233 fz_try(ctx)
234 fz_print_outline(ctx, out, outline, 1);
235 fz_always(ctx)
236 fz_drop_outline(ctx, outline);
237 fz_catch(ctx)
238 fz_rethrow(ctx);
239}
240
241static void showtext(char *buf, int indent)
242{
243 int bol = 1;
244 int c = *buf;
245 while (*buf)
246 {
247 c = *buf++;
248 if (c == '\r')
249 {
250 if (*buf == '\n')
251 ++buf;
252 c = '\n';
253 }
254 if (indent && bol)
255 fz_write_byte(ctx, out, '\t');
256 fz_write_byte(ctx, out, c);
257 bol = (c == '\n');
258 }
259 if (!bol)
260 fz_write_byte(ctx, out, '\n');
261}
262
263static void showjs(void)
264{
265 pdf_obj *tree;
266 int i;
267
268 tree = pdf_load_name_tree(ctx, doc, PDF_NAME(JavaScript));
269 for (i = 0; i < pdf_dict_len(ctx, tree); ++i)
270 {
271 pdf_obj *name = pdf_dict_get_key(ctx, tree, i);
272 pdf_obj *action = pdf_dict_get_val(ctx, tree, i);
273 pdf_obj *js = pdf_dict_get(ctx, action, PDF_NAME(JS));
274 char *src = pdf_load_stream_or_string_as_utf8(ctx, js);
275 fz_write_printf(ctx, out, "// %s\n", pdf_to_name(ctx, name));
276 showtext(src, 0);
277 fz_free(ctx, src);
278 }
279}
280
281static void showaction(pdf_obj *action, const char *name)
282{
283 if (action)
284 {
285 pdf_obj *js = pdf_dict_get(ctx, action, PDF_NAME(JS));
286 if (js)
287 {
288 char *src = pdf_load_stream_or_string_as_utf8(ctx, js);
289 fz_write_printf(ctx, out, " %s: {\n", name);
290 showtext(src, 1);
291 fz_write_printf(ctx, out, " }\n", name);
292 fz_free(ctx, src);
293 }
294 else
295 {
296 fz_write_printf(ctx, out, " %s: ", name);
297 if (pdf_is_indirect(ctx, action))
298 action = pdf_resolve_indirect(ctx, action);
299 pdf_print_obj(ctx, out, action, 1, 1);
300 fz_write_printf(ctx, out, "\n");
301 }
302 }
303}
304
305static void showfield(pdf_obj *field)
306{
307 pdf_obj *kids, *ft, *parent;
308 const char *tu;
309 char *t;
310 int ff;
311 int i, n;
312
313 t = pdf_field_name(ctx, field);
314 tu = pdf_dict_get_text_string(ctx, field, PDF_NAME(TU));
315 ft = pdf_dict_get_inheritable(ctx, field, PDF_NAME(FT));
316 ff = pdf_field_flags(ctx, field);
317 parent = pdf_dict_get(ctx, field, PDF_NAME(Parent));
318
319 fz_write_printf(ctx, out, "field %d\n", pdf_to_num(ctx, field));
320 fz_write_printf(ctx, out, " Type: %s\n", pdf_to_name(ctx, ft));
321 if (ff)
322 {
323 fz_write_printf(ctx, out, " Flags:");
324 if (ff & PDF_FIELD_IS_READ_ONLY) fz_write_string(ctx, out, " readonly");
325 if (ff & PDF_FIELD_IS_REQUIRED) fz_write_string(ctx, out, " required");
326 if (ff & PDF_FIELD_IS_NO_EXPORT) fz_write_string(ctx, out, " noExport");
327 if (ft == PDF_NAME(Btn))
328 {
329 if (ff & PDF_BTN_FIELD_IS_NO_TOGGLE_TO_OFF) fz_write_string(ctx, out, " noToggleToOff");
330 if (ff & PDF_BTN_FIELD_IS_RADIO) fz_write_string(ctx, out, " radio");
331 if (ff & PDF_BTN_FIELD_IS_PUSHBUTTON) fz_write_string(ctx, out, " pushButton");
332 if (ff & PDF_BTN_FIELD_IS_RADIOS_IN_UNISON) fz_write_string(ctx, out, " radiosInUnison");
333 }
334 if (ft == PDF_NAME(Tx))
335 {
336 if (ff & PDF_TX_FIELD_IS_MULTILINE) fz_write_string(ctx, out, " multiline");
337 if (ff & PDF_TX_FIELD_IS_PASSWORD) fz_write_string(ctx, out, " password");
338 if (ff & PDF_TX_FIELD_IS_FILE_SELECT) fz_write_string(ctx, out, " fileSelect");
339 if (ff & PDF_TX_FIELD_IS_DO_NOT_SPELL_CHECK) fz_write_string(ctx, out, " dontSpellCheck");
340 if (ff & PDF_TX_FIELD_IS_DO_NOT_SCROLL) fz_write_string(ctx, out, " dontScroll");
341 if (ff & PDF_TX_FIELD_IS_COMB) fz_write_string(ctx, out, " comb");
342 if (ff & PDF_TX_FIELD_IS_RICH_TEXT) fz_write_string(ctx, out, " richText");
343 }
344 if (ft == PDF_NAME(Ch))
345 {
346 if (ff & PDF_CH_FIELD_IS_COMBO) fz_write_string(ctx, out, " combo");
347 if (ff & PDF_CH_FIELD_IS_EDIT) fz_write_string(ctx, out, " edit");
348 if (ff & PDF_CH_FIELD_IS_SORT) fz_write_string(ctx, out, " sort");
349 if (ff & PDF_CH_FIELD_IS_MULTI_SELECT) fz_write_string(ctx, out, " multiSelect");
350 if (ff & PDF_CH_FIELD_IS_DO_NOT_SPELL_CHECK) fz_write_string(ctx, out, " dontSpellCheck");
351 if (ff & PDF_CH_FIELD_IS_COMMIT_ON_SEL_CHANGE) fz_write_string(ctx, out, " commitOnSelChange");
352 }
353 fz_write_string(ctx, out, "\n");
354 }
355 fz_write_printf(ctx, out, " Name: %(\n", t);
356 if (*tu)
357 fz_write_printf(ctx, out, " Label: %q\n", tu);
358 if (parent)
359 fz_write_printf(ctx, out, " Parent: %d\n", pdf_to_num(ctx, parent));
360
361 showaction(pdf_dict_getp(ctx, field, "A"), "Action");
362
363 showaction(pdf_dict_getp(ctx, field, "AA/K"), "Keystroke");
364 showaction(pdf_dict_getp(ctx, field, "AA/V"), "Validate");
365 showaction(pdf_dict_getp(ctx, field, "AA/F"), "Format");
366 showaction(pdf_dict_getp(ctx, field, "AA/C"), "Calculate");
367
368 showaction(pdf_dict_getp(ctx, field, "AA/E"), "Enter");
369 showaction(pdf_dict_getp(ctx, field, "AA/X"), "Exit");
370 showaction(pdf_dict_getp(ctx, field, "AA/D"), "Down");
371 showaction(pdf_dict_getp(ctx, field, "AA/U"), "Up");
372 showaction(pdf_dict_getp(ctx, field, "AA/Fo"), "Focus");
373 showaction(pdf_dict_getp(ctx, field, "AA/Bl"), "Blur");
374
375 fz_write_string(ctx, out, "\n");
376
377 kids = pdf_dict_get(ctx, field, PDF_NAME(Kids));
378 n = pdf_array_len(ctx, kids);
379 for (i = 0; i < n; ++i)
380 showfield(pdf_array_get(ctx, kids, i));
381}
382
383static void showform(void)
384{
385 pdf_obj *fields;
386 int i, n;
387
388 fields = pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/AcroForm/Fields");
389 n = pdf_array_len(ctx, fields);
390 for (i = 0; i < n; ++i)
391 showfield(pdf_array_get(ctx, fields, i));
392}
393
/*
 * Characters that separate the elements of an object path. Besides '.'
 * and '/', the brackets are separators too, so "Kids[1]" splits the same
 * way as "Kids.1".
 */
#define SEP ".[]/"

/*
 * Return 1 if `s` is a non-empty string consisting only of the decimal
 * digits '0'..'9', and 0 otherwise (including for NULL or "").
 * No sign, whitespace or other prefix is accepted.
 */
static int isnumber(const char *s)
{
	/* An empty path element is not a number. */
	if (!s || !*s)
		return 0;

	for (; *s; ++s)
	{
		if (*s < '0' || *s > '9')
			return 0;
	}
	return 1;
}
406
407static void showpath(char *path, pdf_obj *obj)
408{
409 if (path && path[0])
410 {
411 char *part = fz_strsep(&path, SEP);
412 if (part && part[0])
413 {
414 if (!strcmp(part, "*"))
415 {
416 int i, n;
417 char buf[1000];
418 if (pdf_is_array(ctx, obj))
419 {
420 n = pdf_array_len(ctx, obj);
421 for (i = 0; i < n; ++i)
422 {
423 if (path)
424 {
425 fz_strlcpy(buf, path, sizeof buf);
426 showpath(buf, pdf_array_get(ctx, obj, i));
427 }
428 else
429 showpath(NULL, pdf_array_get(ctx, obj, i));
430 }
431 }
432 else if (pdf_is_dict(ctx, obj))
433 {
434 n = pdf_dict_len(ctx, obj);
435 for (i = 0; i < n; ++i)
436 {
437 if (path)
438 {
439 fz_strlcpy(buf, path, sizeof buf);
440 showpath(buf, pdf_dict_get_val(ctx, obj, i));
441 }
442 else
443 showpath(NULL, pdf_dict_get_val(ctx, obj, i));
444 }
445 }
446 else
447 {
448 fz_write_string(ctx, out, "null\n");
449 }
450 }
451 else if (isnumber(part) && pdf_is_array(ctx, obj))
452 showpath(path, pdf_array_get(ctx, obj, atoi(part)-1));
453 else
454 showpath(path, pdf_dict_gets(ctx, obj, part));
455 }
456 else
457 fz_write_string(ctx, out, "null\n");
458 }
459 else
460 {
461 if (pdf_is_indirect(ctx, obj))
462 showobject(obj);
463 else
464 {
465 pdf_print_obj(ctx, out, obj, tight, 0);
466 fz_write_string(ctx, out, "\n");
467 }
468 }
469}
470
471static void showpathpage(char *path)
472{
473 if (path)
474 {
475 char *part = fz_strsep(&path, SEP);
476 if (part && part[0])
477 {
478 if (!strcmp(part, "*"))
479 {
480 int i, n;
481 char buf[1000];
482 n = pdf_count_pages(ctx, doc);
483 for (i = 0; i < n; ++i)
484 {
485 if (path)
486 {
487 fz_strlcpy(buf, path, sizeof buf);
488 showpath(buf, pdf_lookup_page_obj(ctx, doc, i));
489 }
490 else
491 showpath(NULL, pdf_lookup_page_obj(ctx, doc, i));
492 }
493 }
494 else if (isnumber(part))
495 showpath(path, pdf_lookup_page_obj(ctx, doc, atoi(part)-1));
496 else
497 fz_write_string(ctx, out, "null\n");
498 }
499 else
500 fz_write_string(ctx, out, "null\n");
501 }
502 else
503 {
504 showpages();
505 }
506}
507
508static void showpathroot(char *path)
509{
510 char buf[2000], *list = buf, *part;
511 fz_strlcpy(buf, path, sizeof buf);
512 part = fz_strsep(&list, SEP);
513 if (part && part[0])
514 {
515 if (!strcmp(part, "trailer"))
516 showpath(list, pdf_trailer(ctx, doc));
517 else if (!strcmp(part, "pages"))
518 showpathpage(list);
519 else if (isnumber(part))
520 {
521 pdf_obj *num = pdf_new_indirect(ctx, doc, atoi(part), 0);
522 fz_try(ctx)
523 showpath(list, num);
524 fz_always(ctx)
525 pdf_drop_obj(ctx, num);
526 fz_catch(ctx)
527 ;
528 }
529 else
530 showpath(list, pdf_dict_gets(ctx, pdf_trailer(ctx, doc), part));
531 }
532 else
533 fz_write_string(ctx, out, "null\n");
534}
535
536static void show(char *sel)
537{
538 if (!strcmp(sel, "trailer"))
539 showtrailer();
540 else if (!strcmp(sel, "xref"))
541 showxref();
542 else if (!strcmp(sel, "pages"))
543 showpages();
544 else if (!strcmp(sel, "grep"))
545 showgrep();
546 else if (!strcmp(sel, "outline"))
547 showoutline();
548 else if (!strcmp(sel, "js"))
549 showjs();
550 else if (!strcmp(sel, "form"))
551 showform();
552 else
553 showpathroot(sel);
554}
555
556int pdfshow_main(int argc, char **argv)
557{
558 char *password = NULL; /* don't throw errors if encrypted */
559 char *filename = NULL;
560 char *output = NULL;
561 int c;
562
563 ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
564 if (!ctx)
565 {
566 fprintf(stderr, "cannot initialise context\n");
567 exit(1);
568 }
569
570 while ((c = fz_getopt(argc, argv, "p:o:beg")) != -1)
571 {
572 switch (c)
573 {
574 case 'p': password = fz_optarg; break;
575 case 'o': output = fz_optarg; break;
576 case 'b': showbinary = 1; break;
577 case 'e': showdecode = 0; break;
578 case 'g': tight = 1; break;
579 default: usage(); break;
580 }
581 }
582
583 if (fz_optind == argc)
584 usage();
585
586 filename = argv[fz_optind++];
587
588 if (output)
589 out = fz_new_output_with_path(ctx, output, 0);
590 else
591 out = fz_stdout(ctx);
592
593 fz_var(doc);
594 fz_try(ctx)
595 {
596 doc = pdf_open_document(ctx, filename);
597 if (pdf_needs_password(ctx, doc))
598 if (!pdf_authenticate_password(ctx, doc, password))
599 fz_warn(ctx, "cannot authenticate password: %s", filename);
600
601 if (fz_optind == argc)
602 showtrailer();
603
604 while (fz_optind < argc)
605 show(argv[fz_optind++]);
606
607 fz_close_output(ctx, out);
608 }
609 fz_catch(ctx)
610 {
611 }
612
613 fz_drop_output(ctx, out);
614 pdf_drop_document(ctx, doc);
615 fz_drop_context(ctx);
616 return 0;
617}
618