1 | /* |
2 | * PDF cleaning tool: general purpose pdf syntax washer. |
3 | * |
4 | * Rewrite PDF with pretty printed objects. |
5 | * Garbage collect unreachable objects. |
6 | * Inflate compressed streams. |
7 | * Create subset documents. |
8 | * |
9 | * TODO: linearize document for fast web view |
10 | */ |
11 | |
12 | #include "mupdf/fitz.h" |
13 | #include "mupdf/pdf.h" |
14 | |
15 | #include <string.h> |
16 | #include <stdlib.h> |
17 | #include <stdio.h> |
18 | |
/*
 * Write the "mutool clean" command-line synopsis to stderr and terminate
 * the process with exit status 1. This function does not return.
 */
static void usage(void)
{
	/* No printf conversions appear in this text, so it is emitted verbatim. */
	static const char help_text[] =
		"usage: mutool clean [options] input.pdf [output.pdf] [pages]\n"
		"\t-p -\tpassword\n"
		"\t-g\tgarbage collect unused objects\n"
		"\t-gg\tin addition to -g compact xref table\n"
		"\t-ggg\tin addition to -gg merge duplicate objects\n"
		"\t-gggg\tin addition to -ggg check streams for duplication\n"
		"\t-l\tlinearize PDF\n"
		"\t-D\tsave file without encryption\n"
		"\t-E -\tsave file with new encryption (rc4-40, rc4-128, aes-128, or aes-256)\n"
		"\t-O -\towner password (only if encrypting)\n"
		"\t-U -\tuser password (only if encrypting)\n"
		"\t-P -\tpermission flags (only if encrypting)\n"
		"\t-a\tascii hex encode binary streams\n"
		"\t-d\tdecompress streams\n"
		"\t-z\tdeflate uncompressed streams\n"
		"\t-f\tcompress font streams\n"
		"\t-i\tcompress image streams\n"
		"\t-c\tclean content streams\n"
		"\t-s\tsanitize content streams\n"
		"\t-A\tcreate appearance streams for annotations\n"
		"\t-AA\trecreate appearance streams for annotations\n"
		"\tpages\tcomma separated list of page numbers and ranges\n";

	fputs(help_text, stderr);
	exit(1);
}
47 | |
48 | static int encrypt_method_from_string(const char *name) |
49 | { |
50 | if (!strcmp(name, "rc4-40" )) return PDF_ENCRYPT_RC4_40; |
51 | if (!strcmp(name, "rc4-128" )) return PDF_ENCRYPT_RC4_128; |
52 | if (!strcmp(name, "aes-128" )) return PDF_ENCRYPT_AES_128; |
53 | if (!strcmp(name, "aes-256" )) return PDF_ENCRYPT_AES_256; |
54 | return PDF_ENCRYPT_UNKNOWN; |
55 | } |
56 | |
57 | int pdfclean_main(int argc, char **argv) |
58 | { |
59 | char *infile; |
60 | char *outfile = "out.pdf" ; |
61 | char *password = "" ; |
62 | int c; |
63 | pdf_write_options opts = pdf_default_write_options; |
64 | int errors = 0; |
65 | fz_context *ctx; |
66 | |
67 | while ((c = fz_getopt(argc, argv, "adfgilp:sczDAE:O:U:P:" )) != -1) |
68 | { |
69 | switch (c) |
70 | { |
71 | case 'p': password = fz_optarg; break; |
72 | |
73 | case 'd': opts.do_decompress += 1; break; |
74 | case 'z': opts.do_compress += 1; break; |
75 | case 'f': opts.do_compress_fonts += 1; break; |
76 | case 'i': opts.do_compress_images += 1; break; |
77 | case 'a': opts.do_ascii += 1; break; |
78 | case 'g': opts.do_garbage += 1; break; |
79 | case 'l': opts.do_linear += 1; break; |
80 | case 'c': opts.do_clean += 1; break; |
81 | case 's': opts.do_sanitize += 1; break; |
82 | case 'A': opts.do_appearance += 1; break; |
83 | |
84 | case 'D': opts.do_encrypt = PDF_ENCRYPT_NONE; break; |
85 | case 'E': opts.do_encrypt = encrypt_method_from_string(fz_optarg); break; |
86 | case 'P': opts.permissions = fz_atoi(fz_optarg); break; |
87 | case 'O': fz_strlcpy(opts.opwd_utf8, fz_optarg, sizeof opts.opwd_utf8); break; |
88 | case 'U': fz_strlcpy(opts.upwd_utf8, fz_optarg, sizeof opts.upwd_utf8); break; |
89 | |
90 | default: usage(); break; |
91 | } |
92 | } |
93 | |
94 | if ((opts.do_ascii || opts.do_decompress) && !opts.do_compress) |
95 | opts.do_pretty = 1; |
96 | |
97 | if (argc - fz_optind < 1) |
98 | usage(); |
99 | |
100 | infile = argv[fz_optind++]; |
101 | |
102 | if (argc - fz_optind > 0 && |
103 | (strstr(argv[fz_optind], ".pdf" ) || strstr(argv[fz_optind], ".PDF" ))) |
104 | { |
105 | outfile = argv[fz_optind++]; |
106 | } |
107 | |
108 | ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); |
109 | if (!ctx) |
110 | { |
111 | fprintf(stderr, "cannot initialise context\n" ); |
112 | exit(1); |
113 | } |
114 | |
115 | fz_try(ctx) |
116 | { |
117 | pdf_clean_file(ctx, infile, outfile, password, &opts, &argv[fz_optind], argc - fz_optind); |
118 | } |
119 | fz_catch(ctx) |
120 | { |
121 | errors++; |
122 | } |
123 | fz_drop_context(ctx); |
124 | |
125 | return errors != 0; |
126 | } |
127 | |