1/*
2 * PDF cleaning tool: general purpose pdf syntax washer.
3 *
4 * Rewrite PDF with pretty printed objects.
5 * Garbage collect unreachable objects.
6 * Inflate compressed streams.
7 * Create subset documents.
8 *
9 * TODO: linearize document for fast web view
10 */
11
12#include "mupdf/fitz.h"
13#include "mupdf/pdf.h"
14
15#include <string.h>
16#include <stdlib.h>
17#include <stdio.h>
18
/*
 * Print the command line synopsis for "mutool clean" to stderr and
 * terminate the process with exit status 1. This function never returns.
 */
static void usage(void)
{
	static const char help_text[] =
		"usage: mutool clean [options] input.pdf [output.pdf] [pages]\n"
		"\t-p -\tpassword\n"
		"\t-g\tgarbage collect unused objects\n"
		"\t-gg\tin addition to -g compact xref table\n"
		"\t-ggg\tin addition to -gg merge duplicate objects\n"
		"\t-gggg\tin addition to -ggg check streams for duplication\n"
		"\t-l\tlinearize PDF\n"
		"\t-D\tsave file without encryption\n"
		"\t-E -\tsave file with new encryption (rc4-40, rc4-128, aes-128, or aes-256)\n"
		"\t-O -\towner password (only if encrypting)\n"
		"\t-U -\tuser password (only if encrypting)\n"
		"\t-P -\tpermission flags (only if encrypting)\n"
		"\t-a\tascii hex encode binary streams\n"
		"\t-d\tdecompress streams\n"
		"\t-z\tdeflate uncompressed streams\n"
		"\t-f\tcompress font streams\n"
		"\t-i\tcompress image streams\n"
		"\t-c\tclean content streams\n"
		"\t-s\tsanitize content streams\n"
		"\t-A\tcreate appearance streams for annotations\n"
		"\t-AA\trecreate appearance streams for annotations\n"
		"\tpages\tcomma separated list of page numbers and ranges\n";

	/* The text contains no conversion specifiers, so it is written verbatim. */
	fputs(help_text, stderr);
	exit(1);
}
47
48static int encrypt_method_from_string(const char *name)
49{
50 if (!strcmp(name, "rc4-40")) return PDF_ENCRYPT_RC4_40;
51 if (!strcmp(name, "rc4-128")) return PDF_ENCRYPT_RC4_128;
52 if (!strcmp(name, "aes-128")) return PDF_ENCRYPT_AES_128;
53 if (!strcmp(name, "aes-256")) return PDF_ENCRYPT_AES_256;
54 return PDF_ENCRYPT_UNKNOWN;
55}
56
57int pdfclean_main(int argc, char **argv)
58{
59 char *infile;
60 char *outfile = "out.pdf";
61 char *password = "";
62 int c;
63 pdf_write_options opts = pdf_default_write_options;
64 int errors = 0;
65 fz_context *ctx;
66
67 while ((c = fz_getopt(argc, argv, "adfgilp:sczDAE:O:U:P:")) != -1)
68 {
69 switch (c)
70 {
71 case 'p': password = fz_optarg; break;
72
73 case 'd': opts.do_decompress += 1; break;
74 case 'z': opts.do_compress += 1; break;
75 case 'f': opts.do_compress_fonts += 1; break;
76 case 'i': opts.do_compress_images += 1; break;
77 case 'a': opts.do_ascii += 1; break;
78 case 'g': opts.do_garbage += 1; break;
79 case 'l': opts.do_linear += 1; break;
80 case 'c': opts.do_clean += 1; break;
81 case 's': opts.do_sanitize += 1; break;
82 case 'A': opts.do_appearance += 1; break;
83
84 case 'D': opts.do_encrypt = PDF_ENCRYPT_NONE; break;
85 case 'E': opts.do_encrypt = encrypt_method_from_string(fz_optarg); break;
86 case 'P': opts.permissions = fz_atoi(fz_optarg); break;
87 case 'O': fz_strlcpy(opts.opwd_utf8, fz_optarg, sizeof opts.opwd_utf8); break;
88 case 'U': fz_strlcpy(opts.upwd_utf8, fz_optarg, sizeof opts.upwd_utf8); break;
89
90 default: usage(); break;
91 }
92 }
93
94 if ((opts.do_ascii || opts.do_decompress) && !opts.do_compress)
95 opts.do_pretty = 1;
96
97 if (argc - fz_optind < 1)
98 usage();
99
100 infile = argv[fz_optind++];
101
102 if (argc - fz_optind > 0 &&
103 (strstr(argv[fz_optind], ".pdf") || strstr(argv[fz_optind], ".PDF")))
104 {
105 outfile = argv[fz_optind++];
106 }
107
108 ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
109 if (!ctx)
110 {
111 fprintf(stderr, "cannot initialise context\n");
112 exit(1);
113 }
114
115 fz_try(ctx)
116 {
117 pdf_clean_file(ctx, infile, outfile, password, &opts, &argv[fz_optind], argc - fz_optind);
118 }
119 fz_catch(ctx)
120 {
121 errors++;
122 }
123 fz_drop_context(ctx);
124
125 return errors != 0;
126}
127