1 | #include "mupdf/fitz.h" |
2 | #include "mupdf/pdf.h" |
3 | |
4 | #include <string.h> |
5 | |
6 | /* |
7 | * CMap parser |
8 | */ |
9 | |
/*
 * Assemble an integer from the first len bytes of buf, most significant
 * byte first. Each byte is read as an unsigned value and shifted in from
 * the right, so earlier bytes end up in higher positions. A len of zero
 * yields 0.
 */
static int
pdf_code_from_string(char *buf, int len)
{
	const unsigned char *p = (const unsigned char *)buf;
	unsigned int code = 0;

	for (; len != 0; len--)
		code = (code << 8) | *p++;

	return code;
}
18 | |
19 | static void |
20 | pdf_parse_cmap_name(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf) |
21 | { |
22 | pdf_token tok; |
23 | |
24 | tok = pdf_lex(ctx, file, buf); |
25 | |
26 | if (tok == PDF_TOK_NAME) |
27 | fz_strlcpy(cmap->cmap_name, buf->scratch, sizeof(cmap->cmap_name)); |
28 | else |
29 | fz_warn(ctx, "expected name after CMapName in cmap" ); |
30 | } |
31 | |
32 | static void |
33 | pdf_parse_wmode(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf) |
34 | { |
35 | pdf_token tok; |
36 | |
37 | tok = pdf_lex(ctx, file, buf); |
38 | |
39 | if (tok == PDF_TOK_INT) |
40 | pdf_set_cmap_wmode(ctx, cmap, buf->i); |
41 | else |
42 | fz_warn(ctx, "expected integer after WMode in cmap" ); |
43 | } |
44 | |
45 | static void |
46 | pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf) |
47 | { |
48 | pdf_token tok; |
49 | int lo, hi; |
50 | |
51 | while (1) |
52 | { |
53 | tok = pdf_lex(ctx, file, buf); |
54 | |
55 | if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endcodespacerange" )) |
56 | return; |
57 | |
58 | else if (tok == PDF_TOK_STRING) |
59 | { |
60 | lo = pdf_code_from_string(buf->scratch, buf->len); |
61 | tok = pdf_lex(ctx, file, buf); |
62 | if (tok == PDF_TOK_STRING) |
63 | { |
64 | hi = pdf_code_from_string(buf->scratch, buf->len); |
65 | pdf_add_codespace(ctx, cmap, lo, hi, buf->len); |
66 | } |
67 | else break; |
68 | } |
69 | |
70 | else break; |
71 | } |
72 | |
73 | fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endcodespacerange" ); |
74 | } |
75 | |
76 | static void |
77 | pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf) |
78 | { |
79 | pdf_token tok; |
80 | int lo, hi, dst; |
81 | |
82 | while (1) |
83 | { |
84 | tok = pdf_lex(ctx, file, buf); |
85 | |
86 | if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endcidrange" )) |
87 | return; |
88 | |
89 | else if (tok != PDF_TOK_STRING) |
90 | fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endcidrange" ); |
91 | |
92 | lo = pdf_code_from_string(buf->scratch, buf->len); |
93 | |
94 | tok = pdf_lex(ctx, file, buf); |
95 | if (tok != PDF_TOK_STRING) |
96 | fz_throw(ctx, FZ_ERROR_GENERIC, "expected string" ); |
97 | |
98 | hi = pdf_code_from_string(buf->scratch, buf->len); |
99 | |
100 | tok = pdf_lex(ctx, file, buf); |
101 | if (tok != PDF_TOK_INT) |
102 | fz_throw(ctx, FZ_ERROR_GENERIC, "expected integer" ); |
103 | |
104 | dst = buf->i; |
105 | |
106 | pdf_map_range_to_range(ctx, cmap, lo, hi, dst); |
107 | } |
108 | } |
109 | |
110 | static void |
111 | pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf) |
112 | { |
113 | pdf_token tok; |
114 | int src, dst; |
115 | |
116 | while (1) |
117 | { |
118 | tok = pdf_lex(ctx, file, buf); |
119 | |
120 | if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endcidchar" )) |
121 | return; |
122 | |
123 | else if (tok != PDF_TOK_STRING) |
124 | fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endcidchar" ); |
125 | |
126 | src = pdf_code_from_string(buf->scratch, buf->len); |
127 | |
128 | tok = pdf_lex(ctx, file, buf); |
129 | if (tok != PDF_TOK_INT) |
130 | fz_throw(ctx, FZ_ERROR_GENERIC, "expected integer" ); |
131 | |
132 | dst = buf->i; |
133 | |
134 | pdf_map_range_to_range(ctx, cmap, src, src, dst); |
135 | } |
136 | } |
137 | |
138 | static void |
139 | pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf, int lo, int hi) |
140 | { |
141 | pdf_token tok; |
142 | int dst[256]; |
143 | int i; |
144 | |
145 | while (1) |
146 | { |
147 | tok = pdf_lex(ctx, file, buf); |
148 | |
149 | if (tok == PDF_TOK_CLOSE_ARRAY) |
150 | return; |
151 | |
152 | /* Note: does not handle [ /Name /Name ... ] */ |
153 | else if (tok != PDF_TOK_STRING) |
154 | fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or ]" ); |
155 | |
156 | if (buf->len / 2) |
157 | { |
158 | int len = fz_mini(buf->len / 2, nelem(dst)); |
159 | for (i = 0; i < len; i++) |
160 | dst[i] = pdf_code_from_string(&buf->scratch[i * 2], 2); |
161 | |
162 | pdf_map_one_to_many(ctx, cmap, lo, dst, buf->len / 2); |
163 | } |
164 | |
165 | lo ++; |
166 | } |
167 | } |
168 | |
169 | static void |
170 | pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf) |
171 | { |
172 | pdf_token tok; |
173 | int lo, hi, dst; |
174 | |
175 | while (1) |
176 | { |
177 | tok = pdf_lex(ctx, file, buf); |
178 | |
179 | if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endbfrange" )) |
180 | return; |
181 | |
182 | else if (tok != PDF_TOK_STRING) |
183 | fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endbfrange" ); |
184 | |
185 | lo = pdf_code_from_string(buf->scratch, buf->len); |
186 | |
187 | tok = pdf_lex(ctx, file, buf); |
188 | if (tok != PDF_TOK_STRING) |
189 | fz_throw(ctx, FZ_ERROR_GENERIC, "expected string" ); |
190 | |
191 | hi = pdf_code_from_string(buf->scratch, buf->len); |
192 | if (lo < 0 || lo > 65535 || hi < 0 || hi > 65535 || lo > hi) |
193 | { |
194 | fz_warn(ctx, "bf_range limits out of range in cmap %s" , cmap->cmap_name); |
195 | return; |
196 | } |
197 | |
198 | tok = pdf_lex(ctx, file, buf); |
199 | |
200 | if (tok == PDF_TOK_STRING) |
201 | { |
202 | if (buf->len == 2) |
203 | { |
204 | dst = pdf_code_from_string(buf->scratch, buf->len); |
205 | pdf_map_range_to_range(ctx, cmap, lo, hi, dst); |
206 | } |
207 | else |
208 | { |
209 | int dststr[256]; |
210 | int i; |
211 | |
212 | if (buf->len / 2) |
213 | { |
214 | int len = fz_mini(buf->len / 2, nelem(dststr)); |
215 | for (i = 0; i < len; i++) |
216 | dststr[i] = pdf_code_from_string(&buf->scratch[i * 2], 2); |
217 | |
218 | while (lo <= hi) |
219 | { |
220 | pdf_map_one_to_many(ctx, cmap, lo, dststr, i); |
221 | dststr[i-1] ++; |
222 | lo ++; |
223 | } |
224 | } |
225 | } |
226 | } |
227 | |
228 | else if (tok == PDF_TOK_OPEN_ARRAY) |
229 | { |
230 | pdf_parse_bf_range_array(ctx, cmap, file, buf, lo, hi); |
231 | } |
232 | |
233 | else |
234 | { |
235 | fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or array or endbfrange" ); |
236 | } |
237 | } |
238 | } |
239 | |
240 | static void |
241 | pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf) |
242 | { |
243 | pdf_token tok; |
244 | int dst[256]; |
245 | int src; |
246 | int i; |
247 | |
248 | while (1) |
249 | { |
250 | tok = pdf_lex(ctx, file, buf); |
251 | |
252 | if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endbfchar" )) |
253 | return; |
254 | |
255 | else if (tok != PDF_TOK_STRING) |
256 | fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endbfchar" ); |
257 | |
258 | src = pdf_code_from_string(buf->scratch, buf->len); |
259 | |
260 | tok = pdf_lex(ctx, file, buf); |
261 | /* Note: does not handle /dstName */ |
262 | if (tok != PDF_TOK_STRING) |
263 | fz_throw(ctx, FZ_ERROR_GENERIC, "expected string" ); |
264 | |
265 | if (buf->len / 2) |
266 | { |
267 | int len = fz_mini(buf->len / 2, nelem(dst)); |
268 | for (i = 0; i < len; i++) |
269 | dst[i] = pdf_code_from_string(&buf->scratch[i * 2], 2); |
270 | pdf_map_one_to_many(ctx, cmap, src, dst, i); |
271 | } |
272 | } |
273 | } |
274 | |
275 | pdf_cmap * |
276 | pdf_load_cmap(fz_context *ctx, fz_stream *file) |
277 | { |
278 | pdf_cmap *cmap; |
279 | char key[64]; |
280 | pdf_lexbuf buf; |
281 | pdf_token tok; |
282 | |
283 | pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL); |
284 | cmap = pdf_new_cmap(ctx); |
285 | |
286 | strcpy(key, ".notdef" ); |
287 | |
288 | fz_try(ctx) |
289 | { |
290 | while (1) |
291 | { |
292 | tok = pdf_lex(ctx, file, &buf); |
293 | |
294 | if (tok == PDF_TOK_EOF) |
295 | break; |
296 | |
297 | else if (tok == PDF_TOK_NAME) |
298 | { |
299 | if (!strcmp(buf.scratch, "CMapName" )) |
300 | pdf_parse_cmap_name(ctx, cmap, file, &buf); |
301 | else if (!strcmp(buf.scratch, "WMode" )) |
302 | pdf_parse_wmode(ctx, cmap, file, &buf); |
303 | else |
304 | fz_strlcpy(key, buf.scratch, sizeof key); |
305 | } |
306 | |
307 | else if (tok == PDF_TOK_KEYWORD) |
308 | { |
309 | if (!strcmp(buf.scratch, "endcmap" )) |
310 | break; |
311 | |
312 | else if (!strcmp(buf.scratch, "usecmap" )) |
313 | fz_strlcpy(cmap->usecmap_name, key, sizeof(cmap->usecmap_name)); |
314 | |
315 | else if (!strcmp(buf.scratch, "begincodespacerange" )) |
316 | pdf_parse_codespace_range(ctx, cmap, file, &buf); |
317 | |
318 | else if (!strcmp(buf.scratch, "beginbfchar" )) |
319 | pdf_parse_bf_char(ctx, cmap, file, &buf); |
320 | |
321 | else if (!strcmp(buf.scratch, "begincidchar" )) |
322 | pdf_parse_cid_char(ctx, cmap, file, &buf); |
323 | |
324 | else if (!strcmp(buf.scratch, "beginbfrange" )) |
325 | pdf_parse_bf_range(ctx, cmap, file, &buf); |
326 | |
327 | else if (!strcmp(buf.scratch, "begincidrange" )) |
328 | pdf_parse_cid_range(ctx, cmap, file, &buf); |
329 | } |
330 | |
331 | /* ignore everything else */ |
332 | } |
333 | |
334 | pdf_sort_cmap(ctx, cmap); |
335 | } |
336 | fz_always(ctx) |
337 | { |
338 | pdf_lexbuf_fin(ctx, &buf); |
339 | } |
340 | fz_catch(ctx) |
341 | { |
342 | pdf_drop_cmap(ctx, cmap); |
343 | fz_rethrow(ctx); |
344 | } |
345 | |
346 | return cmap; |
347 | } |
348 | |