1#include "mupdf/fitz.h"
2#include "mupdf/pdf.h"
3
4#include <string.h>
5
6/*
7 * CMap parser
8 */
9
/*
 * Fold the first 'len' bytes of 'buf' into a single integer, most
 * significant byte first. Each byte is read as unsigned; the unsigned
 * accumulator is converted to int on return.
 */
static int
pdf_code_from_string(char *buf, int len)
{
	const unsigned char *byte = (const unsigned char *)buf;
	unsigned int code = 0;

	for (; len != 0; len--)
		code = (code << 8) | *byte++;

	return code;
}
18
19static void
20pdf_parse_cmap_name(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
21{
22 pdf_token tok;
23
24 tok = pdf_lex(ctx, file, buf);
25
26 if (tok == PDF_TOK_NAME)
27 fz_strlcpy(cmap->cmap_name, buf->scratch, sizeof(cmap->cmap_name));
28 else
29 fz_warn(ctx, "expected name after CMapName in cmap");
30}
31
32static void
33pdf_parse_wmode(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
34{
35 pdf_token tok;
36
37 tok = pdf_lex(ctx, file, buf);
38
39 if (tok == PDF_TOK_INT)
40 pdf_set_cmap_wmode(ctx, cmap, buf->i);
41 else
42 fz_warn(ctx, "expected integer after WMode in cmap");
43}
44
45static void
46pdf_parse_codespace_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
47{
48 pdf_token tok;
49 int lo, hi;
50
51 while (1)
52 {
53 tok = pdf_lex(ctx, file, buf);
54
55 if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endcodespacerange"))
56 return;
57
58 else if (tok == PDF_TOK_STRING)
59 {
60 lo = pdf_code_from_string(buf->scratch, buf->len);
61 tok = pdf_lex(ctx, file, buf);
62 if (tok == PDF_TOK_STRING)
63 {
64 hi = pdf_code_from_string(buf->scratch, buf->len);
65 pdf_add_codespace(ctx, cmap, lo, hi, buf->len);
66 }
67 else break;
68 }
69
70 else break;
71 }
72
73 fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endcodespacerange");
74}
75
76static void
77pdf_parse_cid_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
78{
79 pdf_token tok;
80 int lo, hi, dst;
81
82 while (1)
83 {
84 tok = pdf_lex(ctx, file, buf);
85
86 if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endcidrange"))
87 return;
88
89 else if (tok != PDF_TOK_STRING)
90 fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endcidrange");
91
92 lo = pdf_code_from_string(buf->scratch, buf->len);
93
94 tok = pdf_lex(ctx, file, buf);
95 if (tok != PDF_TOK_STRING)
96 fz_throw(ctx, FZ_ERROR_GENERIC, "expected string");
97
98 hi = pdf_code_from_string(buf->scratch, buf->len);
99
100 tok = pdf_lex(ctx, file, buf);
101 if (tok != PDF_TOK_INT)
102 fz_throw(ctx, FZ_ERROR_GENERIC, "expected integer");
103
104 dst = buf->i;
105
106 pdf_map_range_to_range(ctx, cmap, lo, hi, dst);
107 }
108}
109
110static void
111pdf_parse_cid_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
112{
113 pdf_token tok;
114 int src, dst;
115
116 while (1)
117 {
118 tok = pdf_lex(ctx, file, buf);
119
120 if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endcidchar"))
121 return;
122
123 else if (tok != PDF_TOK_STRING)
124 fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endcidchar");
125
126 src = pdf_code_from_string(buf->scratch, buf->len);
127
128 tok = pdf_lex(ctx, file, buf);
129 if (tok != PDF_TOK_INT)
130 fz_throw(ctx, FZ_ERROR_GENERIC, "expected integer");
131
132 dst = buf->i;
133
134 pdf_map_range_to_range(ctx, cmap, src, src, dst);
135 }
136}
137
138static void
139pdf_parse_bf_range_array(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf, int lo, int hi)
140{
141 pdf_token tok;
142 int dst[256];
143 int i;
144
145 while (1)
146 {
147 tok = pdf_lex(ctx, file, buf);
148
149 if (tok == PDF_TOK_CLOSE_ARRAY)
150 return;
151
152 /* Note: does not handle [ /Name /Name ... ] */
153 else if (tok != PDF_TOK_STRING)
154 fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or ]");
155
156 if (buf->len / 2)
157 {
158 int len = fz_mini(buf->len / 2, nelem(dst));
159 for (i = 0; i < len; i++)
160 dst[i] = pdf_code_from_string(&buf->scratch[i * 2], 2);
161
162 pdf_map_one_to_many(ctx, cmap, lo, dst, buf->len / 2);
163 }
164
165 lo ++;
166 }
167}
168
169static void
170pdf_parse_bf_range(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
171{
172 pdf_token tok;
173 int lo, hi, dst;
174
175 while (1)
176 {
177 tok = pdf_lex(ctx, file, buf);
178
179 if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endbfrange"))
180 return;
181
182 else if (tok != PDF_TOK_STRING)
183 fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endbfrange");
184
185 lo = pdf_code_from_string(buf->scratch, buf->len);
186
187 tok = pdf_lex(ctx, file, buf);
188 if (tok != PDF_TOK_STRING)
189 fz_throw(ctx, FZ_ERROR_GENERIC, "expected string");
190
191 hi = pdf_code_from_string(buf->scratch, buf->len);
192 if (lo < 0 || lo > 65535 || hi < 0 || hi > 65535 || lo > hi)
193 {
194 fz_warn(ctx, "bf_range limits out of range in cmap %s", cmap->cmap_name);
195 return;
196 }
197
198 tok = pdf_lex(ctx, file, buf);
199
200 if (tok == PDF_TOK_STRING)
201 {
202 if (buf->len == 2)
203 {
204 dst = pdf_code_from_string(buf->scratch, buf->len);
205 pdf_map_range_to_range(ctx, cmap, lo, hi, dst);
206 }
207 else
208 {
209 int dststr[256];
210 int i;
211
212 if (buf->len / 2)
213 {
214 int len = fz_mini(buf->len / 2, nelem(dststr));
215 for (i = 0; i < len; i++)
216 dststr[i] = pdf_code_from_string(&buf->scratch[i * 2], 2);
217
218 while (lo <= hi)
219 {
220 pdf_map_one_to_many(ctx, cmap, lo, dststr, i);
221 dststr[i-1] ++;
222 lo ++;
223 }
224 }
225 }
226 }
227
228 else if (tok == PDF_TOK_OPEN_ARRAY)
229 {
230 pdf_parse_bf_range_array(ctx, cmap, file, buf, lo, hi);
231 }
232
233 else
234 {
235 fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or array or endbfrange");
236 }
237 }
238}
239
240static void
241pdf_parse_bf_char(fz_context *ctx, pdf_cmap *cmap, fz_stream *file, pdf_lexbuf *buf)
242{
243 pdf_token tok;
244 int dst[256];
245 int src;
246 int i;
247
248 while (1)
249 {
250 tok = pdf_lex(ctx, file, buf);
251
252 if (tok == PDF_TOK_KEYWORD && !strcmp(buf->scratch, "endbfchar"))
253 return;
254
255 else if (tok != PDF_TOK_STRING)
256 fz_throw(ctx, FZ_ERROR_GENERIC, "expected string or endbfchar");
257
258 src = pdf_code_from_string(buf->scratch, buf->len);
259
260 tok = pdf_lex(ctx, file, buf);
261 /* Note: does not handle /dstName */
262 if (tok != PDF_TOK_STRING)
263 fz_throw(ctx, FZ_ERROR_GENERIC, "expected string");
264
265 if (buf->len / 2)
266 {
267 int len = fz_mini(buf->len / 2, nelem(dst));
268 for (i = 0; i < len; i++)
269 dst[i] = pdf_code_from_string(&buf->scratch[i * 2], 2);
270 pdf_map_one_to_many(ctx, cmap, src, dst, i);
271 }
272 }
273}
274
275pdf_cmap *
276pdf_load_cmap(fz_context *ctx, fz_stream *file)
277{
278 pdf_cmap *cmap;
279 char key[64];
280 pdf_lexbuf buf;
281 pdf_token tok;
282
283 pdf_lexbuf_init(ctx, &buf, PDF_LEXBUF_SMALL);
284 cmap = pdf_new_cmap(ctx);
285
286 strcpy(key, ".notdef");
287
288 fz_try(ctx)
289 {
290 while (1)
291 {
292 tok = pdf_lex(ctx, file, &buf);
293
294 if (tok == PDF_TOK_EOF)
295 break;
296
297 else if (tok == PDF_TOK_NAME)
298 {
299 if (!strcmp(buf.scratch, "CMapName"))
300 pdf_parse_cmap_name(ctx, cmap, file, &buf);
301 else if (!strcmp(buf.scratch, "WMode"))
302 pdf_parse_wmode(ctx, cmap, file, &buf);
303 else
304 fz_strlcpy(key, buf.scratch, sizeof key);
305 }
306
307 else if (tok == PDF_TOK_KEYWORD)
308 {
309 if (!strcmp(buf.scratch, "endcmap"))
310 break;
311
312 else if (!strcmp(buf.scratch, "usecmap"))
313 fz_strlcpy(cmap->usecmap_name, key, sizeof(cmap->usecmap_name));
314
315 else if (!strcmp(buf.scratch, "begincodespacerange"))
316 pdf_parse_codespace_range(ctx, cmap, file, &buf);
317
318 else if (!strcmp(buf.scratch, "beginbfchar"))
319 pdf_parse_bf_char(ctx, cmap, file, &buf);
320
321 else if (!strcmp(buf.scratch, "begincidchar"))
322 pdf_parse_cid_char(ctx, cmap, file, &buf);
323
324 else if (!strcmp(buf.scratch, "beginbfrange"))
325 pdf_parse_bf_range(ctx, cmap, file, &buf);
326
327 else if (!strcmp(buf.scratch, "begincidrange"))
328 pdf_parse_cid_range(ctx, cmap, file, &buf);
329 }
330
331 /* ignore everything else */
332 }
333
334 pdf_sort_cmap(ctx, cmap);
335 }
336 fz_always(ctx)
337 {
338 pdf_lexbuf_fin(ctx, &buf);
339 }
340 fz_catch(ctx)
341 {
342 pdf_drop_cmap(ctx, cmap);
343 fz_rethrow(ctx);
344 }
345
346 return cmap;
347}
348