1 | #include "mupdf/fitz.h" |
2 | #include "mupdf/pdf.h" |
3 | |
4 | #include "../fitz/fitz-imp.h" |
5 | |
6 | #include <assert.h> |
7 | |
8 | #include <ft2build.h> |
9 | #include FT_FREETYPE_H |
10 | #include FT_ADVANCES_H |
11 | #ifdef FT_FONT_FORMATS_H |
12 | #include FT_FONT_FORMATS_H |
13 | #else |
14 | #include FT_XFREE86_H |
15 | #endif |
16 | #include FT_TRUETYPE_TABLES_H |
17 | |
18 | #ifndef FT_SFNT_HEAD |
19 | #define FT_SFNT_HEAD ft_sfnt_head |
20 | #endif |
21 | |
22 | void |
23 | pdf_load_encoding(const char **estrings, const char *encoding) |
24 | { |
25 | const char * const *bstrings = NULL; |
26 | int i; |
27 | |
28 | if (!strcmp(encoding, "StandardEncoding" )) |
29 | bstrings = fz_glyph_name_from_adobe_standard; |
30 | if (!strcmp(encoding, "MacRomanEncoding" )) |
31 | bstrings = fz_glyph_name_from_mac_roman; |
32 | if (!strcmp(encoding, "MacExpertEncoding" )) |
33 | bstrings = fz_glyph_name_from_mac_expert; |
34 | if (!strcmp(encoding, "WinAnsiEncoding" )) |
35 | bstrings = fz_glyph_name_from_win_ansi; |
36 | |
37 | if (bstrings) |
38 | for (i = 0; i < 256; i++) |
39 | estrings[i] = bstrings[i]; |
40 | } |
41 | |
/*
 * Forward declaration: pdf_load_simple_font calls this function, and no
 * definition of it precedes that call in this file.
 */
static void pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict,
	const char *collection, const char *basefont, int iscidfont);
44 | |
/*
 * Alias table used by pdf_clean_font_name.
 *
 * Each row is a NULL-terminated list of font names. The first entry of a
 * row is the name pdf_clean_font_name returns when any entry of that row
 * matches (compared with spaces ignored).
 */
static const char *base_font_names[][10] =
{
	{ "Courier", "CourierNew", "CourierNewPSMT", NULL },
	{ "Courier-Bold", "CourierNew,Bold", "Courier,Bold",
		"CourierNewPS-BoldMT", "CourierNew-Bold", NULL },
	{ "Courier-Oblique", "CourierNew,Italic", "Courier,Italic",
		"CourierNewPS-ItalicMT", "CourierNew-Italic", NULL },
	{ "Courier-BoldOblique", "CourierNew,BoldItalic", "Courier,BoldItalic",
		"CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", NULL },
	{ "Helvetica", "ArialMT", "Arial", NULL },
	{ "Helvetica-Bold", "Arial-BoldMT", "Arial,Bold", "Arial-Bold",
		"Helvetica,Bold", NULL },
	{ "Helvetica-Oblique", "Arial-ItalicMT", "Arial,Italic", "Arial-Italic",
		"Helvetica,Italic", "Helvetica-Italic", NULL },
	{ "Helvetica-BoldOblique", "Arial-BoldItalicMT",
		"Arial,BoldItalic", "Arial-BoldItalic",
		"Helvetica,BoldItalic", "Helvetica-BoldItalic", NULL },
	{ "Times-Roman", "TimesNewRomanPSMT", "TimesNewRoman",
		"TimesNewRomanPS", NULL },
	{ "Times-Bold", "TimesNewRomanPS-BoldMT", "TimesNewRoman,Bold",
		"TimesNewRomanPS-Bold", "TimesNewRoman-Bold", NULL },
	{ "Times-Italic", "TimesNewRomanPS-ItalicMT", "TimesNewRoman,Italic",
		"TimesNewRomanPS-Italic", "TimesNewRoman-Italic", NULL },
	{ "Times-BoldItalic", "TimesNewRomanPS-BoldItalicMT",
		"TimesNewRoman,BoldItalic", "TimesNewRomanPS-BoldItalic",
		"TimesNewRoman-BoldItalic", NULL },
	{ "Symbol", "Symbol,Italic", "Symbol,Bold", "Symbol,BoldItalic",
		"SymbolMT", "SymbolMT,Italic", "SymbolMT,Bold", "SymbolMT,BoldItalic", NULL },
	{ "ZapfDingbats", NULL }
};
75 | |
76 | const unsigned char * |
77 | pdf_lookup_substitute_font(fz_context *ctx, int mono, int serif, int bold, int italic, int *len) |
78 | { |
79 | if (mono) { |
80 | if (bold) { |
81 | if (italic) return fz_lookup_base14_font(ctx, "Courier-BoldOblique" , len); |
82 | else return fz_lookup_base14_font(ctx, "Courier-Bold" , len); |
83 | } else { |
84 | if (italic) return fz_lookup_base14_font(ctx, "Courier-Oblique" , len); |
85 | else return fz_lookup_base14_font(ctx, "Courier" , len); |
86 | } |
87 | } else if (serif) { |
88 | if (bold) { |
89 | if (italic) return fz_lookup_base14_font(ctx, "Times-BoldItalic" , len); |
90 | else return fz_lookup_base14_font(ctx, "Times-Bold" , len); |
91 | } else { |
92 | if (italic) return fz_lookup_base14_font(ctx, "Times-Italic" , len); |
93 | else return fz_lookup_base14_font(ctx, "Times-Roman" , len); |
94 | } |
95 | } else { |
96 | if (bold) { |
97 | if (italic) return fz_lookup_base14_font(ctx, "Helvetica-BoldOblique" , len); |
98 | else return fz_lookup_base14_font(ctx, "Helvetica-Bold" , len); |
99 | } else { |
100 | if (italic) return fz_lookup_base14_font(ctx, "Helvetica-Oblique" , len); |
101 | else return fz_lookup_base14_font(ctx, "Helvetica" , len); |
102 | } |
103 | } |
104 | } |
105 | |
/*
 * Return 1 if name matches one of the font-name patterns checked here,
 * 0 otherwise.
 *
 * A name matches when it contains "HuaTian" or "MingLi" anywhere, or when
 * it starts with "DF" / "DLC", or contains "+DF" / "+DLC" (i.e. the same
 * prefixes after a '+').
 *
 * name is only read, so it is const-qualified; callers passing a plain
 * char * are unaffected.
 */
static int is_dynalab(const char *name)
{
	if (strstr(name, "HuaTian") || strstr(name, "MingLi"))
		return 1;
	/* strncmp tests the prefix directly instead of searching the whole string. */
	if (!strncmp(name, "DF", 2) || strstr(name, "+DF"))
		return 1;
	if (!strncmp(name, "DLC", 3) || strstr(name, "+DLC"))
		return 1;
	return 0;
}
118 | |
/*
 * Compare two strings while ignoring every ' ' character in both.
 *
 * Returns 0 when the strings are equal once spaces are skipped, and 1
 * otherwise. Unlike strcmp, the result carries no ordering information.
 */
static int strcmp_ignore_space(const char *a, const char *b)
{
	for (;;)
	{
		while (*a == ' ')
			a++;
		while (*b == ' ')
			b++;
		if (*a != *b)
			return 1;
		/* Here *a == *b, so reaching the terminator means both strings ended. */
		if (*a == 0)
			return 0;
		a++;
		b++;
	}
}
137 | |
138 | const char *pdf_clean_font_name(const char *fontname) |
139 | { |
140 | int i, k; |
141 | for (i = 0; i < nelem(base_font_names); i++) |
142 | for (k = 0; base_font_names[i][k]; k++) |
143 | if (!strcmp_ignore_space(base_font_names[i][k], fontname)) |
144 | return base_font_names[i][0]; |
145 | return fontname; |
146 | } |
147 | |
148 | /* |
149 | * FreeType and Rendering glue |
150 | */ |
151 | |
/* Font kinds returned by ft_kind and used to choose a cmap and encoding strategy. */
enum { UNKNOWN, TYPE1, TRUETYPE };
153 | |
154 | static int ft_kind(FT_Face face) |
155 | { |
156 | #ifdef FT_FONT_FORMATS_H |
157 | const char *kind = FT_Get_Font_Format(face); |
158 | #else |
159 | const char *kind = FT_Get_X11_Font_Format(face); |
160 | #endif |
161 | if (!strcmp(kind, "TrueType" )) return TRUETYPE; |
162 | if (!strcmp(kind, "Type 1" )) return TYPE1; |
163 | if (!strcmp(kind, "CFF" )) return TYPE1; |
164 | if (!strcmp(kind, "CID Type 1" )) return TYPE1; |
165 | return UNKNOWN; |
166 | } |
167 | |
168 | static int ft_cid_to_gid(pdf_font_desc *fontdesc, int cid) |
169 | { |
170 | if (fontdesc->to_ttf_cmap) |
171 | { |
172 | cid = pdf_lookup_cmap(fontdesc->to_ttf_cmap, cid); |
173 | |
174 | /* vertical presentation forms */ |
175 | if (fontdesc->font->flags.ft_substitute && fontdesc->wmode) |
176 | { |
177 | switch (cid) |
178 | { |
179 | case 0x0021: cid = 0xFE15; break; /* ! */ |
180 | case 0x0028: cid = 0xFE35; break; /* ( */ |
181 | case 0x0029: cid = 0xFE36; break; /* ) */ |
182 | case 0x002C: cid = 0xFE10; break; /* , */ |
183 | case 0x003A: cid = 0xFE13; break; /* : */ |
184 | case 0x003B: cid = 0xFE14; break; /* ; */ |
185 | case 0x003F: cid = 0xFE16; break; /* ? */ |
186 | case 0x005B: cid = 0xFE47; break; /* [ */ |
187 | case 0x005D: cid = 0xFE48; break; /* ] */ |
188 | case 0x005F: cid = 0xFE33; break; /* _ */ |
189 | case 0x007B: cid = 0xFE37; break; /* { */ |
190 | case 0x007D: cid = 0xFE38; break; /* } */ |
191 | case 0x2013: cid = 0xFE32; break; /* EN DASH */ |
192 | case 0x2014: cid = 0xFE31; break; /* EM DASH */ |
193 | case 0x2025: cid = 0xFE30; break; /* TWO DOT LEADER */ |
194 | case 0x2026: cid = 0xFE19; break; /* HORIZONTAL ELLIPSIS */ |
195 | case 0x3001: cid = 0xFE11; break; /* IDEOGRAPHIC COMMA */ |
196 | case 0x3002: cid = 0xFE12; break; /* IDEOGRAPHIC FULL STOP */ |
197 | case 0x3008: cid = 0xFE3F; break; /* OPENING ANGLE BRACKET */ |
198 | case 0x3009: cid = 0xFE40; break; /* CLOSING ANGLE BRACKET */ |
199 | case 0x300A: cid = 0xFE3D; break; /* LEFT DOUBLE ANGLE BRACKET */ |
200 | case 0x300B: cid = 0xFE3E; break; /* RIGHT DOUBLE ANGLE BRACKET */ |
201 | case 0x300C: cid = 0xFE41; break; /* LEFT CORNER BRACKET */ |
202 | case 0x300D: cid = 0xFE42; break; /* RIGHT CORNER BRACKET */ |
203 | case 0x300E: cid = 0xFE43; break; /* LEFT WHITE CORNER BRACKET */ |
204 | case 0x300F: cid = 0xFE44; break; /* RIGHT WHITE CORNER BRACKET */ |
205 | case 0x3010: cid = 0xFE3B; break; /* LEFT BLACK LENTICULAR BRACKET */ |
206 | case 0x3011: cid = 0xFE3C; break; /* RIGHT BLACK LENTICULAR BRACKET */ |
207 | case 0x3014: cid = 0xFE39; break; /* LEFT TORTOISE SHELL BRACKET */ |
208 | case 0x3015: cid = 0xFE3A; break; /* RIGHT TORTOISE SHELL BRACKET */ |
209 | case 0x3016: cid = 0xFE17; break; /* LEFT WHITE LENTICULAR BRACKET */ |
210 | case 0x3017: cid = 0xFE18; break; /* RIGHT WHITE LENTICULAR BRACKET */ |
211 | |
212 | case 0xFF01: cid = 0xFE15; break; /* FULLWIDTH EXCLAMATION MARK */ |
213 | case 0xFF08: cid = 0xFE35; break; /* FULLWIDTH LEFT PARENTHESIS */ |
214 | case 0xFF09: cid = 0xFE36; break; /* FULLWIDTH RIGHT PARENTHESIS */ |
215 | case 0xFF0C: cid = 0xFE10; break; /* FULLWIDTH COMMA */ |
216 | case 0xFF1A: cid = 0xFE13; break; /* FULLWIDTH COLON */ |
217 | case 0xFF1B: cid = 0xFE14; break; /* FULLWIDTH SEMICOLON */ |
218 | case 0xFF1F: cid = 0xFE16; break; /* FULLWIDTH QUESTION MARK */ |
219 | case 0xFF3B: cid = 0xFE47; break; /* FULLWIDTH LEFT SQUARE BRACKET */ |
220 | case 0xFF3D: cid = 0xFE48; break; /* FULLWIDTH RIGHT SQUARE BRACKET */ |
221 | case 0xFF3F: cid = 0xFE33; break; /* FULLWIDTH LOW LINE */ |
222 | case 0xFF5B: cid = 0xFE37; break; /* FULLWIDTH LEFT CURLY BRACKET */ |
223 | case 0xFF5D: cid = 0xFE38; break; /* FULLWIDTH RIGHT CURLY BRACKET */ |
224 | |
225 | case 0x30FC: cid = 0xFE31; break; /* KATAKANA-HIRAGANA PROLONGED SOUND MARK */ |
226 | case 0xFF0D: cid = 0xFE31; break; /* FULLWIDTH HYPHEN-MINUS */ |
227 | } |
228 | } |
229 | |
230 | return ft_char_index(fontdesc->font->ft_face, cid); |
231 | } |
232 | |
233 | if (fontdesc->cid_to_gid && (size_t)cid < fontdesc->cid_to_gid_len && cid >= 0) |
234 | return fontdesc->cid_to_gid[cid]; |
235 | |
236 | return cid; |
237 | } |
238 | |
239 | int |
240 | pdf_font_cid_to_gid(fz_context *ctx, pdf_font_desc *fontdesc, int cid) |
241 | { |
242 | if (fontdesc->font->ft_face) |
243 | return ft_cid_to_gid(fontdesc, cid); |
244 | return cid; |
245 | } |
246 | |
247 | static int ft_width(fz_context *ctx, pdf_font_desc *fontdesc, int cid) |
248 | { |
249 | int mask = FT_LOAD_NO_SCALE | FT_LOAD_NO_HINTING | FT_LOAD_NO_BITMAP | FT_LOAD_IGNORE_TRANSFORM; |
250 | int gid = ft_cid_to_gid(fontdesc, cid); |
251 | FT_Fixed adv = 0; |
252 | int fterr; |
253 | FT_Face face = fontdesc->font->ft_face; |
254 | FT_UShort units_per_EM; |
255 | |
256 | fterr = FT_Get_Advance(face, gid, mask, &adv); |
257 | if (fterr && fterr != FT_Err_Invalid_Argument) |
258 | fz_warn(ctx, "FT_Get_Advance(%d): %s" , gid, ft_error_string(fterr)); |
259 | |
260 | units_per_EM = face->units_per_EM; |
261 | if (units_per_EM == 0) |
262 | units_per_EM = 2048; |
263 | |
264 | return adv * 1000 / units_per_EM; |
265 | } |
266 | |
/*
 * Glyph name to character code pairs consulted by lookup_mre_code before
 * it falls back to searching fz_glyph_name_from_mac_roman.
 */
static const struct { int code; const char *name; } mre_diff_table[] =
{
	{ 173, "notequal" },
	{ 176, "infinity" },
	{ 178, "lessequal" },
	{ 179, "greaterequal" },
	{ 182, "partialdiff" },
	{ 183, "summation" },
	{ 184, "product" },
	{ 185, "pi" },
	{ 186, "integral" },
	{ 189, "Omega" },
	{ 195, "radical" },
	{ 197, "approxequal" },
	{ 198, "Delta" },
	{ 215, "lozenge" },
	{ 219, "Euro" },
	{ 240, "apple" },
};
286 | |
287 | static int lookup_mre_code(const char *name) |
288 | { |
289 | int i; |
290 | for (i = 0; i < nelem(mre_diff_table); ++i) |
291 | if (!strcmp(name, mre_diff_table[i].name)) |
292 | return mre_diff_table[i].code; |
293 | for (i = 0; i < 256; i++) |
294 | if (fz_glyph_name_from_mac_roman[i] && !strcmp(name, fz_glyph_name_from_mac_roman[i])) |
295 | return i; |
296 | return -1; |
297 | } |
298 | |
299 | static int ft_find_glyph_by_unicode_name(FT_Face face, const char *name) |
300 | { |
301 | int unicode, glyph; |
302 | |
303 | /* Prefer exact unicode match if available. */ |
304 | unicode = fz_unicode_from_glyph_name_strict(name); |
305 | if (unicode > 0) |
306 | { |
307 | glyph = ft_char_index(face, unicode); |
308 | if (glyph > 0) |
309 | return glyph; |
310 | } |
311 | |
312 | /* Fall back to font glyph name if we can. */ |
313 | glyph = ft_name_index(face, name); |
314 | if (glyph > 0) |
315 | return glyph; |
316 | |
317 | /* Fuzzy unicode match as last attempt. */ |
318 | unicode = fz_unicode_from_glyph_name(name); |
319 | if (unicode > 0) |
320 | return ft_char_index(face, unicode); |
321 | |
322 | /* Failed. */ |
323 | return 0; |
324 | } |
325 | |
326 | /* |
327 | * Load font files. |
328 | */ |
329 | |
330 | static void |
331 | pdf_load_builtin_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int has_descriptor) |
332 | { |
333 | FT_Face face; |
334 | const char *clean_name = pdf_clean_font_name(fontname); |
335 | if (clean_name == fontname) |
336 | clean_name = "Times-Roman" ; |
337 | |
338 | fontdesc->font = fz_load_system_font(ctx, fontname, 0, 0, !has_descriptor); |
339 | if (!fontdesc->font) |
340 | { |
341 | const unsigned char *data; |
342 | int len; |
343 | |
344 | data = fz_lookup_base14_font(ctx, clean_name, &len); |
345 | if (!data) |
346 | fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin font: '%s'" , fontname); |
347 | |
348 | fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1); |
349 | fontdesc->font->flags.is_serif = !!strstr(clean_name, "Times" ); |
350 | } |
351 | |
352 | if (!strcmp(clean_name, "Symbol" ) || !strcmp(clean_name, "ZapfDingbats" )) |
353 | fontdesc->flags |= PDF_FD_SYMBOLIC; |
354 | |
355 | face = fontdesc->font->ft_face; |
356 | fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM; |
357 | fontdesc->descent = 1000.0f * face->descender / face->units_per_EM; |
358 | } |
359 | |
360 | static void |
361 | pdf_load_substitute_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int mono, int serif, int bold, int italic) |
362 | { |
363 | fontdesc->font = fz_load_system_font(ctx, fontname, bold, italic, 0); |
364 | if (!fontdesc->font) |
365 | { |
366 | const unsigned char *data; |
367 | int len; |
368 | |
369 | data = pdf_lookup_substitute_font(ctx, mono, serif, bold, italic, &len); |
370 | if (!data) |
371 | fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find substitute font" ); |
372 | |
373 | fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1); |
374 | fontdesc->font->flags.fake_bold = bold && !fontdesc->font->flags.is_bold; |
375 | fontdesc->font->flags.fake_italic = italic && !fontdesc->font->flags.is_italic; |
376 | |
377 | fontdesc->font->flags.is_mono = mono; |
378 | fontdesc->font->flags.is_serif = serif; |
379 | fontdesc->font->flags.is_bold = bold; |
380 | fontdesc->font->flags.is_italic = italic; |
381 | } |
382 | |
383 | fontdesc->font->flags.ft_substitute = 1; |
384 | fontdesc->font->flags.ft_stretch = 1; |
385 | } |
386 | |
387 | static void |
388 | pdf_load_substitute_cjk_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int ros, int serif) |
389 | { |
390 | fontdesc->font = fz_load_system_cjk_font(ctx, fontname, ros, serif); |
391 | if (!fontdesc->font) |
392 | { |
393 | const unsigned char *data; |
394 | int size; |
395 | int subfont; |
396 | |
397 | data = fz_lookup_cjk_font(ctx, ros, &size, &subfont); |
398 | if (!data) |
399 | fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin CJK font" ); |
400 | |
401 | /* A glyph bbox cache is too big for CJK fonts. */ |
402 | fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, size, subfont, 0); |
403 | } |
404 | |
405 | fontdesc->font->flags.ft_substitute = 1; |
406 | fontdesc->font->flags.ft_stretch = 0; |
407 | } |
408 | |
409 | static void |
410 | pdf_load_system_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, const char *collection) |
411 | { |
412 | int bold = 0; |
413 | int italic = 0; |
414 | int serif = 0; |
415 | int mono = 0; |
416 | |
417 | if (strstr(fontname, "Bold" )) |
418 | bold = 1; |
419 | if (strstr(fontname, "Italic" )) |
420 | italic = 1; |
421 | if (strstr(fontname, "Oblique" )) |
422 | italic = 1; |
423 | |
424 | if (fontdesc->flags & PDF_FD_FIXED_PITCH) |
425 | mono = 1; |
426 | if (fontdesc->flags & PDF_FD_SERIF) |
427 | serif = 1; |
428 | if (fontdesc->flags & PDF_FD_ITALIC) |
429 | italic = 1; |
430 | if (fontdesc->flags & PDF_FD_FORCE_BOLD) |
431 | bold = 1; |
432 | |
433 | if (collection) |
434 | { |
435 | if (!strcmp(collection, "Adobe-CNS1" )) |
436 | pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_CNS, serif); |
437 | else if (!strcmp(collection, "Adobe-GB1" )) |
438 | pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_GB, serif); |
439 | else if (!strcmp(collection, "Adobe-Japan1" )) |
440 | pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_JAPAN, serif); |
441 | else if (!strcmp(collection, "Adobe-Korea1" )) |
442 | pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_KOREA, serif); |
443 | else |
444 | { |
445 | if (strcmp(collection, "Adobe-Identity" ) != 0) |
446 | fz_warn(ctx, "unknown cid collection: %s" , collection); |
447 | pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic); |
448 | } |
449 | } |
450 | else |
451 | { |
452 | pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic); |
453 | } |
454 | } |
455 | |
456 | static void |
457 | pdf_load_embedded_font(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, const char *fontname, pdf_obj *stmref) |
458 | { |
459 | fz_buffer *buf; |
460 | |
461 | buf = pdf_load_stream(ctx, stmref); |
462 | fz_try(ctx) |
463 | fontdesc->font = fz_new_font_from_buffer(ctx, fontname, buf, 0, 1); |
464 | fz_always(ctx) |
465 | fz_drop_buffer(ctx, buf); |
466 | fz_catch(ctx) |
467 | fz_rethrow(ctx); |
468 | |
469 | fontdesc->size += fz_buffer_storage(ctx, buf, NULL); |
470 | fontdesc->is_embedded = 1; |
471 | } |
472 | |
473 | /* |
474 | * Create and destroy |
475 | */ |
476 | |
/*
 * Take a reference to fontdesc by calling fz_keep_storable on its
 * embedded storable, and return that call's result.
 */
pdf_font_desc *
pdf_keep_font(fz_context *ctx, pdf_font_desc *fontdesc)
{
	return fz_keep_storable(ctx, &fontdesc->storable);
}
482 | |
/*
 * Release a reference to fontdesc by calling fz_drop_storable on its
 * embedded storable. pdf_new_font_desc registers pdf_drop_font_imp as the
 * storable's drop function.
 */
void
pdf_drop_font(fz_context *ctx, pdf_font_desc *fontdesc)
{
	fz_drop_storable(ctx, &fontdesc->storable);
}
488 | |
489 | static void |
490 | pdf_drop_font_imp(fz_context *ctx, fz_storable *fontdesc_) |
491 | { |
492 | pdf_font_desc *fontdesc = (pdf_font_desc *)fontdesc_; |
493 | |
494 | fz_drop_font(ctx, fontdesc->font); |
495 | pdf_drop_cmap(ctx, fontdesc->encoding); |
496 | pdf_drop_cmap(ctx, fontdesc->to_ttf_cmap); |
497 | pdf_drop_cmap(ctx, fontdesc->to_unicode); |
498 | fz_free(ctx, fontdesc->cid_to_gid); |
499 | fz_free(ctx, fontdesc->cid_to_ucs); |
500 | fz_free(ctx, fontdesc->hmtx); |
501 | fz_free(ctx, fontdesc->vmtx); |
502 | fz_free(ctx, fontdesc); |
503 | } |
504 | |
505 | pdf_font_desc * |
506 | pdf_new_font_desc(fz_context *ctx) |
507 | { |
508 | pdf_font_desc *fontdesc; |
509 | |
510 | fontdesc = fz_malloc_struct(ctx, pdf_font_desc); |
511 | FZ_INIT_STORABLE(fontdesc, 1, pdf_drop_font_imp); |
512 | fontdesc->size = sizeof(pdf_font_desc); |
513 | |
514 | fontdesc->font = NULL; |
515 | |
516 | fontdesc->flags = 0; |
517 | fontdesc->italic_angle = 0; |
518 | fontdesc->ascent = 800; |
519 | fontdesc->descent = -200; |
520 | fontdesc->cap_height = 800; |
521 | fontdesc->x_height = 500; |
522 | fontdesc->missing_width = 0; |
523 | |
524 | fontdesc->encoding = NULL; |
525 | fontdesc->to_ttf_cmap = NULL; |
526 | fontdesc->cid_to_gid_len = 0; |
527 | fontdesc->cid_to_gid = NULL; |
528 | |
529 | fontdesc->to_unicode = NULL; |
530 | fontdesc->cid_to_ucs_len = 0; |
531 | fontdesc->cid_to_ucs = NULL; |
532 | |
533 | fontdesc->wmode = 0; |
534 | |
535 | fontdesc->hmtx_cap = 0; |
536 | fontdesc->vmtx_cap = 0; |
537 | fontdesc->hmtx_len = 0; |
538 | fontdesc->vmtx_len = 0; |
539 | fontdesc->hmtx = NULL; |
540 | fontdesc->vmtx = NULL; |
541 | |
542 | fontdesc->dhmtx.lo = 0x0000; |
543 | fontdesc->dhmtx.hi = 0xFFFF; |
544 | fontdesc->dhmtx.w = 1000; |
545 | |
546 | fontdesc->dvmtx.lo = 0x0000; |
547 | fontdesc->dvmtx.hi = 0xFFFF; |
548 | fontdesc->dvmtx.x = 0; |
549 | fontdesc->dvmtx.y = 880; |
550 | fontdesc->dvmtx.w = -1000; |
551 | |
552 | fontdesc->is_embedded = 0; |
553 | |
554 | return fontdesc; |
555 | } |
556 | |
557 | /* |
558 | * Simple fonts (Type1 and TrueType) |
559 | */ |
560 | |
561 | static FT_CharMap |
562 | select_type1_cmap(FT_Face face) |
563 | { |
564 | int i; |
565 | for (i = 0; i < face->num_charmaps; i++) |
566 | if (face->charmaps[i]->platform_id == 7) |
567 | return face->charmaps[i]; |
568 | if (face->num_charmaps > 0) |
569 | return face->charmaps[0]; |
570 | return NULL; |
571 | } |
572 | |
573 | static FT_CharMap |
574 | select_truetype_cmap(FT_Face face, int symbolic) |
575 | { |
576 | int i; |
577 | |
578 | /* First look for a Microsoft symbolic cmap, if applicable */ |
579 | if (symbolic) |
580 | { |
581 | for (i = 0; i < face->num_charmaps; i++) |
582 | if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 0) |
583 | return face->charmaps[i]; |
584 | } |
585 | |
586 | /* Then look for a Microsoft Unicode cmap */ |
587 | for (i = 0; i < face->num_charmaps; i++) |
588 | if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 1) |
589 | if (FT_Get_CMap_Format(face->charmaps[i]) != -1) |
590 | return face->charmaps[i]; |
591 | |
592 | /* Finally look for an Apple MacRoman cmap */ |
593 | for (i = 0; i < face->num_charmaps; i++) |
594 | if (face->charmaps[i]->platform_id == 1 && face->charmaps[i]->encoding_id == 0) |
595 | if (FT_Get_CMap_Format(face->charmaps[i]) != -1) |
596 | return face->charmaps[i]; |
597 | |
598 | if (face->num_charmaps > 0) |
599 | if (FT_Get_CMap_Format(face->charmaps[0]) != -1) |
600 | return face->charmaps[0]; |
601 | return NULL; |
602 | } |
603 | |
604 | static FT_CharMap |
605 | select_unknown_cmap(FT_Face face) |
606 | { |
607 | if (face->num_charmaps > 0) |
608 | return face->charmaps[0]; |
609 | return NULL; |
610 | } |
611 | |
/*
 * Build a pdf_font_desc for a simple font dictionary.
 *
 * Steps, in order:
 *   - load the font program through the FontDescriptor, or as a builtin
 *     font when the dictionary has no descriptor;
 *   - select and set a FreeType charmap for the face;
 *   - collect up to 256 glyph names (estrings) from the Encoding entry;
 *   - build a 256-entry code to glyph index table (etable), which becomes
 *     fontdesc->cid_to_gid;
 *   - load the ToUnicode mapping (failure is only a warning, except
 *     FZ_ERROR_TRYLATER which is rethrown);
 *   - load widths from the Widths array, or measure them with ft_width.
 *
 * On any other error the FreeType lock is released if held, etable is
 * freed unless fontdesc already owns it, the descriptor is dropped and
 * the error is rethrown.
 *
 * pdf_load_hail_mary_font calls this with dict == NULL.
 */
static pdf_font_desc *
pdf_load_simple_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
{
	const char *basefont;
	pdf_obj *descriptor;
	pdf_obj *encoding;
	pdf_obj *widths;
	unsigned short *etable = NULL;
	pdf_font_desc *fontdesc = NULL;
	pdf_obj *subtype;
	FT_Face face;
	FT_CharMap cmap;
	int symbolic;
	int kind;
	int glyph;

	/* estrings[i] is the glyph name for code i; ebuffer backs names read back from the font. */
	const char *estrings[256];
	char ebuffer[256][32];
	int i, k, n;
	int fterr;
	int has_lock = 0;

	/* These are modified inside fz_try and read in fz_catch. */
	fz_var(fontdesc);
	fz_var(etable);
	fz_var(has_lock);

	/* Load font file */
	fz_try(ctx)
	{
		fontdesc = pdf_new_font_desc(ctx);

		basefont = pdf_to_name(ctx, pdf_dict_get(ctx, dict, PDF_NAME(BaseFont)));

		descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
		if (descriptor)
			pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, NULL, basefont, 0);
		else
			pdf_load_builtin_font(ctx, fontdesc, basefont, 0);

		/* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */
		if (descriptor && pdf_is_string(ctx, pdf_dict_get(ctx, descriptor, PDF_NAME(FontName))) &&
			!pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode)) &&
			pdf_name_eq(ctx, pdf_dict_get(ctx, dict, PDF_NAME(Encoding)), PDF_NAME(WinAnsiEncoding)) &&
			pdf_dict_get_int(ctx, descriptor, PDF_NAME(Flags)) == 4)
		{
			/* Pairs of (BaseFont bytes as found in the file, replacement font name). */
			char *cp936fonts[] = {
				"\xCB\xCE\xCC\xE5", "SimSun,Regular",
				"\xBA\xDA\xCC\xE5", "SimHei,Regular",
				"\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular",
				"\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular",
				"\xC1\xA5\xCA\xE9", "SimLi,Regular",
				NULL
			};
			for (i = 0; cp936fonts[i]; i += 2)
				if (!strcmp(basefont, cp936fonts[i]))
					break;
			if (cp936fonts[i])
			{
				fz_warn(ctx, "workaround for S22PDF lying about chinese font encodings");
				/* Start over with a fresh descriptor loaded as an Adobe-GB1 font. */
				pdf_drop_font(ctx, fontdesc);
				fontdesc = NULL;
				fontdesc = pdf_new_font_desc(ctx);
				pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, "Adobe-GB1", cp936fonts[i+1], 0);
				fontdesc->encoding = pdf_load_system_cmap(ctx, "GBK-EUC-H");
				fontdesc->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
				fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");

				goto skip_encoding;
			}
		}

		face = fontdesc->font->ft_face;
		kind = ft_kind(face);

		/* Encoding */

		/* NOTE(review): 4 is presumably the same bit as PDF_FD_SYMBOLIC - confirm. */
		symbolic = fontdesc->flags & 4;

		if (kind == TYPE1)
			cmap = select_type1_cmap(face);
		else if (kind == TRUETYPE)
			cmap = select_truetype_cmap(face, symbolic);
		else
			cmap = select_unknown_cmap(face);

		if (cmap)
		{
			fterr = FT_Set_Charmap(face, cmap);
			if (fterr)
				fz_warn(ctx, "freetype could not set cmap: %s", ft_error_string(fterr));
		}
		else
			fz_warn(ctx, "freetype could not find any cmaps");

		etable = fz_malloc_array(ctx, 256, unsigned short);
		fontdesc->size += 256 * sizeof(unsigned short);
		for (i = 0; i < 256; i++)
		{
			estrings[i] = NULL;
			etable[i] = 0;
		}

		encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding));
		if (encoding)
		{
			if (pdf_is_name(ctx, encoding))
				pdf_load_encoding(estrings, pdf_to_name(ctx, encoding));

			if (pdf_is_dict(ctx, encoding))
			{
				pdf_obj *base, *diff, *item;

				base = pdf_dict_get(ctx, encoding, PDF_NAME(BaseEncoding));
				if (pdf_is_name(ctx, base))
					pdf_load_encoding(estrings, pdf_to_name(ctx, base));
				else if (!fontdesc->is_embedded && !symbolic)
					pdf_load_encoding(estrings, "StandardEncoding");

				diff = pdf_dict_get(ctx, encoding, PDF_NAME(Differences));
				if (pdf_is_array(ctx, diff))
				{
					n = pdf_array_len(ctx, diff);
					k = 0;
					/* An integer item sets the current code; each name item is stored at it and advances it. */
					for (i = 0; i < n; i++)
					{
						item = pdf_array_get(ctx, diff, i);
						if (pdf_is_int(ctx, item))
							k = pdf_to_int(ctx, item);
						if (pdf_is_name(ctx, item) && k >= 0 && k < nelem(estrings))
							estrings[k++] = pdf_to_name(ctx, item);
					}
				}
			}
		}
		else if (!fontdesc->is_embedded && !symbolic)
			pdf_load_encoding(estrings, "StandardEncoding");

		/* start with the builtin encoding */
		for (i = 0; i < 256; i++)
			etable[i] = ft_char_index(face, i);

		/* has_lock lets the fz_catch handler release the lock if something throws while it is held. */
		fz_lock(ctx, FZ_LOCK_FREETYPE);
		has_lock = 1;

		/* built-in and substitute fonts may be a different type than what the document expects */
		subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
		if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1)))
			kind = TYPE1;
		else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1)))
			kind = TYPE1;
		else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType)))
			kind = TRUETYPE;
		else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0)))
			kind = TYPE1;
		else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2)))
			kind = TRUETYPE;

		/* encode by glyph name where we can */
		if (kind == TYPE1)
		{
			for (i = 0; i < 256; i++)
			{
				if (estrings[i])
				{
					glyph = ft_name_index(face, estrings[i]);
					if (glyph > 0)
						etable[i] = glyph;
				}
			}
		}

		/* encode by glyph name where we can */
		if (kind == TRUETYPE)
		{
			/* Unicode cmap */
			if (!symbolic && face->charmap && face->charmap->platform_id == 3)
			{
				for (i = 0; i < 256; i++)
				{
					if (estrings[i])
					{
						glyph = ft_find_glyph_by_unicode_name(face, estrings[i]);
						if (glyph > 0)
							etable[i] = glyph;
					}
				}
			}

			/* MacRoman cmap */
			else if (!symbolic && face->charmap && face->charmap->platform_id == 1)
			{
				for (i = 0; i < 256; i++)
				{
					if (estrings[i])
					{
						/* Try the MacRoman code first, then the font's own glyph names. */
						int mrcode = lookup_mre_code(estrings[i]);
						glyph = 0;
						if (mrcode > 0)
							glyph = ft_char_index(face, mrcode);
						if (glyph == 0)
							glyph = ft_name_index(face, estrings[i]);
						if (glyph > 0)
							etable[i] = glyph;
					}
				}
			}

			/* Symbolic cmap */
			else if (!face->charmap || face->charmap->encoding != FT_ENCODING_MS_SYMBOL)
			{
				for (i = 0; i < 256; i++)
				{
					if (estrings[i])
					{
						glyph = ft_name_index(face, estrings[i]);
						if (glyph > 0)
							etable[i] = glyph;
					}
				}
			}
		}

		/* try to reverse the glyph names from the builtin encoding */
		for (i = 0; i < 256; i++)
		{
			if (etable[i] && !estrings[i])
			{
				if (FT_HAS_GLYPH_NAMES(face))
				{
					fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32);
					if (fterr)
						fz_warn(ctx, "freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr));
					if (ebuffer[i][0])
						estrings[i] = ebuffer[i];
				}
				else
				{
					estrings[i] = (char*) fz_glyph_name_from_win_ansi[i]; /* discard const */
				}
			}
		}

		/* symbolic Type 1 fonts with an implicit encoding and non-standard glyph names */
		if (kind == TYPE1 && symbolic)
		{
			for (i = 0; i < 256; i++)
				if (etable[i] && estrings[i] && !fz_unicode_from_glyph_name(estrings[i]))
					estrings[i] = (char*) fz_glyph_name_from_adobe_standard[i];
		}

		fz_unlock(ctx, FZ_LOCK_FREETYPE);
		has_lock = 0;

		fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1);
		fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);
		fontdesc->cid_to_gid_len = 256;
		/* From here on fontdesc owns etable; the fz_catch handler checks for this. */
		fontdesc->cid_to_gid = etable;

		fz_try(ctx)
		{
			pdf_load_to_unicode(ctx, doc, fontdesc, estrings, NULL, pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode)));
		}
		fz_catch(ctx)
		{
			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
			fz_warn(ctx, "cannot load ToUnicode CMap");
		}

	/* The cp936 workaround above jumps here, bypassing the encoding setup. */
	skip_encoding:

		/* Widths */

		pdf_set_default_hmtx(ctx, fontdesc, fontdesc->missing_width);

		widths = pdf_dict_get(ctx, dict, PDF_NAME(Widths));
		if (widths)
		{
			int first, last;

			first = pdf_dict_get_int(ctx, dict, PDF_NAME(FirstChar));
			last = pdf_dict_get_int(ctx, dict, PDF_NAME(LastChar));

			/* Out-of-range or inverted bounds collapse to the single code 0. */
			if (first < 0 || last > 255 || first > last)
				first = last = 0;

			for (i = 0; i < last - first + 1; i++)
			{
				int wid = pdf_array_get_int(ctx, widths, i);
				pdf_add_hmtx(ctx, fontdesc, i + first, i + first, wid);
			}
		}
		else
		{
			/* No Widths array: measure every code with FreeType. */
			for (i = 0; i < 256; i++)
				pdf_add_hmtx(ctx, fontdesc, i, i, ft_width(ctx, fontdesc, i));
		}

		pdf_end_hmtx(ctx, fontdesc);
	}
	fz_catch(ctx)
	{
		if (has_lock)
			fz_unlock(ctx, FZ_LOCK_FREETYPE);
		/* Free etable here only if it was not handed over to fontdesc. */
		if (fontdesc && etable != fontdesc->cid_to_gid)
			fz_free(ctx, etable);
		pdf_drop_font(ctx, fontdesc);
		fz_rethrow(ctx);
	}
	return fontdesc;
}
922 | |
/*
 * Store-type callback: fills the hash with a fixed all-zero value and
 * returns 1. key_ is not used.
 */
static int
hail_mary_make_hash_key(fz_context *ctx, fz_store_hash *hash, void *key_)
{
	hash->u.pi.i = 0;
	hash->u.pi.ptr = NULL;
	return 1;
}
930 | |
/* Store-type callback: returns the key unchanged; nothing is reference counted. */
static void *
hail_mary_keep_key(fz_context *ctx, void *key)
{
	return key;
}
936 | |
/*
 * Store callback: "drop" the key. Deliberately does nothing, mirroring
 * hail_mary_keep_key which takes no reference either.
 */
static void
hail_mary_drop_key(fz_context *ctx, void *key)
{
}
941 | |
942 | static int |
943 | hail_mary_cmp_key(fz_context *ctx, void *k0, void *k1) |
944 | { |
945 | return k0 == k1; |
946 | } |
947 | |
948 | static void |
949 | hail_mary_format_key(fz_context *ctx, char *s, int n, void *key_) |
950 | { |
951 | fz_strlcpy(s, "(hail mary font)" , n); |
952 | } |
953 | |
/* Only the address of this variable is used: it is the store key under
 * which the hail mary font is looked up and stored. */
static int hail_mary_store_key; /* Dummy */
955 | |
/*
 * Store type for the hail mary font: bundles the callbacks above that
 * hash, keep, drop, compare and format its key. The initializer is
 * positional, so the order must match the fz_store_type declaration.
 */
static const fz_store_type hail_mary_store_type =
{
	hail_mary_make_hash_key,
	hail_mary_keep_key,
	hail_mary_drop_key,
	hail_mary_cmp_key,
	hail_mary_format_key,
	NULL /* final slot left unset; see fz_store_type for its meaning */
};
965 | |
966 | pdf_font_desc * |
967 | pdf_load_hail_mary_font(fz_context *ctx, pdf_document *doc) |
968 | { |
969 | pdf_font_desc *fontdesc; |
970 | pdf_font_desc *existing; |
971 | |
972 | if ((fontdesc = fz_find_item(ctx, pdf_drop_font_imp, &hail_mary_store_key, &hail_mary_store_type)) != NULL) |
973 | { |
974 | return fontdesc; |
975 | } |
976 | |
977 | /* FIXME: Get someone with a clue about fonts to fix this */ |
978 | fontdesc = pdf_load_simple_font(ctx, doc, NULL); |
979 | |
980 | existing = fz_store_item(ctx, &hail_mary_store_key, fontdesc, fontdesc->size, &hail_mary_store_type); |
981 | assert(existing == NULL); |
982 | (void)existing; /* Silence warning in release builds */ |
983 | |
984 | return fontdesc; |
985 | } |
986 | |
987 | /* |
988 | * CID Fonts |
989 | */ |
990 | |
991 | static pdf_font_desc * |
992 | load_cid_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode) |
993 | { |
994 | pdf_obj *widths; |
995 | pdf_obj *descriptor; |
996 | pdf_font_desc *fontdesc = NULL; |
997 | fz_buffer *buf = NULL; |
998 | pdf_cmap *cmap; |
999 | FT_Face face; |
1000 | char collection[256]; |
1001 | const char *basefont; |
1002 | int i, k, fterr; |
1003 | pdf_obj *cidtogidmap; |
1004 | pdf_obj *obj; |
1005 | int dw; |
1006 | |
1007 | fz_var(fontdesc); |
1008 | fz_var(buf); |
1009 | |
1010 | fz_try(ctx) |
1011 | { |
1012 | /* Get font name and CID collection */ |
1013 | |
1014 | basefont = pdf_to_name(ctx, pdf_dict_get(ctx, dict, PDF_NAME(BaseFont))); |
1015 | |
1016 | { |
1017 | pdf_obj *cidinfo; |
1018 | const char *reg, *ord; |
1019 | |
1020 | cidinfo = pdf_dict_get(ctx, dict, PDF_NAME(CIDSystemInfo)); |
1021 | if (!cidinfo) |
1022 | fz_throw(ctx, FZ_ERROR_SYNTAX, "cid font is missing info" ); |
1023 | |
1024 | reg = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Registry), NULL); |
1025 | ord = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Ordering), NULL); |
1026 | fz_snprintf(collection, sizeof collection, "%s-%s" , reg, ord); |
1027 | } |
1028 | |
1029 | /* Encoding */ |
1030 | |
1031 | if (pdf_is_name(ctx, encoding)) |
1032 | { |
1033 | cmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, encoding)); |
1034 | } |
1035 | else if (pdf_is_indirect(ctx, encoding)) |
1036 | { |
1037 | cmap = pdf_load_embedded_cmap(ctx, doc, encoding); |
1038 | } |
1039 | else |
1040 | { |
1041 | fz_throw(ctx, FZ_ERROR_SYNTAX, "font missing encoding" ); |
1042 | } |
1043 | |
1044 | /* Load font file */ |
1045 | |
1046 | fontdesc = pdf_new_font_desc(ctx); |
1047 | |
1048 | fontdesc->encoding = cmap; |
1049 | fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); |
1050 | |
1051 | pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding)); |
1052 | |
1053 | descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor)); |
1054 | if (!descriptor) |
1055 | fz_throw(ctx, FZ_ERROR_SYNTAX, "missing font descriptor" ); |
1056 | pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, collection, basefont, 1); |
1057 | |
1058 | face = fontdesc->font->ft_face; |
1059 | |
1060 | /* Apply encoding */ |
1061 | |
1062 | cidtogidmap = pdf_dict_get(ctx, dict, PDF_NAME(CIDToGIDMap)); |
1063 | if (pdf_is_stream(ctx, cidtogidmap)) |
1064 | { |
1065 | size_t z, len; |
1066 | unsigned char *data; |
1067 | |
1068 | buf = pdf_load_stream(ctx, cidtogidmap); |
1069 | |
1070 | len = fz_buffer_storage(ctx, buf, &data); |
1071 | fontdesc->cid_to_gid_len = len / 2; |
1072 | fontdesc->cid_to_gid = fz_malloc_array(ctx, fontdesc->cid_to_gid_len, unsigned short); |
1073 | fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short); |
1074 | for (z = 0; z < fontdesc->cid_to_gid_len; z++) |
1075 | fontdesc->cid_to_gid[z] = (data[z * 2] << 8) + data[z * 2 + 1]; |
1076 | } |
1077 | else if (cidtogidmap && !pdf_name_eq(ctx, PDF_NAME(Identity), cidtogidmap)) |
1078 | { |
1079 | fz_warn(ctx, "ignoring unknown CIDToGIDMap entry" ); |
1080 | } |
1081 | |
1082 | /* if font is external, cidtogidmap should not be identity */ |
1083 | /* so we map from cid to unicode and then map that through the (3 1) */ |
1084 | /* unicode cmap to get a glyph id */ |
1085 | else if (fontdesc->font->flags.ft_substitute) |
1086 | { |
1087 | fterr = FT_Select_Charmap(face, ft_encoding_unicode); |
1088 | if (fterr) |
1089 | fz_throw(ctx, FZ_ERROR_SYNTAX, "no unicode cmap when emulating CID font: %s" , ft_error_string(fterr)); |
1090 | |
1091 | if (!strcmp(collection, "Adobe-CNS1" )) |
1092 | fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2" ); |
1093 | else if (!strcmp(collection, "Adobe-GB1" )) |
1094 | fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2" ); |
1095 | else if (!strcmp(collection, "Adobe-Japan1" )) |
1096 | fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2" ); |
1097 | else if (!strcmp(collection, "Adobe-Japan2" )) |
1098 | fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2" ); |
1099 | else if (!strcmp(collection, "Adobe-Korea1" )) |
1100 | fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2" ); |
1101 | } |
1102 | |
1103 | pdf_load_to_unicode(ctx, doc, fontdesc, NULL, collection, to_unicode); |
1104 | |
1105 | /* If we have an identity encoding, we're supposed to use the glyph ids directly. |
1106 | * If we only have a substitute font, that won't work. |
1107 | * Make a last ditch attempt by using |
1108 | * the ToUnicode table if it exists to map via the substitute font's cmap. */ |
1109 | if (strstr(fontdesc->encoding->cmap_name, "Identity-" ) && fontdesc->font->flags.ft_substitute) |
1110 | { |
1111 | fz_warn(ctx, "non-embedded font using identity encoding: %s" , basefont); |
1112 | if (fontdesc->to_unicode && !fontdesc->to_ttf_cmap) |
1113 | fontdesc->to_ttf_cmap = pdf_keep_cmap(ctx, fontdesc->to_unicode); |
1114 | } |
1115 | |
1116 | /* Horizontal */ |
1117 | |
1118 | dw = 1000; |
1119 | obj = pdf_dict_get(ctx, dict, PDF_NAME(DW)); |
1120 | if (obj) |
1121 | dw = pdf_to_int(ctx, obj); |
1122 | pdf_set_default_hmtx(ctx, fontdesc, dw); |
1123 | |
1124 | widths = pdf_dict_get(ctx, dict, PDF_NAME(W)); |
1125 | if (widths) |
1126 | { |
1127 | int c0, c1, w, n, m; |
1128 | |
1129 | n = pdf_array_len(ctx, widths); |
1130 | for (i = 0; i < n; ) |
1131 | { |
1132 | c0 = pdf_array_get_int(ctx, widths, i); |
1133 | obj = pdf_array_get(ctx, widths, i + 1); |
1134 | if (pdf_is_array(ctx, obj)) |
1135 | { |
1136 | m = pdf_array_len(ctx, obj); |
1137 | for (k = 0; k < m; k++) |
1138 | { |
1139 | w = pdf_array_get_int(ctx, obj, k); |
1140 | pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w); |
1141 | } |
1142 | i += 2; |
1143 | } |
1144 | else |
1145 | { |
1146 | c1 = pdf_to_int(ctx, obj); |
1147 | w = pdf_array_get_int(ctx, widths, i + 2); |
1148 | pdf_add_hmtx(ctx, fontdesc, c0, c1, w); |
1149 | i += 3; |
1150 | } |
1151 | } |
1152 | } |
1153 | |
1154 | pdf_end_hmtx(ctx, fontdesc); |
1155 | |
1156 | /* Vertical */ |
1157 | |
1158 | if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1) |
1159 | { |
1160 | int dw2y = 880; |
1161 | int dw2w = -1000; |
1162 | |
1163 | obj = pdf_dict_get(ctx, dict, PDF_NAME(DW2)); |
1164 | if (obj) |
1165 | { |
1166 | dw2y = pdf_array_get_int(ctx, obj, 0); |
1167 | dw2w = pdf_array_get_int(ctx, obj, 1); |
1168 | } |
1169 | |
1170 | pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w); |
1171 | |
1172 | widths = pdf_dict_get(ctx, dict, PDF_NAME(W2)); |
1173 | if (widths) |
1174 | { |
1175 | int c0, c1, w, x, y, n; |
1176 | |
1177 | n = pdf_array_len(ctx, widths); |
1178 | for (i = 0; i < n; ) |
1179 | { |
1180 | c0 = pdf_array_get_int(ctx, widths, i); |
1181 | obj = pdf_array_get(ctx, widths, i + 1); |
1182 | if (pdf_is_array(ctx, obj)) |
1183 | { |
1184 | int m = pdf_array_len(ctx, obj); |
1185 | for (k = 0; k * 3 < m; k ++) |
1186 | { |
1187 | w = pdf_array_get_int(ctx, obj, k * 3 + 0); |
1188 | x = pdf_array_get_int(ctx, obj, k * 3 + 1); |
1189 | y = pdf_array_get_int(ctx, obj, k * 3 + 2); |
1190 | pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w); |
1191 | } |
1192 | i += 2; |
1193 | } |
1194 | else |
1195 | { |
1196 | c1 = pdf_to_int(ctx, obj); |
1197 | w = pdf_array_get_int(ctx, widths, i + 2); |
1198 | x = pdf_array_get_int(ctx, widths, i + 3); |
1199 | y = pdf_array_get_int(ctx, widths, i + 4); |
1200 | pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w); |
1201 | i += 5; |
1202 | } |
1203 | } |
1204 | } |
1205 | |
1206 | pdf_end_vmtx(ctx, fontdesc); |
1207 | } |
1208 | } |
1209 | fz_always(ctx) |
1210 | fz_drop_buffer(ctx, buf); |
1211 | fz_catch(ctx) |
1212 | { |
1213 | pdf_drop_font(ctx, fontdesc); |
1214 | fz_rethrow(ctx); |
1215 | } |
1216 | |
1217 | return fontdesc; |
1218 | } |
1219 | |
1220 | static pdf_font_desc * |
1221 | pdf_load_type0_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict) |
1222 | { |
1223 | pdf_obj *dfonts; |
1224 | pdf_obj *dfont; |
1225 | pdf_obj *subtype; |
1226 | pdf_obj *encoding; |
1227 | pdf_obj *to_unicode; |
1228 | |
1229 | dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts)); |
1230 | if (!dfonts) |
1231 | fz_throw(ctx, FZ_ERROR_SYNTAX, "cid font is missing descendant fonts" ); |
1232 | |
1233 | dfont = pdf_array_get(ctx, dfonts, 0); |
1234 | |
1235 | subtype = pdf_dict_get(ctx, dfont, PDF_NAME(Subtype)); |
1236 | encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding)); |
1237 | to_unicode = pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode)); |
1238 | |
1239 | if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0))) |
1240 | return load_cid_font(ctx, doc, dfont, encoding, to_unicode); |
1241 | if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2))) |
1242 | return load_cid_font(ctx, doc, dfont, encoding, to_unicode); |
1243 | fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown cid font type" ); |
1244 | } |
1245 | |
1246 | /* |
1247 | * FontDescriptor |
1248 | */ |
1249 | |
1250 | static void |
1251 | pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict, |
1252 | const char *collection, const char *basefont, int iscidfont) |
1253 | { |
1254 | pdf_obj *obj1, *obj2, *obj3, *obj; |
1255 | const char *fontname; |
1256 | FT_Face face; |
1257 | |
1258 | /* Prefer BaseFont; don't bother with FontName */ |
1259 | fontname = basefont; |
1260 | |
1261 | fontdesc->flags = pdf_dict_get_int(ctx, dict, PDF_NAME(Flags)); |
1262 | fontdesc->italic_angle = pdf_dict_get_real(ctx, dict, PDF_NAME(ItalicAngle)); |
1263 | fontdesc->ascent = pdf_dict_get_real(ctx, dict, PDF_NAME(Ascent)); |
1264 | fontdesc->descent = pdf_dict_get_real(ctx, dict, PDF_NAME(Descent)); |
1265 | fontdesc->cap_height = pdf_dict_get_real(ctx, dict, PDF_NAME(CapHeight)); |
1266 | fontdesc->x_height = pdf_dict_get_real(ctx, dict, PDF_NAME(XHeight)); |
1267 | fontdesc->missing_width = pdf_dict_get_real(ctx, dict, PDF_NAME(MissingWidth)); |
1268 | |
1269 | obj1 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile)); |
1270 | obj2 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile2)); |
1271 | obj3 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile3)); |
1272 | obj = obj1 ? obj1 : obj2 ? obj2 : obj3; |
1273 | |
1274 | if (pdf_is_indirect(ctx, obj)) |
1275 | { |
1276 | fz_try(ctx) |
1277 | { |
1278 | pdf_load_embedded_font(ctx, doc, fontdesc, fontname, obj); |
1279 | } |
1280 | fz_catch(ctx) |
1281 | { |
1282 | fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); |
1283 | fz_warn(ctx, "ignored error when loading embedded font; attempting to load system font" ); |
1284 | if (!iscidfont && fontname != pdf_clean_font_name(fontname)) |
1285 | pdf_load_builtin_font(ctx, fontdesc, fontname, 1); |
1286 | else |
1287 | pdf_load_system_font(ctx, fontdesc, fontname, collection); |
1288 | } |
1289 | } |
1290 | else |
1291 | { |
1292 | if (!iscidfont && fontname != pdf_clean_font_name(fontname)) |
1293 | pdf_load_builtin_font(ctx, fontdesc, fontname, 1); |
1294 | else |
1295 | pdf_load_system_font(ctx, fontdesc, fontname, collection); |
1296 | } |
1297 | |
1298 | /* Check for DynaLab fonts that must use hinting */ |
1299 | face = fontdesc->font->ft_face; |
1300 | if (ft_kind(face) == TRUETYPE) |
1301 | { |
1302 | /* FreeType's own 'tricky' font detection needs a bit of help */ |
1303 | if (is_dynalab(fontdesc->font->name)) |
1304 | face->face_flags |= FT_FACE_FLAG_TRICKY; |
1305 | |
1306 | if (fontdesc->ascent == 0.0f) |
1307 | fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM; |
1308 | |
1309 | if (fontdesc->descent == 0.0f) |
1310 | fontdesc->descent = 1000.0f * face->descender / face->units_per_EM; |
1311 | } |
1312 | } |
1313 | |
1314 | static void |
1315 | pdf_make_width_table(fz_context *ctx, pdf_font_desc *fontdesc) |
1316 | { |
1317 | fz_font *font = fontdesc->font; |
1318 | int i, k, n, cid, gid; |
1319 | |
1320 | n = 0; |
1321 | for (i = 0; i < fontdesc->hmtx_len; i++) |
1322 | { |
1323 | for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++) |
1324 | { |
1325 | cid = pdf_lookup_cmap(fontdesc->encoding, k); |
1326 | gid = pdf_font_cid_to_gid(ctx, fontdesc, cid); |
1327 | if (gid > n) |
1328 | n = gid; |
1329 | } |
1330 | } |
1331 | |
1332 | font->width_count = n + 1; |
1333 | font->width_table = fz_malloc_array(ctx, font->width_count, short); |
1334 | fontdesc->size += font->width_count * sizeof(short); |
1335 | |
1336 | font->width_default = fontdesc->dhmtx.w; |
1337 | for (i = 0; i < font->width_count; i++) |
1338 | font->width_table[i] = -1; |
1339 | |
1340 | for (i = 0; i < fontdesc->hmtx_len; i++) |
1341 | { |
1342 | for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++) |
1343 | { |
1344 | cid = pdf_lookup_cmap(fontdesc->encoding, k); |
1345 | gid = pdf_font_cid_to_gid(ctx, fontdesc, cid); |
1346 | if (gid >= 0 && gid < font->width_count) |
1347 | font->width_table[gid] = fz_maxi(fontdesc->hmtx[i].w, font->width_table[gid]); |
1348 | } |
1349 | } |
1350 | |
1351 | for (i = 0; i < font->width_count; i++) |
1352 | if (font->width_table[i] == -1) |
1353 | font->width_table[i] = font->width_default; |
1354 | } |
1355 | |
1356 | pdf_font_desc * |
1357 | pdf_load_font(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict) |
1358 | { |
1359 | pdf_obj *subtype; |
1360 | pdf_obj *dfonts; |
1361 | pdf_obj *charprocs; |
1362 | pdf_font_desc *fontdesc = NULL; |
1363 | int type3 = 0; |
1364 | |
1365 | if (pdf_obj_marked(ctx, dict)) |
1366 | fz_throw(ctx, FZ_ERROR_SYNTAX, "Recursive Type3 font definition." ); |
1367 | |
1368 | if ((fontdesc = pdf_find_item(ctx, pdf_drop_font_imp, dict)) != NULL) |
1369 | { |
1370 | return fontdesc; |
1371 | } |
1372 | |
1373 | subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype)); |
1374 | dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts)); |
1375 | charprocs = pdf_dict_get(ctx, dict, PDF_NAME(CharProcs)); |
1376 | |
1377 | if (pdf_name_eq(ctx, subtype, PDF_NAME(Type0))) |
1378 | fontdesc = pdf_load_type0_font(ctx, doc, dict); |
1379 | else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1))) |
1380 | fontdesc = pdf_load_simple_font(ctx, doc, dict); |
1381 | else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1))) |
1382 | fontdesc = pdf_load_simple_font(ctx, doc, dict); |
1383 | else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType))) |
1384 | fontdesc = pdf_load_simple_font(ctx, doc, dict); |
1385 | else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type3))) |
1386 | { |
1387 | fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict); |
1388 | type3 = 1; |
1389 | } |
1390 | else if (charprocs) |
1391 | { |
1392 | fz_warn(ctx, "unknown font format, guessing type3." ); |
1393 | fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict); |
1394 | type3 = 1; |
1395 | } |
1396 | else if (dfonts) |
1397 | { |
1398 | fz_warn(ctx, "unknown font format, guessing type0." ); |
1399 | fontdesc = pdf_load_type0_font(ctx, doc, dict); |
1400 | } |
1401 | else |
1402 | { |
1403 | fz_warn(ctx, "unknown font format, guessing type1 or truetype." ); |
1404 | fontdesc = pdf_load_simple_font(ctx, doc, dict); |
1405 | } |
1406 | |
1407 | pdf_mark_obj(ctx, dict); |
1408 | fz_try(ctx) |
1409 | { |
1410 | /* Create glyph width table for stretching substitute fonts and text extraction. */ |
1411 | pdf_make_width_table(ctx, fontdesc); |
1412 | |
1413 | /* Load CharProcs */ |
1414 | if (type3) |
1415 | pdf_load_type3_glyphs(ctx, doc, fontdesc); |
1416 | |
1417 | pdf_store_item(ctx, dict, fontdesc, fontdesc->size); |
1418 | } |
1419 | fz_always(ctx) |
1420 | pdf_unmark_obj(ctx, dict); |
1421 | fz_catch(ctx) |
1422 | { |
1423 | pdf_drop_font(ctx, fontdesc); |
1424 | fz_rethrow(ctx); |
1425 | } |
1426 | |
1427 | return fontdesc; |
1428 | } |
1429 | |
1430 | void |
1431 | pdf_print_font(fz_context *ctx, fz_output *out, pdf_font_desc *fontdesc) |
1432 | { |
1433 | int i; |
1434 | |
1435 | fz_write_printf(ctx, out, "fontdesc {\n" ); |
1436 | |
1437 | if (fontdesc->font->ft_face) |
1438 | fz_write_printf(ctx, out, "\tfreetype font\n" ); |
1439 | if (fontdesc->font->t3procs) |
1440 | fz_write_printf(ctx, out, "\ttype3 font\n" ); |
1441 | |
1442 | fz_write_printf(ctx, out, "\twmode %d\n" , fontdesc->wmode); |
1443 | fz_write_printf(ctx, out, "\tDW %d\n" , fontdesc->dhmtx.w); |
1444 | |
1445 | fz_write_printf(ctx, out, "\tW {\n" ); |
1446 | for (i = 0; i < fontdesc->hmtx_len; i++) |
1447 | fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d\n" , |
1448 | fontdesc->hmtx[i].lo, fontdesc->hmtx[i].hi, fontdesc->hmtx[i].w); |
1449 | fz_write_printf(ctx, out, "\t}\n" ); |
1450 | |
1451 | if (fontdesc->wmode) |
1452 | { |
1453 | fz_write_printf(ctx, out, "\tDW2 [%d %d]\n" , fontdesc->dvmtx.y, fontdesc->dvmtx.w); |
1454 | fz_write_printf(ctx, out, "\tW2 {\n" ); |
1455 | for (i = 0; i < fontdesc->vmtx_len; i++) |
1456 | fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d %d %d\n" , fontdesc->vmtx[i].lo, fontdesc->vmtx[i].hi, |
1457 | fontdesc->vmtx[i].x, fontdesc->vmtx[i].y, fontdesc->vmtx[i].w); |
1458 | fz_write_printf(ctx, out, "\t}\n" ); |
1459 | } |
1460 | } |
1461 | |