1#include "mupdf/fitz.h"
2#include "mupdf/pdf.h"
3
4#include "../fitz/fitz-imp.h"
5
6#include <assert.h>
7
8#include <ft2build.h>
9#include FT_FREETYPE_H
10#include FT_ADVANCES_H
11#ifdef FT_FONT_FORMATS_H
12#include FT_FONT_FORMATS_H
13#else
14#include FT_XFREE86_H
15#endif
16#include FT_TRUETYPE_TABLES_H
17
18#ifndef FT_SFNT_HEAD
19#define FT_SFNT_HEAD ft_sfnt_head
20#endif
21
22void
23pdf_load_encoding(const char **estrings, const char *encoding)
24{
25 const char * const *bstrings = NULL;
26 int i;
27
28 if (!strcmp(encoding, "StandardEncoding"))
29 bstrings = fz_glyph_name_from_adobe_standard;
30 if (!strcmp(encoding, "MacRomanEncoding"))
31 bstrings = fz_glyph_name_from_mac_roman;
32 if (!strcmp(encoding, "MacExpertEncoding"))
33 bstrings = fz_glyph_name_from_mac_expert;
34 if (!strcmp(encoding, "WinAnsiEncoding"))
35 bstrings = fz_glyph_name_from_win_ansi;
36
37 if (bstrings)
38 for (i = 0; i < 256; i++)
39 estrings[i] = bstrings[i];
40}
41
42static void pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict,
43 const char *collection, const char *basefont, int iscidfont);
44
/*
 * Alias table for the fourteen standard fonts.
 *
 * Each row starts with the canonical name and is followed by the
 * alternative spellings that pdf_clean_font_name maps onto it.
 * Rows are NULL terminated.
 */
static const char *base_font_names[][10] =
{
	/* Courier family */
	{ "Courier",
		"CourierNew", "CourierNewPSMT", NULL },
	{ "Courier-Bold",
		"CourierNew,Bold", "Courier,Bold",
		"CourierNewPS-BoldMT", "CourierNew-Bold", NULL },
	{ "Courier-Oblique",
		"CourierNew,Italic", "Courier,Italic",
		"CourierNewPS-ItalicMT", "CourierNew-Italic", NULL },
	{ "Courier-BoldOblique",
		"CourierNew,BoldItalic", "Courier,BoldItalic",
		"CourierNewPS-BoldItalicMT", "CourierNew-BoldItalic", NULL },

	/* Helvetica family */
	{ "Helvetica",
		"ArialMT", "Arial", NULL },
	{ "Helvetica-Bold",
		"Arial-BoldMT", "Arial,Bold", "Arial-Bold",
		"Helvetica,Bold", NULL },
	{ "Helvetica-Oblique",
		"Arial-ItalicMT", "Arial,Italic", "Arial-Italic",
		"Helvetica,Italic", "Helvetica-Italic", NULL },
	{ "Helvetica-BoldOblique",
		"Arial-BoldItalicMT", "Arial,BoldItalic", "Arial-BoldItalic",
		"Helvetica,BoldItalic", "Helvetica-BoldItalic", NULL },

	/* Times family */
	{ "Times-Roman",
		"TimesNewRomanPSMT", "TimesNewRoman", "TimesNewRomanPS", NULL },
	{ "Times-Bold",
		"TimesNewRomanPS-BoldMT", "TimesNewRoman,Bold",
		"TimesNewRomanPS-Bold", "TimesNewRoman-Bold", NULL },
	{ "Times-Italic",
		"TimesNewRomanPS-ItalicMT", "TimesNewRoman,Italic",
		"TimesNewRomanPS-Italic", "TimesNewRoman-Italic", NULL },
	{ "Times-BoldItalic",
		"TimesNewRomanPS-BoldItalicMT", "TimesNewRoman,BoldItalic",
		"TimesNewRomanPS-BoldItalic", "TimesNewRoman-BoldItalic", NULL },

	/* Symbol fonts */
	{ "Symbol",
		"Symbol,Italic", "Symbol,Bold", "Symbol,BoldItalic",
		"SymbolMT", "SymbolMT,Italic", "SymbolMT,Bold",
		"SymbolMT,BoldItalic", NULL },
	{ "ZapfDingbats", NULL }
};
75
76const unsigned char *
77pdf_lookup_substitute_font(fz_context *ctx, int mono, int serif, int bold, int italic, int *len)
78{
79 if (mono) {
80 if (bold) {
81 if (italic) return fz_lookup_base14_font(ctx, "Courier-BoldOblique", len);
82 else return fz_lookup_base14_font(ctx, "Courier-Bold", len);
83 } else {
84 if (italic) return fz_lookup_base14_font(ctx, "Courier-Oblique", len);
85 else return fz_lookup_base14_font(ctx, "Courier", len);
86 }
87 } else if (serif) {
88 if (bold) {
89 if (italic) return fz_lookup_base14_font(ctx, "Times-BoldItalic", len);
90 else return fz_lookup_base14_font(ctx, "Times-Bold", len);
91 } else {
92 if (italic) return fz_lookup_base14_font(ctx, "Times-Italic", len);
93 else return fz_lookup_base14_font(ctx, "Times-Roman", len);
94 }
95 } else {
96 if (bold) {
97 if (italic) return fz_lookup_base14_font(ctx, "Helvetica-BoldOblique", len);
98 else return fz_lookup_base14_font(ctx, "Helvetica-Bold", len);
99 } else {
100 if (italic) return fz_lookup_base14_font(ctx, "Helvetica-Oblique", len);
101 else return fz_lookup_base14_font(ctx, "Helvetica", len);
102 }
103 }
104}
105
/*
 * Check whether a font name matches one of the DynaLab naming patterns.
 *
 * Returns 1 if name contains "HuaTian" or "MingLi" anywhere, or if it
 * starts with "DF" or "DLC", or contains "+DF" or "+DLC" (the prefix
 * following a subset tag). Returns 0 otherwise.
 *
 * The name is only read, so it is taken as const. The prefix tests use
 * strncmp rather than comparing the result of strstr against the start
 * of the string, which avoids scanning the whole name.
 */
static int is_dynalab(const char *name)
{
	if (strstr(name, "HuaTian"))
		return 1;
	if (strstr(name, "MingLi"))
		return 1;
	if (!strncmp(name, "DF", 2) || strstr(name, "+DF"))
		return 1;
	if (!strncmp(name, "DLC", 3) || strstr(name, "+DLC"))
		return 1;
	return 0;
}
118
/*
 * Compare two strings for equality, skipping every ' ' in both.
 *
 * Returns 0 when the strings match once spaces are removed, and 1
 * otherwise. Unlike strcmp the result carries no ordering.
 */
static int strcmp_ignore_space(const char *a, const char *b)
{
	for (;;)
	{
		while (*a == ' ')
			a++;
		while (*b == ' ')
			b++;
		if (*a != *b)
			return 1;
		/* The characters are equal here, so one terminator check covers both. */
		if (*a == 0)
			return 0;
		a++;
		b++;
	}
}
137
138const char *pdf_clean_font_name(const char *fontname)
139{
140 int i, k;
141 for (i = 0; i < nelem(base_font_names); i++)
142 for (k = 0; base_font_names[i][k]; k++)
143 if (!strcmp_ignore_space(base_font_names[i][k], fontname))
144 return base_font_names[i][0];
145 return fontname;
146}
147
148/*
149 * FreeType and Rendering glue
150 */
151
152enum { UNKNOWN, TYPE1, TRUETYPE };
153
154static int ft_kind(FT_Face face)
155{
156#ifdef FT_FONT_FORMATS_H
157 const char *kind = FT_Get_Font_Format(face);
158#else
159 const char *kind = FT_Get_X11_Font_Format(face);
160#endif
161 if (!strcmp(kind, "TrueType")) return TRUETYPE;
162 if (!strcmp(kind, "Type 1")) return TYPE1;
163 if (!strcmp(kind, "CFF")) return TYPE1;
164 if (!strcmp(kind, "CID Type 1")) return TYPE1;
165 return UNKNOWN;
166}
167
168static int ft_cid_to_gid(pdf_font_desc *fontdesc, int cid)
169{
170 if (fontdesc->to_ttf_cmap)
171 {
172 cid = pdf_lookup_cmap(fontdesc->to_ttf_cmap, cid);
173
174 /* vertical presentation forms */
175 if (fontdesc->font->flags.ft_substitute && fontdesc->wmode)
176 {
177 switch (cid)
178 {
179 case 0x0021: cid = 0xFE15; break; /* ! */
180 case 0x0028: cid = 0xFE35; break; /* ( */
181 case 0x0029: cid = 0xFE36; break; /* ) */
182 case 0x002C: cid = 0xFE10; break; /* , */
183 case 0x003A: cid = 0xFE13; break; /* : */
184 case 0x003B: cid = 0xFE14; break; /* ; */
185 case 0x003F: cid = 0xFE16; break; /* ? */
186 case 0x005B: cid = 0xFE47; break; /* [ */
187 case 0x005D: cid = 0xFE48; break; /* ] */
188 case 0x005F: cid = 0xFE33; break; /* _ */
189 case 0x007B: cid = 0xFE37; break; /* { */
190 case 0x007D: cid = 0xFE38; break; /* } */
191 case 0x2013: cid = 0xFE32; break; /* EN DASH */
192 case 0x2014: cid = 0xFE31; break; /* EM DASH */
193 case 0x2025: cid = 0xFE30; break; /* TWO DOT LEADER */
194 case 0x2026: cid = 0xFE19; break; /* HORIZONTAL ELLIPSIS */
195 case 0x3001: cid = 0xFE11; break; /* IDEOGRAPHIC COMMA */
196 case 0x3002: cid = 0xFE12; break; /* IDEOGRAPHIC FULL STOP */
197 case 0x3008: cid = 0xFE3F; break; /* OPENING ANGLE BRACKET */
198 case 0x3009: cid = 0xFE40; break; /* CLOSING ANGLE BRACKET */
199 case 0x300A: cid = 0xFE3D; break; /* LEFT DOUBLE ANGLE BRACKET */
200 case 0x300B: cid = 0xFE3E; break; /* RIGHT DOUBLE ANGLE BRACKET */
201 case 0x300C: cid = 0xFE41; break; /* LEFT CORNER BRACKET */
202 case 0x300D: cid = 0xFE42; break; /* RIGHT CORNER BRACKET */
203 case 0x300E: cid = 0xFE43; break; /* LEFT WHITE CORNER BRACKET */
204 case 0x300F: cid = 0xFE44; break; /* RIGHT WHITE CORNER BRACKET */
205 case 0x3010: cid = 0xFE3B; break; /* LEFT BLACK LENTICULAR BRACKET */
206 case 0x3011: cid = 0xFE3C; break; /* RIGHT BLACK LENTICULAR BRACKET */
207 case 0x3014: cid = 0xFE39; break; /* LEFT TORTOISE SHELL BRACKET */
208 case 0x3015: cid = 0xFE3A; break; /* RIGHT TORTOISE SHELL BRACKET */
209 case 0x3016: cid = 0xFE17; break; /* LEFT WHITE LENTICULAR BRACKET */
210 case 0x3017: cid = 0xFE18; break; /* RIGHT WHITE LENTICULAR BRACKET */
211
212 case 0xFF01: cid = 0xFE15; break; /* FULLWIDTH EXCLAMATION MARK */
213 case 0xFF08: cid = 0xFE35; break; /* FULLWIDTH LEFT PARENTHESIS */
214 case 0xFF09: cid = 0xFE36; break; /* FULLWIDTH RIGHT PARENTHESIS */
215 case 0xFF0C: cid = 0xFE10; break; /* FULLWIDTH COMMA */
216 case 0xFF1A: cid = 0xFE13; break; /* FULLWIDTH COLON */
217 case 0xFF1B: cid = 0xFE14; break; /* FULLWIDTH SEMICOLON */
218 case 0xFF1F: cid = 0xFE16; break; /* FULLWIDTH QUESTION MARK */
219 case 0xFF3B: cid = 0xFE47; break; /* FULLWIDTH LEFT SQUARE BRACKET */
220 case 0xFF3D: cid = 0xFE48; break; /* FULLWIDTH RIGHT SQUARE BRACKET */
221 case 0xFF3F: cid = 0xFE33; break; /* FULLWIDTH LOW LINE */
222 case 0xFF5B: cid = 0xFE37; break; /* FULLWIDTH LEFT CURLY BRACKET */
223 case 0xFF5D: cid = 0xFE38; break; /* FULLWIDTH RIGHT CURLY BRACKET */
224
225 case 0x30FC: cid = 0xFE31; break; /* KATAKANA-HIRAGANA PROLONGED SOUND MARK */
226 case 0xFF0D: cid = 0xFE31; break; /* FULLWIDTH HYPHEN-MINUS */
227 }
228 }
229
230 return ft_char_index(fontdesc->font->ft_face, cid);
231 }
232
233 if (fontdesc->cid_to_gid && (size_t)cid < fontdesc->cid_to_gid_len && cid >= 0)
234 return fontdesc->cid_to_gid[cid];
235
236 return cid;
237}
238
239int
240pdf_font_cid_to_gid(fz_context *ctx, pdf_font_desc *fontdesc, int cid)
241{
242 if (fontdesc->font->ft_face)
243 return ft_cid_to_gid(fontdesc, cid);
244 return cid;
245}
246
247static int ft_width(fz_context *ctx, pdf_font_desc *fontdesc, int cid)
248{
249 int mask = FT_LOAD_NO_SCALE | FT_LOAD_NO_HINTING | FT_LOAD_NO_BITMAP | FT_LOAD_IGNORE_TRANSFORM;
250 int gid = ft_cid_to_gid(fontdesc, cid);
251 FT_Fixed adv = 0;
252 int fterr;
253 FT_Face face = fontdesc->font->ft_face;
254 FT_UShort units_per_EM;
255
256 fterr = FT_Get_Advance(face, gid, mask, &adv);
257 if (fterr && fterr != FT_Err_Invalid_Argument)
258 fz_warn(ctx, "FT_Get_Advance(%d): %s", gid, ft_error_string(fterr));
259
260 units_per_EM = face->units_per_EM;
261 if (units_per_EM == 0)
262 units_per_EM = 2048;
263
264 return adv * 1000 / units_per_EM;
265}
266
267static const struct { int code; const char *name; } mre_diff_table[] =
268{
269 { 173, "notequal" },
270 { 176, "infinity" },
271 { 178, "lessequal" },
272 { 179, "greaterequal" },
273 { 182, "partialdiff" },
274 { 183, "summation" },
275 { 184, "product" },
276 { 185, "pi" },
277 { 186, "integral" },
278 { 189, "Omega" },
279 { 195, "radical" },
280 { 197, "approxequal" },
281 { 198, "Delta" },
282 { 215, "lozenge" },
283 { 219, "Euro" },
284 { 240, "apple" },
285};
286
287static int lookup_mre_code(const char *name)
288{
289 int i;
290 for (i = 0; i < nelem(mre_diff_table); ++i)
291 if (!strcmp(name, mre_diff_table[i].name))
292 return mre_diff_table[i].code;
293 for (i = 0; i < 256; i++)
294 if (fz_glyph_name_from_mac_roman[i] && !strcmp(name, fz_glyph_name_from_mac_roman[i]))
295 return i;
296 return -1;
297}
298
299static int ft_find_glyph_by_unicode_name(FT_Face face, const char *name)
300{
301 int unicode, glyph;
302
303 /* Prefer exact unicode match if available. */
304 unicode = fz_unicode_from_glyph_name_strict(name);
305 if (unicode > 0)
306 {
307 glyph = ft_char_index(face, unicode);
308 if (glyph > 0)
309 return glyph;
310 }
311
312 /* Fall back to font glyph name if we can. */
313 glyph = ft_name_index(face, name);
314 if (glyph > 0)
315 return glyph;
316
317 /* Fuzzy unicode match as last attempt. */
318 unicode = fz_unicode_from_glyph_name(name);
319 if (unicode > 0)
320 return ft_char_index(face, unicode);
321
322 /* Failed. */
323 return 0;
324}
325
326/*
327 * Load font files.
328 */
329
330static void
331pdf_load_builtin_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int has_descriptor)
332{
333 FT_Face face;
334 const char *clean_name = pdf_clean_font_name(fontname);
335 if (clean_name == fontname)
336 clean_name = "Times-Roman";
337
338 fontdesc->font = fz_load_system_font(ctx, fontname, 0, 0, !has_descriptor);
339 if (!fontdesc->font)
340 {
341 const unsigned char *data;
342 int len;
343
344 data = fz_lookup_base14_font(ctx, clean_name, &len);
345 if (!data)
346 fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin font: '%s'", fontname);
347
348 fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1);
349 fontdesc->font->flags.is_serif = !!strstr(clean_name, "Times");
350 }
351
352 if (!strcmp(clean_name, "Symbol") || !strcmp(clean_name, "ZapfDingbats"))
353 fontdesc->flags |= PDF_FD_SYMBOLIC;
354
355 face = fontdesc->font->ft_face;
356 fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM;
357 fontdesc->descent = 1000.0f * face->descender / face->units_per_EM;
358}
359
360static void
361pdf_load_substitute_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int mono, int serif, int bold, int italic)
362{
363 fontdesc->font = fz_load_system_font(ctx, fontname, bold, italic, 0);
364 if (!fontdesc->font)
365 {
366 const unsigned char *data;
367 int len;
368
369 data = pdf_lookup_substitute_font(ctx, mono, serif, bold, italic, &len);
370 if (!data)
371 fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find substitute font");
372
373 fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, len, 0, 1);
374 fontdesc->font->flags.fake_bold = bold && !fontdesc->font->flags.is_bold;
375 fontdesc->font->flags.fake_italic = italic && !fontdesc->font->flags.is_italic;
376
377 fontdesc->font->flags.is_mono = mono;
378 fontdesc->font->flags.is_serif = serif;
379 fontdesc->font->flags.is_bold = bold;
380 fontdesc->font->flags.is_italic = italic;
381 }
382
383 fontdesc->font->flags.ft_substitute = 1;
384 fontdesc->font->flags.ft_stretch = 1;
385}
386
387static void
388pdf_load_substitute_cjk_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, int ros, int serif)
389{
390 fontdesc->font = fz_load_system_cjk_font(ctx, fontname, ros, serif);
391 if (!fontdesc->font)
392 {
393 const unsigned char *data;
394 int size;
395 int subfont;
396
397 data = fz_lookup_cjk_font(ctx, ros, &size, &subfont);
398 if (!data)
399 fz_throw(ctx, FZ_ERROR_SYNTAX, "cannot find builtin CJK font");
400
401 /* A glyph bbox cache is too big for CJK fonts. */
402 fontdesc->font = fz_new_font_from_memory(ctx, fontname, data, size, subfont, 0);
403 }
404
405 fontdesc->font->flags.ft_substitute = 1;
406 fontdesc->font->flags.ft_stretch = 0;
407}
408
409static void
410pdf_load_system_font(fz_context *ctx, pdf_font_desc *fontdesc, const char *fontname, const char *collection)
411{
412 int bold = 0;
413 int italic = 0;
414 int serif = 0;
415 int mono = 0;
416
417 if (strstr(fontname, "Bold"))
418 bold = 1;
419 if (strstr(fontname, "Italic"))
420 italic = 1;
421 if (strstr(fontname, "Oblique"))
422 italic = 1;
423
424 if (fontdesc->flags & PDF_FD_FIXED_PITCH)
425 mono = 1;
426 if (fontdesc->flags & PDF_FD_SERIF)
427 serif = 1;
428 if (fontdesc->flags & PDF_FD_ITALIC)
429 italic = 1;
430 if (fontdesc->flags & PDF_FD_FORCE_BOLD)
431 bold = 1;
432
433 if (collection)
434 {
435 if (!strcmp(collection, "Adobe-CNS1"))
436 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_CNS, serif);
437 else if (!strcmp(collection, "Adobe-GB1"))
438 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_GB, serif);
439 else if (!strcmp(collection, "Adobe-Japan1"))
440 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_JAPAN, serif);
441 else if (!strcmp(collection, "Adobe-Korea1"))
442 pdf_load_substitute_cjk_font(ctx, fontdesc, fontname, FZ_ADOBE_KOREA, serif);
443 else
444 {
445 if (strcmp(collection, "Adobe-Identity") != 0)
446 fz_warn(ctx, "unknown cid collection: %s", collection);
447 pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic);
448 }
449 }
450 else
451 {
452 pdf_load_substitute_font(ctx, fontdesc, fontname, mono, serif, bold, italic);
453 }
454}
455
456static void
457pdf_load_embedded_font(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, const char *fontname, pdf_obj *stmref)
458{
459 fz_buffer *buf;
460
461 buf = pdf_load_stream(ctx, stmref);
462 fz_try(ctx)
463 fontdesc->font = fz_new_font_from_buffer(ctx, fontname, buf, 0, 1);
464 fz_always(ctx)
465 fz_drop_buffer(ctx, buf);
466 fz_catch(ctx)
467 fz_rethrow(ctx);
468
469 fontdesc->size += fz_buffer_storage(ctx, buf, NULL);
470 fontdesc->is_embedded = 1;
471}
472
473/*
474 * Create and destroy
475 */
476
477pdf_font_desc *
478pdf_keep_font(fz_context *ctx, pdf_font_desc *fontdesc)
479{
480 return fz_keep_storable(ctx, &fontdesc->storable);
481}
482
483void
484pdf_drop_font(fz_context *ctx, pdf_font_desc *fontdesc)
485{
486 fz_drop_storable(ctx, &fontdesc->storable);
487}
488
489static void
490pdf_drop_font_imp(fz_context *ctx, fz_storable *fontdesc_)
491{
492 pdf_font_desc *fontdesc = (pdf_font_desc *)fontdesc_;
493
494 fz_drop_font(ctx, fontdesc->font);
495 pdf_drop_cmap(ctx, fontdesc->encoding);
496 pdf_drop_cmap(ctx, fontdesc->to_ttf_cmap);
497 pdf_drop_cmap(ctx, fontdesc->to_unicode);
498 fz_free(ctx, fontdesc->cid_to_gid);
499 fz_free(ctx, fontdesc->cid_to_ucs);
500 fz_free(ctx, fontdesc->hmtx);
501 fz_free(ctx, fontdesc->vmtx);
502 fz_free(ctx, fontdesc);
503}
504
505pdf_font_desc *
506pdf_new_font_desc(fz_context *ctx)
507{
508 pdf_font_desc *fontdesc;
509
510 fontdesc = fz_malloc_struct(ctx, pdf_font_desc);
511 FZ_INIT_STORABLE(fontdesc, 1, pdf_drop_font_imp);
512 fontdesc->size = sizeof(pdf_font_desc);
513
514 fontdesc->font = NULL;
515
516 fontdesc->flags = 0;
517 fontdesc->italic_angle = 0;
518 fontdesc->ascent = 800;
519 fontdesc->descent = -200;
520 fontdesc->cap_height = 800;
521 fontdesc->x_height = 500;
522 fontdesc->missing_width = 0;
523
524 fontdesc->encoding = NULL;
525 fontdesc->to_ttf_cmap = NULL;
526 fontdesc->cid_to_gid_len = 0;
527 fontdesc->cid_to_gid = NULL;
528
529 fontdesc->to_unicode = NULL;
530 fontdesc->cid_to_ucs_len = 0;
531 fontdesc->cid_to_ucs = NULL;
532
533 fontdesc->wmode = 0;
534
535 fontdesc->hmtx_cap = 0;
536 fontdesc->vmtx_cap = 0;
537 fontdesc->hmtx_len = 0;
538 fontdesc->vmtx_len = 0;
539 fontdesc->hmtx = NULL;
540 fontdesc->vmtx = NULL;
541
542 fontdesc->dhmtx.lo = 0x0000;
543 fontdesc->dhmtx.hi = 0xFFFF;
544 fontdesc->dhmtx.w = 1000;
545
546 fontdesc->dvmtx.lo = 0x0000;
547 fontdesc->dvmtx.hi = 0xFFFF;
548 fontdesc->dvmtx.x = 0;
549 fontdesc->dvmtx.y = 880;
550 fontdesc->dvmtx.w = -1000;
551
552 fontdesc->is_embedded = 0;
553
554 return fontdesc;
555}
556
557/*
558 * Simple fonts (Type1 and TrueType)
559 */
560
561static FT_CharMap
562select_type1_cmap(FT_Face face)
563{
564 int i;
565 for (i = 0; i < face->num_charmaps; i++)
566 if (face->charmaps[i]->platform_id == 7)
567 return face->charmaps[i];
568 if (face->num_charmaps > 0)
569 return face->charmaps[0];
570 return NULL;
571}
572
573static FT_CharMap
574select_truetype_cmap(FT_Face face, int symbolic)
575{
576 int i;
577
578 /* First look for a Microsoft symbolic cmap, if applicable */
579 if (symbolic)
580 {
581 for (i = 0; i < face->num_charmaps; i++)
582 if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 0)
583 return face->charmaps[i];
584 }
585
586 /* Then look for a Microsoft Unicode cmap */
587 for (i = 0; i < face->num_charmaps; i++)
588 if (face->charmaps[i]->platform_id == 3 && face->charmaps[i]->encoding_id == 1)
589 if (FT_Get_CMap_Format(face->charmaps[i]) != -1)
590 return face->charmaps[i];
591
592 /* Finally look for an Apple MacRoman cmap */
593 for (i = 0; i < face->num_charmaps; i++)
594 if (face->charmaps[i]->platform_id == 1 && face->charmaps[i]->encoding_id == 0)
595 if (FT_Get_CMap_Format(face->charmaps[i]) != -1)
596 return face->charmaps[i];
597
598 if (face->num_charmaps > 0)
599 if (FT_Get_CMap_Format(face->charmaps[0]) != -1)
600 return face->charmaps[0];
601 return NULL;
602}
603
604static FT_CharMap
605select_unknown_cmap(FT_Face face)
606{
607 if (face->num_charmaps > 0)
608 return face->charmaps[0];
609 return NULL;
610}
611
/*
 * Load a simple (single byte encoded) font from a font dictionary.
 *
 * The steps are:
 *   1. create a descriptor and load the font, either through the
 *      FontDescriptor or as a builtin font when there is none;
 *   2. select a character map in the FreeType face;
 *   3. build the 256 entry glyph name table (estrings) from Encoding,
 *      BaseEncoding and Differences;
 *   4. build the 256 entry code to glyph table (etable), first from the
 *      character map and then refined by glyph name;
 *   5. load the ToUnicode mapping;
 *   6. load the widths, from Widths or from the font itself.
 *
 * dict is also passed as NULL by pdf_load_hail_mary_font.
 *
 * Returns the new descriptor. On error everything allocated here is
 * released and the error is rethrown.
 */
static pdf_font_desc *
pdf_load_simple_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
{
	const char *basefont;
	pdf_obj *descriptor;
	pdf_obj *encoding;
	pdf_obj *widths;
	unsigned short *etable = NULL;
	pdf_font_desc *fontdesc = NULL;
	pdf_obj *subtype;
	FT_Face face;
	FT_CharMap cmap;
	int symbolic;
	int kind;
	int glyph;

	/* Glyph name per character code; ebuffer backs names read from the font. */
	const char *estrings[256];
	char ebuffer[256][32];
	int i, k, n;
	int fterr;
	int has_lock = 0;

	/* These are assigned inside fz_try and read again in fz_catch. */
	fz_var(fontdesc);
	fz_var(etable);
	fz_var(has_lock);

	/* Load font file */
	fz_try(ctx)
	{
		fontdesc = pdf_new_font_desc(ctx);

		basefont = pdf_to_name(ctx, pdf_dict_get(ctx, dict, PDF_NAME(BaseFont)));

		descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
		if (descriptor)
			pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, NULL, basefont, 0);
		else
			pdf_load_builtin_font(ctx, fontdesc, basefont, 0);

		/* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */
		if (descriptor && pdf_is_string(ctx, pdf_dict_get(ctx, descriptor, PDF_NAME(FontName))) &&
			!pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode)) &&
			pdf_name_eq(ctx, pdf_dict_get(ctx, dict, PDF_NAME(Encoding)), PDF_NAME(WinAnsiEncoding)) &&
			pdf_dict_get_int(ctx, descriptor, PDF_NAME(Flags)) == 4)
		{
			/* Pairs of (BaseFont as found in the file, replacement font name). */
			char *cp936fonts[] = {
				"\xCB\xCE\xCC\xE5", "SimSun,Regular",
				"\xBA\xDA\xCC\xE5", "SimHei,Regular",
				"\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular",
				"\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular",
				"\xC1\xA5\xCA\xE9", "SimLi,Regular",
				NULL
			};
			/* Step by two so that only the first element of each pair is compared. */
			for (i = 0; cp936fonts[i]; i += 2)
				if (!strcmp(basefont, cp936fonts[i]))
					break;
			if (cp936fonts[i])
			{
				fz_warn(ctx, "workaround for S22PDF lying about chinese font encodings");
				/* Start over with a fresh descriptor loaded as an Adobe-GB1 font. */
				pdf_drop_font(ctx, fontdesc);
				fontdesc = NULL;
				fontdesc = pdf_new_font_desc(ctx);
				pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, "Adobe-GB1", cp936fonts[i+1], 0);
				fontdesc->encoding = pdf_load_system_cmap(ctx, "GBK-EUC-H");
				fontdesc->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
				fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");

				/* The encoding is fully set up; only the widths remain. */
				goto skip_encoding;
			}
		}

		face = fontdesc->font->ft_face;
		kind = ft_kind(face);

		/* Encoding */

		/* NOTE(review): 4 is presumably PDF_FD_SYMBOLIC - confirm against the header. */
		symbolic = fontdesc->flags & 4;

		if (kind == TYPE1)
			cmap = select_type1_cmap(face);
		else if (kind == TRUETYPE)
			cmap = select_truetype_cmap(face, symbolic);
		else
			cmap = select_unknown_cmap(face);

		if (cmap)
		{
			fterr = FT_Set_Charmap(face, cmap);
			if (fterr)
				fz_warn(ctx, "freetype could not set cmap: %s", ft_error_string(fterr));
		}
		else
			fz_warn(ctx, "freetype could not find any cmaps");

		etable = fz_malloc_array(ctx, 256, unsigned short);
		fontdesc->size += 256 * sizeof(unsigned short);
		for (i = 0; i < 256; i++)
		{
			estrings[i] = NULL;
			etable[i] = 0;
		}

		encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding));
		if (encoding)
		{
			if (pdf_is_name(ctx, encoding))
				pdf_load_encoding(estrings, pdf_to_name(ctx, encoding));

			if (pdf_is_dict(ctx, encoding))
			{
				pdf_obj *base, *diff, *item;

				base = pdf_dict_get(ctx, encoding, PDF_NAME(BaseEncoding));
				if (pdf_is_name(ctx, base))
					pdf_load_encoding(estrings, pdf_to_name(ctx, base));
				else if (!fontdesc->is_embedded && !symbolic)
					pdf_load_encoding(estrings, "StandardEncoding");

				diff = pdf_dict_get(ctx, encoding, PDF_NAME(Differences));
				if (pdf_is_array(ctx, diff))
				{
					/*
					 * An integer sets the current code; each name is stored
					 * at the current code, which then advances by one.
					 * Names at codes outside 0..255 are ignored.
					 */
					n = pdf_array_len(ctx, diff);
					k = 0;
					for (i = 0; i < n; i++)
					{
						item = pdf_array_get(ctx, diff, i);
						if (pdf_is_int(ctx, item))
							k = pdf_to_int(ctx, item);
						if (pdf_is_name(ctx, item) && k >= 0 && k < nelem(estrings))
							estrings[k++] = pdf_to_name(ctx, item);
					}
				}
			}
		}
		else if (!fontdesc->is_embedded && !symbolic)
			pdf_load_encoding(estrings, "StandardEncoding");

		/* start with the builtin encoding */
		for (i = 0; i < 256; i++)
			etable[i] = ft_char_index(face, i);

		/* has_lock lets the fz_catch below release the lock if anything throws. */
		fz_lock(ctx, FZ_LOCK_FREETYPE);
		has_lock = 1;

		/* built-in and substitute fonts may be a different type than what the document expects */
		subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
		if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1)))
			kind = TYPE1;
		else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1)))
			kind = TYPE1;
		else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType)))
			kind = TRUETYPE;
		else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0)))
			kind = TYPE1;
		else if (pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2)))
			kind = TRUETYPE;

		/* encode by glyph name where we can */
		if (kind == TYPE1)
		{
			for (i = 0; i < 256; i++)
			{
				if (estrings[i])
				{
					glyph = ft_name_index(face, estrings[i]);
					if (glyph > 0)
						etable[i] = glyph;
				}
			}
		}

		/* encode by glyph name where we can */
		if (kind == TRUETYPE)
		{
			/* Unicode cmap */
			if (!symbolic && face->charmap && face->charmap->platform_id == 3)
			{
				for (i = 0; i < 256; i++)
				{
					if (estrings[i])
					{
						glyph = ft_find_glyph_by_unicode_name(face, estrings[i]);
						if (glyph > 0)
							etable[i] = glyph;
					}
				}
			}

			/* MacRoman cmap */
			else if (!symbolic && face->charmap && face->charmap->platform_id == 1)
			{
				for (i = 0; i < 256; i++)
				{
					if (estrings[i])
					{
						/* Try the MacRoman code first, then the font's glyph names. */
						int mrcode = lookup_mre_code(estrings[i]);
						glyph = 0;
						if (mrcode > 0)
							glyph = ft_char_index(face, mrcode);
						if (glyph == 0)
							glyph = ft_name_index(face, estrings[i]);
						if (glyph > 0)
							etable[i] = glyph;
					}
				}
			}

			/* Symbolic cmap */
			else if (!face->charmap || face->charmap->encoding != FT_ENCODING_MS_SYMBOL)
			{
				for (i = 0; i < 256; i++)
				{
					if (estrings[i])
					{
						glyph = ft_name_index(face, estrings[i]);
						if (glyph > 0)
							etable[i] = glyph;
					}
				}
			}
		}

		/* try to reverse the glyph names from the builtin encoding */
		for (i = 0; i < 256; i++)
		{
			if (etable[i] && !estrings[i])
			{
				if (FT_HAS_GLYPH_NAMES(face))
				{
					fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32);
					if (fterr)
						fz_warn(ctx, "freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr));
					if (ebuffer[i][0])
						estrings[i] = ebuffer[i];
				}
				else
				{
					estrings[i] = (char*) fz_glyph_name_from_win_ansi[i]; /* discard const */
				}
			}
		}

		/* symbolic Type 1 fonts with an implicit encoding and non-standard glyph names */
		if (kind == TYPE1 && symbolic)
		{
			for (i = 0; i < 256; i++)
				if (etable[i] && estrings[i] && !fz_unicode_from_glyph_name(estrings[i]))
					estrings[i] = (char*) fz_glyph_name_from_adobe_standard[i];
		}

		fz_unlock(ctx, FZ_LOCK_FREETYPE);
		has_lock = 0;

		fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1);
		fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);
		fontdesc->cid_to_gid_len = 256;
		/* From here on etable belongs to fontdesc and is freed along with it. */
		fontdesc->cid_to_gid = etable;

		/* A broken ToUnicode CMap is not fatal; only FZ_ERROR_TRYLATER is passed on. */
		fz_try(ctx)
		{
			pdf_load_to_unicode(ctx, doc, fontdesc, estrings, NULL, pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode)));
		}
		fz_catch(ctx)
		{
			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
			fz_warn(ctx, "cannot load ToUnicode CMap");
		}

	skip_encoding:

		/* Widths */

		pdf_set_default_hmtx(ctx, fontdesc, fontdesc->missing_width);

		widths = pdf_dict_get(ctx, dict, PDF_NAME(Widths));
		if (widths)
		{
			int first, last;

			first = pdf_dict_get_int(ctx, dict, PDF_NAME(FirstChar));
			last = pdf_dict_get_int(ctx, dict, PDF_NAME(LastChar));

			/* An invalid range collapses to code 0 only, so one width is still read. */
			if (first < 0 || last > 255 || first > last)
				first = last = 0;

			for (i = 0; i < last - first + 1; i++)
			{
				int wid = pdf_array_get_int(ctx, widths, i);
				pdf_add_hmtx(ctx, fontdesc, i + first, i + first, wid);
			}
		}
		else
		{
			/* No Widths array: take the advance of every code from the font. */
			for (i = 0; i < 256; i++)
				pdf_add_hmtx(ctx, fontdesc, i, i, ft_width(ctx, fontdesc, i));
		}

		pdf_end_hmtx(ctx, fontdesc);
	}
	fz_catch(ctx)
	{
		if (has_lock)
			fz_unlock(ctx, FZ_LOCK_FREETYPE);
		/* Free etable here only if it was not handed over to fontdesc yet. */
		if (fontdesc && etable != fontdesc->cid_to_gid)
			fz_free(ctx, etable);
		pdf_drop_font(ctx, fontdesc);
		fz_rethrow(ctx);
	}
	return fontdesc;
}
922
/*
 * Store callbacks for the hail mary font. The font is stored under a
 * single fixed key (the address of hail_mary_store_key), so the
 * callbacks below have almost nothing to do.
 */

/* Hash key callback: fills in a constant key and returns 1. */
static int
hail_mary_make_hash_key(fz_context *ctx, fz_store_hash *hash, void *key_)
{
	hash->u.pi.i = 0;
	hash->u.pi.ptr = NULL;
	return 1;
}

/* Keep callback: the key is a static object, so it is returned as is. */
static void *
hail_mary_keep_key(fz_context *ctx, void *key)
{
	return key;
}

/* Drop callback: nothing to release for a static key. */
static void
hail_mary_drop_key(fz_context *ctx, void *key)
{
}

/* Compare callback: non-zero when both keys are the same pointer. */
static int
hail_mary_cmp_key(fz_context *ctx, void *k0, void *k1)
{
	return k0 == k1;
}

/* Format callback: writes a fixed description into s (at most n bytes). */
static void
hail_mary_format_key(fz_context *ctx, char *s, int n, void *key_)
{
	fz_strlcpy(s, "(hail mary font)", n);
}

/* Only the address of this object is used, as the store key. */
static int hail_mary_store_key; /* Dummy */

/* Store type tying the callbacks above together; the last slot is unused. */
static const fz_store_type hail_mary_store_type =
{
	hail_mary_make_hash_key,
	hail_mary_keep_key,
	hail_mary_drop_key,
	hail_mary_cmp_key,
	hail_mary_format_key,
	NULL
};
965
966pdf_font_desc *
967pdf_load_hail_mary_font(fz_context *ctx, pdf_document *doc)
968{
969 pdf_font_desc *fontdesc;
970 pdf_font_desc *existing;
971
972 if ((fontdesc = fz_find_item(ctx, pdf_drop_font_imp, &hail_mary_store_key, &hail_mary_store_type)) != NULL)
973 {
974 return fontdesc;
975 }
976
977 /* FIXME: Get someone with a clue about fonts to fix this */
978 fontdesc = pdf_load_simple_font(ctx, doc, NULL);
979
980 existing = fz_store_item(ctx, &hail_mary_store_key, fontdesc, fontdesc->size, &hail_mary_store_type);
981 assert(existing == NULL);
982 (void)existing; /* Silence warning in release builds */
983
984 return fontdesc;
985}
986
987/*
988 * CID Fonts
989 */
990
991static pdf_font_desc *
992load_cid_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode)
993{
994 pdf_obj *widths;
995 pdf_obj *descriptor;
996 pdf_font_desc *fontdesc = NULL;
997 fz_buffer *buf = NULL;
998 pdf_cmap *cmap;
999 FT_Face face;
1000 char collection[256];
1001 const char *basefont;
1002 int i, k, fterr;
1003 pdf_obj *cidtogidmap;
1004 pdf_obj *obj;
1005 int dw;
1006
1007 fz_var(fontdesc);
1008 fz_var(buf);
1009
1010 fz_try(ctx)
1011 {
1012 /* Get font name and CID collection */
1013
1014 basefont = pdf_to_name(ctx, pdf_dict_get(ctx, dict, PDF_NAME(BaseFont)));
1015
1016 {
1017 pdf_obj *cidinfo;
1018 const char *reg, *ord;
1019
1020 cidinfo = pdf_dict_get(ctx, dict, PDF_NAME(CIDSystemInfo));
1021 if (!cidinfo)
1022 fz_throw(ctx, FZ_ERROR_SYNTAX, "cid font is missing info");
1023
1024 reg = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Registry), NULL);
1025 ord = pdf_dict_get_string(ctx, cidinfo, PDF_NAME(Ordering), NULL);
1026 fz_snprintf(collection, sizeof collection, "%s-%s", reg, ord);
1027 }
1028
1029 /* Encoding */
1030
1031 if (pdf_is_name(ctx, encoding))
1032 {
1033 cmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, encoding));
1034 }
1035 else if (pdf_is_indirect(ctx, encoding))
1036 {
1037 cmap = pdf_load_embedded_cmap(ctx, doc, encoding);
1038 }
1039 else
1040 {
1041 fz_throw(ctx, FZ_ERROR_SYNTAX, "font missing encoding");
1042 }
1043
1044 /* Load font file */
1045
1046 fontdesc = pdf_new_font_desc(ctx);
1047
1048 fontdesc->encoding = cmap;
1049 fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);
1050
1051 pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding));
1052
1053 descriptor = pdf_dict_get(ctx, dict, PDF_NAME(FontDescriptor));
1054 if (!descriptor)
1055 fz_throw(ctx, FZ_ERROR_SYNTAX, "missing font descriptor");
1056 pdf_load_font_descriptor(ctx, doc, fontdesc, descriptor, collection, basefont, 1);
1057
1058 face = fontdesc->font->ft_face;
1059
1060 /* Apply encoding */
1061
1062 cidtogidmap = pdf_dict_get(ctx, dict, PDF_NAME(CIDToGIDMap));
1063 if (pdf_is_stream(ctx, cidtogidmap))
1064 {
1065 size_t z, len;
1066 unsigned char *data;
1067
1068 buf = pdf_load_stream(ctx, cidtogidmap);
1069
1070 len = fz_buffer_storage(ctx, buf, &data);
1071 fontdesc->cid_to_gid_len = len / 2;
1072 fontdesc->cid_to_gid = fz_malloc_array(ctx, fontdesc->cid_to_gid_len, unsigned short);
1073 fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short);
1074 for (z = 0; z < fontdesc->cid_to_gid_len; z++)
1075 fontdesc->cid_to_gid[z] = (data[z * 2] << 8) + data[z * 2 + 1];
1076 }
1077 else if (cidtogidmap && !pdf_name_eq(ctx, PDF_NAME(Identity), cidtogidmap))
1078 {
1079 fz_warn(ctx, "ignoring unknown CIDToGIDMap entry");
1080 }
1081
1082 /* if font is external, cidtogidmap should not be identity */
1083 /* so we map from cid to unicode and then map that through the (3 1) */
1084 /* unicode cmap to get a glyph id */
1085 else if (fontdesc->font->flags.ft_substitute)
1086 {
1087 fterr = FT_Select_Charmap(face, ft_encoding_unicode);
1088 if (fterr)
1089 fz_throw(ctx, FZ_ERROR_SYNTAX, "no unicode cmap when emulating CID font: %s", ft_error_string(fterr));
1090
1091 if (!strcmp(collection, "Adobe-CNS1"))
1092 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
1093 else if (!strcmp(collection, "Adobe-GB1"))
1094 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
1095 else if (!strcmp(collection, "Adobe-Japan1"))
1096 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
1097 else if (!strcmp(collection, "Adobe-Japan2"))
1098 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2");
1099 else if (!strcmp(collection, "Adobe-Korea1"))
1100 fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");
1101 }
1102
1103 pdf_load_to_unicode(ctx, doc, fontdesc, NULL, collection, to_unicode);
1104
1105 /* If we have an identity encoding, we're supposed to use the glyph ids directly.
1106 * If we only have a substitute font, that won't work.
1107 * Make a last ditch attempt by using
1108 * the ToUnicode table if it exists to map via the substitute font's cmap. */
1109 if (strstr(fontdesc->encoding->cmap_name, "Identity-") && fontdesc->font->flags.ft_substitute)
1110 {
1111 fz_warn(ctx, "non-embedded font using identity encoding: %s", basefont);
1112 if (fontdesc->to_unicode && !fontdesc->to_ttf_cmap)
1113 fontdesc->to_ttf_cmap = pdf_keep_cmap(ctx, fontdesc->to_unicode);
1114 }
1115
1116 /* Horizontal */
1117
1118 dw = 1000;
1119 obj = pdf_dict_get(ctx, dict, PDF_NAME(DW));
1120 if (obj)
1121 dw = pdf_to_int(ctx, obj);
1122 pdf_set_default_hmtx(ctx, fontdesc, dw);
1123
1124 widths = pdf_dict_get(ctx, dict, PDF_NAME(W));
1125 if (widths)
1126 {
1127 int c0, c1, w, n, m;
1128
1129 n = pdf_array_len(ctx, widths);
1130 for (i = 0; i < n; )
1131 {
1132 c0 = pdf_array_get_int(ctx, widths, i);
1133 obj = pdf_array_get(ctx, widths, i + 1);
1134 if (pdf_is_array(ctx, obj))
1135 {
1136 m = pdf_array_len(ctx, obj);
1137 for (k = 0; k < m; k++)
1138 {
1139 w = pdf_array_get_int(ctx, obj, k);
1140 pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w);
1141 }
1142 i += 2;
1143 }
1144 else
1145 {
1146 c1 = pdf_to_int(ctx, obj);
1147 w = pdf_array_get_int(ctx, widths, i + 2);
1148 pdf_add_hmtx(ctx, fontdesc, c0, c1, w);
1149 i += 3;
1150 }
1151 }
1152 }
1153
1154 pdf_end_hmtx(ctx, fontdesc);
1155
1156 /* Vertical */
1157
1158 if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1)
1159 {
1160 int dw2y = 880;
1161 int dw2w = -1000;
1162
1163 obj = pdf_dict_get(ctx, dict, PDF_NAME(DW2));
1164 if (obj)
1165 {
1166 dw2y = pdf_array_get_int(ctx, obj, 0);
1167 dw2w = pdf_array_get_int(ctx, obj, 1);
1168 }
1169
1170 pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w);
1171
1172 widths = pdf_dict_get(ctx, dict, PDF_NAME(W2));
1173 if (widths)
1174 {
1175 int c0, c1, w, x, y, n;
1176
1177 n = pdf_array_len(ctx, widths);
1178 for (i = 0; i < n; )
1179 {
1180 c0 = pdf_array_get_int(ctx, widths, i);
1181 obj = pdf_array_get(ctx, widths, i + 1);
1182 if (pdf_is_array(ctx, obj))
1183 {
1184 int m = pdf_array_len(ctx, obj);
1185 for (k = 0; k * 3 < m; k ++)
1186 {
1187 w = pdf_array_get_int(ctx, obj, k * 3 + 0);
1188 x = pdf_array_get_int(ctx, obj, k * 3 + 1);
1189 y = pdf_array_get_int(ctx, obj, k * 3 + 2);
1190 pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w);
1191 }
1192 i += 2;
1193 }
1194 else
1195 {
1196 c1 = pdf_to_int(ctx, obj);
1197 w = pdf_array_get_int(ctx, widths, i + 2);
1198 x = pdf_array_get_int(ctx, widths, i + 3);
1199 y = pdf_array_get_int(ctx, widths, i + 4);
1200 pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w);
1201 i += 5;
1202 }
1203 }
1204 }
1205
1206 pdf_end_vmtx(ctx, fontdesc);
1207 }
1208 }
1209 fz_always(ctx)
1210 fz_drop_buffer(ctx, buf);
1211 fz_catch(ctx)
1212 {
1213 pdf_drop_font(ctx, fontdesc);
1214 fz_rethrow(ctx);
1215 }
1216
1217 return fontdesc;
1218}
1219
1220static pdf_font_desc *
1221pdf_load_type0_font(fz_context *ctx, pdf_document *doc, pdf_obj *dict)
1222{
1223 pdf_obj *dfonts;
1224 pdf_obj *dfont;
1225 pdf_obj *subtype;
1226 pdf_obj *encoding;
1227 pdf_obj *to_unicode;
1228
1229 dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts));
1230 if (!dfonts)
1231 fz_throw(ctx, FZ_ERROR_SYNTAX, "cid font is missing descendant fonts");
1232
1233 dfont = pdf_array_get(ctx, dfonts, 0);
1234
1235 subtype = pdf_dict_get(ctx, dfont, PDF_NAME(Subtype));
1236 encoding = pdf_dict_get(ctx, dict, PDF_NAME(Encoding));
1237 to_unicode = pdf_dict_get(ctx, dict, PDF_NAME(ToUnicode));
1238
1239 if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType0)))
1240 return load_cid_font(ctx, doc, dfont, encoding, to_unicode);
1241 if (pdf_is_name(ctx, subtype) && pdf_name_eq(ctx, subtype, PDF_NAME(CIDFontType2)))
1242 return load_cid_font(ctx, doc, dfont, encoding, to_unicode);
1243 fz_throw(ctx, FZ_ERROR_SYNTAX, "unknown cid font type");
1244}
1245
1246/*
1247 * FontDescriptor
1248 */
1249
1250static void
1251pdf_load_font_descriptor(fz_context *ctx, pdf_document *doc, pdf_font_desc *fontdesc, pdf_obj *dict,
1252 const char *collection, const char *basefont, int iscidfont)
1253{
1254 pdf_obj *obj1, *obj2, *obj3, *obj;
1255 const char *fontname;
1256 FT_Face face;
1257
1258 /* Prefer BaseFont; don't bother with FontName */
1259 fontname = basefont;
1260
1261 fontdesc->flags = pdf_dict_get_int(ctx, dict, PDF_NAME(Flags));
1262 fontdesc->italic_angle = pdf_dict_get_real(ctx, dict, PDF_NAME(ItalicAngle));
1263 fontdesc->ascent = pdf_dict_get_real(ctx, dict, PDF_NAME(Ascent));
1264 fontdesc->descent = pdf_dict_get_real(ctx, dict, PDF_NAME(Descent));
1265 fontdesc->cap_height = pdf_dict_get_real(ctx, dict, PDF_NAME(CapHeight));
1266 fontdesc->x_height = pdf_dict_get_real(ctx, dict, PDF_NAME(XHeight));
1267 fontdesc->missing_width = pdf_dict_get_real(ctx, dict, PDF_NAME(MissingWidth));
1268
1269 obj1 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile));
1270 obj2 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile2));
1271 obj3 = pdf_dict_get(ctx, dict, PDF_NAME(FontFile3));
1272 obj = obj1 ? obj1 : obj2 ? obj2 : obj3;
1273
1274 if (pdf_is_indirect(ctx, obj))
1275 {
1276 fz_try(ctx)
1277 {
1278 pdf_load_embedded_font(ctx, doc, fontdesc, fontname, obj);
1279 }
1280 fz_catch(ctx)
1281 {
1282 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
1283 fz_warn(ctx, "ignored error when loading embedded font; attempting to load system font");
1284 if (!iscidfont && fontname != pdf_clean_font_name(fontname))
1285 pdf_load_builtin_font(ctx, fontdesc, fontname, 1);
1286 else
1287 pdf_load_system_font(ctx, fontdesc, fontname, collection);
1288 }
1289 }
1290 else
1291 {
1292 if (!iscidfont && fontname != pdf_clean_font_name(fontname))
1293 pdf_load_builtin_font(ctx, fontdesc, fontname, 1);
1294 else
1295 pdf_load_system_font(ctx, fontdesc, fontname, collection);
1296 }
1297
1298 /* Check for DynaLab fonts that must use hinting */
1299 face = fontdesc->font->ft_face;
1300 if (ft_kind(face) == TRUETYPE)
1301 {
1302 /* FreeType's own 'tricky' font detection needs a bit of help */
1303 if (is_dynalab(fontdesc->font->name))
1304 face->face_flags |= FT_FACE_FLAG_TRICKY;
1305
1306 if (fontdesc->ascent == 0.0f)
1307 fontdesc->ascent = 1000.0f * face->ascender / face->units_per_EM;
1308
1309 if (fontdesc->descent == 0.0f)
1310 fontdesc->descent = 1000.0f * face->descender / face->units_per_EM;
1311 }
1312}
1313
1314static void
1315pdf_make_width_table(fz_context *ctx, pdf_font_desc *fontdesc)
1316{
1317 fz_font *font = fontdesc->font;
1318 int i, k, n, cid, gid;
1319
1320 n = 0;
1321 for (i = 0; i < fontdesc->hmtx_len; i++)
1322 {
1323 for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++)
1324 {
1325 cid = pdf_lookup_cmap(fontdesc->encoding, k);
1326 gid = pdf_font_cid_to_gid(ctx, fontdesc, cid);
1327 if (gid > n)
1328 n = gid;
1329 }
1330 }
1331
1332 font->width_count = n + 1;
1333 font->width_table = fz_malloc_array(ctx, font->width_count, short);
1334 fontdesc->size += font->width_count * sizeof(short);
1335
1336 font->width_default = fontdesc->dhmtx.w;
1337 for (i = 0; i < font->width_count; i++)
1338 font->width_table[i] = -1;
1339
1340 for (i = 0; i < fontdesc->hmtx_len; i++)
1341 {
1342 for (k = fontdesc->hmtx[i].lo; k <= fontdesc->hmtx[i].hi; k++)
1343 {
1344 cid = pdf_lookup_cmap(fontdesc->encoding, k);
1345 gid = pdf_font_cid_to_gid(ctx, fontdesc, cid);
1346 if (gid >= 0 && gid < font->width_count)
1347 font->width_table[gid] = fz_maxi(fontdesc->hmtx[i].w, font->width_table[gid]);
1348 }
1349 }
1350
1351 for (i = 0; i < font->width_count; i++)
1352 if (font->width_table[i] == -1)
1353 font->width_table[i] = font->width_default;
1354}
1355
1356pdf_font_desc *
1357pdf_load_font(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *dict)
1358{
1359 pdf_obj *subtype;
1360 pdf_obj *dfonts;
1361 pdf_obj *charprocs;
1362 pdf_font_desc *fontdesc = NULL;
1363 int type3 = 0;
1364
1365 if (pdf_obj_marked(ctx, dict))
1366 fz_throw(ctx, FZ_ERROR_SYNTAX, "Recursive Type3 font definition.");
1367
1368 if ((fontdesc = pdf_find_item(ctx, pdf_drop_font_imp, dict)) != NULL)
1369 {
1370 return fontdesc;
1371 }
1372
1373 subtype = pdf_dict_get(ctx, dict, PDF_NAME(Subtype));
1374 dfonts = pdf_dict_get(ctx, dict, PDF_NAME(DescendantFonts));
1375 charprocs = pdf_dict_get(ctx, dict, PDF_NAME(CharProcs));
1376
1377 if (pdf_name_eq(ctx, subtype, PDF_NAME(Type0)))
1378 fontdesc = pdf_load_type0_font(ctx, doc, dict);
1379 else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type1)))
1380 fontdesc = pdf_load_simple_font(ctx, doc, dict);
1381 else if (pdf_name_eq(ctx, subtype, PDF_NAME(MMType1)))
1382 fontdesc = pdf_load_simple_font(ctx, doc, dict);
1383 else if (pdf_name_eq(ctx, subtype, PDF_NAME(TrueType)))
1384 fontdesc = pdf_load_simple_font(ctx, doc, dict);
1385 else if (pdf_name_eq(ctx, subtype, PDF_NAME(Type3)))
1386 {
1387 fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict);
1388 type3 = 1;
1389 }
1390 else if (charprocs)
1391 {
1392 fz_warn(ctx, "unknown font format, guessing type3.");
1393 fontdesc = pdf_load_type3_font(ctx, doc, rdb, dict);
1394 type3 = 1;
1395 }
1396 else if (dfonts)
1397 {
1398 fz_warn(ctx, "unknown font format, guessing type0.");
1399 fontdesc = pdf_load_type0_font(ctx, doc, dict);
1400 }
1401 else
1402 {
1403 fz_warn(ctx, "unknown font format, guessing type1 or truetype.");
1404 fontdesc = pdf_load_simple_font(ctx, doc, dict);
1405 }
1406
1407 pdf_mark_obj(ctx, dict);
1408 fz_try(ctx)
1409 {
1410 /* Create glyph width table for stretching substitute fonts and text extraction. */
1411 pdf_make_width_table(ctx, fontdesc);
1412
1413 /* Load CharProcs */
1414 if (type3)
1415 pdf_load_type3_glyphs(ctx, doc, fontdesc);
1416
1417 pdf_store_item(ctx, dict, fontdesc, fontdesc->size);
1418 }
1419 fz_always(ctx)
1420 pdf_unmark_obj(ctx, dict);
1421 fz_catch(ctx)
1422 {
1423 pdf_drop_font(ctx, fontdesc);
1424 fz_rethrow(ctx);
1425 }
1426
1427 return fontdesc;
1428}
1429
1430void
1431pdf_print_font(fz_context *ctx, fz_output *out, pdf_font_desc *fontdesc)
1432{
1433 int i;
1434
1435 fz_write_printf(ctx, out, "fontdesc {\n");
1436
1437 if (fontdesc->font->ft_face)
1438 fz_write_printf(ctx, out, "\tfreetype font\n");
1439 if (fontdesc->font->t3procs)
1440 fz_write_printf(ctx, out, "\ttype3 font\n");
1441
1442 fz_write_printf(ctx, out, "\twmode %d\n", fontdesc->wmode);
1443 fz_write_printf(ctx, out, "\tDW %d\n", fontdesc->dhmtx.w);
1444
1445 fz_write_printf(ctx, out, "\tW {\n");
1446 for (i = 0; i < fontdesc->hmtx_len; i++)
1447 fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d\n",
1448 fontdesc->hmtx[i].lo, fontdesc->hmtx[i].hi, fontdesc->hmtx[i].w);
1449 fz_write_printf(ctx, out, "\t}\n");
1450
1451 if (fontdesc->wmode)
1452 {
1453 fz_write_printf(ctx, out, "\tDW2 [%d %d]\n", fontdesc->dvmtx.y, fontdesc->dvmtx.w);
1454 fz_write_printf(ctx, out, "\tW2 {\n");
1455 for (i = 0; i < fontdesc->vmtx_len; i++)
1456 fz_write_printf(ctx, out, "\t\t<%04x> <%04x> %d %d %d\n", fontdesc->vmtx[i].lo, fontdesc->vmtx[i].hi,
1457 fontdesc->vmtx[i].x, fontdesc->vmtx[i].y, fontdesc->vmtx[i].w);
1458 fz_write_printf(ctx, out, "\t}\n");
1459 }
1460}
1461