1#include "mupdf/fitz.h"
2#include "mupdf/pdf.h"
3
4#include <string.h>
5
6/* Load or synthesize ToUnicode map for fonts */
7
8static void
9pdf_remap_cmap_range(fz_context *ctx, pdf_cmap *ucs_from_gid,
10 unsigned int cpt, unsigned int gid, unsigned int n, pdf_cmap *ucs_from_cpt)
11{
12 unsigned int k;
13 int ucsbuf[8];
14 int ucslen;
15
16 for (k = 0; k <= n; ++k)
17 {
18 ucslen = pdf_lookup_cmap_full(ucs_from_cpt, cpt + k, ucsbuf);
19 if (ucslen == 1)
20 pdf_map_range_to_range(ctx, ucs_from_gid, gid + k, gid + k, ucsbuf[0]);
21 else if (ucslen > 1)
22 pdf_map_one_to_many(ctx, ucs_from_gid, gid + k, ucsbuf, ucslen);
23 }
24}
25
26static pdf_cmap *
27pdf_remap_cmap(fz_context *ctx, pdf_cmap *gid_from_cpt, pdf_cmap *ucs_from_cpt)
28{
29 pdf_cmap *ucs_from_gid;
30 unsigned int a, b, x;
31 int i;
32
33 ucs_from_gid = pdf_new_cmap(ctx);
34
35 fz_try(ctx)
36 {
37 if (gid_from_cpt->usecmap)
38 ucs_from_gid->usecmap = pdf_remap_cmap(ctx, gid_from_cpt->usecmap, ucs_from_cpt);
39
40 for (i = 0; i < gid_from_cpt->codespace_len; i++)
41 {
42 pdf_add_codespace(ctx, ucs_from_gid,
43 gid_from_cpt->codespace[i].low,
44 gid_from_cpt->codespace[i].high,
45 gid_from_cpt->codespace[i].n);
46 }
47
48 for (i = 0; i < gid_from_cpt->rlen; ++i)
49 {
50 a = gid_from_cpt->ranges[i].low;
51 b = gid_from_cpt->ranges[i].high;
52 x = gid_from_cpt->ranges[i].out;
53 pdf_remap_cmap_range(ctx, ucs_from_gid, a, x, b - a, ucs_from_cpt);
54 }
55
56 for (i = 0; i < gid_from_cpt->xlen; ++i)
57 {
58 a = gid_from_cpt->xranges[i].low;
59 b = gid_from_cpt->xranges[i].high;
60 x = gid_from_cpt->xranges[i].out;
61 pdf_remap_cmap_range(ctx, ucs_from_gid, a, x, b - a, ucs_from_cpt);
62 }
63
64 /* Font encoding CMaps don't have one-to-many mappings, so we can ignore the mranges. */
65
66 pdf_sort_cmap(ctx, ucs_from_gid);
67 }
68 fz_catch(ctx)
69 {
70 pdf_drop_cmap(ctx, ucs_from_gid);
71 fz_rethrow(ctx);
72 }
73
74 return ucs_from_gid;
75}
76
77void
78pdf_load_to_unicode(fz_context *ctx, pdf_document *doc, pdf_font_desc *font,
79 const char **strings, char *collection, pdf_obj *cmapstm)
80{
81 unsigned int cpt;
82
83 if (pdf_is_stream(ctx, cmapstm))
84 {
85 pdf_cmap *ucs_from_cpt = pdf_load_embedded_cmap(ctx, doc, cmapstm);
86 fz_try(ctx)
87 font->to_unicode = pdf_remap_cmap(ctx, font->encoding, ucs_from_cpt);
88 fz_always(ctx)
89 pdf_drop_cmap(ctx, ucs_from_cpt);
90 fz_catch(ctx)
91 fz_rethrow(ctx);
92 font->size += pdf_cmap_size(ctx, font->to_unicode);
93 }
94
95 else if (collection)
96 {
97 if (!strcmp(collection, "Adobe-CNS1"))
98 font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
99 else if (!strcmp(collection, "Adobe-GB1"))
100 font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
101 else if (!strcmp(collection, "Adobe-Japan1"))
102 font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
103 else if (!strcmp(collection, "Adobe-Korea1"))
104 font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");
105
106 return;
107 }
108
109 if (strings)
110 {
111 /* TODO one-to-many mappings */
112
113 font->cid_to_ucs = fz_malloc_array(ctx, 256, unsigned short);
114 font->cid_to_ucs_len = 256;
115 font->size += 256 * sizeof *font->cid_to_ucs;
116
117 for (cpt = 0; cpt < 256; cpt++)
118 {
119 if (strings[cpt])
120 font->cid_to_ucs[cpt] = fz_unicode_from_glyph_name(strings[cpt]);
121 else
122 font->cid_to_ucs[cpt] = FZ_REPLACEMENT_CHARACTER;
123 }
124 }
125
126 if (!font->to_unicode && !font->cid_to_ucs)
127 {
128 /* TODO: synthesize a ToUnicode if it's a freetype font with
129 * cmap and/or post tables or if it has glyph names. */
130 }
131}
132