pdf-unicode.c source code [MuPDF/source/pdf/pdf-unicode.c]

1	#include "mupdf/fitz.h"
2	#include "mupdf/pdf.h"
3
4	#include <string.h>
5
/* Load or synthesize ToUnicode map for fonts */
7
8	static void
9	pdf_remap_cmap_range(fz_context ctx, pdf_cmap ucs_from_gid,
10	unsigned int cpt, unsigned int gid, unsigned int n, pdf_cmap *ucs_from_cpt)
11	{
12	unsigned int k;
13	int ucsbuf[`8`];
14	int ucslen;
15
16	for (k = `0`; k <= n; ++k)
17	{
18	ucslen = pdf_lookup_cmap_full(ucs_from_cpt, cpt + k, ucsbuf);
19	if (ucslen == `1`)
20	pdf_map_range_to_range(ctx, ucs_from_gid, gid + k, gid + k, ucsbuf[`0`]);
21	else if (ucslen > `1`)
22	pdf_map_one_to_many(ctx, ucs_from_gid, gid + k, ucsbuf, ucslen);
23	}
24	}
25
26	static pdf_cmap *
27	pdf_remap_cmap(fz_context ctx, pdf_cmap gid_from_cpt, pdf_cmap *ucs_from_cpt)
28	{
29	pdf_cmap *ucs_from_gid;
30	unsigned int a, b, x;
31	int i;
32
33	ucs_from_gid = pdf_new_cmap(ctx);
34
35	fz_try(ctx)
36	{
37	if (gid_from_cpt->usecmap)
38	ucs_from_gid->usecmap = pdf_remap_cmap(ctx, gid_from_cpt->usecmap, ucs_from_cpt);
39
40	for (i = `0`; i < gid_from_cpt->codespace_len; i++)
41	{
42	pdf_add_codespace(ctx, ucs_from_gid,
43	gid_from_cpt->codespace[i].low,
44	gid_from_cpt->codespace[i].high,
45	gid_from_cpt->codespace[i].n);
46	}
47
48	for (i = `0`; i < gid_from_cpt->rlen; ++i)
49	{
50	a = gid_from_cpt->ranges[i].low;
51	b = gid_from_cpt->ranges[i].high;
52	x = gid_from_cpt->ranges[i].out;
53	pdf_remap_cmap_range(ctx, ucs_from_gid, a, x, b - a, ucs_from_cpt);
54	}
55
56	for (i = `0`; i < gid_from_cpt->xlen; ++i)
57	{
58	a = gid_from_cpt->xranges[i].low;
59	b = gid_from_cpt->xranges[i].high;
60	x = gid_from_cpt->xranges[i].out;
61	pdf_remap_cmap_range(ctx, ucs_from_gid, a, x, b - a, ucs_from_cpt);
62	}
63
64	/ Font encoding CMaps don't have one-to-many mappings, so we can ignore the mranges. /
65
66	pdf_sort_cmap(ctx, ucs_from_gid);
67	}
68	fz_catch(ctx)
69	{
70	pdf_drop_cmap(ctx, ucs_from_gid);
71	fz_rethrow(ctx);
72	}
73
74	return ucs_from_gid;
75	}
76
77	void
78	pdf_load_to_unicode(fz_context ctx, pdf_document doc, pdf_font_desc *font,
79	const char *strings, char* collection, pdf_obj cmapstm)
80	{
81	unsigned int cpt;
82
83	if (pdf_is_stream(ctx, cmapstm))
84	{
85	pdf_cmap *ucs_from_cpt = pdf_load_embedded_cmap(ctx, doc, cmapstm);
86	fz_try(ctx)
87	font->to_unicode = pdf_remap_cmap(ctx, font->encoding, ucs_from_cpt);
88	fz_always(ctx)
89	pdf_drop_cmap(ctx, ucs_from_cpt);
90	fz_catch(ctx)
91	fz_rethrow(ctx);
92	font->size += pdf_cmap_size(ctx, font->to_unicode);
93	}
94
95	else if (collection)
96	{
97	if (!strcmp(collection, "Adobe-CNS1"))
98	font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
99	else if (!strcmp(collection, "Adobe-GB1"))
100	font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
101	else if (!strcmp(collection, "Adobe-Japan1"))
102	font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
103	else if (!strcmp(collection, "Adobe-Korea1"))
104	font->to_unicode = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");
105
106	return;
107	}
108
109	if (strings)
110	{
111	/ TODO one-to-many mappings /
112
113	font->cid_to_ucs = fz_malloc_array(ctx, `256`, unsigned short);
114	font->cid_to_ucs_len = `256`;
115	font->size += `256` * sizeof *font->cid_to_ucs;
116
117	for (cpt = `0`; cpt < `256`; cpt++)
118	{
119	if (strings[cpt])
120	font->cid_to_ucs[cpt] = fz_unicode_from_glyph_name(strings[cpt]);
121	else
122	font->cid_to_ucs[cpt] = FZ_REPLACEMENT_CHARACTER;
123	}
124	}
125
126	if (!font->to_unicode && !font->cid_to_ucs)
127	{
128	/ TODO: synthesize a ToUnicode if it's a freetype font with*
129	* cmap and/or post tables or if it has glyph names. */
130	}
131	}
132

Browse the source code of MuPDF/source/pdf/pdf-unicode.c