SkPDFMakeToUnicodeCmap.cpp source code [Skia/src/pdf/SkPDFMakeToUnicodeCmap.cpp]

1	/*
2	* Copyright 2011 Google Inc.
3	*
4	* Use of this source code is governed by a BSD-style license that can be
5	* found in the LICENSE file.
6	*/
7
8	#include "src/pdf/SkPDFMakeToUnicodeCmap.h"
9
10	#include "include/private/SkTo.h"
11	#include "src/pdf/SkPDFUtils.h"
12	#include "src/utils/SkUTF.h"
13
14	static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
15	bool multibyte) {
16	// 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
17	// It's there to prevent old version Adobe Readers from malfunctioning.
18	const char* kHeader =
19	"/CIDInit /ProcSet findresource begin\n"
20	"12 dict begin\n"
21	"begincmap\n";
22	cmap->writeText(kHeader);
23
24	// The /CIDSystemInfo must be consistent to the one in
25	// SkPDFFont::populateCIDFont().
26	// We can not pass over the system info object here because the format is
27	// different. This is not a reference object.
28	const char* kSysInfo =
29	"/CIDSystemInfo\n"
30	"<< /Registry (Adobe)\n"
31	"/Ordering (UCS)\n"
32	"/Supplement 0\n"
33	">> def\n";
34	cmap->writeText(kSysInfo);
35
36	// The CMapName must be consistent to /CIDSystemInfo above.
37	// /CMapType 2 means ToUnicode.
38	// Codespace range just tells the PDF processor the valid range.
39	const char* kTypeInfoHeader =
40	"/CMapName /Adobe-Identity-UCS def\n"
41	"/CMapType 2 def\n"
42	"1 begincodespacerange\n";
43	cmap->writeText(kTypeInfoHeader);
44	if (multibyte) {
45	cmap->writeText("<0000> <FFFF>\n");
46	} else {
47	cmap->writeText("<00> <FF>\n");
48	}
49	cmap->writeText("endcodespacerange\n");
50	}
51
52	static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
53	const char kFooter[] =
54	"endcmap\n"
55	"CMapName currentdict /CMap defineresource pop\n"
56	"end\n"
57	"end";
58	cmap->writeText(kFooter);
59	}
60
61	namespace {
62	struct BFChar {
63	SkGlyphID fGlyphId;
64	SkUnichar fUnicode;
65	};
66
67	struct BFRange {
68	SkGlyphID fStart;
69	SkGlyphID fEnd;
70	SkUnichar fUnicode;
71	};
72	} // namespace
73
74	static void write_glyph(SkDynamicMemoryWStream* cmap,
75	bool multiByte,
76	SkGlyphID gid) {
77	if (multiByte) {
78	SkPDFUtils::WriteUInt16BE(cmap, gid);
79	} else {
80	SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
81	}
82	}
83
84	static void append_bfchar_section(const std::vector<BFChar>& bfchar,
85	bool multiByte,
86	SkDynamicMemoryWStream* cmap) {
87	// PDF spec defines that every bf list can have at most 100 entries.*
88	for (size_t i = `0`; i < bfchar.size(); i += `100`) {
89	int count = SkToInt(bfchar.size() - i);
90	count = std::min(count, `100`);
91	cmap->writeDecAsText(count);
92	cmap->writeText(" beginbfchar\n");
93	for (int j = `0`; j < count; ++j) {
94	cmap->writeText("<");
95	write_glyph(cmap, multiByte, bfchar [i + j].fGlyphId);
96	cmap->writeText("> <");
97	SkPDFUtils::WriteUTF16beHex(cmap, bfchar [i + j].fUnicode);
98	cmap->writeText(">\n");
99	}
100	cmap->writeText("endbfchar\n");
101	}
102	}
103
104	static void append_bfrange_section(const std::vector<BFRange>& bfrange,
105	bool multiByte,
106	SkDynamicMemoryWStream* cmap) {
107	// PDF spec defines that every bf list can have at most 100 entries.*
108	for (size_t i = `0`; i < bfrange.size(); i += `100`) {
109	int count = SkToInt(bfrange.size() - i);
110	count = std::min(count, `100`);
111	cmap->writeDecAsText(count);
112	cmap->writeText(" beginbfrange\n");
113	for (int j = `0`; j < count; ++j) {
114	cmap->writeText("<");
115	write_glyph(cmap, multiByte, bfrange [i + j].fStart);
116	cmap->writeText("> <");
117	write_glyph(cmap, multiByte, bfrange [i + j].fEnd);
118	cmap->writeText("> <");
119	SkPDFUtils::WriteUTF16beHex(cmap, bfrange [i + j].fUnicode);
120	cmap->writeText(">\n");
121	}
122	cmap->writeText("endbfrange\n");
123	}
124	}
125
126	// Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe
127	// Technote 5014.
128	// The function is not static so we can test it in unit tests.
129	//
130	// Current implementation guarantees bfchar and bfrange entries do not overlap.
131	//
132	// Current implementation does not attempt aggressive optimizations against
133	// following case because the specification is not clear.
134	//
135	// 4 beginbfchar 1 beginbfchar
136	// <0003> <0013> <0020> <0014>
137	// <0005> <0015> to endbfchar
138	// <0007> <0017> 1 beginbfrange
139	// <0020> <0014> <0003> <0007> <0013>
140	// endbfchar endbfrange
141	//
142	// Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may
143	// overlap, but succeeding maps supersede preceding maps."
144	//
145	// In case of searching text in PDF, bfrange will have higher precedence so
146	// typing char id 0x0014 in search box will get glyph id 0x0004 first. However,
147	// the spec does not mention how will this kind of conflict being resolved.
148	//
149	// For the worst case (having 65536 continuous unicode and we use every other
150	// one of them), the possible savings by aggressive optimization is 416KB
151	// pre-compressed and does not provide enough motivation for implementation.
152	void SkPDFAppendCmapSections(const SkUnichar* glyphToUnicode,
153	const SkPDFGlyphUse* subset,
154	SkDynamicMemoryWStream* cmap,
155	bool multiByteGlyphs,
156	SkGlyphID firstGlyphID,
157	SkGlyphID lastGlyphID) {
158	int glyphOffset = `0`;
159	if (!multiByteGlyphs) {
160	glyphOffset = firstGlyphID - `1`;
161	}
162
163	std::vector<BFChar> bfcharEntries;
164	std::vector<BFRange> bfrangeEntries;
165
166	BFRange currentRangeEntry = {`0`, `0`, `0`};
167	bool rangeEmpty = true;
168	const int limit = (int)lastGlyphID + `1` - glyphOffset;
169
170	for (int i = firstGlyphID - glyphOffset; i < limit + `1`; ++i) {
171	SkGlyphID gid = i + glyphOffset;
172	bool inSubset = i < limit && (subset == nullptr \|\| subset->has(gid));
173	if (!rangeEmpty) {
174	// PDF spec requires bfrange not changing the higher byte,
175	// e.g. <1035> <10FF> <2222> is ok, but
176	// <1035> <1100> <2222> is no good
177	bool inRange =
178	i == currentRangeEntry.fEnd + `1` &&
179	i >> `8` == currentRangeEntry.fStart >> `8` &&
180	i < limit &&
181	glyphToUnicode[gid] ==
182	currentRangeEntry.fUnicode + i - currentRangeEntry.fStart;
183	if (!inSubset \|\| !inRange) {
184	if (currentRangeEntry.fEnd > currentRangeEntry.fStart) {
185	bfrangeEntries.push_back(currentRangeEntry);
186	} else {
187	bfcharEntries.push_back({currentRangeEntry.fStart, currentRangeEntry.fUnicode});
188	}
189	rangeEmpty = true;
190	}
191	}
192	if (inSubset) {
193	currentRangeEntry.fEnd = i;
194	if (rangeEmpty) {
195	currentRangeEntry.fStart = i;
196	currentRangeEntry.fUnicode = glyphToUnicode[gid];
197	rangeEmpty = false;
198	}
199	}
200	}
201
202	// The spec requires all bfchar entries for a font must come before bfrange
203	// entries.
204	append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
205	append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
206	}
207
208	std::unique_ptr<SkStreamAsset> SkPDFMakeToUnicodeCmap(
209	const SkUnichar* glyphToUnicode,
210	const SkPDFGlyphUse* subset,
211	bool multiByteGlyphs,
212	SkGlyphID firstGlyphID,
213	SkGlyphID lastGlyphID) {
214	SkDynamicMemoryWStream cmap;
215	append_tounicode_header(&cmap, multiByteGlyphs);
216	SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
217	firstGlyphID, lastGlyphID);
218	append_cmap_footer(&cmap);
219	return cmap.detachAsStream();
220	}
221

Browse the source code of Skia/src/pdf/SkPDFMakeToUnicodeCmap.cpp