1 | /* |
2 | * Copyright 2011 Google Inc. |
3 | * |
4 | * Use of this source code is governed by a BSD-style license that can be |
5 | * found in the LICENSE file. |
6 | */ |
7 | |
8 | #include "src/pdf/SkPDFMakeToUnicodeCmap.h" |
9 | |
10 | #include "include/private/SkTo.h" |
11 | #include "src/pdf/SkPDFUtils.h" |
12 | #include "src/utils/SkUTF.h" |
13 | |
14 | static void (SkDynamicMemoryWStream* cmap, |
15 | bool multibyte) { |
16 | // 12 dict begin: 12 is an Adobe-suggested value. Shall not change. |
17 | // It's there to prevent old version Adobe Readers from malfunctioning. |
18 | const char* = |
19 | "/CIDInit /ProcSet findresource begin\n" |
20 | "12 dict begin\n" |
21 | "begincmap\n" ; |
22 | cmap->writeText(kHeader); |
23 | |
24 | // The /CIDSystemInfo must be consistent to the one in |
25 | // SkPDFFont::populateCIDFont(). |
26 | // We can not pass over the system info object here because the format is |
27 | // different. This is not a reference object. |
28 | const char* kSysInfo = |
29 | "/CIDSystemInfo\n" |
30 | "<< /Registry (Adobe)\n" |
31 | "/Ordering (UCS)\n" |
32 | "/Supplement 0\n" |
33 | ">> def\n" ; |
34 | cmap->writeText(kSysInfo); |
35 | |
36 | // The CMapName must be consistent to /CIDSystemInfo above. |
37 | // /CMapType 2 means ToUnicode. |
38 | // Codespace range just tells the PDF processor the valid range. |
39 | const char* = |
40 | "/CMapName /Adobe-Identity-UCS def\n" |
41 | "/CMapType 2 def\n" |
42 | "1 begincodespacerange\n" ; |
43 | cmap->writeText(kTypeInfoHeader); |
44 | if (multibyte) { |
45 | cmap->writeText("<0000> <FFFF>\n" ); |
46 | } else { |
47 | cmap->writeText("<00> <FF>\n" ); |
48 | } |
49 | cmap->writeText("endcodespacerange\n" ); |
50 | } |
51 | |
52 | static void (SkDynamicMemoryWStream* cmap) { |
53 | const char [] = |
54 | "endcmap\n" |
55 | "CMapName currentdict /CMap defineresource pop\n" |
56 | "end\n" |
57 | "end" ; |
58 | cmap->writeText(kFooter); |
59 | } |
60 | |
61 | namespace { |
62 | struct BFChar { |
63 | SkGlyphID fGlyphId; |
64 | SkUnichar fUnicode; |
65 | }; |
66 | |
67 | struct BFRange { |
68 | SkGlyphID fStart; |
69 | SkGlyphID fEnd; |
70 | SkUnichar fUnicode; |
71 | }; |
72 | } // namespace |
73 | |
74 | static void write_glyph(SkDynamicMemoryWStream* cmap, |
75 | bool multiByte, |
76 | SkGlyphID gid) { |
77 | if (multiByte) { |
78 | SkPDFUtils::WriteUInt16BE(cmap, gid); |
79 | } else { |
80 | SkPDFUtils::WriteUInt8(cmap, SkToU8(gid)); |
81 | } |
82 | } |
83 | |
84 | static void append_bfchar_section(const std::vector<BFChar>& bfchar, |
85 | bool multiByte, |
86 | SkDynamicMemoryWStream* cmap) { |
87 | // PDF spec defines that every bf* list can have at most 100 entries. |
88 | for (size_t i = 0; i < bfchar.size(); i += 100) { |
89 | int count = SkToInt(bfchar.size() - i); |
90 | count = std::min(count, 100); |
91 | cmap->writeDecAsText(count); |
92 | cmap->writeText(" beginbfchar\n" ); |
93 | for (int j = 0; j < count; ++j) { |
94 | cmap->writeText("<" ); |
95 | write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId); |
96 | cmap->writeText("> <" ); |
97 | SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode); |
98 | cmap->writeText(">\n" ); |
99 | } |
100 | cmap->writeText("endbfchar\n" ); |
101 | } |
102 | } |
103 | |
104 | static void append_bfrange_section(const std::vector<BFRange>& bfrange, |
105 | bool multiByte, |
106 | SkDynamicMemoryWStream* cmap) { |
107 | // PDF spec defines that every bf* list can have at most 100 entries. |
108 | for (size_t i = 0; i < bfrange.size(); i += 100) { |
109 | int count = SkToInt(bfrange.size() - i); |
110 | count = std::min(count, 100); |
111 | cmap->writeDecAsText(count); |
112 | cmap->writeText(" beginbfrange\n" ); |
113 | for (int j = 0; j < count; ++j) { |
114 | cmap->writeText("<" ); |
115 | write_glyph(cmap, multiByte, bfrange[i + j].fStart); |
116 | cmap->writeText("> <" ); |
117 | write_glyph(cmap, multiByte, bfrange[i + j].fEnd); |
118 | cmap->writeText("> <" ); |
119 | SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode); |
120 | cmap->writeText(">\n" ); |
121 | } |
122 | cmap->writeText("endbfrange\n" ); |
123 | } |
124 | } |
125 | |
126 | // Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe |
127 | // Technote 5014. |
128 | // The function is not static so we can test it in unit tests. |
129 | // |
130 | // Current implementation guarantees bfchar and bfrange entries do not overlap. |
131 | // |
132 | // Current implementation does not attempt aggressive optimizations against |
133 | // following case because the specification is not clear. |
134 | // |
135 | // 4 beginbfchar 1 beginbfchar |
136 | // <0003> <0013> <0020> <0014> |
137 | // <0005> <0015> to endbfchar |
138 | // <0007> <0017> 1 beginbfrange |
139 | // <0020> <0014> <0003> <0007> <0013> |
140 | // endbfchar endbfrange |
141 | // |
142 | // Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may |
143 | // overlap, but succeeding maps supersede preceding maps." |
144 | // |
145 | // In case of searching text in PDF, bfrange will have higher precedence so |
146 | // typing char id 0x0014 in search box will get glyph id 0x0004 first. However, |
147 | // the spec does not mention how will this kind of conflict being resolved. |
148 | // |
149 | // For the worst case (having 65536 continuous unicode and we use every other |
150 | // one of them), the possible savings by aggressive optimization is 416KB |
151 | // pre-compressed and does not provide enough motivation for implementation. |
152 | void SkPDFAppendCmapSections(const SkUnichar* glyphToUnicode, |
153 | const SkPDFGlyphUse* subset, |
154 | SkDynamicMemoryWStream* cmap, |
155 | bool multiByteGlyphs, |
156 | SkGlyphID firstGlyphID, |
157 | SkGlyphID lastGlyphID) { |
158 | int glyphOffset = 0; |
159 | if (!multiByteGlyphs) { |
160 | glyphOffset = firstGlyphID - 1; |
161 | } |
162 | |
163 | std::vector<BFChar> bfcharEntries; |
164 | std::vector<BFRange> bfrangeEntries; |
165 | |
166 | BFRange currentRangeEntry = {0, 0, 0}; |
167 | bool rangeEmpty = true; |
168 | const int limit = (int)lastGlyphID + 1 - glyphOffset; |
169 | |
170 | for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) { |
171 | SkGlyphID gid = i + glyphOffset; |
172 | bool inSubset = i < limit && (subset == nullptr || subset->has(gid)); |
173 | if (!rangeEmpty) { |
174 | // PDF spec requires bfrange not changing the higher byte, |
175 | // e.g. <1035> <10FF> <2222> is ok, but |
176 | // <1035> <1100> <2222> is no good |
177 | bool inRange = |
178 | i == currentRangeEntry.fEnd + 1 && |
179 | i >> 8 == currentRangeEntry.fStart >> 8 && |
180 | i < limit && |
181 | glyphToUnicode[gid] == |
182 | currentRangeEntry.fUnicode + i - currentRangeEntry.fStart; |
183 | if (!inSubset || !inRange) { |
184 | if (currentRangeEntry.fEnd > currentRangeEntry.fStart) { |
185 | bfrangeEntries.push_back(currentRangeEntry); |
186 | } else { |
187 | bfcharEntries.push_back({currentRangeEntry.fStart, currentRangeEntry.fUnicode}); |
188 | } |
189 | rangeEmpty = true; |
190 | } |
191 | } |
192 | if (inSubset) { |
193 | currentRangeEntry.fEnd = i; |
194 | if (rangeEmpty) { |
195 | currentRangeEntry.fStart = i; |
196 | currentRangeEntry.fUnicode = glyphToUnicode[gid]; |
197 | rangeEmpty = false; |
198 | } |
199 | } |
200 | } |
201 | |
202 | // The spec requires all bfchar entries for a font must come before bfrange |
203 | // entries. |
204 | append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap); |
205 | append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap); |
206 | } |
207 | |
208 | std::unique_ptr<SkStreamAsset> SkPDFMakeToUnicodeCmap( |
209 | const SkUnichar* glyphToUnicode, |
210 | const SkPDFGlyphUse* subset, |
211 | bool multiByteGlyphs, |
212 | SkGlyphID firstGlyphID, |
213 | SkGlyphID lastGlyphID) { |
214 | SkDynamicMemoryWStream cmap; |
215 | append_tounicode_header(&cmap, multiByteGlyphs); |
216 | SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs, |
217 | firstGlyphID, lastGlyphID); |
218 | append_cmap_footer(&cmap); |
219 | return cmap.detachAsStream(); |
220 | } |
221 | |