| 1 | /* |
| 2 | * Copyright 2011 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #include "src/pdf/SkPDFMakeToUnicodeCmap.h" |
| 9 | |
| 10 | #include "include/private/SkTo.h" |
| 11 | #include "src/pdf/SkPDFUtils.h" |
| 12 | #include "src/utils/SkUTF.h" |
| 13 | |
| 14 | static void (SkDynamicMemoryWStream* cmap, |
| 15 | bool multibyte) { |
| 16 | // 12 dict begin: 12 is an Adobe-suggested value. Shall not change. |
| 17 | // It's there to prevent old version Adobe Readers from malfunctioning. |
| 18 | const char* = |
| 19 | "/CIDInit /ProcSet findresource begin\n" |
| 20 | "12 dict begin\n" |
| 21 | "begincmap\n" ; |
| 22 | cmap->writeText(kHeader); |
| 23 | |
| 24 | // The /CIDSystemInfo must be consistent to the one in |
| 25 | // SkPDFFont::populateCIDFont(). |
| 26 | // We can not pass over the system info object here because the format is |
| 27 | // different. This is not a reference object. |
| 28 | const char* kSysInfo = |
| 29 | "/CIDSystemInfo\n" |
| 30 | "<< /Registry (Adobe)\n" |
| 31 | "/Ordering (UCS)\n" |
| 32 | "/Supplement 0\n" |
| 33 | ">> def\n" ; |
| 34 | cmap->writeText(kSysInfo); |
| 35 | |
| 36 | // The CMapName must be consistent to /CIDSystemInfo above. |
| 37 | // /CMapType 2 means ToUnicode. |
| 38 | // Codespace range just tells the PDF processor the valid range. |
| 39 | const char* = |
| 40 | "/CMapName /Adobe-Identity-UCS def\n" |
| 41 | "/CMapType 2 def\n" |
| 42 | "1 begincodespacerange\n" ; |
| 43 | cmap->writeText(kTypeInfoHeader); |
| 44 | if (multibyte) { |
| 45 | cmap->writeText("<0000> <FFFF>\n" ); |
| 46 | } else { |
| 47 | cmap->writeText("<00> <FF>\n" ); |
| 48 | } |
| 49 | cmap->writeText("endcodespacerange\n" ); |
| 50 | } |
| 51 | |
| 52 | static void (SkDynamicMemoryWStream* cmap) { |
| 53 | const char [] = |
| 54 | "endcmap\n" |
| 55 | "CMapName currentdict /CMap defineresource pop\n" |
| 56 | "end\n" |
| 57 | "end" ; |
| 58 | cmap->writeText(kFooter); |
| 59 | } |
| 60 | |
| 61 | namespace { |
| 62 | struct BFChar { |
| 63 | SkGlyphID fGlyphId; |
| 64 | SkUnichar fUnicode; |
| 65 | }; |
| 66 | |
| 67 | struct BFRange { |
| 68 | SkGlyphID fStart; |
| 69 | SkGlyphID fEnd; |
| 70 | SkUnichar fUnicode; |
| 71 | }; |
| 72 | } // namespace |
| 73 | |
| 74 | static void write_glyph(SkDynamicMemoryWStream* cmap, |
| 75 | bool multiByte, |
| 76 | SkGlyphID gid) { |
| 77 | if (multiByte) { |
| 78 | SkPDFUtils::WriteUInt16BE(cmap, gid); |
| 79 | } else { |
| 80 | SkPDFUtils::WriteUInt8(cmap, SkToU8(gid)); |
| 81 | } |
| 82 | } |
| 83 | |
| 84 | static void append_bfchar_section(const std::vector<BFChar>& bfchar, |
| 85 | bool multiByte, |
| 86 | SkDynamicMemoryWStream* cmap) { |
| 87 | // PDF spec defines that every bf* list can have at most 100 entries. |
| 88 | for (size_t i = 0; i < bfchar.size(); i += 100) { |
| 89 | int count = SkToInt(bfchar.size() - i); |
| 90 | count = std::min(count, 100); |
| 91 | cmap->writeDecAsText(count); |
| 92 | cmap->writeText(" beginbfchar\n" ); |
| 93 | for (int j = 0; j < count; ++j) { |
| 94 | cmap->writeText("<" ); |
| 95 | write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId); |
| 96 | cmap->writeText("> <" ); |
| 97 | SkPDFUtils::WriteUTF16beHex(cmap, bfchar[i + j].fUnicode); |
| 98 | cmap->writeText(">\n" ); |
| 99 | } |
| 100 | cmap->writeText("endbfchar\n" ); |
| 101 | } |
| 102 | } |
| 103 | |
| 104 | static void append_bfrange_section(const std::vector<BFRange>& bfrange, |
| 105 | bool multiByte, |
| 106 | SkDynamicMemoryWStream* cmap) { |
| 107 | // PDF spec defines that every bf* list can have at most 100 entries. |
| 108 | for (size_t i = 0; i < bfrange.size(); i += 100) { |
| 109 | int count = SkToInt(bfrange.size() - i); |
| 110 | count = std::min(count, 100); |
| 111 | cmap->writeDecAsText(count); |
| 112 | cmap->writeText(" beginbfrange\n" ); |
| 113 | for (int j = 0; j < count; ++j) { |
| 114 | cmap->writeText("<" ); |
| 115 | write_glyph(cmap, multiByte, bfrange[i + j].fStart); |
| 116 | cmap->writeText("> <" ); |
| 117 | write_glyph(cmap, multiByte, bfrange[i + j].fEnd); |
| 118 | cmap->writeText("> <" ); |
| 119 | SkPDFUtils::WriteUTF16beHex(cmap, bfrange[i + j].fUnicode); |
| 120 | cmap->writeText(">\n" ); |
| 121 | } |
| 122 | cmap->writeText("endbfrange\n" ); |
| 123 | } |
| 124 | } |
| 125 | |
| 126 | // Generate <bfchar> and <bfrange> table according to PDF spec 1.4 and Adobe |
| 127 | // Technote 5014. |
| 128 | // The function is not static so we can test it in unit tests. |
| 129 | // |
| 130 | // Current implementation guarantees bfchar and bfrange entries do not overlap. |
| 131 | // |
| 132 | // Current implementation does not attempt aggressive optimizations against |
| 133 | // following case because the specification is not clear. |
| 134 | // |
| 135 | // 4 beginbfchar 1 beginbfchar |
| 136 | // <0003> <0013> <0020> <0014> |
| 137 | // <0005> <0015> to endbfchar |
| 138 | // <0007> <0017> 1 beginbfrange |
| 139 | // <0020> <0014> <0003> <0007> <0013> |
| 140 | // endbfchar endbfrange |
| 141 | // |
| 142 | // Adobe Technote 5014 said: "Code mappings (unlike codespace ranges) may |
| 143 | // overlap, but succeeding maps supersede preceding maps." |
| 144 | // |
| 145 | // In case of searching text in PDF, bfrange will have higher precedence so |
| 146 | // typing char id 0x0014 in search box will get glyph id 0x0004 first. However, |
| 147 | // the spec does not mention how will this kind of conflict being resolved. |
| 148 | // |
| 149 | // For the worst case (having 65536 continuous unicode and we use every other |
| 150 | // one of them), the possible savings by aggressive optimization is 416KB |
| 151 | // pre-compressed and does not provide enough motivation for implementation. |
| 152 | void SkPDFAppendCmapSections(const SkUnichar* glyphToUnicode, |
| 153 | const SkPDFGlyphUse* subset, |
| 154 | SkDynamicMemoryWStream* cmap, |
| 155 | bool multiByteGlyphs, |
| 156 | SkGlyphID firstGlyphID, |
| 157 | SkGlyphID lastGlyphID) { |
| 158 | int glyphOffset = 0; |
| 159 | if (!multiByteGlyphs) { |
| 160 | glyphOffset = firstGlyphID - 1; |
| 161 | } |
| 162 | |
| 163 | std::vector<BFChar> bfcharEntries; |
| 164 | std::vector<BFRange> bfrangeEntries; |
| 165 | |
| 166 | BFRange currentRangeEntry = {0, 0, 0}; |
| 167 | bool rangeEmpty = true; |
| 168 | const int limit = (int)lastGlyphID + 1 - glyphOffset; |
| 169 | |
| 170 | for (int i = firstGlyphID - glyphOffset; i < limit + 1; ++i) { |
| 171 | SkGlyphID gid = i + glyphOffset; |
| 172 | bool inSubset = i < limit && (subset == nullptr || subset->has(gid)); |
| 173 | if (!rangeEmpty) { |
| 174 | // PDF spec requires bfrange not changing the higher byte, |
| 175 | // e.g. <1035> <10FF> <2222> is ok, but |
| 176 | // <1035> <1100> <2222> is no good |
| 177 | bool inRange = |
| 178 | i == currentRangeEntry.fEnd + 1 && |
| 179 | i >> 8 == currentRangeEntry.fStart >> 8 && |
| 180 | i < limit && |
| 181 | glyphToUnicode[gid] == |
| 182 | currentRangeEntry.fUnicode + i - currentRangeEntry.fStart; |
| 183 | if (!inSubset || !inRange) { |
| 184 | if (currentRangeEntry.fEnd > currentRangeEntry.fStart) { |
| 185 | bfrangeEntries.push_back(currentRangeEntry); |
| 186 | } else { |
| 187 | bfcharEntries.push_back({currentRangeEntry.fStart, currentRangeEntry.fUnicode}); |
| 188 | } |
| 189 | rangeEmpty = true; |
| 190 | } |
| 191 | } |
| 192 | if (inSubset) { |
| 193 | currentRangeEntry.fEnd = i; |
| 194 | if (rangeEmpty) { |
| 195 | currentRangeEntry.fStart = i; |
| 196 | currentRangeEntry.fUnicode = glyphToUnicode[gid]; |
| 197 | rangeEmpty = false; |
| 198 | } |
| 199 | } |
| 200 | } |
| 201 | |
| 202 | // The spec requires all bfchar entries for a font must come before bfrange |
| 203 | // entries. |
| 204 | append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap); |
| 205 | append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap); |
| 206 | } |
| 207 | |
| 208 | std::unique_ptr<SkStreamAsset> SkPDFMakeToUnicodeCmap( |
| 209 | const SkUnichar* glyphToUnicode, |
| 210 | const SkPDFGlyphUse* subset, |
| 211 | bool multiByteGlyphs, |
| 212 | SkGlyphID firstGlyphID, |
| 213 | SkGlyphID lastGlyphID) { |
| 214 | SkDynamicMemoryWStream cmap; |
| 215 | append_tounicode_header(&cmap, multiByteGlyphs); |
| 216 | SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs, |
| 217 | firstGlyphID, lastGlyphID); |
| 218 | append_cmap_footer(&cmap); |
| 219 | return cmap.detachAsStream(); |
| 220 | } |
| 221 | |