1 | /* |
2 | * Copyright © 2014 Google, Inc. |
3 | * |
4 | * This is part of HarfBuzz, a text shaping library. |
5 | * |
6 | * Permission is hereby granted, without written agreement and without |
7 | * license or royalty fees, to use, copy, modify, and distribute this |
8 | * software and its documentation for any purpose, provided that the |
9 | * above copyright notice and the following two paragraphs appear in |
10 | * all copies of this software. |
11 | * |
12 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 | * DAMAGE. |
17 | * |
18 | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 | * |
24 | * Google Author(s): Behdad Esfahbod |
25 | */ |
26 | |
27 | #ifndef HB_OT_CMAP_TABLE_HH |
28 | #define HB_OT_CMAP_TABLE_HH |
29 | |
30 | #include "hb-open-type.hh" |
31 | #include "hb-set.hh" |
32 | |
33 | /* |
34 | * cmap -- Character to Glyph Index Mapping |
35 | * https://docs.microsoft.com/en-us/typography/opentype/spec/cmap |
36 | */ |
37 | #define HB_OT_TAG_cmap HB_TAG('c','m','a','p') |
38 | |
39 | namespace OT { |
40 | |
41 | |
42 | struct CmapSubtableFormat0 |
43 | { |
44 | inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const |
45 | { |
46 | hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0; |
47 | if (!gid) |
48 | return false; |
49 | *glyph = gid; |
50 | return true; |
51 | } |
52 | inline void collect_unicodes (hb_set_t *out) const |
53 | { |
54 | for (unsigned int i = 0; i < 256; i++) |
55 | if (glyphIdArray[i]) |
56 | out->add (i); |
57 | } |
58 | |
59 | inline bool sanitize (hb_sanitize_context_t *c) const |
60 | { |
61 | TRACE_SANITIZE (this); |
62 | return_trace (c->check_struct (this)); |
63 | } |
64 | |
65 | protected: |
66 | HBUINT16 format; /* Format number is set to 0. */ |
67 | HBUINT16 length; /* Byte length of this subtable. */ |
68 | HBUINT16 language; /* Ignore. */ |
69 | HBUINT8 glyphIdArray[256];/* An array that maps character |
70 | * code to glyph index values. */ |
71 | public: |
72 | DEFINE_SIZE_STATIC (6 + 256); |
73 | }; |
74 | |
75 | struct CmapSubtableFormat4 |
76 | { |
77 | struct segment_plan |
78 | { |
79 | HBUINT16 start_code; |
80 | HBUINT16 end_code; |
81 | bool use_delta; |
82 | }; |
83 | |
84 | bool serialize (hb_serialize_context_t *c, |
85 | const hb_subset_plan_t *plan, |
86 | const hb_vector_t<segment_plan> &segments) |
87 | { |
88 | TRACE_SERIALIZE (this); |
89 | |
90 | if (unlikely (!c->extend_min (*this))) return_trace (false); |
91 | |
92 | this->format.set (4); |
93 | this->length.set (get_sub_table_size (segments)); |
94 | |
95 | this->segCountX2.set (segments.len * 2); |
96 | this->entrySelector.set (MAX (1u, hb_bit_storage (segments.len)) - 1); |
97 | this->searchRange.set (2 * (1u << this->entrySelector)); |
98 | this->rangeShift.set (segments.len * 2 > this->searchRange |
99 | ? 2 * segments.len - this->searchRange |
100 | : 0); |
101 | |
102 | HBUINT16 *end_count = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.len); |
103 | c->allocate_size<HBUINT16> (HBUINT16::static_size); // 2 bytes of padding. |
104 | HBUINT16 *start_count = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.len); |
105 | HBINT16 *id_delta = c->allocate_size<HBINT16> (HBUINT16::static_size * segments.len); |
106 | HBUINT16 *id_range_offset = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.len); |
107 | |
108 | if (id_range_offset == nullptr) |
109 | return_trace (false); |
110 | |
111 | for (unsigned int i = 0; i < segments.len; i++) |
112 | { |
113 | end_count[i].set (segments[i].end_code); |
114 | start_count[i].set (segments[i].start_code); |
115 | if (segments[i].use_delta) |
116 | { |
117 | hb_codepoint_t cp = segments[i].start_code; |
118 | hb_codepoint_t start_gid = 0; |
119 | if (unlikely (!plan->new_gid_for_codepoint (cp, &start_gid) && cp != 0xFFFF)) |
120 | return_trace (false); |
121 | id_delta[i].set (start_gid - segments[i].start_code); |
122 | } else { |
123 | id_delta[i].set (0); |
124 | unsigned int num_codepoints = segments[i].end_code - segments[i].start_code + 1; |
125 | HBUINT16 *glyph_id_array = c->allocate_size<HBUINT16> (HBUINT16::static_size * num_codepoints); |
126 | if (glyph_id_array == nullptr) |
127 | return_trace (false); |
128 | // From the cmap spec: |
129 | // |
130 | // id_range_offset[i]/2 |
131 | // + (cp - segments[i].start_code) |
132 | // + (id_range_offset + i) |
133 | // = |
134 | // glyph_id_array + (cp - segments[i].start_code) |
135 | // |
136 | // So, solve for id_range_offset[i]: |
137 | // |
138 | // id_range_offset[i] |
139 | // = |
140 | // 2 * (glyph_id_array - id_range_offset - i) |
141 | id_range_offset[i].set (2 * ( |
142 | glyph_id_array - id_range_offset - i)); |
143 | for (unsigned int j = 0; j < num_codepoints; j++) |
144 | { |
145 | hb_codepoint_t cp = segments[i].start_code + j; |
146 | hb_codepoint_t new_gid; |
147 | if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid))) |
148 | return_trace (false); |
149 | glyph_id_array[j].set (new_gid); |
150 | } |
151 | } |
152 | } |
153 | |
154 | return_trace (true); |
155 | } |
156 | |
157 | static inline size_t get_sub_table_size (const hb_vector_t<segment_plan> &segments) |
158 | { |
159 | size_t segment_size = 0; |
160 | for (unsigned int i = 0; i < segments.len; i++) |
161 | { |
162 | // Parallel array entries |
163 | segment_size += |
164 | 2 // end count |
165 | + 2 // start count |
166 | + 2 // delta |
167 | + 2; // range offset |
168 | |
169 | if (!segments[i].use_delta) |
170 | // Add bytes for the glyph index array entries for this segment. |
171 | segment_size += (segments[i].end_code - segments[i].start_code + 1) * 2; |
172 | } |
173 | |
174 | return min_size |
175 | + 2 // Padding |
176 | + segment_size; |
177 | } |
178 | |
179 | static inline bool create_sub_table_plan (const hb_subset_plan_t *plan, |
180 | hb_vector_t<segment_plan> *segments) |
181 | { |
182 | segment_plan *segment = nullptr; |
183 | hb_codepoint_t last_gid = 0; |
184 | |
185 | hb_codepoint_t cp = HB_SET_VALUE_INVALID; |
186 | while (plan->unicodes->next (&cp)) { |
187 | hb_codepoint_t new_gid; |
188 | if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid))) |
189 | { |
190 | DEBUG_MSG(SUBSET, nullptr, "Unable to find new gid for %04x" , cp); |
191 | return false; |
192 | } |
193 | |
194 | if (cp > 0xFFFF) { |
195 | // We are now outside of unicode BMP, stop adding to this cmap. |
196 | break; |
197 | } |
198 | |
199 | if (!segment |
200 | || cp != segment->end_code + 1u) |
201 | { |
202 | segment = segments->push (); |
203 | segment->start_code.set (cp); |
204 | segment->end_code.set (cp); |
205 | segment->use_delta = true; |
206 | } else { |
207 | segment->end_code.set (cp); |
208 | if (last_gid + 1u != new_gid) |
209 | // gid's are not consecutive in this segment so delta |
210 | // cannot be used. |
211 | segment->use_delta = false; |
212 | } |
213 | |
214 | last_gid = new_gid; |
215 | } |
216 | |
217 | // There must be a final entry with end_code == 0xFFFF. Check if we need to add one. |
218 | if (segment == nullptr || segment->end_code != 0xFFFF) |
219 | { |
220 | segment = segments->push (); |
221 | segment->start_code.set (0xFFFF); |
222 | segment->end_code.set (0xFFFF); |
223 | segment->use_delta = true; |
224 | } |
225 | |
226 | return true; |
227 | } |
228 | |
229 | struct accelerator_t |
230 | { |
231 | inline void init (const CmapSubtableFormat4 *subtable) |
232 | { |
233 | segCount = subtable->segCountX2 / 2; |
234 | endCount = subtable->values; |
235 | startCount = endCount + segCount + 1; |
236 | idDelta = startCount + segCount; |
237 | idRangeOffset = idDelta + segCount; |
238 | glyphIdArray = idRangeOffset + segCount; |
239 | glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2; |
240 | } |
241 | inline void fini (void) {} |
242 | |
243 | inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const |
244 | { |
245 | /* Custom two-array bsearch. */ |
246 | int min = 0, max = (int) this->segCount - 1; |
247 | const HBUINT16 *startCount = this->startCount; |
248 | const HBUINT16 *endCount = this->endCount; |
249 | unsigned int i; |
250 | while (min <= max) |
251 | { |
252 | int mid = (min + max) / 2; |
253 | if (codepoint < startCount[mid]) |
254 | max = mid - 1; |
255 | else if (codepoint > endCount[mid]) |
256 | min = mid + 1; |
257 | else |
258 | { |
259 | i = mid; |
260 | goto found; |
261 | } |
262 | } |
263 | return false; |
264 | |
265 | found: |
266 | hb_codepoint_t gid; |
267 | unsigned int rangeOffset = this->idRangeOffset[i]; |
268 | if (rangeOffset == 0) |
269 | gid = codepoint + this->idDelta[i]; |
270 | else |
271 | { |
272 | /* Somebody has been smoking... */ |
273 | unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount; |
274 | if (unlikely (index >= this->glyphIdArrayLength)) |
275 | return false; |
276 | gid = this->glyphIdArray[index]; |
277 | if (unlikely (!gid)) |
278 | return false; |
279 | gid += this->idDelta[i]; |
280 | } |
281 | gid &= 0xFFFFu; |
282 | if (!gid) |
283 | return false; |
284 | *glyph = gid; |
285 | return true; |
286 | } |
287 | static inline bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph) |
288 | { |
289 | return ((const accelerator_t *) obj)->get_glyph (codepoint, glyph); |
290 | } |
291 | inline void collect_unicodes (hb_set_t *out) const |
292 | { |
293 | unsigned int count = this->segCount; |
294 | if (count && this->startCount[count - 1] == 0xFFFFu) |
295 | count--; /* Skip sentinel segment. */ |
296 | for (unsigned int i = 0; i < count; i++) |
297 | { |
298 | unsigned int rangeOffset = this->idRangeOffset[i]; |
299 | if (rangeOffset == 0) |
300 | out->add_range (this->startCount[i], this->endCount[i]); |
301 | else |
302 | { |
303 | for (hb_codepoint_t codepoint = this->startCount[i]; |
304 | codepoint <= this->endCount[i]; |
305 | codepoint++) |
306 | { |
307 | unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount; |
308 | if (unlikely (index >= this->glyphIdArrayLength)) |
309 | break; |
310 | hb_codepoint_t gid = this->glyphIdArray[index]; |
311 | if (unlikely (!gid)) |
312 | continue; |
313 | out->add (codepoint); |
314 | } |
315 | } |
316 | } |
317 | } |
318 | |
319 | const HBUINT16 *endCount; |
320 | const HBUINT16 *startCount; |
321 | const HBUINT16 *idDelta; |
322 | const HBUINT16 *idRangeOffset; |
323 | const HBUINT16 *glyphIdArray; |
324 | unsigned int segCount; |
325 | unsigned int glyphIdArrayLength; |
326 | }; |
327 | |
328 | inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const |
329 | { |
330 | hb_auto_t<accelerator_t> accel (this); |
331 | return accel.get_glyph_func (&accel, codepoint, glyph); |
332 | } |
333 | inline void collect_unicodes (hb_set_t *out) const |
334 | { |
335 | hb_auto_t<accelerator_t> accel (this); |
336 | accel.collect_unicodes (out); |
337 | } |
338 | |
339 | inline bool sanitize (hb_sanitize_context_t *c) const |
340 | { |
341 | TRACE_SANITIZE (this); |
342 | if (unlikely (!c->check_struct (this))) |
343 | return_trace (false); |
344 | |
345 | if (unlikely (!c->check_range (this, length))) |
346 | { |
347 | /* Some broken fonts have too long of a "length" value. |
348 | * If that is the case, just change the value to truncate |
349 | * the subtable at the end of the blob. */ |
350 | uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535, |
351 | (uintptr_t) (c->end - |
352 | (char *) this)); |
353 | if (!c->try_set (&length, new_length)) |
354 | return_trace (false); |
355 | } |
356 | |
357 | return_trace (16 + 4 * (unsigned int) segCountX2 <= length); |
358 | } |
359 | |
360 | |
361 | |
362 | protected: |
363 | HBUINT16 format; /* Format number is set to 4. */ |
364 | HBUINT16 length; /* This is the length in bytes of the |
365 | * subtable. */ |
366 | HBUINT16 language; /* Ignore. */ |
367 | HBUINT16 segCountX2; /* 2 x segCount. */ |
368 | HBUINT16 searchRange; /* 2 * (2**floor(log2(segCount))) */ |
369 | HBUINT16 entrySelector; /* log2(searchRange/2) */ |
370 | HBUINT16 rangeShift; /* 2 x segCount - searchRange */ |
371 | |
372 | HBUINT16 values[VAR]; |
373 | #if 0 |
374 | HBUINT16 endCount[segCount]; /* End characterCode for each segment, |
375 | * last=0xFFFFu. */ |
376 | HBUINT16 reservedPad; /* Set to 0. */ |
377 | HBUINT16 startCount[segCount]; /* Start character code for each segment. */ |
378 | HBINT16 idDelta[segCount]; /* Delta for all character codes in segment. */ |
379 | HBUINT16 idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */ |
380 | HBUINT16 glyphIdArray[VAR]; /* Glyph index array (arbitrary length) */ |
381 | #endif |
382 | |
383 | public: |
384 | DEFINE_SIZE_ARRAY (14, values); |
385 | }; |
386 | |
387 | struct CmapSubtableLongGroup |
388 | { |
389 | friend struct CmapSubtableFormat12; |
390 | friend struct CmapSubtableFormat13; |
391 | template<typename U> |
392 | friend struct CmapSubtableLongSegmented; |
393 | friend struct cmap; |
394 | |
395 | int cmp (hb_codepoint_t codepoint) const |
396 | { |
397 | if (codepoint < startCharCode) return -1; |
398 | if (codepoint > endCharCode) return +1; |
399 | return 0; |
400 | } |
401 | |
402 | inline bool sanitize (hb_sanitize_context_t *c) const |
403 | { |
404 | TRACE_SANITIZE (this); |
405 | return_trace (c->check_struct (this)); |
406 | } |
407 | |
408 | private: |
409 | HBUINT32 startCharCode; /* First character code in this group. */ |
410 | HBUINT32 endCharCode; /* Last character code in this group. */ |
411 | HBUINT32 glyphID; /* Glyph index; interpretation depends on |
412 | * subtable format. */ |
413 | public: |
414 | DEFINE_SIZE_STATIC (12); |
415 | }; |
416 | |
417 | template <typename UINT> |
418 | struct CmapSubtableTrimmed |
419 | { |
420 | inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const |
421 | { |
422 | /* Rely on our implicit array bound-checking. */ |
423 | hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode]; |
424 | if (!gid) |
425 | return false; |
426 | *glyph = gid; |
427 | return true; |
428 | } |
429 | inline void collect_unicodes (hb_set_t *out) const |
430 | { |
431 | hb_codepoint_t start = startCharCode; |
432 | unsigned int count = glyphIdArray.len; |
433 | for (unsigned int i = 0; i < count; i++) |
434 | if (glyphIdArray[i]) |
435 | out->add (start + i); |
436 | } |
437 | |
438 | inline bool sanitize (hb_sanitize_context_t *c) const |
439 | { |
440 | TRACE_SANITIZE (this); |
441 | return_trace (c->check_struct (this) && glyphIdArray.sanitize (c)); |
442 | } |
443 | |
444 | protected: |
445 | UINT formatReserved; /* Subtable format and (maybe) padding. */ |
446 | UINT length; /* Byte length of this subtable. */ |
447 | UINT language; /* Ignore. */ |
448 | UINT startCharCode; /* First character code covered. */ |
449 | ArrayOf<GlyphID, UINT> |
450 | glyphIdArray; /* Array of glyph index values for character |
451 | * codes in the range. */ |
452 | public: |
453 | DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray); |
454 | }; |
455 | |
456 | struct CmapSubtableFormat6 : CmapSubtableTrimmed<HBUINT16> {}; |
457 | struct CmapSubtableFormat10 : CmapSubtableTrimmed<HBUINT32 > {}; |
458 | |
459 | template <typename T> |
460 | struct CmapSubtableLongSegmented |
461 | { |
462 | friend struct cmap; |
463 | |
464 | inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const |
465 | { |
466 | int i = groups.bsearch (codepoint); |
467 | if (i == -1) |
468 | return false; |
469 | hb_codepoint_t gid = T::group_get_glyph (groups[i], codepoint); |
470 | if (!gid) |
471 | return false; |
472 | *glyph = gid; |
473 | return true; |
474 | } |
475 | |
476 | inline void collect_unicodes (hb_set_t *out) const |
477 | { |
478 | for (unsigned int i = 0; i < this->groups.len; i++) { |
479 | out->add_range (this->groups[i].startCharCode, |
480 | MIN ((hb_codepoint_t) this->groups[i].endCharCode, |
481 | (hb_codepoint_t) HB_UNICODE_MAX)); |
482 | } |
483 | } |
484 | |
485 | inline bool sanitize (hb_sanitize_context_t *c) const |
486 | { |
487 | TRACE_SANITIZE (this); |
488 | return_trace (c->check_struct (this) && groups.sanitize (c)); |
489 | } |
490 | |
491 | inline bool serialize (hb_serialize_context_t *c, |
492 | const hb_vector_t<CmapSubtableLongGroup> &group_data) |
493 | { |
494 | TRACE_SERIALIZE (this); |
495 | if (unlikely (!c->extend_min (*this))) return_trace (false); |
496 | Supplier<CmapSubtableLongGroup> supplier (group_data.arrayZ, group_data.len); |
497 | if (unlikely (!groups.serialize (c, supplier, group_data.len))) return_trace (false); |
498 | return true; |
499 | } |
500 | |
501 | protected: |
502 | HBUINT16 format; /* Subtable format; set to 12. */ |
503 | HBUINT16 reserved; /* Reserved; set to 0. */ |
504 | HBUINT32 length; /* Byte length of this subtable. */ |
505 | HBUINT32 language; /* Ignore. */ |
506 | SortedArrayOf<CmapSubtableLongGroup, HBUINT32> |
507 | groups; /* Groupings. */ |
508 | public: |
509 | DEFINE_SIZE_ARRAY (16, groups); |
510 | }; |
511 | |
512 | struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12> |
513 | { |
514 | static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group, |
515 | hb_codepoint_t u) |
516 | { return group.glyphID + (u - group.startCharCode); } |
517 | |
518 | |
519 | bool serialize (hb_serialize_context_t *c, |
520 | const hb_vector_t<CmapSubtableLongGroup> &groups) |
521 | { |
522 | if (unlikely (!c->extend_min (*this))) return false; |
523 | |
524 | this->format.set (12); |
525 | this->reserved.set (0); |
526 | this->length.set (get_sub_table_size (groups)); |
527 | |
528 | return CmapSubtableLongSegmented<CmapSubtableFormat12>::serialize (c, groups); |
529 | } |
530 | |
531 | static inline size_t get_sub_table_size (const hb_vector_t<CmapSubtableLongGroup> &groups) |
532 | { |
533 | return 16 + 12 * groups.len; |
534 | } |
535 | |
536 | static inline bool create_sub_table_plan (const hb_subset_plan_t *plan, |
537 | hb_vector_t<CmapSubtableLongGroup> *groups) |
538 | { |
539 | CmapSubtableLongGroup *group = nullptr; |
540 | |
541 | hb_codepoint_t cp = HB_SET_VALUE_INVALID; |
542 | while (plan->unicodes->next (&cp)) { |
543 | hb_codepoint_t new_gid; |
544 | if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid))) |
545 | { |
546 | DEBUG_MSG(SUBSET, nullptr, "Unable to find new gid for %04x" , cp); |
547 | return false; |
548 | } |
549 | |
550 | if (!group || !_is_gid_consecutive (group, cp, new_gid)) |
551 | { |
552 | group = groups->push (); |
553 | group->startCharCode.set (cp); |
554 | group->endCharCode.set (cp); |
555 | group->glyphID.set (new_gid); |
556 | } else |
557 | { |
558 | group->endCharCode.set (cp); |
559 | } |
560 | } |
561 | |
562 | DEBUG_MSG(SUBSET, nullptr, "cmap" ); |
563 | for (unsigned int i = 0; i < groups->len; i++) { |
564 | CmapSubtableLongGroup& group = (*groups)[i]; |
565 | DEBUG_MSG(SUBSET, nullptr, " %d: U+%04X-U+%04X, gid %d-%d" , i, (uint32_t) group.startCharCode, (uint32_t) group.endCharCode, (uint32_t) group.glyphID, (uint32_t) group.glyphID + ((uint32_t) group.endCharCode - (uint32_t) group.startCharCode)); |
566 | } |
567 | |
568 | return true; |
569 | } |
570 | |
571 | private: |
572 | static inline bool _is_gid_consecutive (CmapSubtableLongGroup *group, |
573 | hb_codepoint_t cp, |
574 | hb_codepoint_t new_gid) |
575 | { |
576 | return (cp - 1 == group->endCharCode) && |
577 | new_gid == group->glyphID + (cp - group->startCharCode); |
578 | } |
579 | |
580 | }; |
581 | |
582 | struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13> |
583 | { |
584 | static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group, |
585 | hb_codepoint_t u HB_UNUSED) |
586 | { return group.glyphID; } |
587 | }; |
588 | |
/* Outcome of a variation-sequence lookup
 * (see VariationSelectorRecord::get_glyph). */
typedef enum
{
  GLYPH_VARIANT_NOT_FOUND	= 0,	/* No usable mapping for the sequence. */
  GLYPH_VARIANT_FOUND		= 1,	/* A specific glyph was stored in the output. */
  GLYPH_VARIANT_USE_DEFAULT	= 2	/* Base codepoint is listed in the default UVS table. */
} glyph_variant_t;
595 | |
596 | struct UnicodeValueRange |
597 | { |
598 | inline int cmp (const hb_codepoint_t &codepoint) const |
599 | { |
600 | if (codepoint < startUnicodeValue) return -1; |
601 | if (codepoint > startUnicodeValue + additionalCount) return +1; |
602 | return 0; |
603 | } |
604 | |
605 | inline bool sanitize (hb_sanitize_context_t *c) const |
606 | { |
607 | TRACE_SANITIZE (this); |
608 | return_trace (c->check_struct (this)); |
609 | } |
610 | |
611 | HBUINT24 startUnicodeValue; /* First value in this range. */ |
612 | HBUINT8 additionalCount; /* Number of additional values in this |
613 | * range. */ |
614 | public: |
615 | DEFINE_SIZE_STATIC (4); |
616 | }; |
617 | |
618 | struct DefaultUVS : SortedArrayOf<UnicodeValueRange, HBUINT32> |
619 | { |
620 | inline void collect_unicodes (hb_set_t *out) const |
621 | { |
622 | unsigned int count = len; |
623 | for (unsigned int i = 0; i < count; i++) |
624 | { |
625 | hb_codepoint_t first = arrayZ[i].startUnicodeValue; |
626 | hb_codepoint_t last = MIN ((hb_codepoint_t) (first + arrayZ[i].additionalCount), |
627 | (hb_codepoint_t) HB_UNICODE_MAX); |
628 | out->add_range (first, last); |
629 | } |
630 | } |
631 | |
632 | public: |
633 | DEFINE_SIZE_ARRAY (4, arrayZ); |
634 | }; |
635 | |
636 | struct UVSMapping |
637 | { |
638 | inline int cmp (const hb_codepoint_t &codepoint) const |
639 | { |
640 | return unicodeValue.cmp (codepoint); |
641 | } |
642 | |
643 | inline bool sanitize (hb_sanitize_context_t *c) const |
644 | { |
645 | TRACE_SANITIZE (this); |
646 | return_trace (c->check_struct (this)); |
647 | } |
648 | |
649 | HBUINT24 unicodeValue; /* Base Unicode value of the UVS */ |
650 | GlyphID glyphID; /* Glyph ID of the UVS */ |
651 | public: |
652 | DEFINE_SIZE_STATIC (5); |
653 | }; |
654 | |
655 | struct NonDefaultUVS : SortedArrayOf<UVSMapping, HBUINT32> |
656 | { |
657 | inline void collect_unicodes (hb_set_t *out) const |
658 | { |
659 | unsigned int count = len; |
660 | for (unsigned int i = 0; i < count; i++) |
661 | out->add (arrayZ[i].glyphID); |
662 | } |
663 | |
664 | public: |
665 | DEFINE_SIZE_ARRAY (4, arrayZ); |
666 | }; |
667 | |
668 | struct VariationSelectorRecord |
669 | { |
670 | inline glyph_variant_t get_glyph (hb_codepoint_t codepoint, |
671 | hb_codepoint_t *glyph, |
672 | const void *base) const |
673 | { |
674 | int i; |
675 | const DefaultUVS &defaults = base+defaultUVS; |
676 | i = defaults.bsearch (codepoint); |
677 | if (i != -1) |
678 | return GLYPH_VARIANT_USE_DEFAULT; |
679 | const NonDefaultUVS &nonDefaults = base+nonDefaultUVS; |
680 | i = nonDefaults.bsearch (codepoint); |
681 | if (i != -1 && nonDefaults[i].glyphID) |
682 | { |
683 | *glyph = nonDefaults[i].glyphID; |
684 | return GLYPH_VARIANT_FOUND; |
685 | } |
686 | return GLYPH_VARIANT_NOT_FOUND; |
687 | } |
688 | |
689 | inline void collect_unicodes (hb_set_t *out, const void *base) const |
690 | { |
691 | (base+defaultUVS).collect_unicodes (out); |
692 | (base+nonDefaultUVS).collect_unicodes (out); |
693 | } |
694 | |
695 | inline int cmp (const hb_codepoint_t &variation_selector) const |
696 | { |
697 | return varSelector.cmp (variation_selector); |
698 | } |
699 | |
700 | inline bool sanitize (hb_sanitize_context_t *c, const void *base) const |
701 | { |
702 | TRACE_SANITIZE (this); |
703 | return_trace (c->check_struct (this) && |
704 | defaultUVS.sanitize (c, base) && |
705 | nonDefaultUVS.sanitize (c, base)); |
706 | } |
707 | |
708 | HBUINT24 varSelector; /* Variation selector. */ |
709 | LOffsetTo<DefaultUVS> |
710 | defaultUVS; /* Offset to Default UVS Table. May be 0. */ |
711 | LOffsetTo<NonDefaultUVS> |
712 | nonDefaultUVS; /* Offset to Non-Default UVS Table. May be 0. */ |
713 | public: |
714 | DEFINE_SIZE_STATIC (11); |
715 | }; |
716 | |
717 | struct CmapSubtableFormat14 |
718 | { |
719 | inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint, |
720 | hb_codepoint_t variation_selector, |
721 | hb_codepoint_t *glyph) const |
722 | { |
723 | return record[record.bsearch (variation_selector)].get_glyph (codepoint, glyph, this); |
724 | } |
725 | |
726 | inline void collect_variation_selectors (hb_set_t *out) const |
727 | { |
728 | unsigned int count = record.len; |
729 | for (unsigned int i = 0; i < count; i++) |
730 | out->add (record.arrayZ[i].varSelector); |
731 | } |
732 | inline void collect_variation_unicodes (hb_codepoint_t variation_selector, |
733 | hb_set_t *out) const |
734 | { |
735 | record[record.bsearch (variation_selector)].collect_unicodes (out, this); |
736 | } |
737 | |
738 | inline bool sanitize (hb_sanitize_context_t *c) const |
739 | { |
740 | TRACE_SANITIZE (this); |
741 | return_trace (c->check_struct (this) && |
742 | record.sanitize (c, this)); |
743 | } |
744 | |
745 | protected: |
746 | HBUINT16 format; /* Format number is set to 14. */ |
747 | HBUINT32 length; /* Byte length of this subtable. */ |
748 | SortedArrayOf<VariationSelectorRecord, HBUINT32> |
749 | record; /* Variation selector records; sorted |
750 | * in increasing order of `varSelector'. */ |
751 | public: |
752 | DEFINE_SIZE_ARRAY (10, record); |
753 | }; |
754 | |
755 | struct CmapSubtable |
756 | { |
757 | /* Note: We intentionally do NOT implement subtable formats 2 and 8. */ |
758 | |
759 | inline bool get_glyph (hb_codepoint_t codepoint, |
760 | hb_codepoint_t *glyph) const |
761 | { |
762 | switch (u.format) { |
763 | case 0: return u.format0 .get_glyph (codepoint, glyph); |
764 | case 4: return u.format4 .get_glyph (codepoint, glyph); |
765 | case 6: return u.format6 .get_glyph (codepoint, glyph); |
766 | case 10: return u.format10.get_glyph (codepoint, glyph); |
767 | case 12: return u.format12.get_glyph (codepoint, glyph); |
768 | case 13: return u.format13.get_glyph (codepoint, glyph); |
769 | case 14: |
770 | default: return false; |
771 | } |
772 | } |
773 | inline void collect_unicodes (hb_set_t *out) const |
774 | { |
775 | switch (u.format) { |
776 | case 0: u.format0 .collect_unicodes (out); return; |
777 | case 4: u.format4 .collect_unicodes (out); return; |
778 | case 6: u.format6 .collect_unicodes (out); return; |
779 | case 10: u.format10.collect_unicodes (out); return; |
780 | case 12: u.format12.collect_unicodes (out); return; |
781 | case 13: u.format13.collect_unicodes (out); return; |
782 | case 14: |
783 | default: return; |
784 | } |
785 | } |
786 | |
787 | inline bool sanitize (hb_sanitize_context_t *c) const |
788 | { |
789 | TRACE_SANITIZE (this); |
790 | if (!u.format.sanitize (c)) return_trace (false); |
791 | switch (u.format) { |
792 | case 0: return_trace (u.format0 .sanitize (c)); |
793 | case 4: return_trace (u.format4 .sanitize (c)); |
794 | case 6: return_trace (u.format6 .sanitize (c)); |
795 | case 10: return_trace (u.format10.sanitize (c)); |
796 | case 12: return_trace (u.format12.sanitize (c)); |
797 | case 13: return_trace (u.format13.sanitize (c)); |
798 | case 14: return_trace (u.format14.sanitize (c)); |
799 | default:return_trace (true); |
800 | } |
801 | } |
802 | |
803 | public: |
804 | union { |
805 | HBUINT16 format; /* Format identifier */ |
806 | CmapSubtableFormat0 format0; |
807 | CmapSubtableFormat4 format4; |
808 | CmapSubtableFormat6 format6; |
809 | CmapSubtableFormat10 format10; |
810 | CmapSubtableFormat12 format12; |
811 | CmapSubtableFormat13 format13; |
812 | CmapSubtableFormat14 format14; |
813 | } u; |
814 | public: |
815 | DEFINE_SIZE_UNION (2, format); |
816 | }; |
817 | |
818 | |
819 | struct EncodingRecord |
820 | { |
821 | inline int cmp (const EncodingRecord &other) const |
822 | { |
823 | int ret; |
824 | ret = platformID.cmp (other.platformID); |
825 | if (ret) return ret; |
826 | ret = encodingID.cmp (other.encodingID); |
827 | if (ret) return ret; |
828 | return 0; |
829 | } |
830 | |
831 | inline bool sanitize (hb_sanitize_context_t *c, const void *base) const |
832 | { |
833 | TRACE_SANITIZE (this); |
834 | return_trace (c->check_struct (this) && |
835 | subtable.sanitize (c, base)); |
836 | } |
837 | |
838 | HBUINT16 platformID; /* Platform ID. */ |
839 | HBUINT16 encodingID; /* Platform-specific encoding ID. */ |
840 | LOffsetTo<CmapSubtable> |
841 | subtable; /* Byte offset from beginning of table to the subtable for this encoding. */ |
842 | public: |
843 | DEFINE_SIZE_STATIC (8); |
844 | }; |
845 | |
846 | struct cmap |
847 | { |
848 | static const hb_tag_t tableTag = HB_OT_TAG_cmap; |
849 | |
850 | struct subset_plan |
851 | { |
852 | subset_plan(void) |
853 | { |
854 | format4_segments.init(); |
855 | format12_groups.init(); |
856 | } |
857 | |
858 | ~subset_plan(void) |
859 | { |
860 | format4_segments.fini(); |
861 | format12_groups.fini(); |
862 | } |
863 | |
864 | inline size_t final_size() const |
865 | { |
866 | return 4 // header |
867 | + 8 * 3 // 3 EncodingRecord |
868 | + CmapSubtableFormat4::get_sub_table_size (this->format4_segments) |
869 | + CmapSubtableFormat12::get_sub_table_size (this->format12_groups); |
870 | } |
871 | |
872 | // Format 4 |
873 | hb_vector_t<CmapSubtableFormat4::segment_plan> format4_segments; |
874 | // Format 12 |
875 | hb_vector_t<CmapSubtableLongGroup> format12_groups; |
876 | }; |
877 | |
878 | inline bool sanitize (hb_sanitize_context_t *c) const |
879 | { |
880 | TRACE_SANITIZE (this); |
881 | return_trace (c->check_struct (this) && |
882 | likely (version == 0) && |
883 | encodingRecord.sanitize (c, this)); |
884 | } |
885 | |
886 | inline bool _create_plan (const hb_subset_plan_t *plan, |
887 | subset_plan *cmap_plan) const |
888 | { |
889 | if (unlikely( !CmapSubtableFormat4::create_sub_table_plan (plan, &cmap_plan->format4_segments))) |
890 | return false; |
891 | |
892 | return CmapSubtableFormat12::create_sub_table_plan (plan, &cmap_plan->format12_groups); |
893 | } |
894 | |
895 | inline bool _subset (const hb_subset_plan_t *plan, |
896 | const subset_plan &cmap_subset_plan, |
897 | size_t dest_sz, |
898 | void *dest) const |
899 | { |
900 | hb_serialize_context_t c (dest, dest_sz); |
901 | |
902 | cmap *table = c.start_serialize<cmap> (); |
903 | if (unlikely (!c.extend_min (*table))) |
904 | { |
905 | return false; |
906 | } |
907 | |
908 | table->version.set (0); |
909 | |
910 | if (unlikely (!table->encodingRecord.serialize (&c, /* numTables */ 3))) |
911 | return false; |
912 | |
913 | // TODO(grieger): Convert the below to a for loop |
914 | |
915 | // Format 4, Plat 0 Encoding Record |
916 | EncodingRecord &format4_plat0_rec = table->encodingRecord[0]; |
917 | format4_plat0_rec.platformID.set (0); // Unicode |
918 | format4_plat0_rec.encodingID.set (3); |
919 | |
920 | // Format 4, Plat 3 Encoding Record |
921 | EncodingRecord &format4_plat3_rec = table->encodingRecord[1]; |
922 | format4_plat3_rec.platformID.set (3); // Windows |
923 | format4_plat3_rec.encodingID.set (1); // Unicode BMP |
924 | |
925 | // Format 12 Encoding Record |
926 | EncodingRecord &format12_rec = table->encodingRecord[2]; |
927 | format12_rec.platformID.set (3); // Windows |
928 | format12_rec.encodingID.set (10); // Unicode UCS-4 |
929 | |
930 | // Write out format 4 sub table |
931 | { |
932 | CmapSubtable &subtable = format4_plat0_rec.subtable.serialize (&c, table); |
933 | format4_plat3_rec.subtable.set (format4_plat0_rec.subtable); |
934 | subtable.u.format.set (4); |
935 | |
936 | CmapSubtableFormat4 &format4 = subtable.u.format4; |
937 | if (unlikely (!format4.serialize (&c, plan, cmap_subset_plan.format4_segments))) |
938 | return false; |
939 | } |
940 | |
941 | // Write out format 12 sub table. |
942 | { |
943 | CmapSubtable &subtable = format12_rec.subtable.serialize (&c, table); |
944 | subtable.u.format.set (12); |
945 | |
946 | CmapSubtableFormat12 &format12 = subtable.u.format12; |
947 | if (unlikely (!format12.serialize (&c, cmap_subset_plan.format12_groups))) |
948 | return false; |
949 | } |
950 | |
951 | c.end_serialize (); |
952 | |
953 | return true; |
954 | } |
955 | |
956 | inline bool subset (hb_subset_plan_t *plan) const |
957 | { |
958 | subset_plan cmap_subset_plan; |
959 | |
960 | if (unlikely (!_create_plan (plan, &cmap_subset_plan))) |
961 | { |
962 | DEBUG_MSG(SUBSET, nullptr, "Failed to generate a cmap subsetting plan." ); |
963 | return false; |
964 | } |
965 | |
966 | // We now know how big our blob needs to be |
967 | size_t dest_sz = cmap_subset_plan.final_size(); |
968 | void *dest = malloc (dest_sz); |
969 | if (unlikely (!dest)) { |
970 | DEBUG_MSG(SUBSET, nullptr, "Unable to alloc %lu for cmap subset output" , (unsigned long) dest_sz); |
971 | return false; |
972 | } |
973 | |
974 | if (unlikely (!_subset (plan, cmap_subset_plan, dest_sz, dest))) |
975 | { |
976 | DEBUG_MSG(SUBSET, nullptr, "Failed to perform subsetting of cmap." ); |
977 | free (dest); |
978 | return false; |
979 | } |
980 | |
981 | // all done, write the blob into dest |
982 | hb_blob_t *cmap_prime = hb_blob_create ((const char *)dest, |
983 | dest_sz, |
984 | HB_MEMORY_MODE_READONLY, |
985 | dest, |
986 | free); |
987 | bool result = plan->add_table (HB_OT_TAG_cmap, cmap_prime); |
988 | hb_blob_destroy (cmap_prime); |
989 | return result; |
990 | } |
991 | |
992 | const CmapSubtable *find_best_subtable (bool *symbol = nullptr) const |
993 | { |
994 | if (symbol) *symbol = false; |
995 | |
996 | const CmapSubtable *subtable; |
997 | |
998 | /* 32-bit subtables. */ |
999 | if ((subtable = this->find_subtable (3, 10))) return subtable; |
1000 | if ((subtable = this->find_subtable (0, 6))) return subtable; |
1001 | if ((subtable = this->find_subtable (0, 4))) return subtable; |
1002 | |
1003 | /* 16-bit subtables. */ |
1004 | if ((subtable = this->find_subtable (3, 1))) return subtable; |
1005 | if ((subtable = this->find_subtable (0, 3))) return subtable; |
1006 | if ((subtable = this->find_subtable (0, 2))) return subtable; |
1007 | if ((subtable = this->find_subtable (0, 1))) return subtable; |
1008 | if ((subtable = this->find_subtable (0, 0))) return subtable; |
1009 | |
1010 | /* Symbol subtable. */ |
1011 | if ((subtable = this->find_subtable (3, 0))) |
1012 | { |
1013 | if (symbol) *symbol = true; |
1014 | return subtable; |
1015 | } |
1016 | |
1017 | /* Meh. */ |
1018 | return &Null(CmapSubtable); |
1019 | } |
1020 | |
1021 | struct accelerator_t |
1022 | { |
1023 | inline void init (hb_face_t *face) |
1024 | { |
1025 | this->blob = hb_sanitize_context_t().reference_table<cmap> (face); |
1026 | const cmap *table = this->blob->as<cmap> (); |
1027 | const CmapSubtableFormat14 *subtable_uvs = nullptr; |
1028 | bool symbol; |
1029 | subtable = table->find_best_subtable (&symbol); |
1030 | |
1031 | /* UVS subtable. */ |
1032 | if (!subtable_uvs) |
1033 | { |
1034 | const CmapSubtable *st = table->find_subtable (0, 5); |
1035 | if (st && st->u.format == 14) |
1036 | subtable_uvs = &st->u.format14; |
1037 | } |
1038 | /* Meh. */ |
1039 | if (!subtable_uvs) subtable_uvs = &Null(CmapSubtableFormat14); |
1040 | |
1041 | this->subtable_uvs = subtable_uvs; |
1042 | |
1043 | this->get_glyph_data = subtable; |
1044 | if (unlikely (symbol)) |
1045 | { |
1046 | this->get_glyph_func = get_glyph_from_symbol<CmapSubtable>; |
1047 | } else { |
1048 | switch (subtable->u.format) { |
1049 | /* Accelerate format 4 and format 12. */ |
1050 | default: |
1051 | this->get_glyph_func = get_glyph_from<CmapSubtable>; |
1052 | break; |
1053 | case 12: |
1054 | this->get_glyph_func = get_glyph_from<CmapSubtableFormat12>; |
1055 | break; |
1056 | case 4: |
1057 | { |
1058 | this->format4_accel.init (&subtable->u.format4); |
1059 | this->get_glyph_data = &this->format4_accel; |
1060 | this->get_glyph_func = this->format4_accel.get_glyph_func; |
1061 | } |
1062 | break; |
1063 | } |
1064 | } |
1065 | } |
1066 | |
1067 | inline void fini (void) |
1068 | { |
1069 | hb_blob_destroy (this->blob); |
1070 | } |
1071 | |
1072 | inline bool get_nominal_glyph (hb_codepoint_t unicode, |
1073 | hb_codepoint_t *glyph) const |
1074 | { |
1075 | return this->get_glyph_func (this->get_glyph_data, unicode, glyph); |
1076 | } |
1077 | |
1078 | inline bool get_variation_glyph (hb_codepoint_t unicode, |
1079 | hb_codepoint_t variation_selector, |
1080 | hb_codepoint_t *glyph) const |
1081 | { |
1082 | switch (this->subtable_uvs->get_glyph_variant (unicode, |
1083 | variation_selector, |
1084 | glyph)) |
1085 | { |
1086 | case GLYPH_VARIANT_NOT_FOUND: return false; |
1087 | case GLYPH_VARIANT_FOUND: return true; |
1088 | case GLYPH_VARIANT_USE_DEFAULT: break; |
1089 | } |
1090 | |
1091 | return get_nominal_glyph (unicode, glyph); |
1092 | } |
1093 | |
1094 | inline void collect_unicodes (hb_set_t *out) const |
1095 | { |
1096 | subtable->collect_unicodes (out); |
1097 | } |
1098 | inline void collect_variation_selectors (hb_set_t *out) const |
1099 | { |
1100 | subtable_uvs->collect_variation_selectors (out); |
1101 | } |
1102 | inline void collect_variation_unicodes (hb_codepoint_t variation_selector, |
1103 | hb_set_t *out) const |
1104 | { |
1105 | subtable_uvs->collect_variation_unicodes (variation_selector, out); |
1106 | } |
1107 | |
1108 | protected: |
1109 | typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj, |
1110 | hb_codepoint_t codepoint, |
1111 | hb_codepoint_t *glyph); |
1112 | |
1113 | template <typename Type> |
1114 | static inline bool get_glyph_from (const void *obj, |
1115 | hb_codepoint_t codepoint, |
1116 | hb_codepoint_t *glyph) |
1117 | { |
1118 | const Type *typed_obj = (const Type *) obj; |
1119 | return typed_obj->get_glyph (codepoint, glyph); |
1120 | } |
1121 | |
1122 | template <typename Type> |
1123 | static inline bool get_glyph_from_symbol (const void *obj, |
1124 | hb_codepoint_t codepoint, |
1125 | hb_codepoint_t *glyph) |
1126 | { |
1127 | const Type *typed_obj = (const Type *) obj; |
1128 | if (likely (typed_obj->get_glyph (codepoint, glyph))) |
1129 | return true; |
1130 | |
1131 | if (codepoint <= 0x00FFu) |
1132 | { |
1133 | /* For symbol-encoded OpenType fonts, we duplicate the |
1134 | * U+F000..F0FF range at U+0000..U+00FF. That's what |
1135 | * Windows seems to do, and that's hinted about at: |
1136 | * https://docs.microsoft.com/en-us/typography/opentype/spec/recom |
1137 | * under "Non-Standard (Symbol) Fonts". */ |
1138 | return typed_obj->get_glyph (0xF000u + codepoint, glyph); |
1139 | } |
1140 | |
1141 | return false; |
1142 | } |
1143 | |
1144 | private: |
1145 | const CmapSubtable *subtable; |
1146 | const CmapSubtableFormat14 *subtable_uvs; |
1147 | |
1148 | hb_cmap_get_glyph_func_t get_glyph_func; |
1149 | const void *get_glyph_data; |
1150 | |
1151 | CmapSubtableFormat4::accelerator_t format4_accel; |
1152 | |
1153 | hb_blob_t *blob; |
1154 | }; |
1155 | |
1156 | protected: |
1157 | |
1158 | inline const CmapSubtable *find_subtable (unsigned int platform_id, |
1159 | unsigned int encoding_id) const |
1160 | { |
1161 | EncodingRecord key; |
1162 | key.platformID.set (platform_id); |
1163 | key.encodingID.set (encoding_id); |
1164 | |
1165 | int result = encodingRecord.bsearch (key); |
1166 | if (result == -1 || !encodingRecord[result].subtable) |
1167 | return nullptr; |
1168 | |
1169 | return &(this+encodingRecord[result].subtable); |
1170 | } |
1171 | |
  protected:
  HBUINT16              version;        /* Table version number (0). */
  /* Searched with bsearch () in find_subtable (), keyed by an
   * EncodingRecord carrying platformID and encodingID. */
  SortedArrayOf<EncodingRecord>
                encodingRecord; /* Encoding tables. */
  public:
  /* NOTE(review): 4 is presumably the byte size of the fixed part of the
   * table preceding the array elements -- confirm against the
   * DEFINE_SIZE_ARRAY definition. */
  DEFINE_SIZE_ARRAY (4, encodingRecord);
1178 | }; |
1179 | |
/* Namespace-scope name for cmap::accelerator_t; derives from it and adds
 * no members of its own. */
struct cmap_accelerator_t : cmap::accelerator_t {};
1181 | |
1182 | } /* namespace OT */ |
1183 | |
1184 | |
1185 | #endif /* HB_OT_CMAP_TABLE_HH */ |
1186 | |