1/*
2 * Copyright © 2014 Google, Inc.
3 *
4 * This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#ifndef HB_OT_CMAP_TABLE_HH
28#define HB_OT_CMAP_TABLE_HH
29
30#include "hb-open-type.hh"
31#include "hb-set.hh"
32
33/*
34 * cmap -- Character to Glyph Index Mapping
35 * https://docs.microsoft.com/en-us/typography/opentype/spec/cmap
36 */
/* Four-byte table tag 'cmap'; used below as OT::cmap::tableTag. */
#define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
38
39namespace OT {
40
41
42struct CmapSubtableFormat0
43{
44 inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
45 {
46 hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
47 if (!gid)
48 return false;
49 *glyph = gid;
50 return true;
51 }
52 inline void collect_unicodes (hb_set_t *out) const
53 {
54 for (unsigned int i = 0; i < 256; i++)
55 if (glyphIdArray[i])
56 out->add (i);
57 }
58
59 inline bool sanitize (hb_sanitize_context_t *c) const
60 {
61 TRACE_SANITIZE (this);
62 return_trace (c->check_struct (this));
63 }
64
65 protected:
66 HBUINT16 format; /* Format number is set to 0. */
67 HBUINT16 length; /* Byte length of this subtable. */
68 HBUINT16 language; /* Ignore. */
69 HBUINT8 glyphIdArray[256];/* An array that maps character
70 * code to glyph index values. */
71 public:
72 DEFINE_SIZE_STATIC (6 + 256);
73};
74
75struct CmapSubtableFormat4
76{
77 struct segment_plan
78 {
79 HBUINT16 start_code;
80 HBUINT16 end_code;
81 bool use_delta;
82 };
83
84 bool serialize (hb_serialize_context_t *c,
85 const hb_subset_plan_t *plan,
86 const hb_vector_t<segment_plan> &segments)
87 {
88 TRACE_SERIALIZE (this);
89
90 if (unlikely (!c->extend_min (*this))) return_trace (false);
91
92 this->format.set (4);
93 this->length.set (get_sub_table_size (segments));
94
95 this->segCountX2.set (segments.len * 2);
96 this->entrySelector.set (MAX (1u, hb_bit_storage (segments.len)) - 1);
97 this->searchRange.set (2 * (1u << this->entrySelector));
98 this->rangeShift.set (segments.len * 2 > this->searchRange
99 ? 2 * segments.len - this->searchRange
100 : 0);
101
102 HBUINT16 *end_count = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.len);
103 c->allocate_size<HBUINT16> (HBUINT16::static_size); // 2 bytes of padding.
104 HBUINT16 *start_count = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.len);
105 HBINT16 *id_delta = c->allocate_size<HBINT16> (HBUINT16::static_size * segments.len);
106 HBUINT16 *id_range_offset = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.len);
107
108 if (id_range_offset == nullptr)
109 return_trace (false);
110
111 for (unsigned int i = 0; i < segments.len; i++)
112 {
113 end_count[i].set (segments[i].end_code);
114 start_count[i].set (segments[i].start_code);
115 if (segments[i].use_delta)
116 {
117 hb_codepoint_t cp = segments[i].start_code;
118 hb_codepoint_t start_gid = 0;
119 if (unlikely (!plan->new_gid_for_codepoint (cp, &start_gid) && cp != 0xFFFF))
120 return_trace (false);
121 id_delta[i].set (start_gid - segments[i].start_code);
122 } else {
123 id_delta[i].set (0);
124 unsigned int num_codepoints = segments[i].end_code - segments[i].start_code + 1;
125 HBUINT16 *glyph_id_array = c->allocate_size<HBUINT16> (HBUINT16::static_size * num_codepoints);
126 if (glyph_id_array == nullptr)
127 return_trace (false);
128 // From the cmap spec:
129 //
130 // id_range_offset[i]/2
131 // + (cp - segments[i].start_code)
132 // + (id_range_offset + i)
133 // =
134 // glyph_id_array + (cp - segments[i].start_code)
135 //
136 // So, solve for id_range_offset[i]:
137 //
138 // id_range_offset[i]
139 // =
140 // 2 * (glyph_id_array - id_range_offset - i)
141 id_range_offset[i].set (2 * (
142 glyph_id_array - id_range_offset - i));
143 for (unsigned int j = 0; j < num_codepoints; j++)
144 {
145 hb_codepoint_t cp = segments[i].start_code + j;
146 hb_codepoint_t new_gid;
147 if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid)))
148 return_trace (false);
149 glyph_id_array[j].set (new_gid);
150 }
151 }
152 }
153
154 return_trace (true);
155 }
156
157 static inline size_t get_sub_table_size (const hb_vector_t<segment_plan> &segments)
158 {
159 size_t segment_size = 0;
160 for (unsigned int i = 0; i < segments.len; i++)
161 {
162 // Parallel array entries
163 segment_size +=
164 2 // end count
165 + 2 // start count
166 + 2 // delta
167 + 2; // range offset
168
169 if (!segments[i].use_delta)
170 // Add bytes for the glyph index array entries for this segment.
171 segment_size += (segments[i].end_code - segments[i].start_code + 1) * 2;
172 }
173
174 return min_size
175 + 2 // Padding
176 + segment_size;
177 }
178
179 static inline bool create_sub_table_plan (const hb_subset_plan_t *plan,
180 hb_vector_t<segment_plan> *segments)
181 {
182 segment_plan *segment = nullptr;
183 hb_codepoint_t last_gid = 0;
184
185 hb_codepoint_t cp = HB_SET_VALUE_INVALID;
186 while (plan->unicodes->next (&cp)) {
187 hb_codepoint_t new_gid;
188 if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid)))
189 {
190 DEBUG_MSG(SUBSET, nullptr, "Unable to find new gid for %04x", cp);
191 return false;
192 }
193
194 if (cp > 0xFFFF) {
195 // We are now outside of unicode BMP, stop adding to this cmap.
196 break;
197 }
198
199 if (!segment
200 || cp != segment->end_code + 1u)
201 {
202 segment = segments->push ();
203 segment->start_code.set (cp);
204 segment->end_code.set (cp);
205 segment->use_delta = true;
206 } else {
207 segment->end_code.set (cp);
208 if (last_gid + 1u != new_gid)
209 // gid's are not consecutive in this segment so delta
210 // cannot be used.
211 segment->use_delta = false;
212 }
213
214 last_gid = new_gid;
215 }
216
217 // There must be a final entry with end_code == 0xFFFF. Check if we need to add one.
218 if (segment == nullptr || segment->end_code != 0xFFFF)
219 {
220 segment = segments->push ();
221 segment->start_code.set (0xFFFF);
222 segment->end_code.set (0xFFFF);
223 segment->use_delta = true;
224 }
225
226 return true;
227 }
228
229 struct accelerator_t
230 {
231 inline void init (const CmapSubtableFormat4 *subtable)
232 {
233 segCount = subtable->segCountX2 / 2;
234 endCount = subtable->values;
235 startCount = endCount + segCount + 1;
236 idDelta = startCount + segCount;
237 idRangeOffset = idDelta + segCount;
238 glyphIdArray = idRangeOffset + segCount;
239 glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2;
240 }
241 inline void fini (void) {}
242
243 inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
244 {
245 /* Custom two-array bsearch. */
246 int min = 0, max = (int) this->segCount - 1;
247 const HBUINT16 *startCount = this->startCount;
248 const HBUINT16 *endCount = this->endCount;
249 unsigned int i;
250 while (min <= max)
251 {
252 int mid = (min + max) / 2;
253 if (codepoint < startCount[mid])
254 max = mid - 1;
255 else if (codepoint > endCount[mid])
256 min = mid + 1;
257 else
258 {
259 i = mid;
260 goto found;
261 }
262 }
263 return false;
264
265 found:
266 hb_codepoint_t gid;
267 unsigned int rangeOffset = this->idRangeOffset[i];
268 if (rangeOffset == 0)
269 gid = codepoint + this->idDelta[i];
270 else
271 {
272 /* Somebody has been smoking... */
273 unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
274 if (unlikely (index >= this->glyphIdArrayLength))
275 return false;
276 gid = this->glyphIdArray[index];
277 if (unlikely (!gid))
278 return false;
279 gid += this->idDelta[i];
280 }
281 gid &= 0xFFFFu;
282 if (!gid)
283 return false;
284 *glyph = gid;
285 return true;
286 }
287 static inline bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph)
288 {
289 return ((const accelerator_t *) obj)->get_glyph (codepoint, glyph);
290 }
291 inline void collect_unicodes (hb_set_t *out) const
292 {
293 unsigned int count = this->segCount;
294 if (count && this->startCount[count - 1] == 0xFFFFu)
295 count--; /* Skip sentinel segment. */
296 for (unsigned int i = 0; i < count; i++)
297 {
298 unsigned int rangeOffset = this->idRangeOffset[i];
299 if (rangeOffset == 0)
300 out->add_range (this->startCount[i], this->endCount[i]);
301 else
302 {
303 for (hb_codepoint_t codepoint = this->startCount[i];
304 codepoint <= this->endCount[i];
305 codepoint++)
306 {
307 unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount;
308 if (unlikely (index >= this->glyphIdArrayLength))
309 break;
310 hb_codepoint_t gid = this->glyphIdArray[index];
311 if (unlikely (!gid))
312 continue;
313 out->add (codepoint);
314 }
315 }
316 }
317 }
318
319 const HBUINT16 *endCount;
320 const HBUINT16 *startCount;
321 const HBUINT16 *idDelta;
322 const HBUINT16 *idRangeOffset;
323 const HBUINT16 *glyphIdArray;
324 unsigned int segCount;
325 unsigned int glyphIdArrayLength;
326 };
327
328 inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
329 {
330 hb_auto_t<accelerator_t> accel (this);
331 return accel.get_glyph_func (&accel, codepoint, glyph);
332 }
333 inline void collect_unicodes (hb_set_t *out) const
334 {
335 hb_auto_t<accelerator_t> accel (this);
336 accel.collect_unicodes (out);
337 }
338
339 inline bool sanitize (hb_sanitize_context_t *c) const
340 {
341 TRACE_SANITIZE (this);
342 if (unlikely (!c->check_struct (this)))
343 return_trace (false);
344
345 if (unlikely (!c->check_range (this, length)))
346 {
347 /* Some broken fonts have too long of a "length" value.
348 * If that is the case, just change the value to truncate
349 * the subtable at the end of the blob. */
350 uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535,
351 (uintptr_t) (c->end -
352 (char *) this));
353 if (!c->try_set (&length, new_length))
354 return_trace (false);
355 }
356
357 return_trace (16 + 4 * (unsigned int) segCountX2 <= length);
358 }
359
360
361
362 protected:
363 HBUINT16 format; /* Format number is set to 4. */
364 HBUINT16 length; /* This is the length in bytes of the
365 * subtable. */
366 HBUINT16 language; /* Ignore. */
367 HBUINT16 segCountX2; /* 2 x segCount. */
368 HBUINT16 searchRange; /* 2 * (2**floor(log2(segCount))) */
369 HBUINT16 entrySelector; /* log2(searchRange/2) */
370 HBUINT16 rangeShift; /* 2 x segCount - searchRange */
371
372 HBUINT16 values[VAR];
373#if 0
374 HBUINT16 endCount[segCount]; /* End characterCode for each segment,
375 * last=0xFFFFu. */
376 HBUINT16 reservedPad; /* Set to 0. */
377 HBUINT16 startCount[segCount]; /* Start character code for each segment. */
378 HBINT16 idDelta[segCount]; /* Delta for all character codes in segment. */
379 HBUINT16 idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
380 HBUINT16 glyphIdArray[VAR]; /* Glyph index array (arbitrary length) */
381#endif
382
383 public:
384 DEFINE_SIZE_ARRAY (14, values);
385};
386
387struct CmapSubtableLongGroup
388{
389 friend struct CmapSubtableFormat12;
390 friend struct CmapSubtableFormat13;
391 template<typename U>
392 friend struct CmapSubtableLongSegmented;
393 friend struct cmap;
394
395 int cmp (hb_codepoint_t codepoint) const
396 {
397 if (codepoint < startCharCode) return -1;
398 if (codepoint > endCharCode) return +1;
399 return 0;
400 }
401
402 inline bool sanitize (hb_sanitize_context_t *c) const
403 {
404 TRACE_SANITIZE (this);
405 return_trace (c->check_struct (this));
406 }
407
408 private:
409 HBUINT32 startCharCode; /* First character code in this group. */
410 HBUINT32 endCharCode; /* Last character code in this group. */
411 HBUINT32 glyphID; /* Glyph index; interpretation depends on
412 * subtable format. */
413 public:
414 DEFINE_SIZE_STATIC (12);
415};
416
417template <typename UINT>
418struct CmapSubtableTrimmed
419{
420 inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
421 {
422 /* Rely on our implicit array bound-checking. */
423 hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
424 if (!gid)
425 return false;
426 *glyph = gid;
427 return true;
428 }
429 inline void collect_unicodes (hb_set_t *out) const
430 {
431 hb_codepoint_t start = startCharCode;
432 unsigned int count = glyphIdArray.len;
433 for (unsigned int i = 0; i < count; i++)
434 if (glyphIdArray[i])
435 out->add (start + i);
436 }
437
438 inline bool sanitize (hb_sanitize_context_t *c) const
439 {
440 TRACE_SANITIZE (this);
441 return_trace (c->check_struct (this) && glyphIdArray.sanitize (c));
442 }
443
444 protected:
445 UINT formatReserved; /* Subtable format and (maybe) padding. */
446 UINT length; /* Byte length of this subtable. */
447 UINT language; /* Ignore. */
448 UINT startCharCode; /* First character code covered. */
449 ArrayOf<GlyphID, UINT>
450 glyphIdArray; /* Array of glyph index values for character
451 * codes in the range. */
452 public:
453 DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
454};
455
/* Format 6: CmapSubtableTrimmed with 16-bit header fields. */
struct CmapSubtableFormat6 : CmapSubtableTrimmed<HBUINT16> {};
/* Format 10: CmapSubtableTrimmed with 32-bit header fields. */
struct CmapSubtableFormat10 : CmapSubtableTrimmed<HBUINT32 > {};
458
459template <typename T>
460struct CmapSubtableLongSegmented
461{
462 friend struct cmap;
463
464 inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
465 {
466 int i = groups.bsearch (codepoint);
467 if (i == -1)
468 return false;
469 hb_codepoint_t gid = T::group_get_glyph (groups[i], codepoint);
470 if (!gid)
471 return false;
472 *glyph = gid;
473 return true;
474 }
475
476 inline void collect_unicodes (hb_set_t *out) const
477 {
478 for (unsigned int i = 0; i < this->groups.len; i++) {
479 out->add_range (this->groups[i].startCharCode,
480 MIN ((hb_codepoint_t) this->groups[i].endCharCode,
481 (hb_codepoint_t) HB_UNICODE_MAX));
482 }
483 }
484
485 inline bool sanitize (hb_sanitize_context_t *c) const
486 {
487 TRACE_SANITIZE (this);
488 return_trace (c->check_struct (this) && groups.sanitize (c));
489 }
490
491 inline bool serialize (hb_serialize_context_t *c,
492 const hb_vector_t<CmapSubtableLongGroup> &group_data)
493 {
494 TRACE_SERIALIZE (this);
495 if (unlikely (!c->extend_min (*this))) return_trace (false);
496 Supplier<CmapSubtableLongGroup> supplier (group_data.arrayZ, group_data.len);
497 if (unlikely (!groups.serialize (c, supplier, group_data.len))) return_trace (false);
498 return true;
499 }
500
501 protected:
502 HBUINT16 format; /* Subtable format; set to 12. */
503 HBUINT16 reserved; /* Reserved; set to 0. */
504 HBUINT32 length; /* Byte length of this subtable. */
505 HBUINT32 language; /* Ignore. */
506 SortedArrayOf<CmapSubtableLongGroup, HBUINT32>
507 groups; /* Groupings. */
508 public:
509 DEFINE_SIZE_ARRAY (16, groups);
510};
511
512struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
513{
514 static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
515 hb_codepoint_t u)
516 { return group.glyphID + (u - group.startCharCode); }
517
518
519 bool serialize (hb_serialize_context_t *c,
520 const hb_vector_t<CmapSubtableLongGroup> &groups)
521 {
522 if (unlikely (!c->extend_min (*this))) return false;
523
524 this->format.set (12);
525 this->reserved.set (0);
526 this->length.set (get_sub_table_size (groups));
527
528 return CmapSubtableLongSegmented<CmapSubtableFormat12>::serialize (c, groups);
529 }
530
531 static inline size_t get_sub_table_size (const hb_vector_t<CmapSubtableLongGroup> &groups)
532 {
533 return 16 + 12 * groups.len;
534 }
535
536 static inline bool create_sub_table_plan (const hb_subset_plan_t *plan,
537 hb_vector_t<CmapSubtableLongGroup> *groups)
538 {
539 CmapSubtableLongGroup *group = nullptr;
540
541 hb_codepoint_t cp = HB_SET_VALUE_INVALID;
542 while (plan->unicodes->next (&cp)) {
543 hb_codepoint_t new_gid;
544 if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid)))
545 {
546 DEBUG_MSG(SUBSET, nullptr, "Unable to find new gid for %04x", cp);
547 return false;
548 }
549
550 if (!group || !_is_gid_consecutive (group, cp, new_gid))
551 {
552 group = groups->push ();
553 group->startCharCode.set (cp);
554 group->endCharCode.set (cp);
555 group->glyphID.set (new_gid);
556 } else
557 {
558 group->endCharCode.set (cp);
559 }
560 }
561
562 DEBUG_MSG(SUBSET, nullptr, "cmap");
563 for (unsigned int i = 0; i < groups->len; i++) {
564 CmapSubtableLongGroup& group = (*groups)[i];
565 DEBUG_MSG(SUBSET, nullptr, " %d: U+%04X-U+%04X, gid %d-%d", i, (uint32_t) group.startCharCode, (uint32_t) group.endCharCode, (uint32_t) group.glyphID, (uint32_t) group.glyphID + ((uint32_t) group.endCharCode - (uint32_t) group.startCharCode));
566 }
567
568 return true;
569 }
570
571 private:
572 static inline bool _is_gid_consecutive (CmapSubtableLongGroup *group,
573 hb_codepoint_t cp,
574 hb_codepoint_t new_gid)
575 {
576 return (cp - 1 == group->endCharCode) &&
577 new_gid == group->glyphID + (cp - group->startCharCode);
578 }
579
580};
581
/* Format 13: like format 12 in layout, but every character code in a
 * group maps to the same glyph (the group's glyphID). */
struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
{
  /* Returns the group's glyphID regardless of the code point `u`. */
  static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
                                                hb_codepoint_t u HB_UNUSED)
  { return group.glyphID; }
};
588
/* Result of a variation-sequence lookup (see VariationSelectorRecord::get_glyph). */
typedef enum
{
  GLYPH_VARIANT_NOT_FOUND = 0,   /* No mapping for the sequence. */
  GLYPH_VARIANT_FOUND = 1,       /* A specific glyph was written to the output. */
  GLYPH_VARIANT_USE_DEFAULT = 2  /* Sequence listed in the default UVS table;
                                  * caller falls back to the nominal glyph. */
} glyph_variant_t;
595
596struct UnicodeValueRange
597{
598 inline int cmp (const hb_codepoint_t &codepoint) const
599 {
600 if (codepoint < startUnicodeValue) return -1;
601 if (codepoint > startUnicodeValue + additionalCount) return +1;
602 return 0;
603 }
604
605 inline bool sanitize (hb_sanitize_context_t *c) const
606 {
607 TRACE_SANITIZE (this);
608 return_trace (c->check_struct (this));
609 }
610
611 HBUINT24 startUnicodeValue; /* First value in this range. */
612 HBUINT8 additionalCount; /* Number of additional values in this
613 * range. */
614 public:
615 DEFINE_SIZE_STATIC (4);
616};
617
618struct DefaultUVS : SortedArrayOf<UnicodeValueRange, HBUINT32>
619{
620 inline void collect_unicodes (hb_set_t *out) const
621 {
622 unsigned int count = len;
623 for (unsigned int i = 0; i < count; i++)
624 {
625 hb_codepoint_t first = arrayZ[i].startUnicodeValue;
626 hb_codepoint_t last = MIN ((hb_codepoint_t) (first + arrayZ[i].additionalCount),
627 (hb_codepoint_t) HB_UNICODE_MAX);
628 out->add_range (first, last);
629 }
630 }
631
632 public:
633 DEFINE_SIZE_ARRAY (4, arrayZ);
634};
635
636struct UVSMapping
637{
638 inline int cmp (const hb_codepoint_t &codepoint) const
639 {
640 return unicodeValue.cmp (codepoint);
641 }
642
643 inline bool sanitize (hb_sanitize_context_t *c) const
644 {
645 TRACE_SANITIZE (this);
646 return_trace (c->check_struct (this));
647 }
648
649 HBUINT24 unicodeValue; /* Base Unicode value of the UVS */
650 GlyphID glyphID; /* Glyph ID of the UVS */
651 public:
652 DEFINE_SIZE_STATIC (5);
653};
654
655struct NonDefaultUVS : SortedArrayOf<UVSMapping, HBUINT32>
656{
657 inline void collect_unicodes (hb_set_t *out) const
658 {
659 unsigned int count = len;
660 for (unsigned int i = 0; i < count; i++)
661 out->add (arrayZ[i].glyphID);
662 }
663
664 public:
665 DEFINE_SIZE_ARRAY (4, arrayZ);
666};
667
668struct VariationSelectorRecord
669{
670 inline glyph_variant_t get_glyph (hb_codepoint_t codepoint,
671 hb_codepoint_t *glyph,
672 const void *base) const
673 {
674 int i;
675 const DefaultUVS &defaults = base+defaultUVS;
676 i = defaults.bsearch (codepoint);
677 if (i != -1)
678 return GLYPH_VARIANT_USE_DEFAULT;
679 const NonDefaultUVS &nonDefaults = base+nonDefaultUVS;
680 i = nonDefaults.bsearch (codepoint);
681 if (i != -1 && nonDefaults[i].glyphID)
682 {
683 *glyph = nonDefaults[i].glyphID;
684 return GLYPH_VARIANT_FOUND;
685 }
686 return GLYPH_VARIANT_NOT_FOUND;
687 }
688
689 inline void collect_unicodes (hb_set_t *out, const void *base) const
690 {
691 (base+defaultUVS).collect_unicodes (out);
692 (base+nonDefaultUVS).collect_unicodes (out);
693 }
694
695 inline int cmp (const hb_codepoint_t &variation_selector) const
696 {
697 return varSelector.cmp (variation_selector);
698 }
699
700 inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
701 {
702 TRACE_SANITIZE (this);
703 return_trace (c->check_struct (this) &&
704 defaultUVS.sanitize (c, base) &&
705 nonDefaultUVS.sanitize (c, base));
706 }
707
708 HBUINT24 varSelector; /* Variation selector. */
709 LOffsetTo<DefaultUVS>
710 defaultUVS; /* Offset to Default UVS Table. May be 0. */
711 LOffsetTo<NonDefaultUVS>
712 nonDefaultUVS; /* Offset to Non-Default UVS Table. May be 0. */
713 public:
714 DEFINE_SIZE_STATIC (11);
715};
716
717struct CmapSubtableFormat14
718{
719 inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
720 hb_codepoint_t variation_selector,
721 hb_codepoint_t *glyph) const
722 {
723 return record[record.bsearch (variation_selector)].get_glyph (codepoint, glyph, this);
724 }
725
726 inline void collect_variation_selectors (hb_set_t *out) const
727 {
728 unsigned int count = record.len;
729 for (unsigned int i = 0; i < count; i++)
730 out->add (record.arrayZ[i].varSelector);
731 }
732 inline void collect_variation_unicodes (hb_codepoint_t variation_selector,
733 hb_set_t *out) const
734 {
735 record[record.bsearch (variation_selector)].collect_unicodes (out, this);
736 }
737
738 inline bool sanitize (hb_sanitize_context_t *c) const
739 {
740 TRACE_SANITIZE (this);
741 return_trace (c->check_struct (this) &&
742 record.sanitize (c, this));
743 }
744
745 protected:
746 HBUINT16 format; /* Format number is set to 14. */
747 HBUINT32 length; /* Byte length of this subtable. */
748 SortedArrayOf<VariationSelectorRecord, HBUINT32>
749 record; /* Variation selector records; sorted
750 * in increasing order of `varSelector'. */
751 public:
752 DEFINE_SIZE_ARRAY (10, record);
753};
754
755struct CmapSubtable
756{
757 /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
758
759 inline bool get_glyph (hb_codepoint_t codepoint,
760 hb_codepoint_t *glyph) const
761 {
762 switch (u.format) {
763 case 0: return u.format0 .get_glyph (codepoint, glyph);
764 case 4: return u.format4 .get_glyph (codepoint, glyph);
765 case 6: return u.format6 .get_glyph (codepoint, glyph);
766 case 10: return u.format10.get_glyph (codepoint, glyph);
767 case 12: return u.format12.get_glyph (codepoint, glyph);
768 case 13: return u.format13.get_glyph (codepoint, glyph);
769 case 14:
770 default: return false;
771 }
772 }
773 inline void collect_unicodes (hb_set_t *out) const
774 {
775 switch (u.format) {
776 case 0: u.format0 .collect_unicodes (out); return;
777 case 4: u.format4 .collect_unicodes (out); return;
778 case 6: u.format6 .collect_unicodes (out); return;
779 case 10: u.format10.collect_unicodes (out); return;
780 case 12: u.format12.collect_unicodes (out); return;
781 case 13: u.format13.collect_unicodes (out); return;
782 case 14:
783 default: return;
784 }
785 }
786
787 inline bool sanitize (hb_sanitize_context_t *c) const
788 {
789 TRACE_SANITIZE (this);
790 if (!u.format.sanitize (c)) return_trace (false);
791 switch (u.format) {
792 case 0: return_trace (u.format0 .sanitize (c));
793 case 4: return_trace (u.format4 .sanitize (c));
794 case 6: return_trace (u.format6 .sanitize (c));
795 case 10: return_trace (u.format10.sanitize (c));
796 case 12: return_trace (u.format12.sanitize (c));
797 case 13: return_trace (u.format13.sanitize (c));
798 case 14: return_trace (u.format14.sanitize (c));
799 default:return_trace (true);
800 }
801 }
802
803 public:
804 union {
805 HBUINT16 format; /* Format identifier */
806 CmapSubtableFormat0 format0;
807 CmapSubtableFormat4 format4;
808 CmapSubtableFormat6 format6;
809 CmapSubtableFormat10 format10;
810 CmapSubtableFormat12 format12;
811 CmapSubtableFormat13 format13;
812 CmapSubtableFormat14 format14;
813 } u;
814 public:
815 DEFINE_SIZE_UNION (2, format);
816};
817
818
819struct EncodingRecord
820{
821 inline int cmp (const EncodingRecord &other) const
822 {
823 int ret;
824 ret = platformID.cmp (other.platformID);
825 if (ret) return ret;
826 ret = encodingID.cmp (other.encodingID);
827 if (ret) return ret;
828 return 0;
829 }
830
831 inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
832 {
833 TRACE_SANITIZE (this);
834 return_trace (c->check_struct (this) &&
835 subtable.sanitize (c, base));
836 }
837
838 HBUINT16 platformID; /* Platform ID. */
839 HBUINT16 encodingID; /* Platform-specific encoding ID. */
840 LOffsetTo<CmapSubtable>
841 subtable; /* Byte offset from beginning of table to the subtable for this encoding. */
842 public:
843 DEFINE_SIZE_STATIC (8);
844};
845
846struct cmap
847{
848 static const hb_tag_t tableTag = HB_OT_TAG_cmap;
849
850 struct subset_plan
851 {
852 subset_plan(void)
853 {
854 format4_segments.init();
855 format12_groups.init();
856 }
857
858 ~subset_plan(void)
859 {
860 format4_segments.fini();
861 format12_groups.fini();
862 }
863
864 inline size_t final_size() const
865 {
866 return 4 // header
867 + 8 * 3 // 3 EncodingRecord
868 + CmapSubtableFormat4::get_sub_table_size (this->format4_segments)
869 + CmapSubtableFormat12::get_sub_table_size (this->format12_groups);
870 }
871
872 // Format 4
873 hb_vector_t<CmapSubtableFormat4::segment_plan> format4_segments;
874 // Format 12
875 hb_vector_t<CmapSubtableLongGroup> format12_groups;
876 };
877
878 inline bool sanitize (hb_sanitize_context_t *c) const
879 {
880 TRACE_SANITIZE (this);
881 return_trace (c->check_struct (this) &&
882 likely (version == 0) &&
883 encodingRecord.sanitize (c, this));
884 }
885
886 inline bool _create_plan (const hb_subset_plan_t *plan,
887 subset_plan *cmap_plan) const
888 {
889 if (unlikely( !CmapSubtableFormat4::create_sub_table_plan (plan, &cmap_plan->format4_segments)))
890 return false;
891
892 return CmapSubtableFormat12::create_sub_table_plan (plan, &cmap_plan->format12_groups);
893 }
894
895 inline bool _subset (const hb_subset_plan_t *plan,
896 const subset_plan &cmap_subset_plan,
897 size_t dest_sz,
898 void *dest) const
899 {
900 hb_serialize_context_t c (dest, dest_sz);
901
902 cmap *table = c.start_serialize<cmap> ();
903 if (unlikely (!c.extend_min (*table)))
904 {
905 return false;
906 }
907
908 table->version.set (0);
909
910 if (unlikely (!table->encodingRecord.serialize (&c, /* numTables */ 3)))
911 return false;
912
913 // TODO(grieger): Convert the below to a for loop
914
915 // Format 4, Plat 0 Encoding Record
916 EncodingRecord &format4_plat0_rec = table->encodingRecord[0];
917 format4_plat0_rec.platformID.set (0); // Unicode
918 format4_plat0_rec.encodingID.set (3);
919
920 // Format 4, Plat 3 Encoding Record
921 EncodingRecord &format4_plat3_rec = table->encodingRecord[1];
922 format4_plat3_rec.platformID.set (3); // Windows
923 format4_plat3_rec.encodingID.set (1); // Unicode BMP
924
925 // Format 12 Encoding Record
926 EncodingRecord &format12_rec = table->encodingRecord[2];
927 format12_rec.platformID.set (3); // Windows
928 format12_rec.encodingID.set (10); // Unicode UCS-4
929
930 // Write out format 4 sub table
931 {
932 CmapSubtable &subtable = format4_plat0_rec.subtable.serialize (&c, table);
933 format4_plat3_rec.subtable.set (format4_plat0_rec.subtable);
934 subtable.u.format.set (4);
935
936 CmapSubtableFormat4 &format4 = subtable.u.format4;
937 if (unlikely (!format4.serialize (&c, plan, cmap_subset_plan.format4_segments)))
938 return false;
939 }
940
941 // Write out format 12 sub table.
942 {
943 CmapSubtable &subtable = format12_rec.subtable.serialize (&c, table);
944 subtable.u.format.set (12);
945
946 CmapSubtableFormat12 &format12 = subtable.u.format12;
947 if (unlikely (!format12.serialize (&c, cmap_subset_plan.format12_groups)))
948 return false;
949 }
950
951 c.end_serialize ();
952
953 return true;
954 }
955
956 inline bool subset (hb_subset_plan_t *plan) const
957 {
958 subset_plan cmap_subset_plan;
959
960 if (unlikely (!_create_plan (plan, &cmap_subset_plan)))
961 {
962 DEBUG_MSG(SUBSET, nullptr, "Failed to generate a cmap subsetting plan.");
963 return false;
964 }
965
966 // We now know how big our blob needs to be
967 size_t dest_sz = cmap_subset_plan.final_size();
968 void *dest = malloc (dest_sz);
969 if (unlikely (!dest)) {
970 DEBUG_MSG(SUBSET, nullptr, "Unable to alloc %lu for cmap subset output", (unsigned long) dest_sz);
971 return false;
972 }
973
974 if (unlikely (!_subset (plan, cmap_subset_plan, dest_sz, dest)))
975 {
976 DEBUG_MSG(SUBSET, nullptr, "Failed to perform subsetting of cmap.");
977 free (dest);
978 return false;
979 }
980
981 // all done, write the blob into dest
982 hb_blob_t *cmap_prime = hb_blob_create ((const char *)dest,
983 dest_sz,
984 HB_MEMORY_MODE_READONLY,
985 dest,
986 free);
987 bool result = plan->add_table (HB_OT_TAG_cmap, cmap_prime);
988 hb_blob_destroy (cmap_prime);
989 return result;
990 }
991
992 const CmapSubtable *find_best_subtable (bool *symbol = nullptr) const
993 {
994 if (symbol) *symbol = false;
995
996 const CmapSubtable *subtable;
997
998 /* 32-bit subtables. */
999 if ((subtable = this->find_subtable (3, 10))) return subtable;
1000 if ((subtable = this->find_subtable (0, 6))) return subtable;
1001 if ((subtable = this->find_subtable (0, 4))) return subtable;
1002
1003 /* 16-bit subtables. */
1004 if ((subtable = this->find_subtable (3, 1))) return subtable;
1005 if ((subtable = this->find_subtable (0, 3))) return subtable;
1006 if ((subtable = this->find_subtable (0, 2))) return subtable;
1007 if ((subtable = this->find_subtable (0, 1))) return subtable;
1008 if ((subtable = this->find_subtable (0, 0))) return subtable;
1009
1010 /* Symbol subtable. */
1011 if ((subtable = this->find_subtable (3, 0)))
1012 {
1013 if (symbol) *symbol = true;
1014 return subtable;
1015 }
1016
1017 /* Meh. */
1018 return &Null(CmapSubtable);
1019 }
1020
1021 struct accelerator_t
1022 {
1023 inline void init (hb_face_t *face)
1024 {
1025 this->blob = hb_sanitize_context_t().reference_table<cmap> (face);
1026 const cmap *table = this->blob->as<cmap> ();
1027 const CmapSubtableFormat14 *subtable_uvs = nullptr;
1028 bool symbol;
1029 subtable = table->find_best_subtable (&symbol);
1030
1031 /* UVS subtable. */
1032 if (!subtable_uvs)
1033 {
1034 const CmapSubtable *st = table->find_subtable (0, 5);
1035 if (st && st->u.format == 14)
1036 subtable_uvs = &st->u.format14;
1037 }
1038 /* Meh. */
1039 if (!subtable_uvs) subtable_uvs = &Null(CmapSubtableFormat14);
1040
1041 this->subtable_uvs = subtable_uvs;
1042
1043 this->get_glyph_data = subtable;
1044 if (unlikely (symbol))
1045 {
1046 this->get_glyph_func = get_glyph_from_symbol<CmapSubtable>;
1047 } else {
1048 switch (subtable->u.format) {
1049 /* Accelerate format 4 and format 12. */
1050 default:
1051 this->get_glyph_func = get_glyph_from<CmapSubtable>;
1052 break;
1053 case 12:
1054 this->get_glyph_func = get_glyph_from<CmapSubtableFormat12>;
1055 break;
1056 case 4:
1057 {
1058 this->format4_accel.init (&subtable->u.format4);
1059 this->get_glyph_data = &this->format4_accel;
1060 this->get_glyph_func = this->format4_accel.get_glyph_func;
1061 }
1062 break;
1063 }
1064 }
1065 }
1066
1067 inline void fini (void)
1068 {
1069 hb_blob_destroy (this->blob);
1070 }
1071
1072 inline bool get_nominal_glyph (hb_codepoint_t unicode,
1073 hb_codepoint_t *glyph) const
1074 {
1075 return this->get_glyph_func (this->get_glyph_data, unicode, glyph);
1076 }
1077
1078 inline bool get_variation_glyph (hb_codepoint_t unicode,
1079 hb_codepoint_t variation_selector,
1080 hb_codepoint_t *glyph) const
1081 {
1082 switch (this->subtable_uvs->get_glyph_variant (unicode,
1083 variation_selector,
1084 glyph))
1085 {
1086 case GLYPH_VARIANT_NOT_FOUND: return false;
1087 case GLYPH_VARIANT_FOUND: return true;
1088 case GLYPH_VARIANT_USE_DEFAULT: break;
1089 }
1090
1091 return get_nominal_glyph (unicode, glyph);
1092 }
1093
1094 inline void collect_unicodes (hb_set_t *out) const
1095 {
1096 subtable->collect_unicodes (out);
1097 }
1098 inline void collect_variation_selectors (hb_set_t *out) const
1099 {
1100 subtable_uvs->collect_variation_selectors (out);
1101 }
1102 inline void collect_variation_unicodes (hb_codepoint_t variation_selector,
1103 hb_set_t *out) const
1104 {
1105 subtable_uvs->collect_variation_unicodes (variation_selector, out);
1106 }
1107
1108 protected:
1109 typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj,
1110 hb_codepoint_t codepoint,
1111 hb_codepoint_t *glyph);
1112
1113 template <typename Type>
1114 static inline bool get_glyph_from (const void *obj,
1115 hb_codepoint_t codepoint,
1116 hb_codepoint_t *glyph)
1117 {
1118 const Type *typed_obj = (const Type *) obj;
1119 return typed_obj->get_glyph (codepoint, glyph);
1120 }
1121
1122 template <typename Type>
1123 static inline bool get_glyph_from_symbol (const void *obj,
1124 hb_codepoint_t codepoint,
1125 hb_codepoint_t *glyph)
1126 {
1127 const Type *typed_obj = (const Type *) obj;
1128 if (likely (typed_obj->get_glyph (codepoint, glyph)))
1129 return true;
1130
1131 if (codepoint <= 0x00FFu)
1132 {
1133 /* For symbol-encoded OpenType fonts, we duplicate the
1134 * U+F000..F0FF range at U+0000..U+00FF. That's what
1135 * Windows seems to do, and that's hinted about at:
1136 * https://docs.microsoft.com/en-us/typography/opentype/spec/recom
1137 * under "Non-Standard (Symbol) Fonts". */
1138 return typed_obj->get_glyph (0xF000u + codepoint, glyph);
1139 }
1140
1141 return false;
1142 }
1143
1144 private:
1145 const CmapSubtable *subtable;
1146 const CmapSubtableFormat14 *subtable_uvs;
1147
1148 hb_cmap_get_glyph_func_t get_glyph_func;
1149 const void *get_glyph_data;
1150
1151 CmapSubtableFormat4::accelerator_t format4_accel;
1152
1153 hb_blob_t *blob;
1154 };
1155
1156 protected:
1157
1158 inline const CmapSubtable *find_subtable (unsigned int platform_id,
1159 unsigned int encoding_id) const
1160 {
1161 EncodingRecord key;
1162 key.platformID.set (platform_id);
1163 key.encodingID.set (encoding_id);
1164
1165 int result = encodingRecord.bsearch (key);
1166 if (result == -1 || !encodingRecord[result].subtable)
1167 return nullptr;
1168
1169 return &(this+encodingRecord[result].subtable);
1170 }
1171
1172 protected:
1173 HBUINT16 version; /* Table version number (0). */
1174 SortedArrayOf<EncodingRecord>
1175 encodingRecord; /* Encoding tables. */
1176 public:
1177 DEFINE_SIZE_ARRAY (4, encodingRecord);
1178};
1179
/* Namespace-level name for cmap::accelerator_t; adds no members. */
struct cmap_accelerator_t : cmap::accelerator_t {};
1181
1182} /* namespace OT */
1183
1184
1185#endif /* HB_OT_CMAP_TABLE_HH */
1186