1 | /* |
2 | * Copyright © 2014 Google, Inc. |
3 | * |
4 | * This is part of HarfBuzz, a text shaping library. |
5 | * |
6 | * Permission is hereby granted, without written agreement and without |
7 | * license or royalty fees, to use, copy, modify, and distribute this |
8 | * software and its documentation for any purpose, provided that the |
9 | * above copyright notice and the following two paragraphs appear in |
10 | * all copies of this software. |
11 | * |
12 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 | * DAMAGE. |
17 | * |
18 | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 | * |
24 | * Google Author(s): Behdad Esfahbod |
25 | */ |
26 | |
27 | #ifndef HB_OT_CMAP_TABLE_HH |
28 | #define HB_OT_CMAP_TABLE_HH |
29 | |
30 | #include "hb-open-type.hh" |
31 | #include "hb-set.hh" |
32 | |
33 | /* |
34 | * cmap -- Character to Glyph Index Mapping |
35 | * https://docs.microsoft.com/en-us/typography/opentype/spec/cmap |
36 | */ |
37 | #define HB_OT_TAG_cmap HB_TAG('c','m','a','p') |
38 | |
39 | namespace OT { |
40 | |
41 | |
42 | struct CmapSubtableFormat0 |
43 | { |
44 | bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const |
45 | { |
46 | hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0; |
47 | if (!gid) |
48 | return false; |
49 | *glyph = gid; |
50 | return true; |
51 | } |
52 | void collect_unicodes (hb_set_t *out) const |
53 | { |
54 | for (unsigned int i = 0; i < 256; i++) |
55 | if (glyphIdArray[i]) |
56 | out->add (i); |
57 | } |
58 | |
59 | bool sanitize (hb_sanitize_context_t *c) const |
60 | { |
61 | TRACE_SANITIZE (this); |
62 | return_trace (c->check_struct (this)); |
63 | } |
64 | |
65 | protected: |
66 | HBUINT16 format; /* Format number is set to 0. */ |
67 | HBUINT16 length; /* Byte length of this subtable. */ |
68 | HBUINT16 language; /* Ignore. */ |
69 | HBUINT8 glyphIdArray[256];/* An array that maps character |
70 | * code to glyph index values. */ |
71 | public: |
72 | DEFINE_SIZE_STATIC (6 + 256); |
73 | }; |
74 | |
75 | struct CmapSubtableFormat4 |
76 | { |
/* One planned segment of the subset format 4 subtable: the inclusive
 * codepoint run [start_code, end_code] and how its glyph ids are encoded. */
struct segment_plan
{
  HBUINT16 start_code; /* First codepoint of the segment. */
  HBUINT16 end_code; /* Last codepoint of the segment (inclusive). */
  bool use_delta; /* True: serialized through idDelta; false: serialized
                   * through an explicit glyph id array. */
};
83 | |
84 | bool serialize (hb_serialize_context_t *c, |
85 | const hb_subset_plan_t *plan, |
86 | const hb_vector_t<segment_plan> &segments) |
87 | { |
88 | TRACE_SERIALIZE (this); |
89 | |
90 | if (unlikely (!c->extend_min (*this))) return_trace (false); |
91 | |
92 | this->format.set (4); |
93 | this->length.set (get_sub_table_size (segments)); |
94 | |
95 | this->segCountX2.set (segments.length * 2); |
96 | this->entrySelector.set (MAX (1u, hb_bit_storage (segments.length)) - 1); |
97 | this->searchRange.set (2 * (1u << this->entrySelector)); |
98 | this->rangeShift.set (segments.length * 2 > this->searchRange |
99 | ? 2 * segments.length - this->searchRange |
100 | : 0); |
101 | |
102 | HBUINT16 *end_count = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.length); |
103 | c->allocate_size<HBUINT16> (HBUINT16::static_size); // 2 bytes of padding. |
104 | HBUINT16 *start_count = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.length); |
105 | HBINT16 *id_delta = c->allocate_size<HBINT16> (HBUINT16::static_size * segments.length); |
106 | HBUINT16 *id_range_offset = c->allocate_size<HBUINT16> (HBUINT16::static_size * segments.length); |
107 | |
108 | if (id_range_offset == nullptr) |
109 | return_trace (false); |
110 | |
111 | for (unsigned int i = 0; i < segments.length; i++) |
112 | { |
113 | end_count[i].set (segments[i].end_code); |
114 | start_count[i].set (segments[i].start_code); |
115 | if (segments[i].use_delta) |
116 | { |
117 | hb_codepoint_t cp = segments[i].start_code; |
118 | hb_codepoint_t start_gid = 0; |
119 | if (unlikely (!plan->new_gid_for_codepoint (cp, &start_gid) && cp != 0xFFFF)) |
120 | return_trace (false); |
121 | id_delta[i].set (start_gid - segments[i].start_code); |
122 | } else { |
123 | id_delta[i].set (0); |
124 | unsigned int num_codepoints = segments[i].end_code - segments[i].start_code + 1; |
125 | HBUINT16 *glyph_id_array = c->allocate_size<HBUINT16> (HBUINT16::static_size * num_codepoints); |
126 | if (glyph_id_array == nullptr) |
127 | return_trace (false); |
128 | // From the cmap spec: |
129 | // |
130 | // id_range_offset[i]/2 |
131 | // + (cp - segments[i].start_code) |
132 | // + (id_range_offset + i) |
133 | // = |
134 | // glyph_id_array + (cp - segments[i].start_code) |
135 | // |
136 | // So, solve for id_range_offset[i]: |
137 | // |
138 | // id_range_offset[i] |
139 | // = |
140 | // 2 * (glyph_id_array - id_range_offset - i) |
141 | id_range_offset[i].set (2 * ( |
142 | glyph_id_array - id_range_offset - i)); |
143 | for (unsigned int j = 0; j < num_codepoints; j++) |
144 | { |
145 | hb_codepoint_t cp = segments[i].start_code + j; |
146 | hb_codepoint_t new_gid; |
147 | if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid))) |
148 | return_trace (false); |
149 | glyph_id_array[j].set (new_gid); |
150 | } |
151 | } |
152 | } |
153 | |
154 | return_trace (true); |
155 | } |
156 | |
157 | static size_t get_sub_table_size (const hb_vector_t<segment_plan> &segments) |
158 | { |
159 | size_t segment_size = 0; |
160 | for (unsigned int i = 0; i < segments.length; i++) |
161 | { |
162 | // Parallel array entries |
163 | segment_size += |
164 | 2 // end count |
165 | + 2 // start count |
166 | + 2 // delta |
167 | + 2; // range offset |
168 | |
169 | if (!segments[i].use_delta) |
170 | // Add bytes for the glyph index array entries for this segment. |
171 | segment_size += (segments[i].end_code - segments[i].start_code + 1) * 2; |
172 | } |
173 | |
174 | return min_size |
175 | + 2 // Padding |
176 | + segment_size; |
177 | } |
178 | |
179 | static bool create_sub_table_plan (const hb_subset_plan_t *plan, |
180 | hb_vector_t<segment_plan> *segments) |
181 | { |
182 | segment_plan *segment = nullptr; |
183 | hb_codepoint_t last_gid = 0; |
184 | |
185 | hb_codepoint_t cp = HB_SET_VALUE_INVALID; |
186 | while (plan->unicodes->next (&cp)) { |
187 | hb_codepoint_t new_gid; |
188 | if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid))) |
189 | { |
190 | DEBUG_MSG(SUBSET, nullptr, "Unable to find new gid for %04x" , cp); |
191 | return false; |
192 | } |
193 | |
194 | /* Stop adding to cmap if we are now outside of unicode BMP. */ |
195 | if (cp > 0xFFFF) break; |
196 | |
197 | if (!segment || |
198 | cp != segment->end_code + 1u) |
199 | { |
200 | segment = segments->push (); |
201 | segment->start_code.set (cp); |
202 | segment->end_code.set (cp); |
203 | segment->use_delta = true; |
204 | } else { |
205 | segment->end_code.set (cp); |
206 | if (last_gid + 1u != new_gid) |
207 | // gid's are not consecutive in this segment so delta |
208 | // cannot be used. |
209 | segment->use_delta = false; |
210 | } |
211 | |
212 | last_gid = new_gid; |
213 | } |
214 | |
215 | // There must be a final entry with end_code == 0xFFFF. Check if we need to add one. |
216 | if (segment == nullptr || segment->end_code != 0xFFFF) |
217 | { |
218 | segment = segments->push (); |
219 | segment->start_code.set (0xFFFF); |
220 | segment->end_code.set (0xFFFF); |
221 | segment->use_delta = true; |
222 | } |
223 | |
224 | return true; |
225 | } |
226 | |
227 | struct accelerator_t |
228 | { |
229 | accelerator_t () {} |
230 | accelerator_t (const CmapSubtableFormat4 *subtable) { init (subtable); } |
231 | ~accelerator_t () { fini (); } |
232 | |
233 | void init (const CmapSubtableFormat4 *subtable) |
234 | { |
235 | segCount = subtable->segCountX2 / 2; |
236 | endCount = subtable->values.arrayZ; |
237 | startCount = endCount + segCount + 1; |
238 | idDelta = startCount + segCount; |
239 | idRangeOffset = idDelta + segCount; |
240 | glyphIdArray = idRangeOffset + segCount; |
241 | glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2; |
242 | } |
243 | void fini () {} |
244 | |
245 | bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const |
246 | { |
247 | /* Custom two-array bsearch. */ |
248 | int min = 0, max = (int) this->segCount - 1; |
249 | const HBUINT16 *startCount = this->startCount; |
250 | const HBUINT16 *endCount = this->endCount; |
251 | unsigned int i; |
252 | while (min <= max) |
253 | { |
254 | int mid = ((unsigned int) min + (unsigned int) max) / 2; |
255 | if (codepoint < startCount[mid]) |
256 | max = mid - 1; |
257 | else if (codepoint > endCount[mid]) |
258 | min = mid + 1; |
259 | else |
260 | { |
261 | i = mid; |
262 | goto found; |
263 | } |
264 | } |
265 | return false; |
266 | |
267 | found: |
268 | hb_codepoint_t gid; |
269 | unsigned int rangeOffset = this->idRangeOffset[i]; |
270 | if (rangeOffset == 0) |
271 | gid = codepoint + this->idDelta[i]; |
272 | else |
273 | { |
274 | /* Somebody has been smoking... */ |
275 | unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount; |
276 | if (unlikely (index >= this->glyphIdArrayLength)) |
277 | return false; |
278 | gid = this->glyphIdArray[index]; |
279 | if (unlikely (!gid)) |
280 | return false; |
281 | gid += this->idDelta[i]; |
282 | } |
283 | gid &= 0xFFFFu; |
284 | if (!gid) |
285 | return false; |
286 | *glyph = gid; |
287 | return true; |
288 | } |
289 | static bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph) |
290 | { |
291 | return ((const accelerator_t *) obj)->get_glyph (codepoint, glyph); |
292 | } |
293 | void collect_unicodes (hb_set_t *out) const |
294 | { |
295 | unsigned int count = this->segCount; |
296 | if (count && this->startCount[count - 1] == 0xFFFFu) |
297 | count--; /* Skip sentinel segment. */ |
298 | for (unsigned int i = 0; i < count; i++) |
299 | { |
300 | unsigned int rangeOffset = this->idRangeOffset[i]; |
301 | if (rangeOffset == 0) |
302 | out->add_range (this->startCount[i], this->endCount[i]); |
303 | else |
304 | { |
305 | for (hb_codepoint_t codepoint = this->startCount[i]; |
306 | codepoint <= this->endCount[i]; |
307 | codepoint++) |
308 | { |
309 | unsigned int index = rangeOffset / 2 + (codepoint - this->startCount[i]) + i - this->segCount; |
310 | if (unlikely (index >= this->glyphIdArrayLength)) |
311 | break; |
312 | hb_codepoint_t gid = this->glyphIdArray[index]; |
313 | if (unlikely (!gid)) |
314 | continue; |
315 | out->add (codepoint); |
316 | } |
317 | } |
318 | } |
319 | } |
320 | |
321 | const HBUINT16 *endCount; |
322 | const HBUINT16 *startCount; |
323 | const HBUINT16 *idDelta; |
324 | const HBUINT16 *idRangeOffset; |
325 | const HBUINT16 *glyphIdArray; |
326 | unsigned int segCount; |
327 | unsigned int glyphIdArrayLength; |
328 | }; |
329 | |
330 | bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const |
331 | { |
332 | accelerator_t accel (this); |
333 | return accel.get_glyph_func (&accel, codepoint, glyph); |
334 | } |
335 | void collect_unicodes (hb_set_t *out) const |
336 | { |
337 | accelerator_t accel (this); |
338 | accel.collect_unicodes (out); |
339 | } |
340 | |
341 | bool sanitize (hb_sanitize_context_t *c) const |
342 | { |
343 | TRACE_SANITIZE (this); |
344 | if (unlikely (!c->check_struct (this))) |
345 | return_trace (false); |
346 | |
347 | if (unlikely (!c->check_range (this, length))) |
348 | { |
349 | /* Some broken fonts have too long of a "length" value. |
350 | * If that is the case, just change the value to truncate |
351 | * the subtable at the end of the blob. */ |
352 | uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535, |
353 | (uintptr_t) (c->end - |
354 | (char *) this)); |
355 | if (!c->try_set (&length, new_length)) |
356 | return_trace (false); |
357 | } |
358 | |
359 | return_trace (16 + 4 * (unsigned int) segCountX2 <= length); |
360 | } |
361 | |
362 | |
363 | |
364 | protected: |
365 | HBUINT16 format; /* Format number is set to 4. */ |
366 | HBUINT16 length; /* This is the length in bytes of the |
367 | * subtable. */ |
368 | HBUINT16 language; /* Ignore. */ |
369 | HBUINT16 segCountX2; /* 2 x segCount. */ |
370 | HBUINT16 searchRange; /* 2 * (2**floor(log2(segCount))) */ |
371 | HBUINT16 entrySelector; /* log2(searchRange/2) */ |
372 | HBUINT16 rangeShift; /* 2 x segCount - searchRange */ |
373 | |
374 | UnsizedArrayOf<HBUINT16> |
375 | values; |
376 | #if 0 |
377 | HBUINT16 endCount[segCount]; /* End characterCode for each segment, |
378 | * last=0xFFFFu. */ |
379 | HBUINT16 reservedPad; /* Set to 0. */ |
380 | HBUINT16 startCount[segCount]; /* Start character code for each segment. */ |
381 | HBINT16 idDelta[segCount]; /* Delta for all character codes in segment. */ |
382 | HBUINT16 idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */ |
383 | UnsizedArrayOf<HBUINT16> |
384 | glyphIdArray; /* Glyph index array (arbitrary length) */ |
385 | #endif |
386 | |
387 | public: |
388 | DEFINE_SIZE_ARRAY (14, values); |
389 | }; |
390 | |
391 | struct CmapSubtableLongGroup |
392 | { |
393 | friend struct CmapSubtableFormat12; |
394 | friend struct CmapSubtableFormat13; |
395 | template<typename U> |
396 | friend struct CmapSubtableLongSegmented; |
397 | friend struct cmap; |
398 | |
399 | int cmp (hb_codepoint_t codepoint) const |
400 | { |
401 | if (codepoint < startCharCode) return -1; |
402 | if (codepoint > endCharCode) return +1; |
403 | return 0; |
404 | } |
405 | |
406 | bool sanitize (hb_sanitize_context_t *c) const |
407 | { |
408 | TRACE_SANITIZE (this); |
409 | return_trace (c->check_struct (this)); |
410 | } |
411 | |
412 | private: |
413 | HBUINT32 startCharCode; /* First character code in this group. */ |
414 | HBUINT32 endCharCode; /* Last character code in this group. */ |
415 | HBUINT32 glyphID; /* Glyph index; interpretation depends on |
416 | * subtable format. */ |
417 | public: |
418 | DEFINE_SIZE_STATIC (12); |
419 | }; |
420 | DECLARE_NULL_NAMESPACE_BYTES (OT, CmapSubtableLongGroup); |
421 | |
422 | template <typename UINT> |
423 | struct CmapSubtableTrimmed |
424 | { |
425 | bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const |
426 | { |
427 | /* Rely on our implicit array bound-checking. */ |
428 | hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode]; |
429 | if (!gid) |
430 | return false; |
431 | *glyph = gid; |
432 | return true; |
433 | } |
434 | void collect_unicodes (hb_set_t *out) const |
435 | { |
436 | hb_codepoint_t start = startCharCode; |
437 | unsigned int count = glyphIdArray.len; |
438 | for (unsigned int i = 0; i < count; i++) |
439 | if (glyphIdArray[i]) |
440 | out->add (start + i); |
441 | } |
442 | |
443 | bool sanitize (hb_sanitize_context_t *c) const |
444 | { |
445 | TRACE_SANITIZE (this); |
446 | return_trace (c->check_struct (this) && glyphIdArray.sanitize (c)); |
447 | } |
448 | |
449 | protected: |
450 | UINT formatReserved; /* Subtable format and (maybe) padding. */ |
451 | UINT length; /* Byte length of this subtable. */ |
452 | UINT language; /* Ignore. */ |
453 | UINT startCharCode; /* First character code covered. */ |
454 | ArrayOf<GlyphID, UINT> |
455 | glyphIdArray; /* Array of glyph index values for character |
456 | * codes in the range. */ |
457 | public: |
458 | DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray); |
459 | }; |
460 | |
/* Formats 6 and 10 share the trimmed-array layout and differ only in the
 * width of the header fields (16-bit vs. 32-bit). */
struct CmapSubtableFormat6 : CmapSubtableTrimmed<HBUINT16> {};
struct CmapSubtableFormat10 : CmapSubtableTrimmed<HBUINT32 > {};
463 | |
464 | template <typename T> |
465 | struct CmapSubtableLongSegmented |
466 | { |
467 | friend struct cmap; |
468 | |
469 | bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const |
470 | { |
471 | hb_codepoint_t gid = T::group_get_glyph (groups.bsearch (codepoint), codepoint); |
472 | if (!gid) |
473 | return false; |
474 | *glyph = gid; |
475 | return true; |
476 | } |
477 | |
478 | void collect_unicodes (hb_set_t *out) const |
479 | { |
480 | for (unsigned int i = 0; i < this->groups.len; i++) { |
481 | out->add_range (this->groups[i].startCharCode, |
482 | MIN ((hb_codepoint_t) this->groups[i].endCharCode, |
483 | (hb_codepoint_t) HB_UNICODE_MAX)); |
484 | } |
485 | } |
486 | |
487 | bool sanitize (hb_sanitize_context_t *c) const |
488 | { |
489 | TRACE_SANITIZE (this); |
490 | return_trace (c->check_struct (this) && groups.sanitize (c)); |
491 | } |
492 | |
493 | bool serialize (hb_serialize_context_t *c, |
494 | const hb_vector_t<CmapSubtableLongGroup> &group_data) |
495 | { |
496 | TRACE_SERIALIZE (this); |
497 | if (unlikely (!c->extend_min (*this))) return_trace (false); |
498 | if (unlikely (!groups.serialize (c, group_data.as_array ()))) return_trace (false); |
499 | return true; |
500 | } |
501 | |
502 | protected: |
503 | HBUINT16 format; /* Subtable format; set to 12. */ |
504 | HBUINT16 reserved; /* Reserved; set to 0. */ |
505 | HBUINT32 length; /* Byte length of this subtable. */ |
506 | HBUINT32 language; /* Ignore. */ |
507 | SortedArrayOf<CmapSubtableLongGroup, HBUINT32> |
508 | groups; /* Groupings. */ |
509 | public: |
510 | DEFINE_SIZE_ARRAY (16, groups); |
511 | }; |
512 | |
513 | struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12> |
514 | { |
515 | static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group, |
516 | hb_codepoint_t u) |
517 | { return likely (group.startCharCode <= group.endCharCode) ? |
518 | group.glyphID + (u - group.startCharCode) : 0; } |
519 | |
520 | |
521 | bool serialize (hb_serialize_context_t *c, |
522 | const hb_vector_t<CmapSubtableLongGroup> &groups) |
523 | { |
524 | if (unlikely (!c->extend_min (*this))) return false; |
525 | |
526 | this->format.set (12); |
527 | this->reserved.set (0); |
528 | this->length.set (get_sub_table_size (groups)); |
529 | |
530 | return CmapSubtableLongSegmented<CmapSubtableFormat12>::serialize (c, groups); |
531 | } |
532 | |
533 | static size_t get_sub_table_size (const hb_vector_t<CmapSubtableLongGroup> &groups) |
534 | { |
535 | return 16 + 12 * groups.length; |
536 | } |
537 | |
538 | static bool create_sub_table_plan (const hb_subset_plan_t *plan, |
539 | hb_vector_t<CmapSubtableLongGroup> *groups) |
540 | { |
541 | CmapSubtableLongGroup *group = nullptr; |
542 | |
543 | hb_codepoint_t cp = HB_SET_VALUE_INVALID; |
544 | while (plan->unicodes->next (&cp)) { |
545 | hb_codepoint_t new_gid; |
546 | if (unlikely (!plan->new_gid_for_codepoint (cp, &new_gid))) |
547 | { |
548 | DEBUG_MSG(SUBSET, nullptr, "Unable to find new gid for %04x" , cp); |
549 | return false; |
550 | } |
551 | |
552 | if (!group || !_is_gid_consecutive (group, cp, new_gid)) |
553 | { |
554 | group = groups->push (); |
555 | group->startCharCode.set (cp); |
556 | group->endCharCode.set (cp); |
557 | group->glyphID.set (new_gid); |
558 | } |
559 | else group->endCharCode.set (cp); |
560 | } |
561 | |
562 | DEBUG_MSG(SUBSET, nullptr, "cmap" ); |
563 | for (unsigned int i = 0; i < groups->length; i++) { |
564 | CmapSubtableLongGroup& group = (*groups)[i]; |
565 | DEBUG_MSG(SUBSET, nullptr, " %d: U+%04X-U+%04X, gid %d-%d" , i, (uint32_t) group.startCharCode, (uint32_t) group.endCharCode, (uint32_t) group.glyphID, (uint32_t) group.glyphID + ((uint32_t) group.endCharCode - (uint32_t) group.startCharCode)); |
566 | } |
567 | |
568 | return true; |
569 | } |
570 | |
571 | private: |
572 | static bool _is_gid_consecutive (CmapSubtableLongGroup *group, |
573 | hb_codepoint_t cp, |
574 | hb_codepoint_t new_gid) |
575 | { |
576 | return (cp - 1 == group->endCharCode) && |
577 | new_gid == group->glyphID + (cp - group->startCharCode); |
578 | } |
579 | |
580 | }; |
581 | |
/* Format 13: every codepoint of a group maps to the same glyph, the
 * group's glyphID. */
struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
{
  /* Returns the group's glyph; the codepoint itself is not used. */
  static hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
                                         hb_codepoint_t u HB_UNUSED)
  { return group.glyphID; }
};
588 | |
/* Outcome of a variation-sequence lookup. */
enum glyph_variant_t
{
  GLYPH_VARIANT_NOT_FOUND = 0,   /* No entry for the sequence. */
  GLYPH_VARIANT_FOUND = 1,       /* A specific variant glyph was found. */
  GLYPH_VARIANT_USE_DEFAULT = 2  /* The base character's default glyph applies. */
};
595 | |
596 | struct UnicodeValueRange |
597 | { |
598 | int cmp (const hb_codepoint_t &codepoint) const |
599 | { |
600 | if (codepoint < startUnicodeValue) return -1; |
601 | if (codepoint > startUnicodeValue + additionalCount) return +1; |
602 | return 0; |
603 | } |
604 | |
605 | bool sanitize (hb_sanitize_context_t *c) const |
606 | { |
607 | TRACE_SANITIZE (this); |
608 | return_trace (c->check_struct (this)); |
609 | } |
610 | |
611 | HBUINT24 startUnicodeValue; /* First value in this range. */ |
612 | HBUINT8 additionalCount; /* Number of additional values in this |
613 | * range. */ |
614 | public: |
615 | DEFINE_SIZE_STATIC (4); |
616 | }; |
617 | |
618 | struct DefaultUVS : SortedArrayOf<UnicodeValueRange, HBUINT32> |
619 | { |
620 | void collect_unicodes (hb_set_t *out) const |
621 | { |
622 | unsigned int count = len; |
623 | for (unsigned int i = 0; i < count; i++) |
624 | { |
625 | hb_codepoint_t first = arrayZ[i].startUnicodeValue; |
626 | hb_codepoint_t last = MIN ((hb_codepoint_t) (first + arrayZ[i].additionalCount), |
627 | (hb_codepoint_t) HB_UNICODE_MAX); |
628 | out->add_range (first, last); |
629 | } |
630 | } |
631 | |
632 | public: |
633 | DEFINE_SIZE_ARRAY (4, *this); |
634 | }; |
635 | |
/* One entry of a non-default UVS table: a base Unicode value and the
 * glyph to use for it. */
struct UVSMapping
{
  /* Comparison for binary search, delegated to unicodeValue's own cmp(). */
  int cmp (const hb_codepoint_t &codepoint) const
  {
    return unicodeValue.cmp (codepoint);
  }

  /* The record has a static size, so a struct bounds check suffices. */
  bool sanitize (hb_sanitize_context_t *c) const
  {
    TRACE_SANITIZE (this);
    return_trace (c->check_struct (this));
  }

  HBUINT24 unicodeValue; /* Base Unicode value of the UVS */
  GlyphID glyphID; /* Glyph ID of the UVS */
  public:
  DEFINE_SIZE_STATIC (5);
};
654 | |
655 | struct NonDefaultUVS : SortedArrayOf<UVSMapping, HBUINT32> |
656 | { |
657 | void collect_unicodes (hb_set_t *out) const |
658 | { |
659 | unsigned int count = len; |
660 | for (unsigned int i = 0; i < count; i++) |
661 | out->add (arrayZ[i].glyphID); |
662 | } |
663 | |
664 | public: |
665 | DEFINE_SIZE_ARRAY (4, *this); |
666 | }; |
667 | |
668 | struct VariationSelectorRecord |
669 | { |
670 | glyph_variant_t get_glyph (hb_codepoint_t codepoint, |
671 | hb_codepoint_t *glyph, |
672 | const void *base) const |
673 | { |
674 | if ((base+defaultUVS).bfind (codepoint)) |
675 | return GLYPH_VARIANT_USE_DEFAULT; |
676 | const UVSMapping &nonDefault = (base+nonDefaultUVS).bsearch (codepoint); |
677 | if (nonDefault.glyphID) |
678 | { |
679 | *glyph = nonDefault.glyphID; |
680 | return GLYPH_VARIANT_FOUND; |
681 | } |
682 | return GLYPH_VARIANT_NOT_FOUND; |
683 | } |
684 | |
685 | void collect_unicodes (hb_set_t *out, const void *base) const |
686 | { |
687 | (base+defaultUVS).collect_unicodes (out); |
688 | (base+nonDefaultUVS).collect_unicodes (out); |
689 | } |
690 | |
691 | int cmp (const hb_codepoint_t &variation_selector) const |
692 | { |
693 | return varSelector.cmp (variation_selector); |
694 | } |
695 | |
696 | bool sanitize (hb_sanitize_context_t *c, const void *base) const |
697 | { |
698 | TRACE_SANITIZE (this); |
699 | return_trace (c->check_struct (this) && |
700 | defaultUVS.sanitize (c, base) && |
701 | nonDefaultUVS.sanitize (c, base)); |
702 | } |
703 | |
704 | HBUINT24 varSelector; /* Variation selector. */ |
705 | LOffsetTo<DefaultUVS> |
706 | defaultUVS; /* Offset to Default UVS Table. May be 0. */ |
707 | LOffsetTo<NonDefaultUVS> |
708 | nonDefaultUVS; /* Offset to Non-Default UVS Table. May be 0. */ |
709 | public: |
710 | DEFINE_SIZE_STATIC (11); |
711 | }; |
712 | |
713 | struct CmapSubtableFormat14 |
714 | { |
715 | glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint, |
716 | hb_codepoint_t variation_selector, |
717 | hb_codepoint_t *glyph) const |
718 | { |
719 | return record.bsearch (variation_selector).get_glyph (codepoint, glyph, this); |
720 | } |
721 | |
722 | void collect_variation_selectors (hb_set_t *out) const |
723 | { |
724 | unsigned int count = record.len; |
725 | for (unsigned int i = 0; i < count; i++) |
726 | out->add (record.arrayZ[i].varSelector); |
727 | } |
728 | void collect_variation_unicodes (hb_codepoint_t variation_selector, |
729 | hb_set_t *out) const |
730 | { |
731 | record.bsearch (variation_selector).collect_unicodes (out, this); |
732 | } |
733 | |
734 | bool sanitize (hb_sanitize_context_t *c) const |
735 | { |
736 | TRACE_SANITIZE (this); |
737 | return_trace (c->check_struct (this) && |
738 | record.sanitize (c, this)); |
739 | } |
740 | |
741 | protected: |
742 | HBUINT16 format; /* Format number is set to 14. */ |
743 | HBUINT32 length; /* Byte length of this subtable. */ |
744 | SortedArrayOf<VariationSelectorRecord, HBUINT32> |
745 | record; /* Variation selector records; sorted |
746 | * in increasing order of `varSelector'. */ |
747 | public: |
748 | DEFINE_SIZE_ARRAY (10, record); |
749 | }; |
750 | |
751 | struct CmapSubtable |
752 | { |
753 | /* Note: We intentionally do NOT implement subtable formats 2 and 8. */ |
754 | |
755 | bool get_glyph (hb_codepoint_t codepoint, |
756 | hb_codepoint_t *glyph) const |
757 | { |
758 | switch (u.format) { |
759 | case 0: return u.format0 .get_glyph (codepoint, glyph); |
760 | case 4: return u.format4 .get_glyph (codepoint, glyph); |
761 | case 6: return u.format6 .get_glyph (codepoint, glyph); |
762 | case 10: return u.format10.get_glyph (codepoint, glyph); |
763 | case 12: return u.format12.get_glyph (codepoint, glyph); |
764 | case 13: return u.format13.get_glyph (codepoint, glyph); |
765 | case 14: |
766 | default: return false; |
767 | } |
768 | } |
769 | void collect_unicodes (hb_set_t *out) const |
770 | { |
771 | switch (u.format) { |
772 | case 0: u.format0 .collect_unicodes (out); return; |
773 | case 4: u.format4 .collect_unicodes (out); return; |
774 | case 6: u.format6 .collect_unicodes (out); return; |
775 | case 10: u.format10.collect_unicodes (out); return; |
776 | case 12: u.format12.collect_unicodes (out); return; |
777 | case 13: u.format13.collect_unicodes (out); return; |
778 | case 14: |
779 | default: return; |
780 | } |
781 | } |
782 | |
783 | bool sanitize (hb_sanitize_context_t *c) const |
784 | { |
785 | TRACE_SANITIZE (this); |
786 | if (!u.format.sanitize (c)) return_trace (false); |
787 | switch (u.format) { |
788 | case 0: return_trace (u.format0 .sanitize (c)); |
789 | case 4: return_trace (u.format4 .sanitize (c)); |
790 | case 6: return_trace (u.format6 .sanitize (c)); |
791 | case 10: return_trace (u.format10.sanitize (c)); |
792 | case 12: return_trace (u.format12.sanitize (c)); |
793 | case 13: return_trace (u.format13.sanitize (c)); |
794 | case 14: return_trace (u.format14.sanitize (c)); |
795 | default:return_trace (true); |
796 | } |
797 | } |
798 | |
799 | public: |
800 | union { |
801 | HBUINT16 format; /* Format identifier */ |
802 | CmapSubtableFormat0 format0; |
803 | CmapSubtableFormat4 format4; |
804 | CmapSubtableFormat6 format6; |
805 | CmapSubtableFormat10 format10; |
806 | CmapSubtableFormat12 format12; |
807 | CmapSubtableFormat13 format13; |
808 | CmapSubtableFormat14 format14; |
809 | } u; |
810 | public: |
811 | DEFINE_SIZE_UNION (2, format); |
812 | }; |
813 | |
814 | |
815 | struct EncodingRecord |
816 | { |
817 | int cmp (const EncodingRecord &other) const |
818 | { |
819 | int ret; |
820 | ret = platformID.cmp (other.platformID); |
821 | if (ret) return ret; |
822 | ret = encodingID.cmp (other.encodingID); |
823 | if (ret) return ret; |
824 | return 0; |
825 | } |
826 | |
827 | bool sanitize (hb_sanitize_context_t *c, const void *base) const |
828 | { |
829 | TRACE_SANITIZE (this); |
830 | return_trace (c->check_struct (this) && |
831 | subtable.sanitize (c, base)); |
832 | } |
833 | |
834 | HBUINT16 platformID; /* Platform ID. */ |
835 | HBUINT16 encodingID; /* Platform-specific encoding ID. */ |
836 | LOffsetTo<CmapSubtable> |
837 | subtable; /* Byte offset from beginning of table to the subtable for this encoding. */ |
838 | public: |
839 | DEFINE_SIZE_STATIC (8); |
840 | }; |
841 | |
842 | struct cmap |
843 | { |
844 | static constexpr hb_tag_t tableTag = HB_OT_TAG_cmap; |
845 | |
846 | struct subset_plan |
847 | { |
848 | size_t final_size () const |
849 | { |
850 | return 4 // header |
851 | + 8 * 3 // 3 EncodingRecord |
852 | + CmapSubtableFormat4::get_sub_table_size (this->format4_segments) |
853 | + CmapSubtableFormat12::get_sub_table_size (this->format12_groups); |
854 | } |
855 | |
856 | hb_vector_t<CmapSubtableFormat4::segment_plan> format4_segments; |
857 | hb_vector_t<CmapSubtableLongGroup> format12_groups; |
858 | }; |
859 | |
860 | bool _create_plan (const hb_subset_plan_t *plan, |
861 | subset_plan *cmap_plan) const |
862 | { |
863 | if (unlikely (!CmapSubtableFormat4::create_sub_table_plan (plan, &cmap_plan->format4_segments))) |
864 | return false; |
865 | |
866 | return CmapSubtableFormat12::create_sub_table_plan (plan, &cmap_plan->format12_groups); |
867 | } |
868 | |
869 | bool _subset (const hb_subset_plan_t *plan, |
870 | const subset_plan &cmap_subset_plan, |
871 | size_t dest_sz, |
872 | void *dest) const |
873 | { |
874 | hb_serialize_context_t c (dest, dest_sz); |
875 | |
876 | cmap *table = c.start_serialize<cmap> (); |
877 | if (unlikely (!c.extend_min (*table))) |
878 | { |
879 | return false; |
880 | } |
881 | |
882 | table->version.set (0); |
883 | |
884 | if (unlikely (!table->encodingRecord.serialize (&c, /* numTables */ 3))) |
885 | return false; |
886 | |
887 | // TODO(grieger): Convert the below to a for loop |
888 | |
889 | // Format 4, Plat 0 Encoding Record |
890 | EncodingRecord &format4_plat0_rec = table->encodingRecord[0]; |
891 | format4_plat0_rec.platformID.set (0); // Unicode |
892 | format4_plat0_rec.encodingID.set (3); |
893 | |
894 | // Format 4, Plat 3 Encoding Record |
895 | EncodingRecord &format4_plat3_rec = table->encodingRecord[1]; |
896 | format4_plat3_rec.platformID.set (3); // Windows |
897 | format4_plat3_rec.encodingID.set (1); // Unicode BMP |
898 | |
899 | // Format 12 Encoding Record |
900 | EncodingRecord &format12_rec = table->encodingRecord[2]; |
901 | format12_rec.platformID.set (3); // Windows |
902 | format12_rec.encodingID.set (10); // Unicode UCS-4 |
903 | |
904 | // Write out format 4 sub table |
905 | { |
906 | CmapSubtable &subtable = format4_plat0_rec.subtable.serialize (&c, table); |
907 | format4_plat3_rec.subtable.set (format4_plat0_rec.subtable); |
908 | subtable.u.format.set (4); |
909 | |
910 | CmapSubtableFormat4 &format4 = subtable.u.format4; |
911 | if (unlikely (!format4.serialize (&c, plan, cmap_subset_plan.format4_segments))) |
912 | return false; |
913 | } |
914 | |
915 | // Write out format 12 sub table. |
916 | { |
917 | CmapSubtable &subtable = format12_rec.subtable.serialize (&c, table); |
918 | subtable.u.format.set (12); |
919 | |
920 | CmapSubtableFormat12 &format12 = subtable.u.format12; |
921 | if (unlikely (!format12.serialize (&c, cmap_subset_plan.format12_groups))) |
922 | return false; |
923 | } |
924 | |
925 | c.end_serialize (); |
926 | |
927 | return true; |
928 | } |
929 | |
930 | bool subset (hb_subset_plan_t *plan) const |
931 | { |
932 | subset_plan cmap_subset_plan; |
933 | |
934 | if (unlikely (!_create_plan (plan, &cmap_subset_plan))) |
935 | { |
936 | DEBUG_MSG(SUBSET, nullptr, "Failed to generate a cmap subsetting plan." ); |
937 | return false; |
938 | } |
939 | |
940 | // We now know how big our blob needs to be |
941 | size_t dest_sz = cmap_subset_plan.final_size (); |
942 | void *dest = malloc (dest_sz); |
943 | if (unlikely (!dest)) { |
944 | DEBUG_MSG(SUBSET, nullptr, "Unable to alloc %lu for cmap subset output" , (unsigned long) dest_sz); |
945 | return false; |
946 | } |
947 | |
948 | if (unlikely (!_subset (plan, cmap_subset_plan, dest_sz, dest))) |
949 | { |
950 | DEBUG_MSG(SUBSET, nullptr, "Failed to perform subsetting of cmap." ); |
951 | free (dest); |
952 | return false; |
953 | } |
954 | |
955 | // all done, write the blob into dest |
956 | hb_blob_t *cmap_prime = hb_blob_create ((const char *) dest, |
957 | dest_sz, |
958 | HB_MEMORY_MODE_READONLY, |
959 | dest, |
960 | free); |
961 | bool result = plan->add_table (HB_OT_TAG_cmap, cmap_prime); |
962 | hb_blob_destroy (cmap_prime); |
963 | return result; |
964 | } |
965 | |
966 | const CmapSubtable *find_best_subtable (bool *symbol = nullptr) const |
967 | { |
968 | if (symbol) *symbol = false; |
969 | |
970 | const CmapSubtable *subtable; |
971 | |
972 | /* 32-bit subtables. */ |
973 | if ((subtable = this->find_subtable (3, 10))) return subtable; |
974 | if ((subtable = this->find_subtable (0, 6))) return subtable; |
975 | if ((subtable = this->find_subtable (0, 4))) return subtable; |
976 | |
977 | /* 16-bit subtables. */ |
978 | if ((subtable = this->find_subtable (3, 1))) return subtable; |
979 | if ((subtable = this->find_subtable (0, 3))) return subtable; |
980 | if ((subtable = this->find_subtable (0, 2))) return subtable; |
981 | if ((subtable = this->find_subtable (0, 1))) return subtable; |
982 | if ((subtable = this->find_subtable (0, 0))) return subtable; |
983 | |
984 | /* Symbol subtable. */ |
985 | if ((subtable = this->find_subtable (3, 0))) |
986 | { |
987 | if (symbol) *symbol = true; |
988 | return subtable; |
989 | } |
990 | |
991 | /* Meh. */ |
992 | return &Null (CmapSubtable); |
993 | } |
994 | |
995 | struct accelerator_t |
996 | { |
997 | void init (hb_face_t *face) |
998 | { |
999 | this->table = hb_sanitize_context_t ().reference_table<cmap> (face); |
1000 | bool symbol; |
1001 | this->subtable = table->find_best_subtable (&symbol); |
1002 | this->subtable_uvs = &Null (CmapSubtableFormat14); |
1003 | { |
1004 | const CmapSubtable *st = table->find_subtable (0, 5); |
1005 | if (st && st->u.format == 14) |
1006 | subtable_uvs = &st->u.format14; |
1007 | } |
1008 | |
1009 | this->get_glyph_data = subtable; |
1010 | if (unlikely (symbol)) |
1011 | { |
1012 | this->get_glyph_funcZ = get_glyph_from_symbol<CmapSubtable>; |
1013 | } else { |
1014 | switch (subtable->u.format) { |
1015 | /* Accelerate format 4 and format 12. */ |
1016 | default: |
1017 | this->get_glyph_funcZ = get_glyph_from<CmapSubtable>; |
1018 | break; |
1019 | case 12: |
1020 | this->get_glyph_funcZ = get_glyph_from<CmapSubtableFormat12>; |
1021 | break; |
1022 | case 4: |
1023 | { |
1024 | this->format4_accel.init (&subtable->u.format4); |
1025 | this->get_glyph_data = &this->format4_accel; |
1026 | this->get_glyph_funcZ = this->format4_accel.get_glyph_func; |
1027 | } |
1028 | break; |
1029 | } |
1030 | } |
1031 | } |
1032 | |
1033 | void fini () { this->table.destroy (); } |
1034 | |
1035 | bool get_nominal_glyph (hb_codepoint_t unicode, |
1036 | hb_codepoint_t *glyph) const |
1037 | { |
1038 | if (unlikely (!this->get_glyph_funcZ)) return false; |
1039 | return this->get_glyph_funcZ (this->get_glyph_data, unicode, glyph); |
1040 | } |
1041 | unsigned int get_nominal_glyphs (unsigned int count, |
1042 | const hb_codepoint_t *first_unicode, |
1043 | unsigned int unicode_stride, |
1044 | hb_codepoint_t *first_glyph, |
1045 | unsigned int glyph_stride) const |
1046 | { |
1047 | if (unlikely (!this->get_glyph_funcZ)) return 0; |
1048 | |
1049 | hb_cmap_get_glyph_func_t get_glyph_funcZ = this->get_glyph_funcZ; |
1050 | const void *get_glyph_data = this->get_glyph_data; |
1051 | |
1052 | unsigned int done; |
1053 | for (done = 0; |
1054 | done < count && get_glyph_funcZ (get_glyph_data, *first_unicode, first_glyph); |
1055 | done++) |
1056 | { |
1057 | first_unicode = &StructAtOffsetUnaligned<hb_codepoint_t> (first_unicode, unicode_stride); |
1058 | first_glyph = &StructAtOffsetUnaligned<hb_codepoint_t> (first_glyph, glyph_stride); |
1059 | } |
1060 | return done; |
1061 | } |
1062 | |
1063 | bool get_variation_glyph (hb_codepoint_t unicode, |
1064 | hb_codepoint_t variation_selector, |
1065 | hb_codepoint_t *glyph) const |
1066 | { |
1067 | switch (this->subtable_uvs->get_glyph_variant (unicode, |
1068 | variation_selector, |
1069 | glyph)) |
1070 | { |
1071 | case GLYPH_VARIANT_NOT_FOUND: return false; |
1072 | case GLYPH_VARIANT_FOUND: return true; |
1073 | case GLYPH_VARIANT_USE_DEFAULT: break; |
1074 | } |
1075 | |
1076 | return get_nominal_glyph (unicode, glyph); |
1077 | } |
1078 | |
1079 | void collect_unicodes (hb_set_t *out) const |
1080 | { |
1081 | subtable->collect_unicodes (out); |
1082 | } |
1083 | void collect_variation_selectors (hb_set_t *out) const |
1084 | { |
1085 | subtable_uvs->collect_variation_selectors (out); |
1086 | } |
1087 | void collect_variation_unicodes (hb_codepoint_t variation_selector, |
1088 | hb_set_t *out) const |
1089 | { |
1090 | subtable_uvs->collect_variation_unicodes (variation_selector, out); |
1091 | } |
1092 | |
1093 | protected: |
1094 | typedef bool (*hb_cmap_get_glyph_func_t) (const void *obj, |
1095 | hb_codepoint_t codepoint, |
1096 | hb_codepoint_t *glyph); |
1097 | |
1098 | template <typename Type> |
1099 | static bool get_glyph_from (const void *obj, |
1100 | hb_codepoint_t codepoint, |
1101 | hb_codepoint_t *glyph) |
1102 | { |
1103 | const Type *typed_obj = (const Type *) obj; |
1104 | return typed_obj->get_glyph (codepoint, glyph); |
1105 | } |
1106 | |
1107 | template <typename Type> |
1108 | static bool get_glyph_from_symbol (const void *obj, |
1109 | hb_codepoint_t codepoint, |
1110 | hb_codepoint_t *glyph) |
1111 | { |
1112 | const Type *typed_obj = (const Type *) obj; |
1113 | if (likely (typed_obj->get_glyph (codepoint, glyph))) |
1114 | return true; |
1115 | |
1116 | if (codepoint <= 0x00FFu) |
1117 | { |
1118 | /* For symbol-encoded OpenType fonts, we duplicate the |
1119 | * U+F000..F0FF range at U+0000..U+00FF. That's what |
1120 | * Windows seems to do, and that's hinted about at: |
1121 | * https://docs.microsoft.com/en-us/typography/opentype/spec/recom |
1122 | * under "Non-Standard (Symbol) Fonts". */ |
1123 | return typed_obj->get_glyph (0xF000u + codepoint, glyph); |
1124 | } |
1125 | |
1126 | return false; |
1127 | } |
1128 | |
1129 | private: |
1130 | hb_nonnull_ptr_t<const CmapSubtable> subtable; |
1131 | hb_nonnull_ptr_t<const CmapSubtableFormat14> subtable_uvs; |
1132 | |
1133 | hb_cmap_get_glyph_func_t get_glyph_funcZ; |
1134 | const void *get_glyph_data; |
1135 | |
1136 | CmapSubtableFormat4::accelerator_t format4_accel; |
1137 | |
1138 | hb_blob_ptr_t<cmap> table; |
1139 | }; |
1140 | |
1141 | protected: |
1142 | |
1143 | const CmapSubtable *find_subtable (unsigned int platform_id, |
1144 | unsigned int encoding_id) const |
1145 | { |
1146 | EncodingRecord key; |
1147 | key.platformID.set (platform_id); |
1148 | key.encodingID.set (encoding_id); |
1149 | |
1150 | const EncodingRecord &result = encodingRecord.bsearch (key); |
1151 | if (!result.subtable) |
1152 | return nullptr; |
1153 | |
1154 | return &(this+result.subtable); |
1155 | } |
1156 | |
1157 | public: |
1158 | |
1159 | bool sanitize (hb_sanitize_context_t *c) const |
1160 | { |
1161 | TRACE_SANITIZE (this); |
1162 | return_trace (c->check_struct (this) && |
1163 | likely (version == 0) && |
1164 | encodingRecord.sanitize (c, this)); |
1165 | } |
1166 | |
protected:
  HBUINT16 version; /* Table version number (0); sanitize () rejects
		     * any other value. */
  SortedArrayOf<EncodingRecord>
  encodingRecord; /* Encoding tables; find_subtable () looks these up
		   * with bsearch () using a platformID/encodingID key. */
public:
  /* NOTE(review): the 4 is presumably the size of the fixed part (version
   * plus the array's count field) -- confirm against SortedArrayOf. */
  DEFINE_SIZE_ARRAY (4, encodingRecord);
1173 | }; |
1174 | |
1175 | struct cmap_accelerator_t : cmap::accelerator_t {}; |
1176 | |
1177 | } /* namespace OT */ |
1178 | |
1179 | |
1180 | #endif /* HB_OT_CMAP_TABLE_HH */ |
1181 | |