1 | /* |
2 | * Copyright © 2018 Adobe Inc. |
3 | * |
4 | * This is part of HarfBuzz, a text shaping library. |
5 | * |
6 | * Permission is hereby granted, without written agreement and without |
7 | * license or royalty fees, to use, copy, modify, and distribute this |
8 | * software and its documentation for any purpose, provided that the |
9 | * above copyright notice and the following two paragraphs appear in |
10 | * all copies of this software. |
11 | * |
12 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 | * DAMAGE. |
17 | * |
18 | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 | * |
24 | * Adobe Author(s): Michiharu Ariza |
25 | */ |
26 | |
27 | #ifndef HB_SUBSET_CFF_COMMON_HH |
28 | #define HB_SUBSET_CFF_COMMON_HH |
29 | |
30 | #include "hb.hh" |
31 | |
32 | #include "hb-subset-plan.hh" |
33 | #include "hb-cff-interp-cs-common.hh" |
34 | |
35 | namespace CFF { |
36 | |
37 | /* Used for writing a temporary charstring */ |
38 | struct str_encoder_t |
39 | { |
40 | str_encoder_t (str_buff_t &buff_) |
41 | : buff (buff_) {} |
42 | |
43 | void reset () { buff.reset (); } |
44 | |
45 | void encode_byte (unsigned char b) |
46 | { |
47 | if (likely ((signed) buff.length < buff.allocated)) |
48 | buff.arrayZ[buff.length++] = b; |
49 | else |
50 | buff.push (b); |
51 | } |
52 | |
53 | void encode_int (int v) |
54 | { |
55 | if ((-1131 <= v) && (v <= 1131)) |
56 | { |
57 | if ((-107 <= v) && (v <= 107)) |
58 | encode_byte (v + 139); |
59 | else if (v > 0) |
60 | { |
61 | v -= 108; |
62 | encode_byte ((v >> 8) + OpCode_TwoBytePosInt0); |
63 | encode_byte (v & 0xFF); |
64 | } |
65 | else |
66 | { |
67 | v = -v - 108; |
68 | encode_byte ((v >> 8) + OpCode_TwoByteNegInt0); |
69 | encode_byte (v & 0xFF); |
70 | } |
71 | } |
72 | else |
73 | { |
74 | if (unlikely (v < -32768)) |
75 | v = -32768; |
76 | else if (unlikely (v > 32767)) |
77 | v = 32767; |
78 | encode_byte (OpCode_shortint); |
79 | encode_byte ((v >> 8) & 0xFF); |
80 | encode_byte (v & 0xFF); |
81 | } |
82 | } |
83 | |
84 | // Encode number for CharString |
85 | void encode_num_cs (const number_t& n) |
86 | { |
87 | if (n.in_int_range ()) |
88 | { |
89 | encode_int (n.to_int ()); |
90 | } |
91 | else |
92 | { |
93 | int32_t v = n.to_fixed (); |
94 | encode_byte (OpCode_fixedcs); |
95 | encode_byte ((v >> 24) & 0xFF); |
96 | encode_byte ((v >> 16) & 0xFF); |
97 | encode_byte ((v >> 8) & 0xFF); |
98 | encode_byte (v & 0xFF); |
99 | } |
100 | } |
101 | |
102 | // Encode number for TopDict / Private |
103 | void encode_num_tp (const number_t& n) |
104 | { |
105 | if (n.in_int_range ()) |
106 | { |
107 | // TODO longint |
108 | encode_int (n.to_int ()); |
109 | } |
110 | else |
111 | { |
112 | // Sigh. BCD |
113 | // https://learn.microsoft.com/en-us/typography/opentype/spec/cff2#table-5-nibble-definitions |
114 | double v = n.to_real (); |
115 | encode_byte (OpCode_BCD); |
116 | |
117 | // Based on: |
118 | // https://github.com/fonttools/fonttools/blob/97ed3a61cde03e17b8be36f866192fbd56f1d1a7/Lib/fontTools/misc/psCharStrings.py#L265-L294 |
119 | |
120 | char buf[16]; |
121 | /* FontTools has the following comment: |
122 | * |
123 | * # Note: 14 decimal digits seems to be the limitation for CFF real numbers |
124 | * # in macOS. However, we use 8 here to match the implementation of AFDKO. |
125 | * |
126 | * We use 8 here to match FontTools X-). |
127 | */ |
128 | |
129 | hb_locale_t clocale HB_UNUSED; |
130 | hb_locale_t oldlocale HB_UNUSED; |
131 | oldlocale = hb_uselocale (clocale = newlocale (LC_ALL_MASK, "C" , NULL)); |
132 | snprintf (buf, sizeof (buf), "%.8G" , v); |
133 | (void) hb_uselocale (((void) freelocale (clocale), oldlocale)); |
134 | |
135 | char *s = buf; |
136 | if (s[0] == '0' && s[1] == '.') |
137 | s++; |
138 | else if (s[0] == '-' && s[1] == '0' && s[2] == '.') |
139 | { |
140 | s[1] = '-'; |
141 | s++; |
142 | } |
143 | hb_vector_t<char> nibbles; |
144 | while (*s) |
145 | { |
146 | char c = s[0]; |
147 | s++; |
148 | |
149 | switch (c) |
150 | { |
151 | case 'E': |
152 | { |
153 | char c2 = *s; |
154 | if (c2 == '-') |
155 | { |
156 | s++; |
157 | nibbles.push (0x0C); // E- |
158 | continue; |
159 | } |
160 | if (c2 == '+') |
161 | s++; |
162 | nibbles.push (0x0B); // E |
163 | continue; |
164 | } |
165 | |
166 | case '.': case ',': // Comma for some European locales in case no uselocale available. |
167 | nibbles.push (0x0A); // . |
168 | continue; |
169 | |
170 | case '-': |
171 | nibbles.push (0x0E); // . |
172 | continue; |
173 | } |
174 | |
175 | nibbles.push (c - '0'); |
176 | } |
177 | nibbles.push (0x0F); |
178 | if (nibbles.length % 2) |
179 | nibbles.push (0x0F); |
180 | |
181 | unsigned count = nibbles.length; |
182 | for (unsigned i = 0; i < count; i += 2) |
183 | encode_byte ((nibbles[i] << 4) | nibbles[i+1]); |
184 | } |
185 | } |
186 | |
187 | void encode_op (op_code_t op) |
188 | { |
189 | if (Is_OpCode_ESC (op)) |
190 | { |
191 | encode_byte (OpCode_escape); |
192 | encode_byte (Unmake_OpCode_ESC (op)); |
193 | } |
194 | else |
195 | encode_byte (op); |
196 | } |
197 | |
198 | void copy_str (const unsigned char *str, unsigned length) |
199 | { |
200 | assert ((signed) (buff.length + length) <= buff.allocated); |
201 | hb_memcpy (buff.arrayZ + buff.length, str, length); |
202 | buff.length += length; |
203 | } |
204 | |
205 | bool in_error () const { return buff.in_error (); } |
206 | |
207 | protected: |
208 | |
209 | str_buff_t &buff; |
210 | }; |
211 | |
212 | struct cff_sub_table_info_t { |
213 | cff_sub_table_info_t () |
214 | : fd_array_link (0), |
215 | char_strings_link (0) |
216 | { |
217 | fd_select.init (); |
218 | } |
219 | |
220 | table_info_t fd_select; |
221 | objidx_t fd_array_link; |
222 | objidx_t char_strings_link; |
223 | }; |
224 | |
225 | template <typename OPSTR=op_str_t> |
226 | struct cff_top_dict_op_serializer_t : op_serializer_t |
227 | { |
228 | bool serialize (hb_serialize_context_t *c, |
229 | const OPSTR &opstr, |
230 | const cff_sub_table_info_t &info) const |
231 | { |
232 | TRACE_SERIALIZE (this); |
233 | |
234 | switch (opstr.op) |
235 | { |
236 | case OpCode_CharStrings: |
237 | return_trace (FontDict::serialize_link4_op(c, opstr.op, info.char_strings_link, whence_t::Absolute)); |
238 | |
239 | case OpCode_FDArray: |
240 | return_trace (FontDict::serialize_link4_op(c, opstr.op, info.fd_array_link, whence_t::Absolute)); |
241 | |
242 | case OpCode_FDSelect: |
243 | return_trace (FontDict::serialize_link4_op(c, opstr.op, info.fd_select.link, whence_t::Absolute)); |
244 | |
245 | default: |
246 | return_trace (copy_opstr (c, opstr)); |
247 | } |
248 | return_trace (true); |
249 | } |
250 | }; |
251 | |
252 | struct cff_font_dict_op_serializer_t : op_serializer_t |
253 | { |
254 | bool serialize (hb_serialize_context_t *c, |
255 | const op_str_t &opstr, |
256 | const table_info_t &privateDictInfo) const |
257 | { |
258 | TRACE_SERIALIZE (this); |
259 | |
260 | if (opstr.op == OpCode_Private) |
261 | { |
262 | /* serialize the private dict size & offset as 2-byte & 4-byte integers */ |
263 | return_trace (UnsizedByteStr::serialize_int2 (c, privateDictInfo.size) && |
264 | Dict::serialize_link4_op (c, opstr.op, privateDictInfo.link, whence_t::Absolute)); |
265 | } |
266 | else |
267 | { |
268 | unsigned char *d = c->allocate_size<unsigned char> (opstr.length); |
269 | if (unlikely (!d)) return_trace (false); |
270 | /* Faster than hb_memcpy for small strings. */ |
271 | for (unsigned i = 0; i < opstr.length; i++) |
272 | d[i] = opstr.ptr[i]; |
273 | //hb_memcpy (d, opstr.ptr, opstr.length); |
274 | } |
275 | return_trace (true); |
276 | } |
277 | }; |
278 | |
279 | struct flatten_param_t |
280 | { |
281 | str_buff_t &flatStr; |
282 | bool drop_hints; |
283 | const hb_subset_plan_t *plan; |
284 | }; |
285 | |
286 | template <typename ACC, typename ENV, typename OPSET, op_code_t endchar_op=OpCode_Invalid> |
287 | struct subr_flattener_t |
288 | { |
289 | subr_flattener_t (const ACC &acc_, |
290 | const hb_subset_plan_t *plan_) |
291 | : acc (acc_), plan (plan_) {} |
292 | |
293 | bool flatten (str_buff_vec_t &flat_charstrings) |
294 | { |
295 | unsigned count = plan->num_output_glyphs (); |
296 | if (!flat_charstrings.resize_exact (count)) |
297 | return false; |
298 | for (unsigned int i = 0; i < count; i++) |
299 | { |
300 | hb_codepoint_t glyph; |
301 | if (!plan->old_gid_for_new_gid (i, &glyph)) |
302 | { |
303 | /* add an endchar only charstring for a missing glyph if CFF1 */ |
304 | if (endchar_op != OpCode_Invalid) flat_charstrings[i].push (endchar_op); |
305 | continue; |
306 | } |
307 | const hb_ubytes_t str = (*acc.charStrings)[glyph]; |
308 | unsigned int fd = acc.fdSelect->get_fd (glyph); |
309 | if (unlikely (fd >= acc.fdCount)) |
310 | return false; |
311 | |
312 | |
313 | ENV env (str, acc, fd, |
314 | plan->normalized_coords.arrayZ, plan->normalized_coords.length); |
315 | cs_interpreter_t<ENV, OPSET, flatten_param_t> interp (env); |
316 | flatten_param_t param = { |
317 | flat_charstrings.arrayZ[i], |
318 | (bool) (plan->flags & HB_SUBSET_FLAGS_NO_HINTING), |
319 | plan |
320 | }; |
321 | if (unlikely (!interp.interpret (param))) |
322 | return false; |
323 | } |
324 | return true; |
325 | } |
326 | |
327 | const ACC &acc; |
328 | const hb_subset_plan_t *plan; |
329 | }; |
330 | |
331 | struct subr_closures_t |
332 | { |
333 | subr_closures_t (unsigned int fd_count) : global_closure (), local_closures () |
334 | { |
335 | local_closures.resize_exact (fd_count); |
336 | } |
337 | |
338 | void reset () |
339 | { |
340 | global_closure.clear(); |
341 | for (unsigned int i = 0; i < local_closures.length; i++) |
342 | local_closures[i].clear(); |
343 | } |
344 | |
345 | bool in_error () const { return local_closures.in_error (); } |
346 | hb_set_t global_closure; |
347 | hb_vector_t<hb_set_t> local_closures; |
348 | }; |
349 | |
350 | struct parsed_cs_op_t : op_str_t |
351 | { |
352 | parsed_cs_op_t (unsigned int subr_num_ = 0) : |
353 | subr_num (subr_num_) {} |
354 | |
355 | bool is_hinting () const { return hinting_flag; } |
356 | void set_hinting () { hinting_flag = true; } |
357 | |
358 | /* The layout of this struct is designed to fit within the |
359 | * padding of op_str_t! */ |
360 | |
361 | protected: |
362 | bool hinting_flag = false; |
363 | |
364 | public: |
365 | uint16_t subr_num; |
366 | }; |
367 | |
368 | struct parsed_cs_str_t : parsed_values_t<parsed_cs_op_t> |
369 | { |
370 | parsed_cs_str_t () : |
371 | parsed (false), |
372 | hint_dropped (false), |
373 | has_prefix_ (false), |
374 | has_calls_ (false) |
375 | { |
376 | SUPER::init (); |
377 | } |
378 | |
379 | void add_op (op_code_t op, const byte_str_ref_t& str_ref) |
380 | { |
381 | if (!is_parsed ()) |
382 | SUPER::add_op (op, str_ref); |
383 | } |
384 | |
385 | void add_call_op (op_code_t op, const byte_str_ref_t& str_ref, unsigned int subr_num) |
386 | { |
387 | if (!is_parsed ()) |
388 | { |
389 | has_calls_ = true; |
390 | |
391 | /* Pop the subroutine number. */ |
392 | values.pop (); |
393 | |
394 | SUPER::add_op (op, str_ref, {subr_num}); |
395 | } |
396 | } |
397 | |
398 | void set_prefix (const number_t &num, op_code_t op = OpCode_Invalid) |
399 | { |
400 | has_prefix_ = true; |
401 | prefix_op_ = op; |
402 | prefix_num_ = num; |
403 | } |
404 | |
405 | bool at_end (unsigned int pos) const |
406 | { |
407 | return ((pos + 1 >= values.length) /* CFF2 */ |
408 | || (values[pos + 1].op == OpCode_return)); |
409 | } |
410 | |
411 | bool is_parsed () const { return parsed; } |
412 | void set_parsed () { parsed = true; } |
413 | |
414 | bool is_hint_dropped () const { return hint_dropped; } |
415 | void set_hint_dropped () { hint_dropped = true; } |
416 | |
417 | bool is_vsindex_dropped () const { return vsindex_dropped; } |
418 | void set_vsindex_dropped () { vsindex_dropped = true; } |
419 | |
420 | bool has_prefix () const { return has_prefix_; } |
421 | op_code_t prefix_op () const { return prefix_op_; } |
422 | const number_t &prefix_num () const { return prefix_num_; } |
423 | |
424 | bool has_calls () const { return has_calls_; } |
425 | |
426 | void compact () |
427 | { |
428 | unsigned count = values.length; |
429 | if (!count) return; |
430 | auto &opstr = values.arrayZ; |
431 | unsigned j = 0; |
432 | for (unsigned i = 1; i < count; i++) |
433 | { |
434 | /* See if we can combine op j and op i. */ |
435 | bool combine = |
436 | (opstr[j].op != OpCode_callsubr && opstr[j].op != OpCode_callgsubr) && |
437 | (opstr[i].op != OpCode_callsubr && opstr[i].op != OpCode_callgsubr) && |
438 | (opstr[j].is_hinting () == opstr[i].is_hinting ()) && |
439 | (opstr[j].ptr + opstr[j].length == opstr[i].ptr) && |
440 | (opstr[j].length + opstr[i].length <= 255); |
441 | |
442 | if (combine) |
443 | { |
444 | opstr[j].length += opstr[i].length; |
445 | opstr[j].op = OpCode_Invalid; |
446 | } |
447 | else |
448 | { |
449 | opstr[++j] = opstr[i]; |
450 | } |
451 | } |
452 | values.shrink (j + 1); |
453 | } |
454 | |
455 | protected: |
456 | bool parsed : 1; |
457 | bool hint_dropped : 1; |
458 | bool vsindex_dropped : 1; |
459 | bool has_prefix_ : 1; |
460 | bool has_calls_ : 1; |
461 | op_code_t prefix_op_; |
462 | number_t prefix_num_; |
463 | |
464 | private: |
465 | typedef parsed_values_t<parsed_cs_op_t> SUPER; |
466 | }; |
467 | |
468 | struct parsed_cs_str_vec_t : hb_vector_t<parsed_cs_str_t> |
469 | { |
470 | private: |
471 | typedef hb_vector_t<parsed_cs_str_t> SUPER; |
472 | }; |
473 | |
474 | struct cff_subset_accelerator_t |
475 | { |
476 | static cff_subset_accelerator_t* create ( |
477 | hb_blob_t* original_blob, |
478 | const parsed_cs_str_vec_t& parsed_charstrings, |
479 | const parsed_cs_str_vec_t& parsed_global_subrs, |
480 | const hb_vector_t<parsed_cs_str_vec_t>& parsed_local_subrs) { |
481 | cff_subset_accelerator_t* accel = |
482 | (cff_subset_accelerator_t*) hb_malloc (sizeof(cff_subset_accelerator_t)); |
483 | if (unlikely (!accel)) return nullptr; |
484 | new (accel) cff_subset_accelerator_t (original_blob, |
485 | parsed_charstrings, |
486 | parsed_global_subrs, |
487 | parsed_local_subrs); |
488 | return accel; |
489 | } |
490 | |
491 | static void destroy (void* value) { |
492 | if (!value) return; |
493 | |
494 | cff_subset_accelerator_t* accel = (cff_subset_accelerator_t*) value; |
495 | accel->~cff_subset_accelerator_t (); |
496 | hb_free (accel); |
497 | } |
498 | |
499 | cff_subset_accelerator_t( |
500 | hb_blob_t* original_blob_, |
501 | const parsed_cs_str_vec_t& parsed_charstrings_, |
502 | const parsed_cs_str_vec_t& parsed_global_subrs_, |
503 | const hb_vector_t<parsed_cs_str_vec_t>& parsed_local_subrs_) |
504 | { |
505 | parsed_charstrings = parsed_charstrings_; |
506 | parsed_global_subrs = parsed_global_subrs_; |
507 | parsed_local_subrs = parsed_local_subrs_; |
508 | |
509 | // the parsed charstrings point to memory in the original CFF table so we must hold a reference |
510 | // to it to keep the memory valid. |
511 | original_blob = hb_blob_reference (original_blob_); |
512 | } |
513 | |
514 | ~cff_subset_accelerator_t() |
515 | { |
516 | hb_blob_destroy (original_blob); |
517 | auto *mapping = glyph_to_sid_map.get_relaxed (); |
518 | if (mapping) |
519 | { |
520 | mapping->~glyph_to_sid_map_t (); |
521 | hb_free (mapping); |
522 | } |
523 | } |
524 | |
525 | parsed_cs_str_vec_t parsed_charstrings; |
526 | parsed_cs_str_vec_t parsed_global_subrs; |
527 | hb_vector_t<parsed_cs_str_vec_t> parsed_local_subrs; |
528 | mutable hb_atomic_ptr_t<glyph_to_sid_map_t> glyph_to_sid_map; |
529 | |
530 | private: |
531 | hb_blob_t* original_blob; |
532 | }; |
533 | |
534 | struct subr_subset_param_t |
535 | { |
536 | subr_subset_param_t (parsed_cs_str_t *parsed_charstring_, |
537 | parsed_cs_str_vec_t *parsed_global_subrs_, |
538 | parsed_cs_str_vec_t *parsed_local_subrs_, |
539 | hb_set_t *global_closure_, |
540 | hb_set_t *local_closure_, |
541 | bool drop_hints_) : |
542 | current_parsed_str (parsed_charstring_), |
543 | parsed_charstring (parsed_charstring_), |
544 | parsed_global_subrs (parsed_global_subrs_), |
545 | parsed_local_subrs (parsed_local_subrs_), |
546 | global_closure (global_closure_), |
547 | local_closure (local_closure_), |
548 | drop_hints (drop_hints_) {} |
549 | |
550 | parsed_cs_str_t *get_parsed_str_for_context (call_context_t &context) |
551 | { |
552 | switch (context.type) |
553 | { |
554 | case CSType_CharString: |
555 | return parsed_charstring; |
556 | |
557 | case CSType_LocalSubr: |
558 | if (likely (context.subr_num < parsed_local_subrs->length)) |
559 | return &(*parsed_local_subrs)[context.subr_num]; |
560 | break; |
561 | |
562 | case CSType_GlobalSubr: |
563 | if (likely (context.subr_num < parsed_global_subrs->length)) |
564 | return &(*parsed_global_subrs)[context.subr_num]; |
565 | break; |
566 | } |
567 | return nullptr; |
568 | } |
569 | |
570 | template <typename ENV> |
571 | void set_current_str (ENV &env, bool calling) |
572 | { |
573 | parsed_cs_str_t *parsed_str = get_parsed_str_for_context (env.context); |
574 | if (unlikely (!parsed_str)) |
575 | { |
576 | env.set_error (); |
577 | return; |
578 | } |
579 | /* If the called subroutine is parsed partially but not completely yet, |
580 | * it must be because we are calling it recursively. |
581 | * Handle it as an error. */ |
582 | if (unlikely (calling && !parsed_str->is_parsed () && (parsed_str->values.length > 0))) |
583 | env.set_error (); |
584 | else |
585 | { |
586 | if (!parsed_str->is_parsed ()) |
587 | parsed_str->alloc (env.str_ref.total_size ()); |
588 | current_parsed_str = parsed_str; |
589 | } |
590 | } |
591 | |
592 | parsed_cs_str_t *current_parsed_str; |
593 | |
594 | parsed_cs_str_t *parsed_charstring; |
595 | parsed_cs_str_vec_t *parsed_global_subrs; |
596 | parsed_cs_str_vec_t *parsed_local_subrs; |
597 | hb_set_t *global_closure; |
598 | hb_set_t *local_closure; |
599 | bool drop_hints; |
600 | }; |
601 | |
602 | struct subr_remap_t : hb_inc_bimap_t |
603 | { |
604 | void create (const hb_set_t *closure) |
605 | { |
606 | /* create a remapping of subroutine numbers from old to new. |
607 | * no optimization based on usage counts. fonttools doesn't appear doing that either. |
608 | */ |
609 | |
610 | alloc (closure->get_population ()); |
611 | for (auto old_num : *closure) |
612 | add (old_num); |
613 | |
614 | if (get_population () < 1240) |
615 | bias = 107; |
616 | else if (get_population () < 33900) |
617 | bias = 1131; |
618 | else |
619 | bias = 32768; |
620 | } |
621 | |
622 | int biased_num (unsigned int old_num) const |
623 | { |
624 | hb_codepoint_t new_num = get (old_num); |
625 | return (int)new_num - bias; |
626 | } |
627 | |
628 | protected: |
629 | int bias; |
630 | }; |
631 | |
632 | struct subr_remaps_t |
633 | { |
634 | subr_remaps_t (unsigned int fdCount) |
635 | { |
636 | local_remaps.resize (fdCount); |
637 | } |
638 | |
639 | bool in_error() |
640 | { |
641 | return local_remaps.in_error (); |
642 | } |
643 | |
644 | void create (subr_closures_t& closures) |
645 | { |
646 | global_remap.create (&closures.global_closure); |
647 | for (unsigned int i = 0; i < local_remaps.length; i++) |
648 | local_remaps.arrayZ[i].create (&closures.local_closures[i]); |
649 | } |
650 | |
651 | subr_remap_t global_remap; |
652 | hb_vector_t<subr_remap_t> local_remaps; |
653 | }; |
654 | |
655 | template <typename SUBSETTER, typename SUBRS, typename ACC, typename ENV, typename OPSET, op_code_t endchar_op=OpCode_Invalid> |
656 | struct subr_subsetter_t |
657 | { |
658 | subr_subsetter_t (ACC &acc_, const hb_subset_plan_t *plan_) |
659 | : acc (acc_), plan (plan_), closures(acc_.fdCount), |
660 | remaps(acc_.fdCount) |
661 | {} |
662 | |
663 | /* Subroutine subsetting with --no-desubroutinize runs in phases: |
664 | * |
665 | * 1. execute charstrings/subroutines to determine subroutine closures |
666 | * 2. parse out all operators and numbers |
667 | * 3. mark hint operators and operands for removal if --no-hinting |
668 | * 4. re-encode all charstrings and subroutines with new subroutine numbers |
669 | * |
670 | * Phases #1 and #2 are done at the same time in collect_subrs (). |
671 | * Phase #3 walks charstrings/subroutines forward then backward (hence parsing required), |
672 | * because we can't tell if a number belongs to a hint op until we see the first moveto. |
673 | * |
674 | * Assumption: a callsubr/callgsubr operator must immediately follow a (biased) subroutine number |
675 | * within the same charstring/subroutine, e.g., not split across a charstring and a subroutine. |
676 | */ |
677 | bool subset (void) |
678 | { |
679 | unsigned fd_count = acc.fdCount; |
680 | const cff_subset_accelerator_t* cff_accelerator = nullptr; |
681 | if (acc.cff_accelerator) { |
682 | cff_accelerator = acc.cff_accelerator; |
683 | fd_count = cff_accelerator->parsed_local_subrs.length; |
684 | } |
685 | |
686 | if (cff_accelerator) { |
687 | // If we are not dropping hinting then charstrings are not modified so we can |
688 | // just use a reference to the cached copies. |
689 | cached_charstrings.resize_exact (plan->num_output_glyphs ()); |
690 | parsed_global_subrs = &cff_accelerator->parsed_global_subrs; |
691 | parsed_local_subrs = &cff_accelerator->parsed_local_subrs; |
692 | } else { |
693 | parsed_charstrings.resize_exact (plan->num_output_glyphs ()); |
694 | parsed_global_subrs_storage.resize_exact (acc.globalSubrs->count); |
695 | |
696 | if (unlikely (!parsed_local_subrs_storage.resize (fd_count))) return false; |
697 | |
698 | for (unsigned int i = 0; i < acc.fdCount; i++) |
699 | { |
700 | unsigned count = acc.privateDicts[i].localSubrs->count; |
701 | parsed_local_subrs_storage[i].resize (count); |
702 | if (unlikely (parsed_local_subrs_storage[i].in_error ())) return false; |
703 | } |
704 | |
705 | parsed_global_subrs = &parsed_global_subrs_storage; |
706 | parsed_local_subrs = &parsed_local_subrs_storage; |
707 | } |
708 | |
709 | if (unlikely (remaps.in_error() |
710 | || cached_charstrings.in_error () |
711 | || parsed_charstrings.in_error () |
712 | || parsed_global_subrs->in_error () |
713 | || closures.in_error ())) { |
714 | return false; |
715 | } |
716 | |
717 | /* phase 1 & 2 */ |
718 | for (auto _ : plan->new_to_old_gid_list) |
719 | { |
720 | hb_codepoint_t new_glyph = _.first; |
721 | hb_codepoint_t old_glyph = _.second; |
722 | |
723 | const hb_ubytes_t str = (*acc.charStrings)[old_glyph]; |
724 | unsigned int fd = acc.fdSelect->get_fd (old_glyph); |
725 | if (unlikely (fd >= acc.fdCount)) |
726 | return false; |
727 | |
728 | if (cff_accelerator) |
729 | { |
730 | // parsed string already exists in accelerator, copy it and move |
731 | // on. |
732 | if (cached_charstrings) |
733 | cached_charstrings[new_glyph] = &cff_accelerator->parsed_charstrings[old_glyph]; |
734 | else |
735 | parsed_charstrings[new_glyph] = cff_accelerator->parsed_charstrings[old_glyph]; |
736 | |
737 | continue; |
738 | } |
739 | |
740 | ENV env (str, acc, fd); |
741 | cs_interpreter_t<ENV, OPSET, subr_subset_param_t> interp (env); |
742 | |
743 | parsed_charstrings[new_glyph].alloc (str.length); |
744 | subr_subset_param_t param (&parsed_charstrings[new_glyph], |
745 | &parsed_global_subrs_storage, |
746 | &parsed_local_subrs_storage[fd], |
747 | &closures.global_closure, |
748 | &closures.local_closures[fd], |
749 | plan->flags & HB_SUBSET_FLAGS_NO_HINTING); |
750 | |
751 | if (unlikely (!interp.interpret (param))) |
752 | return false; |
753 | |
754 | /* complete parsed string esp. copy CFF1 width or CFF2 vsindex to the parsed charstring for encoding */ |
755 | SUBSETTER::complete_parsed_str (interp.env, param, parsed_charstrings[new_glyph]); |
756 | |
757 | /* mark hint ops and arguments for drop */ |
758 | if ((plan->flags & HB_SUBSET_FLAGS_NO_HINTING) || plan->inprogress_accelerator) |
759 | { |
760 | subr_subset_param_t param (&parsed_charstrings[new_glyph], |
761 | &parsed_global_subrs_storage, |
762 | &parsed_local_subrs_storage[fd], |
763 | &closures.global_closure, |
764 | &closures.local_closures[fd], |
765 | plan->flags & HB_SUBSET_FLAGS_NO_HINTING); |
766 | |
767 | drop_hints_param_t drop; |
768 | if (drop_hints_in_str (parsed_charstrings[new_glyph], param, drop)) |
769 | { |
770 | parsed_charstrings[new_glyph].set_hint_dropped (); |
771 | if (drop.vsindex_dropped) |
772 | parsed_charstrings[new_glyph].set_vsindex_dropped (); |
773 | } |
774 | } |
775 | |
776 | /* Doing this here one by one instead of compacting all at the end |
777 | * has massive peak-memory saving. |
778 | * |
779 | * The compacting both saves memory and makes further operations |
780 | * faster. |
781 | */ |
782 | parsed_charstrings[new_glyph].compact (); |
783 | } |
784 | |
785 | /* Since parsed strings were loaded from accelerator, we still need |
786 | * to compute the subroutine closures which would have normally happened during |
787 | * parsing. |
788 | * |
789 | * Or if we are dropping hinting, redo closure to get actually used subrs. |
790 | */ |
791 | if ((cff_accelerator || |
792 | (!cff_accelerator && plan->flags & HB_SUBSET_FLAGS_NO_HINTING)) && |
793 | !closure_subroutines(*parsed_global_subrs, |
794 | *parsed_local_subrs)) |
795 | return false; |
796 | |
797 | remaps.create (closures); |
798 | |
799 | populate_subset_accelerator (); |
800 | return true; |
801 | } |
802 | |
803 | bool encode_charstrings (str_buff_vec_t &buffArray, bool encode_prefix = true) const |
804 | { |
805 | unsigned num_glyphs = plan->num_output_glyphs (); |
806 | if (unlikely (!buffArray.resize_exact (num_glyphs))) |
807 | return false; |
808 | hb_codepoint_t last = 0; |
809 | for (auto _ : plan->new_to_old_gid_list) |
810 | { |
811 | hb_codepoint_t gid = _.first; |
812 | hb_codepoint_t old_glyph = _.second; |
813 | |
814 | if (endchar_op != OpCode_Invalid) |
815 | for (; last < gid; last++) |
816 | { |
817 | // Hack to point vector to static string. |
818 | auto &b = buffArray.arrayZ[last]; |
819 | b.length = 1; |
820 | b.arrayZ = const_cast<unsigned char *>(endchar_str); |
821 | } |
822 | |
823 | last++; // Skip over gid |
824 | unsigned int fd = acc.fdSelect->get_fd (old_glyph); |
825 | if (unlikely (fd >= acc.fdCount)) |
826 | return false; |
827 | if (unlikely (!encode_str (get_parsed_charstring (gid), fd, buffArray.arrayZ[gid], encode_prefix))) |
828 | return false; |
829 | } |
830 | if (endchar_op != OpCode_Invalid) |
831 | for (; last < num_glyphs; last++) |
832 | { |
833 | // Hack to point vector to static string. |
834 | auto &b = buffArray.arrayZ[last]; |
835 | b.length = 1; |
836 | b.arrayZ = const_cast<unsigned char *>(endchar_str); |
837 | } |
838 | |
839 | return true; |
840 | } |
841 | |
842 | bool encode_subrs (const parsed_cs_str_vec_t &subrs, const subr_remap_t& remap, unsigned int fd, str_buff_vec_t &buffArray) const |
843 | { |
844 | unsigned int count = remap.get_population (); |
845 | |
846 | if (unlikely (!buffArray.resize_exact (count))) |
847 | return false; |
848 | for (unsigned int new_num = 0; new_num < count; new_num++) |
849 | { |
850 | hb_codepoint_t old_num = remap.backward (new_num); |
851 | assert (old_num != CFF_UNDEF_CODE); |
852 | |
853 | if (unlikely (!encode_str (subrs[old_num], fd, buffArray[new_num]))) |
854 | return false; |
855 | } |
856 | return true; |
857 | } |
858 | |
859 | bool encode_globalsubrs (str_buff_vec_t &buffArray) |
860 | { |
861 | return encode_subrs (*parsed_global_subrs, remaps.global_remap, 0, buffArray); |
862 | } |
863 | |
864 | bool encode_localsubrs (unsigned int fd, str_buff_vec_t &buffArray) const |
865 | { |
866 | return encode_subrs ((*parsed_local_subrs)[fd], remaps.local_remaps[fd], fd, buffArray); |
867 | } |
868 | |
869 | protected: |
/* State threaded through the hint-dropping pass; every flag starts false. */
struct drop_hints_param_t
{
  /* A moveto operator has been seen. */
  bool	seen_moveto = false;
  /* The string just processed ended in a hint operator. */
  bool	ends_in_hint = false;
  /* Every op before the return in the string just processed is marked hinting. */
  bool	all_dropped = false;
  /* A vsindex op was marked as hinting. */
  bool	vsindex_dropped = false;
};
883 | |
884 | bool drop_hints_in_subr (parsed_cs_str_t &str, unsigned int pos, |
885 | parsed_cs_str_vec_t &subrs, unsigned int subr_num, |
886 | const subr_subset_param_t ¶m, drop_hints_param_t &drop) |
887 | { |
888 | drop.ends_in_hint = false; |
889 | bool has_hint = drop_hints_in_str (subrs[subr_num], param, drop); |
890 | |
891 | /* if this subr ends with a stem hint (i.e., not a number; potential argument for moveto), |
892 | * then this entire subroutine must be a hint. drop its call. */ |
893 | if (drop.ends_in_hint) |
894 | { |
895 | str.values[pos].set_hinting (); |
896 | /* if this subr call is at the end of the parent subr, propagate the flag |
897 | * otherwise reset the flag */ |
898 | if (!str.at_end (pos)) |
899 | drop.ends_in_hint = false; |
900 | } |
901 | else if (drop.all_dropped) |
902 | { |
903 | str.values[pos].set_hinting (); |
904 | } |
905 | |
906 | return has_hint; |
907 | } |
908 | |
909 | /* returns true if it sees a hint op before the first moveto */ |
910 | bool drop_hints_in_str (parsed_cs_str_t &str, const subr_subset_param_t ¶m, drop_hints_param_t &drop) |
911 | { |
912 | bool seen_hint = false; |
913 | |
914 | unsigned count = str.values.length; |
915 | auto *values = str.values.arrayZ; |
916 | for (unsigned int pos = 0; pos < count; pos++) |
917 | { |
918 | bool has_hint = false; |
919 | switch (values[pos].op) |
920 | { |
921 | case OpCode_callsubr: |
922 | has_hint = drop_hints_in_subr (str, pos, |
923 | *param.parsed_local_subrs, values[pos].subr_num, |
924 | param, drop); |
925 | break; |
926 | |
927 | case OpCode_callgsubr: |
928 | has_hint = drop_hints_in_subr (str, pos, |
929 | *param.parsed_global_subrs, values[pos].subr_num, |
930 | param, drop); |
931 | break; |
932 | |
933 | case OpCode_rmoveto: |
934 | case OpCode_hmoveto: |
935 | case OpCode_vmoveto: |
936 | drop.seen_moveto = true; |
937 | break; |
938 | |
939 | case OpCode_hintmask: |
940 | case OpCode_cntrmask: |
941 | if (drop.seen_moveto) |
942 | { |
943 | values[pos].set_hinting (); |
944 | break; |
945 | } |
946 | HB_FALLTHROUGH; |
947 | |
948 | case OpCode_hstemhm: |
949 | case OpCode_vstemhm: |
950 | case OpCode_hstem: |
951 | case OpCode_vstem: |
952 | has_hint = true; |
953 | values[pos].set_hinting (); |
954 | if (str.at_end (pos)) |
955 | drop.ends_in_hint = true; |
956 | break; |
957 | |
958 | case OpCode_dotsection: |
959 | values[pos].set_hinting (); |
960 | break; |
961 | |
962 | default: |
963 | /* NONE */ |
964 | break; |
965 | } |
966 | if (has_hint) |
967 | { |
968 | for (int i = pos - 1; i >= 0; i--) |
969 | { |
970 | parsed_cs_op_t &csop = values[(unsigned)i]; |
971 | if (csop.is_hinting ()) |
972 | break; |
973 | csop.set_hinting (); |
974 | if (csop.op == OpCode_vsindexcs) |
975 | drop.vsindex_dropped = true; |
976 | } |
977 | seen_hint |= has_hint; |
978 | } |
979 | } |
980 | |
981 | /* Raise all_dropped flag if all operators except return are dropped from a subr. |
982 | * It may happen even after seeing the first moveto if a subr contains |
983 | * only (usually one) hintmask operator, then calls to this subr can be dropped. |
984 | */ |
985 | drop.all_dropped = true; |
986 | for (unsigned int pos = 0; pos < count; pos++) |
987 | { |
988 | parsed_cs_op_t &csop = values[pos]; |
989 | if (csop.op == OpCode_return) |
990 | break; |
991 | if (!csop.is_hinting ()) |
992 | { |
993 | drop.all_dropped = false; |
994 | break; |
995 | } |
996 | } |
997 | |
998 | return seen_hint; |
999 | } |
1000 | |
1001 | bool closure_subroutines (const parsed_cs_str_vec_t& global_subrs, |
1002 | const hb_vector_t<parsed_cs_str_vec_t>& local_subrs) |
1003 | { |
1004 | closures.reset (); |
1005 | for (auto _ : plan->new_to_old_gid_list) |
1006 | { |
1007 | hb_codepoint_t new_glyph = _.first; |
1008 | hb_codepoint_t old_glyph = _.second; |
1009 | unsigned int fd = acc.fdSelect->get_fd (old_glyph); |
1010 | if (unlikely (fd >= acc.fdCount)) |
1011 | return false; |
1012 | |
1013 | // Note: const cast is safe here because the collect_subr_refs_in_str only performs a |
1014 | // closure and does not modify any of the charstrings. |
1015 | subr_subset_param_t param (const_cast<parsed_cs_str_t*> (&get_parsed_charstring (new_glyph)), |
1016 | const_cast<parsed_cs_str_vec_t*> (&global_subrs), |
1017 | const_cast<parsed_cs_str_vec_t*> (&local_subrs[fd]), |
1018 | &closures.global_closure, |
1019 | &closures.local_closures[fd], |
1020 | plan->flags & HB_SUBSET_FLAGS_NO_HINTING); |
1021 | collect_subr_refs_in_str (get_parsed_charstring (new_glyph), param); |
1022 | } |
1023 | |
1024 | return true; |
1025 | } |
1026 | |
1027 | void collect_subr_refs_in_subr (unsigned int subr_num, parsed_cs_str_vec_t &subrs, |
1028 | hb_set_t *closure, |
1029 | const subr_subset_param_t ¶m) |
1030 | { |
1031 | if (closure->has (subr_num)) |
1032 | return; |
1033 | closure->add (subr_num); |
1034 | collect_subr_refs_in_str (subrs[subr_num], param); |
1035 | } |
1036 | |
1037 | void collect_subr_refs_in_str (const parsed_cs_str_t &str, |
1038 | const subr_subset_param_t ¶m) |
1039 | { |
1040 | if (!str.has_calls ()) |
1041 | return; |
1042 | |
1043 | for (auto &opstr : str.values) |
1044 | { |
1045 | if (!param.drop_hints || !opstr.is_hinting ()) |
1046 | { |
1047 | switch (opstr.op) |
1048 | { |
1049 | case OpCode_callsubr: |
1050 | collect_subr_refs_in_subr (opstr.subr_num, *param.parsed_local_subrs, |
1051 | param.local_closure, param); |
1052 | break; |
1053 | |
1054 | case OpCode_callgsubr: |
1055 | collect_subr_refs_in_subr (opstr.subr_num, *param.parsed_global_subrs, |
1056 | param.global_closure, param); |
1057 | break; |
1058 | |
1059 | default: break; |
1060 | } |
1061 | } |
1062 | } |
1063 | } |
1064 | |
1065 | bool encode_str (const parsed_cs_str_t &str, const unsigned int fd, str_buff_t &buff, bool encode_prefix = true) const |
1066 | { |
1067 | str_encoder_t encoder (buff); |
1068 | encoder.reset (); |
1069 | bool hinting = !(plan->flags & HB_SUBSET_FLAGS_NO_HINTING); |
1070 | /* if a prefix (CFF1 width or CFF2 vsindex) has been removed along with hints, |
1071 | * re-insert it at the beginning of charstreing */ |
1072 | if (encode_prefix && str.has_prefix () && !hinting && str.is_hint_dropped ()) |
1073 | { |
1074 | encoder.encode_num_cs (str.prefix_num ()); |
1075 | if (str.prefix_op () != OpCode_Invalid) |
1076 | encoder.encode_op (str.prefix_op ()); |
1077 | } |
1078 | |
1079 | unsigned size = 0; |
1080 | for (auto &opstr : str.values) |
1081 | { |
1082 | size += opstr.length; |
1083 | if (opstr.op == OpCode_callsubr || opstr.op == OpCode_callgsubr) |
1084 | size += 3; |
1085 | } |
1086 | if (!buff.alloc (buff.length + size, true)) |
1087 | return false; |
1088 | |
1089 | for (auto &opstr : str.values) |
1090 | { |
1091 | if (hinting || !opstr.is_hinting ()) |
1092 | { |
1093 | switch (opstr.op) |
1094 | { |
1095 | case OpCode_callsubr: |
1096 | encoder.encode_int (remaps.local_remaps[fd].biased_num (opstr.subr_num)); |
1097 | encoder.copy_str (opstr.ptr, opstr.length); |
1098 | break; |
1099 | |
1100 | case OpCode_callgsubr: |
1101 | encoder.encode_int (remaps.global_remap.biased_num (opstr.subr_num)); |
1102 | encoder.copy_str (opstr.ptr, opstr.length); |
1103 | break; |
1104 | |
1105 | default: |
1106 | encoder.copy_str (opstr.ptr, opstr.length); |
1107 | break; |
1108 | } |
1109 | } |
1110 | } |
1111 | return !encoder.in_error (); |
1112 | } |
1113 | |
1114 | void compact_parsed_subrs () const |
1115 | { |
1116 | for (auto &cs : parsed_global_subrs_storage) |
1117 | cs.compact (); |
1118 | for (auto &vec : parsed_local_subrs_storage) |
1119 | for (auto &cs : vec) |
1120 | cs.compact (); |
1121 | } |
1122 | |
1123 | void populate_subset_accelerator () const |
1124 | { |
1125 | if (!plan->inprogress_accelerator) return; |
1126 | |
1127 | compact_parsed_subrs (); |
1128 | |
1129 | acc.cff_accelerator = |
1130 | cff_subset_accelerator_t::create(acc.blob, |
1131 | parsed_charstrings, |
1132 | parsed_global_subrs_storage, |
1133 | parsed_local_subrs_storage); |
1134 | } |
1135 | |
1136 | const parsed_cs_str_t& get_parsed_charstring (unsigned i) const |
1137 | { |
1138 | if (cached_charstrings) return *(cached_charstrings[i]); |
1139 | return parsed_charstrings[i]; |
1140 | } |
1141 | |
protected:
/* Font table accessor. The methods above read fdSelect, fdCount and blob
 * from it and store the created accelerator in its cff_accelerator member. */
const ACC &acc;
/* Subset plan; supplies new_to_old_gid_list, flags and
 * inprogress_accelerator to the methods above. */
const hb_subset_plan_t *plan;

/* Subroutine closures: reset and then filled by closure_subroutines (). */
subr_closures_t closures;

/* When this vector tests true, get_parsed_charstring () returns entries from
 * here instead of from parsed_charstrings.
 * NOTE(review): presumably filled from a previously built accelerator --
 * confirm where it is populated. */
hb_vector_t<const parsed_cs_str_t*> cached_charstrings;
/* NOTE(review): assigned outside this part of the file; presumably point
 * either at the *_storage members below or at cached data -- confirm. */
const parsed_cs_str_vec_t* parsed_global_subrs;
const hb_vector_t<parsed_cs_str_vec_t>* parsed_local_subrs;

/* Subroutine renumbering; encode_str () uses global_remap and
 * local_remaps[fd] to obtain the biased number written for each call. */
subr_remaps_t remaps;

private:

/* Parsed data owned by this object. get_parsed_charstring () falls back to
 * parsed_charstrings, compact_parsed_subrs () compacts the two subr storages,
 * and all three are handed to cff_subset_accelerator_t::create (). */
parsed_cs_str_vec_t parsed_charstrings;
parsed_cs_str_vec_t parsed_global_subrs_storage;
hb_vector_t<parsed_cs_str_vec_t> parsed_local_subrs_storage;
/* Count type of the SUBRS template parameter. */
typedef typename SUBRS::count_type subr_count_type;
1160 | }; |
1161 | |
1162 | } /* namespace CFF */ |
1163 | |
/* Declaration only; the definition is not part of this header.
 * Takes the subset plan, the source font's fdCount and its FDSelect (`src`)
 * as inputs and reports its results through the reference parameters marked
 * OUT below.
 * NOTE(review): judging by the names, this plans the FDSelect of the subset
 * font (count, size, format, ranges and an fd mapping) and returns false on
 * failure -- confirm against the definition. */
HB_INTERNAL bool
hb_plan_subset_cff_fdselect (const hb_subset_plan_t *plan,
                             unsigned int fdCount,
                             const CFF::FDSelect &src, /* IN */
                             unsigned int &subset_fd_count /* OUT */,
                             unsigned int &subset_fdselect_size /* OUT */,
                             unsigned int &subset_fdselect_format /* OUT */,
                             hb_vector_t<CFF::code_pair_t> &fdselect_ranges /* OUT */,
                             hb_inc_bimap_t &fdmap /* OUT */);
1173 | |
/* Declaration only; the definition is not part of this header.
 * Takes a serialize context plus the glyph count, the source FDSelect, and
 * an fd count, format, size and list of ranges.
 * NOTE(review): presumably writes the subset FDSelect into `c` using the
 * values produced by hb_plan_subset_cff_fdselect () and returns false on
 * failure -- confirm against the definition. */
HB_INTERNAL bool
hb_serialize_cff_fdselect (hb_serialize_context_t *c,
                           unsigned int num_glyphs,
                           const CFF::FDSelect &src,
                           unsigned int fd_count,
                           unsigned int fdselect_format,
                           unsigned int size,
                           const hb_vector_t<CFF::code_pair_t> &fdselect_ranges);
1182 | |
1183 | #endif /* HB_SUBSET_CFF_COMMON_HH */ |
1184 | |