1 | /* |
2 | * Copyright © 2011,2012 Google, Inc. |
3 | * |
4 | * This is part of HarfBuzz, a text shaping library. |
5 | * |
6 | * Permission is hereby granted, without written agreement and without |
7 | * license or royalty fees, to use, copy, modify, and distribute this |
8 | * software and its documentation for any purpose, provided that the |
9 | * above copyright notice and the following two paragraphs appear in |
10 | * all copies of this software. |
11 | * |
12 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 | * DAMAGE. |
17 | * |
18 | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 | * |
24 | * Google Author(s): Behdad Esfahbod |
25 | */ |
26 | |
27 | #include "hb.hh" |
28 | |
29 | #ifndef HB_NO_OT_SHAPE |
30 | |
31 | #include "hb-ot-shaper-khmer-machine.hh" |
32 | #include "hb-ot-shaper-indic.hh" |
33 | #include "hb-ot-layout.hh" |
34 | |
35 | |
36 | /* |
37 | * Khmer shaper. |
38 | */ |
39 | |
40 | |
/* Feature table for the Khmer shaper.
 *
 * Entry order is load-bearing: the anonymous KHMER_* enum that follows is
 * used to index this array, and collect_features_khmer adds the entries in
 * array order, inserting a pause between the two groups below. */
static const hb_ot_map_feature_t
khmer_features[] =
{
  /*
   * Basic features.
   * These features are added back to back (no pause between them) and are
   * constrained to the syllable (F_PER_SYLLABLE).  Note that
   * collect_features_khmer registers the setup_syllables_khmer and
   * reorder_khmer pauses before adding any of them.
   */
  {HB_TAG('p','r','e','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE},
  {HB_TAG('b','l','w','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE},
  {HB_TAG('a','b','v','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE},
  {HB_TAG('p','s','t','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE},
  {HB_TAG('c','f','a','r'), F_MANUAL_JOINERS | F_PER_SYLLABLE},
  /*
   * Other features.
   * These features are applied all at once after clearing syllables
   * (i.e. after the hb_syllabic_clear_var pause).
   */
  {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS},
  {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS},
  {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS},
  {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS},
};
63 | |
/*
 * Indices into the khmer_features array (and into
 * khmer_shape_plan_t::mask_array, which is sized by KHMER_NUM_FEATURES).
 * Must be in the same order as the khmer_features array.
 */
enum {
  /* Basic, per-syllable features. */
  KHMER_PREF,
  KHMER_BLWF,
  KHMER_ABVF,
  KHMER_PSTF,
  KHMER_CFAR,

  /* Other features, added after the syllable variable is cleared. */
  _KHMER_PRES,
  _KHMER_ABVS,
  _KHMER_BLWS,
  _KHMER_PSTS,

  /* Total number of entries above. */
  KHMER_NUM_FEATURES,
  /* Number of leading "basic" entries: collect_features_khmer adds features
   * [0, KHMER_BASIC_FEATURES) before the hb_syllabic_clear_var pause and the
   * remainder after it. */
  KHMER_BASIC_FEATURES = _KHMER_PRES, /* Don't forget to update this! */
};
82 | |
83 | static inline void |
84 | set_khmer_properties (hb_glyph_info_t &info) |
85 | { |
86 | hb_codepoint_t u = info.codepoint; |
87 | unsigned int type = hb_indic_get_categories (u); |
88 | |
89 | info.khmer_category() = (khmer_category_t) (type & 0xFFu); |
90 | } |
91 | |
/* Forward declarations: collect_features_khmer passes both functions to
 * add_gsub_pause() before their definitions appear further down. */
static bool
setup_syllables_khmer (const hb_ot_shape_plan_t *plan,
		       hb_font_t *font,
		       hb_buffer_t *buffer);
static bool
reorder_khmer (const hb_ot_shape_plan_t *plan,
	       hb_font_t *font,
	       hb_buffer_t *buffer);
100 | |
101 | static void |
102 | collect_features_khmer (hb_ot_shape_planner_t *plan) |
103 | { |
104 | hb_ot_map_builder_t *map = &plan->map; |
105 | |
106 | /* Do this before any lookups have been applied. */ |
107 | map->add_gsub_pause (setup_syllables_khmer); |
108 | map->add_gsub_pause (reorder_khmer); |
109 | |
110 | /* Testing suggests that Uniscribe does NOT pause between basic |
111 | * features. Test with KhmerUI.ttf and the following three |
112 | * sequences: |
113 | * |
114 | * U+1789,U+17BC |
115 | * U+1789,U+17D2,U+1789 |
116 | * U+1789,U+17D2,U+1789,U+17BC |
117 | * |
118 | * https://github.com/harfbuzz/harfbuzz/issues/974 |
119 | */ |
120 | map->enable_feature (HB_TAG('l','o','c','l'), F_PER_SYLLABLE); |
121 | map->enable_feature (HB_TAG('c','c','m','p'), F_PER_SYLLABLE); |
122 | |
123 | unsigned int i = 0; |
124 | for (; i < KHMER_BASIC_FEATURES; i++) |
125 | map->add_feature (khmer_features[i]); |
126 | |
127 | /* https://github.com/harfbuzz/harfbuzz/issues/3531 */ |
128 | map->add_gsub_pause (hb_syllabic_clear_var); // Don't need syllables anymore, use stop to free buffer var |
129 | |
130 | for (; i < KHMER_NUM_FEATURES; i++) |
131 | map->add_feature (khmer_features[i]); |
132 | } |
133 | |
134 | static void |
135 | override_features_khmer (hb_ot_shape_planner_t *plan) |
136 | { |
137 | hb_ot_map_builder_t *map = &plan->map; |
138 | |
139 | /* Khmer spec has 'clig' as part of required shaping features: |
140 | * "Apply feature 'clig' to form ligatures that are desired for |
141 | * typographical correctness.", hence in overrides... */ |
142 | map->enable_feature (HB_TAG('c','l','i','g')); |
143 | |
144 | /* Uniscribe does not apply 'kern' in Khmer. */ |
145 | if (hb_options ().uniscribe_bug_compatible) |
146 | { |
147 | map->disable_feature (HB_TAG('k','e','r','n')); |
148 | } |
149 | |
150 | map->disable_feature (HB_TAG('l','i','g','a')); |
151 | } |
152 | |
153 | |
/* Per-plan data of the Khmer shaper, created by data_create_khmer and read
 * back from plan->data in reorder_consonant_syllable. */
struct khmer_shape_plan_t
{
  /* One mask per khmer_features entry, indexed by the KHMER_* enum.
   * data_create_khmer stores 0 for features flagged F_GLOBAL. */
  hb_mask_t mask_array[KHMER_NUM_FEATURES];
};
158 | |
159 | static void * |
160 | data_create_khmer (const hb_ot_shape_plan_t *plan) |
161 | { |
162 | khmer_shape_plan_t *khmer_plan = (khmer_shape_plan_t *) hb_calloc (1, sizeof (khmer_shape_plan_t)); |
163 | if (unlikely (!khmer_plan)) |
164 | return nullptr; |
165 | |
166 | for (unsigned int i = 0; i < ARRAY_LENGTH (khmer_plan->mask_array); i++) |
167 | khmer_plan->mask_array[i] = (khmer_features[i].flags & F_GLOBAL) ? |
168 | 0 : plan->map.get_1_mask (khmer_features[i].tag); |
169 | |
170 | return khmer_plan; |
171 | } |
172 | |
/* Releases shaper plan data by handing the pointer to hb_free().
 * data_create_khmer allocates its result with hb_calloc. */
static void
data_destroy_khmer (void *data)
{
  hb_free (data);
}
178 | |
179 | static void |
180 | setup_masks_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED, |
181 | hb_buffer_t *buffer, |
182 | hb_font_t *font HB_UNUSED) |
183 | { |
184 | HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category); |
185 | |
186 | /* We cannot setup masks here. We save information about characters |
187 | * and setup masks later on in a pause-callback. */ |
188 | |
189 | unsigned int count = buffer->len; |
190 | hb_glyph_info_t *info = buffer->info; |
191 | for (unsigned int i = 0; i < count; i++) |
192 | set_khmer_properties (info[i]); |
193 | } |
194 | |
195 | static bool |
196 | setup_syllables_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED, |
197 | hb_font_t *font HB_UNUSED, |
198 | hb_buffer_t *buffer) |
199 | { |
200 | HB_BUFFER_ALLOCATE_VAR (buffer, syllable); |
201 | find_syllables_khmer (buffer); |
202 | foreach_syllable (buffer, start, end) |
203 | buffer->unsafe_to_break (start, end); |
204 | return false; |
205 | } |
206 | |
207 | |
/* Rules from:
 * https://docs.microsoft.com/en-us/typography/script-development/khmer */
210 | |
211 | static void |
212 | reorder_consonant_syllable (const hb_ot_shape_plan_t *plan, |
213 | hb_face_t *face HB_UNUSED, |
214 | hb_buffer_t *buffer, |
215 | unsigned int start, unsigned int end) |
216 | { |
217 | const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data; |
218 | hb_glyph_info_t *info = buffer->info; |
219 | |
220 | /* Setup masks. */ |
221 | { |
222 | /* Post-base */ |
223 | hb_mask_t mask = khmer_plan->mask_array[KHMER_BLWF] | |
224 | khmer_plan->mask_array[KHMER_ABVF] | |
225 | khmer_plan->mask_array[KHMER_PSTF]; |
226 | for (unsigned int i = start + 1; i < end; i++) |
227 | info[i].mask |= mask; |
228 | } |
229 | |
230 | unsigned int num_coengs = 0; |
231 | for (unsigned int i = start + 1; i < end; i++) |
232 | { |
233 | /* """ |
234 | * When a COENG + (Cons | IndV) combination are found (and subscript count |
235 | * is less than two) the character combination is handled according to the |
236 | * subscript type of the character following the COENG. |
237 | * |
238 | * ... |
239 | * |
240 | * Subscript Type 2 - The COENG + RO characters are reordered to immediately |
241 | * before the base glyph. Then the COENG + RO characters are assigned to have |
242 | * the 'pref' OpenType feature applied to them. |
243 | * """ |
244 | */ |
245 | if (info[i].khmer_category() == K_Cat(H) && num_coengs <= 2 && i + 1 < end) |
246 | { |
247 | num_coengs++; |
248 | |
249 | if (info[i + 1].khmer_category() == K_Cat(Ra)) |
250 | { |
251 | for (unsigned int j = 0; j < 2; j++) |
252 | info[i + j].mask |= khmer_plan->mask_array[KHMER_PREF]; |
253 | |
254 | /* Move the Coeng,Ro sequence to the start. */ |
255 | buffer->merge_clusters (start, i + 2); |
256 | hb_glyph_info_t t0 = info[i]; |
257 | hb_glyph_info_t t1 = info[i + 1]; |
258 | memmove (&info[start + 2], &info[start], (i - start) * sizeof (info[0])); |
259 | info[start] = t0; |
260 | info[start + 1] = t1; |
261 | |
262 | /* Mark the subsequent stuff with 'cfar'. Used in Khmer. |
263 | * Read the feature spec. |
264 | * This allows distinguishing the following cases with MS Khmer fonts: |
265 | * U+1784,U+17D2,U+179A,U+17D2,U+1782 |
266 | * U+1784,U+17D2,U+1782,U+17D2,U+179A |
267 | */ |
268 | if (khmer_plan->mask_array[KHMER_CFAR]) |
269 | for (unsigned int j = i + 2; j < end; j++) |
270 | info[j].mask |= khmer_plan->mask_array[KHMER_CFAR]; |
271 | |
272 | num_coengs = 2; /* Done. */ |
273 | } |
274 | } |
275 | |
276 | /* Reorder left matra piece. */ |
277 | else if (info[i].khmer_category() == K_Cat(VPre)) |
278 | { |
279 | /* Move to the start. */ |
280 | buffer->merge_clusters (start, i + 1); |
281 | hb_glyph_info_t t = info[i]; |
282 | memmove (&info[start + 1], &info[start], (i - start) * sizeof (info[0])); |
283 | info[start] = t; |
284 | } |
285 | } |
286 | } |
287 | |
288 | static void |
289 | reorder_syllable_khmer (const hb_ot_shape_plan_t *plan, |
290 | hb_face_t *face, |
291 | hb_buffer_t *buffer, |
292 | unsigned int start, unsigned int end) |
293 | { |
294 | khmer_syllable_type_t syllable_type = (khmer_syllable_type_t) (buffer->info[start].syllable() & 0x0F); |
295 | switch (syllable_type) |
296 | { |
297 | case khmer_broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */ |
298 | case khmer_consonant_syllable: |
299 | reorder_consonant_syllable (plan, face, buffer, start, end); |
300 | break; |
301 | |
302 | case khmer_non_khmer_cluster: |
303 | break; |
304 | } |
305 | } |
306 | |
307 | static bool |
308 | reorder_khmer (const hb_ot_shape_plan_t *plan, |
309 | hb_font_t *font, |
310 | hb_buffer_t *buffer) |
311 | { |
312 | bool ret = false; |
313 | if (buffer->message (font, "start reordering khmer" )) |
314 | { |
315 | if (hb_syllabic_insert_dotted_circles (font, buffer, |
316 | khmer_broken_cluster, |
317 | K_Cat(DOTTEDCIRCLE), |
318 | (unsigned) -1)) |
319 | ret = true; |
320 | |
321 | foreach_syllable (buffer, start, end) |
322 | reorder_syllable_khmer (plan, font->face, buffer, start, end); |
323 | (void) buffer->message (font, "end reordering khmer" ); |
324 | } |
325 | HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category); |
326 | |
327 | return ret; |
328 | } |
329 | |
330 | |
331 | static bool |
332 | decompose_khmer (const hb_ot_shape_normalize_context_t *c, |
333 | hb_codepoint_t ab, |
334 | hb_codepoint_t *a, |
335 | hb_codepoint_t *b) |
336 | { |
337 | switch (ab) |
338 | { |
339 | /* |
340 | * Decompose split matras that don't have Unicode decompositions. |
341 | */ |
342 | |
343 | /* Khmer */ |
344 | case 0x17BEu : *a = 0x17C1u; *b= 0x17BEu; return true; |
345 | case 0x17BFu : *a = 0x17C1u; *b= 0x17BFu; return true; |
346 | case 0x17C0u : *a = 0x17C1u; *b= 0x17C0u; return true; |
347 | case 0x17C4u : *a = 0x17C1u; *b= 0x17C4u; return true; |
348 | case 0x17C5u : *a = 0x17C1u; *b= 0x17C5u; return true; |
349 | } |
350 | |
351 | return (bool) c->unicode->decompose (ab, a, b); |
352 | } |
353 | |
354 | static bool |
355 | compose_khmer (const hb_ot_shape_normalize_context_t *c, |
356 | hb_codepoint_t a, |
357 | hb_codepoint_t b, |
358 | hb_codepoint_t *ab) |
359 | { |
360 | /* Avoid recomposing split matras. */ |
361 | if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a))) |
362 | return false; |
363 | |
364 | return (bool) c->unicode->compose (a, b, ab); |
365 | } |
366 | |
367 | |
/* Shaper descriptor for Khmer.  The initializer is positional, so the order
 * of the entries must not change; slots this shaper does not use are
 * nullptr. */
const hb_ot_shaper_t _hb_ot_shaper_khmer =
{
  collect_features_khmer,
  override_features_khmer,
  data_create_khmer,
  data_destroy_khmer,
  nullptr, /* preprocess_text */
  nullptr, /* postprocess_glyphs */
  decompose_khmer,
  compose_khmer,
  setup_masks_khmer,
  nullptr, /* reorder_marks */
  HB_TAG_NONE, /* gpos_tag */
  HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
  HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
  false, /* fallback_position */
};
385 | |
386 | |
387 | #endif |
388 | |