hb-ot-shaper-khmer.cc source code [Godot/thirdparty/harfbuzz/src/hb-ot-shaper-khmer.cc]

1	/*
2	* Copyright © 2011,2012 Google, Inc.
3	*
4	* This is part of HarfBuzz, a text shaping library.
5	*
6	* Permission is hereby granted, without written agreement and without
7	* license or royalty fees, to use, copy, modify, and distribute this
8	* software and its documentation for any purpose, provided that the
9	* above copyright notice and the following two paragraphs appear in
10	* all copies of this software.
11	*
12	* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13	* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14	* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15	* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16	* DAMAGE.
17	*
18	* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19	* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20	* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21	* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22	* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23	*
24	* Google Author(s): Behdad Esfahbod
25	*/
26
27	#include "hb.hh"
28
29	#ifndef HB_NO_OT_SHAPE
30
31	#include "hb-ot-shaper-khmer-machine.hh"
32	#include "hb-ot-shaper-indic.hh"
33	#include "hb-ot-layout.hh"
34
35
36	/*
37	* Khmer shaper.
38	*/
39
40
41	static const hb_ot_map_feature_t
42	khmer_features[] =
43	{
44	/*
45	* Basic features.
46	* These features are applied all at once, before reordering, constrained
47	* to the syllable.
48	*/
49	{HB_TAG(`'p'`,`'r'`,`'e'`,`'f'`), F_MANUAL_JOINERS \| F_PER_SYLLABLE},
50	{HB_TAG(`'b'`,`'l'`,`'w'`,`'f'`), F_MANUAL_JOINERS \| F_PER_SYLLABLE},
51	{HB_TAG(`'a'`,`'b'`,`'v'`,`'f'`), F_MANUAL_JOINERS \| F_PER_SYLLABLE},
52	{HB_TAG(`'p'`,`'s'`,`'t'`,`'f'`), F_MANUAL_JOINERS \| F_PER_SYLLABLE},
53	{HB_TAG(`'c'`,`'f'`,`'a'`,`'r'`), F_MANUAL_JOINERS \| F_PER_SYLLABLE},
54	/*
55	* Other features.
56	* These features are applied all at once after clearing syllables.
57	*/
58	{HB_TAG(`'p'`,`'r'`,`'e'`,`'s'`), F_GLOBAL_MANUAL_JOINERS},
59	{HB_TAG(`'a'`,`'b'`,`'v'`,`'s'`), F_GLOBAL_MANUAL_JOINERS},
60	{HB_TAG(`'b'`,`'l'`,`'w'`,`'s'`), F_GLOBAL_MANUAL_JOINERS},
61	{HB_TAG(`'p'`,`'s'`,`'t'`,`'s'`), F_GLOBAL_MANUAL_JOINERS},
62	};
63
/*
 * Indices into khmer_features[].
 * Must be in the same order as the khmer_features array.
 */
enum {
  /* Basic (per-syllable) features. */
  KHMER_PREF,
  KHMER_BLWF,
  KHMER_ABVF,
  KHMER_PSTF,
  KHMER_CFAR,

  /* Other (global) features. */
  _KHMER_PRES,
  _KHMER_ABVS,
  _KHMER_BLWS,
  _KHMER_PSTS,

  KHMER_NUM_FEATURES,
  /* Number of basic features, i.e. the index of the first non-basic one. */
  KHMER_BASIC_FEATURES = _KHMER_PRES, /* Don't forget to update this! */
};
82
83	static inline void
84	set_khmer_properties (hb_glyph_info_t &info)
85	{
86	hb_codepoint_t u = info.codepoint;
87	unsigned int type = hb_indic_get_categories (u);
88
89	info.khmer_category() = (khmer_category_t) (type & `0xFFu`);
90	}
91
92	static bool
93	setup_syllables_khmer (const hb_ot_shape_plan_t *plan,
94	hb_font_t *font,
95	hb_buffer_t *buffer);
96	static bool
97	reorder_khmer (const hb_ot_shape_plan_t *plan,
98	hb_font_t *font,
99	hb_buffer_t *buffer);
100
101	static void
102	collect_features_khmer (hb_ot_shape_planner_t *plan)
103	{
104	hb_ot_map_builder_t *map = &plan->map;
105
106	/ Do this before any lookups have been applied. /
107	map->add_gsub_pause (setup_syllables_khmer);
108	map->add_gsub_pause (reorder_khmer);
109
110	/ Testing suggests that Uniscribe does NOT pause between basic*
111	* features. Test with KhmerUI.ttf and the following three
112	* sequences:
113	*
114	* U+1789,U+17BC
115	* U+1789,U+17D2,U+1789
116	* U+1789,U+17D2,U+1789,U+17BC
117	*
118	* https://github.com/harfbuzz/harfbuzz/issues/974
119	*/
120	map->enable_feature (HB_TAG(`'l'`,`'o'`,`'c'`,`'l'`), F_PER_SYLLABLE);
121	map->enable_feature (HB_TAG(`'c'`,`'c'`,`'m'`,`'p'`), F_PER_SYLLABLE);
122
123	unsigned int i = `0`;
124	for (; i < KHMER_BASIC_FEATURES; i++)
125	map->add_feature (khmer_features[i]);
126
127	/ https://github.com/harfbuzz/harfbuzz/issues/3531 /
128	map->add_gsub_pause (hb_syllabic_clear_var); // Don't need syllables anymore, use stop to free buffer var
129
130	for (; i < KHMER_NUM_FEATURES; i++)
131	map->add_feature (khmer_features[i]);
132	}
133
134	static void
135	override_features_khmer (hb_ot_shape_planner_t *plan)
136	{
137	hb_ot_map_builder_t *map = &plan->map;
138
139	/ Khmer spec has 'clig' as part of required shaping features:*
140	* "Apply feature 'clig' to form ligatures that are desired for
141	* typographical correctness.", hence in overrides... */
142	map->enable_feature (HB_TAG(`'c'`,`'l'`,`'i'`,`'g'`));
143
144	/ Uniscribe does not apply 'kern' in Khmer. /
145	if (hb_options ().uniscribe_bug_compatible)
146	{
147	map->disable_feature (HB_TAG(`'k'`,`'e'`,`'r'`,`'n'`));
148	}
149
150	map->disable_feature (HB_TAG(`'l'`,`'i'`,`'g'`,`'a'`));
151	}
152
153
154	struct khmer_shape_plan_t
155	{
156	hb_mask_t mask_array[KHMER_NUM_FEATURES];
157	};
158
159	static void *
160	data_create_khmer (const hb_ot_shape_plan_t *plan)
161	{
162	khmer_shape_plan_t khmer_plan = (khmer_shape_plan_t ) hb_calloc (`1`, sizeof (khmer_shape_plan_t));
163	if (unlikely (!khmer_plan))
164	return nullptr;
165
166	for (unsigned int i = `0`; i < ARRAY_LENGTH (khmer_plan->mask_array); i++)
167	khmer_plan->mask_array[i] = (khmer_features[i].flags & F_GLOBAL) ?
168	`0` : plan->map.get_1_mask (khmer_features[i].tag);
169
170	return khmer_plan;
171	}
172
173	static void
174	data_destroy_khmer (void *data)
175	{
176	hb_free (data);
177	}
178
179	static void
180	setup_masks_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
181	hb_buffer_t *buffer,
182	hb_font_t *font HB_UNUSED)
183	{
184	HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category);
185
186	/ We cannot setup masks here. We save information about characters*
187	* and setup masks later on in a pause-callback. */
188
189	unsigned int count = buffer->len;
190	hb_glyph_info_t *info = buffer->info;
191	for (unsigned int i = `0`; i < count; i++)
192	set_khmer_properties (info[i]);
193	}
194
195	static bool
196	setup_syllables_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
197	hb_font_t *font HB_UNUSED,
198	hb_buffer_t *buffer)
199	{
200	HB_BUFFER_ALLOCATE_VAR (buffer, syllable);
201	find_syllables_khmer (buffer);
202	foreach_syllable (buffer, start, end)
203	buffer->unsafe_to_break (start, end);
204	return false;
205	}
206
207
/* Rules from:
 * https://docs.microsoft.com/en-us/typography/script-development/devanagari */
210
211	static void
212	reorder_consonant_syllable (const hb_ot_shape_plan_t *plan,
213	hb_face_t *face HB_UNUSED,
214	hb_buffer_t *buffer,
215	unsigned int start, unsigned int end)
216	{
217	const khmer_shape_plan_t khmer_plan = (const* khmer_shape_plan_t *) plan->data;
218	hb_glyph_info_t *info = buffer->info;
219
220	/ Setup masks. /
221	{
222	/ Post-base /
223	hb_mask_t mask = khmer_plan->mask_array[KHMER_BLWF] \|
224	khmer_plan->mask_array[KHMER_ABVF] \|
225	khmer_plan->mask_array[KHMER_PSTF];
226	for (unsigned int i = start + `1`; i < end; i++)
227	info[i].mask \|= mask;
228	}
229
230	unsigned int num_coengs = `0`;
231	for (unsigned int i = start + `1`; i < end; i++)
232	{
233	/ """*
234	* When a COENG + (Cons \| IndV) combination are found (and subscript count
235	* is less than two) the character combination is handled according to the
236	* subscript type of the character following the COENG.
237	*
238	* ...
239	*
240	* Subscript Type 2 - The COENG + RO characters are reordered to immediately
241	* before the base glyph. Then the COENG + RO characters are assigned to have
242	* the 'pref' OpenType feature applied to them.
243	* """
244	*/
245	if (info[i].khmer_category() == K_Cat(H) && num_coengs <= `2` && i + `1` < end)
246	{
247	num_coengs++;
248
249	if (info[i + `1`].khmer_category() == K_Cat(Ra))
250	{
251	for (unsigned int j = `0`; j < `2`; j++)
252	info[i + j].mask \|= khmer_plan->mask_array[KHMER_PREF];
253
254	/ Move the Coeng,Ro sequence to the start. /
255	buffer->merge_clusters (start, i + `2`);
256	hb_glyph_info_t t0 = info[i];
257	hb_glyph_info_t t1 = info[i + `1`];
258	memmove (&info[start + `2`], &info[start], (i - start) * sizeof (info[`0`]));
259	info[start] = t0;
260	info[start + `1`] = t1;
261
262	/ Mark the subsequent stuff with 'cfar'. Used in Khmer.*
263	* Read the feature spec.
264	* This allows distinguishing the following cases with MS Khmer fonts:
265	* U+1784,U+17D2,U+179A,U+17D2,U+1782
266	* U+1784,U+17D2,U+1782,U+17D2,U+179A
267	*/
268	if (khmer_plan->mask_array[KHMER_CFAR])
269	for (unsigned int j = i + `2`; j < end; j++)
270	info[j].mask \|= khmer_plan->mask_array[KHMER_CFAR];
271
272	num_coengs = `2`; / Done. /
273	}
274	}
275
276	/ Reorder left matra piece. /
277	else if (info[i].khmer_category() == K_Cat(VPre))
278	{
279	/ Move to the start. /
280	buffer->merge_clusters (start, i + `1`);
281	hb_glyph_info_t t = info[i];
282	memmove (&info[start + `1`], &info[start], (i - start) * sizeof (info[`0`]));
283	info[start] = t;
284	}
285	}
286	}
287
288	static void
289	reorder_syllable_khmer (const hb_ot_shape_plan_t *plan,
290	hb_face_t *face,
291	hb_buffer_t *buffer,
292	unsigned int start, unsigned int end)
293	{
294	khmer_syllable_type_t syllable_type = (khmer_syllable_type_t) (buffer->info[start].syllable() & `0x0F`);
295	switch (syllable_type)
296	{
297	case khmer_broken_cluster: / We already inserted dotted-circles, so just call the consonant_syllable. /
298	case khmer_consonant_syllable:
299	reorder_consonant_syllable (plan, face, buffer, start, end);
300	break;
301
302	case khmer_non_khmer_cluster:
303	break;
304	}
305	}
306
307	static bool
308	reorder_khmer (const hb_ot_shape_plan_t *plan,
309	hb_font_t *font,
310	hb_buffer_t *buffer)
311	{
312	bool ret = false;
313	if (buffer->message (font, "start reordering khmer"))
314	{
315	if (hb_syllabic_insert_dotted_circles (font, buffer,
316	khmer_broken_cluster,
317	K_Cat(DOTTEDCIRCLE),
318	(unsigned) -`1`))
319	ret = true;
320
321	foreach_syllable (buffer, start, end)
322	reorder_syllable_khmer (plan, font->face, buffer, start, end);
323	(void) buffer->message (font, "end reordering khmer");
324	}
325	HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category);
326
327	return ret;
328	}
329
330
331	static bool
332	decompose_khmer (const hb_ot_shape_normalize_context_t *c,
333	hb_codepoint_t ab,
334	hb_codepoint_t *a,
335	hb_codepoint_t *b)
336	{
337	switch (ab)
338	{
339	/*
340	* Decompose split matras that don't have Unicode decompositions.
341	*/
342
343	/ Khmer /
344	case `0x17BEu` : a = `0x17C1u`; b= `0x17BEu`; return true;
345	case `0x17BFu` : a = `0x17C1u`; b= `0x17BFu`; return true;
346	case `0x17C0u` : a = `0x17C1u`; b= `0x17C0u`; return true;
347	case `0x17C4u` : a = `0x17C1u`; b= `0x17C4u`; return true;
348	case `0x17C5u` : a = `0x17C1u`; b= `0x17C5u`; return true;
349	}
350
351	return (bool) c->unicode->decompose (ab, a, b);
352	}
353
354	static bool
355	compose_khmer (const hb_ot_shape_normalize_context_t *c,
356	hb_codepoint_t a,
357	hb_codepoint_t b,
358	hb_codepoint_t *ab)
359	{
360	/ Avoid recomposing split matras. /
361	if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
362	return false;
363
364	return (bool) c->unicode->compose (a, b, ab);
365	}
366
367
368	const hb_ot_shaper_t _hb_ot_shaper_khmer =
369	{
370	collect_features_khmer,
371	override_features_khmer,
372	data_create_khmer,
373	data_destroy_khmer,
374	nullptr, / preprocess_text /
375	nullptr, / postprocess_glyphs /
376	decompose_khmer,
377	compose_khmer,
378	setup_masks_khmer,
379	nullptr, / reorder_marks /
380	HB_TAG_NONE, / gpos_tag /
381	HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
382	HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
383	false, / fallback_position /
384	};
385
386
387	#endif
388

Browse the source code of Godot/thirdparty/harfbuzz/src/hb-ot-shaper-khmer.cc