hb-ot-shape-complex-khmer.cc source code [MuPDF/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer.cc]

1	/*
2	* Copyright © 2011,2012 Google, Inc.
3	*
4	* This is part of HarfBuzz, a text shaping library.
5	*
6	* Permission is hereby granted, without written agreement and without
7	* license or royalty fees, to use, copy, modify, and distribute this
8	* software and its documentation for any purpose, provided that the
9	* above copyright notice and the following two paragraphs appear in
10	* all copies of this software.
11	*
12	* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13	* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14	* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15	* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16	* DAMAGE.
17	*
18	* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19	* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20	* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21	* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22	* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23	*
24	* Google Author(s): Behdad Esfahbod
25	*/
26
27	#include "hb-ot-shape-complex-khmer.hh"
28	#include "hb-ot-layout.hh"
29
30
/*
 * Khmer shaper.
 */
34
35	struct feature_list_t {
36	hb_tag_t tag;
37	hb_ot_map_feature_flags_t flags;
38	};
39
40	static const feature_list_t
41	khmer_features[] =
42	{
43	/*
44	* Basic features.
45	* These features are applied in order, one at a time, after reordering.
46	*/
47	{HB_TAG(`'p'`,`'r'`,`'e'`,`'f'`), F_NONE},
48	{HB_TAG(`'b'`,`'l'`,`'w'`,`'f'`), F_NONE},
49	{HB_TAG(`'a'`,`'b'`,`'v'`,`'f'`), F_NONE},
50	{HB_TAG(`'p'`,`'s'`,`'t'`,`'f'`), F_NONE},
51	{HB_TAG(`'c'`,`'f'`,`'a'`,`'r'`), F_NONE},
52	/*
53	* Other features.
54	* These features are applied all at once.
55	*/
56	{HB_TAG(`'p'`,`'r'`,`'e'`,`'s'`), F_GLOBAL},
57	{HB_TAG(`'a'`,`'b'`,`'v'`,`'s'`), F_GLOBAL},
58	{HB_TAG(`'b'`,`'l'`,`'w'`,`'s'`), F_GLOBAL},
59	{HB_TAG(`'p'`,`'s'`,`'t'`,`'s'`), F_GLOBAL},
60	/ Positioning features, though we don't care about the types. /
61	{HB_TAG(`'d'`,`'i'`,`'s'`,`'t'`), F_GLOBAL},
62	{HB_TAG(`'a'`,`'b'`,`'v'`,`'m'`), F_GLOBAL},
63	{HB_TAG(`'b'`,`'l'`,`'w'`,`'m'`), F_GLOBAL},
64	};
65
/*
 * Must be in the same order as the khmer_features array.
 */
/* Indices into khmer_features[] and into khmer_shape_plan_t::mask_array.
 * The enumerators must stay in the same order as the table entries. */
enum {
  /* Basic features (F_NONE in the table). */
  PREF,
  BLWF,
  ABVF,
  PSTF,
  CFAR,

  /* Other features (F_GLOBAL in the table). */
  _PRES,
  _ABVS,
  _BLWS,
  _PSTS,
  _DIST,
  _ABVM,
  _BLWM,

  /* Total number of table entries. */
  KHMER_NUM_FEATURES,
  /* Number of basic features == index of the first non-basic one. */
  KHMER_BASIC_FEATURES = _PRES /* Don't forget to update this! */
};
87
88	static void
89	setup_syllables (const hb_ot_shape_plan_t *plan,
90	hb_font_t *font,
91	hb_buffer_t *buffer);
92	static void
93	reorder (const hb_ot_shape_plan_t *plan,
94	hb_font_t *font,
95	hb_buffer_t *buffer);
96	static void
97	clear_syllables (const hb_ot_shape_plan_t *plan,
98	hb_font_t *font,
99	hb_buffer_t *buffer);
100
101	static void
102	collect_features_khmer (hb_ot_shape_planner_t *plan)
103	{
104	hb_ot_map_builder_t *map = &plan->map;
105
106	/ Do this before any lookups have been applied. /
107	map->add_gsub_pause (setup_syllables);
108	map->add_gsub_pause (reorder);
109
110	/ Testing suggests that Uniscribe does NOT pause between basic*
111	* features. Test with KhmerUI.ttf and the following three
112	* sequences:
113	*
114	* U+1789,U+17BC
115	* U+1789,U+17D2,U+1789
116	* U+1789,U+17D2,U+1789,U+17BC
117	*
118	* https://github.com/harfbuzz/harfbuzz/issues/974
119	*/
120	map->add_global_bool_feature (HB_TAG(`'l'`,`'o'`,`'c'`,`'l'`));
121	map->add_global_bool_feature (HB_TAG(`'c'`,`'c'`,`'m'`,`'p'`));
122
123	unsigned int i = `0`;
124	for (; i < KHMER_BASIC_FEATURES; i++) {
125	map->add_feature (khmer_features[i].tag, `1`, khmer_features[i].flags \| F_MANUAL_ZWJ \| F_MANUAL_ZWNJ);
126	}
127
128	map->add_gsub_pause (clear_syllables);
129
130	for (; i < KHMER_NUM_FEATURES; i++) {
131	map->add_feature (khmer_features[i].tag, `1`, khmer_features[i].flags \| F_MANUAL_ZWJ \| F_MANUAL_ZWNJ);
132	}
133
134	map->add_global_bool_feature (HB_TAG(`'c'`,`'a'`,`'l'`,`'t'`));
135	map->add_global_bool_feature (HB_TAG(`'c'`,`'l'`,`'i'`,`'g'`));
136
137	}
138
139	static void
140	override_features_khmer (hb_ot_shape_planner_t *plan)
141	{
142	/ Uniscribe does not apply 'kern' in Khmer. /
143	if (hb_options ().uniscribe_bug_compatible)
144	{
145	plan->map.add_feature (HB_TAG(`'k'`,`'e'`,`'r'`,`'n'`), `0`, F_GLOBAL);
146	}
147
148	plan->map.add_feature (HB_TAG(`'l'`,`'i'`,`'g'`,`'a'`), `0`, F_GLOBAL);
149	}
150
151
152	struct would_substitute_feature_t
153	{
154	inline void init (const hb_ot_map_t map, hb_tag_t feature_tag, bool* zero_context_)
155	{
156	zero_context = zero_context_;
157	map->get_stage_lookups (`0`/GSUB/,
158	map->get_feature_stage (`0`/GSUB/, feature_tag),
159	&lookups, &count);
160	}
161
162	inline bool would_substitute (const hb_codepoint_t *glyphs,
163	unsigned int glyphs_count,
164	hb_face_t face) const*
165	{
166	for (unsigned int i = `0`; i < count; i++)
167	if (hb_ot_layout_lookup_would_substitute_fast (face, lookups[i].index, glyphs, glyphs_count, zero_context))
168	return true;
169	return false;
170	}
171
172	private:
173	const hb_ot_map_t::lookup_map_t *lookups;
174	unsigned int count;
175	bool zero_context;
176	};
177
178	struct khmer_shape_plan_t
179	{
180	ASSERT_POD ();
181
182	inline bool get_virama_glyph (hb_font_t font, hb_codepoint_t pglyph) const
183	{
184	hb_codepoint_t glyph = virama_glyph;
185	if (unlikely (virama_glyph == (hb_codepoint_t) -`1`))
186	{
187	if (!font->get_nominal_glyph (`0x17D2u`, &glyph))
188	glyph = `0`;
189	/ Technically speaking, the spec says we should apply 'locl' to virama too.*
190	* Maybe one day... */
191
192	/ Our get_nominal_glyph() function needs a font, so we can't get the virama glyph*
193	* during shape planning... Instead, overwrite it here. It's safe. Don't worry! */
194	virama_glyph = glyph;
195	}
196
197	*pglyph = glyph;
198	return glyph != `0`;
199	}
200
201	mutable hb_codepoint_t virama_glyph;
202
203	would_substitute_feature_t pref;
204
205	hb_mask_t mask_array[KHMER_NUM_FEATURES];
206	};
207
208	static void *
209	data_create_khmer (const hb_ot_shape_plan_t *plan)
210	{
211	khmer_shape_plan_t khmer_plan = (khmer_shape_plan_t ) calloc (`1`, sizeof (khmer_shape_plan_t));
212	if (unlikely (!khmer_plan))
213	return nullptr;
214
215	khmer_plan->virama_glyph = (hb_codepoint_t) -`1`;
216
217	khmer_plan->pref.init (&plan->map, HB_TAG(`'p'`,`'r'`,`'e'`,`'f'`), true);
218
219	for (unsigned int i = `0`; i < ARRAY_LENGTH (khmer_plan->mask_array); i++)
220	khmer_plan->mask_array[i] = (khmer_features[i].flags & F_GLOBAL) ?
221	`0` : plan->map.get_1_mask (khmer_features[i].tag);
222
223	return khmer_plan;
224	}
225
/* Release shaper data previously returned by data_create_khmer().
 * `data` is passed straight to free(), so a null pointer is accepted. */
static void
data_destroy_khmer (void *data)
{
  free (data);
}
231
232
/* Syllable classes; the value is read back from the low four bits
 * (& 0x0F) of each glyph's syllable() field elsewhere in this file. */
enum syllable_type_t {
  consonant_syllable,
  broken_cluster,
  non_khmer_cluster,
};
238
239	#include "hb-ot-shape-complex-khmer-machine.hh"
240
241	static void
242	setup_masks_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED,
243	hb_buffer_t *buffer,
244	hb_font_t *font HB_UNUSED)
245	{
246	HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category);
247	HB_BUFFER_ALLOCATE_VAR (buffer, khmer_position);
248
249	/ We cannot setup masks here. We save information about characters*
250	* and setup masks later on in a pause-callback. */
251
252	unsigned int count = buffer->len;
253	hb_glyph_info_t *info = buffer->info;
254	for (unsigned int i = `0`; i < count; i++)
255	set_khmer_properties (info[i]);
256	}
257
258	static void
259	setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
260	hb_font_t *font HB_UNUSED,
261	hb_buffer_t *buffer)
262	{
263	find_syllables (buffer);
264	foreach_syllable (buffer, start, end)
265	buffer->unsafe_to_break (start, end);
266	}
267
268
/* Rules from:
 * https://docs.microsoft.com/en-us/typography/script-development/devanagari */
271
272	static void
273	reorder_consonant_syllable (const hb_ot_shape_plan_t *plan,
274	hb_face_t *face,
275	hb_buffer_t *buffer,
276	unsigned int start, unsigned int end)
277	{
278	const khmer_shape_plan_t khmer_plan = (const* khmer_shape_plan_t *) plan->data;
279	hb_glyph_info_t *info = buffer->info;
280
281	/ Setup masks. /
282	{
283	/ Post-base /
284	hb_mask_t mask = khmer_plan->mask_array[BLWF] \| khmer_plan->mask_array[ABVF] \| khmer_plan->mask_array[PSTF];
285	for (unsigned int i = start + `1`; i < end; i++)
286	info[i].mask \|= mask;
287	}
288
289	unsigned int num_coengs = `0`;
290	for (unsigned int i = start + `1`; i < end; i++)
291	{
292	/ """*
293	* When a COENG + (Cons \| IndV) combination are found (and subscript count
294	* is less than two) the character combination is handled according to the
295	* subscript type of the character following the COENG.
296	*
297	* ...
298	*
299	* Subscript Type 2 - The COENG + RO characters are reordered to immediately
300	* before the base glyph. Then the COENG + RO characters are assigned to have
301	* the 'pref' OpenType feature applied to them.
302	* """
303	*/
304	if (info[i].khmer_category() == OT_Coeng && num_coengs <= `2` && i + `1` < end)
305	{
306	num_coengs++;
307
308	if (info[i + `1`].khmer_category() == OT_Ra)
309	{
310	for (unsigned int j = `0`; j < `2`; j++)
311	info[i + j].mask \|= khmer_plan->mask_array[PREF];
312
313	/ Move the Coeng,Ro sequence to the start. /
314	buffer->merge_clusters (start, i + `2`);
315	hb_glyph_info_t t0 = info[i];
316	hb_glyph_info_t t1 = info[i + `1`];
317	memmove (&info[start + `2`], &info[start], (i - start) * sizeof (info[`0`]));
318	info[start] = t0;
319	info[start + `1`] = t1;
320
321	/ Mark the subsequent stuff with 'cfar'. Used in Khmer.*
322	* Read the feature spec.
323	* This allows distinguishing the following cases with MS Khmer fonts:
324	* U+1784,U+17D2,U+179A,U+17D2,U+1782
325	* U+1784,U+17D2,U+1782,U+17D2,U+179A
326	*/
327	if (khmer_plan->mask_array[CFAR])
328	for (unsigned int j = i + `2`; j < end; j++)
329	info[j].mask \|= khmer_plan->mask_array[CFAR];
330
331	num_coengs = `2`; / Done. /
332	}
333	}
334
335	/ Reorder left matra piece. /
336	else if (info[i].khmer_position() == POS_PRE_M)
337	{
338	/ Move to the start. /
339	buffer->merge_clusters (start, i + `1`);
340	hb_glyph_info_t t = info[i];
341	memmove (&info[start + `1`], &info[start], (i - start) * sizeof (info[`0`]));
342	info[start] = t;
343	}
344	}
345	}
346
347	static void
348	initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
349	hb_face_t *face,
350	hb_buffer_t *buffer,
351	unsigned int start, unsigned int end)
352	{
353	syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & `0x0F`);
354	switch (syllable_type)
355	{
356	case broken_cluster: / We already inserted dotted-circles, so just call the consonant_syllable. /
357	case consonant_syllable:
358	reorder_consonant_syllable (plan, face, buffer, start, end);
359	break;
360
361	case non_khmer_cluster:
362	break;
363	}
364	}
365
366	static inline void
367	insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
368	hb_font_t *font,
369	hb_buffer_t *buffer)
370	{
371	/ Note: This loop is extra overhead, but should not be measurable. /
372	bool has_broken_syllables = false;
373	unsigned int count = buffer->len;
374	hb_glyph_info_t *info = buffer->info;
375	for (unsigned int i = `0`; i < count; i++)
376	if ((info[i].syllable() & `0x0F`) == broken_cluster)
377	{
378	has_broken_syllables = true;
379	break;
380	}
381	if (likely (!has_broken_syllables))
382	return;
383
384
385	hb_codepoint_t dottedcircle_glyph;
386	if (!font->get_nominal_glyph (`0x25CCu`, &dottedcircle_glyph))
387	return;
388
389	hb_glyph_info_t dottedcircle = {`0`};
390	dottedcircle.codepoint = `0x25CCu`;
391	set_khmer_properties (dottedcircle);
392	dottedcircle.codepoint = dottedcircle_glyph;
393
394	buffer->clear_output ();
395
396	buffer->idx = `0`;
397	unsigned int last_syllable = `0`;
398	while (buffer->idx < buffer->len && buffer->successful)
399	{
400	unsigned int syllable = buffer->cur().syllable();
401	syllable_type_t syllable_type = (syllable_type_t) (syllable & `0x0F`);
402	if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
403	{
404	last_syllable = syllable;
405
406	hb_glyph_info_t ginfo = dottedcircle;
407	ginfo.cluster = buffer->cur().cluster;
408	ginfo.mask = buffer->cur().mask;
409	ginfo.syllable() = buffer->cur().syllable();
410	/ TODO Set glyph_props? /
411
412	/ Insert dottedcircle after possible Repha. /
413	while (buffer->idx < buffer->len && buffer->successful &&
414	last_syllable == buffer->cur().syllable() &&
415	buffer->cur().khmer_category() == OT_Repha)
416	buffer->next_glyph ();
417
418	buffer->output_info (ginfo);
419	}
420	else
421	buffer->next_glyph ();
422	}
423
424	buffer->swap_buffers ();
425	}
426
427	static void
428	reorder (const hb_ot_shape_plan_t *plan,
429	hb_font_t *font,
430	hb_buffer_t *buffer)
431	{
432	insert_dotted_circles (plan, font, buffer);
433
434	foreach_syllable (buffer, start, end)
435	initial_reordering_syllable (plan, font->face, buffer, start, end);
436
437	HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category);
438	HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_position);
439	}
440
441	static void
442	clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
443	hb_font_t *font HB_UNUSED,
444	hb_buffer_t *buffer)
445	{
446	/ TODO: In USE, we clear syllables right after reorder. Figure out*
447	* what Uniscribe does. */
448	hb_glyph_info_t *info = buffer->info;
449	unsigned int count = buffer->len;
450	for (unsigned int i = `0`; i < count; i++)
451	info[i].syllable() = `0`;
452	}
453
454
455	static bool
456	decompose_khmer (const hb_ot_shape_normalize_context_t *c,
457	hb_codepoint_t ab,
458	hb_codepoint_t *a,
459	hb_codepoint_t *b)
460	{
461	switch (ab)
462	{
463	/*
464	* Decompose split matras that don't have Unicode decompositions.
465	*/
466
467	/ Khmer /
468	case `0x17BEu` : a = `0x17C1u`; b= `0x17BEu`; return true;
469	case `0x17BFu` : a = `0x17C1u`; b= `0x17BFu`; return true;
470	case `0x17C0u` : a = `0x17C1u`; b= `0x17C0u`; return true;
471	case `0x17C4u` : a = `0x17C1u`; b= `0x17C4u`; return true;
472	case `0x17C5u` : a = `0x17C1u`; b= `0x17C5u`; return true;
473	}
474
475	return (bool) c->unicode->decompose (ab, a, b);
476	}
477
478	static bool
479	compose_khmer (const hb_ot_shape_normalize_context_t *c,
480	hb_codepoint_t a,
481	hb_codepoint_t b,
482	hb_codepoint_t *ab)
483	{
484	/ Avoid recomposing split matras. /
485	if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
486	return false;
487
488	return (bool) c->unicode->compose (a, b, ab);
489	}
490
491
492	const hb_ot_complex_shaper_t _hb_ot_complex_shaper_khmer =
493	{
494	collect_features_khmer,
495	override_features_khmer,
496	data_create_khmer,
497	data_destroy_khmer,
498	nullptr, / preprocess_text /
499	nullptr, / postprocess_glyphs /
500	HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
501	decompose_khmer,
502	compose_khmer,
503	setup_masks_khmer,
504	nullptr, / disable_otl /
505	nullptr, / reorder_marks /
506	HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
507	false, / fallback_position /
508	};
509

Browse the source code of MuPDF/thirdparty/harfbuzz/src/hb-ot-shape-complex-khmer.cc