hb-ot-shape-complex-indic.cc source code [MuPDF/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic.cc]

1	/*
2	* Copyright © 2011,2012 Google, Inc.
3	*
4	* This is part of HarfBuzz, a text shaping library.
5	*
6	* Permission is hereby granted, without written agreement and without
7	* license or royalty fees, to use, copy, modify, and distribute this
8	* software and its documentation for any purpose, provided that the
9	* above copyright notice and the following two paragraphs appear in
10	* all copies of this software.
11	*
12	* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13	* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14	* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15	* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16	* DAMAGE.
17	*
18	* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19	* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20	* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21	* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22	* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23	*
24	* Google Author(s): Behdad Esfahbod
25	*/
26
27	#include "hb-ot-shape-complex-indic.hh"
28	#include "hb-ot-layout.hh"
29
30
31	/*
32	* Indic shaper.
33	*/
34
35
36	/*
37	* Indic configurations. Note that we do not want to keep every single script-specific
38	* behavior in these tables necessarily. This should mainly be used for per-script
39	* properties that are cheaper keeping here, than in the code. Ie. if, say, one and
40	* only one script has an exception, that one script can be if'ed directly in the code,
41	* instead of adding a new flag in these structs.
42	*/
43
/* Selects which base-consonant search a script's syllables go through. */
enum base_position_t
{
  BASE_POS_LAST_SINHALA,
  BASE_POS_LAST
};
48	enum reph_position_t {
49	REPH_POS_AFTER_MAIN = POS_AFTER_MAIN,
50	REPH_POS_BEFORE_SUB = POS_BEFORE_SUB,
51	REPH_POS_AFTER_SUB = POS_AFTER_SUB,
52	REPH_POS_BEFORE_POST = POS_BEFORE_POST,
53	REPH_POS_AFTER_POST = POS_AFTER_POST
54	};
/* Which character sequence at the start of a syllable forms a Reph. */
enum reph_mode_t
{
  REPH_MODE_IMPLICIT,  /* Reph formed out of initial Ra,H sequence. */
  REPH_MODE_EXPLICIT,  /* Reph formed out of initial Ra,H,ZWJ sequence. */
  REPH_MODE_LOG_REPHA  /* Encoded Repha character, needs reordering. */
};
/* Where in the syllable the 'blwf' (below-forms) feature is applied. */
enum blwf_mode_t
{
  BLWF_MODE_PRE_AND_POST, /* Below-forms feature applied to pre-base and post-base. */
  BLWF_MODE_POST_ONLY     /* Below-forms feature applied to post-base only. */
};
64	struct indic_config_t
65	{
66	hb_script_t script;
67	bool has_old_spec;
68	hb_codepoint_t virama;
69	base_position_t base_pos;
70	reph_position_t reph_pos;
71	reph_mode_t reph_mode;
72	blwf_mode_t blwf_mode;
73	};
74
75	static const indic_config_t indic_configs[] =
76	{
77	/ Default. Should be first. /
78	{HB_SCRIPT_INVALID, false, `0`,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
79	{HB_SCRIPT_DEVANAGARI,true, `0x094Du`,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
80	{HB_SCRIPT_BENGALI, true, `0x09CDu`,BASE_POS_LAST, REPH_POS_AFTER_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
81	{HB_SCRIPT_GURMUKHI, true, `0x0A4Du`,BASE_POS_LAST, REPH_POS_BEFORE_SUB, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
82	{HB_SCRIPT_GUJARATI, true, `0x0ACDu`,BASE_POS_LAST, REPH_POS_BEFORE_POST,REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
83	{HB_SCRIPT_ORIYA, true, `0x0B4Du`,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
84	{HB_SCRIPT_TAMIL, true, `0x0BCDu`,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_PRE_AND_POST},
85	{HB_SCRIPT_TELUGU, true, `0x0C4Du`,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_POST_ONLY},
86	{HB_SCRIPT_KANNADA, true, `0x0CCDu`,BASE_POS_LAST, REPH_POS_AFTER_POST, REPH_MODE_IMPLICIT, BLWF_MODE_POST_ONLY},
87	{HB_SCRIPT_MALAYALAM, true, `0x0D4Du`,BASE_POS_LAST, REPH_POS_AFTER_MAIN, REPH_MODE_LOG_REPHA,BLWF_MODE_PRE_AND_POST},
88	{HB_SCRIPT_SINHALA, false,`0x0DCAu`,BASE_POS_LAST_SINHALA,
89	REPH_POS_AFTER_POST, REPH_MODE_EXPLICIT, BLWF_MODE_PRE_AND_POST},
90	};
91
92
93
94	/*
95	* Indic shaper.
96	*/
97
98	struct feature_list_t {
99	hb_tag_t tag;
100	hb_ot_map_feature_flags_t flags;
101	};
102
103	static const feature_list_t
104	indic_features[] =
105	{
106	/*
107	* Basic features.
108	* These features are applied in order, one at a time, after initial_reordering.
109	*/
110	{HB_TAG(`'n'`,`'u'`,`'k'`,`'t'`), F_GLOBAL},
111	{HB_TAG(`'a'`,`'k'`,`'h'`,`'n'`), F_GLOBAL},
112	{HB_TAG(`'r'`,`'p'`,`'h'`,`'f'`), F_NONE},
113	{HB_TAG(`'r'`,`'k'`,`'r'`,`'f'`), F_GLOBAL},
114	{HB_TAG(`'p'`,`'r'`,`'e'`,`'f'`), F_NONE},
115	{HB_TAG(`'b'`,`'l'`,`'w'`,`'f'`), F_NONE},
116	{HB_TAG(`'a'`,`'b'`,`'v'`,`'f'`), F_NONE},
117	{HB_TAG(`'h'`,`'a'`,`'l'`,`'f'`), F_NONE},
118	{HB_TAG(`'p'`,`'s'`,`'t'`,`'f'`), F_NONE},
119	{HB_TAG(`'v'`,`'a'`,`'t'`,`'u'`), F_GLOBAL},
120	{HB_TAG(`'c'`,`'j'`,`'c'`,`'t'`), F_GLOBAL},
121	/*
122	* Other features.
123	* These features are applied all at once, after final_reordering.
124	* Default Bengali font in Windows for example has intermixed
125	* lookups for init,pres,abvs,blws features.
126	*/
127	{HB_TAG(`'i'`,`'n'`,`'i'`,`'t'`), F_NONE},
128	{HB_TAG(`'p'`,`'r'`,`'e'`,`'s'`), F_GLOBAL},
129	{HB_TAG(`'a'`,`'b'`,`'v'`,`'s'`), F_GLOBAL},
130	{HB_TAG(`'b'`,`'l'`,`'w'`,`'s'`), F_GLOBAL},
131	{HB_TAG(`'p'`,`'s'`,`'t'`,`'s'`), F_GLOBAL},
132	{HB_TAG(`'h'`,`'a'`,`'l'`,`'n'`), F_GLOBAL},
133	/ Positioning features, though we don't care about the types. /
134	{HB_TAG(`'d'`,`'i'`,`'s'`,`'t'`), F_GLOBAL},
135	{HB_TAG(`'a'`,`'b'`,`'v'`,`'m'`), F_GLOBAL},
136	{HB_TAG(`'b'`,`'l'`,`'w'`,`'m'`), F_GLOBAL},
137	};
138
139	/*
140	* Must be in the same order as the indic_features array.
141	*/
/* Indices into indic_features[] and indic_shape_plan_t::mask_array[].
 * INDIC_BASIC_FEATURES marks the boundary between the features applied one
 * at a time (with a pause after each) and those applied together. */
enum {
  _NUKT,
  _AKHN,
  RPHF,
  _RKRF,
  PREF,
  BLWF,
  ABVF,
  HALF,
  PSTF,
  _VATU,
  _CJCT,

  INIT,
  _PRES,
  _ABVS,
  _BLWS,
  _PSTS,
  _HALN,
  _DIST,
  _ABVM,
  _BLWM,

  INDIC_NUM_FEATURES,
  INDIC_BASIC_FEATURES = INIT /* Don't forget to update this! */
};
168
169	static void
170	setup_syllables (const hb_ot_shape_plan_t *plan,
171	hb_font_t *font,
172	hb_buffer_t *buffer);
173	static void
174	initial_reordering (const hb_ot_shape_plan_t *plan,
175	hb_font_t *font,
176	hb_buffer_t *buffer);
177	static void
178	final_reordering (const hb_ot_shape_plan_t *plan,
179	hb_font_t *font,
180	hb_buffer_t *buffer);
181	static void
182	clear_syllables (const hb_ot_shape_plan_t *plan,
183	hb_font_t *font,
184	hb_buffer_t *buffer);
185
186	static void
187	collect_features_indic (hb_ot_shape_planner_t *plan)
188	{
189	hb_ot_map_builder_t *map = &plan->map;
190
191	/ Do this before any lookups have been applied. /
192	map->add_gsub_pause (setup_syllables);
193
194	map->add_global_bool_feature (HB_TAG(`'l'`,`'o'`,`'c'`,`'l'`));
195	/ The Indic specs do not require ccmp, but we apply it here since if*
196	* there is a use of it, it's typically at the beginning. */
197	map->add_global_bool_feature (HB_TAG(`'c'`,`'c'`,`'m'`,`'p'`));
198
199
200	unsigned int i = `0`;
201	map->add_gsub_pause (initial_reordering);
202	for (; i < INDIC_BASIC_FEATURES; i++) {
203	map->add_feature (indic_features[i].tag, `1`, indic_features[i].flags \| F_MANUAL_ZWJ \| F_MANUAL_ZWNJ);
204	map->add_gsub_pause (nullptr);
205	}
206	map->add_gsub_pause (final_reordering);
207	for (; i < INDIC_NUM_FEATURES; i++) {
208	map->add_feature (indic_features[i].tag, `1`, indic_features[i].flags \| F_MANUAL_ZWJ \| F_MANUAL_ZWNJ);
209	}
210
211	map->add_global_bool_feature (HB_TAG(`'c'`,`'a'`,`'l'`,`'t'`));
212	map->add_global_bool_feature (HB_TAG(`'c'`,`'l'`,`'i'`,`'g'`));
213
214	map->add_gsub_pause (clear_syllables);
215	}
216
217	static void
218	override_features_indic (hb_ot_shape_planner_t *plan)
219	{
220	plan->map.add_feature (HB_TAG(`'l'`,`'i'`,`'g'`,`'a'`), `0`, F_GLOBAL);
221	}
222
223
224	struct would_substitute_feature_t
225	{
226	inline void init (const hb_ot_map_t map, hb_tag_t feature_tag, bool* zero_context_)
227	{
228	zero_context = zero_context_;
229	map->get_stage_lookups (`0`/GSUB/,
230	map->get_feature_stage (`0`/GSUB/, feature_tag),
231	&lookups, &count);
232	}
233
234	inline bool would_substitute (const hb_codepoint_t *glyphs,
235	unsigned int glyphs_count,
236	hb_face_t face) const*
237	{
238	for (unsigned int i = `0`; i < count; i++)
239	if (hb_ot_layout_lookup_would_substitute_fast (face, lookups[i].index, glyphs, glyphs_count, zero_context))
240	return true;
241	return false;
242	}
243
244	private:
245	const hb_ot_map_t::lookup_map_t *lookups;
246	unsigned int count;
247	bool zero_context;
248	};
249
250	struct indic_shape_plan_t
251	{
252	ASSERT_POD ();
253
254	inline bool load_virama_glyph (hb_font_t font, hb_codepoint_t pglyph) const
255	{
256	hb_codepoint_t glyph = virama_glyph.get_relaxed ();
257	if (unlikely (glyph == (hb_codepoint_t) -`1`))
258	{
259	if (!config->virama \|\| !font->get_nominal_glyph (config->virama, &glyph))
260	glyph = `0`;
261	/ Technically speaking, the spec says we should apply 'locl' to virama too.*
262	* Maybe one day... */
263
264	/ Our get_nominal_glyph() function needs a font, so we can't get the virama glyph*
265	* during shape planning... Instead, overwrite it here. */
266	virama_glyph.set_relaxed ((int) glyph);
267	}
268
269	*pglyph = glyph;
270	return glyph != `0`;
271	}
272
273	const indic_config_t *config;
274
275	bool is_old_spec;
276	mutable hb_atomic_int_t virama_glyph;
277
278	would_substitute_feature_t rphf;
279	would_substitute_feature_t pref;
280	would_substitute_feature_t blwf;
281	would_substitute_feature_t pstf;
282
283	hb_mask_t mask_array[INDIC_NUM_FEATURES];
284	};
285
286	static void *
287	data_create_indic (const hb_ot_shape_plan_t *plan)
288	{
289	indic_shape_plan_t indic_plan = (indic_shape_plan_t ) calloc (`1`, sizeof (indic_shape_plan_t));
290	if (unlikely (!indic_plan))
291	return nullptr;
292
293	indic_plan->config = &indic_configs[`0`];
294	for (unsigned int i = `1`; i < ARRAY_LENGTH (indic_configs); i++)
295	if (plan->props.script == indic_configs[i].script) {
296	indic_plan->config = &indic_configs[i];
297	break;
298	}
299
300	indic_plan->is_old_spec = indic_plan->config->has_old_spec && ((plan->map.chosen_script[`0`] & `0x000000FFu`) != `'2'`);
301	indic_plan->virama_glyph.set_relaxed (-`1`);
302
303	/ Use zero-context would_substitute() matching for new-spec of the main*
304	* Indic scripts, and scripts with one spec only, but not for old-specs.
305	* The new-spec for all dual-spec scripts says zero-context matching happens.
306	*
307	* However, testing with Malayalam shows that old and new spec both allow
308	* context. Testing with Bengali new-spec however shows that it doesn't.
309	* So, the heuristic here is the way it is. It should only be changed,
310	* as we discover more cases of what Windows does. DON'T TOUCH OTHERWISE.
311	*/
312	bool zero_context = !indic_plan->is_old_spec && plan->props.script != HB_SCRIPT_MALAYALAM;
313	indic_plan->rphf.init (&plan->map, HB_TAG(`'r'`,`'p'`,`'h'`,`'f'`), zero_context);
314	indic_plan->pref.init (&plan->map, HB_TAG(`'p'`,`'r'`,`'e'`,`'f'`), zero_context);
315	indic_plan->blwf.init (&plan->map, HB_TAG(`'b'`,`'l'`,`'w'`,`'f'`), zero_context);
316	indic_plan->pstf.init (&plan->map, HB_TAG(`'p'`,`'s'`,`'t'`,`'f'`), zero_context);
317
318	for (unsigned int i = `0`; i < ARRAY_LENGTH (indic_plan->mask_array); i++)
319	indic_plan->mask_array[i] = (indic_features[i].flags & F_GLOBAL) ?
320	`0` : plan->map.get_1_mask (indic_features[i].tag);
321
322	return indic_plan;
323	}
324
/* Releases the plan data allocated by data_create_indic(). */
static void data_destroy_indic (void *data)
{
  free (data);
}
330
331	static indic_position_t
332	consonant_position_from_face (const indic_shape_plan_t *indic_plan,
333	const hb_codepoint_t consonant,
334	const hb_codepoint_t virama,
335	hb_face_t *face)
336	{
337	/ For old-spec, the order of glyphs is Consonant,Virama,*
338	* whereas for new-spec, it's Virama,Consonant. However,
339	* some broken fonts (like Free Sans) simply copied lookups
340	* from old-spec to new-spec without modification.
341	* And oddly enough, Uniscribe seems to respect those lookups.
342	* Eg. in the sequence U+0924,U+094D,U+0930, Uniscribe finds
343	* base at 0. The font however, only has lookups matching
344	* 930,94D in 'blwf', not the expected 94D,930 (with new-spec
345	* table). As such, we simply match both sequences. Seems
346	* to work. */
347	hb_codepoint_t glyphs[`3`] = {virama, consonant, virama};
348	if (indic_plan->blwf.would_substitute (glyphs , `2`, face) \|\|
349	indic_plan->blwf.would_substitute (glyphs+`1`, `2`, face))
350	return POS_BELOW_C;
351	if (indic_plan->pstf.would_substitute (glyphs , `2`, face) \|\|
352	indic_plan->pstf.would_substitute (glyphs+`1`, `2`, face))
353	return POS_POST_C;
354	if (indic_plan->pref.would_substitute (glyphs , `2`, face) \|\|
355	indic_plan->pref.would_substitute (glyphs+`1`, `2`, face))
356	return POS_POST_C;
357	return POS_BASE_C;
358	}
359
360
/* Syllable kinds.  The kind is read back from the low four bits of a
 * glyph's syllable() value. */
enum syllable_type_t
{
  consonant_syllable,
  vowel_syllable,
  standalone_cluster,
  symbol_cluster,
  broken_cluster,
  non_indic_cluster,
};
369
370	#include "hb-ot-shape-complex-indic-machine.hh"
371
372
373	static void
374	setup_masks_indic (const hb_ot_shape_plan_t *plan HB_UNUSED,
375	hb_buffer_t *buffer,
376	hb_font_t *font HB_UNUSED)
377	{
378	HB_BUFFER_ALLOCATE_VAR (buffer, indic_category);
379	HB_BUFFER_ALLOCATE_VAR (buffer, indic_position);
380
381	/ We cannot setup masks here. We save information about characters*
382	* and setup masks later on in a pause-callback. */
383
384	unsigned int count = buffer->len;
385	hb_glyph_info_t *info = buffer->info;
386	for (unsigned int i = `0`; i < count; i++)
387	set_indic_properties (info[i]);
388	}
389
390	static void
391	setup_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
392	hb_font_t *font HB_UNUSED,
393	hb_buffer_t *buffer)
394	{
395	find_syllables (buffer);
396	foreach_syllable (buffer, start, end)
397	buffer->unsafe_to_break (start, end);
398	}
399
400	static int
401	compare_indic_order (const hb_glyph_info_t pa, const* hb_glyph_info_t *pb)
402	{
403	int a = pa->indic_position();
404	int b = pb->indic_position();
405
406	return a < b ? -`1` : a == b ? `0` : +`1`;
407	}
408
409
410
411	static void
412	update_consonant_positions (const hb_ot_shape_plan_t *plan,
413	hb_font_t *font,
414	hb_buffer_t *buffer)
415	{
416	const indic_shape_plan_t indic_plan = (const* indic_shape_plan_t *) plan->data;
417
418	if (indic_plan->config->base_pos != BASE_POS_LAST)
419	return;
420
421	hb_codepoint_t virama;
422	if (indic_plan->load_virama_glyph (font, &virama))
423	{
424	hb_face_t *face = font->face;
425	unsigned int count = buffer->len;
426	hb_glyph_info_t *info = buffer->info;
427	for (unsigned int i = `0`; i < count; i++)
428	if (info[i].indic_position() == POS_BASE_C)
429	{
430	hb_codepoint_t consonant = info[i].codepoint;
431	info[i].indic_position() = consonant_position_from_face (indic_plan, consonant, virama, face);
432	}
433	}
434	}
435
436
/* Rules from:
 * https://docs.microsoft.com/en-us/typography/script-development/devanagari */
439
440	static void
441	initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
442	hb_face_t *face,
443	hb_buffer_t *buffer,
444	unsigned int start, unsigned int end)
445	{
446	const indic_shape_plan_t indic_plan = (const* indic_shape_plan_t *) plan->data;
447	hb_glyph_info_t *info = buffer->info;
448
449	/ https://github.com/harfbuzz/harfbuzz/issues/435#issuecomment-335560167*
450	* // For compatibility with legacy usage in Kannada,
451	* // Ra+h+ZWJ must behave like Ra+ZWJ+h...
452	*/
453	if (buffer->props.script == HB_SCRIPT_KANNADA &&
454	start + `3` <= end &&
455	is_one_of (info[start ], FLAG (OT_Ra)) &&
456	is_one_of (info[start+`1`], FLAG (OT_H)) &&
457	is_one_of (info[start+`2`], FLAG (OT_ZWJ)))
458	{
459	buffer->merge_clusters (start+`1`, start+`3`);
460	hb_glyph_info_t tmp = info[start+`1`];
461	info[start+`1`] = info[start+`2`];
462	info[start+`2`] = tmp;
463	}
464
465	/ 1. Find base consonant:*
466	*
467	* The shaping engine finds the base consonant of the syllable, using the
468	* following algorithm: starting from the end of the syllable, move backwards
469	* until a consonant is found that does not have a below-base or post-base
470	* form (post-base forms have to follow below-base forms), or that is not a
471	* pre-base-reordering Ra, or arrive at the first consonant. The consonant
472	* stopped at will be the base.
473	*
474	* o If the syllable starts with Ra + Halant (in a script that has Reph)
475	* and has more than one consonant, Ra is excluded from candidates for
476	* base consonants.
477	*/
478
479	unsigned int base = end;
480	bool has_reph = false;
481
482	{
483	/ -> If the syllable starts with Ra + Halant (in a script that has Reph)*
484	* and has more than one consonant, Ra is excluded from candidates for
485	* base consonants. */
486	unsigned int limit = start;
487	if (indic_plan->mask_array[RPHF] &&
488	start + `3` <= end &&
489	(
490	(indic_plan->config->reph_mode == REPH_MODE_IMPLICIT && !is_joiner (info[start + `2`])) \|\|
491	(indic_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + `2`].indic_category() == OT_ZWJ)
492	))
493	{
494	/ See if it matches the 'rphf' feature. /
495	hb_codepoint_t glyphs[`3`] = {info[start].codepoint,
496	info[start + `1`].codepoint,
497	indic_plan->config->reph_mode == REPH_MODE_EXPLICIT ?
498	info[start + `2`].codepoint : `0`};
499	if (indic_plan->rphf.would_substitute (glyphs, `2`, face) \|\|
500	(indic_plan->config->reph_mode == REPH_MODE_EXPLICIT &&
501	indic_plan->rphf.would_substitute (glyphs, `3`, face)))
502	{
503	limit += `2`;
504	while (limit < end && is_joiner (info[limit]))
505	limit++;
506	base = start;
507	has_reph = true;
508	}
509	} else if (indic_plan->config->reph_mode == REPH_MODE_LOG_REPHA && info[start].indic_category() == OT_Repha)
510	{
511	limit += `1`;
512	while (limit < end && is_joiner (info[limit]))
513	limit++;
514	base = start;
515	has_reph = true;
516	}
517
518	switch (indic_plan->config->base_pos)
519	{
520	case BASE_POS_LAST:
521	{
522	/ -> starting from the end of the syllable, move backwards /
523	unsigned int i = end;
524	bool seen_below = false;
525	do {
526	i--;
527	/ -> until a consonant is found /
528	if (is_consonant (info[i]))
529	{
530	/ -> that does not have a below-base or post-base form*
531	* (post-base forms have to follow below-base forms), */
532	if (info[i].indic_position() != POS_BELOW_C &&
533	(info[i].indic_position() != POS_POST_C \|\| seen_below))
534	{
535	base = i;
536	break;
537	}
538	if (info[i].indic_position() == POS_BELOW_C)
539	seen_below = true;
540
541	/ -> or that is not a pre-base-reordering Ra,*
542	*
543	* IMPLEMENTATION NOTES:
544	*
545	* Our pre-base-reordering Ra's are marked POS_POST_C, so will be skipped
546	* by the logic above already.
547	*/
548
549	/ -> or arrive at the first consonant. The consonant stopped at will*
550	* be the base. */
551	base = i;
552	}
553	else
554	{
555	/ A ZWJ after a Halant stops the base search, and requests an explicit*
556	* half form.
557	* A ZWJ before a Halant, requests a subjoined form instead, and hence
558	* search continues. This is particularly important for Bengali
559	* sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya. */
560	if (start < i &&
561	info[i].indic_category() == OT_ZWJ &&
562	info[i - `1`].indic_category() == OT_H)
563	break;
564	}
565	} while (i > limit);
566	}
567	break;
568
569	case BASE_POS_LAST_SINHALA:
570	{
571	/ Sinhala base positioning is slightly different from main Indic, in that:*
572	* 1. Its ZWJ behavior is different,
573	* 2. We don't need to look into the font for consonant positions.
574	*/
575
576	if (!has_reph)
577	base = limit;
578
579	/ Find the last base consonant that is not blocked by ZWJ. If there is*
580	* a ZWJ right before a base consonant, that would request a subjoined form. */
581	for (unsigned int i = limit; i < end; i++)
582	if (is_consonant (info[i]))
583	{
584	if (limit < i && info[i - `1`].indic_category() == OT_ZWJ)
585	break;
586	else
587	base = i;
588	}
589
590	/ Mark all subsequent consonants as below. /
591	for (unsigned int i = base + `1`; i < end; i++)
592	if (is_consonant (info[i]))
593	info[i].indic_position() = POS_BELOW_C;
594	}
595	break;
596	}
597
598	/ -> If the syllable starts with Ra + Halant (in a script that has Reph)*
599	* and has more than one consonant, Ra is excluded from candidates for
600	* base consonants.
601	*
602	* Only do this for unforced Reph. (ie. not for Ra,H,ZWJ. */
603	if (has_reph && base == start && limit - base <= `2`) {
604	/ Have no other consonant, so Reph is not formed and Ra becomes base. /
605	has_reph = false;
606	}
607	}
608
609
610	/ 2. Decompose and reorder Matras:*
611	*
612	* Each matra and any syllable modifier sign in the syllable are moved to the
613	* appropriate position relative to the consonant(s) in the syllable. The
614	* shaping engine decomposes two- or three-part matras into their constituent
615	* parts before any repositioning. Matra characters are classified by which
616	* consonant in a conjunct they have affinity for and are reordered to the
617	* following positions:
618	*
619	* o Before first half form in the syllable
620	* o After subjoined consonants
621	* o After post-form consonant
622	* o After main consonant (for above marks)
623	*
624	* IMPLEMENTATION NOTES:
625	*
626	* The normalize() routine has already decomposed matras for us, so we don't
627	* need to worry about that.
628	*/
629
630
631	/ 3. Reorder marks to canonical order:*
632	*
633	* Adjacent nukta and halant or nukta and vedic sign are always repositioned
634	* if necessary, so that the nukta is first.
635	*
636	* IMPLEMENTATION NOTES:
637	*
638	* We don't need to do this: the normalize() routine already did this for us.
639	*/
640
641
642	/ Reorder characters /
643
644	for (unsigned int i = start; i < base; i++)
645	info[i].indic_position() = MIN (POS_PRE_C, (indic_position_t) info[i].indic_position());
646
647	if (base < end)
648	info[base].indic_position() = POS_BASE_C;
649
650	/ Mark final consonants. A final consonant is one appearing after a matra.*
651	* Happens in Sinhala. */
652	for (unsigned int i = base + `1`; i < end; i++)
653	if (info[i].indic_category() == OT_M) {
654	for (unsigned int j = i + `1`; j < end; j++)
655	if (is_consonant (info[j])) {
656	info[j].indic_position() = POS_FINAL_C;
657	break;
658	}
659	break;
660	}
661
662	/ Handle beginning Ra /
663	if (has_reph)
664	info[start].indic_position() = POS_RA_TO_BECOME_REPH;
665
666	/ For old-style Indic script tags, move the first post-base Halant after*
667	* last consonant.
668	*
669	* Reports suggest that in some scripts Uniscribe does this only if there
670	* is not a Halant after last consonant already. We know that is the
671	* case for Kannada, while it reorders unconditionally in other scripts,
672	* eg. Malayalam, Bengali, and Devanagari. We don't currently know about
673	* other scripts, so we blacklist Kannada.
674	*
675	* Kannada test case:
676	* U+0C9A,U+0CCD,U+0C9A,U+0CCD
677	* With some versions of Lohit Kannada.
678	* https://bugs.freedesktop.org/show_bug.cgi?id=59118
679	*
680	* Malayalam test case:
681	* U+0D38,U+0D4D,U+0D31,U+0D4D,U+0D31,U+0D4D
682	* With lohit-ttf-20121122/Lohit-Malayalam.ttf
683	*
684	* Bengali test case:
685	* U+0998,U+09CD,U+09AF,U+09CD
686	* With Windows XP vrinda.ttf
687	* https://github.com/harfbuzz/harfbuzz/issues/1073
688	*
689	* Devanagari test case:
690	* U+091F,U+094D,U+0930,U+094D
691	* With chandas.ttf
692	* https://github.com/harfbuzz/harfbuzz/issues/1071
693	*/
694	if (indic_plan->is_old_spec)
695	{
696	bool disallow_double_halants = buffer->props.script == HB_SCRIPT_KANNADA;
697	for (unsigned int i = base + `1`; i < end; i++)
698	if (info[i].indic_category() == OT_H)
699	{
700	unsigned int j;
701	for (j = end - `1`; j > i; j--)
702	if (is_consonant (info[j]) \|\|
703	(disallow_double_halants && info[j].indic_category() == OT_H))
704	break;
705	if (info[j].indic_category() != OT_H && j > i) {
706	/ Move Halant to after last consonant. /
707	hb_glyph_info_t t = info[i];
708	memmove (&info[i], &info[i + `1`], (j - i) * sizeof (info[`0`]));
709	info[j] = t;
710	}
711	break;
712	}
713	}
714
715	/ Attach misc marks to previous char to move with them. /
716	{
717	indic_position_t last_pos = POS_START;
718	for (unsigned int i = start; i < end; i++)
719	{
720	if ((FLAG_UNSAFE (info[i].indic_category()) & (JOINER_FLAGS \| FLAG (OT_N) \| FLAG (OT_RS) \| MEDIAL_FLAGS \| FLAG (OT_H))))
721	{
722	info[i].indic_position() = last_pos;
723	if (unlikely (info[i].indic_category() == OT_H &&
724	info[i].indic_position() == POS_PRE_M))
725	{
726	/*
727	* Uniscribe doesn't move the Halant with Left Matra.
728	* TEST: U+092B,U+093F,U+094DE
729	* We follow. This is important for the Sinhala
730	* U+0DDA split matra since it decomposes to U+0DD9,U+0DCA
731	* where U+0DD9 is a left matra and U+0DCA is the virama.
732	* We don't want to move the virama with the left matra.
733	* TEST: U+0D9A,U+0DDA
734	*/
735	for (unsigned int j = i; j > start; j--)
736	if (info[j - `1`].indic_position() != POS_PRE_M) {
737	info[i].indic_position() = info[j - `1`].indic_position();
738	break;
739	}
740	}
741	} else if (info[i].indic_position() != POS_SMVD) {
742	last_pos = (indic_position_t) info[i].indic_position();
743	}
744	}
745	}
746	/ For post-base consonants let them own anything before them*
747	* since the last consonant or matra. */
748	{
749	unsigned int last = base;
750	for (unsigned int i = base + `1`; i < end; i++)
751	if (is_consonant (info[i]))
752	{
753	for (unsigned int j = last + `1`; j < i; j++)
754	if (info[j].indic_position() < POS_SMVD)
755	info[j].indic_position() = info[i].indic_position();
756	last = i;
757	} else if (info[i].indic_category() == OT_M)
758	last = i;
759	}
760
761
762	{
763	/ Use syllable() for sort accounting temporarily. /
764	unsigned int syllable = info[start].syllable();
765	for (unsigned int i = start; i < end; i++)
766	info[i].syllable() = i - start;
767
768	/ Sit tight, rock 'n roll! /
769	hb_stable_sort (info + start, end - start, compare_indic_order);
770	/ Find base again /
771	base = end;
772	for (unsigned int i = start; i < end; i++)
773	if (info[i].indic_position() == POS_BASE_C)
774	{
775	base = i;
776	break;
777	}
778	/ Things are out-of-control for post base positions, they may shuffle*
779	* around like crazy. In old-spec mode, we move halants around, so in
780	* that case merge all clusters after base. Otherwise, check the sort
781	* order and merge as needed.
782	* For pre-base stuff, we handle cluster issues in final reordering.
783	*
784	* We could use buffer->sort() for this, if there was no special
785	* reordering of pre-base stuff happening later...
786	*/
787	if (indic_plan->is_old_spec \|\| end - base > `127`)
788	buffer->merge_clusters (base, end);
789	else
790	{
791	/ Note! syllable() is a one-byte field. /
792	for (unsigned int i = base; i < end; i++)
793	if (info[i].syllable() != `255`)
794	{
795	unsigned int max = i;
796	unsigned int j = start + info[i].syllable();
797	while (j != i)
798	{
799	max = MAX (max, j);
800	unsigned int next = start + info[j].syllable();
801	info[j].syllable() = `255`; / So we don't process j later again. /
802	j = next;
803	}
804	if (i != max)
805	buffer->merge_clusters (i, max + `1`);
806	}
807	}
808
809	/ Put syllable back in. /
810	for (unsigned int i = start; i < end; i++)
811	info[i].syllable() = syllable;
812	}
813
814	/ Setup masks now /
815
816	{
817	hb_mask_t mask;
818
819	/ Reph /
820	for (unsigned int i = start; i < end && info[i].indic_position() == POS_RA_TO_BECOME_REPH; i++)
821	info[i].mask \|= indic_plan->mask_array[RPHF];
822
823	/ Pre-base /
824	mask = indic_plan->mask_array[HALF];
825	if (!indic_plan->is_old_spec &&
826	indic_plan->config->blwf_mode == BLWF_MODE_PRE_AND_POST)
827	mask \|= indic_plan->mask_array[BLWF];
828	for (unsigned int i = start; i < base; i++)
829	info[i].mask \|= mask;
830	/ Base /
831	mask = `0`;
832	if (base < end)
833	info[base].mask \|= mask;
834	/ Post-base /
835	mask = indic_plan->mask_array[BLWF] \| indic_plan->mask_array[ABVF] \| indic_plan->mask_array[PSTF];
836	for (unsigned int i = base + `1`; i < end; i++)
837	info[i].mask \|= mask;
838	}
839
840	if (indic_plan->is_old_spec &&
841	buffer->props.script == HB_SCRIPT_DEVANAGARI)
842	{
843	/ Old-spec eye-lash Ra needs special handling. From the*
844	* spec:
845	*
846	* "The feature 'below-base form' is applied to consonants
847	* having below-base forms and following the base consonant.
848	* The exception is vattu, which may appear below half forms
849	* as well as below the base glyph. The feature 'below-base
850	* form' will be applied to all such occurrences of Ra as well."
851	*
852	* Test case: U+0924,U+094D,U+0930,U+094d,U+0915
853	* with Sanskrit 2003 font.
854	*
855	* However, note that Ra,Halant,ZWJ is the correct way to
856	* request eyelash form of Ra, so we wouldbn't inhibit it
857	* in that sequence.
858	*
859	* Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915
860	*/
861	for (unsigned int i = start; i + `1` < base; i++)
862	if (info[i ].indic_category() == OT_Ra &&
863	info[i+`1`].indic_category() == OT_H &&
864	(i + `2` == base \|\|
865	info[i+`2`].indic_category() != OT_ZWJ))
866	{
867	info[i ].mask \|= indic_plan->mask_array[BLWF];
868	info[i+`1`].mask \|= indic_plan->mask_array[BLWF];
869	}
870	}
871
872	unsigned int pref_len = `2`;
873	if (indic_plan->mask_array[PREF] && base + pref_len < end)
874	{
875	/ Find a Halant,Ra sequence and mark it for pre-base-reordering processing. /
876	for (unsigned int i = base + `1`; i + pref_len - `1` < end; i++) {
877	hb_codepoint_t glyphs[`2`];
878	for (unsigned int j = `0`; j < pref_len; j++)
879	glyphs[j] = info[i + j].codepoint;
880	if (indic_plan->pref.would_substitute (glyphs, pref_len, face))
881	{
882	for (unsigned int j = `0`; j < pref_len; j++)
883	info[i++].mask \|= indic_plan->mask_array[PREF];
884	break;
885	}
886	}
887	}
888
889	/ Apply ZWJ/ZWNJ effects /
890	for (unsigned int i = start + `1`; i < end; i++)
891	if (is_joiner (info[i])) {
892	bool non_joiner = info[i].indic_category() == OT_ZWNJ;
893	unsigned int j = i;
894
895	do {
896	j--;
897
898	/ ZWJ/ZWNJ should disable CJCT. They do that by simply*
899	* being there, since we don't skip them for the CJCT
900	* feature (ie. F_MANUAL_ZWJ) */
901
902	/ A ZWNJ disables HALF. /
903	if (non_joiner)
904	info[j].mask &= ~indic_plan->mask_array[HALF];
905
906	} while (j > start && !is_consonant (info[j]));
907	}
908	}
909
910	static void
911	initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan,
912	hb_face_t *face,
913	hb_buffer_t *buffer,
914	unsigned int start, unsigned int end)
915	{
916	/ We treat placeholder/dotted-circle as if they are consonants, so we*
917	* should just chain. Only if not in compatibility mode that is... */
918
919	if (hb_options ().uniscribe_bug_compatible)
920	{
921	/ For dotted-circle, this is what Uniscribe does:*
922	* If dotted-circle is the last glyph, it just does nothing.
923	* Ie. It doesn't form Reph. */
924	if (buffer->info[end - `1`].indic_category() == OT_DOTTEDCIRCLE)
925	return;
926	}
927
928	initial_reordering_consonant_syllable (plan, face, buffer, start, end);
929	}
930
931	static void
932	initial_reordering_syllable (const hb_ot_shape_plan_t *plan,
933	hb_face_t *face,
934	hb_buffer_t *buffer,
935	unsigned int start, unsigned int end)
936	{
937	syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & `0x0F`);
938	switch (syllable_type)
939	{
940	case vowel_syllable: / We made the vowels look like consonants. So let's call the consonant logic! /
941	case consonant_syllable:
942	initial_reordering_consonant_syllable (plan, face, buffer, start, end);
943	break;
944
945	case broken_cluster: / We already inserted dotted-circles, so just call the standalone_cluster. /
946	case standalone_cluster:
947	initial_reordering_standalone_cluster (plan, face, buffer, start, end);
948	break;
949
950	case symbol_cluster:
951	case non_indic_cluster:
952	break;
953	}
954	}
955
956	static inline void
957	insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED,
958	hb_font_t *font,
959	hb_buffer_t *buffer)
960	{
961	/ Note: This loop is extra overhead, but should not be measurable. /
962	bool has_broken_syllables = false;
963	unsigned int count = buffer->len;
964	hb_glyph_info_t *info = buffer->info;
965	for (unsigned int i = `0`; i < count; i++)
966	if ((info[i].syllable() & `0x0F`) == broken_cluster)
967	{
968	has_broken_syllables = true;
969	break;
970	}
971	if (likely (!has_broken_syllables))
972	return;
973
974
975	hb_codepoint_t dottedcircle_glyph;
976	if (!font->get_nominal_glyph (`0x25CCu`, &dottedcircle_glyph))
977	return;
978
979	hb_glyph_info_t dottedcircle = {`0`};
980	dottedcircle.codepoint = `0x25CCu`;
981	set_indic_properties (dottedcircle);
982	dottedcircle.codepoint = dottedcircle_glyph;
983
984	buffer->clear_output ();
985
986	buffer->idx = `0`;
987	unsigned int last_syllable = `0`;
988	while (buffer->idx < buffer->len && buffer->successful)
989	{
990	unsigned int syllable = buffer->cur().syllable();
991	syllable_type_t syllable_type = (syllable_type_t) (syllable & `0x0F`);
992	if (unlikely (last_syllable != syllable && syllable_type == broken_cluster))
993	{
994	last_syllable = syllable;
995
996	hb_glyph_info_t ginfo = dottedcircle;
997	ginfo.cluster = buffer->cur().cluster;
998	ginfo.mask = buffer->cur().mask;
999	ginfo.syllable() = buffer->cur().syllable();
1000	/ TODO Set glyph_props? /
1001
1002	/ Insert dottedcircle after possible Repha. /
1003	while (buffer->idx < buffer->len && buffer->successful &&
1004	last_syllable == buffer->cur().syllable() &&
1005	buffer->cur().indic_category() == OT_Repha)
1006	buffer->next_glyph ();
1007
1008	buffer->output_info (ginfo);
1009	}
1010	else
1011	buffer->next_glyph ();
1012	}
1013
1014	buffer->swap_buffers ();
1015	}
1016
1017	static void
1018	initial_reordering (const hb_ot_shape_plan_t *plan,
1019	hb_font_t *font,
1020	hb_buffer_t *buffer)
1021	{
1022	update_consonant_positions (plan, font, buffer);
1023	insert_dotted_circles (plan, font, buffer);
1024
1025	foreach_syllable (buffer, start, end)
1026	initial_reordering_syllable (plan, font->face, buffer, start, end);
1027	}
1028
1029	static void
1030	final_reordering_syllable (const hb_ot_shape_plan_t *plan,
1031	hb_buffer_t *buffer,
1032	unsigned int start, unsigned int end)
1033	{
1034	const indic_shape_plan_t indic_plan = (const* indic_shape_plan_t *) plan->data;
1035	hb_glyph_info_t *info = buffer->info;
1036
1037
1038	/ This function relies heavily on halant glyphs. Lots of ligation*
1039	* and possibly multiple substitutions happened prior to this
1040	* phase, and that might have messed up our properties. Recover
1041	* from a particular case of that where we're fairly sure that a
1042	* class of OT_H is desired but has been lost. */
1043	/ We don't call load_virama_glyph(), since we know it's already*
1044	* loaded. */
1045	hb_codepoint_t virama_glyph = indic_plan->virama_glyph.get_relaxed ();
1046	if (virama_glyph)
1047	{
1048	for (unsigned int i = start; i < end; i++)
1049	if (info[i].codepoint == virama_glyph &&
1050	_hb_glyph_info_ligated (&info[i]) &&
1051	_hb_glyph_info_multiplied (&info[i]))
1052	{
1053	/ This will make sure that this glyph passes is_halant() test. /
1054	info[i].indic_category() = OT_H;
1055	_hb_glyph_info_clear_ligated_and_multiplied (&info[i]);
1056	}
1057	}
1058
1059
1060	/ 4. Final reordering:*
1061	*
1062	* After the localized forms and basic shaping forms GSUB features have been
1063	* applied (see below), the shaping engine performs some final glyph
1064	* reordering before applying all the remaining font features to the entire
1065	* syllable.
1066	*/
1067
1068	bool try_pref = !!indic_plan->mask_array[PREF];
1069
1070	/ Find base again /
1071	unsigned int base;
1072	for (base = start; base < end; base++)
1073	if (info[base].indic_position() >= POS_BASE_C)
1074	{
1075	if (try_pref && base + `1` < end)
1076	{
1077	for (unsigned int i = base + `1`; i < end; i++)
1078	if ((info[i].mask & indic_plan->mask_array[PREF]) != `0`)
1079	{
1080	if (!(_hb_glyph_info_substituted (&info[i]) &&
1081	_hb_glyph_info_ligated_and_didnt_multiply (&info[i])))
1082	{
1083	/ Ok, this was a 'pref' candidate but didn't form any.*
1084	* Base is around here... */
1085	base = i;
1086	while (base < end && is_halant (info[base]))
1087	base++;
1088	info[base].indic_position() = POS_BASE_C;
1089
1090	try_pref = false;
1091	}
1092	break;
1093	}
1094	}
1095	/ For Malayalam, skip over unformed below- (but NOT post-) forms. /
1096	if (buffer->props.script == HB_SCRIPT_MALAYALAM)
1097	{
1098	for (unsigned int i = base + `1`; i < end; i++)
1099	{
1100	while (i < end && is_joiner (info[i]))
1101	i++;
1102	if (i == end \|\| !is_halant (info[i]))
1103	break;
1104	i++; / Skip halant. /
1105	while (i < end && is_joiner (info[i]))
1106	i++;
1107	if (i < end && is_consonant (info[i]) && info[i].indic_position() == POS_BELOW_C)
1108	{
1109	base = i;
1110	info[base].indic_position() = POS_BASE_C;
1111	}
1112	}
1113	}
1114
1115	if (start < base && info[base].indic_position() > POS_BASE_C)
1116	base--;
1117	break;
1118	}
1119	if (base == end && start < base &&
1120	is_one_of (info[base - `1`], FLAG (OT_ZWJ)))
1121	base--;
1122	if (base < end)
1123	while (start < base &&
1124	is_one_of (info[base], (FLAG (OT_N) \| FLAG (OT_H))))
1125	base--;
1126
1127
1128	/ o Reorder matras:*
1129	*
1130	* If a pre-base matra character had been reordered before applying basic
1131	* features, the glyph can be moved closer to the main consonant based on
1132	* whether half-forms had been formed. Actual position for the matra is
1133	* defined as “after last standalone halant glyph, after initial matra
1134	* position and before the main consonant”. If ZWJ or ZWNJ follow this
1135	* halant, position is moved after it.
1136	*
1137	* IMPLEMENTATION NOTES:
1138	*
1139	* It looks like the last sentence is wrong. Testing, with Windows 7 Uniscribe
1140	* and Devanagari shows that the behavior is best described as:
1141	*
1142	* "If ZWJ follows this halant, matra is NOT repositioned after this halant.
1143	* If ZWNJ follows this halant, position is moved after it."
1144	*
1145	* Test case, with Adobe Devanagari or Nirmala UI:
1146	*
1147	* U+091F,U+094D,U+200C,U+092F,U+093F
1148	* (Matra moves to the middle, after ZWNJ.)
1149	*
1150	* U+091F,U+094D,U+200D,U+092F,U+093F
1151	* (Matra does NOT move, stays to the left.)
1152	*
1153	* https://github.com/harfbuzz/harfbuzz/issues/1070
1154	*/
1155
1156	if (start + `1` < end && start < base) / Otherwise there can't be any pre-base matra characters. /
1157	{
1158	/ If we lost track of base, alas, position before last thingy. /
1159	unsigned int new_pos = base == end ? base - `2` : base - `1`;
1160
1161	/ Malayalam / Tamil do not have "half" forms or explicit virama forms.*
1162	* The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1163	* We want to position matra after them.
1164	*/
1165	if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL)
1166	{
1167	search:
1168	while (new_pos > start &&
1169	!(is_one_of (info[new_pos], (FLAG (OT_M) \| FLAG (OT_H)))))
1170	new_pos--;
1171
1172	/ If we found no Halant we are done.*
1173	* Otherwise only proceed if the Halant does
1174	* not belong to the Matra itself! */
1175	if (is_halant (info[new_pos]) &&
1176	info[new_pos].indic_position() != POS_PRE_M)
1177	{
1178	#if 0 // See comment above
1179	/ -> If ZWJ or ZWNJ follow this halant, position is moved after it. /
1180	if (new_pos + `1` < end && is_joiner (info[new_pos + `1`]))
1181	new_pos++;
1182	#endif
1183	if (new_pos + `1` < end)
1184	{
1185	/ -> If ZWJ follows this halant, matra is NOT repositioned after this halant. /
1186	if (info[new_pos + `1`].indic_category() == OT_ZWJ)
1187	{
1188	/ Keep searching. /
1189	if (new_pos > start)
1190	{
1191	new_pos--;
1192	goto search;
1193	}
1194	}
1195	/ -> If ZWNJ follows this halant, position is moved after it. /
1196	if (info[new_pos + `1`].indic_category() == OT_ZWNJ)
1197	new_pos++;
1198	}
1199	}
1200	else
1201	new_pos = start; / No move. /
1202	}
1203
1204	if (start < new_pos && info[new_pos].indic_position () != POS_PRE_M)
1205	{
1206	/ Now go see if there's actually any matras... /
1207	for (unsigned int i = new_pos; i > start; i--)
1208	if (info[i - `1`].indic_position () == POS_PRE_M)
1209	{
1210	unsigned int old_pos = i - `1`;
1211	if (old_pos < base && base <= new_pos) / Shouldn't actually happen. /
1212	base--;
1213
1214	hb_glyph_info_t tmp = info[old_pos];
1215	memmove (&info[old_pos], &info[old_pos + `1`], (new_pos - old_pos) * sizeof (info[`0`]));
1216	info[new_pos] = tmp;
1217
1218	/ Note: this merge_clusters() is intentionally after the reordering.*
1219	* Indic matra reordering is special and tricky... */
1220	buffer->merge_clusters (new_pos, MIN (end, base + `1`));
1221
1222	new_pos--;
1223	}
1224	} else {
1225	for (unsigned int i = start; i < base; i++)
1226	if (info[i].indic_position () == POS_PRE_M) {
1227	buffer->merge_clusters (i, MIN (end, base + `1`));
1228	break;
1229	}
1230	}
1231	}
1232
1233
1234	/ o Reorder reph:*
1235	*
1236	* Reph’s original position is always at the beginning of the syllable,
1237	* (i.e. it is not reordered at the character reordering stage). However,
1238	* it will be reordered according to the basic-forms shaping results.
1239	* Possible positions for reph, depending on the script, are; after main,
1240	* before post-base consonant forms, and after post-base consonant forms.
1241	*/
1242
1243	/ Two cases:*
1244	*
1245	* - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then
1246	* we should only move it if the sequence ligated to the repha form.
1247	*
1248	* - If repha is encoded separately and in the logical position, we should only
1249	* move it if it did NOT ligate. If it ligated, it's probably the font trying
1250	* to make it work without the reordering.
1251	*/
1252	if (start + `1` < end &&
1253	info[start].indic_position() == POS_RA_TO_BECOME_REPH &&
1254	((info[start].indic_category() == OT_Repha) ^
1255	_hb_glyph_info_ligated_and_didnt_multiply (&info[start])))
1256	{
1257	unsigned int new_reph_pos;
1258	reph_position_t reph_pos = indic_plan->config->reph_pos;
1259
1260	/ 1. If reph should be positioned after post-base consonant forms,*
1261	* proceed to step 5.
1262	*/
1263	if (reph_pos == REPH_POS_AFTER_POST)
1264	{
1265	goto reph_step_5;
1266	}
1267
1268	/ 2. If the reph repositioning class is not after post-base: target*
1269	* position is after the first explicit halant glyph between the
1270	* first post-reph consonant and last main consonant. If ZWJ or ZWNJ
1271	* are following this halant, position is moved after it. If such
1272	* position is found, this is the target position. Otherwise,
1273	* proceed to the next step.
1274	*
1275	* Note: in old-implementation fonts, where classifications were
1276	* fixed in shaping engine, there was no case where reph position
1277	* will be found on this step.
1278	*/
1279	{
1280	new_reph_pos = start + `1`;
1281	while (new_reph_pos < base && !is_halant (info[new_reph_pos]))
1282	new_reph_pos++;
1283
1284	if (new_reph_pos < base && is_halant (info[new_reph_pos]))
1285	{
1286	/ ->If ZWJ or ZWNJ are following this halant, position is moved after it. /
1287	if (new_reph_pos + `1` < base && is_joiner (info[new_reph_pos + `1`]))
1288	new_reph_pos++;
1289	goto reph_move;
1290	}
1291	}
1292
1293	/ 3. If reph should be repositioned after the main consonant: find the*
1294	* first consonant not ligated with main, or find the first
1295	* consonant that is not a potential pre-base-reordering Ra.
1296	*/
1297	if (reph_pos == REPH_POS_AFTER_MAIN)
1298	{
1299	new_reph_pos = base;
1300	while (new_reph_pos + `1` < end && info[new_reph_pos + `1`].indic_position() <= POS_AFTER_MAIN)
1301	new_reph_pos++;
1302	if (new_reph_pos < end)
1303	goto reph_move;
1304	}
1305
1306	/ 4. If reph should be positioned before post-base consonant, find*
1307	* first post-base classified consonant not ligated with main. If no
1308	* consonant is found, the target position should be before the
1309	* first matra, syllable modifier sign or vedic sign.
1310	*/
1311	/ This is our take on what step 4 is trying to say (and failing, BADLY). /
1312	if (reph_pos == REPH_POS_AFTER_SUB)
1313	{
1314	new_reph_pos = base;
1315	while (new_reph_pos + `1` < end &&
1316	!( FLAG_UNSAFE (info[new_reph_pos + `1`].indic_position()) & (FLAG (POS_POST_C) \| FLAG (POS_AFTER_POST) \| FLAG (POS_SMVD))))
1317	new_reph_pos++;
1318	if (new_reph_pos < end)
1319	goto reph_move;
1320	}
1321
1322	/ 5. If no consonant is found in steps 3 or 4, move reph to a position*
1323	* immediately before the first post-base matra, syllable modifier
1324	* sign or vedic sign that has a reordering class after the intended
1325	* reph position. For example, if the reordering position for reph
1326	* is post-main, it will skip above-base matras that also have a
1327	* post-main position.
1328	*/
1329	reph_step_5:
1330	{
1331	/ Copied from step 2. /
1332	new_reph_pos = start + `1`;
1333	while (new_reph_pos < base && !is_halant (info[new_reph_pos]))
1334	new_reph_pos++;
1335
1336	if (new_reph_pos < base && is_halant (info[new_reph_pos]))
1337	{
1338	/ ->If ZWJ or ZWNJ are following this halant, position is moved after it. /
1339	if (new_reph_pos + `1` < base && is_joiner (info[new_reph_pos + `1`]))
1340	new_reph_pos++;
1341	goto reph_move;
1342	}
1343	}
1344
1345	/ 6. Otherwise, reorder reph to the end of the syllable.*
1346	*/
1347	{
1348	new_reph_pos = end - `1`;
1349	while (new_reph_pos > start && info[new_reph_pos].indic_position() == POS_SMVD)
1350	new_reph_pos--;
1351
1352	/*
1353	* If the Reph is to be ending up after a Matra,Halant sequence,
1354	* position it before that Halant so it can interact with the Matra.
1355	* However, if it's a plain Consonant,Halant we shouldn't do that.
1356	* Uniscribe doesn't do this.
1357	* TEST: U+0930,U+094D,U+0915,U+094B,U+094D
1358	*/
1359	if (!hb_options ().uniscribe_bug_compatible &&
1360	unlikely (is_halant (info[new_reph_pos]))) {
1361	for (unsigned int i = base + `1`; i < new_reph_pos; i++)
1362	if (info[i].indic_category() == OT_M) {
1363	/ Ok, got it. /
1364	new_reph_pos--;
1365	}
1366	}
1367	goto reph_move;
1368	}
1369
1370	reph_move:
1371	{
1372	/ Move /
1373	buffer->merge_clusters (start, new_reph_pos + `1`);
1374	hb_glyph_info_t reph = info[start];
1375	memmove (&info[start], &info[start + `1`], (new_reph_pos - start) * sizeof (info[`0`]));
1376	info[new_reph_pos] = reph;
1377
1378	if (start < base && base <= new_reph_pos)
1379	base--;
1380	}
1381	}
1382
1383
1384	/ o Reorder pre-base-reordering consonants:*
1385	*
1386	* If a pre-base-reordering consonant is found, reorder it according to
1387	* the following rules:
1388	*/
1389
1390	if (try_pref && base + `1` < end) / Otherwise there can't be any pre-base-reordering Ra. /
1391	{
1392	for (unsigned int i = base + `1`; i < end; i++)
1393	if ((info[i].mask & indic_plan->mask_array[PREF]) != `0`)
1394	{
1395	/ 1. Only reorder a glyph produced by substitution during application*
1396	* of the <pref> feature. (Note that a font may shape a Ra consonant with
1397	* the feature generally but block it in certain contexts.)
1398	*/
1399	/ Note: We just check that something got substituted. We don't check that*
1400	* the <pref> feature actually did it...
1401	*
1402	* Reorder pref only if it ligated. */
1403	if (_hb_glyph_info_ligated_and_didnt_multiply (&info[i]))
1404	{
1405	/*
1406	* 2. Try to find a target position the same way as for pre-base matra.
1407	* If it is found, reorder pre-base consonant glyph.
1408	*
1409	* 3. If position is not found, reorder immediately before main
1410	* consonant.
1411	*/
1412
1413	unsigned int new_pos = base;
1414	/ Malayalam / Tamil do not have "half" forms or explicit virama forms.*
1415	* The glyphs formed by 'half' are Chillus or ligated explicit viramas.
1416	* We want to position matra after them.
1417	*/
1418	if (buffer->props.script != HB_SCRIPT_MALAYALAM && buffer->props.script != HB_SCRIPT_TAMIL)
1419	{
1420	while (new_pos > start &&
1421	!(is_one_of (info[new_pos - `1`], FLAG(OT_M) \| FLAG (OT_H))))
1422	new_pos--;
1423	}
1424
1425	if (new_pos > start && is_halant (info[new_pos - `1`]))
1426	{
1427	/ -> If ZWJ or ZWNJ follow this halant, position is moved after it. /
1428	if (new_pos < end && is_joiner (info[new_pos]))
1429	new_pos++;
1430	}
1431
1432	{
1433	unsigned int old_pos = i;
1434
1435	buffer->merge_clusters (new_pos, old_pos + `1`);
1436	hb_glyph_info_t tmp = info[old_pos];
1437	memmove (&info[new_pos + `1`], &info[new_pos], (old_pos - new_pos) * sizeof (info[`0`]));
1438	info[new_pos] = tmp;
1439
1440	if (new_pos <= base && base < old_pos)
1441	base++;
1442	}
1443	}
1444
1445	break;
1446	}
1447	}
1448
1449
1450	/ Apply 'init' to the Left Matra if it's a word start. /
1451	if (info[start].indic_position () == POS_PRE_M)
1452	{
1453	if (!start \|\|
1454	!(FLAG_UNSAFE (_hb_glyph_info_get_general_category (&info[start - `1`])) &
1455	FLAG_RANGE (HB_UNICODE_GENERAL_CATEGORY_FORMAT, HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))
1456	info[start].mask \|= indic_plan->mask_array[INIT];
1457	else
1458	buffer->unsafe_to_break (start - `1`, start + `1`);
1459	}
1460
1461
1462	/*
1463	* Finish off the clusters and go home!
1464	*/
1465	if (hb_options ().uniscribe_bug_compatible)
1466	{
1467	switch ((hb_tag_t) plan->props.script)
1468	{
1469	case HB_SCRIPT_TAMIL:
1470	case HB_SCRIPT_SINHALA:
1471	break;
1472
1473	default:
1474	/ Uniscribe merges the entire syllable into a single cluster... Except for Tamil & Sinhala.*
1475	* This means, half forms are submerged into the main consonant's cluster.
1476	* This is unnecessary, and makes cursor positioning harder, but that's what
1477	* Uniscribe does. */
1478	buffer->merge_clusters (start, end);
1479	break;
1480	}
1481	}
1482	}
1483
1484
1485	static void
1486	final_reordering (const hb_ot_shape_plan_t *plan,
1487	hb_font_t *font HB_UNUSED,
1488	hb_buffer_t *buffer)
1489	{
1490	unsigned int count = buffer->len;
1491	if (unlikely (!count)) return;
1492
1493	foreach_syllable (buffer, start, end)
1494	final_reordering_syllable (plan, buffer, start, end);
1495
1496	HB_BUFFER_DEALLOCATE_VAR (buffer, indic_category);
1497	HB_BUFFER_DEALLOCATE_VAR (buffer, indic_position);
1498	}
1499
1500
1501	static void
1502	clear_syllables (const hb_ot_shape_plan_t *plan HB_UNUSED,
1503	hb_font_t *font HB_UNUSED,
1504	hb_buffer_t *buffer)
1505	{
1506	hb_glyph_info_t *info = buffer->info;
1507	unsigned int count = buffer->len;
1508	for (unsigned int i = `0`; i < count; i++)
1509	info[i].syllable() = `0`;
1510	}
1511
1512
1513	static bool
1514	decompose_indic (const hb_ot_shape_normalize_context_t *c,
1515	hb_codepoint_t ab,
1516	hb_codepoint_t *a,
1517	hb_codepoint_t *b)
1518	{
1519	switch (ab)
1520	{
1521	/ Don't decompose these. /
1522	case `0x0931u` : return false; / DEVANAGARI LETTER RRA /
1523	// https://github.com/harfbuzz/harfbuzz/issues/779
1524	case `0x09DCu` : return false; / BENGALI LETTER RRA /
1525	case `0x09DDu` : return false; / BENGALI LETTER RHA /
1526	case `0x0B94u` : return false; / TAMIL LETTER AU /
1527
1528
1529	/*
1530	* Decompose split matras that don't have Unicode decompositions.
1531	*/
1532
1533	#if 0
1534	/ Gujarati /
1535	/ This one has no decomposition in Unicode, but needs no decomposition either. /
1536	/ case 0x0AC9u : return false; /
1537
1538	/ Oriya /
1539	case `0x0B57u` : a = no decomp, -> RIGHT; return* true;
1540	#endif
1541	}
1542
1543	if ((ab == `0x0DDAu` \|\| hb_in_range<hb_codepoint_t> (ab, `0x0DDCu`, `0x0DDEu`)))
1544	{
1545	/*
1546	* Sinhala split matras... Let the fun begin.
1547	*
1548	* These four characters have Unicode decompositions. However, Uniscribe
1549	* decomposes them "Khmer-style", that is, it uses the character itself to
1550	* get the second half. The first half of all four decompositions is always
1551	* U+0DD9.
1552	*
1553	* Now, there are buggy fonts, namely, the widely used lklug.ttf, that are
1554	* broken with Uniscribe. But we need to support them. As such, we only
1555	* do the Uniscribe-style decomposition if the character is transformed into
1556	* its "sec.half" form by the 'pstf' feature. Otherwise, we fall back to
1557	* Unicode decomposition.
1558	*
1559	* Note that we can't unconditionally use Unicode decomposition. That would
1560	* break some other fonts, that are designed to work with Uniscribe, and
1561	* don't have positioning features for the Unicode-style decomposition.
1562	*
1563	* Argh...
1564	*
1565	* The Uniscribe behavior is now documented in the newly published Sinhala
1566	* spec in 2012:
1567	*
1568	* https://docs.microsoft.com/en-us/typography/script-development/sinhala#shaping
1569	*/
1570
1571	const indic_shape_plan_t indic_plan = (const* indic_shape_plan_t *) c->plan->data;
1572
1573	hb_codepoint_t glyph;
1574
1575	if (hb_options ().uniscribe_bug_compatible \|\|
1576	(c->font->get_nominal_glyph (ab, &glyph) &&
1577	indic_plan->pstf.would_substitute (&glyph, `1`, c->font->face)))
1578	{
1579	/ Ok, safe to use Uniscribe-style decomposition. /
1580	*a = `0x0DD9u`;
1581	*b = ab;
1582	return true;
1583	}
1584	}
1585
1586	return (bool) c->unicode->decompose (ab, a, b);
1587	}
1588
1589	static bool
1590	compose_indic (const hb_ot_shape_normalize_context_t *c,
1591	hb_codepoint_t a,
1592	hb_codepoint_t b,
1593	hb_codepoint_t *ab)
1594	{
1595	/ Avoid recomposing split matras. /
1596	if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a)))
1597	return false;
1598
1599	/ Composition-exclusion exceptions that we want to recompose. /
1600	if (a == `0x09AFu` && b == `0x09BCu`) { ab = `0x09DFu`; return* true; }
1601
1602	return (bool) c->unicode->compose (a, b, ab);
1603	}
1604
1605
1606	const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic =
1607	{
1608	collect_features_indic,
1609	override_features_indic,
1610	data_create_indic,
1611	data_destroy_indic,
1612	nullptr, / preprocess_text /
1613	nullptr, / postprocess_glyphs /
1614	HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT,
1615	decompose_indic,
1616	compose_indic,
1617	setup_masks_indic,
1618	nullptr, / disable_otl /
1619	nullptr, / reorder_marks /
1620	HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE,
1621	false, / fallback_position /
1622	};
1623

/* Source: MuPDF/thirdparty/harfbuzz/src/hb-ot-shape-complex-indic.cc */