hb-ot-tag.cc source code [engine/third_party/harfbuzz/src/hb-ot-tag.cc]

1	/*
2	* Copyright © 2009 Red Hat, Inc.
3	* Copyright © 2011 Google, Inc.
4	*
5	* This is part of HarfBuzz, a text shaping library.
6	*
7	* Permission is hereby granted, without written agreement and without
8	* license or royalty fees, to use, copy, modify, and distribute this
9	* software and its documentation for any purpose, provided that the
10	* above copyright notice and the following two paragraphs appear in
11	* all copies of this software.
12	*
13	* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14	* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15	* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16	* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17	* DAMAGE.
18	*
19	* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20	* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21	* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
22	* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23	* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24	*
25	* Red Hat Author(s): Behdad Esfahbod
26	* Google Author(s): Behdad Esfahbod, Roozbeh Pournader
27	*/
28
29	#include "hb.hh"
30
31	#ifndef HB_NO_OT_TAG
32
33
/* hb_script_t */
35
36	static hb_tag_t
37	hb_ot_old_tag_from_script (hb_script_t script)
38	{
39	/ This seems to be accurate as of end of 2012. /
40
41	switch ((hb_tag_t) script)
42	{
43	case HB_SCRIPT_INVALID: return HB_OT_TAG_DEFAULT_SCRIPT;
44
45	/ KATAKANA and HIRAGANA both map to 'kana' /
46	case HB_SCRIPT_HIRAGANA: return HB_TAG(`'k'`,`'a'`,`'n'`,`'a'`);
47
48	/ Spaces at the end are preserved, unlike ISO 15924 /
49	case HB_SCRIPT_LAO: return HB_TAG(`'l'`,`'a'`,`'o'`,`' '`);
50	case HB_SCRIPT_YI: return HB_TAG(`'y'`,`'i'`,`' '`,`' '`);
51	/ Unicode-5.0 additions /
52	case HB_SCRIPT_NKO: return HB_TAG(`'n'`,`'k'`,`'o'`,`' '`);
53	/ Unicode-5.1 additions /
54	case HB_SCRIPT_VAI: return HB_TAG(`'v'`,`'a'`,`'i'`,`' '`);
55	}
56
57	/ Else, just change first char to lowercase and return /
58	return ((hb_tag_t) script) \| `0x20000000u`;
59	}
60
61	static hb_script_t
62	hb_ot_old_tag_to_script (hb_tag_t tag)
63	{
64	if (unlikely (tag == HB_OT_TAG_DEFAULT_SCRIPT))
65	return HB_SCRIPT_INVALID;
66
67	/ This side of the conversion is fully algorithmic. /
68
69	/ Any spaces at the end of the tag are replaced by repeating the last*
70	* letter. Eg 'nko ' -> 'Nkoo' */
71	if (unlikely ((tag & `0x0000FF00u`) == `0x00002000u`))
72	tag \|= (tag >> `8`) & `0x0000FF00u`; / Copy second letter to third /
73	if (unlikely ((tag & `0x000000FFu`) == `0x00000020u`))
74	tag \|= (tag >> `8`) & `0x000000FFu`; / Copy third letter to fourth /
75
76	/ Change first char to uppercase and return /
77	return (hb_script_t) (tag & ~`0x20000000u`);
78	}
79
80	static hb_tag_t
81	hb_ot_new_tag_from_script (hb_script_t script)
82	{
83	switch ((hb_tag_t) script) {
84	case HB_SCRIPT_BENGALI: return HB_TAG(`'b'`,`'n'`,`'g'`,`'2'`);
85	case HB_SCRIPT_DEVANAGARI: return HB_TAG(`'d'`,`'e'`,`'v'`,`'2'`);
86	case HB_SCRIPT_GUJARATI: return HB_TAG(`'g'`,`'j'`,`'r'`,`'2'`);
87	case HB_SCRIPT_GURMUKHI: return HB_TAG(`'g'`,`'u'`,`'r'`,`'2'`);
88	case HB_SCRIPT_KANNADA: return HB_TAG(`'k'`,`'n'`,`'d'`,`'2'`);
89	case HB_SCRIPT_MALAYALAM: return HB_TAG(`'m'`,`'l'`,`'m'`,`'2'`);
90	case HB_SCRIPT_ORIYA: return HB_TAG(`'o'`,`'r'`,`'y'`,`'2'`);
91	case HB_SCRIPT_TAMIL: return HB_TAG(`'t'`,`'m'`,`'l'`,`'2'`);
92	case HB_SCRIPT_TELUGU: return HB_TAG(`'t'`,`'e'`,`'l'`,`'2'`);
93	case HB_SCRIPT_MYANMAR: return HB_TAG(`'m'`,`'y'`,`'m'`,`'2'`);
94	}
95
96	return HB_OT_TAG_DEFAULT_SCRIPT;
97	}
98
99	static hb_script_t
100	hb_ot_new_tag_to_script (hb_tag_t tag)
101	{
102	switch (tag) {
103	case HB_TAG(`'b'`,`'n'`,`'g'`,`'2'`): return HB_SCRIPT_BENGALI;
104	case HB_TAG(`'d'`,`'e'`,`'v'`,`'2'`): return HB_SCRIPT_DEVANAGARI;
105	case HB_TAG(`'g'`,`'j'`,`'r'`,`'2'`): return HB_SCRIPT_GUJARATI;
106	case HB_TAG(`'g'`,`'u'`,`'r'`,`'2'`): return HB_SCRIPT_GURMUKHI;
107	case HB_TAG(`'k'`,`'n'`,`'d'`,`'2'`): return HB_SCRIPT_KANNADA;
108	case HB_TAG(`'m'`,`'l'`,`'m'`,`'2'`): return HB_SCRIPT_MALAYALAM;
109	case HB_TAG(`'o'`,`'r'`,`'y'`,`'2'`): return HB_SCRIPT_ORIYA;
110	case HB_TAG(`'t'`,`'m'`,`'l'`,`'2'`): return HB_SCRIPT_TAMIL;
111	case HB_TAG(`'t'`,`'e'`,`'l'`,`'2'`): return HB_SCRIPT_TELUGU;
112	case HB_TAG(`'m'`,`'y'`,`'m'`,`'2'`): return HB_SCRIPT_MYANMAR;
113	}
114
115	return HB_SCRIPT_UNKNOWN;
116	}
117
118	#ifndef HB_DISABLE_DEPRECATED
119	void
120	hb_ot_tags_from_script (hb_script_t script,
121	hb_tag_t *script_tag_1,
122	hb_tag_t *script_tag_2)
123	{
124	unsigned int count = `2`;
125	hb_tag_t tags[`2`];
126	hb_ot_tags_from_script_and_language (script, HB_LANGUAGE_INVALID, &count, tags, nullptr, nullptr);
127	*script_tag_1 = count > `0` ? tags[`0`] : HB_OT_TAG_DEFAULT_SCRIPT;
128	*script_tag_2 = count > `1` ? tags[`1`] : HB_OT_TAG_DEFAULT_SCRIPT;
129	}
130	#endif
131
132	/*
133	* Complete list at:
134	* https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags
135	*
136	* Most of the script tags are the same as the ISO 15924 tag but lowercased.
137	* So we just do that, and handle the exceptional cases in a switch.
138	*/
139
140	static void
141	hb_ot_all_tags_from_script (hb_script_t script,
142	unsigned int count /* IN/OUT /,
143	hb_tag_t tags /* OUT /)
144	{
145	unsigned int i = `0`;
146
147	hb_tag_t new_tag = hb_ot_new_tag_from_script (script);
148	if (unlikely (new_tag != HB_OT_TAG_DEFAULT_SCRIPT))
149	{
150	/ HB_SCRIPT_MYANMAR maps to 'mym2', but there is no 'mym3'. /
151	if (new_tag != HB_TAG(`'m'`,`'y'`,`'m'`,`'2'`))
152	tags[i++] = new_tag \| `'3'`;
153	if (*count > i)
154	tags[i++] = new_tag;
155	}
156
157	if (*count > i)
158	{
159	hb_tag_t old_tag = hb_ot_old_tag_from_script (script);
160	if (old_tag != HB_OT_TAG_DEFAULT_SCRIPT)
161	tags[i++] = old_tag;
162	}
163
164	*count = i;
165	}
166
167	hb_script_t
168	hb_ot_tag_to_script (hb_tag_t tag)
169	{
170	unsigned char digit = tag & `0x000000FFu`;
171	if (unlikely (digit == `'2'` \|\| digit == `'3'`))
172	return hb_ot_new_tag_to_script (tag & `0xFFFFFF32`);
173
174	return hb_ot_old_tag_to_script (tag);
175	}
176
177
/* hb_language_t */
179
180	static bool
181	subtag_matches (const char *lang_str,
182	const char *limit,
183	const char *subtag)
184	{
185	do {
186	const char *s = strstr (lang_str, subtag);
187	if (!s \|\| s >= limit)
188	return false;
189	if (!ISALNUM (s[strlen (subtag)]))
190	return true;
191	lang_str = s + strlen (subtag);
192	} while (true);
193	}
194
195	static hb_bool_t
196	lang_matches (const char lang_str, const* char *spec)
197	{
198	unsigned int len = strlen (spec);
199
200	return strncmp (lang_str, spec, len) == `0` &&
201	(lang_str[len] == `'\0'` \|\| lang_str[len] == `'-'`);
202	}
203
204	struct LangTag
205	{
206	char language[`4`];
207	hb_tag_t tag;
208
209	int cmp (const char a) const*
210	{
211	const char b = this*->language;
212	unsigned int da, db;
213	const char *p;
214
215	p = strchr (a, `'-'`);
216	da = p ? (unsigned int) (p - a) : strlen (a);
217
218	p = strchr (b, `'-'`);
219	db = p ? (unsigned int) (p - b) : strlen (b);
220
221	return strncmp (a, b, hb_max (da, db));
222	}
223	int cmp (const LangTag that) const*
224	{ return cmp (that->language); }
225	};
226
227	#include "hb-ot-tag-table.hh"
228
/* The corresponding language IDs for the following IDs are unclear,
 * overlap, or are architecturally weird. Needs more research. */

/*{"??",	{HB_TAG('B','C','R',' ')}},*/	/* Bible Cree */
/*{"zh?",	{HB_TAG('C','H','N',' ')}},*/	/* Chinese (seen in Microsoft fonts) */
/*{"ar-Syrc?",	{HB_TAG('G','A','R',' ')}},*/	/* Garshuni */
/*{"??",	{HB_TAG('N','G','R',' ')}},*/	/* Nagari */
/*{"??",	{HB_TAG('Y','I','C',' ')}},*/	/* Yi Classic */
/*{"zh?",	{HB_TAG('Z','H','P',' ')}},*/	/* Chinese Phonetic */
238
239	#ifndef HB_DISABLE_DEPRECATED
240	hb_tag_t
241	hb_ot_tag_from_language (hb_language_t language)
242	{
243	unsigned int count = `1`;
244	hb_tag_t tags[`1`];
245	hb_ot_tags_from_script_and_language (HB_SCRIPT_UNKNOWN, language, nullptr, nullptr, &count, tags);
246	return count > `0` ? tags[`0`] : HB_OT_TAG_DEFAULT_LANGUAGE;
247	}
248	#endif
249
250	static void
251	hb_ot_tags_from_language (const char *lang_str,
252	const char *limit,
253	unsigned int *count,
254	hb_tag_t *tags)
255	{
256	const char *s;
257	unsigned int tag_idx;
258
259	/ Check for matches of multiple subtags. /
260	if (hb_ot_tags_from_complex_language (lang_str, limit, count, tags))
261	return;
262
263	/ Find a language matching in the first component. /
264	s = strchr (lang_str, `'-'`);
265	{
266	if (s && limit - lang_str >= `6`)
267	{
268	const char *extlang_end = strchr (s + `1`, `'-'`);
269	/ If there is an extended language tag, use it. /
270	if (`3` == (extlang_end ? extlang_end - s - `1` : strlen (s + `1`)) &&
271	ISALPHA (s[`1`]))
272	lang_str = s + `1`;
273	}
274	if (hb_sorted_array (ot_languages).bfind (lang_str, &tag_idx))
275	{
276	unsigned int i;
277	while (tag_idx != `0` &&
278	`0` == strcmp (ot_languages[tag_idx].language, ot_languages[tag_idx - `1`].language))
279	tag_idx--;
280	for (i = `0`;
281	i < *count &&
282	tag_idx + i < ARRAY_LENGTH (ot_languages) &&
283	`0` == strcmp (ot_languages[tag_idx + i].language, ot_languages[tag_idx].language);
284	i++)
285	tags[i] = ot_languages[tag_idx + i].tag;
286	*count = i;
287	return;
288	}
289	}
290
291	if (!s)
292	s = lang_str + strlen (lang_str);
293	if (s - lang_str == `3`) {
294	/ Assume it's ISO-639-3 and upper-case and use it. /
295	tags[`0`] = hb_tag_from_string (lang_str, s - lang_str) & ~`0x20202000u`;
296	*count = `1`;
297	return;
298	}
299
300	*count = `0`;
301	}
302
303	static bool
304	parse_private_use_subtag (const char *private_use_subtag,
305	unsigned int *count,
306	hb_tag_t *tags,
307	const char *prefix,
308	unsigned char (normalize) (unsigned* char))
309	{
310	#ifdef HB_NO_LANGUAGE_PRIVATE_SUBTAG
311	return false;
312	#endif
313
314	if (!(private_use_subtag && count && tags && count)) return* false;
315
316	const char *s = strstr (private_use_subtag, prefix);
317	if (!s) return false;
318
319	char tag[`4`];
320	int i;
321	s += strlen (prefix);
322	if (s[`0`] == `'-'`) {
323	s += `1`;
324	char c;
325	for (i = `0`; i < `8` && ISHEX (s[i]); i++)
326	{
327	c = FROMHEX (s[i]);
328	if (i % `2` == `0`)
329	tag[i / `2`] = c << `4`;
330	else
331	tag[i / `2`] += c;
332	}
333	if (i != `8`) return false;
334	} else {
335	for (i = `0`; i < `4` && ISALNUM (s[i]); i++)
336	tag[i] = normalize (s[i]);
337	if (!i) return false;
338
339	for (; i < `4`; i++)
340	tag[i] = `' '`;
341	}
342	tags[`0`] = HB_TAG (tag[`0`], tag[`1`], tag[`2`], tag[`3`]);
343	if ((tags[`0`] & `0xDFDFDFDF`) == HB_OT_TAG_DEFAULT_SCRIPT)
344	tags[`0`] ^= ~`0xDFDFDFDF`;
345	*count = `1`;
346	return true;
347	}
348
349	/**
350	* hb_ot_tags_from_script_and_language:
351	* @script: an #hb_script_t to convert.
352	* @language: an #hb_language_t to convert.
353	* @script_count: (allow-none): maximum number of script tags to retrieve (IN)
354	* and actual number of script tags retrieved (OUT)
355	* @script_tags: (out) (allow-none): array of size at least @script_count to store the
356	* script tag results
357	* @language_count: (allow-none): maximum number of language tags to retrieve
358	* (IN) and actual number of language tags retrieved (OUT)
359	* @language_tags: (out) (allow-none): array of size at least @language_count to store
360	* the language tag results
361	*
362	* Converts an #hb_script_t and an #hb_language_t to script and language tags.
363	*
364	* Since: 2.0.0
365	**/
366	void
367	hb_ot_tags_from_script_and_language (hb_script_t script,
368	hb_language_t language,
369	unsigned int script_count /* IN/OUT /,
370	hb_tag_t script_tags /* OUT /,
371	unsigned int language_count /* IN/OUT /,
372	hb_tag_t language_tags /* OUT /)
373	{
374	bool needs_script = true;
375
376	if (language == HB_LANGUAGE_INVALID)
377	{
378	if (language_count && language_tags && *language_count)
379	*language_count = `0`;
380	}
381	else
382	{
383	const char lang_str, s, limit, private_use_subtag;
384	bool needs_language;
385
386	lang_str = hb_language_to_string (language);
387	limit = nullptr;
388	private_use_subtag = nullptr;
389	if (lang_str[`0`] == `'x'` && lang_str[`1`] == `'-'`)
390	{
391	private_use_subtag = lang_str;
392	} else {
393	for (s = lang_str + `1`; *s; s++)
394	{
395	if (s[-`1`] == `'-'` && s[`1`] == `'-'`)
396	{
397	if (s[`0`] == `'x'`)
398	{
399	private_use_subtag = s;
400	if (!limit)
401	limit = s - `1`;
402	break;
403	} else if (!limit)
404	{
405	limit = s - `1`;
406	}
407	}
408	}
409	if (!limit)
410	limit = s;
411	}
412
413	needs_script = !parse_private_use_subtag (private_use_subtag, script_count, script_tags, "-hbsc", TOLOWER);
414	needs_language = !parse_private_use_subtag (private_use_subtag, language_count, language_tags, "-hbot", TOUPPER);
415
416	if (needs_language && language_count && language_tags && *language_count)
417	hb_ot_tags_from_language (lang_str, limit, language_count, language_tags);
418	}
419
420	if (needs_script && script_count && script_tags && *script_count)
421	hb_ot_all_tags_from_script (script, script_count, script_tags);
422	}
423
424	/**
425	* hb_ot_tag_to_language:
426	*
427	*
428	*
429	* Return value: (transfer none):
430	*
431	* Since: 0.9.2
432	**/
433	hb_language_t
434	hb_ot_tag_to_language (hb_tag_t tag)
435	{
436	unsigned int i;
437
438	if (tag == HB_OT_TAG_DEFAULT_LANGUAGE)
439	return nullptr;
440
441	{
442	hb_language_t disambiguated_tag = hb_ot_ambiguous_tag_to_language (tag);
443	if (disambiguated_tag != HB_LANGUAGE_INVALID)
444	return disambiguated_tag;
445	}
446
447	for (i = `0`; i < ARRAY_LENGTH (ot_languages); i++)
448	if (ot_languages[i].tag == tag)
449	return hb_language_from_string (ot_languages[i].language, -`1`);
450
451	/ Return a custom language in the form of "x-hbot-AABBCCDD".*
452	* If it's three letters long, also guess it's ISO 639-3 and lower-case and
453	* prepend it (if it's not a registered tag, the private use subtags will
454	* ensure that calling hb_ot_tag_from_language on the result will still return
455	* the same tag as the original tag).
456	*/
457	{
458	char buf[`20`];
459	char *str = buf;
460	if (ISALPHA (tag >> `24`)
461	&& ISALPHA ((tag >> `16`) & `0xFF`)
462	&& ISALPHA ((tag >> `8`) & `0xFF`)
463	&& (tag & `0xFF`) == `' '`)
464	{
465	buf[`0`] = TOLOWER (tag >> `24`);
466	buf[`1`] = TOLOWER ((tag >> `16`) & `0xFF`);
467	buf[`2`] = TOLOWER ((tag >> `8`) & `0xFF`);
468	buf[`3`] = `'-'`;
469	str += `4`;
470	}
471	snprintf (str, `16`, "x-hbot-%08x", tag);
472	return hb_language_from_string (&*buf, -`1`);
473	}
474	}
475
476	/**
477	* hb_ot_tags_to_script_and_language:
478	* @script_tag: a script tag
479	* @language_tag: a language tag
480	* @script: (allow-none): the #hb_script_t corresponding to @script_tag (OUT).
481	* @language: (allow-none): the #hb_language_t corresponding to @script_tag and
482	* @language_tag (OUT).
483	*
484	* Converts a script tag and a language tag to an #hb_script_t and an
485	* #hb_language_t.
486	*
487	* Since: 2.0.0
488	**/
489	void
490	hb_ot_tags_to_script_and_language (hb_tag_t script_tag,
491	hb_tag_t language_tag,
492	hb_script_t script /* OUT /,
493	hb_language_t language /* OUT /)
494	{
495	hb_script_t script_out = hb_ot_tag_to_script (script_tag);
496	if (script)
497	*script = script_out;
498	if (language)
499	{
500	unsigned int script_count = `1`;
501	hb_tag_t primary_script_tag[`1`];
502	hb_ot_tags_from_script_and_language (script_out,
503	HB_LANGUAGE_INVALID,
504	&script_count,
505	primary_script_tag,
506	nullptr, nullptr);
507	*language = hb_ot_tag_to_language (language_tag);
508	if (script_count == `0` \|\| primary_script_tag[`0`] != script_tag)
509	{
510	unsigned char *buf;
511	const char lang_str = hb_language_to_string (language);
512	size_t len = strlen (lang_str);
513	buf = (unsigned char *) malloc (len + `16`);
514	if (unlikely (!buf))
515	{
516	language = nullptr*;
517	}
518	else
519	{
520	int shift;
521	memcpy (buf, lang_str, len);
522	if (lang_str[`0`] != `'x'` \|\| lang_str[`1`] != `'-'`) {
523	buf[len++] = `'-'`;
524	buf[len++] = `'x'`;
525	}
526	buf[len++] = `'-'`;
527	buf[len++] = `'h'`;
528	buf[len++] = `'b'`;
529	buf[len++] = `'s'`;
530	buf[len++] = `'c'`;
531	buf[len++] = `'-'`;
532	for (shift = `28`; shift >= `0`; shift -= `4`)
533	buf[len++] = TOHEX (script_tag >> shift);
534	language = hb_language_from_string ((char* *) buf, len);
535	free (buf);
536	}
537	}
538	}
539	}
540
541	#ifdef MAIN
542	static inline void
543	test_langs_sorted ()
544	{
545	for (unsigned int i = `1`; i < ARRAY_LENGTH (ot_languages); i++)
546	{
547	int c = ot_languages[i].cmp (&ot_languages[i - `1`]);
548	if (c > `0`)
549	{
550	fprintf (stderr, "ot_languages not sorted at index %d: %s %d %s\n",
551	i, ot_languages[i-`1`].language, c, ot_languages[i].language);
552	abort();
553	}
554	}
555	}
556
557	int
558	main ()
559	{
560	test_langs_sorted ();
561	return `0`;
562	}
563
564	#endif
565
566
567	#endif
568

Browse the source code of engine/third_party/harfbuzz/src/hb-ot-tag.cc