hb-ot-tag.cc source code [Skia/third_party/externals/harfbuzz/src/hb-ot-tag.cc]

1	/*
2	* Copyright © 2009 Red Hat, Inc.
3	* Copyright © 2011 Google, Inc.
4	*
5	* This is part of HarfBuzz, a text shaping library.
6	*
7	* Permission is hereby granted, without written agreement and without
8	* license or royalty fees, to use, copy, modify, and distribute this
9	* software and its documentation for any purpose, provided that the
10	* above copyright notice and the following two paragraphs appear in
11	* all copies of this software.
12	*
13	* IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14	* DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15	* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16	* IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17	* DAMAGE.
18	*
19	* THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20	* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21	* FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
22	* ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23	* PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24	*
25	* Red Hat Author(s): Behdad Esfahbod
26	* Google Author(s): Behdad Esfahbod, Roozbeh Pournader
27	*/
28
29	#include "hb.hh"
30
31	#ifndef HB_NO_OT_TAG
32
33
/* hb_script_t */
35
36	static hb_tag_t
37	hb_ot_old_tag_from_script (hb_script_t script)
38	{
39	/ This seems to be accurate as of end of 2012. /
40
41	switch ((hb_tag_t) script)
42	{
43	case HB_SCRIPT_INVALID: return HB_OT_TAG_DEFAULT_SCRIPT;
44
45	/ KATAKANA and HIRAGANA both map to 'kana' /
46	case HB_SCRIPT_HIRAGANA: return HB_TAG(`'k'`,`'a'`,`'n'`,`'a'`);
47
48	/ Spaces at the end are preserved, unlike ISO 15924 /
49	case HB_SCRIPT_LAO: return HB_TAG(`'l'`,`'a'`,`'o'`,`' '`);
50	case HB_SCRIPT_YI: return HB_TAG(`'y'`,`'i'`,`' '`,`' '`);
51	/ Unicode-5.0 additions /
52	case HB_SCRIPT_NKO: return HB_TAG(`'n'`,`'k'`,`'o'`,`' '`);
53	/ Unicode-5.1 additions /
54	case HB_SCRIPT_VAI: return HB_TAG(`'v'`,`'a'`,`'i'`,`' '`);
55	}
56
57	/ Else, just change first char to lowercase and return /
58	return ((hb_tag_t) script) \| `0x20000000u`;
59	}
60
61	static hb_script_t
62	hb_ot_old_tag_to_script (hb_tag_t tag)
63	{
64	if (unlikely (tag == HB_OT_TAG_DEFAULT_SCRIPT))
65	return HB_SCRIPT_INVALID;
66
67	/ This side of the conversion is fully algorithmic. /
68
69	/ Any spaces at the end of the tag are replaced by repeating the last*
70	* letter. Eg 'nko ' -> 'Nkoo' */
71	if (unlikely ((tag & `0x0000FF00u`) == `0x00002000u`))
72	tag \|= (tag >> `8`) & `0x0000FF00u`; / Copy second letter to third /
73	if (unlikely ((tag & `0x000000FFu`) == `0x00000020u`))
74	tag \|= (tag >> `8`) & `0x000000FFu`; / Copy third letter to fourth /
75
76	/ Change first char to uppercase and return /
77	return (hb_script_t) (tag & ~`0x20000000u`);
78	}
79
80	static hb_tag_t
81	hb_ot_new_tag_from_script (hb_script_t script)
82	{
83	switch ((hb_tag_t) script) {
84	case HB_SCRIPT_BENGALI: return HB_TAG(`'b'`,`'n'`,`'g'`,`'2'`);
85	case HB_SCRIPT_DEVANAGARI: return HB_TAG(`'d'`,`'e'`,`'v'`,`'2'`);
86	case HB_SCRIPT_GUJARATI: return HB_TAG(`'g'`,`'j'`,`'r'`,`'2'`);
87	case HB_SCRIPT_GURMUKHI: return HB_TAG(`'g'`,`'u'`,`'r'`,`'2'`);
88	case HB_SCRIPT_KANNADA: return HB_TAG(`'k'`,`'n'`,`'d'`,`'2'`);
89	case HB_SCRIPT_MALAYALAM: return HB_TAG(`'m'`,`'l'`,`'m'`,`'2'`);
90	case HB_SCRIPT_ORIYA: return HB_TAG(`'o'`,`'r'`,`'y'`,`'2'`);
91	case HB_SCRIPT_TAMIL: return HB_TAG(`'t'`,`'m'`,`'l'`,`'2'`);
92	case HB_SCRIPT_TELUGU: return HB_TAG(`'t'`,`'e'`,`'l'`,`'2'`);
93	case HB_SCRIPT_MYANMAR: return HB_TAG(`'m'`,`'y'`,`'m'`,`'2'`);
94	}
95
96	return HB_OT_TAG_DEFAULT_SCRIPT;
97	}
98
99	static hb_script_t
100	hb_ot_new_tag_to_script (hb_tag_t tag)
101	{
102	switch (tag) {
103	case HB_TAG(`'b'`,`'n'`,`'g'`,`'2'`): return HB_SCRIPT_BENGALI;
104	case HB_TAG(`'d'`,`'e'`,`'v'`,`'2'`): return HB_SCRIPT_DEVANAGARI;
105	case HB_TAG(`'g'`,`'j'`,`'r'`,`'2'`): return HB_SCRIPT_GUJARATI;
106	case HB_TAG(`'g'`,`'u'`,`'r'`,`'2'`): return HB_SCRIPT_GURMUKHI;
107	case HB_TAG(`'k'`,`'n'`,`'d'`,`'2'`): return HB_SCRIPT_KANNADA;
108	case HB_TAG(`'m'`,`'l'`,`'m'`,`'2'`): return HB_SCRIPT_MALAYALAM;
109	case HB_TAG(`'o'`,`'r'`,`'y'`,`'2'`): return HB_SCRIPT_ORIYA;
110	case HB_TAG(`'t'`,`'m'`,`'l'`,`'2'`): return HB_SCRIPT_TAMIL;
111	case HB_TAG(`'t'`,`'e'`,`'l'`,`'2'`): return HB_SCRIPT_TELUGU;
112	case HB_TAG(`'m'`,`'y'`,`'m'`,`'2'`): return HB_SCRIPT_MYANMAR;
113	}
114
115	return HB_SCRIPT_UNKNOWN;
116	}
117
118	#ifndef HB_DISABLE_DEPRECATED
119	void
120	hb_ot_tags_from_script (hb_script_t script,
121	hb_tag_t *script_tag_1,
122	hb_tag_t *script_tag_2)
123	{
124	unsigned int count = `2`;
125	hb_tag_t tags[`2`];
126	hb_ot_tags_from_script_and_language (script, HB_LANGUAGE_INVALID, &count, tags, nullptr, nullptr);
127	*script_tag_1 = count > `0` ? tags[`0`] : HB_OT_TAG_DEFAULT_SCRIPT;
128	*script_tag_2 = count > `1` ? tags[`1`] : HB_OT_TAG_DEFAULT_SCRIPT;
129	}
130	#endif
131
132	/*
133	* Complete list at:
134	* https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags
135	*
136	* Most of the script tags are the same as the ISO 15924 tag but lowercased.
137	* So we just do that, and handle the exceptional cases in a switch.
138	*/
139
140	static void
141	hb_ot_all_tags_from_script (hb_script_t script,
142	unsigned int count /* IN/OUT /,
143	hb_tag_t tags /* OUT /)
144	{
145	unsigned int i = `0`;
146
147	hb_tag_t new_tag = hb_ot_new_tag_from_script (script);
148	if (unlikely (new_tag != HB_OT_TAG_DEFAULT_SCRIPT))
149	{
150	/ HB_SCRIPT_MYANMAR maps to 'mym2', but there is no 'mym3'. /
151	if (new_tag != HB_TAG(`'m'`,`'y'`,`'m'`,`'2'`))
152	tags[i++] = new_tag \| `'3'`;
153	if (*count > i)
154	tags[i++] = new_tag;
155	}
156
157	if (*count > i)
158	{
159	hb_tag_t old_tag = hb_ot_old_tag_from_script (script);
160	if (old_tag != HB_OT_TAG_DEFAULT_SCRIPT)
161	tags[i++] = old_tag;
162	}
163
164	*count = i;
165	}
166
167	hb_script_t
168	hb_ot_tag_to_script (hb_tag_t tag)
169	{
170	unsigned char digit = tag & `0x000000FFu`;
171	if (unlikely (digit == `'2'` \|\| digit == `'3'`))
172	return hb_ot_new_tag_to_script (tag & `0xFFFFFF32`);
173
174	return hb_ot_old_tag_to_script (tag);
175	}
176
177
/* hb_language_t */
179
180	static bool
181	subtag_matches (const char *lang_str,
182	const char *limit,
183	const char *subtag)
184	{
185	do {
186	const char *s = strstr (lang_str, subtag);
187	if (!s \|\| s >= limit)
188	return false;
189	if (!ISALNUM (s[strlen (subtag)]))
190	return true;
191	lang_str = s + strlen (subtag);
192	} while (true);
193	}
194
195	static hb_bool_t
196	lang_matches (const char lang_str, const* char *spec)
197	{
198	unsigned int len = strlen (spec);
199
200	return strncmp (lang_str, spec, len) == `0` &&
201	(lang_str[len] == `'\0'` \|\| lang_str[len] == `'-'`);
202	}
203
204	struct LangTag
205	{
206	char language[`4`];
207	hb_tag_t tag;
208
209	int cmp (const char a) const*
210	{
211	const char b = this*->language;
212	unsigned int da, db;
213	const char *p;
214
215	p = strchr (a, `'-'`);
216	da = p ? (unsigned int) (p - a) : strlen (a);
217
218	p = strchr (b, `'-'`);
219	db = p ? (unsigned int) (p - b) : strlen (b);
220
221	return strncmp (a, b, hb_max (da, db));
222	}
223	int cmp (const LangTag that) const*
224	{ return cmp (that->language); }
225	};
226
227	#include "hb-ot-tag-table.hh"
228
/* The corresponding languages IDs for the following IDs are unclear,
 * overlap, or are architecturally weird. Needs more research. */

/*{"??",	{HB_TAG('B','C','R',' ')}},*/	/* Bible Cree */
/*{"zh?",	{HB_TAG('C','H','N',' ')}},*/	/* Chinese (seen in Microsoft fonts) */
/*{"ar-Syrc?",	{HB_TAG('G','A','R',' ')}},*/	/* Garshuni */
/*{"??",	{HB_TAG('N','G','R',' ')}},*/	/* Nagari */
/*{"??",	{HB_TAG('Y','I','C',' ')}},*/	/* Yi Classic */
/*{"zh?",	{HB_TAG('Z','H','P',' ')}},*/	/* Chinese Phonetic */
238
239	#ifndef HB_DISABLE_DEPRECATED
240	hb_tag_t
241	hb_ot_tag_from_language (hb_language_t language)
242	{
243	unsigned int count = `1`;
244	hb_tag_t tags[`1`];
245	hb_ot_tags_from_script_and_language (HB_SCRIPT_UNKNOWN, language, nullptr, nullptr, &count, tags);
246	return count > `0` ? tags[`0`] : HB_OT_TAG_DEFAULT_LANGUAGE;
247	}
248	#endif
249
250	static void
251	hb_ot_tags_from_language (const char *lang_str,
252	const char *limit,
253	unsigned int *count,
254	hb_tag_t *tags)
255	{
256	const char *s;
257	unsigned int tag_idx;
258
259	/ Check for matches of multiple subtags. /
260	if (hb_ot_tags_from_complex_language (lang_str, limit, count, tags))
261	return;
262
263	/ Find a language matching in the first component. /
264	s = strchr (lang_str, `'-'`);
265	{
266	if (s && limit - lang_str >= `6`)
267	{
268	const char *extlang_end = strchr (s + `1`, `'-'`);
269	/ If there is an extended language tag, use it. /
270	if (`3` == (extlang_end ? extlang_end - s - `1` : strlen (s + `1`)) &&
271	ISALPHA (s[`1`]))
272	lang_str = s + `1`;
273	}
274	if (hb_sorted_array (ot_languages).bfind (lang_str, &tag_idx))
275	{
276	unsigned int i;
277	while (tag_idx != `0` &&
278	`0` == strcmp (ot_languages[tag_idx].language, ot_languages[tag_idx - `1`].language))
279	tag_idx--;
280	for (i = `0`;
281	i < *count &&
282	tag_idx + i < ARRAY_LENGTH (ot_languages) &&
283	`0` == strcmp (ot_languages[tag_idx + i].language, ot_languages[tag_idx].language);
284	i++)
285	tags[i] = ot_languages[tag_idx + i].tag;
286	*count = i;
287	return;
288	}
289	}
290
291	if (!s)
292	s = lang_str + strlen (lang_str);
293	if (s - lang_str == `3`) {
294	/ Assume it's ISO-639-3 and upper-case and use it. /
295	tags[`0`] = hb_tag_from_string (lang_str, s - lang_str) & ~`0x20202000u`;
296	*count = `1`;
297	return;
298	}
299
300	*count = `0`;
301	}
302
303	static bool
304	parse_private_use_subtag (const char *private_use_subtag,
305	unsigned int *count,
306	hb_tag_t *tags,
307	const char *prefix,
308	unsigned char (normalize) (unsigned* char))
309	{
310	#ifdef HB_NO_LANGUAGE_PRIVATE_SUBTAG
311	return false;
312	#endif
313
314	if (!(private_use_subtag && count && tags && count)) return* false;
315
316	const char *s = strstr (private_use_subtag, prefix);
317	if (!s) return false;
318
319	char tag[`4`];
320	int i;
321	s += strlen (prefix);
322	for (i = `0`; i < `4` && ISALNUM (s[i]); i++)
323	tag[i] = normalize (s[i]);
324	if (!i) return false;
325
326	for (; i < `4`; i++)
327	tag[i] = `' '`;
328	tags[`0`] = HB_TAG (tag[`0`], tag[`1`], tag[`2`], tag[`3`]);
329	if ((tags[`0`] & `0xDFDFDFDF`) == HB_OT_TAG_DEFAULT_SCRIPT)
330	tags[`0`] ^= ~`0xDFDFDFDF`;
331	*count = `1`;
332	return true;
333	}
334
335	/**
336	* hb_ot_tags_from_script_and_language:
337	* @script: an #hb_script_t to convert.
338	* @language: an #hb_language_t to convert.
339	* @script_count: (allow-none): maximum number of script tags to retrieve (IN)
340	* and actual number of script tags retrieved (OUT)
341	* @script_tags: (out) (allow-none): array of size at least @script_count to store the
342	* script tag results
343	* @language_count: (allow-none): maximum number of language tags to retrieve
344	* (IN) and actual number of language tags retrieved (OUT)
345	* @language_tags: (out) (allow-none): array of size at least @language_count to store
346	* the language tag results
347	*
348	* Converts an #hb_script_t and an #hb_language_t to script and language tags.
349	*
350	* Since: 2.0.0
351	**/
352	void
353	hb_ot_tags_from_script_and_language (hb_script_t script,
354	hb_language_t language,
355	unsigned int script_count /* IN/OUT /,
356	hb_tag_t script_tags /* OUT /,
357	unsigned int language_count /* IN/OUT /,
358	hb_tag_t language_tags /* OUT /)
359	{
360	bool needs_script = true;
361
362	if (language == HB_LANGUAGE_INVALID)
363	{
364	if (language_count && language_tags && *language_count)
365	*language_count = `0`;
366	}
367	else
368	{
369	const char lang_str, s, limit, private_use_subtag;
370	bool needs_language;
371
372	lang_str = hb_language_to_string (language);
373	limit = nullptr;
374	private_use_subtag = nullptr;
375	if (lang_str[`0`] == `'x'` && lang_str[`1`] == `'-'`)
376	{
377	private_use_subtag = lang_str;
378	} else {
379	for (s = lang_str + `1`; *s; s++)
380	{
381	if (s[-`1`] == `'-'` && s[`1`] == `'-'`)
382	{
383	if (s[`0`] == `'x'`)
384	{
385	private_use_subtag = s;
386	if (!limit)
387	limit = s - `1`;
388	break;
389	} else if (!limit)
390	{
391	limit = s - `1`;
392	}
393	}
394	}
395	if (!limit)
396	limit = s;
397	}
398
399	needs_script = !parse_private_use_subtag (private_use_subtag, script_count, script_tags, "-hbsc", TOLOWER);
400	needs_language = !parse_private_use_subtag (private_use_subtag, language_count, language_tags, "-hbot", TOUPPER);
401
402	if (needs_language && language_count && language_tags && *language_count)
403	hb_ot_tags_from_language (lang_str, limit, language_count, language_tags);
404	}
405
406	if (needs_script && script_count && script_tags && *script_count)
407	hb_ot_all_tags_from_script (script, script_count, script_tags);
408	}
409
410	/**
411	* hb_ot_tag_to_language:
412	*
413	*
414	*
415	* Return value: (transfer none):
416	*
417	* Since: 0.9.2
418	**/
419	hb_language_t
420	hb_ot_tag_to_language (hb_tag_t tag)
421	{
422	unsigned int i;
423
424	if (tag == HB_OT_TAG_DEFAULT_LANGUAGE)
425	return nullptr;
426
427	{
428	hb_language_t disambiguated_tag = hb_ot_ambiguous_tag_to_language (tag);
429	if (disambiguated_tag != HB_LANGUAGE_INVALID)
430	return disambiguated_tag;
431	}
432
433	for (i = `0`; i < ARRAY_LENGTH (ot_languages); i++)
434	if (ot_languages[i].tag == tag)
435	return hb_language_from_string (ot_languages[i].language, -`1`);
436
437	/ If it's three letters long, assume it's ISO 639-3 and lower-case and use it*
438	* (if it's not a registered tag, calling hb_ot_tag_from_language on the
439	* result might not return the same tag as the original tag).
440	* Else return a custom language in the form of "x-hbotABCD". */
441	{
442	char buf[`11`] = "x-hbot";
443	char *str = buf;
444	buf[`6`] = tag >> `24`;
445	buf[`7`] = (tag >> `16`) & `0xFF`;
446	buf[`8`] = (tag >> `8`) & `0xFF`;
447	buf[`9`] = tag & `0xFF`;
448	if (buf[`9`] == `0x20`)
449	{
450	buf[`9`] = `'\0'`;
451	if (ISALPHA (buf[`6`]) && ISALPHA (buf[`7`]) && ISALPHA (buf[`8`]))
452	{
453	buf[`6`] = TOLOWER (buf[`6`]);
454	buf[`7`] = TOLOWER (buf[`7`]);
455	buf[`8`] = TOLOWER (buf[`8`]);
456	str += `6`;
457	}
458	}
459	buf[`10`] = `'\0'`;
460	return hb_language_from_string (str, -`1`);
461	}
462	}
463
464	/**
465	* hb_ot_tags_to_script_and_language:
466	* @script_tag: a script tag
467	* @language_tag: a language tag
468	* @script: (allow-none): the #hb_script_t corresponding to @script_tag (OUT).
469	* @language: (allow-none): the #hb_language_t corresponding to @script_tag and
470	* @language_tag (OUT).
471	*
472	* Converts a script tag and a language tag to an #hb_script_t and an
473	* #hb_language_t.
474	*
475	* Since: 2.0.0
476	**/
477	void
478	hb_ot_tags_to_script_and_language (hb_tag_t script_tag,
479	hb_tag_t language_tag,
480	hb_script_t script /* OUT /,
481	hb_language_t language /* OUT /)
482	{
483	hb_script_t script_out = hb_ot_tag_to_script (script_tag);
484	if (script)
485	*script = script_out;
486	if (language)
487	{
488	unsigned int script_count = `1`;
489	hb_tag_t primary_script_tag[`1`];
490	hb_ot_tags_from_script_and_language (script_out,
491	HB_LANGUAGE_INVALID,
492	&script_count,
493	primary_script_tag,
494	nullptr, nullptr);
495	*language = hb_ot_tag_to_language (language_tag);
496	if (script_count == `0` \|\| primary_script_tag[`0`] != script_tag)
497	{
498	unsigned char *buf;
499	const char lang_str = hb_language_to_string (language);
500	size_t len = strlen (lang_str);
501	buf = (unsigned char *) malloc (len + `11`);
502	if (unlikely (!buf))
503	{
504	language = nullptr*;
505	}
506	else
507	{
508	memcpy (buf, lang_str, len);
509	if (lang_str[`0`] != `'x'` \|\| lang_str[`1`] != `'-'`) {
510	buf[len++] = `'-'`;
511	buf[len++] = `'x'`;
512	}
513	buf[len++] = `'-'`;
514	buf[len++] = `'h'`;
515	buf[len++] = `'b'`;
516	buf[len++] = `'s'`;
517	buf[len++] = `'c'`;
518	buf[len++] = script_tag >> `24`;
519	buf[len++] = (script_tag >> `16`) & `0xFF`;
520	buf[len++] = (script_tag >> `8`) & `0xFF`;
521	buf[len++] = script_tag & `0xFF`;
522	language = hb_language_from_string ((char* *) buf, len);
523	free (buf);
524	}
525	}
526	}
527	}
528
#ifdef MAIN
/*
 * Self-test: walks ot_languages and aborts with a diagnostic when an entry
 * compares out of order against the entry before it.
 */
static inline void
test_langs_sorted ()
{
  for (unsigned int i = 1; i < ARRAY_LENGTH (ot_languages); i++)
  {
    int c = ot_languages[i].cmp (&ot_languages[i - 1]);
    if (c > 0)
    {
      /* i is unsigned, so print it with %u. */
      fprintf (stderr, "ot_languages not sorted at index %u: %s %d %s\n",
	       i, ot_languages[i-1].language, c, ot_languages[i].language);
      abort();
    }
  }
}

int
main ()
{
  test_langs_sorted ();
  return 0;
}

#endif
553
554
555	#endif
556

Browse the source code of Skia/third_party/externals/harfbuzz/src/hb-ot-tag.cc