uscript.h source code [include/unicode/uscript.h]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 1997-2016, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	*
9	* File USCRIPT.H
10	*
11	* Modification History:
12	*
13	* Date Name Description
14	* 07/06/2001 Ram Creation.
15	******************************************************************************
16	*/
17
18	#ifndef USCRIPT_H
19	#define USCRIPT_H
20	#include "unicode/utypes.h"
21
22	/**
23	* \file
24	* \brief C API: Unicode Script Information
25	*/
26
27	/**
28	* Constants for ISO 15924 script codes.
29	*
30	* The current set of script code constants supports at least all scripts
31	* that are encoded in the version of Unicode which ICU currently supports.
32	* The names of the constants are usually derived from the
33	* Unicode script property value aliases.
34	* See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/)
35	* and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .
36	*
37	* In addition, constants for many ISO 15924 script codes
38	* are included, for use with language tags, CLDR data, and similar.
39	* Some of those codes are not used in the Unicode Character Database (UCD).
40	* For example, there are no characters that have a UCD script property value of
41	* Hans or Hant. All Han ideographs have the Hani script property value in Unicode.
42	*
43	* Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.
44	*
45	* Starting with ICU 55, script codes are only added when their scripts
46	* have been or will certainly be encoded in Unicode,
47	* and have been assigned Unicode script property value aliases,
48	* to ensure that their script names are stable and match the names of the constants.
49	* Script codes like Latf and Aran that are not subject to separate encoding
50	* may be added at any time.
51	*
52	* @stable ICU 2.2
53	*/
typedef enum UScriptCode {
    /*
     * Note: UScriptCode constants and their ISO script code comments
     * are parsed by preparseucd.py.
     * It matches lines like
     *     USCRIPT_<Unicode Script value name> = <integer>,  / * <ISO script code> * /
     */

    /** @stable ICU 2.2 */
    USCRIPT_INVALID_CODE = -1,
    /** @stable ICU 2.2 */
    USCRIPT_COMMON = 0, /* Zyyy */
    /** @stable ICU 2.2 */
    USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */
    /** @stable ICU 2.2 */
    USCRIPT_ARABIC = 2, /* Arab */
    /** @stable ICU 2.2 */
    USCRIPT_ARMENIAN = 3, /* Armn */
    /** @stable ICU 2.2 */
    USCRIPT_BENGALI = 4, /* Beng */
    /** @stable ICU 2.2 */
    USCRIPT_BOPOMOFO = 5, /* Bopo */
    /** @stable ICU 2.2 */
    USCRIPT_CHEROKEE = 6, /* Cher */
    /** @stable ICU 2.2 */
    USCRIPT_COPTIC = 7, /* Copt */
    /** @stable ICU 2.2 */
    USCRIPT_CYRILLIC = 8, /* Cyrl */
    /** @stable ICU 2.2 */
    USCRIPT_DESERET = 9, /* Dsrt */
    /** @stable ICU 2.2 */
    USCRIPT_DEVANAGARI = 10, /* Deva */
    /** @stable ICU 2.2 */
    USCRIPT_ETHIOPIC = 11, /* Ethi */
    /** @stable ICU 2.2 */
    USCRIPT_GEORGIAN = 12, /* Geor */
    /** @stable ICU 2.2 */
    USCRIPT_GOTHIC = 13, /* Goth */
    /** @stable ICU 2.2 */
    USCRIPT_GREEK = 14, /* Grek */
    /** @stable ICU 2.2 */
    USCRIPT_GUJARATI = 15, /* Gujr */
    /** @stable ICU 2.2 */
    USCRIPT_GURMUKHI = 16, /* Guru */
    /** @stable ICU 2.2 */
    USCRIPT_HAN = 17, /* Hani */
    /** @stable ICU 2.2 */
    USCRIPT_HANGUL = 18, /* Hang */
    /** @stable ICU 2.2 */
    USCRIPT_HEBREW = 19, /* Hebr */
    /** @stable ICU 2.2 */
    USCRIPT_HIRAGANA = 20, /* Hira */
    /** @stable ICU 2.2 */
    USCRIPT_KANNADA = 21, /* Knda */
    /** @stable ICU 2.2 */
    USCRIPT_KATAKANA = 22, /* Kana */
    /** @stable ICU 2.2 */
    USCRIPT_KHMER = 23, /* Khmr */
    /** @stable ICU 2.2 */
    USCRIPT_LAO = 24, /* Laoo */
    /** @stable ICU 2.2 */
    USCRIPT_LATIN = 25, /* Latn */
    /** @stable ICU 2.2 */
    USCRIPT_MALAYALAM = 26, /* Mlym */
    /** @stable ICU 2.2 */
    USCRIPT_MONGOLIAN = 27, /* Mong */
    /** @stable ICU 2.2 */
    USCRIPT_MYANMAR = 28, /* Mymr */
    /** @stable ICU 2.2 */
    USCRIPT_OGHAM = 29, /* Ogam */
    /** @stable ICU 2.2 */
    USCRIPT_OLD_ITALIC = 30, /* Ital */
    /** @stable ICU 2.2 */
    USCRIPT_ORIYA = 31, /* Orya */
    /** @stable ICU 2.2 */
    USCRIPT_RUNIC = 32, /* Runr */
    /** @stable ICU 2.2 */
    USCRIPT_SINHALA = 33, /* Sinh */
    /** @stable ICU 2.2 */
    USCRIPT_SYRIAC = 34, /* Syrc */
    /** @stable ICU 2.2 */
    USCRIPT_TAMIL = 35, /* Taml */
    /** @stable ICU 2.2 */
    USCRIPT_TELUGU = 36, /* Telu */
    /** @stable ICU 2.2 */
    USCRIPT_THAANA = 37, /* Thaa */
    /** @stable ICU 2.2 */
    USCRIPT_THAI = 38, /* Thai */
    /** @stable ICU 2.2 */
    USCRIPT_TIBETAN = 39, /* Tibt */
    /** Canadian_Aboriginal script. @stable ICU 2.6 */
    USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */
    /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */
    USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL,
    /** @stable ICU 2.2 */
    USCRIPT_YI = 41, /* Yiii */
    /* New scripts in Unicode 3.2 */
    /** @stable ICU 2.2 */
    USCRIPT_TAGALOG = 42, /* Tglg */
    /** @stable ICU 2.2 */
    USCRIPT_HANUNOO = 43, /* Hano */
    /** @stable ICU 2.2 */
    USCRIPT_BUHID = 44, /* Buhd */
    /** @stable ICU 2.2 */
    USCRIPT_TAGBANWA = 45, /* Tagb */

    /* New scripts in Unicode 4 */
    /** @stable ICU 2.6 */
    USCRIPT_BRAILLE = 46, /* Brai */
    /** @stable ICU 2.6 */
    USCRIPT_CYPRIOT = 47, /* Cprt */
    /** @stable ICU 2.6 */
    USCRIPT_LIMBU = 48, /* Limb */
    /** @stable ICU 2.6 */
    USCRIPT_LINEAR_B = 49, /* Linb */
    /** @stable ICU 2.6 */
    USCRIPT_OSMANYA = 50, /* Osma */
    /** @stable ICU 2.6 */
    USCRIPT_SHAVIAN = 51, /* Shaw */
    /** @stable ICU 2.6 */
    USCRIPT_TAI_LE = 52, /* Tale */
    /** @stable ICU 2.6 */
    USCRIPT_UGARITIC = 53, /* Ugar */

    /** New script code in Unicode 4.0.1 @stable ICU 3.0 */
    USCRIPT_KATAKANA_OR_HIRAGANA = 54, /* Hrkt */

    /* New scripts in Unicode 4.1 */
    /** @stable ICU 3.4 */
    USCRIPT_BUGINESE = 55, /* Bugi */
    /** @stable ICU 3.4 */
    USCRIPT_GLAGOLITIC = 56, /* Glag */
    /** @stable ICU 3.4 */
    USCRIPT_KHAROSHTHI = 57, /* Khar */
    /** @stable ICU 3.4 */
    USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */
    /** @stable ICU 3.4 */
    USCRIPT_NEW_TAI_LUE = 59, /* Talu */
    /** @stable ICU 3.4 */
    USCRIPT_TIFINAGH = 60, /* Tfng */
    /** @stable ICU 3.4 */
    USCRIPT_OLD_PERSIAN = 61, /* Xpeo */

    /* New script codes from Unicode and ISO 15924 */
    /** @stable ICU 3.6 */
    USCRIPT_BALINESE = 62, /* Bali */
    /** @stable ICU 3.6 */
    USCRIPT_BATAK = 63, /* Batk */
    /** @stable ICU 3.6 */
    USCRIPT_BLISSYMBOLS = 64, /* Blis */
    /** @stable ICU 3.6 */
    USCRIPT_BRAHMI = 65, /* Brah */
    /** @stable ICU 3.6 */
    USCRIPT_CHAM = 66, /* Cham */
    /** @stable ICU 3.6 */
    USCRIPT_CIRTH = 67, /* Cirt */
    /** @stable ICU 3.6 */
    USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */
    /** @stable ICU 3.6 */
    USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */
    /** @stable ICU 3.6 */
    USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */
    /** @stable ICU 3.6 */
    USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */
    /** @stable ICU 3.6 */
    USCRIPT_KHUTSURI = 72, /* Geok */
    /** @stable ICU 3.6 */
    USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */
    /** @stable ICU 3.6 */
    USCRIPT_TRADITIONAL_HAN = 74, /* Hant */
    /** @stable ICU 3.6 */
    USCRIPT_PAHAWH_HMONG = 75, /* Hmng */
    /** @stable ICU 3.6 */
    USCRIPT_OLD_HUNGARIAN = 76, /* Hung */
    /** @stable ICU 3.6 */
    USCRIPT_HARAPPAN_INDUS = 77, /* Inds */
    /** @stable ICU 3.6 */
    USCRIPT_JAVANESE = 78, /* Java */
    /** @stable ICU 3.6 */
    USCRIPT_KAYAH_LI = 79, /* Kali */
    /** @stable ICU 3.6 */
    USCRIPT_LATIN_FRAKTUR = 80, /* Latf */
    /** @stable ICU 3.6 */
    USCRIPT_LATIN_GAELIC = 81, /* Latg */
    /** @stable ICU 3.6 */
    USCRIPT_LEPCHA = 82, /* Lepc */
    /** @stable ICU 3.6 */
    USCRIPT_LINEAR_A = 83, /* Lina */
    /** @stable ICU 4.6 */
    USCRIPT_MANDAIC = 84, /* Mand */
    /** @stable ICU 3.6 */
    USCRIPT_MANDAEAN = USCRIPT_MANDAIC,
    /** @stable ICU 3.6 */
    USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */
    /** @stable ICU 4.6 */
    USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */
    /** @stable ICU 3.6 */
    USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS,
    /** @stable ICU 3.6 */
    USCRIPT_NKO = 87, /* Nkoo */
    /** @stable ICU 3.6 */
    USCRIPT_ORKHON = 88, /* Orkh */
    /** @stable ICU 3.6 */
    USCRIPT_OLD_PERMIC = 89, /* Perm */
    /** @stable ICU 3.6 */
    USCRIPT_PHAGS_PA = 90, /* Phag */
    /** @stable ICU 3.6 */
    USCRIPT_PHOENICIAN = 91, /* Phnx */
    /** @stable ICU 52 */
    USCRIPT_MIAO = 92, /* Plrd */
    /** @stable ICU 3.6 */
    USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO,
    /** @stable ICU 3.6 */
    USCRIPT_RONGORONGO = 93, /* Roro */
    /** @stable ICU 3.6 */
    USCRIPT_SARATI = 94, /* Sara */
    /** @stable ICU 3.6 */
    USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */
    /** @stable ICU 3.6 */
    USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */
    /** @stable ICU 3.6 */
    USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */
    /** @stable ICU 3.6 */
    USCRIPT_TENGWAR = 98, /* Teng */
    /** @stable ICU 3.6 */
    USCRIPT_VAI = 99, /* Vaii */
    /** @stable ICU 3.6 */
    USCRIPT_VISIBLE_SPEECH = 100, /* Visp */
    /** @stable ICU 3.6 */
    USCRIPT_CUNEIFORM = 101, /* Xsux */
    /** @stable ICU 3.6 */
    USCRIPT_UNWRITTEN_LANGUAGES = 102, /* Zxxx */
    /** @stable ICU 3.6 */
    USCRIPT_UNKNOWN = 103, /* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */

    /** @stable ICU 3.8 */
    USCRIPT_CARIAN = 104, /* Cari */
    /** @stable ICU 3.8 */
    USCRIPT_JAPANESE = 105, /* Jpan */
    /** @stable ICU 3.8 */
    USCRIPT_LANNA = 106, /* Lana */
    /** @stable ICU 3.8 */
    USCRIPT_LYCIAN = 107, /* Lyci */
    /** @stable ICU 3.8 */
    USCRIPT_LYDIAN = 108, /* Lydi */
    /** @stable ICU 3.8 */
    USCRIPT_OL_CHIKI = 109, /* Olck */
    /** @stable ICU 3.8 */
    USCRIPT_REJANG = 110, /* Rjng */
    /** @stable ICU 3.8 */
    USCRIPT_SAURASHTRA = 111, /* Saur */
    /** Sutton SignWriting @stable ICU 3.8 */
    USCRIPT_SIGN_WRITING = 112, /* Sgnw */
    /** @stable ICU 3.8 */
    USCRIPT_SUNDANESE = 113, /* Sund */
    /** @stable ICU 3.8 */
    USCRIPT_MOON = 114, /* Moon */
    /** @stable ICU 3.8 */
    USCRIPT_MEITEI_MAYEK = 115, /* Mtei */

    /** @stable ICU 4.0 */
    USCRIPT_IMPERIAL_ARAMAIC = 116, /* Armi */
    /** @stable ICU 4.0 */
    USCRIPT_AVESTAN = 117, /* Avst */
    /** @stable ICU 4.0 */
    USCRIPT_CHAKMA = 118, /* Cakm */
    /** @stable ICU 4.0 */
    USCRIPT_KOREAN = 119, /* Kore */
    /** @stable ICU 4.0 */
    USCRIPT_KAITHI = 120, /* Kthi */
    /** @stable ICU 4.0 */
    USCRIPT_MANICHAEAN = 121, /* Mani */
    /** @stable ICU 4.0 */
    USCRIPT_INSCRIPTIONAL_PAHLAVI = 122, /* Phli */
    /** @stable ICU 4.0 */
    USCRIPT_PSALTER_PAHLAVI = 123, /* Phlp */
    /** @stable ICU 4.0 */
    USCRIPT_BOOK_PAHLAVI = 124, /* Phlv */
    /** @stable ICU 4.0 */
    USCRIPT_INSCRIPTIONAL_PARTHIAN = 125, /* Prti */
    /** @stable ICU 4.0 */
    USCRIPT_SAMARITAN = 126, /* Samr */
    /** @stable ICU 4.0 */
    USCRIPT_TAI_VIET = 127, /* Tavt */
    /** @stable ICU 4.0 */
    USCRIPT_MATHEMATICAL_NOTATION = 128, /* Zmth */
    /** @stable ICU 4.0 */
    USCRIPT_SYMBOLS = 129, /* Zsym */

    /** @stable ICU 4.4 */
    USCRIPT_BAMUM = 130, /* Bamu */
    /** @stable ICU 4.4 */
    USCRIPT_LISU = 131, /* Lisu */
    /** @stable ICU 4.4 */
    USCRIPT_NAKHI_GEBA = 132, /* Nkgb */
    /** @stable ICU 4.4 */
    USCRIPT_OLD_SOUTH_ARABIAN = 133, /* Sarb */

    /** @stable ICU 4.6 */
    USCRIPT_BASSA_VAH = 134, /* Bass */
    /** @stable ICU 54 */
    USCRIPT_DUPLOYAN = 135, /* Dupl */
#ifndef U_HIDE_DEPRECATED_API
    /** @deprecated ICU 54 Typo, use USCRIPT_DUPLOYAN */
    USCRIPT_DUPLOYAN_SHORTAND = USCRIPT_DUPLOYAN,
#endif /* U_HIDE_DEPRECATED_API */
    /** @stable ICU 4.6 */
    USCRIPT_ELBASAN = 136, /* Elba */
    /** @stable ICU 4.6 */
    USCRIPT_GRANTHA = 137, /* Gran */
    /** @stable ICU 4.6 */
    USCRIPT_KPELLE = 138, /* Kpel */
    /** @stable ICU 4.6 */
    USCRIPT_LOMA = 139, /* Loma */
    /** Mende Kikakui @stable ICU 4.6 */
    USCRIPT_MENDE = 140, /* Mend */
    /** @stable ICU 4.6 */
    USCRIPT_MEROITIC_CURSIVE = 141, /* Merc */
    /** @stable ICU 4.6 */
    USCRIPT_OLD_NORTH_ARABIAN = 142, /* Narb */
    /** @stable ICU 4.6 */
    USCRIPT_NABATAEAN = 143, /* Nbat */
    /** @stable ICU 4.6 */
    USCRIPT_PALMYRENE = 144, /* Palm */
    /** @stable ICU 54 */
    USCRIPT_KHUDAWADI = 145, /* Sind */
    /** @stable ICU 4.6 */
    USCRIPT_SINDHI = USCRIPT_KHUDAWADI,
    /** @stable ICU 4.6 */
    USCRIPT_WARANG_CITI = 146, /* Wara */

    /** @stable ICU 4.8 */
    USCRIPT_AFAKA = 147, /* Afak */
    /** @stable ICU 4.8 */
    USCRIPT_JURCHEN = 148, /* Jurc */
    /** @stable ICU 4.8 */
    USCRIPT_MRO = 149, /* Mroo */
    /** @stable ICU 4.8 */
    USCRIPT_NUSHU = 150, /* Nshu */
    /** @stable ICU 4.8 */
    USCRIPT_SHARADA = 151, /* Shrd */
    /** @stable ICU 4.8 */
    USCRIPT_SORA_SOMPENG = 152, /* Sora */
    /** @stable ICU 4.8 */
    USCRIPT_TAKRI = 153, /* Takr */
    /** @stable ICU 4.8 */
    USCRIPT_TANGUT = 154, /* Tang */
    /** @stable ICU 4.8 */
    USCRIPT_WOLEAI = 155, /* Wole */

    /** @stable ICU 49 */
    USCRIPT_ANATOLIAN_HIEROGLYPHS = 156, /* Hluw */
    /** @stable ICU 49 */
    USCRIPT_KHOJKI = 157, /* Khoj */
    /** @stable ICU 49 */
    USCRIPT_TIRHUTA = 158, /* Tirh */

    /** @stable ICU 52 */
    USCRIPT_CAUCASIAN_ALBANIAN = 159, /* Aghb */
    /** @stable ICU 52 */
    USCRIPT_MAHAJANI = 160, /* Mahj */

    /** @stable ICU 54 */
    USCRIPT_AHOM = 161, /* Ahom */
    /** @stable ICU 54 */
    USCRIPT_HATRAN = 162, /* Hatr */
    /** @stable ICU 54 */
    USCRIPT_MODI = 163, /* Modi */
    /** @stable ICU 54 */
    USCRIPT_MULTANI = 164, /* Mult */
    /** @stable ICU 54 */
    USCRIPT_PAU_CIN_HAU = 165, /* Pauc */
    /** @stable ICU 54 */
    USCRIPT_SIDDHAM = 166, /* Sidd */

    /** @stable ICU 58 */
    USCRIPT_ADLAM = 167, /* Adlm */
    /** @stable ICU 58 */
    USCRIPT_BHAIKSUKI = 168, /* Bhks */
    /** @stable ICU 58 */
    USCRIPT_MARCHEN = 169, /* Marc */
    /** @stable ICU 58 */
    USCRIPT_NEWA = 170, /* Newa */
    /** @stable ICU 58 */
    USCRIPT_OSAGE = 171, /* Osge */

    /** @stable ICU 58 */
    USCRIPT_HAN_WITH_BOPOMOFO = 172, /* Hanb */
    /** @stable ICU 58 */
    USCRIPT_JAMO = 173, /* Jamo */
    /** @stable ICU 58 */
    USCRIPT_SYMBOLS_EMOJI = 174, /* Zsye */

    /** @stable ICU 60 */
    USCRIPT_MASARAM_GONDI = 175, /* Gonm */
    /** @stable ICU 60 */
    USCRIPT_SOYOMBO = 176, /* Soyo */
    /** @stable ICU 60 */
    USCRIPT_ZANABAZAR_SQUARE = 177, /* Zanb */

    /** @stable ICU 62 */
    USCRIPT_DOGRA = 178, /* Dogr */
    /** @stable ICU 62 */
    USCRIPT_GUNJALA_GONDI = 179, /* Gong */
    /** @stable ICU 62 */
    USCRIPT_MAKASAR = 180, /* Maka */
    /** @stable ICU 62 */
    USCRIPT_MEDEFAIDRIN = 181, /* Medf */
    /** @stable ICU 62 */
    USCRIPT_HANIFI_ROHINGYA = 182, /* Rohg */
    /** @stable ICU 62 */
    USCRIPT_SOGDIAN = 183, /* Sogd */
    /** @stable ICU 62 */
    USCRIPT_OLD_SOGDIAN = 184, /* Sogo */

#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UScriptCode value.
     * The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    USCRIPT_CODE_LIMIT = 185
#endif // U_HIDE_DEPRECATED_API
} UScriptCode;
479
480	/**
481	* Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
482	* Fills in USCRIPT_MALAYALAM given "Malayam" OR "Mlym".
483	* Fills in USCRIPT_LATIN given "en" OR "en_US"
484	* If the required capacity is greater than the capacity of the destination buffer,
485	* then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.
486	*
487	* <p>Note: To search by short or long script alias only, use
488	* u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does
489	* a fast lookup with no access of the locale data.
490	*
491	* @param nameOrAbbrOrLocale name of the script, as given in
492	* PropertyValueAliases.txt, or ISO 15924 code or locale
493	* @param fillIn the UScriptCode buffer to fill in the script code
494	* @param capacity the capacity (size) of UScriptCode buffer passed in.
495	* @param err the error status code.
496	* @return The number of script codes filled in the buffer passed in
497	* @stable ICU 2.4
498	*/
499	U_STABLE int32_t U_EXPORT2
500	uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
501
502	/**
503	* Returns the long Unicode script name, if there is one.
504	* Otherwise returns the 4-letter ISO 15924 script code.
505	* Returns "Malayam" given USCRIPT_MALAYALAM.
506	*
507	* @param scriptCode UScriptCode enum
508	* @return long script name as given in PropertyValueAliases.txt, or the 4-letter code,
509	* or NULL if scriptCode is invalid
510	* @stable ICU 2.4
511	*/
512	U_STABLE const char* U_EXPORT2
513	uscript_getName(UScriptCode scriptCode);
514
515	/**
516	* Returns the 4-letter ISO 15924 script code,
517	* which is the same as the short Unicode script name if Unicode has names for the script.
518	* Returns "Mlym" given USCRIPT_MALAYALAM.
519	*
520	* @param scriptCode UScriptCode enum
521	* @return short script name (4-letter code), or NULL if scriptCode is invalid
522	* @stable ICU 2.4
523	*/
524	U_STABLE const char* U_EXPORT2
525	uscript_getShortName(UScriptCode scriptCode);
526
527	/**
528	* Gets the script code associated with the given codepoint.
529	* Returns USCRIPT_MALAYALAM given 0x0D02
530	* @param codepoint UChar32 codepoint
531	* @param err the error status code.
532	* @return The UScriptCode, or 0 if codepoint is invalid
533	* @stable ICU 2.4
534	*/
535	U_STABLE UScriptCode U_EXPORT2
536	uscript_getScript(UChar32 codepoint, UErrorCode *err);
537
538	/**
539	* Do the Script_Extensions of code point c contain script sc?
540	* If c does not have explicit Script_Extensions, then this tests whether
541	* c has the Script property value sc.
542	*
543	* Some characters are commonly used in multiple scripts.
544	* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
545	* @param c code point
546	* @param sc script code
547	* @return TRUE if sc is in Script_Extensions(c)
548	* @stable ICU 49
549	*/
550	U_STABLE UBool U_EXPORT2
551	uscript_hasScript(UChar32 c, UScriptCode sc);
552
553	/**
554	* Writes code point c's Script_Extensions as a list of UScriptCode values
555	* to the output scripts array and returns the number of script codes.
556	* - If c does have Script_Extensions, then the Script property value
557	* (normally Common or Inherited) is not included.
558	* - If c does not have Script_Extensions, then the one Script code is written to the output array.
559	* - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written.
560	* In other words, if the return value is 1,
561	* then the output array contains exactly c's single Script code.
562	* If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.
563	*
564	* Some characters are commonly used in multiple scripts.
565	* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
566	*
567	* If there are more than capacity script codes to be written, then
568	* U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned.
569	* (Usual ICU buffer handling behavior.)
570	*
571	* @param c code point
572	* @param scripts output script code array
573	* @param capacity capacity of the scripts array
574	* @param errorCode Standard ICU error code. Its input value must
575	* pass the U_SUCCESS() test, or else the function returns
576	* immediately. Check for U_FAILURE() on output or use with
577	* function chaining. (See User Guide for details.)
578	* @return number of script codes in c's Script_Extensions, or 1 for the single Script value,
579	* written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
580	* @stable ICU 49
581	*/
582	U_STABLE int32_t U_EXPORT2
583	uscript_getScriptExtensions(UChar32 c,
584	UScriptCode *scripts, int32_t capacity,
585	UErrorCode *errorCode);
586
/**
 * Script usage constants.
 * See UAX #31 Unicode Identifier and Pattern Syntax.
 * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
 *
 * The enumerators carry no explicit initializers, so they take the
 * consecutive values 0 through 5 in declaration order.
 *
 * @stable ICU 51
 */
typedef enum UScriptUsage {
    /** Not encoded in Unicode. @stable ICU 51 */
    USCRIPT_USAGE_NOT_ENCODED,
    /** Unknown script usage. @stable ICU 51 */
    USCRIPT_USAGE_UNKNOWN,
    /** Candidate for Exclusion from Identifiers. @stable ICU 51 */
    USCRIPT_USAGE_EXCLUDED,
    /** Limited Use script. @stable ICU 51 */
    USCRIPT_USAGE_LIMITED_USE,
    /** Aspirational Use script. @stable ICU 51 */
    USCRIPT_USAGE_ASPIRATIONAL,
    /** Recommended script. @stable ICU 51 */
    USCRIPT_USAGE_RECOMMENDED
} UScriptUsage;
608
609	/**
610	* Writes the script sample character string.
611	* This string normally consists of one code point but might be longer.
612	* The string is empty if the script is not encoded.
613	*
614	* @param script script code
615	* @param dest output string array
616	* @param capacity number of UChars in the dest array
617	* @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input
618	* @return the string length, even if U_BUFFER_OVERFLOW_ERROR
619	* @stable ICU 51
620	*/
621	U_STABLE int32_t U_EXPORT2
622	uscript_getSampleString(UScriptCode script, UChar dest, int32_t capacity, UErrorCode pErrorCode);
623
#if U_SHOW_CPLUSPLUS_API

/* Forward declaration, so that the return type below can be named. */
U_NAMESPACE_BEGIN
class UnicodeString;
U_NAMESPACE_END

/**
 * Gets the script sample character string as a UnicodeString.
 * The string normally holds a single code point, but it may be longer.
 * For a script that is not encoded, the string is empty.
 *
 * @param script script code
 * @return the sample character string
 * @stable ICU 51
 */
U_COMMON_API icu::UnicodeString U_EXPORT2
uscript_getSampleUnicodeString(UScriptCode script);

#endif  /* U_SHOW_CPLUSPLUS_API */
643
644	/**
645	* Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
646	* Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.
647	*
648	* @param script script code
649	* @return script usage
650	* @see UScriptUsage
651	* @stable ICU 51
652	*/
653	U_STABLE UScriptUsage U_EXPORT2
654	uscript_getUsage(UScriptCode script);
655
656	/**
657	* Returns TRUE if the script is written right-to-left.
658	* For example, Arab and Hebr.
659	*
660	* @param script script code
661	* @return TRUE if the script is right-to-left
662	* @stable ICU 51
663	*/
664	U_STABLE UBool U_EXPORT2
665	uscript_isRightToLeft(UScriptCode script);
666
667	/**
668	* Returns TRUE if the script allows line breaks between letters (excluding hyphenation).
669	* Such a script typically requires dictionary-based line breaking.
670	* For example, Hani and Thai.
671	*
672	* @param script script code
673	* @return TRUE if the script allows line breaks between letters
674	* @stable ICU 51
675	*/
676	U_STABLE UBool U_EXPORT2
677	uscript_breaksBetweenLetters(UScriptCode script);
678
679	/**
680	* Returns TRUE if in modern (or most recent) usage of the script case distinctions are customary.
681	* For example, Latn and Cyrl.
682	*
683	* @param script script code
684	* @return TRUE if the script is cased
685	* @stable ICU 51
686	*/
687	U_STABLE UBool U_EXPORT2
688	uscript_isCased(UScriptCode script);
689
690	#endif
691

Browse the source code of include/unicode/uscript.h