uloc.cpp source code [ClickHouse/contrib/icu/icu4c/source/common/uloc.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 1997-2016, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	*
9	* File ULOC.CPP
10	*
11	* Modification History:
12	*
13	* Date Name Description
14	* 04/01/97 aliu Creation.
15	* 08/21/98 stephen JDK 1.2 sync
16	* 12/08/98 rtg New Locale implementation and C API
17	* 03/15/99 damiba overhaul.
18	* 04/06/99 stephen changed setDefault() to realloc and copy
19	* 06/14/99 stephen Changed calls to ures_open for new params
20	* 07/21/99 stephen Modified setDefault() to propagate to C++
21	* 05/14/04 alan 7 years later: refactored, cleaned up, fixed bugs,
22	* brought canonicalization code into line with spec
23	*****************************************************************************/
24
25	/*
26	POSIX's locale format, from putil.c: [no spaces]
27
28	ll [ _CC ] [ . MM ] [ @ VV]
29
30	l = lang, C = ctry, M = charmap, V = variant
31	*/
32
33	#include "unicode/utypes.h"
34	#include "unicode/ustring.h"
35	#include "unicode/uloc.h"
36
37	#include "putilimp.h"
38	#include "ustr_imp.h"
39	#include "ulocimp.h"
40	#include "umutex.h"
41	#include "cstring.h"
42	#include "cmemory.h"
43	#include "locmap.h"
44	#include "uarrsort.h"
45	#include "uenumimp.h"
46	#include "uassert.h"
47	#include "charstr.h"
48
49	#include <stdio.h> /* for sprintf */
50
51	U_NAMESPACE_USE
52
53	/ ### Declarations *************************************************/
54
55	/ Locale stuff from locid.cpp /
56	U_CFUNC void locale_set_default(const char *id);
57	U_CFUNC const char locale_get_default(void*);
58	U_CFUNC int32_t
59	locale_getKeywords(const char *localeID,
60	char prev,
61	char *keywords, int32_t keywordCapacity,
62	char values, int32_t valuesCapacity, int32_t valLen,
63	UBool valuesToo,
64	UErrorCode *status);
65
/* ### Data tables *************************************************/
67
68	/**
69	* Table of language codes, both 2- and 3-letter, with preference
70	* given to 2-letter codes where possible. Includes 3-letter codes
71	* that lack a 2-letter equivalent.
72	*
73	* This list must be in sorted order. This list is returned directly
74	* to the user by some API.
75	*
76	* This list must be kept in sync with LANGUAGES_3, with corresponding
77	* entries matched.
78	*
79	* This table should be terminated with a NULL entry, followed by a
80	* second list, and another NULL entry. The first list is visible to
81	* user code when this array is returned by API. The second list
82	* contains codes we support, but do not expose through user API.
83	*
84	* Notes
85	*
86	* Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
87	* include the revisions up to 2001/7/27 CWB
88	*
89	* The 3 character codes are the terminology codes like RFC 3066. This
90	* is compatible with prior ICU codes
91	*
92	* "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
93	* table but now at the end of the table because 3 character codes are
94	* duplicates. This avoids bad searches going from 3 to 2 character
95	* codes.
96	*
97	* The range qaa-qtz is reserved for local use
98	*/
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
/*
 * Layout: a sorted primary list, a NULL terminator, a short list of
 * obsolete codes, and a second NULL terminator.
 */
static const char * const LANGUAGES[] = {
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
    "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
    "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
    "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
    "cs",  "csb", "cu",  "cv",  "cy",
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
    "dyo", "dyu", "dz",  "dzg",
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
    "ext",
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fy",
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
    "gur", "guz", "gv",  "gwi",
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
    "hup", "hy",  "hz",
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jv",
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
    "kv",  "kw",  "ky",
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
    "ml",  "mn",  "mnc", "mni", "mo",
    "moh", "mos", "mr",  "mrj",
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "my",  "mye", "myv", "mzn",
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
    "pon", "prg", "pro", "ps",  "pt",
    "qu",  "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
    "rw",  "rwk",
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
    "sv",  "sw",  "swb", "swc", "syc", "syr", "szl",
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr", "tl",
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",
    "vot", "vro", "vun",
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
    "xal", "xh",  "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
    "zun", "zxx", "zza",
NULL,
    "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
NULL
};
192
/*
 * Withdrawn language codes and their replacements. The two arrays are laid
 * out in parallel (in/id, iw/he, ji/yi, jw/jv); the code that consumes them
 * is outside this part of the file, so the index pairing is presumed from
 * the data. Both arrays end with two NULL entries.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};
199
200	/**
201	* Table of 3-letter language codes.
202	*
203	* This is a lookup table used to convert 3-letter language codes to
204	* their 2-letter equivalent, where possible. It must be kept in sync
205	* with LANGUAGES. For all valid i, LANGUAGES[i] must refer to the
206	* same language as LANGUAGES_3[i]. The commented-out lines are
207	* copied from LANGUAGES to make eyeballing this baby easier.
208	*
209	* Where a 3-letter language code has no 2-letter equivalent, the
210	* 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
211	*
212	* This table should be terminated with a NULL entry, followed by a
213	* second list, and another NULL entry. The two lists correspond to
214	* the two lists in LANGUAGES.
215	*/
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
/*
 * Layout mirrors LANGUAGES: primary list, NULL, obsolete list, NULL.
 * Every entry is exactly three letters long.
 */
static const char * const LANGUAGES_3[] = {
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
    "ces", "csb", "chu", "chv", "cym",
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
    "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
    "ext",
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fry",
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
    "gur", "guz", "glv", "gwi",
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
    "hup", "hye", "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
    "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mal", "mon", "mnc", "mni", "mol",
    "moh", "mos", "mar", "mrj",
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "mya", "mye", "myv", "mzn",
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
    "pon", "prg", "pro", "pus", "por",
    "que", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
    "kin", "rwk",
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
    "swe", "swa", "swb", "swc", "syc", "syr", "szl",
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
    "vot", "vro", "vun",
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
    "xal", "xho", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
    "zun", "zxx", "zza",
NULL,
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
NULL
};
310
311	/**
312	* Table of 2-letter country codes.
313	*
314	* This list must be in sorted order. This list is returned directly
315	* to the user by some API.
316	*
317	* This list must be kept in sync with COUNTRIES_3, with corresponding
318	* entries matched.
319	*
320	* This table should be terminated with a NULL entry, followed by a
321	* second list, and another NULL entry. The first list is visible to
322	* user code when this array is returned by API. The second list
323	* contains codes we support, but do not expose through user API.
324	*
325	* Notes:
326	*
327	* ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
328	* http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
329	* new codes keeping the old ones for compatibility updated to include
330	* 1999/12/03 revisions CWB
331	*
332	* RO(ROM) is now RO(ROU) according to
333	* http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
334	*/
/*
 * Layout: a sorted primary list, a NULL terminator, a list of obsolete
 * codes, and a second NULL terminator.
 */
static const char * const COUNTRIES[] = {
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
NULL,
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
NULL
};
370
/*
 * Deprecated country codes and their replacements, laid out in parallel:
 * the comment row inside REPLACEMENT_COUNTRIES repeats the deprecated codes
 * column by column. Both arrays end with two NULL entries.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
};
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */
};
378
379	/**
380	* Table of 3-letter country codes.
381	*
382	* This is a lookup table used to convert 3-letter country codes to
383	* their 2-letter equivalent. It must be kept in sync with COUNTRIES.
384	* For all valid i, COUNTRIES[i] must refer to the same country as
385	* COUNTRIES_3[i]. The commented-out lines are copied from COUNTRIES
386	* to make eyeballing this baby easier.
387	*
388	* This table should be terminated with a NULL entry, followed by a
389	* second list, and another NULL entry. The two lists correspond to
390	* the two lists in COUNTRIES.
391	*/
/*
 * Layout mirrors COUNTRIES: primary list, NULL, obsolete list, NULL.
 * The comment row above each data row repeats the matching 2-letter codes.
 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL,
/*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
NULL
};
458
/* One canonicalization rule: an input locale ID and the ID it maps to. */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
} CanonicalizationMap;

/**
 * A map to canonicalize locale IDs.  This handles a variety of
 * different semantic kinds of transformations.
 */
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    { "art_LOJBAN",     "jbo" }, /* registered name */
    { "hy__AREVELA",    "hy" }, /* Registered IANA variant */
    { "hy__AREVMDA",    "hyw" }, /* Registered IANA variant */
    { "zh_GAN",         "gan" }, /* registered name */
    { "zh_GUOYU",       "zh" }, /* registered name */
    { "zh_HAKKA",       "hak" }, /* registered name */
    { "zh_MIN_NAN",     "nan" }, /* registered name */
    { "zh_WUU",         "wuu" }, /* registered name */
    { "zh_XIANG",       "hsn" }, /* registered name */
    { "zh_YUE",         "yue" }, /* registered name */
};
480
/* ### BCP47 Conversion *******************************************/
/*
 * Test if the locale id has BCP47 u extension and does not have '@'.
 * True when id is non-NULL, contains no '@', and its shortest subtag (as
 * measured by getShortestSubtagLength) is one character long.
 * The macro now inspects its own argument; it previously read a variable
 * named localeID from the calling scope regardless of what was passed.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
/*
 * Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails.
 * err is a UErrorCode pointer. On failure finalID is set to id, and a
 * U_STRING_NOT_TERMINATED_WARNING is promoted to U_BUFFER_OVERFLOW_ERROR;
 * on success finalID is set to buffer.
 */
#define _ConvertBCP47(finalID, id, buffer, length,err) UPRV_BLOCK_MACRO_BEGIN { \
    if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 ||  \
            U_FAILURE(*(err)) || *(err) == U_STRING_NOT_TERMINATED_WARNING) { \
        finalID=id; \
        if (*(err) == U_STRING_NOT_TERMINATED_WARNING) { *(err) = U_BUFFER_OVERFLOW_ERROR; } \
    } else { \
        finalID=buffer; \
    } \
} UPRV_BLOCK_MACRO_END
494	/ Gets the size of the shortest subtag in the given localeID. /
495	static int32_t getShortestSubtagLength(const char *localeID) {
496	int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
497	int32_t length = localeIDLength;
498	int32_t tmpLength = `0`;
499	int32_t i;
500	UBool reset = TRUE;
501
502	for (i = `0`; i < localeIDLength; i++) {
503	if (localeID[i] != `'_'` && localeID[i] != `'-'`) {
504	if (reset) {
505	tmpLength = `0`;
506	reset = FALSE;
507	}
508	tmpLength++;
509	} else {
510	if (tmpLength != `0` && tmpLength < length) {
511	length = tmpLength;
512	}
513	reset = TRUE;
514	}
515	}
516
517	return length;
518	}
519
/* ### Keywords *************************************************/
/* ASCII decimal digit test */
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
/* ASCII letter or decimal digit */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
/* Punctuation/symbols allowed in legacy key values */
#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size in bytes of the internal keyword-name buffers, including the terminating NUL */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of distinct keywords collected from one locale ID */
#define ULOC_MAX_NO_KEYWORDS 25
528
529	U_CAPI const char * U_EXPORT2
530	locale_getKeywordsStart(const char *localeID) {
531	const char *result = NULL;
532	if((result = uprv_strchr(localeID, `'@'`)) != NULL) {
533	return result;
534	}
535	#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
536	else {
537	/ We do this because the @ sign is variant, and the @ sign used on one*
538	EBCDIC machine won't be compiled the same way on other EBCDIC based
539	machines. /*
540	static const uint8_t ebcdicSigns[] = { `0x7C`, `0x44`, `0x66`, `0x80`, `0xAC`, `0xAE`, `0xAF`, `0xB5`, `0xEC`, `0xEF`, `0x00` };
541	const uint8_t *charToFind = ebcdicSigns;
542	while(*charToFind) {
543	if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
544	return result;
545	}
546	charToFind++;
547	}
548	}
549	#endif
550	return NULL;
551	}
552
553	/**
554	* @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
555	* @param keywordName incoming name to be canonicalized
556	* @param status return status (keyword too long)
557	* @return length of the keyword name
558	*/
559	static int32_t locale_canonKeywordName(char buf, const* char keywordName, UErrorCode status)
560	{
561	int32_t keywordNameLen = `0`;
562
563	for (; *keywordName != `0`; keywordName++) {
564	if (!UPRV_ISALPHANUM(*keywordName)) {
565	status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name /
566	return `0`;
567	}
568	if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - `1`) {
569	buf[keywordNameLen++] = uprv_tolower(*keywordName);
570	} else {
571	/ keyword name too long for internal buffer /
572	*status = U_INTERNAL_PROGRAM_ERROR;
573	return `0`;
574	}
575	}
576	if (keywordNameLen == `0`) {
577	status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name /
578	return `0`;
579	}
580	buf[keywordNameLen] = `0`; / terminate /
581
582	return keywordNameLen;
583	}
584
585	typedef struct {
586	char keyword[ULOC_KEYWORD_BUFFER_LEN];
587	int32_t keywordLen;
588	const char *valueStart;
589	int32_t valueLen;
590	} KeywordStruct;
591
592	static int32_t U_CALLCONV
593	compareKeywordStructs(const void * /context/, const void left, const* void *right) {
594	const char* leftString = ((const KeywordStruct *)left)->keyword;
595	const char* rightString = ((const KeywordStruct *)right)->keyword;
596	return uprv_strcmp(leftString, rightString);
597	}
598
599	static int32_t
600	_getKeywords(const char *localeID,
601	char prev,
602	char *keywords, int32_t keywordCapacity,
603	char values, int32_t valuesCapacity, int32_t valLen,
604	UBool valuesToo,
605	UErrorCode *status)
606	{
607	KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
608
609	int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
610	int32_t numKeywords = `0`;
611	const char* pos = localeID;
612	const char* equalSign = NULL;
613	const char* semicolon = NULL;
614	int32_t i = `0`, j, n;
615	int32_t keywordsLen = `0`;
616	int32_t valuesLen = `0`;
617
618	if(prev == `'@'`) { / start of keyword definition /
619	/ we will grab pairs, trim spaces, lowercase keywords, sort and return /
620	do {
621	UBool duplicate = FALSE;
622	/ skip leading spaces /
623	while(*pos == `' '`) {
624	pos++;
625	}
626	if (!pos) { /* handle trailing "; " /
627	break;
628	}
629	if(numKeywords == maxKeywords) {
630	*status = U_INTERNAL_PROGRAM_ERROR;
631	return `0`;
632	}
633	equalSign = uprv_strchr(pos, `'='`);
634	semicolon = uprv_strchr(pos, `';'`);
635	/ lack of '=' [foo@currency] is illegal /
636	/ ';' before '=' [foo@currency;collation=pinyin] is illegal /
637	if(!equalSign \|\| (semicolon && semicolon<equalSign)) {
638	*status = U_INVALID_FORMAT_ERROR;
639	return `0`;
640	}
641	/ need to normalize both keyword and keyword name /
642	if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
643	/ keyword name too long for internal buffer /
644	*status = U_INTERNAL_PROGRAM_ERROR;
645	return `0`;
646	}
647	for(i = `0`, n = `0`; i < equalSign - pos; ++i) {
648	if (pos[i] != `' '`) {
649	keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
650	}
651	}
652
653	/ zero-length keyword is an error. /
654	if (n == `0`) {
655	*status = U_INVALID_FORMAT_ERROR;
656	return `0`;
657	}
658
659	keywordList[numKeywords].keyword[n] = `0`;
660	keywordList[numKeywords].keywordLen = n;
661	/ now grab the value part. First we skip the '=' /
662	equalSign++;
663	/ then we leading spaces /
664	while(*equalSign == `' '`) {
665	equalSign++;
666	}
667
668	/ Premature end or zero-length value /
669	if (!*equalSign \|\| equalSign == semicolon) {
670	*status = U_INVALID_FORMAT_ERROR;
671	return `0`;
672	}
673
674	keywordList[numKeywords].valueStart = equalSign;
675
676	pos = semicolon;
677	i = `0`;
678	if(pos) {
679	while(*(pos - i - `1`) == `' '`) {
680	i++;
681	}
682	keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
683	pos++;
684	} else {
685	i = (int32_t)uprv_strlen(equalSign);
686	while(i && equalSign[i-`1`] == `' '`) {
687	i--;
688	}
689	keywordList[numKeywords].valueLen = i;
690	}
691	/ If this is a duplicate keyword, then ignore it /
692	for (j=`0`; j<numKeywords; ++j) {
693	if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == `0`) {
694	duplicate = TRUE;
695	break;
696	}
697	}
698	if (!duplicate) {
699	++numKeywords;
700	}
701	} while(pos);
702
703	/ now we have a list of keywords /
704	/ we need to sort it /
705	uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
706
707	/ Now construct the keyword part /
708	for(i = `0`; i < numKeywords; i++) {
709	if(keywordsLen + keywordList[i].keywordLen + `1`< keywordCapacity) {
710	uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
711	if(valuesToo) {
712	keywords[keywordsLen + keywordList[i].keywordLen] = `'='`;
713	} else {
714	keywords[keywordsLen + keywordList[i].keywordLen] = `0`;
715	}
716	}
717	keywordsLen += keywordList[i].keywordLen + `1`;
718	if(valuesToo) {
719	if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) {
720	uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
721	}
722	keywordsLen += keywordList[i].valueLen;
723
724	if(i < numKeywords - `1`) {
725	if(keywordsLen < keywordCapacity) {
726	keywords[keywordsLen] = `';'`;
727	}
728	keywordsLen++;
729	}
730	}
731	if(values) {
732	if(valuesLen + keywordList[i].valueLen + `1`< valuesCapacity) {
733	uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
734	values[valuesLen + keywordList[i].valueLen] = `0`;
735	}
736	valuesLen += keywordList[i].valueLen + `1`;
737	}
738	}
739	if(values) {
740	values[valuesLen] = `0`;
741	if(valLen) {
742	*valLen = valuesLen;
743	}
744	}
745	return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
746	} else {
747	return `0`;
748	}
749	}
750
751	U_CFUNC int32_t
752	locale_getKeywords(const char *localeID,
753	char prev,
754	char *keywords, int32_t keywordCapacity,
755	char values, int32_t valuesCapacity, int32_t valLen,
756	UBool valuesToo,
757	UErrorCode *status) {
758	return _getKeywords(localeID, prev, keywords, keywordCapacity,
759	values, valuesCapacity, valLen, valuesToo,
760	status);
761	}
762
763	U_CAPI int32_t U_EXPORT2
764	uloc_getKeywordValue(const char* localeID,
765	const char* keywordName,
766	char* buffer, int32_t bufferCapacity,
767	UErrorCode* status)
768	{
769	if (buffer != nullptr) {
770	buffer[`0`] = `'\0'`;
771	}
772	const char* startSearchHere = NULL;
773	const char* nextSeparator = NULL;
774	char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
775	char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
776	int32_t result = `0`;
777
778	if(status && U_SUCCESS(*status) && localeID) {
779	char tempBuffer[ULOC_FULLNAME_CAPACITY];
780	const char* tmpLocaleID;
781
782	if (keywordName == NULL \|\| keywordName[`0`] == `0`) {
783	*status = U_ILLEGAL_ARGUMENT_ERROR;
784	return `0`;
785	}
786
787	locale_canonKeywordName(keywordNameBuffer, keywordName, status);
788	if(U_FAILURE(*status)) {
789	return `0`;
790	}
791
792	if (_hasBCP47Extension(localeID)) {
793	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
794	} else {
795	tmpLocaleID=localeID;
796	}
797
798	startSearchHere = locale_getKeywordsStart(tmpLocaleID);
799	if(startSearchHere == NULL) {
800	/ no keywords, return at once /
801	return `0`;
802	}
803
804	/ find the first keyword /
805	while(startSearchHere) {
806	const char* keyValueTail;
807	int32_t keyValueLen;
808
809	startSearchHere++; / skip @ or ; /
810	nextSeparator = uprv_strchr(startSearchHere, `'='`);
811	if(!nextSeparator) {
812	status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value /
813	return `0`;
814	}
815	/ strip leading & trailing spaces (TC decided to tolerate these) /
816	while(*startSearchHere == `' '`) {
817	startSearchHere++;
818	}
819	keyValueTail = nextSeparator;
820	while (keyValueTail > startSearchHere && *(keyValueTail-`1`) == `' '`) {
821	keyValueTail--;
822	}
823	/ now keyValueTail points to first char after the keyName /
824	/ copy & normalize keyName from locale /
825	if (startSearchHere == keyValueTail) {
826	status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale /
827	return `0`;
828	}
829	keyValueLen = `0`;
830	while (startSearchHere < keyValueTail) {
831	if (!UPRV_ISALPHANUM(*startSearchHere)) {
832	status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name /
833	return `0`;
834	}
835	if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - `1`) {
836	localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
837	} else {
838	/ keyword name too long for internal buffer /
839	*status = U_INTERNAL_PROGRAM_ERROR;
840	return `0`;
841	}
842	}
843	localeKeywordNameBuffer[keyValueLen] = `0`; / terminate /
844
845	startSearchHere = uprv_strchr(nextSeparator, `';'`);
846
847	if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == `0`) {
848	/ current entry matches the keyword. /
849	nextSeparator++; / skip '=' /
850	/ First strip leading & trailing spaces (TC decided to tolerate these) /
851	while(*nextSeparator == `' '`) {
852	nextSeparator++;
853	}
854	keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
855	while(keyValueTail > nextSeparator && *(keyValueTail-`1`) == `' '`) {
856	keyValueTail--;
857	}
858	/ Now copy the value, but check well-formedness /
859	if (nextSeparator == keyValueTail) {
860	status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale /
861	return `0`;
862	}
863	keyValueLen = `0`;
864	while (nextSeparator < keyValueTail) {
865	if (!UPRV_ISALPHANUM(nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(nextSeparator)) {
866	status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value /
867	return `0`;
868	}
869	if (keyValueLen < bufferCapacity) {
870	/ Should we lowercase value to return here? Tests expect as-is. /
871	buffer[keyValueLen++] = *nextSeparator++;
872	} else { / keep advancing so we return correct length in case of overflow /
873	keyValueLen++;
874	nextSeparator++;
875	}
876	}
877	result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status);
878	return result;
879	}
880	}
881	}
882	return `0`;
883	}
884
885	U_CAPI int32_t U_EXPORT2
886	uloc_setKeywordValue(const char* keywordName,
887	const char* keywordValue,
888	char* buffer, int32_t bufferCapacity,
889	UErrorCode* status)
890	{
891	/ TODO: sorting. removal. /
892	int32_t keywordNameLen;
893	int32_t keywordValueLen;
894	int32_t bufLen;
895	int32_t needLen = `0`;
896	char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
897	char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+`1`];
898	char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
899	int32_t rc;
900	char* nextSeparator = NULL;
901	char* nextEqualsign = NULL;
902	char* startSearchHere = NULL;
903	char* keywordStart = NULL;
904	CharString updatedKeysAndValues;
905	int32_t updatedKeysAndValuesLen;
906	UBool handledInputKeyAndValue = FALSE;
907	char keyValuePrefix = `'@'`;
908
909	if(U_FAILURE(*status)) {
910	return -`1`;
911	}
912	if (keywordName == NULL \|\| keywordName[`0`] == `0` \|\| bufferCapacity <= `1`) {
913	*status = U_ILLEGAL_ARGUMENT_ERROR;
914	return `0`;
915	}
916	bufLen = (int32_t)uprv_strlen(buffer);
917	if(bufferCapacity<bufLen) {
918	/ The capacity is less than the length?! Is this NULL terminated? /
919	*status = U_ILLEGAL_ARGUMENT_ERROR;
920	return `0`;
921	}
922	keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
923	if(U_FAILURE(*status)) {
924	return `0`;
925	}
926
927	keywordValueLen = `0`;
928	if(keywordValue) {
929	while (*keywordValue != `0`) {
930	if (!UPRV_ISALPHANUM(keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(keywordValue)) {
931	status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value /
932	return `0`;
933	}
934	if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
935	/ Should we force lowercase in value to set? /
936	keywordValueBuffer[keywordValueLen++] = *keywordValue++;
937	} else {
938	/ keywordValue too long for internal buffer /
939	*status = U_INTERNAL_PROGRAM_ERROR;
940	return `0`;
941	}
942	}
943	}
944	keywordValueBuffer[keywordValueLen] = `0`; / terminate /
945
946	startSearchHere = (char*)locale_getKeywordsStart(buffer);
947	if(startSearchHere == NULL \|\| (startSearchHere[`1`]==`0`)) {
948	if(keywordValueLen == `0`) { / no keywords = nothing to remove /
949	return bufLen;
950	}
951
952	needLen = bufLen+`1`+keywordNameLen+`1`+keywordValueLen;
953	if(startSearchHere) { / had a single @ /
954	needLen--; / already had the @ /
955	/ startSearchHere points at the @ /
956	} else {
957	startSearchHere=buffer+bufLen;
958	}
959	if(needLen >= bufferCapacity) {
960	*status = U_BUFFER_OVERFLOW_ERROR;
961	return needLen; / no change /
962	}
963	*startSearchHere++ = `'@'`;
964	uprv_strcpy(startSearchHere, keywordNameBuffer);
965	startSearchHere += keywordNameLen;
966	*startSearchHere++ = `'='`;
967	uprv_strcpy(startSearchHere, keywordValueBuffer);
968	return needLen;
969	} / end shortcut - no @ /
970
971	keywordStart = startSearchHere;
972	/ search for keyword /
973	while(keywordStart) {
974	const char* keyValueTail;
975	int32_t keyValueLen;
976
977	keywordStart++; / skip @ or ; /
978	nextEqualsign = uprv_strchr(keywordStart, `'='`);
979	if (!nextEqualsign) {
980	status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value /
981	return `0`;
982	}
983	/ strip leading & trailing spaces (TC decided to tolerate these) /
984	while(*keywordStart == `' '`) {
985	keywordStart++;
986	}
987	keyValueTail = nextEqualsign;
988	while (keyValueTail > keywordStart && *(keyValueTail-`1`) == `' '`) {
989	keyValueTail--;
990	}
991	/ now keyValueTail points to first char after the keyName /
992	/ copy & normalize keyName from locale /
993	if (keywordStart == keyValueTail) {
994	status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale /
995	return `0`;
996	}
997	keyValueLen = `0`;
998	while (keywordStart < keyValueTail) {
999	if (!UPRV_ISALPHANUM(*keywordStart)) {
1000	status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name /
1001	return `0`;
1002	}
1003	if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - `1`) {
1004	localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
1005	} else {
1006	/ keyword name too long for internal buffer /
1007	*status = U_INTERNAL_PROGRAM_ERROR;
1008	return `0`;
1009	}
1010	}
1011	localeKeywordNameBuffer[keyValueLen] = `0`; / terminate /
1012
1013	nextSeparator = uprv_strchr(nextEqualsign, `';'`);
1014
1015	/ start processing the value part /
1016	nextEqualsign++; / skip '=' /
1017	/ First strip leading & trailing spaces (TC decided to tolerate these) /
1018	while(*nextEqualsign == `' '`) {
1019	nextEqualsign++;
1020	}
1021	keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
1022	while(keyValueTail > nextEqualsign && *(keyValueTail-`1`) == `' '`) {
1023	keyValueTail--;
1024	}
1025	if (nextEqualsign == keyValueTail) {
1026	status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale /
1027	return `0`;
1028	}
1029
1030	rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1031	if(rc == `0`) {
1032	/ Current entry matches the input keyword. Update the entry /
1033	if(keywordValueLen > `0`) { / updating a value /
1034	updatedKeysAndValues.append(keyValuePrefix, *status);
1035	keyValuePrefix = `';'`; / for any subsequent key-value pair /
1036	updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1037	updatedKeysAndValues.append(`'='`, *status);
1038	updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1039	} / else removing this entry, don't emit anything /
1040	handledInputKeyAndValue = TRUE;
1041	} else {
1042	/ input keyword sorts earlier than current entry, add before current entry /
1043	if (rc < `0` && keywordValueLen > `0` && !handledInputKeyAndValue) {
1044	/ insert new entry at this location /
1045	updatedKeysAndValues.append(keyValuePrefix, *status);
1046	keyValuePrefix = `';'`; / for any subsequent key-value pair /
1047	updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1048	updatedKeysAndValues.append(`'='`, *status);
1049	updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1050	handledInputKeyAndValue = TRUE;
1051	}
1052	/ copy the current entry /
1053	updatedKeysAndValues.append(keyValuePrefix, *status);
1054	keyValuePrefix = `';'`; / for any subsequent key-value pair /
1055	updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
1056	updatedKeysAndValues.append(`'='`, *status);
1057	updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
1058	}
1059	if (!nextSeparator && keywordValueLen > `0` && !handledInputKeyAndValue) {
1060	/ append new entry at the end, it sorts later than existing entries /
1061	updatedKeysAndValues.append(keyValuePrefix, *status);
1062	/ skip keyValuePrefix update, no subsequent key-value pair /
1063	updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1064	updatedKeysAndValues.append(`'='`, *status);
1065	updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1066	handledInputKeyAndValue = TRUE;
1067	}
1068	keywordStart = nextSeparator;
1069	} / end loop searching /
1070
1071	/ Any error from updatedKeysAndValues.append above would be internal and not due to*
1072	* problems with the passed-in locale. So if we did encounter problems with the
1073	* passed-in locale above, those errors took precedence and overrode any error
1074	* status from updatedKeysAndValues.append, and also caused a return of 0. If there
1075	* are errors here they are from updatedKeysAndValues.append; they do cause an
1076	* error return but the passed-in locale is unmodified and the original bufLen is
1077	* returned.
1078	*/
1079	if (!handledInputKeyAndValue \|\| U_FAILURE(*status)) {
1080	/ if input key/value specified removal of a keyword not present in locale, or*
1081	* there was an error in CharString.append, leave original locale alone. */
1082	return bufLen;
1083	}
1084
1085	updatedKeysAndValuesLen = updatedKeysAndValues.length();
1086	/ needLen = length of the part before '@' + length of updated key-value part including '@' /
1087	needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;
1088	if(needLen >= bufferCapacity) {
1089	*status = U_BUFFER_OVERFLOW_ERROR;
1090	return needLen; / no change /
1091	}
1092	if (updatedKeysAndValuesLen > `0`) {
1093	uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);
1094	}
1095	buffer[needLen]=`0`;
1096	return needLen;
1097	}
1098
/* ### ID parsing implementation **************************************************/

/* Non-zero when 'a' is one of the letters x, X, i, I.
 * The argument is evaluated more than once. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* returns TRUE if one of the special prefixes is here (s=string)
 * 'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a) (((a)==0)||((a)=='.')||((a)=='@'))
1111
1112	/**
1113	* Lookup 'key' in the array 'list'. The array 'list' should contain
1114	* a NULL entry, followed by more entries, and a second NULL entry.
1115	*
1116	* The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1117	* COUNTRIES_3.
1118	*/
1119	static int16_t _findIndex(const char* const* list, const char* key)
1120	{
1121	const char* const* anchor = list;
1122	int32_t pass = `0`;
1123
1124	/ Make two passes through two NULL-terminated arrays at 'list' /
1125	while (pass++ < `2`) {
1126	while (*list) {
1127	if (uprv_strcmp(key, *list) == `0`) {
1128	return (int16_t)(list - anchor);
1129	}
1130	list++;
1131	}
1132	++list; / skip final NULL CWB/*
1133	}
1134	return -`1`;
1135	}
1136
1137	/ count the length of src while copying it to dest; return strlen(src) /
1138	static inline int32_t
1139	_copyCount(char dest, int32_t destCapacity, const* char *src) {
1140	const char *anchor;
1141	char c;
1142
1143	anchor=src;
1144	for(;;) {
1145	if((c=*src)==`0`) {
1146	return (int32_t)(src-anchor);
1147	}
1148	if(destCapacity<=`0`) {
1149	return (int32_t)((src-anchor)+uprv_strlen(src));
1150	}
1151	++src;
1152	*dest++=c;
1153	--destCapacity;
1154	}
1155	}
1156
1157	U_CFUNC const char*
1158	uloc_getCurrentCountryID(const char* oldID){
1159	int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1160	if (offset >= `0`) {
1161	return REPLACEMENT_COUNTRIES[offset];
1162	}
1163	return oldID;
1164	}
1165	U_CFUNC const char*
1166	uloc_getCurrentLanguageID(const char* oldID){
1167	int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1168	if (offset >= `0`) {
1169	return REPLACEMENT_LANGUAGES[offset];
1170	}
1171	return oldID;
1172	}
1173	/*
1174	* the internal functions _getLanguage(), _getCountry(), _getVariant()
1175	* avoid duplicating code to handle the earlier locale ID pieces
1176	* in the functions for the later ones by
1177	* setting the *pEnd pointer to where they stopped parsing
1178	*
1179	* TODO try to use this in Locale
1180	*/
1181	U_CFUNC int32_t
1182	ulocimp_getLanguage(const char *localeID,
1183	char *language, int32_t languageCapacity,
1184	const char **pEnd) {
1185	int32_t i=`0`;
1186	int32_t offset;
1187	char lang[`4`]={ `0`, `0`, `0`, `0` }; / temporary buffer to hold language code for searching /
1188
1189	if (uprv_stricmp(localeID, "root") == `0`) {
1190	localeID += `4`;
1191	} else if (uprv_strnicmp(localeID, "und", `3`) == `0` &&
1192	(localeID[`3`] == `'\0'` \|\|
1193	localeID[`3`] == `'-'` \|\|
1194	localeID[`3`] == `'_'` \|\|
1195	localeID[`3`] == `'@'`)) {
1196	localeID += `3`;
1197	}
1198
1199	/ if it starts with i- or x- then copy that prefix /
1200	if(_isIDPrefix(localeID)) {
1201	if(i<languageCapacity) {
1202	language[i]=(char)uprv_tolower(*localeID);
1203	}
1204	if(i<languageCapacity) {
1205	language[i+`1`]=`'-'`;
1206	}
1207	i+=`2`;
1208	localeID+=`2`;
1209	}
1210
1211	/ copy the language as far as possible and count its length /
1212	while(!_isTerminator(localeID) && !_isIDSeparator(localeID)) {
1213	if(i<languageCapacity) {
1214	language[i]=(char)uprv_tolower(*localeID);
1215	}
1216	if(i<`3`) {
1217	U_ASSERT(i>=`0`);
1218	lang[i]=(char)uprv_tolower(*localeID);
1219	}
1220	i++;
1221	localeID++;
1222	}
1223
1224	if(i==`3`) {
1225	/ convert 3 character code to 2 character code if possible CWB/*
1226	offset=_findIndex(LANGUAGES_3, lang);
1227	if(offset>=`0`) {
1228	i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1229	}
1230	}
1231
1232	if(pEnd!=NULL) {
1233	*pEnd=localeID;
1234	}
1235	return i;
1236	}
1237
1238	U_CFUNC int32_t
1239	ulocimp_getScript(const char *localeID,
1240	char *script, int32_t scriptCapacity,
1241	const char **pEnd)
1242	{
1243	int32_t idLen = `0`;
1244
1245	if (pEnd != NULL) {
1246	*pEnd = localeID;
1247	}
1248
1249	/ copy the second item as far as possible and count its length /
1250	while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1251	&& uprv_isASCIILetter(localeID[idLen])) {
1252	idLen++;
1253	}
1254
1255	/ If it's exactly 4 characters long, then it's a script and not a country. /
1256	if (idLen == `4`) {
1257	int32_t i;
1258	if (pEnd != NULL) {
1259	*pEnd = localeID+idLen;
1260	}
1261	if(idLen > scriptCapacity) {
1262	idLen = scriptCapacity;
1263	}
1264	if (idLen >= `1`) {
1265	script[`0`]=(char)uprv_toupper(*(localeID++));
1266	}
1267	for (i = `1`; i < idLen; i++) {
1268	script[i]=(char)uprv_tolower(*(localeID++));
1269	}
1270	}
1271	else {
1272	idLen = `0`;
1273	}
1274	return idLen;
1275	}
1276
1277	U_CFUNC int32_t
1278	ulocimp_getCountry(const char *localeID,
1279	char *country, int32_t countryCapacity,
1280	const char **pEnd)
1281	{
1282	int32_t idLen=`0`;
1283	char cnty[ULOC_COUNTRY_CAPACITY]={ `0`, `0`, `0`, `0` };
1284	int32_t offset;
1285
1286	/ copy the country as far as possible and count its length /
1287	while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1288	if(idLen<(ULOC_COUNTRY_CAPACITY-`1`)) { /CWB/
1289	cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
1290	}
1291	idLen++;
1292	}
1293
1294	/ the country should be either length 2 or 3 /
1295	if (idLen == `2` \|\| idLen == `3`) {
1296	UBool gotCountry = FALSE;
1297	/ convert 3 character code to 2 character code if possible CWB/*
1298	if(idLen==`3`) {
1299	offset=_findIndex(COUNTRIES_3, cnty);
1300	if(offset>=`0`) {
1301	idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1302	gotCountry = TRUE;
1303	}
1304	}
1305	if (!gotCountry) {
1306	int32_t i = `0`;
1307	for (i = `0`; i < idLen; i++) {
1308	if (i < countryCapacity) {
1309	country[i]=(char)uprv_toupper(localeID[i]);
1310	}
1311	}
1312	}
1313	localeID+=idLen;
1314	} else {
1315	idLen = `0`;
1316	}
1317
1318	if(pEnd!=NULL) {
1319	*pEnd=localeID;
1320	}
1321
1322	return idLen;
1323	}
1324
1325	/**
1326	* @param needSeparator if true, then add leading '_' if any variants
1327	* are added to 'variant'
1328	*/
1329	static int32_t
1330	_getVariantEx(const char *localeID,
1331	char prev,
1332	char *variant, int32_t variantCapacity,
1333	UBool needSeparator) {
1334	int32_t i=`0`;
1335
1336	/ get one or more variant tags and separate them with '_' /
1337	if(_isIDSeparator(prev)) {
1338	/ get a variant string after a '-' or '_' /
1339	while(!_isTerminator(*localeID)) {
1340	if (needSeparator) {
1341	if (i<variantCapacity) {
1342	variant[i] = `'_'`;
1343	}
1344	++i;
1345	needSeparator = FALSE;
1346	}
1347	if(i<variantCapacity) {
1348	variant[i]=(char)uprv_toupper(*localeID);
1349	if(variant[i]==`'-'`) {
1350	variant[i]=`'_'`;
1351	}
1352	}
1353	i++;
1354	localeID++;
1355	}
1356	}
1357
1358	/ if there is no variant tag after a '-' or '_' then look for '@' /
1359	if(i==`0`) {
1360	if(prev==`'@'`) {
1361	/ keep localeID /
1362	} else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1363	++localeID; / point after the '@' /
1364	} else {
1365	return `0`;
1366	}
1367	while(!_isTerminator(*localeID)) {
1368	if (needSeparator) {
1369	if (i<variantCapacity) {
1370	variant[i] = `'_'`;
1371	}
1372	++i;
1373	needSeparator = FALSE;
1374	}
1375	if(i<variantCapacity) {
1376	variant[i]=(char)uprv_toupper(*localeID);
1377	if(variant[i]==`'-'` \|\| variant[i]==`','`) {
1378	variant[i]=`'_'`;
1379	}
1380	}
1381	i++;
1382	localeID++;
1383	}
1384	}
1385
1386	return i;
1387	}
1388
1389	static int32_t
1390	_getVariant(const char *localeID,
1391	char prev,
1392	char *variant, int32_t variantCapacity) {
1393	return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1394	}
1395
/* Keyword enumeration */

/* Per-enumeration state stored in UEnumeration::context. */
typedef struct UKeywordsContext {
    char* keywords;  /* start of the list: NUL-separated names ended by an empty string */
    char* current;   /* next name to hand out; points into 'keywords' */
} UKeywordsContext;
1402
1403	U_CDECL_BEGIN
1404
1405	static void U_CALLCONV
1406	uloc_kw_closeKeywords(UEnumeration *enumerator) {
1407	uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1408	uprv_free(enumerator->context);
1409	uprv_free(enumerator);
1410	}
1411
1412	static int32_t U_CALLCONV
1413	uloc_kw_countKeywords(UEnumeration en, UErrorCode /status/) {
1414	char kw = ((UKeywordsContext )en->context)->keywords;
1415	int32_t result = `0`;
1416	while(*kw) {
1417	result++;
1418	kw += uprv_strlen(kw)+`1`;
1419	}
1420	return result;
1421	}
1422
1423	static const char * U_CALLCONV
1424	uloc_kw_nextKeyword(UEnumeration* en,
1425	int32_t* resultLength,
1426	UErrorCode* /status/) {
1427	const char* result = ((UKeywordsContext *)en->context)->current;
1428	int32_t len = `0`;
1429	if(*result) {
1430	len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1431	((UKeywordsContext *)en->context)->current += len+`1`;
1432	} else {
1433	result = NULL;
1434	}
1435	if (resultLength) {
1436	*resultLength = len;
1437	}
1438	return result;
1439	}
1440
1441	static void U_CALLCONV
1442	uloc_kw_resetKeywords(UEnumeration* en,
1443	UErrorCode* /status/) {
1444	((UKeywordsContext )en->context)->current = ((UKeywordsContext )en->context)->keywords;
1445	}
1446
1447	U_CDECL_END
1448
1449
1450	static const UEnumeration gKeywordsEnum = {
1451	NULL,
1452	NULL,
1453	uloc_kw_closeKeywords,
1454	uloc_kw_countKeywords,
1455	uenum_unextDefault,
1456	uloc_kw_nextKeyword,
1457	uloc_kw_resetKeywords
1458	};
1459
1460	U_CAPI UEnumeration* U_EXPORT2
1461	uloc_openKeywordList(const char keywordList, int32_t keywordListSize, UErrorCode status)
1462	{
1463	LocalMemory<UKeywordsContext> myContext;
1464	LocalMemory<UEnumeration> result;
1465
1466	if (U_FAILURE(*status)) {
1467	return nullptr;
1468	}
1469	myContext.adoptInstead(static_cast<UKeywordsContext >(uprv_malloc(sizeof*(UKeywordsContext))));
1470	result.adoptInstead(static_cast<UEnumeration >(uprv_malloc(sizeof*(UEnumeration))));
1471	if (myContext.isNull() \|\| result.isNull()) {
1472	*status = U_MEMORY_ALLOCATION_ERROR;
1473	return nullptr;
1474	}
1475	uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));
1476	myContext ->keywords = static_cast<char *>(uprv_malloc(keywordListSize+`1`));
1477	if (myContext ->keywords == nullptr) {
1478	*status = U_MEMORY_ALLOCATION_ERROR;
1479	return nullptr;
1480	}
1481	uprv_memcpy(myContext ->keywords, keywordList, keywordListSize);
1482	myContext ->keywords[keywordListSize] = `0`;
1483	myContext ->current = myContext ->keywords;
1484	result ->context = myContext.orphan();
1485	return result.orphan();
1486	}
1487
1488	U_CAPI UEnumeration* U_EXPORT2
1489	uloc_openKeywords(const char* localeID,
1490	UErrorCode* status)
1491	{
1492	int32_t i=`0`;
1493	char keywords[`256`];
1494	int32_t keywordsCapacity = `256`;
1495	char tempBuffer[ULOC_FULLNAME_CAPACITY];
1496	const char* tmpLocaleID;
1497
1498	if(status==NULL \|\| U_FAILURE(*status)) {
1499	return `0`;
1500	}
1501
1502	if (_hasBCP47Extension(localeID)) {
1503	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1504	} else {
1505	if (localeID==NULL) {
1506	localeID=uloc_getDefault();
1507	}
1508	tmpLocaleID=localeID;
1509	}
1510
1511	/ Skip the language /
1512	ulocimp_getLanguage(tmpLocaleID, NULL, `0`, &tmpLocaleID);
1513	if(_isIDSeparator(*tmpLocaleID)) {
1514	const char *scriptID;
1515	/ Skip the script if available /
1516	ulocimp_getScript(tmpLocaleID+`1`, NULL, `0`, &scriptID);
1517	if(scriptID != tmpLocaleID+`1`) {
1518	/ Found optional script /
1519	tmpLocaleID = scriptID;
1520	}
1521	/ Skip the Country /
1522	if (_isIDSeparator(*tmpLocaleID)) {
1523	ulocimp_getCountry(tmpLocaleID+`1`, NULL, `0`, &tmpLocaleID);
1524	if(_isIDSeparator(*tmpLocaleID)) {
1525	_getVariant(tmpLocaleID+`1`, *tmpLocaleID, NULL, `0`);
1526	}
1527	}
1528	}
1529
1530	/ keywords are located after '@' /
1531	if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1532	i=locale_getKeywords(tmpLocaleID+`1`, `'@'`, keywords, keywordsCapacity, NULL, `0`, NULL, FALSE, status);
1533	}
1534
1535	if(i) {
1536	return uloc_openKeywordList(keywords, i, status);
1537	} else {
1538	return NULL;
1539	}
1540	}
1541
1542
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* Non-zero when any bit of 'mask' is set in 'options'. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The text "i-default", stored without a terminating NUL. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
1551
1552	/**
1553	* Canonicalize the given localeID, to level 1 or to level 2,
1554	* depending on the options. To specify level 1, pass in options=0.
1555	* To specify level 2, pass in options=_ULOC_CANONICALIZE.
1556	*
1557	* This is the code underlying uloc_getName and uloc_canonicalize.
1558	*/
1559	static int32_t
1560	_canonicalize(const char* localeID,
1561	char* result,
1562	int32_t resultCapacity,
1563	uint32_t options,
1564	UErrorCode* err) {
1565	int32_t j, len, fieldCount=`0`, scriptSize=`0`, variantSize=`0`, nameCapacity;
1566	char localeBuffer[ULOC_FULLNAME_CAPACITY];
1567	char tempBuffer[ULOC_FULLNAME_CAPACITY];
1568	const char* origLocaleID;
1569	const char* tmpLocaleID;
1570	const char* keywordAssign = NULL;
1571	const char* separatorIndicator = NULL;
1572	char* name;
1573	char* variant = NULL; / pointer into name, or NULL /
1574
1575	if (U_FAILURE(*err)) {
1576	return `0`;
1577	}
1578
1579	if (_hasBCP47Extension(localeID)) {
1580	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1581	} else {
1582	if (localeID==NULL) {
1583	localeID=uloc_getDefault();
1584	}
1585	tmpLocaleID=localeID;
1586	}
1587
1588	origLocaleID=tmpLocaleID;
1589
1590	/ if we are doing a full canonicalization, then put results in*
1591	localeBuffer, if necessary; otherwise send them to result. /*
1592	if (/OPTION_SET(options, _ULOC_CANONICALIZE) &&/
1593	(result == NULL \|\| resultCapacity < (int32_t)sizeof(localeBuffer))) {
1594	name = localeBuffer;
1595	nameCapacity = (int32_t)sizeof(localeBuffer);
1596	} else {
1597	name = result;
1598	nameCapacity = resultCapacity;
1599	}
1600
1601	/ get all pieces, one after another, and separate with '_' /
1602	len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
1603
1604	if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == `0`) {
1605	const char *d = uloc_getDefault();
1606
1607	len = (int32_t)uprv_strlen(d);
1608
1609	if (name != NULL) {
1610	uprv_memcpy(name, d, len);
1611	}
1612	} else if(_isIDSeparator(*tmpLocaleID)) {
1613	const char *scriptID;
1614
1615	++fieldCount;
1616	if(len<nameCapacity) {
1617	name[len]=`'_'`;
1618	}
1619	++len;
1620
1621	scriptSize=ulocimp_getScript(tmpLocaleID+`1`,
1622	(len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
1623	if(scriptSize > `0`) {
1624	/ Found optional script /
1625	tmpLocaleID = scriptID;
1626	++fieldCount;
1627	len+=scriptSize;
1628	if (_isIDSeparator(*tmpLocaleID)) {
1629	/ If there is something else, then we add the _ /
1630	if(len<nameCapacity) {
1631	name[len]=`'_'`;
1632	}
1633	++len;
1634	}
1635	}
1636
1637	if (_isIDSeparator(*tmpLocaleID)) {
1638	const char *cntryID;
1639	int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+`1`,
1640	(len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
1641	if (cntrySize > `0`) {
1642	/ Found optional country /
1643	tmpLocaleID = cntryID;
1644	len+=cntrySize;
1645	}
1646	if(_isIDSeparator(*tmpLocaleID)) {
1647	/ If there is something else, then we add the _ if we found country before. /
1648	if (cntrySize >= `0` && ! _isIDSeparator(*(tmpLocaleID+`1`)) ) {
1649	++fieldCount;
1650	if(len<nameCapacity) {
1651	name[len]=`'_'`;
1652	}
1653	++len;
1654	}
1655
1656	variantSize = _getVariant(tmpLocaleID+`1`, *tmpLocaleID,
1657	(len<nameCapacity ? name+len : NULL), nameCapacity-len);
1658	if (variantSize > `0`) {
1659	variant = len<nameCapacity ? name+len : NULL;
1660	len += variantSize;
1661	tmpLocaleID += variantSize + `1`; / skip '_' and variant /
1662	}
1663	}
1664	}
1665	}
1666
1667	/ Copy POSIX-style charset specifier, if any [mr.utf8] /
1668	if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == `'.'`) {
1669	UBool done = FALSE;
1670	do {
1671	char c = *tmpLocaleID;
1672	switch (c) {
1673	case `0`:
1674	case `'@'`:
1675	done = TRUE;
1676	break;
1677	default:
1678	if (len<nameCapacity) {
1679	name[len] = c;
1680	}
1681	++len;
1682	++tmpLocaleID;
1683	break;
1684	}
1685	} while (!done);
1686	}
1687
1688	/ Scan ahead to next '@' and determine if it is followed by '=' and/or ';'*
1689	After this, tmpLocaleID either points to '@' or is NULL /*
1690	if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1691	keywordAssign = uprv_strchr(tmpLocaleID, `'='`);
1692	separatorIndicator = uprv_strchr(tmpLocaleID, `';'`);
1693	}
1694
1695	/ Copy POSIX-style variant, if any [mr@FOO] /
1696	if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1697	tmpLocaleID != NULL && keywordAssign == NULL) {
1698	for (;;) {
1699	char c = *tmpLocaleID;
1700	if (c == `0`) {
1701	break;
1702	}
1703	if (len<nameCapacity) {
1704	name[len] = c;
1705	}
1706	++len;
1707	++tmpLocaleID;
1708	}
1709	}
1710
1711	if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1712	/ Handle @FOO variant if @ is present and not followed by = /
1713	if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1714	int32_t posixVariantSize;
1715	/ Add missing '_' if needed /
1716	if (fieldCount < `2` \|\| (fieldCount < `3` && scriptSize > `0`)) {
1717	do {
1718	if(len<nameCapacity) {
1719	name[len]=`'_'`;
1720	}
1721	++len;
1722	++fieldCount;
1723	} while(fieldCount<`2`);
1724	}
1725	posixVariantSize = _getVariantEx(tmpLocaleID+`1`, `'@'`, name+len, nameCapacity-len,
1726	(UBool)(variantSize > `0`));
1727	if (posixVariantSize > `0`) {
1728	if (variant == NULL) {
1729	variant = name+len;
1730	}
1731	len += posixVariantSize;
1732	variantSize += posixVariantSize;
1733	}
1734	}
1735
1736	/ Look up the ID in the canonicalization map /
1737	for (j=`0`; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
1738	const char* id = CANONICALIZE_MAP[j].id;
1739	int32_t n = (int32_t)uprv_strlen(id);
1740	if (len == n && uprv_strncmp(name, id, n) == `0`) {
1741	if (n == `0` && tmpLocaleID != NULL) {
1742	break; / Don't remap "" if keywords present /
1743	}
1744	len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1745	break;
1746	}
1747	}
1748	}
1749
1750	if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1751	if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1752	(!separatorIndicator \|\| separatorIndicator > keywordAssign)) {
1753	if(len<nameCapacity) {
1754	name[len]=`'@'`;
1755	}
1756	++len;
1757	++fieldCount;
1758	len += _getKeywords(tmpLocaleID+`1`, `'@'`, (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1759	NULL, `0`, NULL, TRUE, err);
1760	}
1761	}
1762
1763	if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1764	uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1765	}
1766
1767	return u_terminateChars(result, resultCapacity, len, err);
1768	}
1769
/* ### ID parsing API **************************************************/
1771
1772	U_CAPI int32_t U_EXPORT2
1773	uloc_getParent(const char* localeID,
1774	char* parent,
1775	int32_t parentCapacity,
1776	UErrorCode* err)
1777	{
1778	const char *lastUnderscore;
1779	int32_t i;
1780
1781	if (U_FAILURE(*err))
1782	return `0`;
1783
1784	if (localeID == NULL)
1785	localeID = uloc_getDefault();
1786
1787	lastUnderscore=uprv_strrchr(localeID, `'_'`);
1788	if(lastUnderscore!=NULL) {
1789	i=(int32_t)(lastUnderscore-localeID);
1790	} else {
1791	i=`0`;
1792	}
1793
1794	if (i > `0`) {
1795	if (uprv_strnicmp(localeID, "und_", `4`) == `0`) {
1796	localeID += `3`;
1797	i -= `3`;
1798	uprv_memmove(parent, localeID, uprv_min(i, parentCapacity));
1799	} else if (parent != localeID) {
1800	uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1801	}
1802	}
1803
1804	return u_terminateChars(parent, parentCapacity, i, err);
1805	}
1806
1807	U_CAPI int32_t U_EXPORT2
1808	uloc_getLanguage(const char* localeID,
1809	char* language,
1810	int32_t languageCapacity,
1811	UErrorCode* err)
1812	{
1813	/ uloc_getLanguage will return a 2 character iso-639 code if one exists. CWB/*
1814	int32_t i=`0`;
1815
1816	if (err==NULL \|\| U_FAILURE(*err)) {
1817	return `0`;
1818	}
1819
1820	if(localeID==NULL) {
1821	localeID=uloc_getDefault();
1822	}
1823
1824	i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
1825	return u_terminateChars(language, languageCapacity, i, err);
1826	}
1827
1828	U_CAPI int32_t U_EXPORT2
1829	uloc_getScript(const char* localeID,
1830	char* script,
1831	int32_t scriptCapacity,
1832	UErrorCode* err)
1833	{
1834	int32_t i=`0`;
1835
1836	if(err==NULL \|\| U_FAILURE(*err)) {
1837	return `0`;
1838	}
1839
1840	if(localeID==NULL) {
1841	localeID=uloc_getDefault();
1842	}
1843
1844	/ skip the language /
1845	ulocimp_getLanguage(localeID, NULL, `0`, &localeID);
1846	if(_isIDSeparator(*localeID)) {
1847	i=ulocimp_getScript(localeID+`1`, script, scriptCapacity, NULL);
1848	}
1849	return u_terminateChars(script, scriptCapacity, i, err);
1850	}
1851
1852	U_CAPI int32_t U_EXPORT2
1853	uloc_getCountry(const char* localeID,
1854	char* country,
1855	int32_t countryCapacity,
1856	UErrorCode* err)
1857	{
1858	int32_t i=`0`;
1859
1860	if(err==NULL \|\| U_FAILURE(*err)) {
1861	return `0`;
1862	}
1863
1864	if(localeID==NULL) {
1865	localeID=uloc_getDefault();
1866	}
1867
1868	/ Skip the language /
1869	ulocimp_getLanguage(localeID, NULL, `0`, &localeID);
1870	if(_isIDSeparator(*localeID)) {
1871	const char *scriptID;
1872	/ Skip the script if available /
1873	ulocimp_getScript(localeID+`1`, NULL, `0`, &scriptID);
1874	if(scriptID != localeID+`1`) {
1875	/ Found optional script /
1876	localeID = scriptID;
1877	}
1878	if(_isIDSeparator(*localeID)) {
1879	i=ulocimp_getCountry(localeID+`1`, country, countryCapacity, NULL);
1880	}
1881	}
1882	return u_terminateChars(country, countryCapacity, i, err);
1883	}
1884
1885	U_CAPI int32_t U_EXPORT2
1886	uloc_getVariant(const char* localeID,
1887	char* variant,
1888	int32_t variantCapacity,
1889	UErrorCode* err)
1890	{
1891	char tempBuffer[ULOC_FULLNAME_CAPACITY];
1892	const char* tmpLocaleID;
1893	int32_t i=`0`;
1894
1895	if(err==NULL \|\| U_FAILURE(*err)) {
1896	return `0`;
1897	}
1898
1899	if (_hasBCP47Extension(localeID)) {
1900	_ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1901	} else {
1902	if (localeID==NULL) {
1903	localeID=uloc_getDefault();
1904	}
1905	tmpLocaleID=localeID;
1906	}
1907
1908	/ Skip the language /
1909	ulocimp_getLanguage(tmpLocaleID, NULL, `0`, &tmpLocaleID);
1910	if(_isIDSeparator(*tmpLocaleID)) {
1911	const char *scriptID;
1912	/ Skip the script if available /
1913	ulocimp_getScript(tmpLocaleID+`1`, NULL, `0`, &scriptID);
1914	if(scriptID != tmpLocaleID+`1`) {
1915	/ Found optional script /
1916	tmpLocaleID = scriptID;
1917	}
1918	/ Skip the Country /
1919	if (_isIDSeparator(*tmpLocaleID)) {
1920	const char *cntryID;
1921	ulocimp_getCountry(tmpLocaleID+`1`, NULL, `0`, &cntryID);
1922	if (cntryID != tmpLocaleID+`1`) {
1923	/ Found optional country /
1924	tmpLocaleID = cntryID;
1925	}
1926	if(_isIDSeparator(*tmpLocaleID)) {
1927	/ If there was no country ID, skip a possible extra IDSeparator /
1928	if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[`1`])) {
1929	tmpLocaleID++;
1930	}
1931	i=_getVariant(tmpLocaleID+`1`, *tmpLocaleID, variant, variantCapacity);
1932	}
1933	}
1934	}
1935
1936	/ removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function /
1937	/ if we do not have a variant tag yet then try a POSIX variant after '@' /
1938	/*
1939	if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
1940	i=_getVariant(localeID+1, '@', variant, variantCapacity);
1941	}
1942	*/
1943	return u_terminateChars(variant, variantCapacity, i, err);
1944	}
1945
1946	U_CAPI int32_t U_EXPORT2
1947	uloc_getName(const char* localeID,
1948	char* name,
1949	int32_t nameCapacity,
1950	UErrorCode* err)
1951	{
1952	return _canonicalize(localeID, name, nameCapacity, `0`, err);
1953	}
1954
1955	U_CAPI int32_t U_EXPORT2
1956	uloc_getBaseName(const char* localeID,
1957	char* name,
1958	int32_t nameCapacity,
1959	UErrorCode* err)
1960	{
1961	return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
1962	}
1963
1964	U_CAPI int32_t U_EXPORT2
1965	uloc_canonicalize(const char* localeID,
1966	char* name,
1967	int32_t nameCapacity,
1968	UErrorCode* err)
1969	{
1970	return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
1971	}
1972
1973	U_CAPI const char* U_EXPORT2
1974	uloc_getISO3Language(const char* localeID)
1975	{
1976	int16_t offset;
1977	char lang[ULOC_LANG_CAPACITY];
1978	UErrorCode err = U_ZERO_ERROR;
1979
1980	if (localeID == NULL)
1981	{
1982	localeID = uloc_getDefault();
1983	}
1984	uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
1985	if (U_FAILURE(err))
1986	return "";
1987	offset = _findIndex(LANGUAGES, lang);
1988	if (offset < `0`)
1989	return "";
1990	return LANGUAGES_3[offset];
1991	}
1992
1993	U_CAPI const char* U_EXPORT2
1994	uloc_getISO3Country(const char* localeID)
1995	{
1996	int16_t offset;
1997	char cntry[ULOC_LANG_CAPACITY];
1998	UErrorCode err = U_ZERO_ERROR;
1999
2000	if (localeID == NULL)
2001	{
2002	localeID = uloc_getDefault();
2003	}
2004	uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
2005	if (U_FAILURE(err))
2006	return "";
2007	offset = _findIndex(COUNTRIES, cntry);
2008	if (offset < `0`)
2009	return "";
2010
2011	return COUNTRIES_3[offset];
2012	}
2013
2014	U_CAPI uint32_t U_EXPORT2
2015	uloc_getLCID(const char* localeID)
2016	{
2017	UErrorCode status = U_ZERO_ERROR;
2018	char langID[ULOC_FULLNAME_CAPACITY];
2019	uint32_t lcid = `0`;
2020
2021	/ Check for incomplete id. /
2022	if (!localeID \|\| uprv_strlen(localeID) < `2`) {
2023	return `0`;
2024	}
2025
2026	// First, attempt Windows platform lookup if available, but fall
2027	// through to catch any special cases (ICU vs Windows name differences).
2028	lcid = uprv_convertToLCIDPlatform(localeID, &status);
2029	if (U_FAILURE(status)) {
2030	return `0`;
2031	}
2032	if (lcid > `0`) {
2033	// Windows found an LCID, return that
2034	return lcid;
2035	}
2036
2037	uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2038	if (U_FAILURE(status) \|\| status == U_STRING_NOT_TERMINATED_WARNING) {
2039	return `0`;
2040	}
2041
2042	if (uprv_strchr(localeID, `'@'`)) {
2043	// uprv_convertToLCID does not support keywords other than collation.
2044	// Remove all keywords except collation.
2045	int32_t len;
2046	char collVal[ULOC_KEYWORDS_CAPACITY];
2047	char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2048
2049	len = uloc_getKeywordValue(localeID, "collation", collVal,
2050	UPRV_LENGTHOF(collVal) - `1`, &status);
2051
2052	if (U_SUCCESS(status) && len > `0`) {
2053	collVal[len] = `0`;
2054
2055	len = uloc_getBaseName(localeID, tmpLocaleID,
2056	UPRV_LENGTHOF(tmpLocaleID) - `1`, &status);
2057
2058	if (U_SUCCESS(status) && len > `0`) {
2059	tmpLocaleID[len] = `0`;
2060
2061	len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
2062	UPRV_LENGTHOF(tmpLocaleID) - len - `1`, &status);
2063
2064	if (U_SUCCESS(status) && len > `0`) {
2065	tmpLocaleID[len] = `0`;
2066	return uprv_convertToLCID(langID, tmpLocaleID, &status);
2067	}
2068	}
2069	}
2070
2071	// fall through - all keywords are simply ignored
2072	status = U_ZERO_ERROR;
2073	}
2074
2075	return uprv_convertToLCID(langID, localeID, &status);
2076	}
2077
2078	U_CAPI int32_t U_EXPORT2
2079	uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2080	UErrorCode *status)
2081	{
2082	return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2083	}
2084
/* ### Default locale **************************************************/
2086
2087	U_CAPI const char* U_EXPORT2
2088	uloc_getDefault()
2089	{
2090	return locale_get_default();
2091	}
2092
2093	U_CAPI void U_EXPORT2
2094	uloc_setDefault(const char* newDefaultLocale,
2095	UErrorCode* err)
2096	{
2097	if (U_FAILURE(*err))
2098	return;
2099	/ the error code isn't currently used for anything by this function/
2100
2101	/ propagate change to C++ /
2102	locale_set_default(newDefaultLocale);
2103	}
2104
2105	/**
2106	* Returns a list of all 2-letter language codes defined in ISO 639. This is a pointer
2107	* to an array of pointers to arrays of char. All of these pointers are owned
2108	* by ICU-- do not delete them, and do not write through them. The array is
2109	* terminated with a null pointer.
2110	*/
2111	U_CAPI const char* const* U_EXPORT2
2112	uloc_getISOLanguages()
2113	{
2114	return LANGUAGES;
2115	}
2116
2117	/**
2118	* Returns a list of all 2-letter country codes defined in ISO 639. This is a
2119	* pointer to an array of pointers to arrays of char. All of these pointers are
2120	* owned by ICU-- do not delete them, and do not write through them. The array is
2121	* terminated with a null pointer.
2122	*/
2123	U_CAPI const char* const* U_EXPORT2
2124	uloc_getISOCountries()
2125	{
2126	return COUNTRIES;
2127	}
2128
2129
2130	/ this function to be moved into cstring.c later /
2131	static char gDecimal = `0`;
2132
2133	static / U_CAPI /
2134	double
2135	/ U_EXPORT2 /
2136	_uloc_strtod(const char start, char* **end) {
2137	char *decimal;
2138	char *myEnd;
2139	char buf[`30`];
2140	double rv;
2141	if (!gDecimal) {
2142	char rep[`5`];
2143	/ For machines that decide to change the decimal on you,*
2144	and try to be too smart with localization.
2145	This normally should be just a '.'. /*
2146	sprintf(rep, "%+1.1f", `1.0`);
2147	gDecimal = rep[`2`];
2148	}
2149
2150	if(gDecimal == `'.'`) {
2151	return uprv_strtod(start, end); / fall through to OS /
2152	} else {
2153	uprv_strncpy(buf, start, `29`);
2154	buf[`29`]=`0`;
2155	decimal = uprv_strchr(buf, `'.'`);
2156	if(decimal) {
2157	*decimal = gDecimal;
2158	} else {
2159	return uprv_strtod(start, end); / no decimal point /
2160	}
2161	rv = uprv_strtod(buf, &myEnd);
2162	if(end) {
2163	end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) /
2164	}
2165	return rv;
2166	}
2167	}
2168
2169	typedef struct {
2170	float q;
2171	int32_t dummy; / to avoid uninitialized memory copy from qsort /
2172	char locale[ULOC_FULLNAME_CAPACITY+`1`];
2173	} _acceptLangItem;
2174
2175	static int32_t U_CALLCONV
2176	uloc_acceptLanguageCompare(const void * /context/, const void a, const* void *b)
2177	{
2178	const _acceptLangItem aa = (const* _acceptLangItem*)a;
2179	const _acceptLangItem bb = (const* _acceptLangItem*)b;
2180
2181	int32_t rc = `0`;
2182	if(bb->q < aa->q) {
2183	rc = -`1`; / A > B /
2184	} else if(bb->q > aa->q) {
2185	rc = `1`; / A < B /
2186	} else {
2187	rc = `0`; / A = B /
2188	}
2189
2190	if(rc==`0`) {
2191	rc = uprv_stricmp(aa->locale, bb->locale);
2192	}
2193
2194	#if defined(ULOC_DEBUG)
2195	/ fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",*
2196	aa->locale, aa->q,
2197	bb->locale, bb->q,
2198	rc);/*
2199	#endif
2200
2201	return rc;
2202	}
2203
2204	/*
2205	mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2206	*/
2207
2208	U_CAPI int32_t U_EXPORT2
2209	uloc_acceptLanguageFromHTTP(char result, int32_t resultAvailable, UAcceptResult outResult,
2210	const char *httpAcceptLanguage,
2211	UEnumeration* availableLocales,
2212	UErrorCode *status)
2213	{
2214	MaybeStackArray<_acceptLangItem, `4`> items; // Struct for collecting items.
2215	char tmp[ULOC_FULLNAME_CAPACITY +`1`];
2216	int32_t n = `0`;
2217	const char *itemEnd;
2218	const char *paramEnd;
2219	const char *s;
2220	const char *t;
2221	int32_t res;
2222	int32_t i;
2223	int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2224
2225	if(U_FAILURE(*status)) {
2226	return -`1`;
2227	}
2228
2229	for(s=httpAcceptLanguage;s&&*s;) {
2230	while(isspace(s)) /* eat space at the beginning /
2231	s++;
2232	itemEnd=uprv_strchr(s,`','`);
2233	paramEnd=uprv_strchr(s,`';'`);
2234	if(!itemEnd) {
2235	itemEnd = httpAcceptLanguage+l; / end of string /
2236	}
2237	if(paramEnd && paramEnd<itemEnd) {
2238	/ semicolon (;) is closer than end (,) /
2239	t = paramEnd+`1`;
2240	if(*t==`'q'`) {
2241	t++;
2242	}
2243	while(isspace(*t)) {
2244	t++;
2245	}
2246	if(*t==`'='`) {
2247	t++;
2248	}
2249	while(isspace(*t)) {
2250	t++;
2251	}
2252	items [n].q = (float)_uloc_strtod(t,NULL);
2253	} else {
2254	/ no semicolon - it's 1.0 /
2255	items [n].q = `1.0f`;
2256	paramEnd = itemEnd;
2257	}
2258	items [n].dummy=`0`;
2259	/ eat spaces prior to semi /
2260	for(t=(paramEnd-`1`);(paramEnd>s)&&isspace(*t);t--)
2261	;
2262	int32_t slen = static_cast<int32_t>(((t+`1`)-s));
2263	if(slen > ULOC_FULLNAME_CAPACITY) {
2264	*status = U_BUFFER_OVERFLOW_ERROR;
2265	return -`1`; // too big
2266	}
2267	uprv_strncpy(items[n].locale, s, slen);
2268	items [n].locale[slen]=`0`; // terminate
2269	int32_t clen = uloc_canonicalize(items [n].locale, tmp, UPRV_LENGTHOF(tmp)-`1`, status);
2270	if(U_FAILURE(status)) return* -`1`;
2271	if((clen!=slen) \|\| (uprv_strncmp(items[n].locale, tmp, slen))) {
2272	// canonicalization had an effect- copy back
2273	uprv_strncpy(items[n].locale, tmp, clen);
2274	items [n].locale[clen] = `0`; // terminate
2275	}
2276	#if defined(ULOC_DEBUG)
2277	/fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);/
2278	#endif
2279	n++;
2280	s = itemEnd;
2281	while(s==`','`) { /* eat duplicate commas /
2282	s++;
2283	}
2284	if(n>=items.getCapacity()) { // If we need more items
2285	if(NULL == items.resize(items.getCapacity()*`2`, items.getCapacity())) {
2286	*status = U_MEMORY_ALLOCATION_ERROR;
2287	return -`1`;
2288	}
2289	#if defined(ULOC_DEBUG)
2290	fprintf(stderr,"malloced at size %d\n", items.getCapacity());
2291	#endif
2292	}
2293	}
2294	uprv_sortArray(items.getAlias(), n, sizeof(items [`0`]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2295	if (U_FAILURE(*status)) {
2296	return -`1`;
2297	}
2298	LocalMemory<const char*> strs(NULL);
2299	if (strs.allocateInsteadAndReset(n) == NULL) {
2300	*status = U_MEMORY_ALLOCATION_ERROR;
2301	return -`1`;
2302	}
2303	for(i=`0`;i<n;i++) {
2304	#if defined(ULOC_DEBUG)
2305	/fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);/
2306	#endif
2307	strs [i]=items [i].locale;
2308	}
2309	res = uloc_acceptLanguage(result, resultAvailable, outResult,
2310	strs.getAlias(), n, availableLocales, status);
2311	return res;
2312	}
2313
2314
2315	U_CAPI int32_t U_EXPORT2
2316	uloc_acceptLanguage(char *result, int32_t resultAvailable,
2317	UAcceptResult outResult, const* char **acceptList,
2318	int32_t acceptListCount,
2319	UEnumeration* availableLocales,
2320	UErrorCode *status)
2321	{
2322	int32_t i,j;
2323	int32_t len;
2324	int32_t maxLen=`0`;
2325	char tmp[ULOC_FULLNAME_CAPACITY+`1`];
2326	const char *l;
2327	char **fallbackList;
2328	if(U_FAILURE(*status)) {
2329	return -`1`;
2330	}
2331	fallbackList = static_cast<char >(uprv_malloc((size_t)(sizeof*(fallbackList[`0`])acceptListCount)));
2332	if(fallbackList==NULL) {
2333	*status = U_MEMORY_ALLOCATION_ERROR;
2334	return -`1`;
2335	}
2336	for(i=`0`;i<acceptListCount;i++) {
2337	#if defined(ULOC_DEBUG)
2338	fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2339	#endif
2340	while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
2341	#if defined(ULOC_DEBUG)
2342	fprintf(stderr," %s\n", l);
2343	#endif
2344	len = (int32_t)uprv_strlen(l);
2345	if(!uprv_strcmp(acceptList[i], l)) {
2346	if(outResult) {
2347	*outResult = ULOC_ACCEPT_VALID;
2348	}
2349	#if defined(ULOC_DEBUG)
2350	fprintf(stderr, "MATCH! %s\n", l);
2351	#endif
2352	if(len>`0`) {
2353	uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2354	}
2355	for(j=`0`;j<i;j++) {
2356	uprv_free(fallbackList[j]);
2357	}
2358	uprv_free(fallbackList);
2359	return u_terminateChars(result, resultAvailable, len, status);
2360	}
2361	if(len>maxLen) {
2362	maxLen = len;
2363	}
2364	}
2365	uenum_reset(availableLocales, status);
2366	/ save off parent info /
2367	if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=`0`) {
2368	fallbackList[i] = uprv_strdup(tmp);
2369	} else {
2370	fallbackList[i]=`0`;
2371	}
2372	}
2373
2374	for(maxLen--;maxLen>`0`;maxLen--) {
2375	for(i=`0`;i<acceptListCount;i++) {
2376	if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2377	#if defined(ULOC_DEBUG)
2378	fprintf(stderr,"Try: [%s]", fallbackList[i]);
2379	#endif
2380	while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
2381	#if defined(ULOC_DEBUG)
2382	fprintf(stderr," %s\n", l);
2383	#endif
2384	len = (int32_t)uprv_strlen(l);
2385	if(!uprv_strcmp(fallbackList[i], l)) {
2386	if(outResult) {
2387	*outResult = ULOC_ACCEPT_FALLBACK;
2388	}
2389	#if defined(ULOC_DEBUG)
2390	fprintf(stderr, "fallback MATCH! %s\n", l);
2391	#endif
2392	if(len>`0`) {
2393	uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2394	}
2395	for(j=`0`;j<acceptListCount;j++) {
2396	uprv_free(fallbackList[j]);
2397	}
2398	uprv_free(fallbackList);
2399	return u_terminateChars(result, resultAvailable, len, status);
2400	}
2401	}
2402	uenum_reset(availableLocales, status);
2403
2404	if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=`0`) {
2405	uprv_free(fallbackList[i]);
2406	fallbackList[i] = uprv_strdup(tmp);
2407	} else {
2408	uprv_free(fallbackList[i]);
2409	fallbackList[i]=`0`;
2410	}
2411	}
2412	}
2413	if(outResult) {
2414	*outResult = ULOC_ACCEPT_FAILED;
2415	}
2416	}
2417	for(i=`0`;i<acceptListCount;i++) {
2418	uprv_free(fallbackList[i]);
2419	}
2420	uprv_free(fallbackList);
2421	return -`1`;
2422	}
2423
2424	U_CAPI const char* U_EXPORT2
2425	uloc_toUnicodeLocaleKey(const char* keyword)
2426	{
2427	const char* bcpKey = ulocimp_toBcpKey(keyword);
2428	if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -`1`)) {
2429	// unknown keyword, but syntax is fine..
2430	return keyword;
2431	}
2432	return bcpKey;
2433	}
2434
2435	U_CAPI const char* U_EXPORT2
2436	uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2437	{
2438	const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
2439	if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -`1`)) {
2440	// unknown keyword, but syntax is fine..
2441	return value;
2442	}
2443	return bcpType;
2444	}
2445
2446	static UBool
2447	isWellFormedLegacyKey(const char* legacyKey)
2448	{
2449	const char* p = legacyKey;
2450	while (*p) {
2451	if (!UPRV_ISALPHANUM(*p)) {
2452	return FALSE;
2453	}
2454	p++;
2455	}
2456	return TRUE;
2457	}
2458
2459	static UBool
2460	isWellFormedLegacyType(const char* legacyType)
2461	{
2462	const char* p = legacyType;
2463	int32_t alphaNumLen = `0`;
2464	while (*p) {
2465	if (p == `'_'` \|\| p == `'/'` \|\| *p == `'-'`) {
2466	if (alphaNumLen == `0`) {
2467	return FALSE;
2468	}
2469	alphaNumLen = `0`;
2470	} else if (UPRV_ISALPHANUM(*p)) {
2471	alphaNumLen++;
2472	} else {
2473	return FALSE;
2474	}
2475	p++;
2476	}
2477	return (alphaNumLen != `0`);
2478	}
2479
2480	U_CAPI const char* U_EXPORT2
2481	uloc_toLegacyKey(const char* keyword)
2482	{
2483	const char* legacyKey = ulocimp_toLegacyKey(keyword);
2484	if (legacyKey == NULL) {
2485	// Checks if the specified locale key is well-formed with the legacy locale syntax.
2486	//
2487	// Note:
2488	// LDML/CLDR provides some definition of keyword syntax in
2489	// http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and*
2490	// http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax*
2491	// Keys can only consist of [0-9a-zA-Z].
2492	if (isWellFormedLegacyKey(keyword)) {
2493	return keyword;
2494	}
2495	}
2496	return legacyKey;
2497	}
2498
2499	U_CAPI const char* U_EXPORT2
2500	uloc_toLegacyType(const char* keyword, const char* value)
2501	{
2502	const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
2503	if (legacyType == NULL) {
2504	// Checks if the specified locale type is well-formed with the legacy locale syntax.
2505	//
2506	// Note:
2507	// LDML/CLDR provides some definition of keyword syntax in
2508	// http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and*
2509	// http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax*
2510	// Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
2511	// we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
2512	if (isWellFormedLegacyType(value)) {
2513	return value;
2514	}
2515	}
2516	return legacyType;
2517	}
2518
/*eof*/
2520

Browse the source code of ClickHouse/contrib/icu/icu4c/source/common/uloc.cpp