locid.cpp source code [Godot/thirdparty/icu4c/common/locid.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 1997-2016, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	*
9	* File locid.cpp
10	*
11	* Created by: Richard Gillam
12	*
13	* Modification History:
14	*
15	* Date Name Description
16	* 02/11/97 aliu Changed gLocPath to fgDataDirectory and added
17	* methods to get and set it.
18	* 04/02/97 aliu Made operator!= inline; fixed return value
19	* of getName().
20	* 04/15/97 aliu Cleanup for AIX/Win32.
21	* 04/24/97 aliu Numerous changes per code review.
22	* 08/18/98 stephen Changed getDisplayName()
23	* Added SIMPLIFIED_CHINESE, TRADITIONAL_CHINESE
24	* Added getISOCountries(), getISOLanguages(),
25	* getLanguagesForCountry()
26	* 03/16/99 bertrand rehaul.
27	* 07/21/99 stephen Added U_CFUNC setDefault
28	* 11/09/99 weiv Added const char * getName() const;
29	* 04/12/00 srl removing unicodestring api's and cached hash code
30	* 08/10/01 grhoten Change the static Locales to accessor functions
31	******************************************************************************
32	*/
33
34	#include <utility>
35
36	#include "unicode/bytestream.h"
37	#include "unicode/locid.h"
38	#include "unicode/localebuilder.h"
39	#include "unicode/strenum.h"
40	#include "unicode/stringpiece.h"
41	#include "unicode/uloc.h"
42	#include "unicode/ures.h"
43
44	#include "bytesinkutil.h"
45	#include "charstr.h"
46	#include "charstrmap.h"
47	#include "cmemory.h"
48	#include "cstring.h"
49	#include "mutex.h"
50	#include "putilimp.h"
51	#include "uassert.h"
52	#include "ucln_cmn.h"
53	#include "uhash.h"
54	#include "ulocimp.h"
55	#include "umutex.h"
56	#include "uniquecharstr.h"
57	#include "ustr_imp.h"
58	#include "uvector.h"
59
60	U_CDECL_BEGIN
61	static UBool U_CALLCONV locale_cleanup();
62	U_CDECL_END
63
64	U_NAMESPACE_BEGIN
65
66	static Locale gLocaleCache = nullptr*;
67	static UInitOnce gLocaleCacheInitOnce {};
68
69	// gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale.
70	static UMutex gDefaultLocaleMutex;
71	static UHashtable gDefaultLocalesHashT = nullptr*;
72	static Locale gDefaultLocale = nullptr*;
73
74	/**
75	* \def ULOC_STRING_LIMIT
76	* strings beyond this value crash in CharString
77	*/
78	#define ULOC_STRING_LIMIT 357913941
79
80	U_NAMESPACE_END
81
// Slot numbers of the predefined locales held in gLocaleCache.
// Language-only entries come first, followed by language+country entries.
typedef enum ELocalePos {
    eENGLISH,
    eFRENCH,
    eGERMAN,
    eITALIAN,
    eJAPANESE,
    eKOREAN,
    eCHINESE,

    eFRANCE,
    eGERMANY,
    eITALY,
    eJAPAN,
    eKOREA,
    eCHINA,      /* Alias for PRC */
    eTAIWAN,
    eUK,
    eUS,
    eCANADA,
    eCANADA_FRENCH,
    eROOT,


    //eDEFAULT,
    eMAX_LOCALES   // number of slots; used as the size of gLocaleCache
} ELocalePos;
108
109	U_CDECL_BEGIN
110	//
111	// Deleter function for Locales owned by the default Locale hash table/
112	//
113	static void U_CALLCONV
114	deleteLocale(void *obj) {
115	delete (icu::Locale *) obj;
116	}
117
118	static UBool U_CALLCONV locale_cleanup()
119	{
120	U_NAMESPACE_USE
121
122	delete [] gLocaleCache;
123	gLocaleCache = nullptr;
124	gLocaleCacheInitOnce.reset();
125
126	if (gDefaultLocalesHashT) {
127	uhash_close(gDefaultLocalesHashT); // Automatically deletes all elements, using deleter func.
128	gDefaultLocalesHashT = nullptr;
129	}
130	gDefaultLocale = nullptr;
131	return true;
132	}
133
134
135	static void U_CALLCONV locale_init(UErrorCode &status) {
136	U_NAMESPACE_USE
137
138	U_ASSERT(gLocaleCache == nullptr);
139	gLocaleCache = new Locale[(int)eMAX_LOCALES];
140	if (gLocaleCache == nullptr) {
141	status = U_MEMORY_ALLOCATION_ERROR;
142	return;
143	}
144	ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
145	gLocaleCache[eROOT] = Locale ("");
146	gLocaleCache[eENGLISH] = Locale ("en");
147	gLocaleCache[eFRENCH] = Locale ("fr");
148	gLocaleCache[eGERMAN] = Locale ("de");
149	gLocaleCache[eITALIAN] = Locale ("it");
150	gLocaleCache[eJAPANESE] = Locale ("ja");
151	gLocaleCache[eKOREAN] = Locale ("ko");
152	gLocaleCache[eCHINESE] = Locale ("zh");
153	gLocaleCache[eFRANCE] = Locale ("fr", "FR");
154	gLocaleCache[eGERMANY] = Locale ("de", "DE");
155	gLocaleCache[eITALY] = Locale ("it", "IT");
156	gLocaleCache[eJAPAN] = Locale ("ja", "JP");
157	gLocaleCache[eKOREA] = Locale ("ko", "KR");
158	gLocaleCache[eCHINA] = Locale ("zh", "CN");
159	gLocaleCache[eTAIWAN] = Locale ("zh", "TW");
160	gLocaleCache[eUK] = Locale ("en", "GB");
161	gLocaleCache[eUS] = Locale ("en", "US");
162	gLocaleCache[eCANADA] = Locale ("en", "CA");
163	gLocaleCache[eCANADA_FRENCH] = Locale ("fr", "CA");
164	}
165
166	U_CDECL_END
167
168	U_NAMESPACE_BEGIN
169
170	Locale locale_set_default_internal(const* char *id, UErrorCode& status) {
171	// Synchronize this entire function.
172	Mutex lock(&gDefaultLocaleMutex);
173
174	UBool canonicalize = false;
175
176	// If given a nullptr string for the locale id, grab the default
177	// name from the system.
178	// (Different from most other locale APIs, where a null name means use
179	// the current ICU default locale.)
180	if (id == nullptr) {
181	id = uprv_getDefaultLocaleID(); // This function not thread safe? TODO: verify.
182	canonicalize = true; // always canonicalize host ID
183	}
184
185	CharString localeNameBuf;
186	{
187	CharStringByteSink sink(&localeNameBuf);
188	if (canonicalize) {
189	ulocimp_canonicalize(id, sink, &status);
190	} else {
191	ulocimp_getName(id, sink, &status);
192	}
193	}
194
195	if (U_FAILURE(status)) {
196	return gDefaultLocale;
197	}
198
199	if (gDefaultLocalesHashT == nullptr) {
200	gDefaultLocalesHashT = uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &status);
201	if (U_FAILURE(status)) {
202	return gDefaultLocale;
203	}
204	uhash_setValueDeleter(gDefaultLocalesHashT, deleteLocale);
205	ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
206	}
207
208	Locale newDefault = (Locale )uhash_get(gDefaultLocalesHashT, localeNameBuf.data());
209	if (newDefault == nullptr) {
210	newDefault = new Locale (Locale::eBOGUS);
211	if (newDefault == nullptr) {
212	status = U_MEMORY_ALLOCATION_ERROR;
213	return gDefaultLocale;
214	}
215	newDefault->init(localeNameBuf.data(), false);
216	uhash_put(gDefaultLocalesHashT, (char*) newDefault->getName(), newDefault, &status);
217	if (U_FAILURE(status)) {
218	return gDefaultLocale;
219	}
220	}
221	gDefaultLocale = newDefault;
222	return gDefaultLocale;
223	}
224
225	U_NAMESPACE_END
226
227	/ sfb 07/21/99 /
228	U_CFUNC void
229	locale_set_default(const char *id)
230	{
231	U_NAMESPACE_USE
232	UErrorCode status = U_ZERO_ERROR;
233	locale_set_default_internal(id, status);
234	}
235	/ end /
236
237	U_CFUNC const char *
238	locale_get_default()
239	{
240	U_NAMESPACE_USE
241	return Locale::getDefault().getName();
242	}
243
244
245	U_NAMESPACE_BEGIN
246
247	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
248
249	/Character separating the posix id fields/
250	// '_'
251	// In the platform codepage.
252	#define SEP_CHAR '_'
253	#define NULL_CHAR '\0'
254
255	Locale::~Locale()
256	{
257	if ((baseName != fullName) && (baseName != fullNameBuffer)) {
258	uprv_free(baseName);
259	}
260	baseName = nullptr;
261	/if fullName is on the heap, we free it/
262	if (fullName != fullNameBuffer)
263	{
264	uprv_free(fullName);
265	fullName = nullptr;
266	}
267	}
268
269	Locale::Locale()
270	: UObject (), fullName(fullNameBuffer), baseName(nullptr)
271	{
272	init(nullptr, false);
273	}
274
275	/*
276	* Internal constructor to allow construction of a locale object with
277	* NO side effects. (Default constructor tries to get
278	* the default locale.)
279	*/
280	Locale::Locale(Locale::ELocaleType)
281	: UObject (), fullName(fullNameBuffer), baseName(nullptr)
282	{
283	setToBogus();
284	}
285
286
287	Locale::Locale( const char * newLanguage,
288	const char * newCountry,
289	const char * newVariant,
290	const char * newKeywords)
291	: UObject (), fullName(fullNameBuffer), baseName(nullptr)
292	{
293	if( (newLanguage==nullptr) && (newCountry == nullptr) && (newVariant == nullptr) )
294	{
295	init(nullptr, false); / shortcut /
296	}
297	else
298	{
299	UErrorCode status = U_ZERO_ERROR;
300	int32_t lsize = `0`;
301	int32_t csize = `0`;
302	int32_t vsize = `0`;
303	int32_t ksize = `0`;
304
305	// Check the sizes of the input strings.
306
307	// Language
308	if ( newLanguage != nullptr )
309	{
310	lsize = (int32_t)uprv_strlen(newLanguage);
311	if ( lsize < `0` \|\| lsize > ULOC_STRING_LIMIT ) { // int32 wrap
312	setToBogus();
313	return;
314	}
315	}
316
317	CharString togo(newLanguage, lsize, status); // start with newLanguage
318
319	// _Country
320	if ( newCountry != nullptr )
321	{
322	csize = (int32_t)uprv_strlen(newCountry);
323	if ( csize < `0` \|\| csize > ULOC_STRING_LIMIT ) { // int32 wrap
324	setToBogus();
325	return;
326	}
327	}
328
329	// _Variant
330	if ( newVariant != nullptr )
331	{
332	// remove leading _'s
333	while(newVariant[`0`] == SEP_CHAR)
334	{
335	newVariant++;
336	}
337
338	// remove trailing _'s
339	vsize = (int32_t)uprv_strlen(newVariant);
340	if ( vsize < `0` \|\| vsize > ULOC_STRING_LIMIT ) { // int32 wrap
341	setToBogus();
342	return;
343	}
344	while( (vsize>`1`) && (newVariant[vsize-`1`] == SEP_CHAR) )
345	{
346	vsize--;
347	}
348	}
349
350	if ( newKeywords != nullptr)
351	{
352	ksize = (int32_t)uprv_strlen(newKeywords);
353	if ( ksize < `0` \|\| ksize > ULOC_STRING_LIMIT ) {
354	setToBogus();
355	return;
356	}
357	}
358
359	// We've checked the input sizes, now build up the full locale string..
360
361	// newLanguage is already copied
362
363	if ( ( vsize != `0` ) \|\| (csize != `0`) ) // at least: __v
364	{ // ^
365	togo.append(SEP_CHAR, status);
366	}
367
368	if ( csize != `0` )
369	{
370	togo.append(newCountry, status);
371	}
372
373	if ( vsize != `0`)
374	{
375	togo.append(SEP_CHAR, status)
376	.append(newVariant, vsize, status);
377	}
378
379	if ( ksize != `0`)
380	{
381	if (uprv_strchr(newKeywords, `'='`)) {
382	togo.append(`'@'`, status); / keyword parsing /
383	}
384	else {
385	togo.append(`'_'`, status); / Variant parsing with a script /
386	if ( vsize == `0`) {
387	togo.append(`'_'`, status); / No country found /
388	}
389	}
390	togo.append(newKeywords, status);
391	}
392
393	if (U_FAILURE(status)) {
394	// Something went wrong with appending, etc.
395	setToBogus();
396	return;
397	}
398	// Parse it, because for example 'language' might really be a complete
399	// string.
400	init(togo.data(), false);
401	}
402	}
403
404	Locale::Locale(const Locale &other)
405	: UObject (other), fullName(fullNameBuffer), baseName(nullptr)
406	{
407	*this = other;
408	}
409
410	Locale::Locale(Locale&& other) noexcept
411	: UObject (other), fullName(fullNameBuffer), baseName(fullName) {
412	*this = std::move(other);
413	}
414
415	Locale& Locale::operator=(const Locale& other) {
416	if (this == &other) {
417	return *this;
418	}
419
420	setToBogus();
421
422	if (other.fullName == other.fullNameBuffer) {
423	uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
424	} else if (other.fullName == nullptr) {
425	fullName = nullptr;
426	} else {
427	fullName = uprv_strdup(other.fullName);
428	if (fullName == nullptr) return *this;
429	}
430
431	if (other.baseName == other.fullName) {
432	baseName = fullName;
433	} else if (other.baseName != nullptr) {
434	baseName = uprv_strdup(other.baseName);
435	if (baseName == nullptr) return *this;
436	}
437
438	uprv_strcpy(language, other.language);
439	uprv_strcpy(script, other.script);
440	uprv_strcpy(country, other.country);
441
442	variantBegin = other.variantBegin;
443	fIsBogus = other.fIsBogus;
444
445	return *this;
446	}
447
448	Locale& Locale::operator=(Locale&& other) noexcept {
449	if ((baseName != fullName) && (baseName != fullNameBuffer)) uprv_free(baseName);
450	if (fullName != fullNameBuffer) uprv_free(fullName);
451
452	if (other.fullName == other.fullNameBuffer \|\| other.baseName == other.fullNameBuffer) {
453	uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
454	}
455	if (other.fullName == other.fullNameBuffer) {
456	fullName = fullNameBuffer;
457	} else {
458	fullName = other.fullName;
459	}
460
461	if (other.baseName == other.fullNameBuffer) {
462	baseName = fullNameBuffer;
463	} else if (other.baseName == other.fullName) {
464	baseName = fullName;
465	} else {
466	baseName = other.baseName;
467	}
468
469	uprv_strcpy(language, other.language);
470	uprv_strcpy(script, other.script);
471	uprv_strcpy(country, other.country);
472
473	variantBegin = other.variantBegin;
474	fIsBogus = other.fIsBogus;
475
476	other.baseName = other.fullName = other.fullNameBuffer;
477
478	return *this;
479	}
480
481	Locale *
482	Locale::clone() const {
483	return new Locale (*this);
484	}
485
486	bool
487	Locale::operator==( const Locale& other) const
488	{
489	return (uprv_strcmp(other.fullName, fullName) == `0`);
490	}
491
492	namespace {
493
494	UInitOnce gKnownCanonicalizedInitOnce {};
495	UHashtable gKnownCanonicalized = nullptr*;
496
497	static const char* const KNOWN_CANONICALIZED[] = {
498	"c",
499	// Commonly used locales known are already canonicalized
500	"af", "af_ZA", "am", "am_ET", "ar", "ar_001", "as", "as_IN", "az", "az_AZ",
501	"be", "be_BY", "bg", "bg_BG", "bn", "bn_IN", "bs", "bs_BA", "ca", "ca_ES",
502	"cs", "cs_CZ", "cy", "cy_GB", "da", "da_DK", "de", "de_DE", "el", "el_GR",
503	"en", "en_GB", "en_US", "es", "es_419", "es_ES", "et", "et_EE", "eu",
504	"eu_ES", "fa", "fa_IR", "fi", "fi_FI", "fil", "fil_PH", "fr", "fr_FR",
505	"ga", "ga_IE", "gl", "gl_ES", "gu", "gu_IN", "he", "he_IL", "hi", "hi_IN",
506	"hr", "hr_HR", "hu", "hu_HU", "hy", "hy_AM", "id", "id_ID", "is", "is_IS",
507	"it", "it_IT", "ja", "ja_JP", "jv", "jv_ID", "ka", "ka_GE", "kk", "kk_KZ",
508	"km", "km_KH", "kn", "kn_IN", "ko", "ko_KR", "ky", "ky_KG", "lo", "lo_LA",
509	"lt", "lt_LT", "lv", "lv_LV", "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN",
510	"mr", "mr_IN", "ms", "ms_MY", "my", "my_MM", "nb", "nb_NO", "ne", "ne_NP",
511	"nl", "nl_NL", "no", "or", "or_IN", "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF",
512	"pt", "pt_BR", "pt_PT", "ro", "ro_RO", "ru", "ru_RU", "sd", "sd_IN", "si",
513	"si_LK", "sk", "sk_SK", "sl", "sl_SI", "so", "so_SO", "sq", "sq_AL", "sr",
514	"sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ", "ta",
515	"ta_IN", "te", "te_IN", "th", "th_TH", "tk", "tk_TM", "tr", "tr_TR", "uk",
516	"uk_UA", "ur", "ur_PK", "uz", "uz_UZ", "vi", "vi_VN", "yue", "yue_Hant",
517	"yue_Hant_HK", "yue_HK", "zh", "zh_CN", "zh_Hans", "zh_Hans_CN", "zh_Hant",
518	"zh_Hant_TW", "zh_TW", "zu", "zu_ZA"
519	};
520
521	static UBool U_CALLCONV cleanupKnownCanonicalized() {
522	gKnownCanonicalizedInitOnce.reset();
523	if (gKnownCanonicalized) { uhash_close(gKnownCanonicalized); }
524	return true;
525	}
526
527	static void U_CALLCONV loadKnownCanonicalized(UErrorCode &status) {
528	ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED,
529	cleanupKnownCanonicalized);
530	LocalUHashtablePointer newKnownCanonicalizedMap(
531	uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &status));
532	for (int32_t i = `0`;
533	U_SUCCESS(status) && i < UPRV_LENGTHOF(KNOWN_CANONICALIZED);
534	i++) {
535	uhash_puti(newKnownCanonicalizedMap.getAlias(),
536	(void*)KNOWN_CANONICALIZED[i],
537	`1`, &status);
538	}
539	if (U_FAILURE(status)) {
540	return;
541	}
542
543	gKnownCanonicalized = newKnownCanonicalizedMap.orphan();
544	}
545
546	class AliasData;
547
548	/**
549	* A Builder class to build the alias data.
550	*/
551	class AliasDataBuilder {
552	public:
553	AliasDataBuilder() {
554	}
555
556	// Build the AliasData from resource.
557	AliasData* build(UErrorCode &status);
558
559	private:
560	void readAlias(UResourceBundle* alias,
561	UniqueCharStrings* strings,
562	LocalMemory<const char*>& types,
563	LocalMemory<int32_t>& replacementIndexes,
564	int32_t &length,
565	void (checkType)(const* char* type),
566	void (checkReplacement)(const* UnicodeString& replacement),
567	UErrorCode &status);
568
569	// Read the languageAlias data from alias to
570	// strings+types+replacementIndexes
571	// The number of record will be stored into length.
572	// Allocate length items for types, to store the type field.
573	// Allocate length items for replacementIndexes,
574	// to store the index in the strings for the replacement script.
575	void readLanguageAlias(UResourceBundle* alias,
576	UniqueCharStrings* strings,
577	LocalMemory<const char*>& types,
578	LocalMemory<int32_t>& replacementIndexes,
579	int32_t &length,
580	UErrorCode &status);
581
582	// Read the scriptAlias data from alias to
583	// strings+types+replacementIndexes
584	// Allocate length items for types, to store the type field.
585	// Allocate length items for replacementIndexes,
586	// to store the index in the strings for the replacement script.
587	void readScriptAlias(UResourceBundle* alias,
588	UniqueCharStrings* strings,
589	LocalMemory<const char*>& types,
590	LocalMemory<int32_t>& replacementIndexes,
591	int32_t &length, UErrorCode &status);
592
593	// Read the territoryAlias data from alias to
594	// strings+types+replacementIndexes
595	// Allocate length items for types, to store the type field.
596	// Allocate length items for replacementIndexes,
597	// to store the index in the strings for the replacement script.
598	void readTerritoryAlias(UResourceBundle* alias,
599	UniqueCharStrings* strings,
600	LocalMemory<const char*>& types,
601	LocalMemory<int32_t>& replacementIndexes,
602	int32_t &length, UErrorCode &status);
603
604	// Read the variantAlias data from alias to
605	// strings+types+replacementIndexes
606	// Allocate length items for types, to store the type field.
607	// Allocate length items for replacementIndexes,
608	// to store the index in the strings for the replacement variant.
609	void readVariantAlias(UResourceBundle* alias,
610	UniqueCharStrings* strings,
611	LocalMemory<const char*>& types,
612	LocalMemory<int32_t>& replacementIndexes,
613	int32_t &length, UErrorCode &status);
614
615	// Read the subdivisionAlias data from alias to
616	// strings+types+replacementIndexes
617	// Allocate length items for types, to store the type field.
618	// Allocate length items for replacementIndexes,
619	// to store the index in the strings for the replacement variant.
620	void readSubdivisionAlias(UResourceBundle* alias,
621	UniqueCharStrings* strings,
622	LocalMemory<const char*>& types,
623	LocalMemory<int32_t>& replacementIndexes,
624	int32_t &length, UErrorCode &status);
625	};
626
627	/**
628	* A class to hold the Alias Data.
629	*/
630	class AliasData : public UMemory {
631	public:
632	static const AliasData* singleton(UErrorCode& status) {
633	if (U_FAILURE(status)) {
634	// Do not get into loadData if the status already has error.
635	return nullptr;
636	}
637	umtx_initOnce(AliasData::gInitOnce, &AliasData::loadData, status);
638	return gSingleton;
639	}
640
641	const CharStringMap& languageMap() const { return language; }
642	const CharStringMap& scriptMap() const { return script; }
643	const CharStringMap& territoryMap() const { return territory; }
644	const CharStringMap& variantMap() const { return variant; }
645	const CharStringMap& subdivisionMap() const { return subdivision; }
646
647	static void U_CALLCONV loadData(UErrorCode &status);
648	static UBool U_CALLCONV cleanup();
649
650	static UInitOnce gInitOnce;
651
652	private:
653	AliasData(CharStringMap languageMap,
654	CharStringMap scriptMap,
655	CharStringMap territoryMap,
656	CharStringMap variantMap,
657	CharStringMap subdivisionMap,
658	CharString* strings)
659	: language (std::move(languageMap)),
660	script (std::move(scriptMap)),
661	territory (std::move(territoryMap)),
662	variant (std::move(variantMap)),
663	subdivision (std::move(subdivisionMap)),
664	strings(strings) {
665	}
666
667	~AliasData() {
668	delete strings;
669	}
670
671	static const AliasData* gSingleton;
672
673	CharStringMap language;
674	CharStringMap script;
675	CharStringMap territory;
676	CharStringMap variant;
677	CharStringMap subdivision;
678	CharString* strings;
679
680	friend class AliasDataBuilder;
681	};
682
683
684	const AliasData* AliasData::gSingleton = nullptr;
685	UInitOnce AliasData::gInitOnce {};
686
687	UBool U_CALLCONV
688	AliasData::cleanup()
689	{
690	gInitOnce.reset();
691	delete gSingleton;
692	return true;
693	}
694
695	void
696	AliasDataBuilder::readAlias(
697	UResourceBundle* alias,
698	UniqueCharStrings* strings,
699	LocalMemory<const char*>& types,
700	LocalMemory<int32_t>& replacementIndexes,
701	int32_t &length,
702	void (checkType)(const* char* type),
703	void (checkReplacement)(const* UnicodeString& replacement),
704	UErrorCode &status) {
705	if (U_FAILURE(status)) {
706	return;
707	}
708	length = ures_getSize(alias);
709	const char** rawTypes = types.allocateInsteadAndCopy(length);
710	if (rawTypes == nullptr) {
711	status = U_MEMORY_ALLOCATION_ERROR;
712	return;
713	}
714	int32_t* rawIndexes = replacementIndexes.allocateInsteadAndCopy(length);
715	if (rawIndexes == nullptr) {
716	status = U_MEMORY_ALLOCATION_ERROR;
717	return;
718	}
719	for (int i = `0`; U_SUCCESS(status) && ures_hasNext(alias); i++) {
720	LocalUResourceBundlePointer res(
721	ures_getNextResource(alias, nullptr, &status));
722	const char* aliasFrom = ures_getKey(res.getAlias());
723	UnicodeString aliasTo =
724	ures_getUnicodeStringByKey(res.getAlias(), "replacement", &status);
725	if (U_FAILURE(status)) return;
726
727	checkType(aliasFrom);
728	checkReplacement(aliasTo);
729
730	rawTypes[i] = aliasFrom;
731	rawIndexes[i] = strings->add(aliasTo, status);
732	}
733	}
734
735	/**
736	* Read the languageAlias data from alias to strings+types+replacementIndexes.
737	* Allocate length items for types, to store the type field. Allocate length
738	* items for replacementIndexes, to store the index in the strings for the
739	* replacement language.
740	*/
741	void
742	AliasDataBuilder::readLanguageAlias(
743	UResourceBundle* alias,
744	UniqueCharStrings* strings,
745	LocalMemory<const char*>& types,
746	LocalMemory<int32_t>& replacementIndexes,
747	int32_t &length,
748	UErrorCode &status)
749	{
750	return readAlias(
751	alias, strings, types, replacementIndexes, length,
752	#if U_DEBUG
753	[](const char* type) {
754	// Assert the aliasFrom only contains the following possibilities
755	// language_REGION_variant
756	// language_REGION
757	// language_variant
758	// language
759	// und_variant
760	Locale test(type);
761	// Assert no script in aliasFrom
762	U_ASSERT(test.getScript()[`0`] == `'\0'`);
763	// Assert when language is und, no REGION in aliasFrom.
764	U_ASSERT(test.getLanguage()[`0`] != `'\0'` \|\| test.getCountry()[`0`] == `'\0'`);
765	},
766	#else
767	[](const char*) {},
768	#endif
769	[](const UnicodeString&) {}, status);
770	}
771
772	/**
773	* Read the scriptAlias data from alias to strings+types+replacementIndexes.
774	* Allocate length items for types, to store the type field. Allocate length
775	* items for replacementIndexes, to store the index in the strings for the
776	* replacement script.
777	*/
778	void
779	AliasDataBuilder::readScriptAlias(
780	UResourceBundle* alias,
781	UniqueCharStrings* strings,
782	LocalMemory<const char*>& types,
783	LocalMemory<int32_t>& replacementIndexes,
784	int32_t &length,
785	UErrorCode &status)
786	{
787	return readAlias(
788	alias, strings, types, replacementIndexes, length,
789	#if U_DEBUG
790	[](const char* type) {
791	U_ASSERT(uprv_strlen(type) == `4`);
792	},
793	[](const UnicodeString& replacement) {
794	U_ASSERT(replacement.length() == `4`);
795	},
796	#else
797	[](const char*) {},
798	[](const UnicodeString&) { },
799	#endif
800	status);
801	}
802
803	/**
804	* Read the territoryAlias data from alias to strings+types+replacementIndexes.
805	* Allocate length items for types, to store the type field. Allocate length
806	* items for replacementIndexes, to store the index in the strings for the
807	* replacement regions.
808	*/
809	void
810	AliasDataBuilder::readTerritoryAlias(
811	UResourceBundle* alias,
812	UniqueCharStrings* strings,
813	LocalMemory<const char*>& types,
814	LocalMemory<int32_t>& replacementIndexes,
815	int32_t &length,
816	UErrorCode &status)
817	{
818	return readAlias(
819	alias, strings, types, replacementIndexes, length,
820	#if U_DEBUG
821	[](const char* type) {
822	U_ASSERT(uprv_strlen(type) == `2` \|\| uprv_strlen(type) == `3`);
823	},
824	#else
825	[](const char*) {},
826	#endif
827	[](const UnicodeString&) { },
828	status);
829	}
830
831	/**
832	* Read the variantAlias data from alias to strings+types+replacementIndexes.
833	* Allocate length items for types, to store the type field. Allocate length
834	* items for replacementIndexes, to store the index in the strings for the
835	* replacement variant.
836	*/
837	void
838	AliasDataBuilder::readVariantAlias(
839	UResourceBundle* alias,
840	UniqueCharStrings* strings,
841	LocalMemory<const char*>& types,
842	LocalMemory<int32_t>& replacementIndexes,
843	int32_t &length,
844	UErrorCode &status)
845	{
846	return readAlias(
847	alias, strings, types, replacementIndexes, length,
848	#if U_DEBUG
849	[](const char* type) {
850	U_ASSERT(uprv_strlen(type) >= `4` && uprv_strlen(type) <= `8`);
851	U_ASSERT(uprv_strlen(type) != `4` \|\|
852	(type[`0`] >= `'0'` && type[`0`] <= `'9'`));
853	},
854	[](const UnicodeString& replacement) {
855	U_ASSERT(replacement.length() >= `4` && replacement.length() <= `8`);
856	U_ASSERT(replacement.length() != `4` \|\|
857	(replacement.charAt(`0`) >= u`'0'` &&
858	replacement.charAt(`0`) <= u`'9'`));
859	},
860	#else
861	[](const char*) {},
862	[](const UnicodeString&) { },
863	#endif
864	status);
865	}
866
867	/**
868	* Read the subdivisionAlias data from alias to strings+types+replacementIndexes.
869	* Allocate length items for types, to store the type field. Allocate length
870	* items for replacementIndexes, to store the index in the strings for the
871	* replacement regions.
872	*/
873	void
874	AliasDataBuilder::readSubdivisionAlias(
875	UResourceBundle* alias,
876	UniqueCharStrings* strings,
877	LocalMemory<const char*>& types,
878	LocalMemory<int32_t>& replacementIndexes,
879	int32_t &length,
880	UErrorCode &status)
881	{
882	return readAlias(
883	alias, strings, types, replacementIndexes, length,
884	#if U_DEBUG
885	[](const char* type) {
886	U_ASSERT(uprv_strlen(type) >= `3` && uprv_strlen(type) <= `8`);
887	},
888	#else
889	[](const char*) {},
890	#endif
891	[](const UnicodeString&) { },
892	status);
893	}
894
895	/**
896	* Initializes the alias data from the ICU resource bundles. The alias data
897	* contains alias of language, country, script and variants.
898	*
899	* If the alias data has already loaded, then this method simply returns without
900	* doing anything meaningful.
901	*/
902	void U_CALLCONV
903	AliasData::loadData(UErrorCode &status)
904	{
905	#ifdef LOCALE_CANONICALIZATION_DEBUG
906	UDate start = uprv_getRawUTCtime();
907	#endif // LOCALE_CANONICALIZATION_DEBUG
908	ucln_common_registerCleanup(UCLN_COMMON_LOCALE_ALIAS, cleanup);
909	AliasDataBuilder builder;
910	gSingleton = builder.build(status);
911	#ifdef LOCALE_CANONICALIZATION_DEBUG
912	UDate end = uprv_getRawUTCtime();
913	printf("AliasData::loadData took total %f ms\n", end - start);
914	#endif // LOCALE_CANONICALIZATION_DEBUG
915	}
916
917	/**
918	* Build the alias data from resources.
919	*/
920	AliasData*
921	AliasDataBuilder::build(UErrorCode &status) {
922	LocalUResourceBundlePointer metadata(
923	ures_openDirect(nullptr, "metadata", &status));
924	LocalUResourceBundlePointer metadataAlias(
925	ures_getByKey(metadata.getAlias(), "alias", nullptr, &status));
926	LocalUResourceBundlePointer languageAlias(
927	ures_getByKey(metadataAlias.getAlias(), "language", nullptr, &status));
928	LocalUResourceBundlePointer scriptAlias(
929	ures_getByKey(metadataAlias.getAlias(), "script", nullptr, &status));
930	LocalUResourceBundlePointer territoryAlias(
931	ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
932	LocalUResourceBundlePointer variantAlias(
933	ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
934	LocalUResourceBundlePointer subdivisionAlias(
935	ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status));
936
937	if (U_FAILURE(status)) {
938	return nullptr;
939	}
940	int32_t languagesLength = `0`, scriptLength = `0`, territoryLength = `0`,
941	variantLength = `0`, subdivisionLength = `0`;
942
943	// Read the languageAlias into languageTypes, languageReplacementIndexes
944	// and strings
945	UniqueCharStrings strings(status);
946	LocalMemory<const char*> languageTypes;
947	LocalMemory<int32_t> languageReplacementIndexes;
948	readLanguageAlias(languageAlias.getAlias(),
949	&strings,
950	languageTypes,
951	languageReplacementIndexes,
952	languagesLength,
953	status);
954
955	// Read the scriptAlias into scriptTypes, scriptReplacementIndexes
956	// and strings
957	LocalMemory<const char*> scriptTypes;
958	LocalMemory<int32_t> scriptReplacementIndexes;
959	readScriptAlias(scriptAlias.getAlias(),
960	&strings,
961	scriptTypes,
962	scriptReplacementIndexes,
963	scriptLength,
964	status);
965
966	// Read the territoryAlias into territoryTypes, territoryReplacementIndexes
967	// and strings
968	LocalMemory<const char*> territoryTypes;
969	LocalMemory<int32_t> territoryReplacementIndexes;
970	readTerritoryAlias(territoryAlias.getAlias(),
971	&strings,
972	territoryTypes,
973	territoryReplacementIndexes,
974	territoryLength, status);
975
976	// Read the variantAlias into variantTypes, variantReplacementIndexes
977	// and strings
978	LocalMemory<const char*> variantTypes;
979	LocalMemory<int32_t> variantReplacementIndexes;
980	readVariantAlias(variantAlias.getAlias(),
981	&strings,
982	variantTypes,
983	variantReplacementIndexes,
984	variantLength, status);
985
986	// Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
987	// and strings
988	LocalMemory<const char*> subdivisionTypes;
989	LocalMemory<int32_t> subdivisionReplacementIndexes;
990	readSubdivisionAlias(subdivisionAlias.getAlias(),
991	&strings,
992	subdivisionTypes,
993	subdivisionReplacementIndexes,
994	subdivisionLength, status);
995
996	if (U_FAILURE(status)) {
997	return nullptr;
998	}
999
1000	// We can only use strings after freeze it.
1001	strings.freeze();
1002
1003	// Build the languageMap from languageTypes & languageReplacementIndexes
1004	CharStringMap languageMap(`490`, status);
1005	for (int32_t i = `0`; U_SUCCESS(status) && i < languagesLength; i++) {
1006	languageMap.put(languageTypes [i],
1007	strings.get(languageReplacementIndexes [i]),
1008	status);
1009	}
1010
1011	// Build the scriptMap from scriptTypes & scriptReplacementIndexes
1012	CharStringMap scriptMap(`1`, status);
1013	for (int32_t i = `0`; U_SUCCESS(status) && i < scriptLength; i++) {
1014	scriptMap.put(scriptTypes [i],
1015	strings.get(scriptReplacementIndexes [i]),
1016	status);
1017	}
1018
1019	// Build the territoryMap from territoryTypes & territoryReplacementIndexes
1020	CharStringMap territoryMap(`650`, status);
1021	for (int32_t i = `0`; U_SUCCESS(status) && i < territoryLength; i++) {
1022	territoryMap.put(territoryTypes [i],
1023	strings.get(territoryReplacementIndexes [i]),
1024	status);
1025	}
1026
1027	// Build the variantMap from variantTypes & variantReplacementIndexes.
1028	CharStringMap variantMap(`2`, status);
1029	for (int32_t i = `0`; U_SUCCESS(status) && i < variantLength; i++) {
1030	variantMap.put(variantTypes [i],
1031	strings.get(variantReplacementIndexes [i]),
1032	status);
1033	}
1034
1035	// Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
1036	CharStringMap subdivisionMap(`2`, status);
1037	for (int32_t i = `0`; U_SUCCESS(status) && i < subdivisionLength; i++) {
1038	subdivisionMap.put(subdivisionTypes [i],
1039	strings.get(subdivisionReplacementIndexes [i]),
1040	status);
1041	}
1042
1043	if (U_FAILURE(status)) {
1044	return nullptr;
1045	}
1046
1047	// copy hashtables
1048	auto data = new* AliasData (
1049	std::move(languageMap),
1050	std::move(scriptMap),
1051	std::move(territoryMap),
1052	std::move(variantMap),
1053	std::move(subdivisionMap),
1054	strings.orphanCharStrings());
1055
1056	if (data == nullptr) {
1057	status = U_MEMORY_ALLOCATION_ERROR;
1058	}
1059	return data;
1060	}
1061
1062	/**
1063	* A class that find the replacement values of locale fields by using AliasData.
1064	*/
1065	class AliasReplacer {
1066	public:
1067	AliasReplacer(UErrorCode status) :
1068	language(nullptr), script(nullptr), region(nullptr),
1069	extensions(nullptr), variants (status),
1070	data(nullptr) {
1071	}
1072	~AliasReplacer() {
1073	}
1074
1075	// Check the fields inside locale, if need to replace fields,
1076	// place the the replaced locale ID in out and return true.
1077	// Otherwise return false for no replacement or error.
1078	bool replace(
1079	const Locale& locale, CharString& out, UErrorCode& status);
1080
1081	private:
1082	const char* language;
1083	const char* script;
1084	const char* region;
1085	const char* extensions;
1086	UVector variants;
1087
1088	const AliasData* data;
1089
1090	inline bool notEmpty(const char* str) {
1091	return str && str[`0`] != NULL_CHAR;
1092	}
1093
1094	/**
1095	* If replacement is neither null nor empty and input is either null or empty,
1096	* return replacement.
1097	* If replacement is neither null nor empty but input is not empty, return input.
1098	* If replacement is either null or empty and type is either null or empty,
1099	* return input.
1100	* Otherwise return null.
1101	* replacement input type return
1102	* AAA nullptr * AAA
1103	* AAA BBB * BBB
1104	* nullptr \|\| "" CCC nullptr CCC
1105	* nullptr \|\| "" * DDD nullptr
1106	*/
1107	inline const char* deleteOrReplace(
1108	const char* input, const char* type, const char* replacement) {
1109	return notEmpty(replacement) ?
1110	((input == nullptr) ? replacement : input) :
1111	((type == nullptr) ? input : nullptr);
1112	}
1113
1114	inline bool same(const char* a, const char* b) {
1115	if (a == nullptr && b == nullptr) {
1116	return true;
1117	}
1118	if ((a == nullptr && b != nullptr) \|\|
1119	(a != nullptr && b == nullptr)) {
1120	return false;
1121	}
1122	return uprv_strcmp(a, b) == `0`;
1123	}
1124
1125	// Gather fields and generate locale ID into out.
1126	CharString& outputToString(CharString& out, UErrorCode status);
1127
1128	// Generate the lookup key.
1129	CharString& generateKey(const char* language, const char* region,
1130	const char* variant, CharString& out,
1131	UErrorCode status);
1132
1133	void parseLanguageReplacement(const char* replacement,
1134	const char*& replaceLanguage,
1135	const char*& replaceScript,
1136	const char*& replaceRegion,
1137	const char*& replaceVariant,
1138	const char*& replaceExtensions,
1139	UVector& toBeFreed,
1140	UErrorCode& status);
1141
1142	// Replace by using languageAlias.
1143	bool replaceLanguage(bool checkLanguage, bool checkRegion,
1144	bool checkVariants, UVector& toBeFreed,
1145	UErrorCode& status);
1146
1147	// Replace by using territoryAlias.
1148	bool replaceTerritory(UVector& toBeFreed, UErrorCode& status);
1149
1150	// Replace by using scriptAlias.
1151	bool replaceScript(UErrorCode& status);
1152
1153	// Replace by using variantAlias.
1154	bool replaceVariant(UErrorCode& status);
1155
1156	// Replace by using subdivisionAlias.
1157	bool replaceSubdivision(StringPiece subdivision,
1158	CharString& output, UErrorCode& status);
1159
1160	// Replace transformed extensions.
1161	bool replaceTransformedExtensions(
1162	CharString& transformedExtensions, CharString& output, UErrorCode& status);
1163	};
1164
1165	CharString&
1166	AliasReplacer::generateKey(
1167	const char* language, const char* region, const char* variant,
1168	CharString& out, UErrorCode status)
1169	{
1170	out.append(language, status);
1171	if (notEmpty(region)) {
1172	out.append(SEP_CHAR, status)
1173	.append(region, status);
1174	}
1175	if (notEmpty(variant)) {
1176	out.append(SEP_CHAR, status)
1177	.append(variant, status);
1178	}
1179	return out;
1180	}
1181
1182	void
1183	AliasReplacer::parseLanguageReplacement(
1184	const char* replacement,
1185	const char*& replacedLanguage,
1186	const char*& replacedScript,
1187	const char*& replacedRegion,
1188	const char*& replacedVariant,
1189	const char*& replacedExtensions,
1190	UVector& toBeFreed,
1191	UErrorCode& status)
1192	{
1193	if (U_FAILURE(status)) {
1194	return;
1195	}
1196	replacedScript = replacedRegion = replacedVariant
1197	= replacedExtensions = nullptr;
1198	if (uprv_strchr(replacement, `'_'`) == nullptr) {
1199	replacedLanguage = replacement;
1200	// reach the end, just return it.
1201	return;
1202	}
1203	// We have multiple field so we have to allocate and parse
1204	CharString* str = new CharString (
1205	replacement, (int32_t)uprv_strlen(replacement), status);
1206	LocalPointer<CharString> lpStr(str, status);
1207	toBeFreed.adoptElement(lpStr.orphan(), status);
1208	if (U_FAILURE(status)) {
1209	return;
1210	}
1211	char* data = str->data();
1212	replacedLanguage = (const char*) data;
1213	char* endOfField = uprv_strchr(data, `'_'`);
1214	endOfField = `'\0'`; // null terminiate it.*
1215	endOfField++;
1216	const char* start = endOfField;
1217	endOfField = (char*) uprv_strchr(start, `'_'`);
1218	size_t len = `0`;
1219	if (endOfField == nullptr) {
1220	len = uprv_strlen(start);
1221	} else {
1222	len = endOfField - start;
1223	endOfField = `'\0'`; // null terminiate it.*
1224	}
1225	if (len == `4` && uprv_isASCIILetter(*start)) {
1226	// Got a script
1227	replacedScript = start;
1228	if (endOfField == nullptr) {
1229	return;
1230	}
1231	start = endOfField++;
1232	endOfField = (char*)uprv_strchr(start, `'_'`);
1233	if (endOfField == nullptr) {
1234	len = uprv_strlen(start);
1235	} else {
1236	len = endOfField - start;
1237	endOfField = `'\0'`; // null terminiate it.*
1238	}
1239	}
1240	if (len >= `2` && len <= `3`) {
1241	// Got a region
1242	replacedRegion = start;
1243	if (endOfField == nullptr) {
1244	return;
1245	}
1246	start = endOfField++;
1247	endOfField = (char*)uprv_strchr(start, `'_'`);
1248	if (endOfField == nullptr) {
1249	len = uprv_strlen(start);
1250	} else {
1251	len = endOfField - start;
1252	endOfField = `'\0'`; // null terminiate it.*
1253	}
1254	}
1255	if (len >= `4`) {
1256	// Got a variant
1257	replacedVariant = start;
1258	if (endOfField == nullptr) {
1259	return;
1260	}
1261	start = endOfField++;
1262	}
1263	replacedExtensions = start;
1264	}
1265
1266	bool
1267	AliasReplacer::replaceLanguage(
1268	bool checkLanguage, bool checkRegion,
1269	bool checkVariants, UVector& toBeFreed, UErrorCode& status)
1270	{
1271	if (U_FAILURE(status)) {
1272	return false;
1273	}
1274	if ( (checkRegion && region == nullptr) \|\|
1275	(checkVariants && variants.size() == `0`)) {
1276	// Nothing to search.
1277	return false;
1278	}
1279	int32_t variant_size = checkVariants ? variants.size() : `1`;
1280	// Since we may have more than one variant, we need to loop through them.
1281	const char* searchLanguage = checkLanguage ? language : "und";
1282	const char* searchRegion = checkRegion ? region : nullptr;
1283	const char* searchVariant = nullptr;
1284	for (int32_t variant_index = `0`;
1285	variant_index < variant_size;
1286	variant_index++) {
1287	if (checkVariants) {
1288	U_ASSERT(variant_index < variant_size);
1289	searchVariant = (const char*)(variants.elementAt(variant_index));
1290	}
1291
1292	if (searchVariant != nullptr && uprv_strlen(searchVariant) < `4`) {
1293	// Do not consider ill-formed variant subtag.
1294	searchVariant = nullptr;
1295	}
1296	CharString typeKey;
1297	generateKey(searchLanguage, searchRegion, searchVariant, typeKey,
1298	status);
1299	if (U_FAILURE(status)) {
1300	return false;
1301	}
1302	const char *replacement = data->languageMap().get(typeKey.data());
1303	if (replacement == nullptr) {
1304	// Found no replacement data.
1305	continue;
1306	}
1307
1308	const char* replacedLanguage = nullptr;
1309	const char* replacedScript = nullptr;
1310	const char* replacedRegion = nullptr;
1311	const char* replacedVariant = nullptr;
1312	const char* replacedExtensions = nullptr;
1313	parseLanguageReplacement(replacement,
1314	replacedLanguage,
1315	replacedScript,
1316	replacedRegion,
1317	replacedVariant,
1318	replacedExtensions,
1319	toBeFreed,
1320	status);
1321	replacedLanguage =
1322	(replacedLanguage != nullptr && uprv_strcmp(replacedLanguage, "und") == `0`) ?
1323	language : replacedLanguage;
1324	replacedScript = deleteOrReplace(script, nullptr, replacedScript);
1325	replacedRegion = deleteOrReplace(region, searchRegion, replacedRegion);
1326	replacedVariant = deleteOrReplace(
1327	searchVariant, searchVariant, replacedVariant);
1328
1329	if ( same(language, replacedLanguage) &&
1330	same(script, replacedScript) &&
1331	same(region, replacedRegion) &&
1332	same(searchVariant, replacedVariant) &&
1333	replacedExtensions == nullptr) {
1334	// Replacement produce no changes.
1335	continue;
1336	}
1337
1338	language = replacedLanguage;
1339	region = replacedRegion;
1340	script = replacedScript;
1341	if (searchVariant != nullptr) {
1342	if (notEmpty(replacedVariant)) {
1343	variants.setElementAt((void*)replacedVariant, variant_index);
1344	} else {
1345	variants.removeElementAt(variant_index);
1346	}
1347	}
1348	if (replacedExtensions != nullptr) {
1349	// DO NOTHING
1350	// UTS35 does not specify what should we do if we have extensions in the
1351	// replacement. Currently we know only the following 4 "BCP47 LegacyRules" have
1352	// extensions in them languageAlias:
1353	// i_default => en_x_i_default
1354	// i_enochian => und_x_i_enochian
1355	// i_mingo => see_x_i_mingo
1356	// zh_min => nan_x_zh_min
1357	// But all of them are already changed by code inside ultag_parse() before
1358	// hitting this code.
1359	}
1360
1361	// Something changed by language alias data.
1362	return true;
1363	}
1364	// Nothing changed by language alias data.
1365	return false;
1366	}
1367
1368	bool
1369	AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
1370	{
1371	if (U_FAILURE(status)) {
1372	return false;
1373	}
1374	if (region == nullptr) {
1375	// No region to search.
1376	return false;
1377	}
1378	const char *replacement = data->territoryMap().get(region);
1379	if (replacement == nullptr) {
1380	// Found no replacement data for this region.
1381	return false;
1382	}
1383	const char* replacedRegion = replacement;
1384	const char* firstSpace = uprv_strchr(replacement, `' '`);
1385	if (firstSpace != nullptr) {
1386	// If there are are more than one region in the replacement.
1387	// We need to check which one match based on the language.
1388	// Cannot use nullptr for language because that will construct
1389	// the default locale, in that case, use "und" to get the correct
1390	// locale.
1391	Locale l = LocaleBuilder ()
1392	.setLanguage(language == nullptr ? "und" : language)
1393	.setScript(script)
1394	.build(status);
1395	l.addLikelySubtags(status);
1396	const char* likelyRegion = l.getCountry();
1397	LocalPointer<CharString> item;
1398	if (likelyRegion != nullptr && uprv_strlen(likelyRegion) > `0`) {
1399	size_t len = uprv_strlen(likelyRegion);
1400	const char* foundInReplacement = uprv_strstr(replacement,
1401	likelyRegion);
1402	if (foundInReplacement != nullptr) {
1403	// Assuming the case there are no three letter region code in
1404	// the replacement of territoryAlias
1405	U_ASSERT(foundInReplacement == replacement \|\|
1406	*(foundInReplacement-`1`) == `' '`);
1407	U_ASSERT(foundInReplacement[len] == `' '` \|\|
1408	foundInReplacement[len] == `'\0'`);
1409	item.adoptInsteadAndCheckErrorCode(
1410	new CharString (foundInReplacement, (int32_t)len, status), status);
1411	}
1412	}
1413	if (item.isNull() && U_SUCCESS(status)) {
1414	item.adoptInsteadAndCheckErrorCode(
1415	new CharString (replacement,
1416	(int32_t)(firstSpace - replacement), status), status);
1417	}
1418	if (U_FAILURE(status)) { return false; }
1419	replacedRegion = item ->data();
1420	toBeFreed.adoptElement(item.orphan(), status);
1421	if (U_FAILURE(status)) { return false; }
1422	}
1423	U_ASSERT(!same(region, replacedRegion));
1424	region = replacedRegion;
1425	// The region is changed by data in territory alias.
1426	return true;
1427	}
1428
1429	bool
1430	AliasReplacer::replaceScript(UErrorCode& status)
1431	{
1432	if (U_FAILURE(status)) {
1433	return false;
1434	}
1435	if (script == nullptr) {
1436	// No script to search.
1437	return false;
1438	}
1439	const char *replacement = data->scriptMap().get(script);
1440	if (replacement == nullptr) {
1441	// Found no replacement data for this script.
1442	return false;
1443	}
1444	U_ASSERT(!same(script, replacement));
1445	script = replacement;
1446	// The script is changed by data in script alias.
1447	return true;
1448	}
1449
1450	bool
1451	AliasReplacer::replaceVariant(UErrorCode& status)
1452	{
1453	if (U_FAILURE(status)) {
1454	return false;
1455	}
1456	// Since we may have more than one variant, we need to loop through them.
1457	for (int32_t i = `0`; i < variants.size(); i++) {
1458	const char variant = (const* char*)(variants.elementAt(i));
1459	const char *replacement = data->variantMap().get(variant);
1460	if (replacement == nullptr) {
1461	// Found no replacement data for this variant.
1462	continue;
1463	}
1464	U_ASSERT((uprv_strlen(replacement) >= `5` &&
1465	uprv_strlen(replacement) <= `8`) \|\|
1466	(uprv_strlen(replacement) == `4` &&
1467	replacement[`0`] >= `'0'` &&
1468	replacement[`0`] <= `'9'`));
1469	if (!same(variant, replacement)) {
1470	variants.setElementAt((void*)replacement, i);
1471	// Special hack to handle hepburn-heploc => alalc97
1472	if (uprv_strcmp(variant, "heploc") == `0`) {
1473	for (int32_t j = `0`; j < variants.size(); j++) {
1474	if (uprv_strcmp((const char*)(variants.elementAt(j)),
1475	"hepburn") == `0`) {
1476	variants.removeElementAt(j);
1477	}
1478	}
1479	}
1480	return true;
1481	}
1482	}
1483	return false;
1484	}
1485
1486	bool
1487	AliasReplacer::replaceSubdivision(
1488	StringPiece subdivision, CharString& output, UErrorCode& status)
1489	{
1490	if (U_FAILURE(status)) {
1491	return false;
1492	}
1493	const char *replacement = data->subdivisionMap().get(subdivision.data());
1494	if (replacement != nullptr) {
1495	const char* firstSpace = uprv_strchr(replacement, `' '`);
1496	// Found replacement data for this subdivision.
1497	size_t len = (firstSpace != nullptr) ?
1498	(firstSpace - replacement) : uprv_strlen(replacement);
1499	if (`2` <= len && len <= `8`) {
1500	output.append(replacement, (int32_t)len, status);
1501	if (`2` == len) {
1502	// Add 'zzzz' based on changes to UTS #35 for CLDR-14312.
1503	output.append("zzzz", `4`, status);
1504	}
1505	}
1506	return true;
1507	}
1508	return false;
1509	}
1510
1511	bool
1512	AliasReplacer::replaceTransformedExtensions(
1513	CharString& transformedExtensions, CharString& output, UErrorCode& status)
1514	{
1515	// The content of the transformedExtensions will be modified in this
1516	// function to NUL-terminating (tkey-tvalue) pairs.
1517	if (U_FAILURE(status)) {
1518	return false;
1519	}
1520	int32_t len = transformedExtensions.length();
1521	const char* str = transformedExtensions.data();
1522	const char* tkey = ultag_getTKeyStart(str);
1523	int32_t tlangLen = (tkey == str) ? `0` :
1524	((tkey == nullptr) ? len : static_cast<int32_t>((tkey - str - `1`)));
1525	CharStringByteSink sink(&output);
1526	if (tlangLen > `0`) {
1527	Locale tlang = LocaleBuilder ()
1528	.setLanguageTag(StringPiece (str, tlangLen))
1529	.build(status);
1530	tlang.canonicalize(status);
1531	tlang.toLanguageTag(sink, status);
1532	if (U_FAILURE(status)) {
1533	return false;
1534	}
1535	T_CString_toLowerCase(output.data());
1536	}
1537	if (tkey != nullptr) {
1538	// We need to sort the tfields by tkey
1539	UVector tfields(status);
1540	if (U_FAILURE(status)) {
1541	return false;
1542	}
1543	do {
1544	const char* tvalue = uprv_strchr(tkey, `'-'`);
1545	if (tvalue == nullptr) {
1546	status = U_ILLEGAL_ARGUMENT_ERROR;
1547	return false;
1548	}
1549	const char* nextTKey = ultag_getTKeyStart(tvalue);
1550	if (nextTKey != nullptr) {
1551	((char*)(nextTKey-`1`)) = `'\0'`; // NUL terminate tvalue*
1552	}
1553	tfields.insertElementAt((void*)tkey, tfields.size(), status);
1554	if (U_FAILURE(status)) {
1555	return false;
1556	}
1557	tkey = nextTKey;
1558	} while (tkey != nullptr);
1559	tfields.sort([](UElement e1, UElement e2) -> int32_t {
1560	return uprv_strcmp((const char)e1.pointer, (const* char*)e2.pointer);
1561	}, status);
1562	for (int32_t i = `0`; i < tfields.size(); i++) {
1563	if (output.length() > `0`) {
1564	output.append(`'-'`, status);
1565	}
1566	const char* tfield = (const char*) tfields.elementAt(i);
1567	const char* tvalue = uprv_strchr(tfield, `'-'`);
1568	if (tvalue == nullptr) {
1569	status = U_ILLEGAL_ARGUMENT_ERROR;
1570	return false;
1571	}
1572	// Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
1573	((char*)tvalue++) = `'\0'`; // NUL terminate tkey*
1574	output.append(tfield, status).append(`'-'`, status);
1575	const char* bcpTValue = ulocimp_toBcpType(tfield, tvalue, nullptr, nullptr);
1576	output.append((bcpTValue == nullptr) ? tvalue : bcpTValue, status);
1577	}
1578	}
1579	if (U_FAILURE(status)) {
1580	return false;
1581	}
1582	return true;
1583	}
1584
1585	CharString&
1586	AliasReplacer::outputToString(
1587	CharString& out, UErrorCode status)
1588	{
1589	out.append(language, status);
1590	if (notEmpty(script)) {
1591	out.append(SEP_CHAR, status)
1592	.append(script, status);
1593	}
1594	if (notEmpty(region)) {
1595	out.append(SEP_CHAR, status)
1596	.append(region, status);
1597	}
1598	if (variants.size() > `0`) {
1599	if (!notEmpty(script) && !notEmpty(region)) {
1600	out.append(SEP_CHAR, status);
1601	}
1602	variants.sort([](UElement e1, UElement e2) -> int32_t {
1603	return uprv_strcmp((const char)e1.pointer, (const* char*)e2.pointer);
1604	}, status);
1605	int32_t variantsStart = out.length();
1606	for (int32_t i = `0`; i < variants.size(); i++) {
1607	out.append(SEP_CHAR, status)
1608	.append((const char*)(variants.elementAt(i)),
1609	status);
1610	}
1611	T_CString_toUpperCase(out.data() + variantsStart);
1612	}
1613	if (notEmpty(extensions)) {
1614	CharString tmp("und_", status);
1615	tmp.append(extensions, status);
1616	Locale tmpLocale(tmp.data());
1617	// only support x extension inside CLDR for now.
1618	U_ASSERT(extensions[`0`] == `'x'`);
1619	out.append(tmpLocale.getName() + `1`, status);
1620	}
1621	return out;
1622	}
1623
1624	bool
1625	AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status)
1626	{
1627	data = AliasData::singleton(status);
1628	if (U_FAILURE(status)) {
1629	return false;
1630	}
1631	U_ASSERT(data != nullptr);
1632	out.clear();
1633	language = locale.getLanguage();
1634	if (!notEmpty(language)) {
1635	language = nullptr;
1636	}
1637	script = locale.getScript();
1638	if (!notEmpty(script)) {
1639	script = nullptr;
1640	}
1641	region = locale.getCountry();
1642	if (!notEmpty(region)) {
1643	region = nullptr;
1644	}
1645	const char* variantsStr = locale.getVariant();
1646	CharString variantsBuff(variantsStr, -`1`, status);
1647	if (!variantsBuff.isEmpty()) {
1648	if (U_FAILURE(status)) { return false; }
1649	char* start = variantsBuff.data();
1650	T_CString_toLowerCase(start);
1651	char* end;
1652	while ((end = uprv_strchr(start, SEP_CHAR)) != nullptr &&
1653	U_SUCCESS(status)) {
1654	end = NULL_CHAR; // null terminate inside variantsBuff*
1655	variants.addElement(start, status);
1656	start = end + `1`;
1657	}
1658	variants.addElement(start, status);
1659	}
1660	if (U_FAILURE(status)) { return false; }
1661
1662	// Sort the variants
1663	variants.sort([](UElement e1, UElement e2) -> int32_t {
1664	return uprv_strcmp((const char)e1.pointer, (const* char*)e2.pointer);
1665	}, status);
1666
1667	// A changed count to assert when loop too many times.
1668	int changed = `0`;
1669	// A UVector to to hold CharString allocated by the replace method*
1670	// and freed when out of scope from his function.
1671	UVector stringsToBeFreed([](void obj){ delete* ((CharString*) obj); },
1672	nullptr, `10`, status);
1673	while (U_SUCCESS(status)) {
1674	// Something wrong with the data cause looping here more than 10 times
1675	// already.
1676	U_ASSERT(changed < `5`);
1677	// From observation of key in data/misc/metadata.txt
1678	// we know currently we only need to search in the following combination
1679	// of fields for type in languageAlias:
1680	// lang_region_variant*
1681	// lang_region*
1682	// lang_variant*
1683	// lang*
1684	// und_variant*
1685	// This assumption is ensured by the U_ASSERT in readLanguageAlias
1686	//
1687	// lang REGION variant
1688	if ( replaceLanguage(true, true, true, stringsToBeFreed, status) \|\|
1689	replaceLanguage(true, true, false, stringsToBeFreed, status) \|\|
1690	replaceLanguage(true, false, true, stringsToBeFreed, status) \|\|
1691	replaceLanguage(true, false, false, stringsToBeFreed, status) \|\|
1692	replaceLanguage(false,false, true, stringsToBeFreed, status) \|\|
1693	replaceTerritory(stringsToBeFreed, status) \|\|
1694	replaceScript(status) \|\|
1695	replaceVariant(status)) {
1696	// Some values in data is changed, try to match from the beginning
1697	// again.
1698	changed++;
1699	continue;
1700	}
1701	// Nothing changed. Break out.
1702	break;
1703	} // while(1)
1704
1705	if (U_FAILURE(status)) { return false; }
1706	// Nothing changed and we know the order of the variants are not change
1707	// because we have no variant or only one.
1708	const char* extensionsStr = locale_getKeywordsStart(locale.getName());
1709	if (changed == `0` && variants.size() <= `1` && extensionsStr == nullptr) {
1710	return false;
1711	}
1712	outputToString(out, status);
1713	if (U_FAILURE(status)) {
1714	return false;
1715	}
1716	if (extensionsStr != nullptr) {
1717	changed = `0`;
1718	Locale temp(locale);
1719	LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
1720	if (U_SUCCESS(status) && !iter.isNull()) {
1721	const char* key;
1722	while ((key = iter ->next(nullptr, status)) != nullptr) {
1723	if (uprv_strcmp("sd", key) == `0` \|\| uprv_strcmp("rg", key) == `0` \|\|
1724	uprv_strcmp("t", key) == `0`) {
1725	CharString value;
1726	CharStringByteSink valueSink(&value);
1727	locale.getKeywordValue(key, valueSink, status);
1728	if (U_FAILURE(status)) {
1729	status = U_ZERO_ERROR;
1730	continue;
1731	}
1732	CharString replacement;
1733	if (uprv_strlen(key) == `2`) {
1734	if (replaceSubdivision(value.toStringPiece(), replacement, status)) {
1735	changed++;
1736	temp.setKeywordValue(key, replacement.data(), status);
1737	}
1738	} else {
1739	U_ASSERT(uprv_strcmp(key, "t") == `0`);
1740	if (replaceTransformedExtensions(value, replacement, status)) {
1741	changed++;
1742	temp.setKeywordValue(key, replacement.data(), status);
1743	}
1744	}
1745	if (U_FAILURE(status)) {
1746	return false;
1747	}
1748	}
1749	}
1750	}
1751	if (changed != `0`) {
1752	extensionsStr = locale_getKeywordsStart(temp.getName());
1753	}
1754	out.append(extensionsStr, status);
1755	}
1756	if (U_FAILURE(status)) {
1757	return false;
1758	}
1759	// If the tag is not changed, return.
1760	if (uprv_strcmp(out.data(), locale.getName()) == `0`) {
1761	out.clear();
1762	return false;
1763	}
1764	return true;
1765	}
1766
1767	// Return true if the locale is changed during canonicalization.
1768	// The replaced value then will be put into out.
1769	bool
1770	canonicalizeLocale(const Locale& locale, CharString& out, UErrorCode& status)
1771	{
1772	AliasReplacer replacer(status);
1773	return replacer.replace(locale, out, status);
1774	}
1775
1776	// Function to optimize for known cases without so we can skip the loading
1777	// of resources in the startup time until we really need it.
1778	bool
1779	isKnownCanonicalizedLocale(const char* locale, UErrorCode& status)
1780	{
1781	if ( uprv_strcmp(locale, "c") == `0` \|\|
1782	uprv_strcmp(locale, "en") == `0` \|\|
1783	uprv_strcmp(locale, "en_US") == `0`) {
1784	return true;
1785	}
1786
1787	// common well-known Canonicalized.
1788	umtx_initOnce(gKnownCanonicalizedInitOnce,
1789	&loadKnownCanonicalized, status);
1790	if (U_FAILURE(status)) {
1791	return false;
1792	}
1793	U_ASSERT(gKnownCanonicalized != nullptr);
1794	return uhash_geti(gKnownCanonicalized, locale) != `0`;
1795	}
1796
1797	} // namespace
1798
1799	// Function for testing.
1800	U_CAPI const char* const*
1801	ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length)
1802	{
1803	*length = UPRV_LENGTHOF(KNOWN_CANONICALIZED);
1804	return KNOWN_CANONICALIZED;
1805	}
1806
1807	// Function for testing.
1808	U_CAPI bool
1809	ulocimp_isCanonicalizedLocaleForTest(const char* localeName)
1810	{
1811	Locale l(localeName);
1812	UErrorCode status = U_ZERO_ERROR;
1813	CharString temp;
1814	return !canonicalizeLocale(l, temp, status) && U_SUCCESS(status);
1815	}
1816
1817	/This function initializes a Locale from a C locale ID/
1818	Locale& Locale::init(const char* localeID, UBool canonicalize)
1819	{
1820	fIsBogus = false;
1821	/ Free our current storage /
1822	if ((baseName != fullName) && (baseName != fullNameBuffer)) {
1823	uprv_free(baseName);
1824	}
1825	baseName = nullptr;
1826	if(fullName != fullNameBuffer) {
1827	uprv_free(fullName);
1828	fullName = fullNameBuffer;
1829	}
1830
1831	// not a loop:
1832	// just an easy way to have a common error-exit
1833	// without goto and without another function
1834	do {
1835	char *separator;
1836	char *field[`5`] = {`0`};
1837	int32_t fieldLen[`5`] = {`0`};
1838	int32_t fieldIdx;
1839	int32_t variantField;
1840	int32_t length;
1841	UErrorCode err;
1842
1843	if(localeID == nullptr) {
1844	// not an error, just set the default locale
1845	return *this = getDefault();
1846	}
1847
1848	/ preset all fields to empty /
1849	language[`0`] = script[`0`] = country[`0`] = `0`;
1850
1851	// "canonicalize" the locale ID to ICU/Java format
1852	err = U_ZERO_ERROR;
1853	length = canonicalize ?
1854	uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) :
1855	uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
1856
1857	if(err == U_BUFFER_OVERFLOW_ERROR \|\| length >= (int32_t)sizeof(fullNameBuffer)) {
1858	U_ASSERT(baseName == nullptr);
1859	/Go to heap for the fullName if necessary/
1860	fullName = (char )uprv_malloc(sizeof(char)(length + `1`));
1861	if(fullName == `0`) {
1862	fullName = fullNameBuffer;
1863	break; // error: out of memory
1864	}
1865	err = U_ZERO_ERROR;
1866	length = canonicalize ?
1867	uloc_canonicalize(localeID, fullName, length+`1`, &err) :
1868	uloc_getName(localeID, fullName, length+`1`, &err);
1869	}
1870	if(U_FAILURE(err) \|\| err == U_STRING_NOT_TERMINATED_WARNING) {
1871	/ should never occur /
1872	break;
1873	}
1874
1875	variantBegin = length;
1876
1877	/ after uloc_getName/canonicalize() we know that only '_' are separators /
1878	/ But _ could also appeared in timezone such as "en@timezone=America/Los_Angeles" /
1879	separator = field[`0`] = fullName;
1880	fieldIdx = `1`;
1881	char* at = uprv_strchr(fullName, `'@'`);
1882	while ((separator = uprv_strchr(field[fieldIdx-`1`], SEP_CHAR)) != `0` &&
1883	fieldIdx < UPRV_LENGTHOF(field)-`1` &&
1884	(at == nullptr \|\| separator < at)) {
1885	field[fieldIdx] = separator + `1`;
1886	fieldLen[fieldIdx-`1`] = (int32_t)(separator - field[fieldIdx-`1`]);
1887	fieldIdx++;
1888	}
1889	// variant may contain @foo or .foo POSIX cruft; remove it
1890	separator = uprv_strchr(field[fieldIdx-`1`], `'@'`);
1891	char* sep2 = uprv_strchr(field[fieldIdx-`1`], `'.'`);
1892	if (separator!=nullptr \|\| sep2!=nullptr) {
1893	if (separator==nullptr \|\| (sep2!=nullptr && separator > sep2)) {
1894	separator = sep2;
1895	}
1896	fieldLen[fieldIdx-`1`] = (int32_t)(separator - field[fieldIdx-`1`]);
1897	} else {
1898	fieldLen[fieldIdx-`1`] = length - (int32_t)(field[fieldIdx-`1`] - fullName);
1899	}
1900
1901	if (fieldLen[`0`] >= (int32_t)(sizeof(language)))
1902	{
1903	break; // error: the language field is too long
1904	}
1905
1906	variantField = `1`; / Usually the 2nd one, except when a script or country is also used. /
1907	if (fieldLen[`0`] > `0`) {
1908	/ We have a language /
1909	uprv_memcpy(language, fullName, fieldLen[`0`]);
1910	language[fieldLen[`0`]] = `0`;
1911	}
1912	if (fieldLen[`1`] == `4` && uprv_isASCIILetter(field[`1`][`0`]) &&
1913	uprv_isASCIILetter(field[`1`][`1`]) && uprv_isASCIILetter(field[`1`][`2`]) &&
1914	uprv_isASCIILetter(field[`1`][`3`])) {
1915	/ We have at least a script /
1916	uprv_memcpy(script, field[`1`], fieldLen[`1`]);
1917	script[fieldLen[`1`]] = `0`;
1918	variantField++;
1919	}
1920
1921	if (fieldLen[variantField] == `2` \|\| fieldLen[variantField] == `3`) {
1922	/ We have a country /
1923	uprv_memcpy(country, field[variantField], fieldLen[variantField]);
1924	country[fieldLen[variantField]] = `0`;
1925	variantField++;
1926	} else if (fieldLen[variantField] == `0`) {
1927	variantField++; / script or country empty but variant in next field (i.e. en__POSIX) /
1928	}
1929
1930	if (fieldLen[variantField] > `0`) {
1931	/ We have a variant /
1932	variantBegin = (int32_t)(field[variantField] - fullName);
1933	}
1934
1935	err = U_ZERO_ERROR;
1936	initBaseName(err);
1937	if (U_FAILURE(err)) {
1938	break;
1939	}
1940
1941	if (canonicalize) {
1942	if (!isKnownCanonicalizedLocale(fullName, err)) {
1943	CharString replaced;
1944	// Not sure it is already canonicalized
1945	if (canonicalizeLocale(*this, replaced, err)) {
1946	U_ASSERT(U_SUCCESS(err));
1947	// If need replacement, call init again.
1948	init(replaced.data(), false);
1949	}
1950	if (U_FAILURE(err)) {
1951	break;
1952	}
1953	}
1954	} // if (canonicalize) {
1955
1956	// successful end of init()
1957	return *this;
1958	} while(`0`); /loop doesn't iterate/
1959
1960	// when an error occurs, then set this object to "bogus" (there is no UErrorCode here)
1961	setToBogus();
1962
1963	return *this;
1964	}
1965
1966	/*
1967	* Set up the base name.
1968	* If there are no key words, it's exactly the full name.
1969	* If key words exist, it's the full name truncated at the '@' character.
1970	* Need to set up both at init() and after setting a keyword.
1971	*/
1972	void
1973	Locale::initBaseName(UErrorCode &status) {
1974	if (U_FAILURE(status)) {
1975	return;
1976	}
1977	U_ASSERT(baseName==nullptr \|\| baseName==fullName);
1978	const char *atPtr = uprv_strchr(fullName, `'@'`);
1979	const char *eqPtr = uprv_strchr(fullName, `'='`);
1980	if (atPtr && eqPtr && atPtr < eqPtr) {
1981	// Key words exist.
1982	int32_t baseNameLength = (int32_t)(atPtr - fullName);
1983	baseName = (char *)uprv_malloc(baseNameLength + `1`);
1984	if (baseName == nullptr) {
1985	status = U_MEMORY_ALLOCATION_ERROR;
1986	return;
1987	}
1988	uprv_strncpy(baseName, fullName, baseNameLength);
1989	baseName[baseNameLength] = `0`;
1990
1991	// The original computation of variantBegin leaves it equal to the length
1992	// of fullName if there is no variant. It should instead be
1993	// the length of the baseName.
1994	if (variantBegin > baseNameLength) {
1995	variantBegin = baseNameLength;
1996	}
1997	} else {
1998	baseName = fullName;
1999	}
2000	}
2001
2002
2003	int32_t
2004	Locale::hashCode() const
2005	{
2006	return ustr_hashCharsN(fullName, static_cast<int32_t>(uprv_strlen(fullName)));
2007	}
2008
2009	void
2010	Locale::setToBogus() {
2011	/ Free our current storage /
2012	if((baseName != fullName) && (baseName != fullNameBuffer)) {
2013	uprv_free(baseName);
2014	}
2015	baseName = nullptr;
2016	if(fullName != fullNameBuffer) {
2017	uprv_free(fullName);
2018	fullName = fullNameBuffer;
2019	}
2020	*fullNameBuffer = `0`;
2021	*language = `0`;
2022	*script = `0`;
2023	*country = `0`;
2024	fIsBogus = true;
2025	variantBegin = `0`;
2026	}
2027
2028	const Locale& U_EXPORT2
2029	Locale::getDefault()
2030	{
2031	{
2032	Mutex lock(&gDefaultLocaleMutex);
2033	if (gDefaultLocale != nullptr) {
2034	return *gDefaultLocale;
2035	}
2036	}
2037	UErrorCode status = U_ZERO_ERROR;
2038	return locale_set_default_internal(nullptr*, status);
2039	}
2040
2041
2042
2043	void U_EXPORT2
2044	Locale::setDefault( const Locale& newLocale,
2045	UErrorCode& status)
2046	{
2047	if (U_FAILURE(status)) {
2048	return;
2049	}
2050
2051	/ Set the default from the full name string of the supplied locale.*
2052	* This is a convenient way to access the default locale caching mechanisms.
2053	*/
2054	const char *localeID = newLocale.getName();
2055	locale_set_default_internal(localeID, status);
2056	}
2057
2058	void
2059	Locale::addLikelySubtags(UErrorCode& status) {
2060	if (U_FAILURE(status)) {
2061	return;
2062	}
2063
2064	CharString maximizedLocaleID;
2065	{
2066	CharStringByteSink sink(&maximizedLocaleID);
2067	ulocimp_addLikelySubtags(fullName, sink, &status);
2068	}
2069
2070	if (U_FAILURE(status)) {
2071	return;
2072	}
2073
2074	init(maximizedLocaleID.data(), /canonicalize=/false);
2075	if (isBogus()) {
2076	status = U_ILLEGAL_ARGUMENT_ERROR;
2077	}
2078	}
2079
2080	void
2081	Locale::minimizeSubtags(UErrorCode& status) {
2082	if (U_FAILURE(status)) {
2083	return;
2084	}
2085
2086	CharString minimizedLocaleID;
2087	{
2088	CharStringByteSink sink(&minimizedLocaleID);
2089	ulocimp_minimizeSubtags(fullName, sink, &status);
2090	}
2091
2092	if (U_FAILURE(status)) {
2093	return;
2094	}
2095
2096	init(minimizedLocaleID.data(), /canonicalize=/false);
2097	if (isBogus()) {
2098	status = U_ILLEGAL_ARGUMENT_ERROR;
2099	}
2100	}
2101
2102	void
2103	Locale::canonicalize(UErrorCode& status) {
2104	if (U_FAILURE(status)) {
2105	return;
2106	}
2107	if (isBogus()) {
2108	status = U_ILLEGAL_ARGUMENT_ERROR;
2109	return;
2110	}
2111	CharString uncanonicalized(fullName, status);
2112	if (U_FAILURE(status)) {
2113	return;
2114	}
2115	init(uncanonicalized.data(), /canonicalize=/true);
2116	if (isBogus()) {
2117	status = U_ILLEGAL_ARGUMENT_ERROR;
2118	}
2119	}
2120
2121	Locale U_EXPORT2
2122	Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
2123	{
2124	Locale result(Locale::eBOGUS);
2125
2126	if (U_FAILURE(status)) {
2127	return result;
2128	}
2129
2130	// If a BCP 47 language tag is passed as the language parameter to the
2131	// normal Locale constructor, it will actually fall back to invoking
2132	// uloc_forLanguageTag() to parse it if it somehow is able to detect that
2133	// the string actually is BCP 47. This works well for things like strings
2134	// using BCP 47 extensions, but it does not at all work for things like
2135	// legacy language tags (marked as “Type: grandfathered” in BCP 47,
2136	// e.g., "en-GB-oed") which are possible to also
2137	// interpret as ICU locale IDs and because of that won't trigger the BCP 47
2138	// parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
2139	// and then Locale::init(), instead of just calling the normal constructor.
2140
2141	CharString localeID;
2142	int32_t parsedLength;
2143	{
2144	CharStringByteSink sink(&localeID);
2145	ulocimp_forLanguageTag(
2146	tag.data(),
2147	tag.length(),
2148	sink,
2149	&parsedLength,
2150	&status);
2151	}
2152
2153	if (U_FAILURE(status)) {
2154	return result;
2155	}
2156
2157	if (parsedLength != tag.size()) {
2158	status = U_ILLEGAL_ARGUMENT_ERROR;
2159	return result;
2160	}
2161
2162	result.init(localeID.data(), /canonicalize=/false);
2163	if (result.isBogus()) {
2164	status = U_ILLEGAL_ARGUMENT_ERROR;
2165	}
2166	return result;
2167	}
2168
2169	void
2170	Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
2171	{
2172	if (U_FAILURE(status)) {
2173	return;
2174	}
2175
2176	if (fIsBogus) {
2177	status = U_ILLEGAL_ARGUMENT_ERROR;
2178	return;
2179	}
2180
2181	ulocimp_toLanguageTag(fullName, sink, /strict=/false, &status);
2182	}
2183
2184	Locale U_EXPORT2
2185	Locale::createFromName (const char *name)
2186	{
2187	if (name) {
2188	Locale l("");
2189	l.init(name, false);
2190	return l;
2191	}
2192	else {
2193	return getDefault();
2194	}
2195	}
2196
2197	Locale U_EXPORT2
2198	Locale::createCanonical(const char* name) {
2199	Locale loc("");
2200	loc.init(name, true);
2201	return loc;
2202	}
2203
2204	const char *
2205	Locale::getISO3Language() const
2206	{
2207	return uloc_getISO3Language(fullName);
2208	}
2209
2210
2211	const char *
2212	Locale::getISO3Country() const
2213	{
2214	return uloc_getISO3Country(fullName);
2215	}
2216
2217	/**
2218	* Return the LCID value as specified in the "LocaleID" resource for this
2219	* locale. The LocaleID must be expressed as a hexadecimal number, from
2220	* one to four digits. If the LocaleID resource is not present, or is
2221	* in an incorrect format, 0 is returned. The LocaleID is for use in
2222	* Windows (it is an LCID), but is available on all platforms.
2223	*/
2224	uint32_t
2225	Locale::getLCID() const
2226	{
2227	return uloc_getLCID(fullName);
2228	}
2229
2230	const char* const* U_EXPORT2 Locale::getISOCountries()
2231	{
2232	return uloc_getISOCountries();
2233	}
2234
2235	const char* const* U_EXPORT2 Locale::getISOLanguages()
2236	{
2237	return uloc_getISOLanguages();
2238	}
2239
2240	// Set the locale's data based on a posix id.
2241	void Locale::setFromPOSIXID(const char *posixID)
2242	{
2243	init(posixID, true);
2244	}
2245
2246	const Locale & U_EXPORT2
2247	Locale::getRoot()
2248	{
2249	return getLocale(eROOT);
2250	}
2251
2252	const Locale & U_EXPORT2
2253	Locale::getEnglish()
2254	{
2255	return getLocale(eENGLISH);
2256	}
2257
2258	const Locale & U_EXPORT2
2259	Locale::getFrench()
2260	{
2261	return getLocale(eFRENCH);
2262	}
2263
2264	const Locale & U_EXPORT2
2265	Locale::getGerman()
2266	{
2267	return getLocale(eGERMAN);
2268	}
2269
2270	const Locale & U_EXPORT2
2271	Locale::getItalian()
2272	{
2273	return getLocale(eITALIAN);
2274	}
2275
2276	const Locale & U_EXPORT2
2277	Locale::getJapanese()
2278	{
2279	return getLocale(eJAPANESE);
2280	}
2281
2282	const Locale & U_EXPORT2
2283	Locale::getKorean()
2284	{
2285	return getLocale(eKOREAN);
2286	}
2287
2288	const Locale & U_EXPORT2
2289	Locale::getChinese()
2290	{
2291	return getLocale(eCHINESE);
2292	}
2293
2294	const Locale & U_EXPORT2
2295	Locale::getSimplifiedChinese()
2296	{
2297	return getLocale(eCHINA);
2298	}
2299
2300	const Locale & U_EXPORT2
2301	Locale::getTraditionalChinese()
2302	{
2303	return getLocale(eTAIWAN);
2304	}
2305
2306
2307	const Locale & U_EXPORT2
2308	Locale::getFrance()
2309	{
2310	return getLocale(eFRANCE);
2311	}
2312
2313	const Locale & U_EXPORT2
2314	Locale::getGermany()
2315	{
2316	return getLocale(eGERMANY);
2317	}
2318
2319	const Locale & U_EXPORT2
2320	Locale::getItaly()
2321	{
2322	return getLocale(eITALY);
2323	}
2324
2325	const Locale & U_EXPORT2
2326	Locale::getJapan()
2327	{
2328	return getLocale(eJAPAN);
2329	}
2330
2331	const Locale & U_EXPORT2
2332	Locale::getKorea()
2333	{
2334	return getLocale(eKOREA);
2335	}
2336
2337	const Locale & U_EXPORT2
2338	Locale::getChina()
2339	{
2340	return getLocale(eCHINA);
2341	}
2342
2343	const Locale & U_EXPORT2
2344	Locale::getPRC()
2345	{
2346	return getLocale(eCHINA);
2347	}
2348
2349	const Locale & U_EXPORT2
2350	Locale::getTaiwan()
2351	{
2352	return getLocale(eTAIWAN);
2353	}
2354
2355	const Locale & U_EXPORT2
2356	Locale::getUK()
2357	{
2358	return getLocale(eUK);
2359	}
2360
2361	const Locale & U_EXPORT2
2362	Locale::getUS()
2363	{
2364	return getLocale(eUS);
2365	}
2366
2367	const Locale & U_EXPORT2
2368	Locale::getCanada()
2369	{
2370	return getLocale(eCANADA);
2371	}
2372
2373	const Locale & U_EXPORT2
2374	Locale::getCanadaFrench()
2375	{
2376	return getLocale(eCANADA_FRENCH);
2377	}
2378
2379	const Locale &
2380	Locale::getLocale(int locid)
2381	{
2382	Locale *localeCache = getLocaleCache();
2383	U_ASSERT((locid < eMAX_LOCALES)&&(locid>=`0`));
2384	if (localeCache == nullptr) {
2385	// Failure allocating the locale cache.
2386	// The best we can do is return a nullptr reference.
2387	locid = `0`;
2388	}
2389	return localeCache[locid]; /operating on nullptr/
2390	}
2391
2392	/*
2393	This function is defined this way in order to get around static
2394	initialization and static destruction.
2395	*/
2396	Locale *
2397	Locale::getLocaleCache()
2398	{
2399	UErrorCode status = U_ZERO_ERROR;
2400	umtx_initOnce(gLocaleCacheInitOnce, locale_init, status);
2401	return gLocaleCache;
2402	}
2403
2404	class KeywordEnumeration : public StringEnumeration {
2405	private:
2406	char *keywords;
2407	char *current;
2408	int32_t length;
2409	UnicodeString currUSKey;
2410	static const char fgClassID;/ Warning this is used beyond the typical RTTI usage. /
2411
2412	public:
2413	static UClassID U_EXPORT2 getStaticClassID() { return (UClassID)&fgClassID; }
2414	virtual UClassID getDynamicClassID() const override { return getStaticClassID(); }
2415	public:
2416	KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
2417	: keywords((char )&fgClassID), current((char* *)&fgClassID), length(`0`) {
2418	if(U_SUCCESS(status) && keywordLen != `0`) {
2419	if(keys == nullptr \|\| keywordLen < `0`) {
2420	status = U_ILLEGAL_ARGUMENT_ERROR;
2421	} else {
2422	keywords = (char *)uprv_malloc(keywordLen+`1`);
2423	if (keywords == nullptr) {
2424	status = U_MEMORY_ALLOCATION_ERROR;
2425	}
2426	else {
2427	uprv_memcpy(keywords, keys, keywordLen);
2428	keywords[keywordLen] = `0`;
2429	current = keywords + currentIndex;
2430	length = keywordLen;
2431	}
2432	}
2433	}
2434	}
2435
2436	virtual ~KeywordEnumeration();
2437
2438	virtual StringEnumeration * clone() const override
2439	{
2440	UErrorCode status = U_ZERO_ERROR;
2441	return new KeywordEnumeration (keywords, length, (int32_t)(current - keywords), status);
2442	}
2443
2444	virtual int32_t count(UErrorCode &/status/) const override {
2445	char *kw = keywords;
2446	int32_t result = `0`;
2447	while(*kw) {
2448	result++;
2449	kw += uprv_strlen(kw)+`1`;
2450	}
2451	return result;
2452	}
2453
2454	virtual const char* next(int32_t* resultLength, UErrorCode& status) override {
2455	const char* result;
2456	int32_t len;
2457	if(U_SUCCESS(status) && *current != `0`) {
2458	result = current;
2459	len = (int32_t)uprv_strlen(current);
2460	current += len+`1`;
2461	if(resultLength != nullptr) {
2462	*resultLength = len;
2463	}
2464	} else {
2465	if(resultLength != nullptr) {
2466	*resultLength = `0`;
2467	}
2468	result = nullptr;
2469	}
2470	return result;
2471	}
2472
2473	virtual const UnicodeString* snext(UErrorCode& status) override {
2474	int32_t resultLength = `0`;
2475	const char *s = next(&resultLength, status);
2476	return setChars(s, resultLength, status);
2477	}
2478
2479	virtual void reset(UErrorCode& /status/) override {
2480	current = keywords;
2481	}
2482	};
2483
2484	const char KeywordEnumeration::fgClassID = `'\0'`;
2485
2486	KeywordEnumeration::~KeywordEnumeration() {
2487	uprv_free(keywords);
2488	}
2489
2490	// A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in
2491	// the next() method for each keyword before returning it.
2492	class UnicodeKeywordEnumeration : public KeywordEnumeration {
2493	public:
2494	using KeywordEnumeration::KeywordEnumeration;
2495	virtual ~UnicodeKeywordEnumeration();
2496
2497	virtual const char* next(int32_t* resultLength, UErrorCode& status) override {
2498	const char* legacy_key = KeywordEnumeration::next(nullptr, status);
2499	while (U_SUCCESS(status) && legacy_key != nullptr) {
2500	const char* key = uloc_toUnicodeLocaleKey(legacy_key);
2501	if (key != nullptr) {
2502	if (resultLength != nullptr) {
2503	resultLength = static_cast*<int32_t>(uprv_strlen(key));
2504	}
2505	return key;
2506	}
2507	// Not a Unicode keyword, could be a t, x or other, continue to look at the next one.
2508	legacy_key = KeywordEnumeration::next(nullptr, status);
2509	}
2510	if (resultLength != nullptr) *resultLength = `0`;
2511	return nullptr;
2512	}
2513	};
2514
2515	// Out-of-line virtual destructor to serve as the "key function".
2516	UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() = default;
2517
2518	StringEnumeration *
2519	Locale::createKeywords(UErrorCode &status) const
2520	{
2521	StringEnumeration result = nullptr*;
2522
2523	if (U_FAILURE(status)) {
2524	return result;
2525	}
2526
2527	const char* variantStart = uprv_strchr(fullName, `'@'`);
2528	const char* assignment = uprv_strchr(fullName, `'='`);
2529	if(variantStart) {
2530	if(assignment > variantStart) {
2531	CharString keywords;
2532	CharStringByteSink sink(&keywords);
2533	ulocimp_getKeywords(variantStart+`1`, `'@'`, sink, false, &status);
2534	if (U_SUCCESS(status) && !keywords.isEmpty()) {
2535	result = new KeywordEnumeration (keywords.data(), keywords.length(), `0`, status);
2536	if (!result) {
2537	status = U_MEMORY_ALLOCATION_ERROR;
2538	}
2539	}
2540	} else {
2541	status = U_INVALID_FORMAT_ERROR;
2542	}
2543	}
2544	return result;
2545	}
2546
2547	StringEnumeration *
2548	Locale::createUnicodeKeywords(UErrorCode &status) const
2549	{
2550	StringEnumeration result = nullptr*;
2551
2552	if (U_FAILURE(status)) {
2553	return result;
2554	}
2555
2556	const char* variantStart = uprv_strchr(fullName, `'@'`);
2557	const char* assignment = uprv_strchr(fullName, `'='`);
2558	if(variantStart) {
2559	if(assignment > variantStart) {
2560	CharString keywords;
2561	CharStringByteSink sink(&keywords);
2562	ulocimp_getKeywords(variantStart+`1`, `'@'`, sink, false, &status);
2563	if (U_SUCCESS(status) && !keywords.isEmpty()) {
2564	result = new UnicodeKeywordEnumeration (keywords.data(), keywords.length(), `0`, status);
2565	if (!result) {
2566	status = U_MEMORY_ALLOCATION_ERROR;
2567	}
2568	}
2569	} else {
2570	status = U_INVALID_FORMAT_ERROR;
2571	}
2572	}
2573	return result;
2574	}
2575
2576	int32_t
2577	Locale::getKeywordValue(const char* keywordName, char buffer, int32_t bufLen, UErrorCode &status) const*
2578	{
2579	return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status);
2580	}
2581
2582	void
2583	Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const {
2584	if (U_FAILURE(status)) {
2585	return;
2586	}
2587
2588	if (fIsBogus) {
2589	status = U_ILLEGAL_ARGUMENT_ERROR;
2590	return;
2591	}
2592
2593	// TODO: Remove the need for a const char to a NUL terminated buffer.*
2594	const CharString keywordName_nul(keywordName, status);
2595	if (U_FAILURE(status)) {
2596	return;
2597	}
2598
2599	ulocimp_getKeywordValue(fullName, keywordName_nul.data(), sink, &status);
2600	}
2601
2602	void
2603	Locale::getUnicodeKeywordValue(StringPiece keywordName,
2604	ByteSink& sink,
2605	UErrorCode& status) const {
2606	// TODO: Remove the need for a const char to a NUL terminated buffer.*
2607	const CharString keywordName_nul(keywordName, status);
2608	if (U_FAILURE(status)) {
2609	return;
2610	}
2611
2612	const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
2613
2614	if (legacy_key == nullptr) {
2615	status = U_ILLEGAL_ARGUMENT_ERROR;
2616	return;
2617	}
2618
2619	CharString legacy_value;
2620	{
2621	CharStringByteSink sink(&legacy_value);
2622	getKeywordValue(legacy_key, sink, status);
2623	}
2624
2625	if (U_FAILURE(status)) {
2626	return;
2627	}
2628
2629	const char* unicode_value = uloc_toUnicodeLocaleType(
2630	keywordName_nul.data(), legacy_value.data());
2631
2632	if (unicode_value == nullptr) {
2633	status = U_ILLEGAL_ARGUMENT_ERROR;
2634	return;
2635	}
2636
2637	sink.Append(unicode_value, static_cast<int32_t>(uprv_strlen(unicode_value)));
2638	}
2639
2640	void
2641	Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status)
2642	{
2643	if (U_FAILURE(status)) {
2644	return;
2645	}
2646	if (status == U_STRING_NOT_TERMINATED_WARNING) {
2647	status = U_ZERO_ERROR;
2648	}
2649	int32_t bufferLength = uprv_max((int32_t)(uprv_strlen(fullName) + `1`), ULOC_FULLNAME_CAPACITY);
2650	int32_t newLength = uloc_setKeywordValue(keywordName, keywordValue, fullName,
2651	bufferLength, &status) + `1`;
2652	U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
2653	/ Handle the case the current buffer is not enough to hold the new id /
2654	if (status == U_BUFFER_OVERFLOW_ERROR) {
2655	U_ASSERT(newLength > bufferLength);
2656	char* newFullName = (char *)uprv_malloc(newLength);
2657	if (newFullName == nullptr) {
2658	status = U_MEMORY_ALLOCATION_ERROR;
2659	return;
2660	}
2661	uprv_strcpy(newFullName, fullName);
2662	if (fullName != fullNameBuffer) {
2663	// if full Name is already on the heap, need to free it.
2664	uprv_free(fullName);
2665	if (baseName == fullName) {
2666	baseName = newFullName; // baseName should not point to freed memory.
2667	}
2668	}
2669	fullName = newFullName;
2670	status = U_ZERO_ERROR;
2671	uloc_setKeywordValue(keywordName, keywordValue, fullName, newLength, &status);
2672	U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
2673	} else {
2674	U_ASSERT(newLength <= bufferLength);
2675	}
2676	if (U_SUCCESS(status) && baseName == fullName) {
2677	// May have added the first keyword, meaning that the fullName is no longer also the baseName.
2678	initBaseName(status);
2679	}
2680	}
2681
2682	void
2683	Locale::setKeywordValue(StringPiece keywordName,
2684	StringPiece keywordValue,
2685	UErrorCode& status) {
2686	// TODO: Remove the need for a const char to a NUL terminated buffer.*
2687	const CharString keywordName_nul(keywordName, status);
2688	const CharString keywordValue_nul(keywordValue, status);
2689	setKeywordValue(keywordName_nul.data(), keywordValue_nul.data(), status);
2690	}
2691
2692	void
2693	Locale::setUnicodeKeywordValue(StringPiece keywordName,
2694	StringPiece keywordValue,
2695	UErrorCode& status) {
2696	// TODO: Remove the need for a const char to a NUL terminated buffer.*
2697	const CharString keywordName_nul(keywordName, status);
2698	const CharString keywordValue_nul(keywordValue, status);
2699
2700	if (U_FAILURE(status)) {
2701	return;
2702	}
2703
2704	const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
2705
2706	if (legacy_key == nullptr) {
2707	status = U_ILLEGAL_ARGUMENT_ERROR;
2708	return;
2709	}
2710
2711	const char* legacy_value = nullptr;
2712
2713	if (!keywordValue_nul.isEmpty()) {
2714	legacy_value =
2715	uloc_toLegacyType(keywordName_nul.data(), keywordValue_nul.data());
2716
2717	if (legacy_value == nullptr) {
2718	status = U_ILLEGAL_ARGUMENT_ERROR;
2719	return;
2720	}
2721	}
2722
2723	setKeywordValue(legacy_key, legacy_value, status);
2724	}
2725
2726	const char *
2727	Locale::getBaseName() const {
2728	return baseName;
2729	}
2730
2731	Locale::Iterator::~Iterator() = default;
2732
2733	//eof
2734	U_NAMESPACE_END
2735

Browse the source code of Godot/thirdparty/icu4c/common/locid.cpp