locutil.cpp source code [ClickHouse/contrib/icu/icu4c/source/common/locutil.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	*******************************************************************************
5	* Copyright (C) 2002-2014, International Business Machines Corporation and
6	* others. All Rights Reserved.
7	*******************************************************************************
8	*/
9	#include "unicode/utypes.h"
10
11	#if !UCONFIG_NO_SERVICE \|\| !UCONFIG_NO_TRANSLITERATION
12
13	#include "unicode/resbund.h"
14	#include "unicode/uenum.h"
15	#include "cmemory.h"
16	#include "ustrfmt.h"
17	#include "locutil.h"
18	#include "charstr.h"
19	#include "ucln_cmn.h"
20	#include "uassert.h"
21	#include "umutex.h"
22
23	// see LocaleUtility::getAvailableLocaleNames
24	static icu::UInitOnce LocaleUtilityInitOnce = U_INITONCE_INITIALIZER;
25	static icu::Hashtable * LocaleUtility_cache = NULL;
26
27	#define UNDERSCORE_CHAR ((UChar)0x005f)
28	#define AT_SIGN_CHAR ((UChar)64)
29	#define PERIOD_CHAR ((UChar)46)
30
31	/*
32	******************************************************************
33	*/
34
35	/**
36	* Release all static memory held by Locale Utility.
37	*/
38	U_CDECL_BEGIN
39	static UBool U_CALLCONV service_cleanup(void) {
40	if (LocaleUtility_cache) {
41	delete LocaleUtility_cache;
42	LocaleUtility_cache = NULL;
43	}
44	return TRUE;
45	}
46
47
48	static void U_CALLCONV locale_utility_init(UErrorCode &status) {
49	using namespace icu;
50	U_ASSERT(LocaleUtility_cache == NULL);
51	ucln_common_registerCleanup(UCLN_COMMON_SERVICE, service_cleanup);
52	LocaleUtility_cache = new Hashtable (status);
53	if (U_FAILURE(status)) {
54	delete LocaleUtility_cache;
55	LocaleUtility_cache = NULL;
56	return;
57	}
58	if (LocaleUtility_cache == NULL) {
59	status = U_MEMORY_ALLOCATION_ERROR;
60	return;
61	}
62	LocaleUtility_cache->setValueDeleter(uhash_deleteHashtable);
63	}
64
65	U_CDECL_END
66
67	U_NAMESPACE_BEGIN
68
69	UnicodeString&
70	LocaleUtility::canonicalLocaleString(const UnicodeString* id, UnicodeString& result)
71	{
72	if (id == NULL) {
73	result.setToBogus();
74	} else {
75	// Fix case only (no other changes) up to the first '@' or '.' or
76	// end of string, whichever comes first. In 3.0 I changed this to
77	// stop at first '@' or '.'. It used to run out to the end of
78	// string. My fix makes the tests pass but is probably
79	// structurally incorrect. See below. [alan 3.0]
80
81	// TODO: Doug, you might want to revise this...
82	result = *id;
83	int32_t i = `0`;
84	int32_t end = result.indexOf(AT_SIGN_CHAR);
85	int32_t n = result.indexOf(PERIOD_CHAR);
86	if (n >= `0` && n < end) {
87	end = n;
88	}
89	if (end < `0`) {
90	end = result.length();
91	}
92	n = result.indexOf(UNDERSCORE_CHAR);
93	if (n < `0`) {
94	n = end;
95	}
96	for (; i < n; ++i) {
97	UChar c = result.charAt(i);
98	if (c >= `0x0041` && c <= `0x005a`) {
99	c += `0x20`;
100	result.setCharAt(i, c);
101	}
102	}
103	for (n = end; i < n; ++i) {
104	UChar c = result.charAt(i);
105	if (c >= `0x0061` && c <= `0x007a`) {
106	c -= `0x20`;
107	result.setCharAt(i, c);
108	}
109	}
110	}
111	return result;
112
113	#if 0
114	// This code does a proper full level 2 canonicalization of id.
115	// It's nasty to go from UChar to char to char to UChar -- but
116	// that's what you have to do to use the uloc_canonicalize
117	// function on UnicodeStrings.
118
119	// I ended up doing the alternate fix (see above) not for
120	// performance reasons, although performance will certainly be
121	// better, but because doing a full level 2 canonicalization
122	// causes some tests to fail. [alan 3.0]
123
124	// TODO: Doug, you might want to revisit this...
125	result.setToBogus();
126	if (id != `0`) {
127	int32_t buflen = id->length() + `8`; // space for NUL
128	char* buf = (char*) uprv_malloc(buflen);
129	char* canon = (buf == `0`) ? `0` : (char*) uprv_malloc(buflen);
130	if (buf != `0` && canon != `0`) {
131	U_ASSERT(id->extract(`0`, INT32_MAX, buf, buflen) < buflen);
132	UErrorCode ec = U_ZERO_ERROR;
133	uloc_canonicalize(buf, canon, buflen, &ec);
134	if (U_SUCCESS(ec)) {
135	result = UnicodeString(canon);
136	}
137	}
138	uprv_free(buf);
139	uprv_free(canon);
140	}
141	return result;
142	#endif
143	}
144
145	Locale&
146	LocaleUtility::initLocaleFromName(const UnicodeString& id, Locale& result)
147	{
148	enum { BUFLEN = `128` }; // larger than ever needed
149
150	if (id.isBogus() \|\| id.length() >= BUFLEN) {
151	result.setToBogus();
152	} else {
153	/*
154	* We need to convert from a UnicodeString to char * in order to
155	* create a Locale.
156	*
157	* Problem: Locale ID strings may contain '@' which is a variant
158	* character and cannot be handled by invariant-character conversion.
159	*
160	* Hack: Since ICU code can handle locale IDs with multiple encodings
161	* of '@' (at least for EBCDIC; it's not known to be a problem for
162	* ASCII-based systems),
163	* we use regular invariant-character conversion for everything else
164	* and manually convert U+0040 into a compiler-char-constant '@'.
165	* While this compilation-time constant may not match the runtime
166	* encoding of '@', it should be one of the encodings which ICU
167	* recognizes.
168	*
169	* There should be only at most one '@' in a locale ID.
170	*/
171	char buffer[BUFLEN];
172	int32_t prev, i;
173	prev = `0`;
174	for(;;) {
175	i = id.indexOf((UChar)`0x40`, prev);
176	if(i < `0`) {
177	// no @ between prev and the rest of the string
178	id.extract(prev, INT32_MAX, buffer + prev, BUFLEN - prev, US_INV);
179	break; // done
180	} else {
181	// normal invariant-character conversion for text between @s
182	id.extract(prev, i - prev, buffer + prev, BUFLEN - prev, US_INV);
183	// manually "convert" U+0040 at id[i] into '@' at buffer[i]
184	buffer[i] = `'@'`;
185	prev = i + `1`;
186	}
187	}
188	result = Locale::createFromName(buffer);
189	}
190	return result;
191	}
192
193	UnicodeString&
194	LocaleUtility::initNameFromLocale(const Locale& locale, UnicodeString& result)
195	{
196	if (locale.isBogus()) {
197	result.setToBogus();
198	} else {
199	result.append(UnicodeString (locale.getName(), -`1`, US_INV));
200	}
201	return result;
202	}
203
204	const Hashtable*
205	LocaleUtility::getAvailableLocaleNames(const UnicodeString& bundleID)
206	{
207	// LocaleUtility_cache is a hash-of-hashes. The top-level keys
208	// are path strings ('bundleID') passed to
209	// ures_openAvailableLocales. The top-level values are
210	// second-level hashes. The second-level keys are result strings
211	// from ures_openAvailableLocales. The second-level values are
212	// garbage ((void)1 or other random pointer).*
213
214	UErrorCode status = U_ZERO_ERROR;
215	umtx_initOnce(LocaleUtilityInitOnce, locale_utility_init, status);
216	Hashtable *cache = LocaleUtility_cache;
217	if (cache == NULL) {
218	// Catastrophic failure.
219	return NULL;
220	}
221
222	Hashtable* htp;
223	umtx_lock(NULL);
224	htp = (Hashtable*) cache->get(bundleID);
225	umtx_unlock(NULL);
226
227	if (htp == NULL) {
228	htp = new Hashtable (status);
229	if (htp && U_SUCCESS(status)) {
230	CharString cbundleID;
231	cbundleID.appendInvariantChars(bundleID, status);
232	const char* path = cbundleID.isEmpty() ? NULL : cbundleID.data();
233	icu::LocalUEnumerationPointer uenum(ures_openAvailableLocales(path, &status));
234	for (;;) {
235	const UChar* id = uenum_unext(uenum.getAlias(), NULL, &status);
236	if (id == NULL) {
237	break;
238	}
239	htp->put(UnicodeString (id), (void*)htp, status);
240	}
241	if (U_FAILURE(status)) {
242	delete htp;
243	return NULL;
244	}
245	umtx_lock(NULL);
246	Hashtable t = static_cast<Hashtable >(cache->get(bundleID));
247	if (t != NULL) {
248	// Another thread raced through this code, creating the cache entry first.
249	// Discard ours and return theirs.
250	umtx_unlock(NULL);
251	delete htp;
252	htp = t;
253	} else {
254	cache->put(bundleID, (void*)htp, status);
255	umtx_unlock(NULL);
256	}
257	}
258	}
259	return htp;
260	}
261
262	UBool
263	LocaleUtility::isFallbackOf(const UnicodeString& root, const UnicodeString& child)
264	{
265	return child.indexOf(root) == `0` &&
266	(child.length() == root.length() \|\|
267	child.charAt(root.length()) == UNDERSCORE_CHAR);
268	}
269
270	U_NAMESPACE_END
271
272	/* !UCONFIG_NO_SERVICE */
273	#endif
274
275
276

Browse the source code of ClickHouse/contrib/icu/icu4c/source/common/locutil.cpp