1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ********************************************************************** |
5 | * Copyright (C) 2014-2016, International Business Machines |
6 | * Corporation and others. All Rights Reserved. |
7 | ********************************************************************** |
8 | */ |
9 | #include <algorithm> |
10 | |
11 | #include "unicode/utypes.h" |
12 | #include "unicode/unistr.h" |
13 | #include "unicode/uobject.h" |
14 | |
15 | #include "charstr.h" |
16 | #include "cmemory.h" |
17 | #include "cstring.h" |
18 | #include "uassert.h" |
19 | #include "ucln_cmn.h" |
20 | #include "uhash.h" |
21 | #include "umutex.h" |
22 | #include "uresimp.h" |
23 | #include "uvector.h" |
24 | #include "udataswp.h" /* for InvChar functions */ |
25 | |
// Top-level lookup table: maps a key ID string to its LocExtKeyData record.
// initFromResourceBundle() opens it (with uhash_hashIChars / uhash_compareIChars)
// and inserts each record under its legacy key ID and, when different, its BCP key ID.
// uloc_key_type_cleanup() closes it and resets the pointer to nullptr.
static UHashtable* gLocExtKeyMap = nullptr;
// Init-once guard passed to umtx_initOnce() in init(); reset by uloc_key_type_cleanup().
static icu::UInitOnce gLocExtKeyMapInitOnce {};
28 | |
// Bit flags recording which "special" (pattern-validated rather than enumerated)
// type values a key accepts. They are OR-ed together into
// LocExtKeyData::specialTypes, so every non-zero value occupies its own bit.
enum SpecialType {
    SPECIALTYPE_NONE         = 0,
    SPECIALTYPE_CODEPOINTS   = 1 << 0,
    SPECIALTYPE_REORDER_CODE = 1 << 1,
    SPECIALTYPE_RG_KEY_VALUE = 1 << 2
};
36 | |
37 | struct LocExtKeyData : public icu::UMemory { |
38 | const char* legacyId; |
39 | const char* bcpId; |
40 | icu::LocalUHashtablePointer typeMap; |
41 | uint32_t specialTypes; |
42 | }; |
43 | |
44 | struct LocExtType : public icu::UMemory { |
45 | const char* legacyId; |
46 | const char* bcpId; |
47 | }; |
48 | |
// Pools owning the objects created by initFromResourceBundle(); the hash tables
// only store raw pointers into them. All three are deleted and reset to nullptr
// by uloc_key_type_cleanup().
// Holds the CharString buffers for ID strings that had to be converted or rewritten.
static icu::MemoryPool<icu::CharString>* gKeyTypeStringPool = nullptr;
// Holds one LocExtKeyData per key.
static icu::MemoryPool<LocExtKeyData>* gLocExtKeyDataEntries = nullptr;
// Holds one LocExtType per type.
static icu::MemoryPool<LocExtType>* gLocExtTypeEntries = nullptr;
52 | |
53 | U_CDECL_BEGIN |
54 | |
55 | static UBool U_CALLCONV |
56 | uloc_key_type_cleanup() { |
57 | if (gLocExtKeyMap != nullptr) { |
58 | uhash_close(gLocExtKeyMap); |
59 | gLocExtKeyMap = nullptr; |
60 | } |
61 | |
62 | delete gLocExtKeyDataEntries; |
63 | gLocExtKeyDataEntries = nullptr; |
64 | |
65 | delete gLocExtTypeEntries; |
66 | gLocExtTypeEntries = nullptr; |
67 | |
68 | delete gKeyTypeStringPool; |
69 | gKeyTypeStringPool = nullptr; |
70 | |
71 | gLocExtKeyMapInitOnce.reset(); |
72 | return true; |
73 | } |
74 | |
75 | U_CDECL_END |
76 | |
77 | |
78 | static void U_CALLCONV |
79 | initFromResourceBundle(UErrorCode& sts) { |
80 | U_NAMESPACE_USE |
81 | ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup); |
82 | |
83 | gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, nullptr, &sts); |
84 | |
85 | LocalUResourceBundlePointer keyTypeDataRes(ures_openDirect(nullptr, "keyTypeData" , &sts)); |
86 | LocalUResourceBundlePointer keyMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "keyMap" , nullptr, &sts)); |
87 | LocalUResourceBundlePointer typeMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeMap" , nullptr, &sts)); |
88 | |
89 | if (U_FAILURE(sts)) { |
90 | return; |
91 | } |
92 | |
93 | UErrorCode tmpSts = U_ZERO_ERROR; |
94 | LocalUResourceBundlePointer typeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeAlias" , nullptr, &tmpSts)); |
95 | tmpSts = U_ZERO_ERROR; |
96 | LocalUResourceBundlePointer bcpTypeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "bcpTypeAlias" , nullptr, &tmpSts)); |
97 | |
98 | // initialize pools storing dynamically allocated objects |
99 | gKeyTypeStringPool = new icu::MemoryPool<icu::CharString>; |
100 | if (gKeyTypeStringPool == nullptr) { |
101 | sts = U_MEMORY_ALLOCATION_ERROR; |
102 | return; |
103 | } |
104 | gLocExtKeyDataEntries = new icu::MemoryPool<LocExtKeyData>; |
105 | if (gLocExtKeyDataEntries == nullptr) { |
106 | sts = U_MEMORY_ALLOCATION_ERROR; |
107 | return; |
108 | } |
109 | gLocExtTypeEntries = new icu::MemoryPool<LocExtType>; |
110 | if (gLocExtTypeEntries == nullptr) { |
111 | sts = U_MEMORY_ALLOCATION_ERROR; |
112 | return; |
113 | } |
114 | |
115 | // iterate through keyMap resource |
116 | LocalUResourceBundlePointer keyMapEntry; |
117 | |
118 | while (ures_hasNext(keyMapRes.getAlias())) { |
119 | keyMapEntry.adoptInstead(ures_getNextResource(keyMapRes.getAlias(), keyMapEntry.orphan(), &sts)); |
120 | if (U_FAILURE(sts)) { |
121 | break; |
122 | } |
123 | const char* legacyKeyId = ures_getKey(keyMapEntry.getAlias()); |
124 | UnicodeString uBcpKeyId = ures_getUnicodeString(keyMapEntry.getAlias(), &sts); |
125 | if (U_FAILURE(sts)) { |
126 | break; |
127 | } |
128 | |
129 | // empty value indicates that BCP key is same with the legacy key. |
130 | const char* bcpKeyId = legacyKeyId; |
131 | if (!uBcpKeyId.isEmpty()) { |
132 | icu::CharString* bcpKeyIdBuf = gKeyTypeStringPool->create(); |
133 | if (bcpKeyIdBuf == nullptr) { |
134 | sts = U_MEMORY_ALLOCATION_ERROR; |
135 | break; |
136 | } |
137 | bcpKeyIdBuf->appendInvariantChars(uBcpKeyId, sts); |
138 | if (U_FAILURE(sts)) { |
139 | break; |
140 | } |
141 | bcpKeyId = bcpKeyIdBuf->data(); |
142 | } |
143 | |
144 | UBool isTZ = uprv_strcmp(legacyKeyId, "timezone" ) == 0; |
145 | |
146 | UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, nullptr, &sts); |
147 | if (U_FAILURE(sts)) { |
148 | break; |
149 | } |
150 | uint32_t specialTypes = SPECIALTYPE_NONE; |
151 | |
152 | LocalUResourceBundlePointer typeAliasResByKey; |
153 | LocalUResourceBundlePointer bcpTypeAliasResByKey; |
154 | |
155 | if (typeAliasRes.isValid()) { |
156 | tmpSts = U_ZERO_ERROR; |
157 | typeAliasResByKey.adoptInstead(ures_getByKey(typeAliasRes.getAlias(), legacyKeyId, nullptr, &tmpSts)); |
158 | if (U_FAILURE(tmpSts)) { |
159 | typeAliasResByKey.orphan(); |
160 | } |
161 | } |
162 | if (bcpTypeAliasRes.isValid()) { |
163 | tmpSts = U_ZERO_ERROR; |
164 | bcpTypeAliasResByKey.adoptInstead(ures_getByKey(bcpTypeAliasRes.getAlias(), bcpKeyId, nullptr, &tmpSts)); |
165 | if (U_FAILURE(tmpSts)) { |
166 | bcpTypeAliasResByKey.orphan(); |
167 | } |
168 | } |
169 | |
170 | // look up type map for the key, and walk through the mapping data |
171 | LocalUResourceBundlePointer typeMapResByKey(ures_getByKey(typeMapRes.getAlias(), legacyKeyId, nullptr, &sts)); |
172 | if (U_FAILURE(sts)) { |
173 | // We fail here if typeMap does not have an entry corresponding to every entry in keyMap (should |
174 | // not happen for valid keyTypeData), or if ures_getByKeyfails fails for some other reason |
175 | // (e.g. data file cannot be loaded, using stubdata, over-aggressive data filtering has removed |
176 | // something like timezoneTypes.res, etc.). Error code is already set. See ICU-21669. |
177 | UPRV_UNREACHABLE_ASSERT; |
178 | } else { |
179 | LocalUResourceBundlePointer typeMapEntry; |
180 | |
181 | while (ures_hasNext(typeMapResByKey.getAlias())) { |
182 | typeMapEntry.adoptInstead(ures_getNextResource(typeMapResByKey.getAlias(), typeMapEntry.orphan(), &sts)); |
183 | if (U_FAILURE(sts)) { |
184 | break; |
185 | } |
186 | const char* legacyTypeId = ures_getKey(typeMapEntry.getAlias()); |
187 | |
188 | // special types |
189 | if (uprv_strcmp(legacyTypeId, "CODEPOINTS" ) == 0) { |
190 | specialTypes |= SPECIALTYPE_CODEPOINTS; |
191 | continue; |
192 | } |
193 | if (uprv_strcmp(legacyTypeId, "REORDER_CODE" ) == 0) { |
194 | specialTypes |= SPECIALTYPE_REORDER_CODE; |
195 | continue; |
196 | } |
197 | if (uprv_strcmp(legacyTypeId, "RG_KEY_VALUE" ) == 0) { |
198 | specialTypes |= SPECIALTYPE_RG_KEY_VALUE; |
199 | continue; |
200 | } |
201 | |
202 | if (isTZ) { |
203 | // a timezone key uses a colon instead of a slash in the resource. |
204 | // e.g. America:Los_Angeles |
205 | if (uprv_strchr(legacyTypeId, ':') != nullptr) { |
206 | icu::CharString* legacyTypeIdBuf = |
207 | gKeyTypeStringPool->create(legacyTypeId, sts); |
208 | if (legacyTypeIdBuf == nullptr) { |
209 | sts = U_MEMORY_ALLOCATION_ERROR; |
210 | break; |
211 | } |
212 | if (U_FAILURE(sts)) { |
213 | break; |
214 | } |
215 | std::replace( |
216 | legacyTypeIdBuf->data(), |
217 | legacyTypeIdBuf->data() + legacyTypeIdBuf->length(), |
218 | ':', '/'); |
219 | legacyTypeId = legacyTypeIdBuf->data(); |
220 | } |
221 | } |
222 | |
223 | UnicodeString uBcpTypeId = ures_getUnicodeString(typeMapEntry.getAlias(), &sts); |
224 | if (U_FAILURE(sts)) { |
225 | break; |
226 | } |
227 | |
228 | // empty value indicates that BCP type is same with the legacy type. |
229 | const char* bcpTypeId = legacyTypeId; |
230 | if (!uBcpTypeId.isEmpty()) { |
231 | icu::CharString* bcpTypeIdBuf = gKeyTypeStringPool->create(); |
232 | if (bcpTypeIdBuf == nullptr) { |
233 | sts = U_MEMORY_ALLOCATION_ERROR; |
234 | break; |
235 | } |
236 | bcpTypeIdBuf->appendInvariantChars(uBcpTypeId, sts); |
237 | if (U_FAILURE(sts)) { |
238 | break; |
239 | } |
240 | bcpTypeId = bcpTypeIdBuf->data(); |
241 | } |
242 | |
243 | // Note: legacy type value should never be |
244 | // equivalent to bcp type value of a different |
245 | // type under the same key. So we use a single |
246 | // map for lookup. |
247 | LocExtType* t = gLocExtTypeEntries->create(); |
248 | if (t == nullptr) { |
249 | sts = U_MEMORY_ALLOCATION_ERROR; |
250 | break; |
251 | } |
252 | t->bcpId = bcpTypeId; |
253 | t->legacyId = legacyTypeId; |
254 | |
255 | uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts); |
256 | if (bcpTypeId != legacyTypeId) { |
257 | // different type value |
258 | uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts); |
259 | } |
260 | if (U_FAILURE(sts)) { |
261 | break; |
262 | } |
263 | |
264 | // also put aliases in the map |
265 | if (typeAliasResByKey.isValid()) { |
266 | LocalUResourceBundlePointer typeAliasDataEntry; |
267 | |
268 | ures_resetIterator(typeAliasResByKey.getAlias()); |
269 | while (ures_hasNext(typeAliasResByKey.getAlias()) && U_SUCCESS(sts)) { |
270 | int32_t toLen; |
271 | typeAliasDataEntry.adoptInstead(ures_getNextResource(typeAliasResByKey.getAlias(), typeAliasDataEntry.orphan(), &sts)); |
272 | const char16_t* to = ures_getString(typeAliasDataEntry.getAlias(), &toLen, &sts); |
273 | if (U_FAILURE(sts)) { |
274 | break; |
275 | } |
276 | // check if this is an alias of canonical legacy type |
277 | if (uprv_compareInvWithUChar(nullptr, legacyTypeId, -1, to, toLen) == 0) { |
278 | const char* from = ures_getKey(typeAliasDataEntry.getAlias()); |
279 | if (isTZ) { |
280 | // replace colon with slash if necessary |
281 | if (uprv_strchr(from, ':') != nullptr) { |
282 | icu::CharString* fromBuf = |
283 | gKeyTypeStringPool->create(from, sts); |
284 | if (fromBuf == nullptr) { |
285 | sts = U_MEMORY_ALLOCATION_ERROR; |
286 | break; |
287 | } |
288 | if (U_FAILURE(sts)) { |
289 | break; |
290 | } |
291 | std::replace( |
292 | fromBuf->data(), |
293 | fromBuf->data() + fromBuf->length(), |
294 | ':', '/'); |
295 | from = fromBuf->data(); |
296 | } |
297 | } |
298 | uhash_put(typeDataMap, (void*)from, t, &sts); |
299 | } |
300 | } |
301 | if (U_FAILURE(sts)) { |
302 | break; |
303 | } |
304 | } |
305 | |
306 | if (bcpTypeAliasResByKey.isValid()) { |
307 | LocalUResourceBundlePointer bcpTypeAliasDataEntry; |
308 | |
309 | ures_resetIterator(bcpTypeAliasResByKey.getAlias()); |
310 | while (ures_hasNext(bcpTypeAliasResByKey.getAlias()) && U_SUCCESS(sts)) { |
311 | int32_t toLen; |
312 | bcpTypeAliasDataEntry.adoptInstead(ures_getNextResource(bcpTypeAliasResByKey.getAlias(), bcpTypeAliasDataEntry.orphan(), &sts)); |
313 | const char16_t* to = ures_getString(bcpTypeAliasDataEntry.getAlias(), &toLen, &sts); |
314 | if (U_FAILURE(sts)) { |
315 | break; |
316 | } |
317 | // check if this is an alias of bcp type |
318 | if (uprv_compareInvWithUChar(nullptr, bcpTypeId, -1, to, toLen) == 0) { |
319 | const char* from = ures_getKey(bcpTypeAliasDataEntry.getAlias()); |
320 | uhash_put(typeDataMap, (void*)from, t, &sts); |
321 | } |
322 | } |
323 | if (U_FAILURE(sts)) { |
324 | break; |
325 | } |
326 | } |
327 | } |
328 | } |
329 | if (U_FAILURE(sts)) { |
330 | break; |
331 | } |
332 | |
333 | LocExtKeyData* keyData = gLocExtKeyDataEntries->create(); |
334 | if (keyData == nullptr) { |
335 | sts = U_MEMORY_ALLOCATION_ERROR; |
336 | break; |
337 | } |
338 | keyData->bcpId = bcpKeyId; |
339 | keyData->legacyId = legacyKeyId; |
340 | keyData->specialTypes = specialTypes; |
341 | keyData->typeMap.adoptInstead(typeDataMap); |
342 | |
343 | uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts); |
344 | if (legacyKeyId != bcpKeyId) { |
345 | // different key value |
346 | uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts); |
347 | } |
348 | if (U_FAILURE(sts)) { |
349 | break; |
350 | } |
351 | } |
352 | } |
353 | |
354 | static UBool |
355 | init() { |
356 | UErrorCode sts = U_ZERO_ERROR; |
357 | umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, sts); |
358 | if (U_FAILURE(sts)) { |
359 | return false; |
360 | } |
361 | return true; |
362 | } |
363 | |
364 | static UBool |
365 | isSpecialTypeCodepoints(const char* val) { |
366 | int32_t subtagLen = 0; |
367 | const char* p = val; |
368 | while (*p) { |
369 | if (*p == '-') { |
370 | if (subtagLen < 4 || subtagLen > 6) { |
371 | return false; |
372 | } |
373 | subtagLen = 0; |
374 | } else if ((*p >= '0' && *p <= '9') || |
375 | (*p >= 'A' && *p <= 'F') || // A-F/a-f are contiguous |
376 | (*p >= 'a' && *p <= 'f')) { // also in EBCDIC |
377 | subtagLen++; |
378 | } else { |
379 | return false; |
380 | } |
381 | p++; |
382 | } |
383 | return (subtagLen >= 4 && subtagLen <= 6); |
384 | } |
385 | |
386 | static UBool |
387 | isSpecialTypeReorderCode(const char* val) { |
388 | int32_t subtagLen = 0; |
389 | const char* p = val; |
390 | while (*p) { |
391 | if (*p == '-') { |
392 | if (subtagLen < 3 || subtagLen > 8) { |
393 | return false; |
394 | } |
395 | subtagLen = 0; |
396 | } else if (uprv_isASCIILetter(*p)) { |
397 | subtagLen++; |
398 | } else { |
399 | return false; |
400 | } |
401 | p++; |
402 | } |
403 | return (subtagLen >=3 && subtagLen <=8); |
404 | } |
405 | |
406 | static UBool |
407 | isSpecialTypeRgKeyValue(const char* val) { |
408 | int32_t subtagLen = 0; |
409 | const char* p = val; |
410 | while (*p) { |
411 | if ( (subtagLen < 2 && uprv_isASCIILetter(*p)) || |
412 | (subtagLen >= 2 && (*p == 'Z' || *p == 'z')) ) { |
413 | subtagLen++; |
414 | } else { |
415 | return false; |
416 | } |
417 | p++; |
418 | } |
419 | return (subtagLen == 6); |
420 | } |
421 | |
422 | U_CFUNC const char* |
423 | ulocimp_toBcpKey(const char* key) { |
424 | if (!init()) { |
425 | return nullptr; |
426 | } |
427 | |
428 | LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key); |
429 | if (keyData != nullptr) { |
430 | return keyData->bcpId; |
431 | } |
432 | return nullptr; |
433 | } |
434 | |
435 | U_CFUNC const char* |
436 | ulocimp_toLegacyKey(const char* key) { |
437 | if (!init()) { |
438 | return nullptr; |
439 | } |
440 | |
441 | LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key); |
442 | if (keyData != nullptr) { |
443 | return keyData->legacyId; |
444 | } |
445 | return nullptr; |
446 | } |
447 | |
448 | U_CFUNC const char* |
449 | ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) { |
450 | if (isKnownKey != nullptr) { |
451 | *isKnownKey = false; |
452 | } |
453 | if (isSpecialType != nullptr) { |
454 | *isSpecialType = false; |
455 | } |
456 | |
457 | if (!init()) { |
458 | return nullptr; |
459 | } |
460 | |
461 | LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key); |
462 | if (keyData != nullptr) { |
463 | if (isKnownKey != nullptr) { |
464 | *isKnownKey = true; |
465 | } |
466 | LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type); |
467 | if (t != nullptr) { |
468 | return t->bcpId; |
469 | } |
470 | if (keyData->specialTypes != SPECIALTYPE_NONE) { |
471 | UBool matched = false; |
472 | if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) { |
473 | matched = isSpecialTypeCodepoints(type); |
474 | } |
475 | if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) { |
476 | matched = isSpecialTypeReorderCode(type); |
477 | } |
478 | if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) { |
479 | matched = isSpecialTypeRgKeyValue(type); |
480 | } |
481 | if (matched) { |
482 | if (isSpecialType != nullptr) { |
483 | *isSpecialType = true; |
484 | } |
485 | return type; |
486 | } |
487 | } |
488 | } |
489 | return nullptr; |
490 | } |
491 | |
492 | |
493 | U_CFUNC const char* |
494 | ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) { |
495 | if (isKnownKey != nullptr) { |
496 | *isKnownKey = false; |
497 | } |
498 | if (isSpecialType != nullptr) { |
499 | *isSpecialType = false; |
500 | } |
501 | |
502 | if (!init()) { |
503 | return nullptr; |
504 | } |
505 | |
506 | LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key); |
507 | if (keyData != nullptr) { |
508 | if (isKnownKey != nullptr) { |
509 | *isKnownKey = true; |
510 | } |
511 | LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type); |
512 | if (t != nullptr) { |
513 | return t->legacyId; |
514 | } |
515 | if (keyData->specialTypes != SPECIALTYPE_NONE) { |
516 | UBool matched = false; |
517 | if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) { |
518 | matched = isSpecialTypeCodepoints(type); |
519 | } |
520 | if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) { |
521 | matched = isSpecialTypeReorderCode(type); |
522 | } |
523 | if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) { |
524 | matched = isSpecialTypeRgKeyValue(type); |
525 | } |
526 | if (matched) { |
527 | if (isSpecialType != nullptr) { |
528 | *isSpecialType = true; |
529 | } |
530 | return type; |
531 | } |
532 | } |
533 | } |
534 | return nullptr; |
535 | } |
536 | |
537 | |