1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5* Copyright (C) 2014-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8*/
9#include <algorithm>
10
11#include "unicode/utypes.h"
12#include "unicode/unistr.h"
13#include "unicode/uobject.h"
14
15#include "charstr.h"
16#include "cmemory.h"
17#include "cstring.h"
18#include "uassert.h"
19#include "ucln_cmn.h"
20#include "uhash.h"
21#include "umutex.h"
22#include "uresimp.h"
23#include "uvector.h"
24#include "udataswp.h" /* for InvChar functions */
25
// Top-level lookup table created by initFromResourceBundle() with
// uhash_hashIChars / uhash_compareIChars. Each value is a LocExtKeyData*,
// registered under the legacy key id and, when it differs, under the BCP key id.
// Closed and reset to nullptr by uloc_key_type_cleanup().
static UHashtable* gLocExtKeyMap = nullptr;
// One-time initialization state passed to umtx_initOnce() in init();
// reset by uloc_key_type_cleanup().
static icu::UInitOnce gLocExtKeyMapInitOnce {};
28
// Bit flags recording which special type values a key accepts.
// The flags are OR-ed together into LocExtKeyData::specialTypes.
enum SpecialType {
    SPECIALTYPE_NONE         = 0,
    SPECIALTYPE_CODEPOINTS   = 1 << 0,
    SPECIALTYPE_REORDER_CODE = 1 << 1,
    SPECIALTYPE_RG_KEY_VALUE = 1 << 2
};
36
37struct LocExtKeyData : public icu::UMemory {
38 const char* legacyId;
39 const char* bcpId;
40 icu::LocalUHashtablePointer typeMap;
41 uint32_t specialTypes;
42};
43
44struct LocExtType : public icu::UMemory {
45 const char* legacyId;
46 const char* bcpId;
47};
48
// Pools owning the objects allocated by initFromResourceBundle(); all three are
// deleted and reset to nullptr in uloc_key_type_cleanup().
// Backing storage for identifier strings that had to be converted or rewritten.
static icu::MemoryPool<icu::CharString>* gKeyTypeStringPool = nullptr;
// Backing storage for the LocExtKeyData values referenced from gLocExtKeyMap.
static icu::MemoryPool<LocExtKeyData>* gLocExtKeyDataEntries = nullptr;
// Backing storage for the LocExtType values referenced from the per-key type maps.
static icu::MemoryPool<LocExtType>* gLocExtTypeEntries = nullptr;
52
53U_CDECL_BEGIN
54
55static UBool U_CALLCONV
56uloc_key_type_cleanup() {
57 if (gLocExtKeyMap != nullptr) {
58 uhash_close(gLocExtKeyMap);
59 gLocExtKeyMap = nullptr;
60 }
61
62 delete gLocExtKeyDataEntries;
63 gLocExtKeyDataEntries = nullptr;
64
65 delete gLocExtTypeEntries;
66 gLocExtTypeEntries = nullptr;
67
68 delete gKeyTypeStringPool;
69 gKeyTypeStringPool = nullptr;
70
71 gLocExtKeyMapInitOnce.reset();
72 return true;
73}
74
75U_CDECL_END
76
77
78static void U_CALLCONV
79initFromResourceBundle(UErrorCode& sts) {
80 U_NAMESPACE_USE
81 ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KEY_TYPE, uloc_key_type_cleanup);
82
83 gLocExtKeyMap = uhash_open(uhash_hashIChars, uhash_compareIChars, nullptr, &sts);
84
85 LocalUResourceBundlePointer keyTypeDataRes(ures_openDirect(nullptr, "keyTypeData", &sts));
86 LocalUResourceBundlePointer keyMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "keyMap", nullptr, &sts));
87 LocalUResourceBundlePointer typeMapRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeMap", nullptr, &sts));
88
89 if (U_FAILURE(sts)) {
90 return;
91 }
92
93 UErrorCode tmpSts = U_ZERO_ERROR;
94 LocalUResourceBundlePointer typeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "typeAlias", nullptr, &tmpSts));
95 tmpSts = U_ZERO_ERROR;
96 LocalUResourceBundlePointer bcpTypeAliasRes(ures_getByKey(keyTypeDataRes.getAlias(), "bcpTypeAlias", nullptr, &tmpSts));
97
98 // initialize pools storing dynamically allocated objects
99 gKeyTypeStringPool = new icu::MemoryPool<icu::CharString>;
100 if (gKeyTypeStringPool == nullptr) {
101 sts = U_MEMORY_ALLOCATION_ERROR;
102 return;
103 }
104 gLocExtKeyDataEntries = new icu::MemoryPool<LocExtKeyData>;
105 if (gLocExtKeyDataEntries == nullptr) {
106 sts = U_MEMORY_ALLOCATION_ERROR;
107 return;
108 }
109 gLocExtTypeEntries = new icu::MemoryPool<LocExtType>;
110 if (gLocExtTypeEntries == nullptr) {
111 sts = U_MEMORY_ALLOCATION_ERROR;
112 return;
113 }
114
115 // iterate through keyMap resource
116 LocalUResourceBundlePointer keyMapEntry;
117
118 while (ures_hasNext(keyMapRes.getAlias())) {
119 keyMapEntry.adoptInstead(ures_getNextResource(keyMapRes.getAlias(), keyMapEntry.orphan(), &sts));
120 if (U_FAILURE(sts)) {
121 break;
122 }
123 const char* legacyKeyId = ures_getKey(keyMapEntry.getAlias());
124 UnicodeString uBcpKeyId = ures_getUnicodeString(keyMapEntry.getAlias(), &sts);
125 if (U_FAILURE(sts)) {
126 break;
127 }
128
129 // empty value indicates that BCP key is same with the legacy key.
130 const char* bcpKeyId = legacyKeyId;
131 if (!uBcpKeyId.isEmpty()) {
132 icu::CharString* bcpKeyIdBuf = gKeyTypeStringPool->create();
133 if (bcpKeyIdBuf == nullptr) {
134 sts = U_MEMORY_ALLOCATION_ERROR;
135 break;
136 }
137 bcpKeyIdBuf->appendInvariantChars(uBcpKeyId, sts);
138 if (U_FAILURE(sts)) {
139 break;
140 }
141 bcpKeyId = bcpKeyIdBuf->data();
142 }
143
144 UBool isTZ = uprv_strcmp(legacyKeyId, "timezone") == 0;
145
146 UHashtable* typeDataMap = uhash_open(uhash_hashIChars, uhash_compareIChars, nullptr, &sts);
147 if (U_FAILURE(sts)) {
148 break;
149 }
150 uint32_t specialTypes = SPECIALTYPE_NONE;
151
152 LocalUResourceBundlePointer typeAliasResByKey;
153 LocalUResourceBundlePointer bcpTypeAliasResByKey;
154
155 if (typeAliasRes.isValid()) {
156 tmpSts = U_ZERO_ERROR;
157 typeAliasResByKey.adoptInstead(ures_getByKey(typeAliasRes.getAlias(), legacyKeyId, nullptr, &tmpSts));
158 if (U_FAILURE(tmpSts)) {
159 typeAliasResByKey.orphan();
160 }
161 }
162 if (bcpTypeAliasRes.isValid()) {
163 tmpSts = U_ZERO_ERROR;
164 bcpTypeAliasResByKey.adoptInstead(ures_getByKey(bcpTypeAliasRes.getAlias(), bcpKeyId, nullptr, &tmpSts));
165 if (U_FAILURE(tmpSts)) {
166 bcpTypeAliasResByKey.orphan();
167 }
168 }
169
170 // look up type map for the key, and walk through the mapping data
171 LocalUResourceBundlePointer typeMapResByKey(ures_getByKey(typeMapRes.getAlias(), legacyKeyId, nullptr, &sts));
172 if (U_FAILURE(sts)) {
173 // We fail here if typeMap does not have an entry corresponding to every entry in keyMap (should
174 // not happen for valid keyTypeData), or if ures_getByKeyfails fails for some other reason
175 // (e.g. data file cannot be loaded, using stubdata, over-aggressive data filtering has removed
176 // something like timezoneTypes.res, etc.). Error code is already set. See ICU-21669.
177 UPRV_UNREACHABLE_ASSERT;
178 } else {
179 LocalUResourceBundlePointer typeMapEntry;
180
181 while (ures_hasNext(typeMapResByKey.getAlias())) {
182 typeMapEntry.adoptInstead(ures_getNextResource(typeMapResByKey.getAlias(), typeMapEntry.orphan(), &sts));
183 if (U_FAILURE(sts)) {
184 break;
185 }
186 const char* legacyTypeId = ures_getKey(typeMapEntry.getAlias());
187
188 // special types
189 if (uprv_strcmp(legacyTypeId, "CODEPOINTS") == 0) {
190 specialTypes |= SPECIALTYPE_CODEPOINTS;
191 continue;
192 }
193 if (uprv_strcmp(legacyTypeId, "REORDER_CODE") == 0) {
194 specialTypes |= SPECIALTYPE_REORDER_CODE;
195 continue;
196 }
197 if (uprv_strcmp(legacyTypeId, "RG_KEY_VALUE") == 0) {
198 specialTypes |= SPECIALTYPE_RG_KEY_VALUE;
199 continue;
200 }
201
202 if (isTZ) {
203 // a timezone key uses a colon instead of a slash in the resource.
204 // e.g. America:Los_Angeles
205 if (uprv_strchr(legacyTypeId, ':') != nullptr) {
206 icu::CharString* legacyTypeIdBuf =
207 gKeyTypeStringPool->create(legacyTypeId, sts);
208 if (legacyTypeIdBuf == nullptr) {
209 sts = U_MEMORY_ALLOCATION_ERROR;
210 break;
211 }
212 if (U_FAILURE(sts)) {
213 break;
214 }
215 std::replace(
216 legacyTypeIdBuf->data(),
217 legacyTypeIdBuf->data() + legacyTypeIdBuf->length(),
218 ':', '/');
219 legacyTypeId = legacyTypeIdBuf->data();
220 }
221 }
222
223 UnicodeString uBcpTypeId = ures_getUnicodeString(typeMapEntry.getAlias(), &sts);
224 if (U_FAILURE(sts)) {
225 break;
226 }
227
228 // empty value indicates that BCP type is same with the legacy type.
229 const char* bcpTypeId = legacyTypeId;
230 if (!uBcpTypeId.isEmpty()) {
231 icu::CharString* bcpTypeIdBuf = gKeyTypeStringPool->create();
232 if (bcpTypeIdBuf == nullptr) {
233 sts = U_MEMORY_ALLOCATION_ERROR;
234 break;
235 }
236 bcpTypeIdBuf->appendInvariantChars(uBcpTypeId, sts);
237 if (U_FAILURE(sts)) {
238 break;
239 }
240 bcpTypeId = bcpTypeIdBuf->data();
241 }
242
243 // Note: legacy type value should never be
244 // equivalent to bcp type value of a different
245 // type under the same key. So we use a single
246 // map for lookup.
247 LocExtType* t = gLocExtTypeEntries->create();
248 if (t == nullptr) {
249 sts = U_MEMORY_ALLOCATION_ERROR;
250 break;
251 }
252 t->bcpId = bcpTypeId;
253 t->legacyId = legacyTypeId;
254
255 uhash_put(typeDataMap, (void*)legacyTypeId, t, &sts);
256 if (bcpTypeId != legacyTypeId) {
257 // different type value
258 uhash_put(typeDataMap, (void*)bcpTypeId, t, &sts);
259 }
260 if (U_FAILURE(sts)) {
261 break;
262 }
263
264 // also put aliases in the map
265 if (typeAliasResByKey.isValid()) {
266 LocalUResourceBundlePointer typeAliasDataEntry;
267
268 ures_resetIterator(typeAliasResByKey.getAlias());
269 while (ures_hasNext(typeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
270 int32_t toLen;
271 typeAliasDataEntry.adoptInstead(ures_getNextResource(typeAliasResByKey.getAlias(), typeAliasDataEntry.orphan(), &sts));
272 const char16_t* to = ures_getString(typeAliasDataEntry.getAlias(), &toLen, &sts);
273 if (U_FAILURE(sts)) {
274 break;
275 }
276 // check if this is an alias of canonical legacy type
277 if (uprv_compareInvWithUChar(nullptr, legacyTypeId, -1, to, toLen) == 0) {
278 const char* from = ures_getKey(typeAliasDataEntry.getAlias());
279 if (isTZ) {
280 // replace colon with slash if necessary
281 if (uprv_strchr(from, ':') != nullptr) {
282 icu::CharString* fromBuf =
283 gKeyTypeStringPool->create(from, sts);
284 if (fromBuf == nullptr) {
285 sts = U_MEMORY_ALLOCATION_ERROR;
286 break;
287 }
288 if (U_FAILURE(sts)) {
289 break;
290 }
291 std::replace(
292 fromBuf->data(),
293 fromBuf->data() + fromBuf->length(),
294 ':', '/');
295 from = fromBuf->data();
296 }
297 }
298 uhash_put(typeDataMap, (void*)from, t, &sts);
299 }
300 }
301 if (U_FAILURE(sts)) {
302 break;
303 }
304 }
305
306 if (bcpTypeAliasResByKey.isValid()) {
307 LocalUResourceBundlePointer bcpTypeAliasDataEntry;
308
309 ures_resetIterator(bcpTypeAliasResByKey.getAlias());
310 while (ures_hasNext(bcpTypeAliasResByKey.getAlias()) && U_SUCCESS(sts)) {
311 int32_t toLen;
312 bcpTypeAliasDataEntry.adoptInstead(ures_getNextResource(bcpTypeAliasResByKey.getAlias(), bcpTypeAliasDataEntry.orphan(), &sts));
313 const char16_t* to = ures_getString(bcpTypeAliasDataEntry.getAlias(), &toLen, &sts);
314 if (U_FAILURE(sts)) {
315 break;
316 }
317 // check if this is an alias of bcp type
318 if (uprv_compareInvWithUChar(nullptr, bcpTypeId, -1, to, toLen) == 0) {
319 const char* from = ures_getKey(bcpTypeAliasDataEntry.getAlias());
320 uhash_put(typeDataMap, (void*)from, t, &sts);
321 }
322 }
323 if (U_FAILURE(sts)) {
324 break;
325 }
326 }
327 }
328 }
329 if (U_FAILURE(sts)) {
330 break;
331 }
332
333 LocExtKeyData* keyData = gLocExtKeyDataEntries->create();
334 if (keyData == nullptr) {
335 sts = U_MEMORY_ALLOCATION_ERROR;
336 break;
337 }
338 keyData->bcpId = bcpKeyId;
339 keyData->legacyId = legacyKeyId;
340 keyData->specialTypes = specialTypes;
341 keyData->typeMap.adoptInstead(typeDataMap);
342
343 uhash_put(gLocExtKeyMap, (void*)legacyKeyId, keyData, &sts);
344 if (legacyKeyId != bcpKeyId) {
345 // different key value
346 uhash_put(gLocExtKeyMap, (void*)bcpKeyId, keyData, &sts);
347 }
348 if (U_FAILURE(sts)) {
349 break;
350 }
351 }
352}
353
354static UBool
355init() {
356 UErrorCode sts = U_ZERO_ERROR;
357 umtx_initOnce(gLocExtKeyMapInitOnce, &initFromResourceBundle, sts);
358 if (U_FAILURE(sts)) {
359 return false;
360 }
361 return true;
362}
363
364static UBool
365isSpecialTypeCodepoints(const char* val) {
366 int32_t subtagLen = 0;
367 const char* p = val;
368 while (*p) {
369 if (*p == '-') {
370 if (subtagLen < 4 || subtagLen > 6) {
371 return false;
372 }
373 subtagLen = 0;
374 } else if ((*p >= '0' && *p <= '9') ||
375 (*p >= 'A' && *p <= 'F') || // A-F/a-f are contiguous
376 (*p >= 'a' && *p <= 'f')) { // also in EBCDIC
377 subtagLen++;
378 } else {
379 return false;
380 }
381 p++;
382 }
383 return (subtagLen >= 4 && subtagLen <= 6);
384}
385
386static UBool
387isSpecialTypeReorderCode(const char* val) {
388 int32_t subtagLen = 0;
389 const char* p = val;
390 while (*p) {
391 if (*p == '-') {
392 if (subtagLen < 3 || subtagLen > 8) {
393 return false;
394 }
395 subtagLen = 0;
396 } else if (uprv_isASCIILetter(*p)) {
397 subtagLen++;
398 } else {
399 return false;
400 }
401 p++;
402 }
403 return (subtagLen >=3 && subtagLen <=8);
404}
405
406static UBool
407isSpecialTypeRgKeyValue(const char* val) {
408 int32_t subtagLen = 0;
409 const char* p = val;
410 while (*p) {
411 if ( (subtagLen < 2 && uprv_isASCIILetter(*p)) ||
412 (subtagLen >= 2 && (*p == 'Z' || *p == 'z')) ) {
413 subtagLen++;
414 } else {
415 return false;
416 }
417 p++;
418 }
419 return (subtagLen == 6);
420}
421
422U_CFUNC const char*
423ulocimp_toBcpKey(const char* key) {
424 if (!init()) {
425 return nullptr;
426 }
427
428 LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
429 if (keyData != nullptr) {
430 return keyData->bcpId;
431 }
432 return nullptr;
433}
434
435U_CFUNC const char*
436ulocimp_toLegacyKey(const char* key) {
437 if (!init()) {
438 return nullptr;
439 }
440
441 LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
442 if (keyData != nullptr) {
443 return keyData->legacyId;
444 }
445 return nullptr;
446}
447
448U_CFUNC const char*
449ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
450 if (isKnownKey != nullptr) {
451 *isKnownKey = false;
452 }
453 if (isSpecialType != nullptr) {
454 *isSpecialType = false;
455 }
456
457 if (!init()) {
458 return nullptr;
459 }
460
461 LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
462 if (keyData != nullptr) {
463 if (isKnownKey != nullptr) {
464 *isKnownKey = true;
465 }
466 LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type);
467 if (t != nullptr) {
468 return t->bcpId;
469 }
470 if (keyData->specialTypes != SPECIALTYPE_NONE) {
471 UBool matched = false;
472 if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
473 matched = isSpecialTypeCodepoints(type);
474 }
475 if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
476 matched = isSpecialTypeReorderCode(type);
477 }
478 if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
479 matched = isSpecialTypeRgKeyValue(type);
480 }
481 if (matched) {
482 if (isSpecialType != nullptr) {
483 *isSpecialType = true;
484 }
485 return type;
486 }
487 }
488 }
489 return nullptr;
490}
491
492
493U_CFUNC const char*
494ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType) {
495 if (isKnownKey != nullptr) {
496 *isKnownKey = false;
497 }
498 if (isSpecialType != nullptr) {
499 *isSpecialType = false;
500 }
501
502 if (!init()) {
503 return nullptr;
504 }
505
506 LocExtKeyData* keyData = (LocExtKeyData*)uhash_get(gLocExtKeyMap, key);
507 if (keyData != nullptr) {
508 if (isKnownKey != nullptr) {
509 *isKnownKey = true;
510 }
511 LocExtType* t = (LocExtType*)uhash_get(keyData->typeMap.getAlias(), type);
512 if (t != nullptr) {
513 return t->legacyId;
514 }
515 if (keyData->specialTypes != SPECIALTYPE_NONE) {
516 UBool matched = false;
517 if (keyData->specialTypes & SPECIALTYPE_CODEPOINTS) {
518 matched = isSpecialTypeCodepoints(type);
519 }
520 if (!matched && keyData->specialTypes & SPECIALTYPE_REORDER_CODE) {
521 matched = isSpecialTypeReorderCode(type);
522 }
523 if (!matched && keyData->specialTypes & SPECIALTYPE_RG_KEY_VALUE) {
524 matched = isSpecialTypeRgKeyValue(type);
525 }
526 if (matched) {
527 if (isSpecialType != nullptr) {
528 *isSpecialType = true;
529 }
530 return type;
531 }
532 }
533 }
534 return nullptr;
535}
536
537