1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4 **********************************************************************
5 * Copyright (C) 1997-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8*
9* File locid.cpp
10*
11* Created by: Richard Gillam
12*
13* Modification History:
14*
15* Date Name Description
16* 02/11/97 aliu Changed gLocPath to fgDataDirectory and added
17* methods to get and set it.
18* 04/02/97 aliu Made operator!= inline; fixed return value
19* of getName().
20* 04/15/97 aliu Cleanup for AIX/Win32.
21* 04/24/97 aliu Numerous changes per code review.
22* 08/18/98 stephen Changed getDisplayName()
23* Added SIMPLIFIED_CHINESE, TRADITIONAL_CHINESE
24* Added getISOCountries(), getISOLanguages(),
25* getLanguagesForCountry()
26* 03/16/99 bertrand rehaul.
27* 07/21/99 stephen Added U_CFUNC setDefault
28* 11/09/99 weiv Added const char * getName() const;
29* 04/12/00 srl removing unicodestring api's and cached hash code
30* 08/10/01 grhoten Change the static Locales to accessor functions
31******************************************************************************
32*/
33
34#include <utility>
35
36#include "unicode/bytestream.h"
37#include "unicode/locid.h"
38#include "unicode/strenum.h"
39#include "unicode/stringpiece.h"
40#include "unicode/uloc.h"
41
42#include "bytesinkutil.h"
43#include "charstr.h"
44#include "cmemory.h"
45#include "cstring.h"
46#include "mutex.h"
47#include "putilimp.h"
48#include "uassert.h"
49#include "ucln_cmn.h"
50#include "uhash.h"
51#include "ulocimp.h"
52#include "umutex.h"
53#include "ustr_imp.h"
54
55U_CDECL_BEGIN
56static UBool U_CALLCONV locale_cleanup(void);
57U_CDECL_END
58
59U_NAMESPACE_BEGIN
60
// Array of eMAX_LOCALES well-known locales, indexed by ELocalePos. Allocated
// by locale_init() and released by locale_cleanup().
static Locale *gLocaleCache = NULL;
// Ensures locale_init() runs once; reset by locale_cleanup().
static UInitOnce gLocaleCacheInitOnce = U_INITONCE_INITIALIZER;

// gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale.
static UMutex gDefaultLocaleMutex;
// Maps a locale name to the Locale object created for it. The table owns its
// values (they are deleted through deleteLocale when the table is closed).
static UHashtable *gDefaultLocalesHashT = NULL;
// Current default locale; points at a value owned by gDefaultLocalesHashT.
static Locale *gDefaultLocale = NULL;
68
69/**
70 * \def ULOC_STRING_LIMIT
71 * strings beyond this value crash in CharString
72 */
73#define ULOC_STRING_LIMIT 357913941
74
75U_NAMESPACE_END
76
/**
 * Index of each well-known locale inside gLocaleCache.
 * eMAX_LOCALES is the number of entries and must stay last.
 */
enum ELocalePos {
    // Language-only locales.
    eENGLISH,
    eFRENCH,
    eGERMAN,
    eITALIAN,
    eJAPANESE,
    eKOREAN,
    eCHINESE,

    // Language + country locales.
    eFRANCE,
    eGERMANY,
    eITALY,
    eJAPAN,
    eKOREA,
    eCHINA,      /* Alias for PRC */
    eTAIWAN,
    eUK,
    eUS,
    eCANADA,
    eCANADA_FRENCH,
    eROOT,

    //eDEFAULT,
    eMAX_LOCALES
};
103
// Keyword extraction helper; its definition is not in the visible part of this
// file. createKeywords()/createUnicodeKeywords() call it with the text after
// '@', prev == '@', a keyword buffer, no value buffer and valuesToo == FALSE,
// and treat the return value as the length of the keyword list written.
// NOTE(review): exact semantics of the value-related parameters should be
// confirmed against the definition.
U_CFUNC int32_t locale_getKeywords(const char *localeID,
            char prev,
            char *keywords, int32_t keywordCapacity,
            char *values, int32_t valuesCapacity, int32_t *valLen,
            UBool valuesToo,
            UErrorCode *status);
110
111U_CDECL_BEGIN
112//
// Deleter function for Locales owned by the default Locale hash table.
114//
115static void U_CALLCONV
116deleteLocale(void *obj) {
117 delete (icu::Locale *) obj;
118}
119
120static UBool U_CALLCONV locale_cleanup(void)
121{
122 U_NAMESPACE_USE
123
124 delete [] gLocaleCache;
125 gLocaleCache = NULL;
126 gLocaleCacheInitOnce.reset();
127
128 if (gDefaultLocalesHashT) {
129 uhash_close(gDefaultLocalesHashT); // Automatically deletes all elements, using deleter func.
130 gDefaultLocalesHashT = NULL;
131 }
132 gDefaultLocale = NULL;
133 return TRUE;
134}
135
136
137static void U_CALLCONV locale_init(UErrorCode &status) {
138 U_NAMESPACE_USE
139
140 U_ASSERT(gLocaleCache == NULL);
141 gLocaleCache = new Locale[(int)eMAX_LOCALES];
142 if (gLocaleCache == NULL) {
143 status = U_MEMORY_ALLOCATION_ERROR;
144 return;
145 }
146 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
147 gLocaleCache[eROOT] = Locale("");
148 gLocaleCache[eENGLISH] = Locale("en");
149 gLocaleCache[eFRENCH] = Locale("fr");
150 gLocaleCache[eGERMAN] = Locale("de");
151 gLocaleCache[eITALIAN] = Locale("it");
152 gLocaleCache[eJAPANESE] = Locale("ja");
153 gLocaleCache[eKOREAN] = Locale("ko");
154 gLocaleCache[eCHINESE] = Locale("zh");
155 gLocaleCache[eFRANCE] = Locale("fr", "FR");
156 gLocaleCache[eGERMANY] = Locale("de", "DE");
157 gLocaleCache[eITALY] = Locale("it", "IT");
158 gLocaleCache[eJAPAN] = Locale("ja", "JP");
159 gLocaleCache[eKOREA] = Locale("ko", "KR");
160 gLocaleCache[eCHINA] = Locale("zh", "CN");
161 gLocaleCache[eTAIWAN] = Locale("zh", "TW");
162 gLocaleCache[eUK] = Locale("en", "GB");
163 gLocaleCache[eUS] = Locale("en", "US");
164 gLocaleCache[eCANADA] = Locale("en", "CA");
165 gLocaleCache[eCANADA_FRENCH] = Locale("fr", "CA");
166}
167
168U_CDECL_END
169
170U_NAMESPACE_BEGIN
171
172Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
173 // Synchronize this entire function.
174 Mutex lock(&gDefaultLocaleMutex);
175
176 UBool canonicalize = FALSE;
177
178 // If given a NULL string for the locale id, grab the default
179 // name from the system.
180 // (Different from most other locale APIs, where a null name means use
181 // the current ICU default locale.)
182 if (id == NULL) {
183 id = uprv_getDefaultLocaleID(); // This function not thread safe? TODO: verify.
184 canonicalize = TRUE; // always canonicalize host ID
185 }
186
187 char localeNameBuf[512];
188
189 if (canonicalize) {
190 uloc_canonicalize(id, localeNameBuf, sizeof(localeNameBuf)-1, &status);
191 } else {
192 uloc_getName(id, localeNameBuf, sizeof(localeNameBuf)-1, &status);
193 }
194 localeNameBuf[sizeof(localeNameBuf)-1] = 0; // Force null termination in event of
195 // a long name filling the buffer.
196 // (long names are truncated.)
197 //
198 if (U_FAILURE(status)) {
199 return gDefaultLocale;
200 }
201
202 if (gDefaultLocalesHashT == NULL) {
203 gDefaultLocalesHashT = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
204 if (U_FAILURE(status)) {
205 return gDefaultLocale;
206 }
207 uhash_setValueDeleter(gDefaultLocalesHashT, deleteLocale);
208 ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
209 }
210
211 Locale *newDefault = (Locale *)uhash_get(gDefaultLocalesHashT, localeNameBuf);
212 if (newDefault == NULL) {
213 newDefault = new Locale(Locale::eBOGUS);
214 if (newDefault == NULL) {
215 status = U_MEMORY_ALLOCATION_ERROR;
216 return gDefaultLocale;
217 }
218 newDefault->init(localeNameBuf, FALSE);
219 uhash_put(gDefaultLocalesHashT, (char*) newDefault->getName(), newDefault, &status);
220 if (U_FAILURE(status)) {
221 return gDefaultLocale;
222 }
223 }
224 gDefaultLocale = newDefault;
225 return gDefaultLocale;
226}
227
228U_NAMESPACE_END
229
230/* sfb 07/21/99 */
231U_CFUNC void
232locale_set_default(const char *id)
233{
234 U_NAMESPACE_USE
235 UErrorCode status = U_ZERO_ERROR;
236 locale_set_default_internal(id, status);
237}
238/* end */
239
240U_CFUNC const char *
241locale_get_default(void)
242{
243 U_NAMESPACE_USE
244 return Locale::getDefault().getName();
245}
246
247
248U_NAMESPACE_BEGIN
249
250UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
251
252/*Character separating the posix id fields*/
253// '_'
254// In the platform codepage.
255#define SEP_CHAR '_'
256
257Locale::~Locale()
258{
259 if (baseName != fullName) {
260 uprv_free(baseName);
261 }
262 baseName = NULL;
263 /*if fullName is on the heap, we free it*/
264 if (fullName != fullNameBuffer)
265 {
266 uprv_free(fullName);
267 fullName = NULL;
268 }
269}
270
271Locale::Locale()
272 : UObject(), fullName(fullNameBuffer), baseName(NULL)
273{
274 init(NULL, FALSE);
275}
276
277/*
278 * Internal constructor to allow construction of a locale object with
279 * NO side effects. (Default constructor tries to get
280 * the default locale.)
281 */
282Locale::Locale(Locale::ELocaleType)
283 : UObject(), fullName(fullNameBuffer), baseName(NULL)
284{
285 setToBogus();
286}
287
288
289Locale::Locale( const char * newLanguage,
290 const char * newCountry,
291 const char * newVariant,
292 const char * newKeywords)
293 : UObject(), fullName(fullNameBuffer), baseName(NULL)
294{
295 if( (newLanguage==NULL) && (newCountry == NULL) && (newVariant == NULL) )
296 {
297 init(NULL, FALSE); /* shortcut */
298 }
299 else
300 {
301 UErrorCode status = U_ZERO_ERROR;
302 int32_t size = 0;
303 int32_t lsize = 0;
304 int32_t csize = 0;
305 int32_t vsize = 0;
306 int32_t ksize = 0;
307
308 // Calculate the size of the resulting string.
309
310 // Language
311 if ( newLanguage != NULL )
312 {
313 lsize = (int32_t)uprv_strlen(newLanguage);
314 if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap
315 setToBogus();
316 return;
317 }
318 size = lsize;
319 }
320
321 CharString togo(newLanguage, lsize, status); // start with newLanguage
322
323 // _Country
324 if ( newCountry != NULL )
325 {
326 csize = (int32_t)uprv_strlen(newCountry);
327 if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap
328 setToBogus();
329 return;
330 }
331 size += csize;
332 }
333
334 // _Variant
335 if ( newVariant != NULL )
336 {
337 // remove leading _'s
338 while(newVariant[0] == SEP_CHAR)
339 {
340 newVariant++;
341 }
342
343 // remove trailing _'s
344 vsize = (int32_t)uprv_strlen(newVariant);
345 if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap
346 setToBogus();
347 return;
348 }
349 while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) )
350 {
351 vsize--;
352 }
353 }
354
355 if( vsize > 0 )
356 {
357 size += vsize;
358 }
359
360 // Separator rules:
361 if ( vsize > 0 )
362 {
363 size += 2; // at least: __v
364 }
365 else if ( csize > 0 )
366 {
367 size += 1; // at least: _v
368 }
369
370 if ( newKeywords != NULL)
371 {
372 ksize = (int32_t)uprv_strlen(newKeywords);
373 if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) {
374 setToBogus();
375 return;
376 }
377 size += ksize + 1;
378 }
379
380 // NOW we have the full locale string..
381 // Now, copy it back.
382
383 // newLanguage is already copied
384
385 if ( ( vsize != 0 ) || (csize != 0) ) // at least: __v
386 { // ^
387 togo.append(SEP_CHAR, status);
388 }
389
390 if ( csize != 0 )
391 {
392 togo.append(newCountry, status);
393 }
394
395 if ( vsize != 0)
396 {
397 togo.append(SEP_CHAR, status)
398 .append(newVariant, vsize, status);
399 }
400
401 if ( ksize != 0)
402 {
403 if (uprv_strchr(newKeywords, '=')) {
404 togo.append('@', status); /* keyword parsing */
405 }
406 else {
407 togo.append('_', status); /* Variant parsing with a script */
408 if ( vsize == 0) {
409 togo.append('_', status); /* No country found */
410 }
411 }
412 togo.append(newKeywords, status);
413 }
414
415 if (U_FAILURE(status)) {
416 // Something went wrong with appending, etc.
417 setToBogus();
418 return;
419 }
420 // Parse it, because for example 'language' might really be a complete
421 // string.
422 init(togo.data(), FALSE);
423 }
424}
425
426Locale::Locale(const Locale &other)
427 : UObject(other), fullName(fullNameBuffer), baseName(NULL)
428{
429 *this = other;
430}
431
432Locale::Locale(Locale&& other) U_NOEXCEPT
433 : UObject(other), fullName(fullNameBuffer), baseName(fullName) {
434 *this = std::move(other);
435}
436
437Locale& Locale::operator=(const Locale& other) {
438 if (this == &other) {
439 return *this;
440 }
441
442 setToBogus();
443
444 if (other.fullName == other.fullNameBuffer) {
445 uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
446 } else if (other.fullName == nullptr) {
447 fullName = nullptr;
448 } else {
449 fullName = uprv_strdup(other.fullName);
450 if (fullName == nullptr) return *this;
451 }
452
453 if (other.baseName == other.fullName) {
454 baseName = fullName;
455 } else if (other.baseName != nullptr) {
456 baseName = uprv_strdup(other.baseName);
457 if (baseName == nullptr) return *this;
458 }
459
460 uprv_strcpy(language, other.language);
461 uprv_strcpy(script, other.script);
462 uprv_strcpy(country, other.country);
463
464 variantBegin = other.variantBegin;
465 fIsBogus = other.fIsBogus;
466
467 return *this;
468}
469
470Locale& Locale::operator=(Locale&& other) U_NOEXCEPT {
471 if (baseName != fullName) uprv_free(baseName);
472 if (fullName != fullNameBuffer) uprv_free(fullName);
473
474 if (other.fullName == other.fullNameBuffer) {
475 uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
476 fullName = fullNameBuffer;
477 } else {
478 fullName = other.fullName;
479 }
480
481 if (other.baseName == other.fullName) {
482 baseName = fullName;
483 } else {
484 baseName = other.baseName;
485 }
486
487 uprv_strcpy(language, other.language);
488 uprv_strcpy(script, other.script);
489 uprv_strcpy(country, other.country);
490
491 variantBegin = other.variantBegin;
492 fIsBogus = other.fIsBogus;
493
494 other.baseName = other.fullName = other.fullNameBuffer;
495
496 return *this;
497}
498
499Locale *
500Locale::clone() const {
501 return new Locale(*this);
502}
503
504UBool
505Locale::operator==( const Locale& other) const
506{
507 return (uprv_strcmp(other.fullName, fullName) == 0);
508}
509
510#define ISASCIIALPHA(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
511
/*This function initializes a Locale from a C locale ID*/
/*
 * Steps, in order:
 *  1. Release any storage currently held and clear the bogus flag.
 *  2. A NULL localeID makes this object a copy of the default locale.
 *  3. Write the ID into fullName with uloc_canonicalize() (canonicalize is
 *     true) or uloc_getName(); if the inline buffer is too small, allocate
 *     length+1 bytes on the heap and repeat the call.
 *  4. Split the part before any '@' on '_' into at most four fields and fill
 *     in language, script, country and variantBegin from them.
 *  5. Set up baseName through initBaseName().
 * Any failure leaves the object bogus (there is no UErrorCode parameter).
 * Returns *this.
 */
Locale& Locale::init(const char* localeID, UBool canonicalize)
{
    fIsBogus = FALSE;
    /* Free our current storage */
    if (baseName != fullName) {
        uprv_free(baseName);
    }
    baseName = NULL;
    if(fullName != fullNameBuffer) {
        uprv_free(fullName);
        fullName = fullNameBuffer;
    }

    // not a loop:
    // just an easy way to have a common error-exit
    // without goto and without another function
    do {
        char *separator;
        // field[i] points at the start of the i-th '_'-separated field inside
        // fullName; fieldLen[i] is its length. Unused entries stay 0.
        char *field[5] = {0};
        int32_t fieldLen[5] = {0};
        int32_t fieldIdx;
        int32_t variantField;
        int32_t length;
        UErrorCode err;

        if(localeID == NULL) {
            // not an error, just set the default locale
            return *this = getDefault();
        }

        /* preset all fields to empty */
        language[0] = script[0] = country[0] = 0;

        // "canonicalize" the locale ID to ICU/Java format
        err = U_ZERO_ERROR;
        length = canonicalize ?
            uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) :
            uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);

        if(err == U_BUFFER_OVERFLOW_ERROR || length >= (int32_t)sizeof(fullNameBuffer)) {
            /*Go to heap for the fullName if necessary*/
            fullName = (char *)uprv_malloc(sizeof(char)*(length + 1));
            if(fullName == 0) {
                // Restore the inline buffer so setToBogus() below sees a
                // consistent object.
                fullName = fullNameBuffer;
                break; // error: out of memory
            }
            err = U_ZERO_ERROR;
            length = canonicalize ?
                uloc_canonicalize(localeID, fullName, length+1, &err) :
                uloc_getName(localeID, fullName, length+1, &err);
        }
        if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
            /* should never occur */
            break;
        }

        // Default: no variant, so the variant "begins" at the end of the name.
        variantBegin = length;

        /* after uloc_getName/canonicalize() we know that only '_' are separators */
        /* But _ could also appeared in timezone such as "en@timezone=America/Los_Angeles" */
        separator = field[0] = fullName;
        fieldIdx = 1;
        char* at = uprv_strchr(fullName, '@');
        // Record field starts/lengths for each '_' that occurs before '@',
        // stopping once field[] has four entries.
        while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != 0 &&
               fieldIdx < UPRV_LENGTHOF(field)-1 &&
               (at == nullptr || separator < at)) {
            field[fieldIdx] = separator + 1;
            fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
            fieldIdx++;
        }
        // variant may contain @foo or .foo POSIX cruft; remove it
        separator = uprv_strchr(field[fieldIdx-1], '@');
        char* sep2 = uprv_strchr(field[fieldIdx-1], '.');
        if (separator!=NULL || sep2!=NULL) {
            // End the last field at whichever of '@' / '.' comes first.
            if (separator==NULL || (sep2!=NULL && separator > sep2)) {
                separator = sep2;
            }
            fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]);
        } else {
            fieldLen[fieldIdx-1] = length - (int32_t)(field[fieldIdx-1] - fullName);
        }

        if (fieldLen[0] >= (int32_t)(sizeof(language)))
        {
            break; // error: the language field is too long
        }

        variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */
        if (fieldLen[0] > 0) {
            /* We have a language */
            uprv_memcpy(language, fullName, fieldLen[0]);
            language[fieldLen[0]] = 0;
        }
        // A second field of exactly four ASCII letters is taken as the script.
        if (fieldLen[1] == 4 && ISASCIIALPHA(field[1][0]) &&
                ISASCIIALPHA(field[1][1]) && ISASCIIALPHA(field[1][2]) &&
                ISASCIIALPHA(field[1][3])) {
            /* We have at least a script */
            uprv_memcpy(script, field[1], fieldLen[1]);
            script[fieldLen[1]] = 0;
            variantField++;
        }

        // The next field is taken as the country when it is 2 or 3 characters long.
        if (fieldLen[variantField] == 2 || fieldLen[variantField] == 3) {
            /* We have a country */
            uprv_memcpy(country, field[variantField], fieldLen[variantField]);
            country[fieldLen[variantField]] = 0;
            variantField++;
        } else if (fieldLen[variantField] == 0) {
            variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */
        }

        if (fieldLen[variantField] > 0) {
            /* We have a variant */
            variantBegin = (int32_t)(field[variantField] - fullName);
        }

        err = U_ZERO_ERROR;
        initBaseName(err);
        if (U_FAILURE(err)) {
            break;
        }

        // successful end of init()
        return *this;
    } while(0); /*loop doesn't iterate*/

    // when an error occurs, then set this object to "bogus" (there is no UErrorCode here)
    setToBogus();

    return *this;
}
644
645/*
646 * Set up the base name.
647 * If there are no key words, it's exactly the full name.
648 * If key words exist, it's the full name truncated at the '@' character.
649 * Need to set up both at init() and after setting a keyword.
650 */
651void
652Locale::initBaseName(UErrorCode &status) {
653 if (U_FAILURE(status)) {
654 return;
655 }
656 U_ASSERT(baseName==NULL || baseName==fullName);
657 const char *atPtr = uprv_strchr(fullName, '@');
658 const char *eqPtr = uprv_strchr(fullName, '=');
659 if (atPtr && eqPtr && atPtr < eqPtr) {
660 // Key words exist.
661 int32_t baseNameLength = (int32_t)(atPtr - fullName);
662 baseName = (char *)uprv_malloc(baseNameLength + 1);
663 if (baseName == NULL) {
664 status = U_MEMORY_ALLOCATION_ERROR;
665 return;
666 }
667 uprv_strncpy(baseName, fullName, baseNameLength);
668 baseName[baseNameLength] = 0;
669
670 // The original computation of variantBegin leaves it equal to the length
671 // of fullName if there is no variant. It should instead be
672 // the length of the baseName.
673 if (variantBegin > baseNameLength) {
674 variantBegin = baseNameLength;
675 }
676 } else {
677 baseName = fullName;
678 }
679}
680
681
682int32_t
683Locale::hashCode() const
684{
685 return ustr_hashCharsN(fullName, static_cast<int32_t>(uprv_strlen(fullName)));
686}
687
688void
689Locale::setToBogus() {
690 /* Free our current storage */
691 if(baseName != fullName) {
692 uprv_free(baseName);
693 }
694 baseName = NULL;
695 if(fullName != fullNameBuffer) {
696 uprv_free(fullName);
697 fullName = fullNameBuffer;
698 }
699 *fullNameBuffer = 0;
700 *language = 0;
701 *script = 0;
702 *country = 0;
703 fIsBogus = TRUE;
704 variantBegin = 0;
705}
706
707const Locale& U_EXPORT2
708Locale::getDefault()
709{
710 {
711 Mutex lock(&gDefaultLocaleMutex);
712 if (gDefaultLocale != NULL) {
713 return *gDefaultLocale;
714 }
715 }
716 UErrorCode status = U_ZERO_ERROR;
717 return *locale_set_default_internal(NULL, status);
718}
719
720
721
722void U_EXPORT2
723Locale::setDefault( const Locale& newLocale,
724 UErrorCode& status)
725{
726 if (U_FAILURE(status)) {
727 return;
728 }
729
730 /* Set the default from the full name string of the supplied locale.
731 * This is a convenient way to access the default locale caching mechanisms.
732 */
733 const char *localeID = newLocale.getName();
734 locale_set_default_internal(localeID, status);
735}
736
737void
738Locale::addLikelySubtags(UErrorCode& status) {
739 if (U_FAILURE(status)) {
740 return;
741 }
742
743 CharString maximizedLocaleID;
744 {
745 CharStringByteSink sink(&maximizedLocaleID);
746 ulocimp_addLikelySubtags(fullName, sink, &status);
747 }
748
749 if (U_FAILURE(status)) {
750 return;
751 }
752
753 init(maximizedLocaleID.data(), /*canonicalize=*/FALSE);
754 if (isBogus()) {
755 status = U_ILLEGAL_ARGUMENT_ERROR;
756 }
757}
758
759void
760Locale::minimizeSubtags(UErrorCode& status) {
761 if (U_FAILURE(status)) {
762 return;
763 }
764
765 CharString minimizedLocaleID;
766 {
767 CharStringByteSink sink(&minimizedLocaleID);
768 ulocimp_minimizeSubtags(fullName, sink, &status);
769 }
770
771 if (U_FAILURE(status)) {
772 return;
773 }
774
775 init(minimizedLocaleID.data(), /*canonicalize=*/FALSE);
776 if (isBogus()) {
777 status = U_ILLEGAL_ARGUMENT_ERROR;
778 }
779}
780
781Locale U_EXPORT2
782Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
783{
784 Locale result(Locale::eBOGUS);
785
786 if (U_FAILURE(status)) {
787 return result;
788 }
789
790 // If a BCP-47 language tag is passed as the language parameter to the
791 // normal Locale constructor, it will actually fall back to invoking
792 // uloc_forLanguageTag() to parse it if it somehow is able to detect that
793 // the string actually is BCP-47. This works well for things like strings
794 // using BCP-47 extensions, but it does not at all work for things like
795 // BCP-47 grandfathered tags (eg. "en-GB-oed") which are possible to also
796 // interpret as ICU locale IDs and because of that won't trigger the BCP-47
797 // parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
798 // and then Locale::init(), instead of just calling the normal constructor.
799
800 CharString localeID;
801 int32_t parsedLength;
802 {
803 CharStringByteSink sink(&localeID);
804 ulocimp_forLanguageTag(
805 tag.data(),
806 tag.length(),
807 sink,
808 &parsedLength,
809 &status);
810 }
811
812 if (U_FAILURE(status)) {
813 return result;
814 }
815
816 if (parsedLength != tag.size()) {
817 status = U_ILLEGAL_ARGUMENT_ERROR;
818 return result;
819 }
820
821 result.init(localeID.data(), /*canonicalize=*/FALSE);
822 if (result.isBogus()) {
823 status = U_ILLEGAL_ARGUMENT_ERROR;
824 }
825 return result;
826}
827
828void
829Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
830{
831 if (U_FAILURE(status)) {
832 return;
833 }
834
835 if (fIsBogus) {
836 status = U_ILLEGAL_ARGUMENT_ERROR;
837 return;
838 }
839
840 ulocimp_toLanguageTag(fullName, sink, /*strict=*/FALSE, &status);
841}
842
843Locale U_EXPORT2
844Locale::createFromName (const char *name)
845{
846 if (name) {
847 Locale l("");
848 l.init(name, FALSE);
849 return l;
850 }
851 else {
852 return getDefault();
853 }
854}
855
856Locale U_EXPORT2
857Locale::createCanonical(const char* name) {
858 Locale loc("");
859 loc.init(name, TRUE);
860 return loc;
861}
862
863const char *
864Locale::getISO3Language() const
865{
866 return uloc_getISO3Language(fullName);
867}
868
869
870const char *
871Locale::getISO3Country() const
872{
873 return uloc_getISO3Country(fullName);
874}
875
876/**
877 * Return the LCID value as specified in the "LocaleID" resource for this
878 * locale. The LocaleID must be expressed as a hexadecimal number, from
879 * one to four digits. If the LocaleID resource is not present, or is
880 * in an incorrect format, 0 is returned. The LocaleID is for use in
881 * Windows (it is an LCID), but is available on all platforms.
882 */
883uint32_t
884Locale::getLCID() const
885{
886 return uloc_getLCID(fullName);
887}
888
889const char* const* U_EXPORT2 Locale::getISOCountries()
890{
891 return uloc_getISOCountries();
892}
893
894const char* const* U_EXPORT2 Locale::getISOLanguages()
895{
896 return uloc_getISOLanguages();
897}
898
899// Set the locale's data based on a posix id.
900void Locale::setFromPOSIXID(const char *posixID)
901{
902 init(posixID, TRUE);
903}
904
905const Locale & U_EXPORT2
906Locale::getRoot(void)
907{
908 return getLocale(eROOT);
909}
910
911const Locale & U_EXPORT2
912Locale::getEnglish(void)
913{
914 return getLocale(eENGLISH);
915}
916
917const Locale & U_EXPORT2
918Locale::getFrench(void)
919{
920 return getLocale(eFRENCH);
921}
922
923const Locale & U_EXPORT2
924Locale::getGerman(void)
925{
926 return getLocale(eGERMAN);
927}
928
929const Locale & U_EXPORT2
930Locale::getItalian(void)
931{
932 return getLocale(eITALIAN);
933}
934
935const Locale & U_EXPORT2
936Locale::getJapanese(void)
937{
938 return getLocale(eJAPANESE);
939}
940
941const Locale & U_EXPORT2
942Locale::getKorean(void)
943{
944 return getLocale(eKOREAN);
945}
946
947const Locale & U_EXPORT2
948Locale::getChinese(void)
949{
950 return getLocale(eCHINESE);
951}
952
953const Locale & U_EXPORT2
954Locale::getSimplifiedChinese(void)
955{
956 return getLocale(eCHINA);
957}
958
959const Locale & U_EXPORT2
960Locale::getTraditionalChinese(void)
961{
962 return getLocale(eTAIWAN);
963}
964
965
966const Locale & U_EXPORT2
967Locale::getFrance(void)
968{
969 return getLocale(eFRANCE);
970}
971
972const Locale & U_EXPORT2
973Locale::getGermany(void)
974{
975 return getLocale(eGERMANY);
976}
977
978const Locale & U_EXPORT2
979Locale::getItaly(void)
980{
981 return getLocale(eITALY);
982}
983
984const Locale & U_EXPORT2
985Locale::getJapan(void)
986{
987 return getLocale(eJAPAN);
988}
989
990const Locale & U_EXPORT2
991Locale::getKorea(void)
992{
993 return getLocale(eKOREA);
994}
995
996const Locale & U_EXPORT2
997Locale::getChina(void)
998{
999 return getLocale(eCHINA);
1000}
1001
1002const Locale & U_EXPORT2
1003Locale::getPRC(void)
1004{
1005 return getLocale(eCHINA);
1006}
1007
1008const Locale & U_EXPORT2
1009Locale::getTaiwan(void)
1010{
1011 return getLocale(eTAIWAN);
1012}
1013
1014const Locale & U_EXPORT2
1015Locale::getUK(void)
1016{
1017 return getLocale(eUK);
1018}
1019
1020const Locale & U_EXPORT2
1021Locale::getUS(void)
1022{
1023 return getLocale(eUS);
1024}
1025
1026const Locale & U_EXPORT2
1027Locale::getCanada(void)
1028{
1029 return getLocale(eCANADA);
1030}
1031
1032const Locale & U_EXPORT2
1033Locale::getCanadaFrench(void)
1034{
1035 return getLocale(eCANADA_FRENCH);
1036}
1037
1038const Locale &
1039Locale::getLocale(int locid)
1040{
1041 Locale *localeCache = getLocaleCache();
1042 U_ASSERT((locid < eMAX_LOCALES)&&(locid>=0));
1043 if (localeCache == NULL) {
1044 // Failure allocating the locale cache.
1045 // The best we can do is return a NULL reference.
1046 locid = 0;
1047 }
1048 return localeCache[locid]; /*operating on NULL*/
1049}
1050
1051/*
1052This function is defined this way in order to get around static
1053initialization and static destruction.
1054 */
1055Locale *
1056Locale::getLocaleCache(void)
1057{
1058 UErrorCode status = U_ZERO_ERROR;
1059 umtx_initOnce(gLocaleCacheInitOnce, locale_init, status);
1060 return gLocaleCache;
1061}
1062
1063class KeywordEnumeration : public StringEnumeration {
1064private:
1065 char *keywords;
1066 char *current;
1067 int32_t length;
1068 UnicodeString currUSKey;
1069 static const char fgClassID;/* Warning this is used beyond the typical RTTI usage. */
1070
1071public:
1072 static UClassID U_EXPORT2 getStaticClassID(void) { return (UClassID)&fgClassID; }
1073 virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }
1074public:
1075 KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
1076 : keywords((char *)&fgClassID), current((char *)&fgClassID), length(0) {
1077 if(U_SUCCESS(status) && keywordLen != 0) {
1078 if(keys == NULL || keywordLen < 0) {
1079 status = U_ILLEGAL_ARGUMENT_ERROR;
1080 } else {
1081 keywords = (char *)uprv_malloc(keywordLen+1);
1082 if (keywords == NULL) {
1083 status = U_MEMORY_ALLOCATION_ERROR;
1084 }
1085 else {
1086 uprv_memcpy(keywords, keys, keywordLen);
1087 keywords[keywordLen] = 0;
1088 current = keywords + currentIndex;
1089 length = keywordLen;
1090 }
1091 }
1092 }
1093 }
1094
1095 virtual ~KeywordEnumeration();
1096
1097 virtual StringEnumeration * clone() const
1098 {
1099 UErrorCode status = U_ZERO_ERROR;
1100 return new KeywordEnumeration(keywords, length, (int32_t)(current - keywords), status);
1101 }
1102
1103 virtual int32_t count(UErrorCode &/*status*/) const {
1104 char *kw = keywords;
1105 int32_t result = 0;
1106 while(*kw) {
1107 result++;
1108 kw += uprv_strlen(kw)+1;
1109 }
1110 return result;
1111 }
1112
1113 virtual const char* next(int32_t* resultLength, UErrorCode& status) {
1114 const char* result;
1115 int32_t len;
1116 if(U_SUCCESS(status) && *current != 0) {
1117 result = current;
1118 len = (int32_t)uprv_strlen(current);
1119 current += len+1;
1120 if(resultLength != NULL) {
1121 *resultLength = len;
1122 }
1123 } else {
1124 if(resultLength != NULL) {
1125 *resultLength = 0;
1126 }
1127 result = NULL;
1128 }
1129 return result;
1130 }
1131
1132 virtual const UnicodeString* snext(UErrorCode& status) {
1133 int32_t resultLength = 0;
1134 const char *s = next(&resultLength, status);
1135 return setChars(s, resultLength, status);
1136 }
1137
1138 virtual void reset(UErrorCode& /*status*/) {
1139 current = keywords;
1140 }
1141};
1142
1143const char KeywordEnumeration::fgClassID = '\0';
1144
1145KeywordEnumeration::~KeywordEnumeration() {
1146 uprv_free(keywords);
1147}
1148
1149// A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in
1150// the next() method for each keyword before returning it.
1151class UnicodeKeywordEnumeration : public KeywordEnumeration {
1152public:
1153 using KeywordEnumeration::KeywordEnumeration;
1154 virtual ~UnicodeKeywordEnumeration();
1155
1156 virtual const char* next(int32_t* resultLength, UErrorCode& status) {
1157 const char* legacy_key = KeywordEnumeration::next(nullptr, status);
1158 if (U_SUCCESS(status) && legacy_key != nullptr) {
1159 const char* key = uloc_toUnicodeLocaleKey(legacy_key);
1160 if (key == nullptr) {
1161 status = U_ILLEGAL_ARGUMENT_ERROR;
1162 } else {
1163 if (resultLength != nullptr) {
1164 *resultLength = static_cast<int32_t>(uprv_strlen(key));
1165 }
1166 return key;
1167 }
1168 }
1169 if (resultLength != nullptr) *resultLength = 0;
1170 return nullptr;
1171 }
1172};
1173
1174// Out-of-line virtual destructor to serve as the "key function".
1175UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() = default;
1176
1177StringEnumeration *
1178Locale::createKeywords(UErrorCode &status) const
1179{
1180 char keywords[256];
1181 int32_t keywordCapacity = sizeof keywords;
1182 StringEnumeration *result = NULL;
1183
1184 if (U_FAILURE(status)) {
1185 return result;
1186 }
1187
1188 const char* variantStart = uprv_strchr(fullName, '@');
1189 const char* assignment = uprv_strchr(fullName, '=');
1190 if(variantStart) {
1191 if(assignment > variantStart) {
1192 int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status);
1193 if(U_SUCCESS(status) && keyLen) {
1194 result = new KeywordEnumeration(keywords, keyLen, 0, status);
1195 if (!result) {
1196 status = U_MEMORY_ALLOCATION_ERROR;
1197 }
1198 }
1199 } else {
1200 status = U_INVALID_FORMAT_ERROR;
1201 }
1202 }
1203 return result;
1204}
1205
1206StringEnumeration *
1207Locale::createUnicodeKeywords(UErrorCode &status) const
1208{
1209 char keywords[256];
1210 int32_t keywordCapacity = sizeof keywords;
1211 StringEnumeration *result = NULL;
1212
1213 if (U_FAILURE(status)) {
1214 return result;
1215 }
1216
1217 const char* variantStart = uprv_strchr(fullName, '@');
1218 const char* assignment = uprv_strchr(fullName, '=');
1219 if(variantStart) {
1220 if(assignment > variantStart) {
1221 int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status);
1222 if(U_SUCCESS(status) && keyLen) {
1223 result = new UnicodeKeywordEnumeration(keywords, keyLen, 0, status);
1224 if (!result) {
1225 status = U_MEMORY_ALLOCATION_ERROR;
1226 }
1227 }
1228 } else {
1229 status = U_INVALID_FORMAT_ERROR;
1230 }
1231 }
1232 return result;
1233}
1234
1235int32_t
1236Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const
1237{
1238 return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status);
1239}
1240
1241void
1242Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const {
1243 if (U_FAILURE(status)) {
1244 return;
1245 }
1246
1247 if (fIsBogus) {
1248 status = U_ILLEGAL_ARGUMENT_ERROR;
1249 return;
1250 }
1251
1252 // TODO: Remove the need for a const char* to a NUL terminated buffer.
1253 const CharString keywordName_nul(keywordName, status);
1254 if (U_FAILURE(status)) {
1255 return;
1256 }
1257
1258 LocalMemory<char> scratch;
1259 int32_t scratch_capacity = 16; // Arbitrarily chosen default size.
1260
1261 char* buffer;
1262 int32_t result_capacity, reslen;
1263
1264 for (;;) {
1265 if (scratch.allocateInsteadAndReset(scratch_capacity) == nullptr) {
1266 status = U_MEMORY_ALLOCATION_ERROR;
1267 return;
1268 }
1269
1270 buffer = sink.GetAppendBuffer(
1271 /*min_capacity=*/scratch_capacity,
1272 /*desired_capacity_hint=*/scratch_capacity,
1273 scratch.getAlias(),
1274 scratch_capacity,
1275 &result_capacity);
1276
1277 reslen = uloc_getKeywordValue(
1278 fullName,
1279 keywordName_nul.data(),
1280 buffer,
1281 result_capacity,
1282 &status);
1283
1284 if (status != U_BUFFER_OVERFLOW_ERROR) {
1285 break;
1286 }
1287
1288 scratch_capacity = reslen;
1289 status = U_ZERO_ERROR;
1290 }
1291
1292 if (U_FAILURE(status)) {
1293 return;
1294 }
1295
1296 sink.Append(buffer, reslen);
1297 if (status == U_STRING_NOT_TERMINATED_WARNING) {
1298 status = U_ZERO_ERROR; // Terminators not used.
1299 }
1300}
1301
1302void
1303Locale::getUnicodeKeywordValue(StringPiece keywordName,
1304 ByteSink& sink,
1305 UErrorCode& status) const {
1306 // TODO: Remove the need for a const char* to a NUL terminated buffer.
1307 const CharString keywordName_nul(keywordName, status);
1308 if (U_FAILURE(status)) {
1309 return;
1310 }
1311
1312 const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
1313
1314 if (legacy_key == nullptr) {
1315 status = U_ILLEGAL_ARGUMENT_ERROR;
1316 return;
1317 }
1318
1319 CharString legacy_value;
1320 {
1321 CharStringByteSink sink(&legacy_value);
1322 getKeywordValue(legacy_key, sink, status);
1323 }
1324
1325 if (U_FAILURE(status)) {
1326 return;
1327 }
1328
1329 const char* unicode_value = uloc_toUnicodeLocaleType(
1330 keywordName_nul.data(), legacy_value.data());
1331
1332 if (unicode_value == nullptr) {
1333 status = U_ILLEGAL_ARGUMENT_ERROR;
1334 return;
1335 }
1336
1337 sink.Append(unicode_value, static_cast<int32_t>(uprv_strlen(unicode_value)));
1338}
1339
1340void
1341Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status)
1342{
1343 uloc_setKeywordValue(keywordName, keywordValue, fullName, ULOC_FULLNAME_CAPACITY, &status);
1344 if (U_SUCCESS(status) && baseName == fullName) {
1345 // May have added the first keyword, meaning that the fullName is no longer also the baseName.
1346 initBaseName(status);
1347 }
1348}
1349
1350void
1351Locale::setKeywordValue(StringPiece keywordName,
1352 StringPiece keywordValue,
1353 UErrorCode& status) {
1354 // TODO: Remove the need for a const char* to a NUL terminated buffer.
1355 const CharString keywordName_nul(keywordName, status);
1356 const CharString keywordValue_nul(keywordValue, status);
1357 setKeywordValue(keywordName_nul.data(), keywordValue_nul.data(), status);
1358}
1359
1360void
1361Locale::setUnicodeKeywordValue(StringPiece keywordName,
1362 StringPiece keywordValue,
1363 UErrorCode& status) {
1364 // TODO: Remove the need for a const char* to a NUL terminated buffer.
1365 const CharString keywordName_nul(keywordName, status);
1366 const CharString keywordValue_nul(keywordValue, status);
1367
1368 if (U_FAILURE(status)) {
1369 return;
1370 }
1371
1372 const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data());
1373
1374 if (legacy_key == nullptr) {
1375 status = U_ILLEGAL_ARGUMENT_ERROR;
1376 return;
1377 }
1378
1379 const char* legacy_value = nullptr;
1380
1381 if (!keywordValue_nul.isEmpty()) {
1382 legacy_value =
1383 uloc_toLegacyType(keywordName_nul.data(), keywordValue_nul.data());
1384
1385 if (legacy_value == nullptr) {
1386 status = U_ILLEGAL_ARGUMENT_ERROR;
1387 return;
1388 }
1389 }
1390
1391 setKeywordValue(legacy_key, legacy_value, status);
1392}
1393
1394const char *
1395Locale::getBaseName() const {
1396 return baseName;
1397}
1398
// Out-of-line definition of the Locale::Iterator destructor. It is
// defaulted, so it performs only the compiler-generated cleanup.
Locale::Iterator::~Iterator() = default;
1400
1401//eof
1402U_NAMESPACE_END
1403