1// © 2019 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4#include <utility>
5
6#include "bytesinkutil.h" // CharStringByteSink
7#include "charstr.h"
8#include "cstring.h"
9#include "ulocimp.h"
10#include "unicode/localebuilder.h"
11#include "unicode/locid.h"
12
13U_NAMESPACE_BEGIN
14
// True when c is one of the ASCII decimal digits '0' through '9'.
#define UPRV_ISDIGIT(c) (('0' <= (c)) && ((c) <= '9'))
// True when c is an ASCII letter or an ASCII decimal digit.
// Note: the argument may be evaluated more than once.
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c))

// Keyword name under which the Unicode locale attributes are kept in the
// extensions Locale.
constexpr const char* kAttributeKey = "attribute";
19
20static bool _isExtensionSubtags(char key, const char* s, int32_t len) {
21 switch (uprv_tolower(key)) {
22 case 'u':
23 return ultag_isUnicodeExtensionSubtags(s, len);
24 case 't':
25 return ultag_isTransformedExtensionSubtags(s, len);
26 case 'x':
27 return ultag_isPrivateuseValueSubtags(s, len);
28 default:
29 return ultag_isExtensionSubtags(s, len);
30 }
31}
32
33LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(),
34 script_(), region_(), variant_(nullptr), extensions_(nullptr)
35{
36 language_[0] = 0;
37 script_[0] = 0;
38 region_[0] = 0;
39}
40
41LocaleBuilder::~LocaleBuilder()
42{
43 delete variant_;
44 delete extensions_;
45}
46
47LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)
48{
49 clear();
50 setLanguage(locale.getLanguage());
51 setScript(locale.getScript());
52 setRegion(locale.getCountry());
53 setVariant(locale.getVariant());
54 extensions_ = locale.clone();
55 if (extensions_ == nullptr) {
56 status_ = U_MEMORY_ALLOCATION_ERROR;
57 }
58 return *this;
59}
60
61LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
62{
63 Locale l = Locale::forLanguageTag(tag, status_);
64 if (U_FAILURE(status_)) { return *this; }
65 // Because setLocale will reset status_ we need to return
66 // first if we have error in forLanguageTag.
67 setLocale(l);
68 return *this;
69}
70
71static void setField(StringPiece input, char* dest, UErrorCode& errorCode,
72 UBool (*test)(const char*, int32_t)) {
73 if (U_FAILURE(errorCode)) { return; }
74 if (input.empty()) {
75 dest[0] = '\0';
76 } else if (test(input.data(), input.length())) {
77 uprv_memcpy(dest, input.data(), input.length());
78 dest[input.length()] = '\0';
79 } else {
80 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
81 }
82}
83
84LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
85{
86 setField(language, language_, status_, &ultag_isLanguageSubtag);
87 return *this;
88}
89
90LocaleBuilder& LocaleBuilder::setScript(StringPiece script)
91{
92 setField(script, script_, status_, &ultag_isScriptSubtag);
93 return *this;
94}
95
96LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
97{
98 setField(region, region_, status_, &ultag_isRegionSubtag);
99 return *this;
100}
101
102static void transform(char* data, int32_t len) {
103 for (int32_t i = 0; i < len; i++, data++) {
104 if (*data == '_') {
105 *data = '-';
106 } else {
107 *data = uprv_tolower(*data);
108 }
109 }
110}
111
112LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
113{
114 if (U_FAILURE(status_)) { return *this; }
115 if (variant.empty()) {
116 delete variant_;
117 variant_ = nullptr;
118 return *this;
119 }
120 CharString* new_variant = new CharString(variant, status_);
121 if (U_FAILURE(status_)) { return *this; }
122 if (new_variant == nullptr) {
123 status_ = U_MEMORY_ALLOCATION_ERROR;
124 return *this;
125 }
126 transform(new_variant->data(), new_variant->length());
127 if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) {
128 delete new_variant;
129 status_ = U_ILLEGAL_ARGUMENT_ERROR;
130 return *this;
131 }
132 delete variant_;
133 variant_ = new_variant;
134 return *this;
135}
136
137static bool
138_isKeywordValue(const char* key, const char* value, int32_t value_len)
139{
140 if (key[1] == '\0') {
141 // one char key
142 return (UPRV_ISALPHANUM(uprv_tolower(key[0])) &&
143 _isExtensionSubtags(key[0], value, value_len));
144 } else if (uprv_strcmp(key, kAttributeKey) == 0) {
145 // unicode attributes
146 return ultag_isUnicodeLocaleAttributes(value, value_len);
147 }
148 // otherwise: unicode extension value
149 // We need to convert from legacy key/value to unicode
150 // key/value
151 const char* unicode_locale_key = uloc_toUnicodeLocaleKey(key);
152 const char* unicode_locale_type = uloc_toUnicodeLocaleType(key, value);
153
154 return unicode_locale_key && unicode_locale_type &&
155 ultag_isUnicodeLocaleKey(unicode_locale_key, -1) &&
156 ultag_isUnicodeLocaleType(unicode_locale_type, -1);
157}
158
159static void
160_copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
161 Locale& to, bool validate, UErrorCode& errorCode)
162{
163 if (U_FAILURE(errorCode)) { return; }
164 LocalPointer<icu::StringEnumeration> ownedKeywords;
165 if (keywords == nullptr) {
166 ownedKeywords.adoptInstead(from.createKeywords(errorCode));
167 if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }
168 keywords = ownedKeywords.getAlias();
169 }
170 const char* key;
171 while ((key = keywords->next(nullptr, errorCode)) != nullptr) {
172 CharString value;
173 CharStringByteSink sink(&value);
174 from.getKeywordValue(key, sink, errorCode);
175 if (U_FAILURE(errorCode)) { return; }
176 if (uprv_strcmp(key, kAttributeKey) == 0) {
177 transform(value.data(), value.length());
178 }
179 if (validate &&
180 !_isKeywordValue(key, value.data(), value.length())) {
181 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
182 return;
183 }
184 to.setKeywordValue(key, value.data(), errorCode);
185 if (U_FAILURE(errorCode)) { return; }
186 }
187}
188
189void static
190_clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
191{
192 // Clear Unicode attributes
193 locale.setKeywordValue(kAttributeKey, "", errorCode);
194
195 // Clear all Unicode keyword values
196 LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode));
197 if (U_FAILURE(errorCode) || iter.isNull()) { return; }
198 const char* key;
199 while ((key = iter->next(nullptr, errorCode)) != nullptr) {
200 locale.setUnicodeKeywordValue(key, nullptr, errorCode);
201 }
202}
203
204static void
205_setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
206{
207 // Add the unicode extensions to extensions_
208 CharString locale_str("und-u-", errorCode);
209 locale_str.append(value, errorCode);
210 _copyExtensions(
211 Locale::forLanguageTag(locale_str.data(), errorCode), nullptr,
212 locale, false, errorCode);
213}
214
215LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
216{
217 if (U_FAILURE(status_)) { return *this; }
218 if (!UPRV_ISALPHANUM(key)) {
219 status_ = U_ILLEGAL_ARGUMENT_ERROR;
220 return *this;
221 }
222 CharString value_str(value, status_);
223 if (U_FAILURE(status_)) { return *this; }
224 transform(value_str.data(), value_str.length());
225 if (!value_str.isEmpty() &&
226 !_isExtensionSubtags(key, value_str.data(), value_str.length())) {
227 status_ = U_ILLEGAL_ARGUMENT_ERROR;
228 return *this;
229 }
230 if (extensions_ == nullptr) {
231 extensions_ = Locale::getRoot().clone();
232 if (extensions_ == nullptr) {
233 status_ = U_MEMORY_ALLOCATION_ERROR;
234 return *this;
235 }
236 }
237 if (uprv_tolower(key) != 'u') {
238 // for t, x and others extension.
239 extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(),
240 status_);
241 return *this;
242 }
243 _clearUAttributesAndKeyType(*extensions_, status_);
244 if (U_FAILURE(status_)) { return *this; }
245 if (!value.empty()) {
246 _setUnicodeExtensions(*extensions_, value_str, status_);
247 }
248 return *this;
249}
250
251LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
252 StringPiece key, StringPiece type)
253{
254 if (U_FAILURE(status_)) { return *this; }
255 if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) ||
256 (!type.empty() &&
257 !ultag_isUnicodeLocaleType(type.data(), type.length()))) {
258 status_ = U_ILLEGAL_ARGUMENT_ERROR;
259 return *this;
260 }
261 if (extensions_ == nullptr) {
262 extensions_ = Locale::getRoot().clone();
263 if (extensions_ == nullptr) {
264 status_ = U_MEMORY_ALLOCATION_ERROR;
265 return *this;
266 }
267 }
268 extensions_->setUnicodeKeywordValue(key, type, status_);
269 return *this;
270}
271
272LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
273 StringPiece value)
274{
275 CharString value_str(value, status_);
276 if (U_FAILURE(status_)) { return *this; }
277 transform(value_str.data(), value_str.length());
278 if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
279 status_ = U_ILLEGAL_ARGUMENT_ERROR;
280 return *this;
281 }
282 if (extensions_ == nullptr) {
283 extensions_ = Locale::getRoot().clone();
284 if (extensions_ == nullptr) {
285 status_ = U_MEMORY_ALLOCATION_ERROR;
286 return *this;
287 }
288 extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
289 return *this;
290 }
291
292 CharString attributes;
293 CharStringByteSink sink(&attributes);
294 UErrorCode localErrorCode = U_ZERO_ERROR;
295 extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
296 if (U_FAILURE(localErrorCode)) {
297 CharString new_attributes(value_str.data(), status_);
298 // No attributes, set the attribute.
299 extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
300 return *this;
301 }
302
303 transform(attributes.data(),attributes.length());
304 const char* start = attributes.data();
305 const char* limit = attributes.data() + attributes.length();
306 CharString new_attributes;
307 bool inserted = false;
308 while (start < limit) {
309 if (!inserted) {
310 int cmp = uprv_strcmp(start, value_str.data());
311 if (cmp == 0) { return *this; } // Found it in attributes: Just return
312 if (cmp > 0) {
313 if (!new_attributes.isEmpty()) new_attributes.append('_', status_);
314 new_attributes.append(value_str.data(), status_);
315 inserted = true;
316 }
317 }
318 if (!new_attributes.isEmpty()) {
319 new_attributes.append('_', status_);
320 }
321 new_attributes.append(start, status_);
322 start += uprv_strlen(start) + 1;
323 }
324 if (!inserted) {
325 if (!new_attributes.isEmpty()) {
326 new_attributes.append('_', status_);
327 }
328 new_attributes.append(value_str.data(), status_);
329 }
330 // Not yet in the attributes, set the attribute.
331 extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
332 return *this;
333}
334
335LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
336 StringPiece value)
337{
338 CharString value_str(value, status_);
339 if (U_FAILURE(status_)) { return *this; }
340 transform(value_str.data(), value_str.length());
341 if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
342 status_ = U_ILLEGAL_ARGUMENT_ERROR;
343 return *this;
344 }
345 if (extensions_ == nullptr) { return *this; }
346 UErrorCode localErrorCode = U_ZERO_ERROR;
347 CharString attributes;
348 CharStringByteSink sink(&attributes);
349 extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode);
350 // get failure, just return
351 if (U_FAILURE(localErrorCode)) { return *this; }
352 // Do not have any attributes, just return.
353 if (attributes.isEmpty()) { return *this; }
354
355 char* p = attributes.data();
356 // Replace null terminiator in place for _ and - so later
357 // we can use uprv_strcmp to compare.
358 for (int32_t i = 0; i < attributes.length(); i++, p++) {
359 *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
360 }
361
362 const char* start = attributes.data();
363 const char* limit = attributes.data() + attributes.length();
364 CharString new_attributes;
365 bool found = false;
366 while (start < limit) {
367 if (uprv_strcmp(start, value_str.data()) == 0) {
368 found = true;
369 } else {
370 if (!new_attributes.isEmpty()) {
371 new_attributes.append('_', status_);
372 }
373 new_attributes.append(start, status_);
374 }
375 start += uprv_strlen(start) + 1;
376 }
377 // Found the value in attributes, set the attribute.
378 if (found) {
379 extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
380 }
381 return *this;
382}
383
384LocaleBuilder& LocaleBuilder::clear()
385{
386 status_ = U_ZERO_ERROR;
387 language_[0] = 0;
388 script_[0] = 0;
389 region_[0] = 0;
390 delete variant_;
391 variant_ = nullptr;
392 clearExtensions();
393 return *this;
394}
395
396LocaleBuilder& LocaleBuilder::clearExtensions()
397{
398 delete extensions_;
399 extensions_ = nullptr;
400 return *this;
401}
402
403Locale makeBogusLocale() {
404 Locale bogus;
405 bogus.setToBogus();
406 return bogus;
407}
408
409void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
410{
411 if (U_FAILURE(errorCode)) { return; }
412 LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));
413 if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {
414 // Error, or no extensions to copy.
415 return;
416 }
417 if (extensions_ == nullptr) {
418 extensions_ = Locale::getRoot().clone();
419 if (extensions_ == nullptr) {
420 status_ = U_MEMORY_ALLOCATION_ERROR;
421 return;
422 }
423 }
424 _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);
425}
426
427Locale LocaleBuilder::build(UErrorCode& errorCode)
428{
429 if (U_FAILURE(errorCode)) {
430 return makeBogusLocale();
431 }
432 if (U_FAILURE(status_)) {
433 errorCode = status_;
434 return makeBogusLocale();
435 }
436 CharString locale_str(language_, errorCode);
437 if (uprv_strlen(script_) > 0) {
438 locale_str.append('-', errorCode).append(StringPiece(script_), errorCode);
439 }
440 if (uprv_strlen(region_) > 0) {
441 locale_str.append('-', errorCode).append(StringPiece(region_), errorCode);
442 }
443 if (variant_ != nullptr) {
444 locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode);
445 }
446 if (U_FAILURE(errorCode)) {
447 return makeBogusLocale();
448 }
449 Locale product(locale_str.data());
450 if (extensions_ != nullptr) {
451 _copyExtensions(*extensions_, nullptr, product, true, errorCode);
452 }
453 if (U_FAILURE(errorCode)) {
454 return makeBogusLocale();
455 }
456 return product;
457}
458
459UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const {
460 if (U_FAILURE(outErrorCode)) {
461 // Do not overwrite the older error code
462 return true;
463 }
464 outErrorCode = status_;
465 return U_FAILURE(outErrorCode);
466}
467
468U_NAMESPACE_END
469