1 | // © 2019 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | |
4 | #include <utility> |
5 | |
6 | #include "bytesinkutil.h" // CharStringByteSink |
7 | #include "charstr.h" |
8 | #include "cstring.h" |
9 | #include "ulocimp.h" |
10 | #include "unicode/localebuilder.h" |
11 | #include "unicode/locid.h" |
12 | |
13 | U_NAMESPACE_BEGIN |
14 | |
// True when `c` is an ASCII decimal digit ('0' through '9').
// NOTE: function-like macro; the argument is expanded twice.
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
// True when `c` is accepted by uprv_isASCIILetter() or is an ASCII digit.
// NOTE: the argument may be expanded more than once.
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )

// Keyword name under which this file stores and looks up the Unicode locale
// attributes on the Locale that holds the pending extensions.
constexpr const char* kAttributeKey = "attribute" ;
19 | |
20 | static bool _isExtensionSubtags(char key, const char* s, int32_t len) { |
21 | switch (uprv_tolower(key)) { |
22 | case 'u': |
23 | return ultag_isUnicodeExtensionSubtags(s, len); |
24 | case 't': |
25 | return ultag_isTransformedExtensionSubtags(s, len); |
26 | case 'x': |
27 | return ultag_isPrivateuseValueSubtags(s, len); |
28 | default: |
29 | return ultag_isExtensionSubtags(s, len); |
30 | } |
31 | } |
32 | |
33 | LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(), |
34 | script_(), region_(), variant_(nullptr), extensions_(nullptr) |
35 | { |
36 | language_[0] = 0; |
37 | script_[0] = 0; |
38 | region_[0] = 0; |
39 | } |
40 | |
41 | LocaleBuilder::~LocaleBuilder() |
42 | { |
43 | delete variant_; |
44 | delete extensions_; |
45 | } |
46 | |
47 | LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale) |
48 | { |
49 | clear(); |
50 | setLanguage(locale.getLanguage()); |
51 | setScript(locale.getScript()); |
52 | setRegion(locale.getCountry()); |
53 | setVariant(locale.getVariant()); |
54 | extensions_ = locale.clone(); |
55 | if (extensions_ == nullptr) { |
56 | status_ = U_MEMORY_ALLOCATION_ERROR; |
57 | } |
58 | return *this; |
59 | } |
60 | |
61 | LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag) |
62 | { |
63 | Locale l = Locale::forLanguageTag(tag, status_); |
64 | if (U_FAILURE(status_)) { return *this; } |
65 | // Because setLocale will reset status_ we need to return |
66 | // first if we have error in forLanguageTag. |
67 | setLocale(l); |
68 | return *this; |
69 | } |
70 | |
71 | static void setField(StringPiece input, char* dest, UErrorCode& errorCode, |
72 | UBool (*test)(const char*, int32_t)) { |
73 | if (U_FAILURE(errorCode)) { return; } |
74 | if (input.empty()) { |
75 | dest[0] = '\0'; |
76 | } else if (test(input.data(), input.length())) { |
77 | uprv_memcpy(dest, input.data(), input.length()); |
78 | dest[input.length()] = '\0'; |
79 | } else { |
80 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
81 | } |
82 | } |
83 | |
84 | LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language) |
85 | { |
86 | setField(language, language_, status_, &ultag_isLanguageSubtag); |
87 | return *this; |
88 | } |
89 | |
90 | LocaleBuilder& LocaleBuilder::setScript(StringPiece script) |
91 | { |
92 | setField(script, script_, status_, &ultag_isScriptSubtag); |
93 | return *this; |
94 | } |
95 | |
96 | LocaleBuilder& LocaleBuilder::setRegion(StringPiece region) |
97 | { |
98 | setField(region, region_, status_, &ultag_isRegionSubtag); |
99 | return *this; |
100 | } |
101 | |
102 | static void transform(char* data, int32_t len) { |
103 | for (int32_t i = 0; i < len; i++, data++) { |
104 | if (*data == '_') { |
105 | *data = '-'; |
106 | } else { |
107 | *data = uprv_tolower(*data); |
108 | } |
109 | } |
110 | } |
111 | |
112 | LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant) |
113 | { |
114 | if (U_FAILURE(status_)) { return *this; } |
115 | if (variant.empty()) { |
116 | delete variant_; |
117 | variant_ = nullptr; |
118 | return *this; |
119 | } |
120 | CharString* new_variant = new CharString(variant, status_); |
121 | if (U_FAILURE(status_)) { return *this; } |
122 | if (new_variant == nullptr) { |
123 | status_ = U_MEMORY_ALLOCATION_ERROR; |
124 | return *this; |
125 | } |
126 | transform(new_variant->data(), new_variant->length()); |
127 | if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) { |
128 | delete new_variant; |
129 | status_ = U_ILLEGAL_ARGUMENT_ERROR; |
130 | return *this; |
131 | } |
132 | delete variant_; |
133 | variant_ = new_variant; |
134 | return *this; |
135 | } |
136 | |
137 | static bool |
138 | _isKeywordValue(const char* key, const char* value, int32_t value_len) |
139 | { |
140 | if (key[1] == '\0') { |
141 | // one char key |
142 | return (UPRV_ISALPHANUM(uprv_tolower(key[0])) && |
143 | _isExtensionSubtags(key[0], value, value_len)); |
144 | } else if (uprv_strcmp(key, kAttributeKey) == 0) { |
145 | // unicode attributes |
146 | return ultag_isUnicodeLocaleAttributes(value, value_len); |
147 | } |
148 | // otherwise: unicode extension value |
149 | // We need to convert from legacy key/value to unicode |
150 | // key/value |
151 | const char* unicode_locale_key = uloc_toUnicodeLocaleKey(key); |
152 | const char* unicode_locale_type = uloc_toUnicodeLocaleType(key, value); |
153 | |
154 | return unicode_locale_key && unicode_locale_type && |
155 | ultag_isUnicodeLocaleKey(unicode_locale_key, -1) && |
156 | ultag_isUnicodeLocaleType(unicode_locale_type, -1); |
157 | } |
158 | |
159 | static void |
160 | _copyExtensions(const Locale& from, icu::StringEnumeration *keywords, |
161 | Locale& to, bool validate, UErrorCode& errorCode) |
162 | { |
163 | if (U_FAILURE(errorCode)) { return; } |
164 | LocalPointer<icu::StringEnumeration> ownedKeywords; |
165 | if (keywords == nullptr) { |
166 | ownedKeywords.adoptInstead(from.createKeywords(errorCode)); |
167 | if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; } |
168 | keywords = ownedKeywords.getAlias(); |
169 | } |
170 | const char* key; |
171 | while ((key = keywords->next(nullptr, errorCode)) != nullptr) { |
172 | CharString value; |
173 | CharStringByteSink sink(&value); |
174 | from.getKeywordValue(key, sink, errorCode); |
175 | if (U_FAILURE(errorCode)) { return; } |
176 | if (uprv_strcmp(key, kAttributeKey) == 0) { |
177 | transform(value.data(), value.length()); |
178 | } |
179 | if (validate && |
180 | !_isKeywordValue(key, value.data(), value.length())) { |
181 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
182 | return; |
183 | } |
184 | to.setKeywordValue(key, value.data(), errorCode); |
185 | if (U_FAILURE(errorCode)) { return; } |
186 | } |
187 | } |
188 | |
189 | void static |
190 | _clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode) |
191 | { |
192 | // Clear Unicode attributes |
193 | locale.setKeywordValue(kAttributeKey, "" , errorCode); |
194 | |
195 | // Clear all Unicode keyword values |
196 | LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode)); |
197 | if (U_FAILURE(errorCode) || iter.isNull()) { return; } |
198 | const char* key; |
199 | while ((key = iter->next(nullptr, errorCode)) != nullptr) { |
200 | locale.setUnicodeKeywordValue(key, nullptr, errorCode); |
201 | } |
202 | } |
203 | |
204 | static void |
205 | _setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode) |
206 | { |
207 | // Add the unicode extensions to extensions_ |
208 | CharString locale_str("und-u-" , errorCode); |
209 | locale_str.append(value, errorCode); |
210 | _copyExtensions( |
211 | Locale::forLanguageTag(locale_str.data(), errorCode), nullptr, |
212 | locale, false, errorCode); |
213 | } |
214 | |
215 | LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value) |
216 | { |
217 | if (U_FAILURE(status_)) { return *this; } |
218 | if (!UPRV_ISALPHANUM(key)) { |
219 | status_ = U_ILLEGAL_ARGUMENT_ERROR; |
220 | return *this; |
221 | } |
222 | CharString value_str(value, status_); |
223 | if (U_FAILURE(status_)) { return *this; } |
224 | transform(value_str.data(), value_str.length()); |
225 | if (!value_str.isEmpty() && |
226 | !_isExtensionSubtags(key, value_str.data(), value_str.length())) { |
227 | status_ = U_ILLEGAL_ARGUMENT_ERROR; |
228 | return *this; |
229 | } |
230 | if (extensions_ == nullptr) { |
231 | extensions_ = Locale::getRoot().clone(); |
232 | if (extensions_ == nullptr) { |
233 | status_ = U_MEMORY_ALLOCATION_ERROR; |
234 | return *this; |
235 | } |
236 | } |
237 | if (uprv_tolower(key) != 'u') { |
238 | // for t, x and others extension. |
239 | extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(), |
240 | status_); |
241 | return *this; |
242 | } |
243 | _clearUAttributesAndKeyType(*extensions_, status_); |
244 | if (U_FAILURE(status_)) { return *this; } |
245 | if (!value.empty()) { |
246 | _setUnicodeExtensions(*extensions_, value_str, status_); |
247 | } |
248 | return *this; |
249 | } |
250 | |
251 | LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword( |
252 | StringPiece key, StringPiece type) |
253 | { |
254 | if (U_FAILURE(status_)) { return *this; } |
255 | if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) || |
256 | (!type.empty() && |
257 | !ultag_isUnicodeLocaleType(type.data(), type.length()))) { |
258 | status_ = U_ILLEGAL_ARGUMENT_ERROR; |
259 | return *this; |
260 | } |
261 | if (extensions_ == nullptr) { |
262 | extensions_ = Locale::getRoot().clone(); |
263 | if (extensions_ == nullptr) { |
264 | status_ = U_MEMORY_ALLOCATION_ERROR; |
265 | return *this; |
266 | } |
267 | } |
268 | extensions_->setUnicodeKeywordValue(key, type, status_); |
269 | return *this; |
270 | } |
271 | |
272 | LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute( |
273 | StringPiece value) |
274 | { |
275 | CharString value_str(value, status_); |
276 | if (U_FAILURE(status_)) { return *this; } |
277 | transform(value_str.data(), value_str.length()); |
278 | if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) { |
279 | status_ = U_ILLEGAL_ARGUMENT_ERROR; |
280 | return *this; |
281 | } |
282 | if (extensions_ == nullptr) { |
283 | extensions_ = Locale::getRoot().clone(); |
284 | if (extensions_ == nullptr) { |
285 | status_ = U_MEMORY_ALLOCATION_ERROR; |
286 | return *this; |
287 | } |
288 | extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_); |
289 | return *this; |
290 | } |
291 | |
292 | CharString attributes; |
293 | CharStringByteSink sink(&attributes); |
294 | UErrorCode localErrorCode = U_ZERO_ERROR; |
295 | extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode); |
296 | if (U_FAILURE(localErrorCode)) { |
297 | CharString new_attributes(value_str.data(), status_); |
298 | // No attributes, set the attribute. |
299 | extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); |
300 | return *this; |
301 | } |
302 | |
303 | transform(attributes.data(),attributes.length()); |
304 | const char* start = attributes.data(); |
305 | const char* limit = attributes.data() + attributes.length(); |
306 | CharString new_attributes; |
307 | bool inserted = false; |
308 | while (start < limit) { |
309 | if (!inserted) { |
310 | int cmp = uprv_strcmp(start, value_str.data()); |
311 | if (cmp == 0) { return *this; } // Found it in attributes: Just return |
312 | if (cmp > 0) { |
313 | if (!new_attributes.isEmpty()) new_attributes.append('_', status_); |
314 | new_attributes.append(value_str.data(), status_); |
315 | inserted = true; |
316 | } |
317 | } |
318 | if (!new_attributes.isEmpty()) { |
319 | new_attributes.append('_', status_); |
320 | } |
321 | new_attributes.append(start, status_); |
322 | start += uprv_strlen(start) + 1; |
323 | } |
324 | if (!inserted) { |
325 | if (!new_attributes.isEmpty()) { |
326 | new_attributes.append('_', status_); |
327 | } |
328 | new_attributes.append(value_str.data(), status_); |
329 | } |
330 | // Not yet in the attributes, set the attribute. |
331 | extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); |
332 | return *this; |
333 | } |
334 | |
335 | LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute( |
336 | StringPiece value) |
337 | { |
338 | CharString value_str(value, status_); |
339 | if (U_FAILURE(status_)) { return *this; } |
340 | transform(value_str.data(), value_str.length()); |
341 | if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) { |
342 | status_ = U_ILLEGAL_ARGUMENT_ERROR; |
343 | return *this; |
344 | } |
345 | if (extensions_ == nullptr) { return *this; } |
346 | UErrorCode localErrorCode = U_ZERO_ERROR; |
347 | CharString attributes; |
348 | CharStringByteSink sink(&attributes); |
349 | extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode); |
350 | // get failure, just return |
351 | if (U_FAILURE(localErrorCode)) { return *this; } |
352 | // Do not have any attributes, just return. |
353 | if (attributes.isEmpty()) { return *this; } |
354 | |
355 | char* p = attributes.data(); |
356 | // Replace null terminiator in place for _ and - so later |
357 | // we can use uprv_strcmp to compare. |
358 | for (int32_t i = 0; i < attributes.length(); i++, p++) { |
359 | *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p); |
360 | } |
361 | |
362 | const char* start = attributes.data(); |
363 | const char* limit = attributes.data() + attributes.length(); |
364 | CharString new_attributes; |
365 | bool found = false; |
366 | while (start < limit) { |
367 | if (uprv_strcmp(start, value_str.data()) == 0) { |
368 | found = true; |
369 | } else { |
370 | if (!new_attributes.isEmpty()) { |
371 | new_attributes.append('_', status_); |
372 | } |
373 | new_attributes.append(start, status_); |
374 | } |
375 | start += uprv_strlen(start) + 1; |
376 | } |
377 | // Found the value in attributes, set the attribute. |
378 | if (found) { |
379 | extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); |
380 | } |
381 | return *this; |
382 | } |
383 | |
384 | LocaleBuilder& LocaleBuilder::clear() |
385 | { |
386 | status_ = U_ZERO_ERROR; |
387 | language_[0] = 0; |
388 | script_[0] = 0; |
389 | region_[0] = 0; |
390 | delete variant_; |
391 | variant_ = nullptr; |
392 | clearExtensions(); |
393 | return *this; |
394 | } |
395 | |
396 | LocaleBuilder& LocaleBuilder::clearExtensions() |
397 | { |
398 | delete extensions_; |
399 | extensions_ = nullptr; |
400 | return *this; |
401 | } |
402 | |
403 | Locale makeBogusLocale() { |
404 | Locale bogus; |
405 | bogus.setToBogus(); |
406 | return bogus; |
407 | } |
408 | |
409 | void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode) |
410 | { |
411 | if (U_FAILURE(errorCode)) { return; } |
412 | LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode)); |
413 | if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) { |
414 | // Error, or no extensions to copy. |
415 | return; |
416 | } |
417 | if (extensions_ == nullptr) { |
418 | extensions_ = Locale::getRoot().clone(); |
419 | if (extensions_ == nullptr) { |
420 | status_ = U_MEMORY_ALLOCATION_ERROR; |
421 | return; |
422 | } |
423 | } |
424 | _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode); |
425 | } |
426 | |
427 | Locale LocaleBuilder::build(UErrorCode& errorCode) |
428 | { |
429 | if (U_FAILURE(errorCode)) { |
430 | return makeBogusLocale(); |
431 | } |
432 | if (U_FAILURE(status_)) { |
433 | errorCode = status_; |
434 | return makeBogusLocale(); |
435 | } |
436 | CharString locale_str(language_, errorCode); |
437 | if (uprv_strlen(script_) > 0) { |
438 | locale_str.append('-', errorCode).append(StringPiece(script_), errorCode); |
439 | } |
440 | if (uprv_strlen(region_) > 0) { |
441 | locale_str.append('-', errorCode).append(StringPiece(region_), errorCode); |
442 | } |
443 | if (variant_ != nullptr) { |
444 | locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode); |
445 | } |
446 | if (U_FAILURE(errorCode)) { |
447 | return makeBogusLocale(); |
448 | } |
449 | Locale product(locale_str.data()); |
450 | if (extensions_ != nullptr) { |
451 | _copyExtensions(*extensions_, nullptr, product, true, errorCode); |
452 | } |
453 | if (U_FAILURE(errorCode)) { |
454 | return makeBogusLocale(); |
455 | } |
456 | return product; |
457 | } |
458 | |
459 | UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const { |
460 | if (U_FAILURE(outErrorCode)) { |
461 | // Do not overwrite the older error code |
462 | return true; |
463 | } |
464 | outErrorCode = status_; |
465 | return U_FAILURE(outErrorCode); |
466 | } |
467 | |
468 | U_NAMESPACE_END |
469 | |