| 1 | // © 2016 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | /* |
| 4 | ********************************************************************** |
| 5 | * Copyright (C) 2004-2016, International Business Machines |
| 6 | * Corporation and others. All Rights Reserved. |
| 7 | ********************************************************************** |
| 8 | */ |
| 9 | |
| 10 | #ifndef ULOCIMP_H |
| 11 | #define ULOCIMP_H |
| 12 | |
| 13 | #include "unicode/bytestream.h" |
| 14 | #include "unicode/uloc.h" |
| 15 | |
| 16 | #include "charstr.h" |
| 17 | |
| 18 | /** |
| 19 | * Create an iterator over the specified keywords list |
| 20 | * @param keywordList double-null terminated list. Will be copied. |
| 21 | * @param keywordListSize size in bytes of keywordList |
| 22 | * @param status err code |
| 23 | * @return enumeration (owned by caller) of the keyword list. |
| 24 | * @internal ICU 3.0 |
| 25 | */ |
| 26 | U_CAPI UEnumeration* U_EXPORT2 |
| 27 | uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status); |
| 28 | |
| 29 | /** |
| 30 | * Look up a resource bundle table item with fallback on the table level. |
| 31 | * This is accessible so it can be called by C++ code. |
| 32 | */ |
| 33 | U_CAPI const UChar * U_EXPORT2 |
| 34 | uloc_getTableStringWithFallback( |
| 35 | const char *path, |
| 36 | const char *locale, |
| 37 | const char *tableKey, |
| 38 | const char *subTableKey, |
| 39 | const char *itemKey, |
| 40 | int32_t *pLength, |
| 41 | UErrorCode *pErrorCode); |
| 42 | |
/**
 * Evaluates to true if `a` is a locale ID separator ('_' or '-'), false otherwise.
 *
 * The argument is parenthesized so that expression arguments (for example
 * `c & 0x7f` or a conditional expression) are compared as a whole instead of
 * being re-associated by operator precedence.
 *
 * Note: `a` may be evaluated twice, so it must not have side effects.
 */
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')
| 45 | |
| 46 | U_CFUNC const char* |
| 47 | uloc_getCurrentCountryID(const char* oldID); |
| 48 | |
| 49 | U_CFUNC const char* |
| 50 | uloc_getCurrentLanguageID(const char* oldID); |
| 51 | |
| 52 | U_CFUNC void |
| 53 | ulocimp_getKeywords(const char *localeID, |
| 54 | char prev, |
| 55 | icu::ByteSink& sink, |
| 56 | UBool valuesToo, |
| 57 | UErrorCode *status); |
| 58 | |
| 59 | icu::CharString U_EXPORT2 |
| 60 | ulocimp_getLanguage(const char *localeID, |
| 61 | const char **pEnd, |
| 62 | UErrorCode &status); |
| 63 | |
| 64 | icu::CharString U_EXPORT2 |
| 65 | ulocimp_getScript(const char *localeID, |
| 66 | const char **pEnd, |
| 67 | UErrorCode &status); |
| 68 | |
| 69 | icu::CharString U_EXPORT2 |
| 70 | ulocimp_getCountry(const char *localeID, |
| 71 | const char **pEnd, |
| 72 | UErrorCode &status); |
| 73 | |
| 74 | U_CAPI void U_EXPORT2 |
| 75 | ulocimp_getName(const char* localeID, |
| 76 | icu::ByteSink& sink, |
| 77 | UErrorCode* err); |
| 78 | |
| 79 | U_CAPI void U_EXPORT2 |
| 80 | ulocimp_getBaseName(const char* localeID, |
| 81 | icu::ByteSink& sink, |
| 82 | UErrorCode* err); |
| 83 | |
| 84 | U_CAPI void U_EXPORT2 |
| 85 | ulocimp_canonicalize(const char* localeID, |
| 86 | icu::ByteSink& sink, |
| 87 | UErrorCode* err); |
| 88 | |
| 89 | U_CAPI void U_EXPORT2 |
| 90 | ulocimp_getKeywordValue(const char* localeID, |
| 91 | const char* keywordName, |
| 92 | icu::ByteSink& sink, |
| 93 | UErrorCode* status); |
| 94 | |
| 95 | /** |
| 96 | * Writes a well-formed language tag for this locale ID. |
| 97 | * |
| 98 | * **Note**: When `strict` is false, any locale fields which do not satisfy the |
| 99 | * BCP47 syntax requirement will be omitted from the result. When `strict` is |
| 100 | * true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the `err` if any locale |
| 101 | * fields do not satisfy the BCP47 syntax requirement. |
| 102 | * |
| 103 | * @param localeID the input locale ID |
| 104 | * @param sink the output sink receiving the BCP47 language |
| 105 | * tag for this Locale. |
| 106 | * @param strict boolean value indicating if the function returns |
| 107 | * an error for an ill-formed input locale ID. |
| 108 | * @param err error information if receiving the language |
| 109 | * tag failed. |
| 110 | * (No value is returned; the language tag is written to `sink`.) |
| 111 | * |
| 112 | * @internal ICU 64 |
| 113 | */ |
| 114 | U_CAPI void U_EXPORT2 |
| 115 | ulocimp_toLanguageTag(const char* localeID, |
| 116 | icu::ByteSink& sink, |
| 117 | UBool strict, |
| 118 | UErrorCode* err); |
| 119 | |
| 120 | /** |
| 121 | * Returns a locale ID for the specified BCP47 language tag string. |
| 122 | * If the specified language tag contains any ill-formed subtags, |
| 123 | * the first such subtag and all following subtags are ignored. |
| 124 | * <p> |
| 125 | * This implements the 'Language-Tag' production of BCP 47, and so |
| 126 | * supports legacy language tags (marked as “Type: grandfathered” in BCP 47) |
| 127 | * (regular and irregular) as well as private use language tags. |
| 128 | * |
| 129 | * Private use tags are represented as 'x-whatever', |
| 130 | * and legacy tags are converted to their canonical replacements where they exist. |
| 131 | * |
| 132 | * Note that a few legacy tags have no modern replacement; |
| 133 | * these will be converted using the fallback described in |
| 134 | * the first paragraph, so some information might be lost. |
| 135 | * |
| 136 | * @param langtag the input BCP47 language tag. |
| 137 | * @param tagLen the length of langtag, or -1 to call uprv_strlen(). |
| 138 | * @param sink the output sink receiving a locale ID for the |
| 139 | * specified BCP47 language tag. |
| 140 | * @param parsedLength if not NULL, successfully parsed length |
| 141 | * for the input language tag is set. |
| 142 | * @param err error information if receiving the locale ID |
| 143 | * failed. |
| 144 | * @internal ICU 63 |
| 145 | */ |
| 146 | U_CAPI void U_EXPORT2 |
| 147 | ulocimp_forLanguageTag(const char* langtag, |
| 148 | int32_t tagLen, |
| 149 | icu::ByteSink& sink, |
| 150 | int32_t* parsedLength, |
| 151 | UErrorCode* err); |
| 152 | |
| 153 | /** |
| 154 | * Get the region to use for supplemental data lookup. Uses |
| 155 | * (1) any region specified by locale tag "rg"; if none then |
| 156 | * (2) any unicode_region_tag in the locale ID; if none then |
| 157 | * (3) if inferRegion is true, the region suggested by |
| 158 | * getLikelySubtags on the localeID. |
| 159 | * If no region is found, returns length 0. |
| 160 | * |
| 161 | * @param localeID |
| 162 | * The complete locale ID (with keywords) from which |
| 163 | * to get the region to use for supplemental data. |
| 164 | * @param inferRegion |
| 165 | * If true, will try to infer region from localeID if |
| 166 | * no other region is found. |
| 167 | * @param region |
| 168 | * Buffer in which to put the region ID found; should |
| 169 | * have a capacity at least ULOC_COUNTRY_CAPACITY. |
| 170 | * @param regionCapacity |
| 171 | * The actual capacity of the region buffer. |
| 172 | * @param status |
| 173 | * Pointer to in/out UErrorCode value for latest status. |
| 174 | * @return |
| 175 | * The length of any region code found, or 0 if none. |
| 176 | * @internal ICU 57 |
| 177 | */ |
| 178 | U_CAPI int32_t U_EXPORT2 |
| 179 | ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, |
| 180 | char *region, int32_t regionCapacity, UErrorCode* status); |
| 181 | |
| 182 | /** |
| 183 | * Add the likely subtags for a provided locale ID, per the algorithm described |
| 184 | * in the following CLDR technical report: |
| 185 | * |
| 186 | * http://www.unicode.org/reports/tr35/#Likely_Subtags |
| 187 | * |
| 188 | * If localeID is already in the maximal form, or there is no data available |
| 189 | * for maximization, it will be copied to the output sink. For example, |
| 190 | * "und-Zzzz" cannot be maximized, since there is no reasonable maximization. |
| 191 | * |
| 192 | * Examples: |
| 193 | * |
| 194 | * "en" maximizes to "en_Latn_US" |
| 195 | * |
| 196 | * "de" maximizes to "de_Latn_DE" |
| 197 | * |
| 198 | * "sr" maximizes to "sr_Cyrl_RS" |
| 199 | * |
| 200 | * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.) |
| 201 | * |
| 202 | * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.) |
| 203 | * |
| 204 | * @param localeID The locale to maximize |
| 205 | * @param sink The output sink receiving the maximized locale |
| 206 | * @param err Error information if maximizing the locale failed. If the length |
| 207 | * of the localeID and the null-terminator is greater than the maximum allowed size, |
| 208 | * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. |
| 209 | * @internal ICU 64 |
| 210 | */ |
| 211 | U_CAPI void U_EXPORT2 |
| 212 | ulocimp_addLikelySubtags(const char* localeID, |
| 213 | icu::ByteSink& sink, |
| 214 | UErrorCode* err); |
| 215 | |
| 216 | /** |
| 217 | * Minimize the subtags for a provided locale ID, per the algorithm described |
| 218 | * in the following CLDR technical report: |
| 219 | * |
| 220 | * http://www.unicode.org/reports/tr35/#Likely_Subtags |
| 221 | * |
| 222 | * If localeID is already in the minimal form, or there is no data available |
| 223 | * for minimization, it will be copied to the output sink. Since the |
| 224 | * minimization algorithm relies on proper maximization, see the comments |
| 225 | * for ulocimp_addLikelySubtags for reasons why there might not be any data. |
| 226 | * |
| 227 | * Examples: |
| 228 | * |
| 229 | * "en_Latn_US" minimizes to "en" |
| 230 | * |
| 231 | * "de_Latn_DE" minimizes to "de" |
| 232 | * |
| 233 | * "sr_Cyrl_RS" minimizes to "sr" |
| 234 | * |
| 235 | * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the |
| 236 | * script, and minimizing to "zh" would imply "zh_Hans_CN".) |
| 237 | * |
| 238 | * @param localeID The locale to minimize |
| 239 | * @param sink The output sink receiving the minimized locale |
| 240 | * @param err Error information if minimizing the locale failed. If the length |
| 241 | * of the localeID and the null-terminator is greater than the maximum allowed size, |
| 242 | * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. |
| 243 | * @internal ICU 64 |
| 244 | */ |
| 245 | U_CAPI void U_EXPORT2 |
| 246 | ulocimp_minimizeSubtags(const char* localeID, |
| 247 | icu::ByteSink& sink, |
| 248 | UErrorCode* err); |
| 249 | |
| 250 | U_CAPI const char * U_EXPORT2 |
| 251 | locale_getKeywordsStart(const char *localeID); |
| 252 | |
| 253 | U_CFUNC UBool |
| 254 | ultag_isExtensionSubtags(const char* s, int32_t len); |
| 255 | |
| 256 | U_CFUNC UBool |
| 257 | ultag_isLanguageSubtag(const char* s, int32_t len); |
| 258 | |
| 259 | U_CFUNC UBool |
| 260 | ultag_isPrivateuseValueSubtags(const char* s, int32_t len); |
| 261 | |
| 262 | U_CFUNC UBool |
| 263 | ultag_isRegionSubtag(const char* s, int32_t len); |
| 264 | |
| 265 | U_CFUNC UBool |
| 266 | ultag_isScriptSubtag(const char* s, int32_t len); |
| 267 | |
| 268 | U_CFUNC UBool |
| 269 | ultag_isTransformedExtensionSubtags(const char* s, int32_t len); |
| 270 | |
| 271 | U_CFUNC UBool |
| 272 | ultag_isUnicodeExtensionSubtags(const char* s, int32_t len); |
| 273 | |
| 274 | U_CFUNC UBool |
| 275 | ultag_isUnicodeLocaleAttribute(const char* s, int32_t len); |
| 276 | |
| 277 | U_CFUNC UBool |
| 278 | ultag_isUnicodeLocaleAttributes(const char* s, int32_t len); |
| 279 | |
| 280 | U_CFUNC UBool |
| 281 | ultag_isUnicodeLocaleKey(const char* s, int32_t len); |
| 282 | |
| 283 | U_CFUNC UBool |
| 284 | ultag_isUnicodeLocaleType(const char* s, int32_t len); |
| 285 | |
| 286 | U_CFUNC UBool |
| 287 | ultag_isVariantSubtags(const char* s, int32_t len); |
| 288 | |
| 289 | U_CAPI const char * U_EXPORT2 |
| 290 | ultag_getTKeyStart(const char *localeID); |
| 291 | |
| 292 | U_CFUNC const char* |
| 293 | ulocimp_toBcpKey(const char* key); |
| 294 | |
| 295 | U_CFUNC const char* |
| 296 | ulocimp_toLegacyKey(const char* key); |
| 297 | |
| 298 | U_CFUNC const char* |
| 299 | ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType); |
| 300 | |
| 301 | U_CFUNC const char* |
| 302 | ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType); |
| 303 | |
| 304 | /* Function for testing purpose */ |
| 305 | U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length); |
| 306 | |
| 307 | // Return true if the value is already canonicalized. |
| 308 | U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName); |
| 309 | |
| 310 | /** |
| 311 | * A utility class for handling locale IDs that may be longer than ULOC_FULLNAME_CAPACITY. |
| 312 | * This encompasses all of the logic to allocate a temporary locale ID buffer on the stack, |
| 313 | * and then, if it's not big enough, reallocate it on the heap and try again. |
| 314 | * |
| 315 | * You use it like this: |
| 316 | * UErrorCode err = U_ZERO_ERROR; |
| 317 | * |
| 318 | * PreflightingLocaleIDBuffer tempBuffer; |
| 319 | * do { |
| 320 | * tempBuffer.requestedCapacity = uloc_doSomething(localeID, tempBuffer.getBuffer(), tempBuffer.getCapacity(), &err); |
| 321 | * } while (tempBuffer.needToTryAgain(&err)); |
| 322 | * if (U_SUCCESS(err)) { |
| 323 | * uloc_doSomethingWithTheResult(tempBuffer.getBuffer()); |
| 324 | * } |
| 325 | */ |
| 326 | class PreflightingLocaleIDBuffer { |
| 327 | private: |
| 328 | char stackBuffer[ULOC_FULLNAME_CAPACITY]; |
| 329 | char* heapBuffer = nullptr; |
| 330 | int32_t capacity = ULOC_FULLNAME_CAPACITY; |
| 331 | |
| 332 | public: |
| 333 | int32_t requestedCapacity = ULOC_FULLNAME_CAPACITY; |
| 334 | |
| 335 | // No heap allocation. Use only on the stack. |
| 336 | static void* U_EXPORT2 operator new(size_t) noexcept = delete; |
| 337 | static void* U_EXPORT2 operator new[](size_t) noexcept = delete; |
| 338 | #if U_HAVE_PLACEMENT_NEW |
| 339 | static void* U_EXPORT2 operator new(size_t, void*) noexcept = delete; |
| 340 | #endif |
| 341 | |
| 342 | PreflightingLocaleIDBuffer() {} |
| 343 | |
| 344 | ~PreflightingLocaleIDBuffer() { uprv_free(heapBuffer); } |
| 345 | |
| 346 | char* getBuffer() { |
| 347 | if (heapBuffer == nullptr) { |
| 348 | return stackBuffer; |
| 349 | } else { |
| 350 | return heapBuffer; |
| 351 | } |
| 352 | } |
| 353 | |
| 354 | int32_t getCapacity() { |
| 355 | return capacity; |
| 356 | } |
| 357 | |
| 358 | bool needToTryAgain(UErrorCode* err) { |
| 359 | if (heapBuffer != nullptr) { |
| 360 | return false; |
| 361 | } |
| 362 | |
| 363 | if (*err == U_BUFFER_OVERFLOW_ERROR || *err == U_STRING_NOT_TERMINATED_WARNING) { |
| 364 | int32_t newCapacity = requestedCapacity + 2; // one for the terminating null, one just for paranoia |
| 365 | heapBuffer = static_cast<char*>(uprv_malloc(newCapacity)); |
| 366 | if (heapBuffer == nullptr) { |
| 367 | *err = U_MEMORY_ALLOCATION_ERROR; |
| 368 | } else { |
| 369 | *err = U_ZERO_ERROR; |
| 370 | capacity = newCapacity; |
| 371 | } |
| 372 | return U_SUCCESS(*err); |
| 373 | } |
| 374 | return false; |
| 375 | } |
| 376 | }; |
| 377 | |
| 378 | #endif |
| 379 | |