1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ********************************************************************** |
5 | * Copyright (C) 2004-2016, International Business Machines |
6 | * Corporation and others. All Rights Reserved. |
7 | ********************************************************************** |
8 | */ |
9 | |
10 | #ifndef ULOCIMP_H |
11 | #define ULOCIMP_H |
12 | |
13 | #include "unicode/bytestream.h" |
14 | #include "unicode/uloc.h" |
15 | |
16 | #include "charstr.h" |
17 | |
18 | /** |
19 | * Create an iterator over the specified keywords list |
20 | * @param keywordList double-null terminated list. Will be copied. |
21 | * @param keywordListSize size in bytes of keywordList |
22 | * @param status err code |
23 | * @return enumeration (owned by caller) of the keyword list. |
24 | * @internal ICU 3.0 |
25 | */ |
26 | U_CAPI UEnumeration* U_EXPORT2 |
27 | uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status); |
28 | |
29 | /** |
30 | * Look up a resource bundle table item with fallback on the table level. |
31 | * This is accessible so it can be called by C++ code. |
32 | */ |
33 | U_CAPI const UChar * U_EXPORT2 |
34 | uloc_getTableStringWithFallback( |
35 | const char *path, |
36 | const char *locale, |
37 | const char *tableKey, |
38 | const char *subTableKey, |
39 | const char *itemKey, |
40 | int32_t *pLength, |
41 | UErrorCode *pErrorCode); |
42 | |
/*
 * Evaluates to true if `a` is a locale ID separator ('_' or '-'), and to
 * false otherwise.
 *
 * The argument is parenthesized so that an expression argument (for example
 * a conditional expression) is compared as a whole. Note that `a` is
 * evaluated up to twice, so do not pass an expression with side effects.
 */
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')
45 | |
46 | U_CFUNC const char* |
47 | uloc_getCurrentCountryID(const char* oldID); |
48 | |
49 | U_CFUNC const char* |
50 | uloc_getCurrentLanguageID(const char* oldID); |
51 | |
52 | U_CFUNC void |
53 | ulocimp_getKeywords(const char *localeID, |
54 | char prev, |
55 | icu::ByteSink& sink, |
56 | UBool valuesToo, |
57 | UErrorCode *status); |
58 | |
59 | icu::CharString U_EXPORT2 |
60 | ulocimp_getLanguage(const char *localeID, |
61 | const char **pEnd, |
62 | UErrorCode &status); |
63 | |
64 | icu::CharString U_EXPORT2 |
65 | ulocimp_getScript(const char *localeID, |
66 | const char **pEnd, |
67 | UErrorCode &status); |
68 | |
69 | icu::CharString U_EXPORT2 |
70 | ulocimp_getCountry(const char *localeID, |
71 | const char **pEnd, |
72 | UErrorCode &status); |
73 | |
74 | U_CAPI void U_EXPORT2 |
75 | ulocimp_getName(const char* localeID, |
76 | icu::ByteSink& sink, |
77 | UErrorCode* err); |
78 | |
79 | U_CAPI void U_EXPORT2 |
80 | ulocimp_getBaseName(const char* localeID, |
81 | icu::ByteSink& sink, |
82 | UErrorCode* err); |
83 | |
84 | U_CAPI void U_EXPORT2 |
85 | ulocimp_canonicalize(const char* localeID, |
86 | icu::ByteSink& sink, |
87 | UErrorCode* err); |
88 | |
89 | U_CAPI void U_EXPORT2 |
90 | ulocimp_getKeywordValue(const char* localeID, |
91 | const char* keywordName, |
92 | icu::ByteSink& sink, |
93 | UErrorCode* status); |
94 | |
95 | /** |
96 | * Writes a well-formed language tag for this locale ID. |
97 | * |
98 | * **Note**: When `strict` is false, any locale fields which do not satisfy the |
99 | * BCP47 syntax requirement will be omitted from the result. When `strict` is |
100 | * true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the `err` if any locale |
101 | * fields do not satisfy the BCP47 syntax requirement. |
102 | * |
103 | * @param localeID the input locale ID |
104 | * @param sink the output sink receiving the BCP47 language |
105 | * tag for this Locale. |
106 | * @param strict boolean value indicating if the function returns |
107 | * an error for an ill-formed input locale ID. |
108 | * @param err error information if receiving the language |
109 | * tag failed. |
110 | * @note This function returns nothing; the BCP47 language tag is written to `sink`. |
111 | * |
112 | * @internal ICU 64 |
113 | */ |
114 | U_CAPI void U_EXPORT2 |
115 | ulocimp_toLanguageTag(const char* localeID, |
116 | icu::ByteSink& sink, |
117 | UBool strict, |
118 | UErrorCode* err); |
119 | |
120 | /** |
121 | * Returns a locale ID for the specified BCP47 language tag string. |
122 | * If the specified language tag contains any ill-formed subtags, |
123 | * the first such subtag and all following subtags are ignored. |
124 | * <p> |
125 | * This implements the 'Language-Tag' production of BCP 47, and so |
126 | * supports legacy language tags (marked as “Type: grandfathered” in BCP 47) |
127 | * (regular and irregular) as well as private use language tags. |
128 | * |
129 | * Private use tags are represented as 'x-whatever', |
130 | * and legacy tags are converted to their canonical replacements where they exist. |
131 | * |
132 | * Note that a few legacy tags have no modern replacement; |
133 | * these will be converted using the fallback described in |
134 | * the first paragraph, so some information might be lost. |
135 | * |
136 | * @param langtag the input BCP47 language tag. |
137 | * @param tagLen the length of langtag, or -1 to call uprv_strlen(). |
138 | * @param sink the output sink receiving a locale ID for the |
139 | * specified BCP47 language tag. |
140 | * @param parsedLength if not NULL, successfully parsed length |
141 | * for the input language tag is set. |
142 | * @param err error information if receiving the locale ID |
143 | * failed. |
144 | * @internal ICU 63 |
145 | */ |
146 | U_CAPI void U_EXPORT2 |
147 | ulocimp_forLanguageTag(const char* langtag, |
148 | int32_t tagLen, |
149 | icu::ByteSink& sink, |
150 | int32_t* parsedLength, |
151 | UErrorCode* err); |
152 | |
153 | /** |
154 | * Get the region to use for supplemental data lookup. Uses |
155 | * (1) any region specified by locale tag "rg"; if none then |
156 | * (2) any unicode_region_tag in the locale ID; if none then |
157 | * (3) if inferRegion is true, the region suggested by |
158 | * getLikelySubtags on the localeID. |
159 | * If no region is found, returns length 0. |
160 | * |
161 | * @param localeID |
162 | * The complete locale ID (with keywords) from which |
163 | * to get the region to use for supplemental data. |
164 | * @param inferRegion |
165 | * If true, will try to infer region from localeID if |
166 | * no other region is found. |
167 | * @param region |
168 | * Buffer in which to put the region ID found; should |
169 | * have a capacity at least ULOC_COUNTRY_CAPACITY. |
170 | * @param regionCapacity |
171 | * The actual capacity of the region buffer. |
172 | * @param status |
173 | * Pointer to in/out UErrorCode value for latest status. |
174 | * @return |
175 | * The length of any region code found, or 0 if none. |
176 | * @internal ICU 57 |
177 | */ |
178 | U_CAPI int32_t U_EXPORT2 |
179 | ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, |
180 | char *region, int32_t regionCapacity, UErrorCode* status); |
181 | |
182 | /** |
183 | * Add the likely subtags for a provided locale ID, per the algorithm described |
184 | * in the following CLDR technical report: |
185 | * |
186 | * http://www.unicode.org/reports/tr35/#Likely_Subtags |
187 | * |
188 | * If localeID is already in the maximal form, or there is no data available |
189 | * for maximization, it will be copied to the output sink. For example, |
190 | * "und-Zzzz" cannot be maximized, since there is no reasonable maximization. |
191 | * |
192 | * Examples: |
193 | * |
194 | * "en" maximizes to "en_Latn_US" |
195 | * |
196 | * "de" maximizes to "de_Latn_DE" |
197 | * |
198 | * "sr" maximizes to "sr_Cyrl_RS" |
199 | * |
200 | * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.) |
201 | * |
202 | * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.) |
203 | * |
204 | * @param localeID The locale to maximize |
205 | * @param sink The output sink receiving the maximized locale |
206 | * @param err Error information if maximizing the locale failed. If the length |
207 | * of the localeID and the null-terminator is greater than the maximum allowed size, |
208 | * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. |
209 | * @internal ICU 64 |
210 | */ |
211 | U_CAPI void U_EXPORT2 |
212 | ulocimp_addLikelySubtags(const char* localeID, |
213 | icu::ByteSink& sink, |
214 | UErrorCode* err); |
215 | |
216 | /** |
217 | * Minimize the subtags for a provided locale ID, per the algorithm described |
218 | * in the following CLDR technical report: |
219 | * |
220 | * http://www.unicode.org/reports/tr35/#Likely_Subtags |
221 | * |
222 | * If localeID is already in the minimal form, or there is no data available |
223 | * for minimization, it will be copied to the output sink. Since the |
224 | * minimization algorithm relies on proper maximization, see the comments |
225 | * for ulocimp_addLikelySubtags for reasons why there might not be any data. |
226 | * |
227 | * Examples: |
228 | * |
229 | * "en_Latn_US" minimizes to "en" |
230 | * |
231 | * "de_Latn_DE" minimizes to "de" |
232 | * |
233 | * "sr_Cyrl_RS" minimizes to "sr" |
234 | * |
235 | * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the |
236 | * script, and minimizing to "zh" would imply "zh_Hans_CN".) |
237 | * |
238 | * @param localeID The locale to minimize |
239 | * @param sink The output sink receiving the minimized locale |
240 | * @param err Error information if minimizing the locale failed. If the length |
241 | * of the localeID and the null-terminator is greater than the maximum allowed size, |
242 | * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. |
243 | * @internal ICU 64 |
244 | */ |
245 | U_CAPI void U_EXPORT2 |
246 | ulocimp_minimizeSubtags(const char* localeID, |
247 | icu::ByteSink& sink, |
248 | UErrorCode* err); |
249 | |
250 | U_CAPI const char * U_EXPORT2 |
251 | locale_getKeywordsStart(const char *localeID); |
252 | |
253 | U_CFUNC UBool |
254 | ultag_isExtensionSubtags(const char* s, int32_t len); |
255 | |
256 | U_CFUNC UBool |
257 | ultag_isLanguageSubtag(const char* s, int32_t len); |
258 | |
259 | U_CFUNC UBool |
260 | ultag_isPrivateuseValueSubtags(const char* s, int32_t len); |
261 | |
262 | U_CFUNC UBool |
263 | ultag_isRegionSubtag(const char* s, int32_t len); |
264 | |
265 | U_CFUNC UBool |
266 | ultag_isScriptSubtag(const char* s, int32_t len); |
267 | |
268 | U_CFUNC UBool |
269 | ultag_isTransformedExtensionSubtags(const char* s, int32_t len); |
270 | |
271 | U_CFUNC UBool |
272 | ultag_isUnicodeExtensionSubtags(const char* s, int32_t len); |
273 | |
274 | U_CFUNC UBool |
275 | ultag_isUnicodeLocaleAttribute(const char* s, int32_t len); |
276 | |
277 | U_CFUNC UBool |
278 | ultag_isUnicodeLocaleAttributes(const char* s, int32_t len); |
279 | |
280 | U_CFUNC UBool |
281 | ultag_isUnicodeLocaleKey(const char* s, int32_t len); |
282 | |
283 | U_CFUNC UBool |
284 | ultag_isUnicodeLocaleType(const char* s, int32_t len); |
285 | |
286 | U_CFUNC UBool |
287 | ultag_isVariantSubtags(const char* s, int32_t len); |
288 | |
289 | U_CAPI const char * U_EXPORT2 |
290 | ultag_getTKeyStart(const char *localeID); |
291 | |
292 | U_CFUNC const char* |
293 | ulocimp_toBcpKey(const char* key); |
294 | |
295 | U_CFUNC const char* |
296 | ulocimp_toLegacyKey(const char* key); |
297 | |
298 | U_CFUNC const char* |
299 | ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType); |
300 | |
301 | U_CFUNC const char* |
302 | ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType); |
303 | |
304 | /* Function for testing purpose */ |
305 | U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length); |
306 | |
307 | // Return true if the value is already canonicalized. |
308 | U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName); |
309 | |
310 | /** |
311 | * A utility class for handling locale IDs that may be longer than ULOC_FULLNAME_CAPACITY. |
312 | * This encompasses all of the logic to allocate a temporary locale ID buffer on the stack, |
313 | * and then, if it's not big enough, reallocate it on the heap and try again. |
314 | * |
315 | * You use it like this: |
316 | * UErrorCode err = U_ZERO_ERROR; |
317 | * |
318 | * PreflightingLocaleIDBuffer tempBuffer; |
319 | * do { |
320 | * tempBuffer.requestedCapacity = uloc_doSomething(localeID, tempBuffer.getBuffer(), tempBuffer.getCapacity(), &err); |
321 | * } while (tempBuffer.needToTryAgain(&err)); |
322 | * if (U_SUCCESS(err)) { |
323 | * uloc_doSomethingWithTheResult(tempBuffer.getBuffer()); |
324 | * } |
325 | */ |
326 | class PreflightingLocaleIDBuffer { |
327 | private: |
328 | char stackBuffer[ULOC_FULLNAME_CAPACITY]; |
329 | char* heapBuffer = nullptr; |
330 | int32_t capacity = ULOC_FULLNAME_CAPACITY; |
331 | |
332 | public: |
333 | int32_t requestedCapacity = ULOC_FULLNAME_CAPACITY; |
334 | |
335 | // No heap allocation. Use only on the stack. |
336 | static void* U_EXPORT2 operator new(size_t) noexcept = delete; |
337 | static void* U_EXPORT2 operator new[](size_t) noexcept = delete; |
338 | #if U_HAVE_PLACEMENT_NEW |
339 | static void* U_EXPORT2 operator new(size_t, void*) noexcept = delete; |
340 | #endif |
341 | |
342 | PreflightingLocaleIDBuffer() {} |
343 | |
344 | ~PreflightingLocaleIDBuffer() { uprv_free(heapBuffer); } |
345 | |
346 | char* getBuffer() { |
347 | if (heapBuffer == nullptr) { |
348 | return stackBuffer; |
349 | } else { |
350 | return heapBuffer; |
351 | } |
352 | } |
353 | |
354 | int32_t getCapacity() { |
355 | return capacity; |
356 | } |
357 | |
358 | bool needToTryAgain(UErrorCode* err) { |
359 | if (heapBuffer != nullptr) { |
360 | return false; |
361 | } |
362 | |
363 | if (*err == U_BUFFER_OVERFLOW_ERROR || *err == U_STRING_NOT_TERMINATED_WARNING) { |
364 | int32_t newCapacity = requestedCapacity + 2; // one for the terminating null, one just for paranoia |
365 | heapBuffer = static_cast<char*>(uprv_malloc(newCapacity)); |
366 | if (heapBuffer == nullptr) { |
367 | *err = U_MEMORY_ALLOCATION_ERROR; |
368 | } else { |
369 | *err = U_ZERO_ERROR; |
370 | capacity = newCapacity; |
371 | } |
372 | return U_SUCCESS(*err); |
373 | } |
374 | return false; |
375 | } |
376 | }; |
377 | |
378 | #endif |
379 | |