1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5* Copyright (C) 2004-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8*/
9
10#ifndef ULOCIMP_H
11#define ULOCIMP_H
12
13#include "unicode/bytestream.h"
14#include "unicode/uloc.h"
15
16#include "charstr.h"
17
18/**
19 * Create an iterator over the specified keywords list
20 * @param keywordList double-null terminated list. Will be copied.
21 * @param keywordListSize size in bytes of keywordList
22 * @param status err code
23 * @return enumeration (owned by caller) of the keyword list.
24 * @internal ICU 3.0
25 */
26U_CAPI UEnumeration* U_EXPORT2
27uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status);
28
29/**
30 * Look up a resource bundle table item with fallback on the table level.
31 * This is accessible so it can be called by C++ code.
32 */
33U_CAPI const UChar * U_EXPORT2
34uloc_getTableStringWithFallback(
35 const char *path,
36 const char *locale,
37 const char *tableKey,
38 const char *subTableKey,
39 const char *itemKey,
40 int32_t *pLength,
41 UErrorCode *pErrorCode);
42
/*
 * Evaluates to true if `a` is a locale ID separator ('_' or '-'), false otherwise.
 *
 * The argument is parenthesized so that an operator expression passed as `a`
 * (for example a conditional expression) is compared as a whole.
 * `a` is still expanded twice, so it must not have side effects.
 */
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')
45
46U_CFUNC const char*
47uloc_getCurrentCountryID(const char* oldID);
48
49U_CFUNC const char*
50uloc_getCurrentLanguageID(const char* oldID);
51
52U_CFUNC void
53ulocimp_getKeywords(const char *localeID,
54 char prev,
55 icu::ByteSink& sink,
56 UBool valuesToo,
57 UErrorCode *status);
58
59icu::CharString U_EXPORT2
60ulocimp_getLanguage(const char *localeID,
61 const char **pEnd,
62 UErrorCode &status);
63
64icu::CharString U_EXPORT2
65ulocimp_getScript(const char *localeID,
66 const char **pEnd,
67 UErrorCode &status);
68
69icu::CharString U_EXPORT2
70ulocimp_getCountry(const char *localeID,
71 const char **pEnd,
72 UErrorCode &status);
73
74U_CAPI void U_EXPORT2
75ulocimp_getName(const char* localeID,
76 icu::ByteSink& sink,
77 UErrorCode* err);
78
79U_CAPI void U_EXPORT2
80ulocimp_getBaseName(const char* localeID,
81 icu::ByteSink& sink,
82 UErrorCode* err);
83
84U_CAPI void U_EXPORT2
85ulocimp_canonicalize(const char* localeID,
86 icu::ByteSink& sink,
87 UErrorCode* err);
88
89U_CAPI void U_EXPORT2
90ulocimp_getKeywordValue(const char* localeID,
91 const char* keywordName,
92 icu::ByteSink& sink,
93 UErrorCode* status);
94
95/**
96 * Writes a well-formed language tag for this locale ID.
97 *
98 * **Note**: When `strict` is false, any locale fields which do not satisfy the
99 * BCP47 syntax requirement will be omitted from the result. When `strict` is
100 * true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the `err` if any locale
101 * fields do not satisfy the BCP47 syntax requirement.
102 *
103 * @param localeID the input locale ID
104 * @param sink the output sink receiving the BCP47 language
105 * tag for this Locale.
106 * @param strict boolean value indicating if the function returns
107 * an error for an ill-formed input locale ID.
108 * @param err error information if receiving the language
109 * tag failed.
110 * @return The length of the BCP47 language tag.
111 *
112 * @internal ICU 64
113 */
114U_CAPI void U_EXPORT2
115ulocimp_toLanguageTag(const char* localeID,
116 icu::ByteSink& sink,
117 UBool strict,
118 UErrorCode* err);
119
120/**
121 * Returns a locale ID for the specified BCP47 language tag string.
122 * If the specified language tag contains any ill-formed subtags,
123 * the first such subtag and all following subtags are ignored.
124 * <p>
125 * This implements the 'Language-Tag' production of BCP 47, and so
126 * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
127 * (regular and irregular) as well as private use language tags.
128 *
129 * Private use tags are represented as 'x-whatever',
130 * and legacy tags are converted to their canonical replacements where they exist.
131 *
132 * Note that a few legacy tags have no modern replacement;
133 * these will be converted using the fallback described in
134 * the first paragraph, so some information might be lost.
135 *
136 * @param langtag the input BCP47 language tag.
137 * @param tagLen the length of langtag, or -1 to call uprv_strlen().
138 * @param sink the output sink receiving a locale ID for the
139 * specified BCP47 language tag.
140 * @param parsedLength if not NULL, successfully parsed length
141 * for the input language tag is set.
142 * @param err error information if receiving the locald ID
143 * failed.
144 * @internal ICU 63
145 */
146U_CAPI void U_EXPORT2
147ulocimp_forLanguageTag(const char* langtag,
148 int32_t tagLen,
149 icu::ByteSink& sink,
150 int32_t* parsedLength,
151 UErrorCode* err);
152
153/**
154 * Get the region to use for supplemental data lookup. Uses
155 * (1) any region specified by locale tag "rg"; if none then
156 * (2) any unicode_region_tag in the locale ID; if none then
157 * (3) if inferRegion is true, the region suggested by
158 * getLikelySubtags on the localeID.
159 * If no region is found, returns length 0.
160 *
161 * @param localeID
162 * The complete locale ID (with keywords) from which
163 * to get the region to use for supplemental data.
164 * @param inferRegion
165 * If true, will try to infer region from localeID if
166 * no other region is found.
167 * @param region
168 * Buffer in which to put the region ID found; should
169 * have a capacity at least ULOC_COUNTRY_CAPACITY.
170 * @param regionCapacity
171 * The actual capacity of the region buffer.
172 * @param status
173 * Pointer to in/out UErrorCode value for latest status.
174 * @return
175 * The length of any region code found, or 0 if none.
176 * @internal ICU 57
177 */
178U_CAPI int32_t U_EXPORT2
179ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
180 char *region, int32_t regionCapacity, UErrorCode* status);
181
182/**
183 * Add the likely subtags for a provided locale ID, per the algorithm described
184 * in the following CLDR technical report:
185 *
186 * http://www.unicode.org/reports/tr35/#Likely_Subtags
187 *
188 * If localeID is already in the maximal form, or there is no data available
189 * for maximization, it will be copied to the output buffer. For example,
190 * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
191 *
192 * Examples:
193 *
194 * "en" maximizes to "en_Latn_US"
195 *
196 * "de" maximizes to "de_Latn_US"
197 *
198 * "sr" maximizes to "sr_Cyrl_RS"
199 *
200 * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
201 *
202 * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
203 *
204 * @param localeID The locale to maximize
205 * @param sink The output sink receiving the maximized locale
206 * @param err Error information if maximizing the locale failed. If the length
207 * of the localeID and the null-terminator is greater than the maximum allowed size,
208 * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
209 * @internal ICU 64
210 */
211U_CAPI void U_EXPORT2
212ulocimp_addLikelySubtags(const char* localeID,
213 icu::ByteSink& sink,
214 UErrorCode* err);
215
216/**
217 * Minimize the subtags for a provided locale ID, per the algorithm described
218 * in the following CLDR technical report:
219 *
220 * http://www.unicode.org/reports/tr35/#Likely_Subtags
221 *
222 * If localeID is already in the minimal form, or there is no data available
223 * for minimization, it will be copied to the output buffer. Since the
224 * minimization algorithm relies on proper maximization, see the comments
225 * for ulocimp_addLikelySubtags for reasons why there might not be any data.
226 *
227 * Examples:
228 *
229 * "en_Latn_US" minimizes to "en"
230 *
231 * "de_Latn_US" minimizes to "de"
232 *
233 * "sr_Cyrl_RS" minimizes to "sr"
234 *
235 * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
236 * script, and minimizing to "zh" would imply "zh_Hans_CN".)
237 *
238 * @param localeID The locale to minimize
239 * @param sink The output sink receiving the maximized locale
240 * @param err Error information if minimizing the locale failed. If the length
241 * of the localeID and the null-terminator is greater than the maximum allowed size,
242 * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
243 * @internal ICU 64
244 */
245U_CAPI void U_EXPORT2
246ulocimp_minimizeSubtags(const char* localeID,
247 icu::ByteSink& sink,
248 UErrorCode* err);
249
250U_CAPI const char * U_EXPORT2
251locale_getKeywordsStart(const char *localeID);
252
253U_CFUNC UBool
254ultag_isExtensionSubtags(const char* s, int32_t len);
255
256U_CFUNC UBool
257ultag_isLanguageSubtag(const char* s, int32_t len);
258
259U_CFUNC UBool
260ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
261
262U_CFUNC UBool
263ultag_isRegionSubtag(const char* s, int32_t len);
264
265U_CFUNC UBool
266ultag_isScriptSubtag(const char* s, int32_t len);
267
268U_CFUNC UBool
269ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
270
271U_CFUNC UBool
272ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
273
274U_CFUNC UBool
275ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
276
277U_CFUNC UBool
278ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
279
280U_CFUNC UBool
281ultag_isUnicodeLocaleKey(const char* s, int32_t len);
282
283U_CFUNC UBool
284ultag_isUnicodeLocaleType(const char* s, int32_t len);
285
286U_CFUNC UBool
287ultag_isVariantSubtags(const char* s, int32_t len);
288
289U_CAPI const char * U_EXPORT2
290ultag_getTKeyStart(const char *localeID);
291
292U_CFUNC const char*
293ulocimp_toBcpKey(const char* key);
294
295U_CFUNC const char*
296ulocimp_toLegacyKey(const char* key);
297
298U_CFUNC const char*
299ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
300
301U_CFUNC const char*
302ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
303
304/* Function for testing purpose */
305U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length);
306
307// Return true if the value is already canonicalized.
308U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
309
310/**
311 * A utility class for handling locale IDs that may be longer than ULOC_FULLNAME_CAPACITY.
312 * This encompasses all of the logic to allocate a temporary locale ID buffer on the stack,
313 * and then, if it's not big enough, reallocate it on the heap and try again.
314 *
315 * You use it like this:
316 * UErrorCode err = U_ZERO_ERROR;
317 *
318 * PreflightingLocaleIDBuffer tempBuffer;
319 * do {
320 * tempBuffer.requestedCapacity = uloc_doSomething(localeID, tempBuffer.getBuffer(), tempBuffer.getCapacity(), &err);
321 * } while (tempBuffer.needToTryAgain(&err));
322 * if (U_SUCCESS(err)) {
323 * uloc_doSomethingWithTheResult(tempBuffer.getBuffer());
324 * }
325 */
326class PreflightingLocaleIDBuffer {
327private:
328 char stackBuffer[ULOC_FULLNAME_CAPACITY];
329 char* heapBuffer = nullptr;
330 int32_t capacity = ULOC_FULLNAME_CAPACITY;
331
332public:
333 int32_t requestedCapacity = ULOC_FULLNAME_CAPACITY;
334
335 // No heap allocation. Use only on the stack.
336 static void* U_EXPORT2 operator new(size_t) noexcept = delete;
337 static void* U_EXPORT2 operator new[](size_t) noexcept = delete;
338#if U_HAVE_PLACEMENT_NEW
339 static void* U_EXPORT2 operator new(size_t, void*) noexcept = delete;
340#endif
341
342 PreflightingLocaleIDBuffer() {}
343
344 ~PreflightingLocaleIDBuffer() { uprv_free(heapBuffer); }
345
346 char* getBuffer() {
347 if (heapBuffer == nullptr) {
348 return stackBuffer;
349 } else {
350 return heapBuffer;
351 }
352 }
353
354 int32_t getCapacity() {
355 return capacity;
356 }
357
358 bool needToTryAgain(UErrorCode* err) {
359 if (heapBuffer != nullptr) {
360 return false;
361 }
362
363 if (*err == U_BUFFER_OVERFLOW_ERROR || *err == U_STRING_NOT_TERMINATED_WARNING) {
364 int32_t newCapacity = requestedCapacity + 2; // one for the terminating null, one just for paranoia
365 heapBuffer = static_cast<char*>(uprv_malloc(newCapacity));
366 if (heapBuffer == nullptr) {
367 *err = U_MEMORY_ALLOCATION_ERROR;
368 } else {
369 *err = U_ZERO_ERROR;
370 capacity = newCapacity;
371 }
372 return U_SUCCESS(*err);
373 }
374 return false;
375 }
376};
377
378#endif
379