1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4//
5
6#include <assert.h>
7#include <stdint.h>
8#include <string.h>
9#include <stdlib.h>
10#include <locale.h>
11
12#include "icushim.h"
13#include "locale.hpp"
14
15int32_t UErrorCodeToBool(UErrorCode status)
16{
17 if (U_SUCCESS(status))
18 {
19 return 1;
20 }
21
22 // assert errors that should never occur
23 assert(status != U_BUFFER_OVERFLOW_ERROR);
24 assert(status != U_INTERNAL_PROGRAM_ERROR);
25
26 // add possible SetLastError support here
27
28 return 0;
29}
30
31int32_t GetLocale(
32 const UChar* localeName, char* localeNameResult, int32_t localeNameResultLength, bool canonicalize, UErrorCode* err)
33{
34 char localeNameTemp[ULOC_FULLNAME_CAPACITY] = {0};
35 int32_t localeLength;
36
37 // Convert ourselves instead of doing u_UCharsToChars as that function considers '@' a variant and stops.
38 for (int i = 0; i < ULOC_FULLNAME_CAPACITY - 1; i++)
39 {
40 UChar c = localeName[i];
41
42 if (c > (UChar)0x7F)
43 {
44 *err = U_ILLEGAL_ARGUMENT_ERROR;
45 return ULOC_FULLNAME_CAPACITY;
46 }
47
48 localeNameTemp[i] = (char)c;
49
50 if (c == (UChar)0x0)
51 {
52 break;
53 }
54 }
55
56 if (canonicalize)
57 {
58 localeLength = uloc_canonicalize(localeNameTemp, localeNameResult, localeNameResultLength, err);
59 }
60 else
61 {
62 localeLength = uloc_getName(localeNameTemp, localeNameResult, localeNameResultLength, err);
63 }
64
65 if (U_SUCCESS(*err))
66 {
67 // Make sure the "language" part of the locale is reasonable (i.e. we can fetch it and it is within range).
68 // This mimics how the C++ ICU API determines if a locale is "bogus" or not.
69
70 char language[ULOC_LANG_CAPACITY];
71 uloc_getLanguage(localeNameTemp, language, ULOC_LANG_CAPACITY, err);
72
73 if (*err == U_BUFFER_OVERFLOW_ERROR || *err == U_STRING_NOT_TERMINATED_WARNING)
74 {
75 // ULOC_LANG_CAPACITY includes the null terminator, so if we couldn't extract the language with the null
76 // terminator, the language must be invalid.
77
78 *err = U_ILLEGAL_ARGUMENT_ERROR;
79 }
80 }
81
82 return localeLength;
83}
84
85UErrorCode u_charsToUChars_safe(const char* str, UChar* value, int32_t valueLength)
86{
87 int len = strlen(str);
88
89 if (len >= valueLength)
90 {
91 return U_BUFFER_OVERFLOW_ERROR;
92 }
93
94 u_charsToUChars(str, value, len + 1);
95 return U_ZERO_ERROR;
96}
97
98int32_t FixupLocaleName(UChar* value, int32_t valueLength)
99{
100 int32_t i = 0;
101 for (; i < valueLength; i++)
102 {
103 if (value[i] == (UChar)'\0')
104 {
105 break;
106 }
107 else if (value[i] == (UChar)'_')
108 {
109 value[i] = (UChar)'-';
110 }
111 }
112
113 return i;
114}
115
// Reports whether the environment variable `name` exists and has a non-empty value.
bool IsEnvVarSet(const char* name)
{
    const char* envValue = getenv(name);

    if (!envValue)
    {
        // Variable is not present in the environment at all.
        return false;
    }

    // Present, but an empty value counts as "not set".
    return envValue[0] != '\0';
}
122
123// The behavior of uloc_getDefault() on POSIX systems is to query
124// setlocale(LC_MESSAGES) and use that value, unless it is C or
125// POSIX. In that case it tries to read LC_ALL, LC_MESSAGES and LANG
126// and then falls back to en_US_POSIX if none of them are set.
127//
128// en_US_POSIX is a weird locale since the collation rules treat 'a'
129// and 'A' as different letters even when ignoring case. Furthermore
130// it's common for LC_ALL, LC_MESSAGES and LANG to be unset when
131// running under Docker.
132//
133// We'd rather default to invariant in this case. If any of these
134// are set, we'll just call into ICU and let it do whatever
135// normalization it would do.
136const char* DetectDefaultLocaleName()
137{
138 char* loc = setlocale(LC_MESSAGES, nullptr);
139
140 if (loc != nullptr && (strcmp("C", loc) == 0 || strcmp("POSIX", loc) == 0))
141 {
142 if (!IsEnvVarSet("LC_ALL") && !IsEnvVarSet("LC_MESSAGES") && !IsEnvVarSet("LANG"))
143 {
144 return "";
145 }
146 }
147
148 return uloc_getDefault();
149}
150
151// GlobalizationNative_GetLocales gets all locale names and store it in the value buffer
152// in case of success, it returns the count of the characters stored in value buffer
153// in case of failure, it returns negative number.
154// if the input value buffer is null, it returns the length needed to store the
155// locale names list.
156// if the value is not null, it fills the value with locale names separated by the length
157// of each name.
158extern "C" int32_t GlobalizationNative_GetLocales(UChar *value, int32_t valueLength)
159{
160 int32_t totalLength = 0;
161 int32_t index = 0;
162 int32_t localeCount = uloc_countAvailable();
163
164 if (localeCount <= 0)
165 return -1; // failed
166
167 for (int32_t i = 0; i < localeCount; i++)
168 {
169 const char *pLocaleName = uloc_getAvailable(i);
170 if (pLocaleName[0] == 0) // unexpected empty name
171 return -2;
172
173 int32_t localeNameLength = strlen(pLocaleName);
174
175 totalLength += localeNameLength + 1; // add 1 for the name length
176
177 if (value != nullptr)
178 {
179 if (totalLength > valueLength)
180 return -3;
181
182 value[index++] = (UChar) localeNameLength;
183
184 for (int j=0; j<localeNameLength; j++)
185 {
186 if (pLocaleName[j] == '_') // fix the locale name
187 {
188 value[index++] = (UChar) '-';
189 }
190 else
191 {
192 value[index++] = (UChar) pLocaleName[j];
193 }
194 }
195 }
196 }
197
198 return totalLength;
199}
200
201extern "C" int32_t GlobalizationNative_GetLocaleName(const UChar* localeName, UChar* value, int32_t valueLength)
202{
203 UErrorCode status = U_ZERO_ERROR;
204
205 char localeNameBuffer[ULOC_FULLNAME_CAPACITY];
206 GetLocale(localeName, localeNameBuffer, ULOC_FULLNAME_CAPACITY, true, &status);
207
208 if (U_SUCCESS(status))
209 {
210 status = u_charsToUChars_safe(localeNameBuffer, value, valueLength);
211
212 if (U_SUCCESS(status))
213 {
214 FixupLocaleName(value, valueLength);
215 }
216 }
217
218 return UErrorCodeToBool(status);
219}
220
221extern "C" int32_t GlobalizationNative_GetDefaultLocaleName(UChar* value, int32_t valueLength)
222{
223 char localeNameBuffer[ULOC_FULLNAME_CAPACITY];
224 UErrorCode status = U_ZERO_ERROR;
225
226 const char* defaultLocale = DetectDefaultLocaleName();
227
228 uloc_getBaseName(defaultLocale, localeNameBuffer, ULOC_FULLNAME_CAPACITY, &status);
229
230 if (U_SUCCESS(status))
231 {
232 status = u_charsToUChars_safe(localeNameBuffer, value, valueLength);
233
234 if (U_SUCCESS(status))
235 {
236 int localeNameLen = FixupLocaleName(value, valueLength);
237
238 char collationValueTemp[ULOC_KEYWORDS_CAPACITY];
239 int32_t collationLen =
240 uloc_getKeywordValue(defaultLocale, "collation", collationValueTemp, ULOC_KEYWORDS_CAPACITY, &status);
241
242 if (U_SUCCESS(status) && collationLen > 0)
243 {
244 // copy the collation; managed uses a "_" to represent collation (not
245 // "@collation=")
246 status = u_charsToUChars_safe("_", &value[localeNameLen], valueLength - localeNameLen);
247 if (U_SUCCESS(status))
248 {
249 status = u_charsToUChars_safe(
250 collationValueTemp, &value[localeNameLen + 1], valueLength - localeNameLen - 1);
251 }
252 }
253 }
254 }
255
256 return UErrorCodeToBool(status);
257}
258