1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4//
5
6#include <assert.h>
7#include <pthread.h>
8#include <stdint.h>
9#include <vector>
10#include <map>
11
12#include "icushim.h"
13#include "locale.hpp"
14#include "errors.h"
15
// Option bits tested against the `options` argument throughout this file.
// Every flag occupies its own bit, so several of them can be OR-ed together.
constexpr int32_t CompareOptionsIgnoreCase = 1 << 0;
constexpr int32_t CompareOptionsIgnoreNonSpace = 1 << 1;
constexpr int32_t CompareOptionsIgnoreSymbols = 1 << 2;
constexpr int32_t CompareOptionsIgnoreKanaType = 1 << 3;
constexpr int32_t CompareOptionsIgnoreWidth = 1 << 4;
21// const int32_t CompareOptionsStringSort = 0x20000000;
22// ICU's default is to use "StringSort", i.e. nonalphanumeric symbols come before alphanumeric.
23// When StringSort is not specified (.NET's default), the sort order will be different between
24// Windows and Unix platforms. The nonalphanumeric symbols will come after alphanumeric
25// characters on Windows, but before on Unix.
// Since locale-specific string sort order can change from one version of Windows to the next,
27// there is no reason to guarantee string sort order between Windows and ICU. Thus trying to
28// change ICU's default behavior here isn't really justified unless someone has a strong reason
29// for !StringSort to behave differently.
30
31typedef std::map<int32_t, UCollator*> TCollatorMap;
32typedef std::pair<int32_t, UCollator*> TCollatorMapPair;
33
34/*
35 * For increased performance, we cache the UCollator objects for a locale and
36 * share them across threads. This is safe (and supported in ICU) if we ensure
37 * multiple threads are only ever dealing with const UCollators.
38 */
39typedef struct _sort_handle
40{
41 UCollator* regular;
42 TCollatorMap collatorsPerOption;
43 pthread_mutex_t collatorsLockObject;
44
45 _sort_handle() : regular(nullptr)
46 {
47 int result = pthread_mutex_init(&collatorsLockObject, NULL);
48 if (result != 0)
49 {
50 assert(false && "Unexpected pthread_mutex_init return value.");
51 }
52 }
53
54} SortHandle;
55
56// Hiragana character range
57const UChar hiraganaStart = 0x3041;
58const UChar hiraganaEnd = 0x309e;
59const UChar hiraganaToKatakanaOffset = 0x30a1 - 0x3041;
60
61// Mapping between half- and fullwidth characters.
62// LowerChars are the characters that should sort lower than HigherChars
63const UChar g_HalfFullLowerChars[] = {
64 // halfwidth characters
65 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
66 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e,
67 0x003f, 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d,
68 0x004e, 0x004f, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005d,
69 0x005e, 0x005f, 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c,
70 0x006d, 0x006e, 0x006f, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b,
71 0x007c, 0x007d, 0x007e, 0x00a2, 0x00a3, 0x00ac, 0x00af, 0x00a6, 0x00a5, 0x20a9,
72
73 // fullwidth characters
74 0x3002, 0x300c, 0x300d, 0x3001, 0x30fb, 0x30f2, 0x30a1, 0x30a3, 0x30a5, 0x30a7, 0x30a9, 0x30e3, 0x30e5, 0x30e7, 0x30c3,
75 0x30a2, 0x30a4, 0x30a6, 0x30a8, 0x30aa, 0x30ab, 0x30ad, 0x30af, 0x30b1, 0x30b3, 0x30b5, 0x30b7, 0x30b9, 0x30bb, 0x30bd,
76 0x30bf, 0x30c1, 0x30c4, 0x30c6, 0x30c8, 0x30ca, 0x30cb, 0x30cc, 0x30cd, 0x30ce, 0x30cf, 0x30d2, 0x30d5, 0x30d8, 0x30db,
77 0x30de, 0x30df, 0x30e0, 0x30e1, 0x30e2, 0x30e4, 0x30e6, 0x30e8, 0x30e9, 0x30ea, 0x30eb, 0x30ec, 0x30ed, 0x30ef, 0x30f3,
78 0x3164, 0x3131, 0x3132, 0x3133, 0x3134, 0x3135, 0x3136, 0x3137, 0x3138, 0x3139, 0x313a, 0x313b, 0x313c, 0x313d, 0x313e,
79 0x313f, 0x3140, 0x3141, 0x3142, 0x3143, 0x3144, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314a, 0x314b, 0x314c, 0x314d,
80 0x314e, 0x314f, 0x3150, 0x3151, 0x3152, 0x3153, 0x3154, 0x3155, 0x3156, 0x3157, 0x3158, 0x3159, 0x315a, 0x315b, 0x315c,
81 0x315d, 0x315e, 0x315f, 0x3160, 0x3161, 0x3162, 0x3163
82
83};
84const UChar g_HalfFullHigherChars[] = {
85 // fullwidth characters
86 0xff01, 0xff02, 0xff03, 0xff04, 0xff05, 0xff06, 0xff07, 0xff08, 0xff09, 0xff0a, 0xff0b, 0xff0c, 0xff0d, 0xff0e, 0xff0f,
87 0xff10, 0xff11, 0xff12, 0xff13, 0xff14, 0xff15, 0xff16, 0xff17, 0xff18, 0xff19, 0xff1a, 0xff1b, 0xff1c, 0xff1d, 0xff1e,
88 0xff1f, 0xff20, 0xff21, 0xff22, 0xff23, 0xff24, 0xff25, 0xff26, 0xff27, 0xff28, 0xff29, 0xff2a, 0xff2b, 0xff2c, 0xff2d,
89 0xff2e, 0xff2f, 0xff30, 0xff31, 0xff32, 0xff33, 0xff34, 0xff35, 0xff36, 0xff37, 0xff38, 0xff39, 0xff3a, 0xff3b, 0xff3d,
90 0xff3e, 0xff3f, 0xff40, 0xff41, 0xff42, 0xff43, 0xff44, 0xff45, 0xff46, 0xff47, 0xff48, 0xff49, 0xff4a, 0xff4b, 0xff4c,
91 0xff4d, 0xff4e, 0xff4f, 0xff50, 0xff51, 0xff52, 0xff53, 0xff54, 0xff55, 0xff56, 0xff57, 0xff58, 0xff59, 0xff5a, 0xff5b,
92 0xff5c, 0xff5d, 0xff5e, 0xffe0, 0xffe1, 0xffe2, 0xffe3, 0xffe4, 0xffe5, 0xffe6,
93
94 // halfwidth characters
95 0xff61, 0xff62, 0xff63, 0xff64, 0xff65, 0xff66, 0xff67, 0xff68, 0xff69, 0xff6a, 0xff6b, 0xff6c, 0xff6d, 0xff6e, 0xff6f,
96 0xff71, 0xff72, 0xff73, 0xff74, 0xff75, 0xff76, 0xff77, 0xff78, 0xff79, 0xff7a, 0xff7b, 0xff7c, 0xff7d, 0xff7e, 0xff7f,
97 0xff80, 0xff81, 0xff82, 0xff83, 0xff84, 0xff85, 0xff86, 0xff87, 0xff88, 0xff89, 0xff8a, 0xff8b, 0xff8c, 0xff8d, 0xff8e,
98 0xff8f, 0xff90, 0xff91, 0xff92, 0xff93, 0xff94, 0xff95, 0xff96, 0xff97, 0xff98, 0xff99, 0xff9a, 0xff9b, 0xff9c, 0xff9d,
99 0xffa0, 0xffa1, 0xffa2, 0xffa3, 0xffa4, 0xffa5, 0xffa6, 0xffa7, 0xffa8, 0xffa9, 0xffaa, 0xffab, 0xffac, 0xffad, 0xffae,
100 0xffaf, 0xffb0, 0xffb1, 0xffb2, 0xffb3, 0xffb4, 0xffb5, 0xffb6, 0xffb7, 0xffb8, 0xffb9, 0xffba, 0xffbb, 0xffbc, 0xffbd,
101 0xffbe, 0xffc2, 0xffc3, 0xffc4, 0xffc5, 0xffc6, 0xffc7, 0xffca, 0xffcb, 0xffcc, 0xffcd, 0xffce, 0xffcf, 0xffd2, 0xffd3,
102 0xffd4, 0xffd5, 0xffd6, 0xffd7, 0xffda, 0xffdb, 0xffdc
103};
104const int32_t g_HalfFullCharsLength = (sizeof(g_HalfFullHigherChars) / sizeof(UChar));
105
106/*
107ICU collation rules reserve any punctuation and whitespace characters for use in the syntax.
108Thus, to use these characters in a rule, they need to be escaped.
109
110This rule was taken from http://www.unicode.org/reports/tr35/tr35-collation.html#Rules.
111*/
112bool NeedsEscape(UChar character)
113{
114 return ((0x21 <= character && character <= 0x2f)
115 || (0x3a <= character && character <= 0x40)
116 || (0x5b <= character && character <= 0x60)
117 || (0x7b <= character && character <= 0x7e));
118}
119
120/*
121Gets a value indicating whether the HalfFullHigher character is considered a symbol character.
122
123The ranges specified here are only checking for characters in the g_HalfFullHigherChars list and needs
124to be combined with NeedsEscape above with the g_HalfFullLowerChars for all the IgnoreSymbols characters.
125This is done so we can use range checks instead of comparing individual characters.
126
127These ranges were obtained by running the above characters through .NET CompareInfo.Compare
128with CompareOptions.IgnoreSymbols on Windows.
129*/
130bool IsHalfFullHigherSymbol(UChar character)
131{
132 return (0xffe0 <= character && character <= 0xffe6)
133 || (0xff61 <= character && character <= 0xff65);
134}
135
136/*
137Gets a string of custom collation rules, if necessary.
138
139Since the CompareOptions flags don't map 1:1 with ICU default functionality, we need to fall back to using
140custom rules in order to support IgnoreKanaType and IgnoreWidth CompareOptions correctly.
141*/
142std::vector<UChar> GetCustomRules(int32_t options, UColAttributeValue strength, bool isIgnoreSymbols)
143{
144 bool isIgnoreKanaType = (options & CompareOptionsIgnoreKanaType) == CompareOptionsIgnoreKanaType;
145 bool isIgnoreWidth = (options & CompareOptionsIgnoreWidth) == CompareOptionsIgnoreWidth;
146
147 // kana differs at the tertiary level
148 bool needsIgnoreKanaTypeCustomRule = isIgnoreKanaType && strength >= UCOL_TERTIARY;
149 bool needsNotIgnoreKanaTypeCustomRule = !isIgnoreKanaType && strength < UCOL_TERTIARY;
150
151 // character width differs at the tertiary level
152 bool needsIgnoreWidthCustomRule = isIgnoreWidth && strength >= UCOL_TERTIARY;
153 bool needsNotIgnoreWidthCustomRule = !isIgnoreWidth && strength < UCOL_TERTIARY;
154
155 std::vector<UChar> customRules;
156 if (needsIgnoreKanaTypeCustomRule || needsNotIgnoreKanaTypeCustomRule || needsIgnoreWidthCustomRule || needsNotIgnoreWidthCustomRule)
157 {
158 // If we need to create customRules, the KanaType custom rule will be 88 kana characters * 4 = 352 chars long
159 // and the Width custom rule will be at least 215 halfwidth characters * 4 = 860 chars long.
160 // Use 512 as the starting size, so the customRules won't have to grow if we are just
161 // doing the KanaType custom rule.
162 customRules.reserve(512);
163
164 if (needsIgnoreKanaTypeCustomRule || needsNotIgnoreKanaTypeCustomRule)
165 {
166 UChar compareChar = needsIgnoreKanaTypeCustomRule ? '=' : '<';
167
168 for (UChar hiraganaChar = hiraganaStart; hiraganaChar <= hiraganaEnd; hiraganaChar++)
169 {
170 // Hiragana is the range 3041 to 3096 & 309D & 309E
171 if (hiraganaChar <= 0x3096 || hiraganaChar >= 0x309D) // characters between 3096 and 309D are not mapped to katakana
172 {
173 customRules.push_back('&');
174 customRules.push_back(hiraganaChar);
175 customRules.push_back(compareChar);
176 customRules.push_back(hiraganaChar + hiraganaToKatakanaOffset);
177 }
178 }
179 }
180
181 if (needsIgnoreWidthCustomRule || needsNotIgnoreWidthCustomRule)
182 {
183 UChar compareChar = needsIgnoreWidthCustomRule ? '=' : '<';
184
185 UChar lowerChar;
186 UChar higherChar;
187 bool needsEscape;
188 for (int i = 0; i < g_HalfFullCharsLength; i++)
189 {
190 lowerChar = g_HalfFullLowerChars[i];
191 higherChar = g_HalfFullHigherChars[i];
192 // the lower chars need to be checked for escaping since they contain ASCII punctuation
193 needsEscape = NeedsEscape(lowerChar);
194
195 // when isIgnoreSymbols is true and we are not ignoring width, check to see if
196 // this character is a symbol, and if so skip it
197 if (!(isIgnoreSymbols && needsNotIgnoreWidthCustomRule && (needsEscape || IsHalfFullHigherSymbol(higherChar))))
198 {
199 customRules.push_back('&');
200
201 if (needsEscape)
202 {
203 customRules.push_back('\\');
204 }
205 customRules.push_back(lowerChar);
206
207 customRules.push_back(compareChar);
208 customRules.push_back(higherChar);
209 }
210 }
211 }
212 }
213
214 return customRules;
215}
216
217/*
218 * The collator returned by this function is owned by the callee and must be
219 * closed when this method returns with a U_SUCCESS UErrorCode.
220 *
221 * On error, the return value is undefined.
222 */
223UCollator* CloneCollatorWithOptions(const UCollator* pCollator, int32_t options, UErrorCode* pErr)
224{
225 UColAttributeValue strength = ucol_getStrength(pCollator);
226
227 bool isIgnoreCase = (options & CompareOptionsIgnoreCase) == CompareOptionsIgnoreCase;
228 bool isIgnoreNonSpace = (options & CompareOptionsIgnoreNonSpace) == CompareOptionsIgnoreNonSpace;
229 bool isIgnoreSymbols = (options & CompareOptionsIgnoreSymbols) == CompareOptionsIgnoreSymbols;
230
231 if (isIgnoreCase)
232 {
233 strength = UCOL_SECONDARY;
234 }
235
236 if (isIgnoreNonSpace)
237 {
238 strength = UCOL_PRIMARY;
239 }
240
241 UCollator* pClonedCollator;
242 std::vector<UChar> customRules = GetCustomRules(options, strength, isIgnoreSymbols);
243 if (customRules.empty())
244 {
245 pClonedCollator = ucol_safeClone(pCollator, nullptr, nullptr, pErr);
246 }
247 else
248 {
249 int32_t customRuleLength = customRules.size();
250
251 int32_t localeRulesLength;
252 const UChar* localeRules = ucol_getRules(pCollator, &localeRulesLength);
253
254 std::vector<UChar> completeRules(localeRulesLength + customRuleLength + 1, '\0');
255 for (int i = 0; i < localeRulesLength; i++)
256 {
257 completeRules[i] = localeRules[i];
258 }
259 for (int i = 0; i < customRuleLength; i++)
260 {
261 completeRules[localeRulesLength + i] = customRules[i];
262 }
263
264 pClonedCollator = ucol_openRules(completeRules.data(), completeRules.size(), UCOL_DEFAULT, strength, NULL, pErr);
265 }
266
267 if (isIgnoreSymbols)
268 {
269 ucol_setAttribute(pClonedCollator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, pErr);
270
271 // by default, ICU alternate shifted handling only ignores punctuation, but
272 // IgnoreSymbols needs symbols and currency as well, so change the "variable top"
273 // to include all symbols and currency
274#if HAVE_SET_MAX_VARIABLE
275 ucol_setMaxVariable(pClonedCollator, UCOL_REORDER_CODE_CURRENCY, pErr);
276#else
277 // 0xfdfc is the last currency character before the first digit character
278 // in http://source.icu-project.org/repos/icu/icu/tags/release-52-1/source/data/unidata/FractionalUCA.txt
279 const UChar ignoreSymbolsVariableTop[] = { 0xfdfc };
280 ucol_setVariableTop(pClonedCollator, ignoreSymbolsVariableTop, 1, pErr);
281#endif
282 }
283
284 ucol_setAttribute(pClonedCollator, UCOL_STRENGTH, strength, pErr);
285
286 // casing differs at the tertiary level.
287 // if strength is less than tertiary, but we are not ignoring case, then we need to flip CASE_LEVEL On
288 if (strength < UCOL_TERTIARY && !isIgnoreCase)
289 {
290 ucol_setAttribute(pClonedCollator, UCOL_CASE_LEVEL, UCOL_ON, pErr);
291 }
292
293 return pClonedCollator;
294}
295
296// Returns TRUE if all the collation elements in str are completely ignorable
297bool CanIgnoreAllCollationElements(const UCollator* pColl, const UChar* lpStr, int32_t length)
298{
299 bool result = false;
300 UErrorCode err = U_ZERO_ERROR;
301 UCollationElements* pCollElem = ucol_openElements(pColl, lpStr, length, &err);
302
303 if (U_SUCCESS(err))
304 {
305 int32_t curCollElem = UCOL_NULLORDER;
306
307 result = true;
308
309 while ((curCollElem = ucol_next(pCollElem, &err)) != UCOL_NULLORDER)
310 {
311 if (curCollElem != 0)
312 {
313 result = false;
314 break;
315 }
316 }
317
318 if (U_FAILURE(err))
319 {
320 result = false;
321 }
322
323 ucol_closeElements(pCollElem);
324 }
325
326 return result;
327
328}
329
330extern "C" ResultCode GlobalizationNative_GetSortHandle(const char* lpLocaleName, SortHandle** ppSortHandle)
331{
332 assert(ppSortHandle != nullptr);
333
334 *ppSortHandle = new (std::nothrow) SortHandle();
335 if ((*ppSortHandle) == nullptr)
336 {
337 return GetResultCode(U_MEMORY_ALLOCATION_ERROR);
338 }
339
340 UErrorCode err = U_ZERO_ERROR;
341
342 (*ppSortHandle)->regular = ucol_open(lpLocaleName, &err);
343
344 if (U_FAILURE(err))
345 {
346 if ((*ppSortHandle)->regular != nullptr)
347 ucol_close((*ppSortHandle)->regular);
348
349 delete (*ppSortHandle);
350 (*ppSortHandle) = nullptr;
351 }
352
353 return GetResultCode(err);
354}
355
356extern "C" void GlobalizationNative_CloseSortHandle(SortHandle* pSortHandle)
357{
358 ucol_close(pSortHandle->regular);
359 pSortHandle->regular = nullptr;
360
361 TCollatorMap::iterator it;
362 for (it = pSortHandle->collatorsPerOption.begin(); it != pSortHandle->collatorsPerOption.end(); it++)
363 {
364 ucol_close(it->second);
365 }
366
367 pthread_mutex_destroy(&pSortHandle->collatorsLockObject);
368
369 delete pSortHandle;
370}
371
372const UCollator* GetCollatorFromSortHandle(SortHandle* pSortHandle, int32_t options, UErrorCode* pErr)
373{
374 UCollator* pCollator;
375 if (options == 0)
376 {
377 pCollator = pSortHandle->regular;
378 }
379 else
380 {
381 int lockResult = pthread_mutex_lock(&pSortHandle->collatorsLockObject);
382 if (lockResult != 0)
383 {
384 assert(false && "Unexpected pthread_mutex_lock return value.");
385 }
386
387 TCollatorMap::iterator entry = pSortHandle->collatorsPerOption.find(options);
388 if (entry == pSortHandle->collatorsPerOption.end())
389 {
390 pCollator = CloneCollatorWithOptions(pSortHandle->regular, options, pErr);
391 pSortHandle->collatorsPerOption[options] = pCollator;
392 }
393 else
394 {
395 pCollator = entry->second;
396 }
397
398 pthread_mutex_unlock(&pSortHandle->collatorsLockObject);
399 }
400
401 return pCollator;
402}
403
404extern "C" int32_t GlobalizationNative_GetSortVersion(SortHandle* pSortHandle)
405{
406 UErrorCode err = U_ZERO_ERROR;
407 const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, 0, &err);
408 int32_t result = 0;
409
410 if (U_SUCCESS(err))
411 {
412 ucol_getVersion(pColl, (uint8_t *) &result);
413 }
414 else
415 {
416 assert(false && "Unexpected ucol_getVersion to fail.");
417
418 // we didn't use UCOL_TAILORINGS_VERSION because it is deprecated in ICU v5
419 result = UCOL_RUNTIME_VERSION << 16 | UCOL_BUILDER_VERSION;
420 }
421 return result;
422}
423
424/*
425Function:
426CompareString
427*/
428extern "C" int32_t GlobalizationNative_CompareString(
429 SortHandle* pSortHandle, const UChar* lpStr1, int32_t cwStr1Length, const UChar* lpStr2, int32_t cwStr2Length, int32_t options)
430{
431 static_assert(UCOL_EQUAL == 0, "managed side requires 0 for equal strings");
432 static_assert(UCOL_LESS < 0, "managed side requires less than zero for a < b");
433 static_assert(UCOL_GREATER > 0, "managed side requires greater than zero for a > b");
434
435 UCollationResult result = UCOL_EQUAL;
436 UErrorCode err = U_ZERO_ERROR;
437 const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err);
438
439 if (U_SUCCESS(err))
440 {
441 result = ucol_strcoll(pColl, lpStr1, cwStr1Length, lpStr2, cwStr2Length);
442 }
443
444 return result;
445}
446
447/*
448Function:
449IndexOf
450*/
451extern "C" int32_t GlobalizationNative_IndexOf(
452 SortHandle* pSortHandle,
453 const UChar* lpTarget,
454 int32_t cwTargetLength,
455 const UChar* lpSource,
456 int32_t cwSourceLength,
457 int32_t options,
458 int32_t* pMatchedLength)
459{
460 static_assert(USEARCH_DONE == -1, "managed side requires -1 for not found");
461
462 int32_t result = USEARCH_DONE;
463 UErrorCode err = U_ZERO_ERROR;
464 const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err);
465
466 if (U_SUCCESS(err))
467 {
468 UStringSearch* pSearch = usearch_openFromCollator(lpTarget, cwTargetLength, lpSource, cwSourceLength, pColl, nullptr, &err);
469
470 if (U_SUCCESS(err))
471 {
472 result = usearch_first(pSearch, &err);
473
474 // if the search was successful,
475 // we'll try to get the matched string length.
476 if(result != USEARCH_DONE && pMatchedLength != NULL)
477 {
478 *pMatchedLength = usearch_getMatchedLength(pSearch);
479 }
480 usearch_close(pSearch);
481 }
482 }
483
484 return result;
485}
486
487/*
488Function:
489LastIndexOf
490*/
491extern "C" int32_t GlobalizationNative_LastIndexOf(
492 SortHandle* pSortHandle,
493 const UChar* lpTarget,
494 int32_t cwTargetLength,
495 const UChar* lpSource,
496 int32_t cwSourceLength,
497 int32_t options)
498{
499 static_assert(USEARCH_DONE == -1, "managed side requires -1 for not found");
500
501 int32_t result = USEARCH_DONE;
502 UErrorCode err = U_ZERO_ERROR;
503 const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err);
504
505 if (U_SUCCESS(err))
506 {
507 UStringSearch* pSearch = usearch_openFromCollator(lpTarget, cwTargetLength, lpSource, cwSourceLength, pColl, nullptr, &err);
508
509 if (U_SUCCESS(err))
510 {
511 result = usearch_last(pSearch, &err);
512 usearch_close(pSearch);
513 }
514 }
515
516 return result;
517}
518
519/*
520Static Function:
521AreEqualOrdinalIgnoreCase
522*/
523static bool AreEqualOrdinalIgnoreCase(UChar32 one, UChar32 two)
524{
525 // Return whether the two characters are identical or would be identical if they were upper-cased.
526
527 if (one == two)
528 {
529 return true;
530 }
531
532 if (one == 0x0131 || two == 0x0131)
533 {
534 // On Windows with InvariantCulture, the LATIN SMALL LETTER DOTLESS I (U+0131)
535 // capitalizes to itself, whereas with ICU it capitalizes to LATIN CAPITAL LETTER I (U+0049).
536 // We special case it to match the Windows invariant behavior.
537 return false;
538 }
539
540 return u_toupper(one) == u_toupper(two);
541}
542
543/*
544Function:
545IndexOfOrdinalIgnoreCase
546*/
547extern "C" int32_t GlobalizationNative_IndexOfOrdinalIgnoreCase(
548 const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t findLast)
549{
550 int32_t result = -1;
551
552 int32_t endIndex = cwSourceLength - cwTargetLength;
553 assert(endIndex >= 0);
554
555 int32_t i = 0;
556 while (i <= endIndex)
557 {
558 int32_t srcIdx = i, trgIdx = 0;
559 const UChar *src = lpSource, *trg = lpTarget;
560 UChar32 srcCodepoint, trgCodepoint;
561
562 bool match = true;
563 while (trgIdx < cwTargetLength)
564 {
565 U16_NEXT(src, srcIdx, cwSourceLength, srcCodepoint);
566 U16_NEXT(trg, trgIdx, cwTargetLength, trgCodepoint);
567 if (!AreEqualOrdinalIgnoreCase(srcCodepoint, trgCodepoint))
568 {
569 match = false;
570 break;
571 }
572 }
573
574 if (match)
575 {
576 result = i;
577 if (!findLast)
578 {
579 break;
580 }
581 }
582
583 U16_FWD_1(lpSource, i, cwSourceLength);
584 }
585
586 return result;
587}
588
589/*
590 Return value is a "Win32 BOOL" (1 = true, 0 = false)
591 */
592extern "C" int32_t GlobalizationNative_StartsWith(
593 SortHandle* pSortHandle,
594 const UChar* lpTarget,
595 int32_t cwTargetLength,
596 const UChar* lpSource,
597 int32_t cwSourceLength,
598 int32_t options)
599{
600 int32_t result = FALSE;
601 UErrorCode err = U_ZERO_ERROR;
602 const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err);
603
604 if (U_SUCCESS(err))
605 {
606 UStringSearch* pSearch = usearch_openFromCollator(lpTarget, cwTargetLength, lpSource, cwSourceLength, pColl, nullptr, &err);
607 int32_t idx = USEARCH_DONE;
608
609 if (U_SUCCESS(err))
610 {
611 idx = usearch_first(pSearch, &err);
612 if (idx != USEARCH_DONE)
613 {
614 if (idx == 0)
615 {
616 result = TRUE;
617 }
618 else
619 {
620 result = CanIgnoreAllCollationElements(pColl, lpSource, idx);
621 }
622 }
623
624 usearch_close(pSearch);
625 }
626 }
627
628 return result;
629}
630
631/*
632 Return value is a "Win32 BOOL" (1 = true, 0 = false)
633 */
634extern "C" int32_t GlobalizationNative_EndsWith(
635 SortHandle* pSortHandle,
636 const UChar* lpTarget,
637 int32_t cwTargetLength,
638 const UChar* lpSource,
639 int32_t cwSourceLength,
640 int32_t options)
641{
642 int32_t result = FALSE;
643 UErrorCode err = U_ZERO_ERROR;
644 const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err);
645
646 if (U_SUCCESS(err))
647 {
648 UStringSearch* pSearch = usearch_openFromCollator(lpTarget, cwTargetLength, lpSource, cwSourceLength, pColl, nullptr, &err);
649 int32_t idx = USEARCH_DONE;
650
651 if (U_SUCCESS(err))
652 {
653 idx = usearch_last(pSearch, &err);
654
655 if (idx != USEARCH_DONE)
656 {
657 if ((idx + usearch_getMatchedLength(pSearch)) == cwSourceLength)
658 {
659 result = TRUE;
660 }
661 else
662 {
663 int32_t matchEnd = idx + usearch_getMatchedLength(pSearch);
664 int32_t remainingStringLength = cwSourceLength - matchEnd;
665
666 result = CanIgnoreAllCollationElements(pColl, lpSource + matchEnd, remainingStringLength);
667 }
668 }
669
670 usearch_close(pSearch);
671 }
672 }
673
674 return result;
675}
676
677extern "C" int32_t GlobalizationNative_GetSortKey(
678 SortHandle* pSortHandle,
679 const UChar* lpStr,
680 int32_t cwStrLength,
681 uint8_t* sortKey,
682 int32_t cbSortKeyLength,
683 int32_t options)
684{
685 UErrorCode err = U_ZERO_ERROR;
686 const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err);
687 int32_t result = 0;
688
689 if (U_SUCCESS(err))
690 {
691 result = ucol_getSortKey(pColl, lpStr, cwStrLength, sortKey, cbSortKeyLength);
692 }
693
694 return result;
695}
696
697extern "C" int32_t GlobalizationNative_CompareStringOrdinalIgnoreCase(
698 const UChar* lpStr1, int32_t cwStr1Length, const UChar* lpStr2, int32_t cwStr2Length)
699{
700 assert(lpStr1 != nullptr);
701 assert(cwStr1Length >= 0);
702 assert(lpStr2 != nullptr);
703 assert(cwStr2Length >= 0);
704
705 int32_t str1Idx = 0;
706 int32_t str2Idx = 0;
707
708 while (str1Idx < cwStr1Length && str2Idx < cwStr2Length)
709 {
710 UChar32 str1Codepoint;
711 UChar32 str2Codepoint;
712
713 U16_NEXT(lpStr1, str1Idx, cwStr1Length, str1Codepoint);
714 U16_NEXT(lpStr2, str2Idx, cwStr2Length, str2Codepoint);
715
716 if (str1Codepoint != str2Codepoint && u_toupper(str1Codepoint) != u_toupper(str2Codepoint))
717 {
718 return str1Codepoint < str2Codepoint ? -1 : 1;
719 }
720 }
721
722 if (cwStr1Length < cwStr2Length)
723 {
724 return -1;
725 }
726
727 if (cwStr2Length < cwStr1Length)
728 {
729 return 1;
730 }
731
732 return 0;
733}
734