1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4//
5
6#include <assert.h>
7#include <stdint.h>
8#include "icushim.h"
9
10/*
11Function:
12ChangeCase
13
14Performs upper or lower casing of a string into a new buffer.
15No special casing is performed beyond that provided by ICU.
16*/
17extern "C" void GlobalizationNative_ChangeCase(
18 const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper)
19{
20 // Iterate through the string, decoding the next one or two UTF-16 code units
21 // into a codepoint and updating srcIdx to point to the next UTF-16 code unit
22 // to decode. Then upper or lower case it, write dstCodepoint into lpDst at
23 // offset dstIdx, and update dstIdx.
24
25 // (The loop here has been manually cloned for each of the four cases, rather
26 // than having a single loop that internally branched based on bToUpper as the
27 // compiler wasn't doing that optimization, and it results in an ~15-20% perf
28 // improvement on longer strings.)
29
30 UBool isError = FALSE;
31 int32_t srcIdx = 0, dstIdx = 0;
32 UChar32 srcCodepoint, dstCodepoint;
33
34 if (bToUpper)
35 {
36 while (srcIdx < cwSrcLength)
37 {
38 U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
39 dstCodepoint = u_toupper(srcCodepoint);
40 U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
41 assert(isError == FALSE && srcIdx == dstIdx);
42 }
43 }
44 else
45 {
46 while (srcIdx < cwSrcLength)
47 {
48 U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
49 dstCodepoint = u_tolower(srcCodepoint);
50 U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
51 assert(isError == FALSE && srcIdx == dstIdx);
52 }
53 }
54}
55
56/*
57Function:
58ChangeCaseInvariant
59
60Performs upper or lower casing of a string into a new buffer.
61Special casing is performed to ensure that invariant casing
62matches that of Windows in certain situations, e.g. Turkish i's.
63*/
64extern "C" void GlobalizationNative_ChangeCaseInvariant(
65 const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper)
66{
67 // See algorithmic comment in ChangeCase.
68
69 UBool isError = FALSE;
70 int32_t srcIdx = 0, dstIdx = 0;
71 UChar32 srcCodepoint, dstCodepoint;
72
73 if (bToUpper)
74 {
75 while (srcIdx < cwSrcLength)
76 {
77 // On Windows with InvariantCulture, the LATIN SMALL LETTER DOTLESS I (U+0131)
78 // capitalizes to itself, whereas with ICU it capitalizes to LATIN CAPITAL LETTER I (U+0049).
79 // We special case it to match the Windows invariant behavior.
80 U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
81 dstCodepoint = ((srcCodepoint == (UChar32)0x0131) ? (UChar32)0x0131 : u_toupper(srcCodepoint));
82 U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
83 assert(isError == FALSE && srcIdx == dstIdx);
84 }
85 }
86 else
87 {
88 while (srcIdx < cwSrcLength)
89 {
90 // On Windows with InvariantCulture, the LATIN CAPITAL LETTER I WITH DOT ABOVE (U+0130)
91 // lower cases to itself, whereas with ICU it lower cases to LATIN SMALL LETTER I (U+0069).
92 // We special case it to match the Windows invariant behavior.
93 U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
94 dstCodepoint = ((srcCodepoint == (UChar32)0x0130) ? (UChar32)0x0130 : u_tolower(srcCodepoint));
95 U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
96 assert(isError == FALSE && srcIdx == dstIdx);
97 }
98 }
99}
100
101/*
102Function:
103ChangeCaseTurkish
104
105Performs upper or lower casing of a string into a new buffer, performing special
106casing for Turkish.
107*/
108extern "C" void GlobalizationNative_ChangeCaseTurkish(
109 const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper)
110{
111 // See algorithmic comment in ChangeCase.
112
113 UBool isError = FALSE;
114 int32_t srcIdx = 0, dstIdx = 0;
115 UChar32 srcCodepoint, dstCodepoint;
116
117 if (bToUpper)
118 {
119 while (srcIdx < cwSrcLength)
120 {
121 // In turkish casing, LATIN SMALL LETTER I (U+0069) upper cases to LATIN
122 // CAPITAL LETTER I WITH DOT ABOVE (U+0130).
123 U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
124 dstCodepoint = ((srcCodepoint == (UChar32)0x0069) ? (UChar32)0x0130 : u_toupper(srcCodepoint));
125 U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
126 assert(isError == FALSE && srcIdx == dstIdx);
127 }
128 }
129 else
130 {
131 while (srcIdx < cwSrcLength)
132 {
133 // In turkish casing, LATIN CAPITAL LETTER I (U+0049) lower cases to
134 // LATIN SMALL LETTER DOTLESS I (U+0131).
135 U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
136 dstCodepoint = ((srcCodepoint == (UChar32)0x0049) ? (UChar32)0x0131 : u_tolower(srcCodepoint));
137 U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
138 assert(isError == FALSE && srcIdx == dstIdx);
139 }
140 }
141}
142