1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | // |
5 | |
6 | #include <assert.h> |
7 | #include <stdint.h> |
8 | #include "icushim.h" |
9 | |
10 | /* |
11 | Function: |
12 | ChangeCase |
13 | |
14 | Performs upper or lower casing of a string into a new buffer. |
15 | No special casing is performed beyond that provided by ICU. |
16 | */ |
17 | extern "C" void GlobalizationNative_ChangeCase( |
18 | const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper) |
19 | { |
20 | // Iterate through the string, decoding the next one or two UTF-16 code units |
21 | // into a codepoint and updating srcIdx to point to the next UTF-16 code unit |
22 | // to decode. Then upper or lower case it, write dstCodepoint into lpDst at |
23 | // offset dstIdx, and update dstIdx. |
24 | |
25 | // (The loop here has been manually cloned for each of the four cases, rather |
26 | // than having a single loop that internally branched based on bToUpper as the |
27 | // compiler wasn't doing that optimization, and it results in an ~15-20% perf |
28 | // improvement on longer strings.) |
29 | |
30 | UBool isError = FALSE; |
31 | int32_t srcIdx = 0, dstIdx = 0; |
32 | UChar32 srcCodepoint, dstCodepoint; |
33 | |
34 | if (bToUpper) |
35 | { |
36 | while (srcIdx < cwSrcLength) |
37 | { |
38 | U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); |
39 | dstCodepoint = u_toupper(srcCodepoint); |
40 | U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); |
41 | assert(isError == FALSE && srcIdx == dstIdx); |
42 | } |
43 | } |
44 | else |
45 | { |
46 | while (srcIdx < cwSrcLength) |
47 | { |
48 | U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); |
49 | dstCodepoint = u_tolower(srcCodepoint); |
50 | U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); |
51 | assert(isError == FALSE && srcIdx == dstIdx); |
52 | } |
53 | } |
54 | } |
55 | |
56 | /* |
57 | Function: |
58 | ChangeCaseInvariant |
59 | |
60 | Performs upper or lower casing of a string into a new buffer. |
61 | Special casing is performed to ensure that invariant casing |
62 | matches that of Windows in certain situations, e.g. Turkish i's. |
63 | */ |
64 | extern "C" void GlobalizationNative_ChangeCaseInvariant( |
65 | const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper) |
66 | { |
67 | // See algorithmic comment in ChangeCase. |
68 | |
69 | UBool isError = FALSE; |
70 | int32_t srcIdx = 0, dstIdx = 0; |
71 | UChar32 srcCodepoint, dstCodepoint; |
72 | |
73 | if (bToUpper) |
74 | { |
75 | while (srcIdx < cwSrcLength) |
76 | { |
77 | // On Windows with InvariantCulture, the LATIN SMALL LETTER DOTLESS I (U+0131) |
78 | // capitalizes to itself, whereas with ICU it capitalizes to LATIN CAPITAL LETTER I (U+0049). |
79 | // We special case it to match the Windows invariant behavior. |
80 | U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); |
81 | dstCodepoint = ((srcCodepoint == (UChar32)0x0131) ? (UChar32)0x0131 : u_toupper(srcCodepoint)); |
82 | U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); |
83 | assert(isError == FALSE && srcIdx == dstIdx); |
84 | } |
85 | } |
86 | else |
87 | { |
88 | while (srcIdx < cwSrcLength) |
89 | { |
90 | // On Windows with InvariantCulture, the LATIN CAPITAL LETTER I WITH DOT ABOVE (U+0130) |
91 | // lower cases to itself, whereas with ICU it lower cases to LATIN SMALL LETTER I (U+0069). |
92 | // We special case it to match the Windows invariant behavior. |
93 | U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); |
94 | dstCodepoint = ((srcCodepoint == (UChar32)0x0130) ? (UChar32)0x0130 : u_tolower(srcCodepoint)); |
95 | U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); |
96 | assert(isError == FALSE && srcIdx == dstIdx); |
97 | } |
98 | } |
99 | } |
100 | |
101 | /* |
102 | Function: |
103 | ChangeCaseTurkish |
104 | |
105 | Performs upper or lower casing of a string into a new buffer, performing special |
106 | casing for Turkish. |
107 | */ |
108 | extern "C" void GlobalizationNative_ChangeCaseTurkish( |
109 | const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper) |
110 | { |
111 | // See algorithmic comment in ChangeCase. |
112 | |
113 | UBool isError = FALSE; |
114 | int32_t srcIdx = 0, dstIdx = 0; |
115 | UChar32 srcCodepoint, dstCodepoint; |
116 | |
117 | if (bToUpper) |
118 | { |
119 | while (srcIdx < cwSrcLength) |
120 | { |
121 | // In turkish casing, LATIN SMALL LETTER I (U+0069) upper cases to LATIN |
122 | // CAPITAL LETTER I WITH DOT ABOVE (U+0130). |
123 | U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); |
124 | dstCodepoint = ((srcCodepoint == (UChar32)0x0069) ? (UChar32)0x0130 : u_toupper(srcCodepoint)); |
125 | U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); |
126 | assert(isError == FALSE && srcIdx == dstIdx); |
127 | } |
128 | } |
129 | else |
130 | { |
131 | while (srcIdx < cwSrcLength) |
132 | { |
133 | // In turkish casing, LATIN CAPITAL LETTER I (U+0049) lower cases to |
134 | // LATIN SMALL LETTER DOTLESS I (U+0131). |
135 | U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); |
136 | dstCodepoint = ((srcCodepoint == (UChar32)0x0049) ? (UChar32)0x0131 : u_tolower(srcCodepoint)); |
137 | U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); |
138 | assert(isError == FALSE && srcIdx == dstIdx); |
139 | } |
140 | } |
141 | } |
142 | |