| 1 | // Licensed to the .NET Foundation under one or more agreements. |
| 2 | // The .NET Foundation licenses this file to you under the MIT license. |
| 3 | // See the LICENSE file in the project root for more information. |
| 4 | // |
| 5 | |
| 6 | #include <assert.h> |
| 7 | #include <stdint.h> |
| 8 | #include "icushim.h" |
| 9 | |
| 10 | /* |
| 11 | Function: |
| 12 | ChangeCase |
| 13 | |
| 14 | Performs upper or lower casing of a string into a new buffer. |
| 15 | No special casing is performed beyond that provided by ICU. |
| 16 | */ |
| 17 | extern "C" void GlobalizationNative_ChangeCase( |
| 18 | const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper) |
| 19 | { |
| 20 | // Iterate through the string, decoding the next one or two UTF-16 code units |
| 21 | // into a codepoint and updating srcIdx to point to the next UTF-16 code unit |
| 22 | // to decode. Then upper or lower case it, write dstCodepoint into lpDst at |
| 23 | // offset dstIdx, and update dstIdx. |
| 24 | |
| 25 | // (The loop here has been manually cloned for each of the four cases, rather |
| 26 | // than having a single loop that internally branched based on bToUpper as the |
| 27 | // compiler wasn't doing that optimization, and it results in an ~15-20% perf |
| 28 | // improvement on longer strings.) |
| 29 | |
| 30 | UBool isError = FALSE; |
| 31 | int32_t srcIdx = 0, dstIdx = 0; |
| 32 | UChar32 srcCodepoint, dstCodepoint; |
| 33 | |
| 34 | if (bToUpper) |
| 35 | { |
| 36 | while (srcIdx < cwSrcLength) |
| 37 | { |
| 38 | U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); |
| 39 | dstCodepoint = u_toupper(srcCodepoint); |
| 40 | U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); |
| 41 | assert(isError == FALSE && srcIdx == dstIdx); |
| 42 | } |
| 43 | } |
| 44 | else |
| 45 | { |
| 46 | while (srcIdx < cwSrcLength) |
| 47 | { |
| 48 | U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); |
| 49 | dstCodepoint = u_tolower(srcCodepoint); |
| 50 | U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); |
| 51 | assert(isError == FALSE && srcIdx == dstIdx); |
| 52 | } |
| 53 | } |
| 54 | } |
| 55 | |
| 56 | /* |
| 57 | Function: |
| 58 | ChangeCaseInvariant |
| 59 | |
| 60 | Performs upper or lower casing of a string into a new buffer. |
| 61 | Special casing is performed to ensure that invariant casing |
| 62 | matches that of Windows in certain situations, e.g. Turkish i's. |
| 63 | */ |
| 64 | extern "C" void GlobalizationNative_ChangeCaseInvariant( |
| 65 | const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper) |
| 66 | { |
| 67 | // See algorithmic comment in ChangeCase. |
| 68 | |
| 69 | UBool isError = FALSE; |
| 70 | int32_t srcIdx = 0, dstIdx = 0; |
| 71 | UChar32 srcCodepoint, dstCodepoint; |
| 72 | |
| 73 | if (bToUpper) |
| 74 | { |
| 75 | while (srcIdx < cwSrcLength) |
| 76 | { |
| 77 | // On Windows with InvariantCulture, the LATIN SMALL LETTER DOTLESS I (U+0131) |
| 78 | // capitalizes to itself, whereas with ICU it capitalizes to LATIN CAPITAL LETTER I (U+0049). |
| 79 | // We special case it to match the Windows invariant behavior. |
| 80 | U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); |
| 81 | dstCodepoint = ((srcCodepoint == (UChar32)0x0131) ? (UChar32)0x0131 : u_toupper(srcCodepoint)); |
| 82 | U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); |
| 83 | assert(isError == FALSE && srcIdx == dstIdx); |
| 84 | } |
| 85 | } |
| 86 | else |
| 87 | { |
| 88 | while (srcIdx < cwSrcLength) |
| 89 | { |
| 90 | // On Windows with InvariantCulture, the LATIN CAPITAL LETTER I WITH DOT ABOVE (U+0130) |
| 91 | // lower cases to itself, whereas with ICU it lower cases to LATIN SMALL LETTER I (U+0069). |
| 92 | // We special case it to match the Windows invariant behavior. |
| 93 | U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); |
| 94 | dstCodepoint = ((srcCodepoint == (UChar32)0x0130) ? (UChar32)0x0130 : u_tolower(srcCodepoint)); |
| 95 | U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); |
| 96 | assert(isError == FALSE && srcIdx == dstIdx); |
| 97 | } |
| 98 | } |
| 99 | } |
| 100 | |
| 101 | /* |
| 102 | Function: |
| 103 | ChangeCaseTurkish |
| 104 | |
| 105 | Performs upper or lower casing of a string into a new buffer, performing special |
| 106 | casing for Turkish. |
| 107 | */ |
| 108 | extern "C" void GlobalizationNative_ChangeCaseTurkish( |
| 109 | const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper) |
| 110 | { |
| 111 | // See algorithmic comment in ChangeCase. |
| 112 | |
| 113 | UBool isError = FALSE; |
| 114 | int32_t srcIdx = 0, dstIdx = 0; |
| 115 | UChar32 srcCodepoint, dstCodepoint; |
| 116 | |
| 117 | if (bToUpper) |
| 118 | { |
| 119 | while (srcIdx < cwSrcLength) |
| 120 | { |
| 121 | // In turkish casing, LATIN SMALL LETTER I (U+0069) upper cases to LATIN |
| 122 | // CAPITAL LETTER I WITH DOT ABOVE (U+0130). |
| 123 | U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); |
| 124 | dstCodepoint = ((srcCodepoint == (UChar32)0x0069) ? (UChar32)0x0130 : u_toupper(srcCodepoint)); |
| 125 | U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); |
| 126 | assert(isError == FALSE && srcIdx == dstIdx); |
| 127 | } |
| 128 | } |
| 129 | else |
| 130 | { |
| 131 | while (srcIdx < cwSrcLength) |
| 132 | { |
| 133 | // In turkish casing, LATIN CAPITAL LETTER I (U+0049) lower cases to |
| 134 | // LATIN SMALL LETTER DOTLESS I (U+0131). |
| 135 | U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint); |
| 136 | dstCodepoint = ((srcCodepoint == (UChar32)0x0049) ? (UChar32)0x0131 : u_tolower(srcCodepoint)); |
| 137 | U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError); |
| 138 | assert(isError == FALSE && srcIdx == dstIdx); |
| 139 | } |
| 140 | } |
| 141 | } |
| 142 | |