casing.cpp source code [CoreCLR/corefx/System.Globalization.Native/casing.cpp]

1	// Licensed to the .NET Foundation under one or more agreements.
2	// The .NET Foundation licenses this file to you under the MIT license.
3	// See the LICENSE file in the project root for more information.
4	//
5
6	#include <assert.h>
7	#include <stdint.h>
8	#include "icushim.h"
9
10	/*
11	Function:
12	ChangeCase
13
14	Performs upper or lower casing of a string into a new buffer.
15	No special casing is performed beyond that provided by ICU.
16	*/
17	extern "C" void GlobalizationNative_ChangeCase(
18	const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper)
19	{
20	// Iterate through the string, decoding the next one or two UTF-16 code units
21	// into a codepoint and updating srcIdx to point to the next UTF-16 code unit
22	// to decode. Then upper or lower case it, write dstCodepoint into lpDst at
23	// offset dstIdx, and update dstIdx.
24
25	// (The loop here has been manually cloned for each of the four cases, rather
26	// than having a single loop that internally branched based on bToUpper as the
27	// compiler wasn't doing that optimization, and it results in an ~15-20% perf
28	// improvement on longer strings.)
29
30	UBool isError = FALSE;
31	int32_t srcIdx = `0`, dstIdx = `0`;
32	UChar32 srcCodepoint, dstCodepoint;
33
34	if (bToUpper)
35	{
36	while (srcIdx < cwSrcLength)
37	{
38	U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
39	dstCodepoint = u_toupper(srcCodepoint);
40	U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
41	assert(isError == FALSE && srcIdx == dstIdx);
42	}
43	}
44	else
45	{
46	while (srcIdx < cwSrcLength)
47	{
48	U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
49	dstCodepoint = u_tolower(srcCodepoint);
50	U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
51	assert(isError == FALSE && srcIdx == dstIdx);
52	}
53	}
54	}
55
56	/*
57	Function:
58	ChangeCaseInvariant
59
60	Performs upper or lower casing of a string into a new buffer.
61	Special casing is performed to ensure that invariant casing
62	matches that of Windows in certain situations, e.g. Turkish i's.
63	*/
64	extern "C" void GlobalizationNative_ChangeCaseInvariant(
65	const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper)
66	{
67	// See algorithmic comment in ChangeCase.
68
69	UBool isError = FALSE;
70	int32_t srcIdx = `0`, dstIdx = `0`;
71	UChar32 srcCodepoint, dstCodepoint;
72
73	if (bToUpper)
74	{
75	while (srcIdx < cwSrcLength)
76	{
77	// On Windows with InvariantCulture, the LATIN SMALL LETTER DOTLESS I (U+0131)
78	// capitalizes to itself, whereas with ICU it capitalizes to LATIN CAPITAL LETTER I (U+0049).
79	// We special case it to match the Windows invariant behavior.
80	U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
81	dstCodepoint = ((srcCodepoint == (UChar32)`0x0131`) ? (UChar32)`0x0131` : u_toupper(srcCodepoint));
82	U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
83	assert(isError == FALSE && srcIdx == dstIdx);
84	}
85	}
86	else
87	{
88	while (srcIdx < cwSrcLength)
89	{
90	// On Windows with InvariantCulture, the LATIN CAPITAL LETTER I WITH DOT ABOVE (U+0130)
91	// lower cases to itself, whereas with ICU it lower cases to LATIN SMALL LETTER I (U+0069).
92	// We special case it to match the Windows invariant behavior.
93	U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
94	dstCodepoint = ((srcCodepoint == (UChar32)`0x0130`) ? (UChar32)`0x0130` : u_tolower(srcCodepoint));
95	U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
96	assert(isError == FALSE && srcIdx == dstIdx);
97	}
98	}
99	}
100
101	/*
102	Function:
103	ChangeCaseTurkish
104
105	Performs upper or lower casing of a string into a new buffer, performing special
106	casing for Turkish.
107	*/
108	extern "C" void GlobalizationNative_ChangeCaseTurkish(
109	const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper)
110	{
111	// See algorithmic comment in ChangeCase.
112
113	UBool isError = FALSE;
114	int32_t srcIdx = `0`, dstIdx = `0`;
115	UChar32 srcCodepoint, dstCodepoint;
116
117	if (bToUpper)
118	{
119	while (srcIdx < cwSrcLength)
120	{
121	// In turkish casing, LATIN SMALL LETTER I (U+0069) upper cases to LATIN
122	// CAPITAL LETTER I WITH DOT ABOVE (U+0130).
123	U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
124	dstCodepoint = ((srcCodepoint == (UChar32)`0x0069`) ? (UChar32)`0x0130` : u_toupper(srcCodepoint));
125	U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
126	assert(isError == FALSE && srcIdx == dstIdx);
127	}
128	}
129	else
130	{
131	while (srcIdx < cwSrcLength)
132	{
133	// In turkish casing, LATIN CAPITAL LETTER I (U+0049) lower cases to
134	// LATIN SMALL LETTER DOTLESS I (U+0131).
135	U16_NEXT(lpSrc, srcIdx, cwSrcLength, srcCodepoint);
136	dstCodepoint = ((srcCodepoint == (UChar32)`0x0049`) ? (UChar32)`0x0131` : u_tolower(srcCodepoint));
137	U16_APPEND(lpDst, dstIdx, cwDstLength, dstCodepoint, isError);
138	assert(isError == FALSE && srcIdx == dstIdx);
139	}
140	}
141	}
142

Browse the source code of CoreCLR/corefx/System.Globalization.Native/casing.cpp