1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4// ---------------------------------------------------------------------------
5// FString.cpp
6//
7
8// ---------------------------------------------------------------------------
9
10#include "stdafx.h"
11#include "ex.h"
12#include "holder.h"
13
14#include "fstring.h"
15
16
17namespace FString
18{
19
20#ifdef _MSC_VER
21#pragma optimize("t", on)
22#endif // _MSC_VER
23
24#define MAX_LENGTH 0x1fffff00
25
26
27HRESULT Unicode_Utf8_Length(__in_z LPCWSTR pString, __out bool * pAllAscii, __out DWORD * pLength)
28{
29 CONTRACTL
30 {
31 NOTHROW;
32 GC_NOTRIGGER;
33 }
34 CONTRACTL_END;
35
36 * pAllAscii = true;
37
38 LPCWSTR p = pString;
39
40 while (true)
41 {
42 WCHAR ch = * p;
43
44 // Single check for termination and non ASCII
45 if (((unsigned) (ch - 1)) >= 0x7F)
46 {
47 if (ch != 0)
48 {
49 * pAllAscii = false;
50 }
51
52 break;
53 }
54
55 p ++;
56 }
57
58 if (* pAllAscii)
59 {
60 if ((p - pString) > MAX_LENGTH)
61 {
62 return COR_E_OVERFLOW;
63 }
64
65 * pLength = (DWORD) (p - pString);
66 }
67 else // use WideCharToMultiByte to calculate result length
68 {
69 * pLength = WszWideCharToMultiByte(CP_UTF8, 0, pString, -1, NULL, 0, NULL, NULL);
70
71 if (*pLength == 0)
72 {
73 return HRESULT_FROM_GetLastError();
74 }
75
76 // Remove the count of null terminator, to be consistent with the all-ASCII case.
77 --*pLength;
78
79 if (*pLength > MAX_LENGTH)
80 {
81 return COR_E_OVERFLOW;
82 }
83 }
84
85 return S_OK;
86}
87
88
89// UNICODE to UTF8
90HRESULT Unicode_Utf8(__in_z LPCWSTR pString, bool allAscii, __out_z LPSTR pBuffer, DWORD length)
91{
92 CONTRACTL
93 {
94 NOTHROW;
95 GC_NOTRIGGER;
96 }
97 CONTRACTL_END;
98
99 pBuffer[length] = 0;
100
101 if (allAscii)
102 {
103 LPCWSTR p = pString;
104
105 LPSTR q = pBuffer;
106
107 LPCWSTR endP = p + length - 8;
108
109 // Unfold to optimize for long string: 8 chars per iteration
110 while (p < endP)
111 {
112 q[0] = (char) p[0];
113 q[1] = (char) p[1];
114 q[2] = (char) p[2];
115 q[3] = (char) p[3];
116
117 q[4] = (char) p[4];
118 q[5] = (char) p[5];
119 q[6] = (char) p[6];
120 q[7] = (char) p[7];
121
122 q += 8;
123 p += 8;
124 }
125
126 endP += 8;
127
128 while (p < endP)
129 {
130 * q ++ = (char) * p ++;
131 }
132 }
133 else
134 {
135 length = WszWideCharToMultiByte(CP_UTF8, 0, pString, -1, pBuffer, (int) length + 1, NULL, NULL);
136
137 if (length == 0)
138 {
139 return HRESULT_FROM_GetLastError();
140 }
141 }
142
143 return S_OK;
144}
145
146
147HRESULT Utf8_Unicode_Length(__in_z LPCSTR pString, __out bool * pAllAscii, __out DWORD * pLength)
148{
149 CONTRACTL
150 {
151 NOTHROW;
152 GC_NOTRIGGER;
153 }
154 CONTRACTL_END;
155
156 * pAllAscii = true;
157
158 LPCSTR p = pString;
159
160 while (true)
161 {
162 char ch = * p;
163
164 // Single check for termination and non ASCII
165 if (((unsigned) (ch - 1)) >= 0x7F)
166 {
167 if (ch != 0)
168 {
169 * pAllAscii = false;
170 }
171
172 break;
173 }
174
175 p ++;
176 }
177
178 if (* pAllAscii)
179 {
180 if ((p - pString) > MAX_LENGTH)
181 {
182 return COR_E_OVERFLOW;
183 }
184
185 * pLength = (DWORD)(p - pString);
186 }
187 else
188 {
189 * pLength = WszMultiByteToWideChar(CP_UTF8, 0, pString, -1, NULL, 0);
190
191 if (* pLength == 0)
192 {
193 return HRESULT_FROM_GetLastError();
194 }
195
196 // Remove the count of null terminator, to be consistent with the all-ASCII case.
197 --*pLength;
198
199 if (* pLength > MAX_LENGTH)
200 {
201 return COR_E_OVERFLOW;
202 }
203 }
204
205 return S_OK;
206}
207
208
209// UTF8 to Unicode
210
211HRESULT Utf8_Unicode(__in_z LPCSTR pString, bool allAscii, __out_z LPWSTR pBuffer, DWORD length)
212{
213 CONTRACTL
214 {
215 NOTHROW;
216 GC_NOTRIGGER;
217 }
218 CONTRACTL_END;
219
220 pBuffer[length] = 0;
221
222 if (allAscii)
223 {
224 LPCSTR p = pString;
225
226 LPWSTR q = pBuffer;
227
228 LPCSTR endP = p + length - 8;
229
230 // Unfold to optimize for long string: 4 chars per iteration
231 while (p < endP)
232 {
233 q[0] = (WCHAR) p[0];
234 q[1] = (WCHAR) p[1];
235 q[2] = (WCHAR) p[2];
236 q[3] = (WCHAR) p[3];
237
238 q[4] = (WCHAR) p[4];
239 q[5] = (WCHAR) p[5];
240 q[6] = (WCHAR) p[6];
241 q[7] = (WCHAR) p[7];
242
243 q += 8;
244 p += 8;
245 }
246
247 endP += 8;
248
249 while (p < endP)
250 {
251 * q ++ = (WCHAR) * p ++;
252 }
253 }
254 else
255 {
256 length = WszMultiByteToWideChar(CP_UTF8, 0, pString, -1, pBuffer, (int) length + 1);
257
258 if (length == 0)
259 {
260 return HRESULT_FROM_GetLastError();
261 }
262 }
263
264 return S_OK;
265}
266
267
268HRESULT ConvertUnicode_Utf8(__in_z LPCWSTR pString, __out_z LPSTR * pBuffer)
269{
270 bool allAscii;
271 DWORD length;
272
273 HRESULT hr = Unicode_Utf8_Length(pString, & allAscii, & length);
274
275 if (SUCCEEDED(hr))
276 {
277 * pBuffer = new (nothrow) char[length + 1];
278
279 if (* pBuffer == NULL)
280 {
281 hr = E_OUTOFMEMORY;
282 }
283 else
284 {
285 hr = Unicode_Utf8(pString, allAscii, * pBuffer, length);
286 }
287 }
288
289 return hr;
290}
291
292
293HRESULT ConvertUtf8_Unicode(__in_z LPCSTR pString, __out_z LPWSTR * pBuffer)
294{
295 bool allAscii;
296 DWORD length;
297
298 HRESULT hr = Utf8_Unicode_Length(pString, & allAscii, & length);
299
300 if (SUCCEEDED(hr))
301 {
302 * pBuffer = new (nothrow) WCHAR[length + 1];
303
304 if (* pBuffer == NULL)
305 {
306 hr = E_OUTOFMEMORY;
307 }
308 else
309 {
310 hr = Utf8_Unicode(pString, allAscii, * pBuffer, length);
311 }
312 }
313
314 return hr;
315}
316
317
318#ifdef _MSC_VER
319#pragma optimize("", on)
320#endif // _MSC_VER
321
322} // namespace FString
323