1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | // --------------------------------------------------------------------------- |
5 | // FString.cpp |
6 | // |
7 | |
8 | // --------------------------------------------------------------------------- |
9 | |
10 | #include "stdafx.h" |
11 | #include "ex.h" |
12 | #include "holder.h" |
13 | |
14 | #include "fstring.h" |
15 | |
16 | |
17 | namespace FString |
18 | { |
19 | |
#ifdef _MSC_VER
// Ask MSVC to favor fast code for the functions defined below.
#pragma optimize("t", on)
#endif // _MSC_VER

// Largest length (in output units, excluding the null terminator) that the
// *_Length helpers in this file will report; anything longer is rejected
// with COR_E_OVERFLOW.
#define MAX_LENGTH 0x1fffff00
25 | |
26 | |
27 | HRESULT Unicode_Utf8_Length(__in_z LPCWSTR pString, __out bool * pAllAscii, __out DWORD * pLength) |
28 | { |
29 | CONTRACTL |
30 | { |
31 | NOTHROW; |
32 | GC_NOTRIGGER; |
33 | } |
34 | CONTRACTL_END; |
35 | |
36 | * pAllAscii = true; |
37 | |
38 | LPCWSTR p = pString; |
39 | |
40 | while (true) |
41 | { |
42 | WCHAR ch = * p; |
43 | |
44 | // Single check for termination and non ASCII |
45 | if (((unsigned) (ch - 1)) >= 0x7F) |
46 | { |
47 | if (ch != 0) |
48 | { |
49 | * pAllAscii = false; |
50 | } |
51 | |
52 | break; |
53 | } |
54 | |
55 | p ++; |
56 | } |
57 | |
58 | if (* pAllAscii) |
59 | { |
60 | if ((p - pString) > MAX_LENGTH) |
61 | { |
62 | return COR_E_OVERFLOW; |
63 | } |
64 | |
65 | * pLength = (DWORD) (p - pString); |
66 | } |
67 | else // use WideCharToMultiByte to calculate result length |
68 | { |
69 | * pLength = WszWideCharToMultiByte(CP_UTF8, 0, pString, -1, NULL, 0, NULL, NULL); |
70 | |
71 | if (*pLength == 0) |
72 | { |
73 | return HRESULT_FROM_GetLastError(); |
74 | } |
75 | |
76 | // Remove the count of null terminator, to be consistent with the all-ASCII case. |
77 | --*pLength; |
78 | |
79 | if (*pLength > MAX_LENGTH) |
80 | { |
81 | return COR_E_OVERFLOW; |
82 | } |
83 | } |
84 | |
85 | return S_OK; |
86 | } |
87 | |
88 | |
89 | // UNICODE to UTF8 |
90 | HRESULT Unicode_Utf8(__in_z LPCWSTR pString, bool allAscii, __out_z LPSTR pBuffer, DWORD length) |
91 | { |
92 | CONTRACTL |
93 | { |
94 | NOTHROW; |
95 | GC_NOTRIGGER; |
96 | } |
97 | CONTRACTL_END; |
98 | |
99 | pBuffer[length] = 0; |
100 | |
101 | if (allAscii) |
102 | { |
103 | LPCWSTR p = pString; |
104 | |
105 | LPSTR q = pBuffer; |
106 | |
107 | LPCWSTR endP = p + length - 8; |
108 | |
109 | // Unfold to optimize for long string: 8 chars per iteration |
110 | while (p < endP) |
111 | { |
112 | q[0] = (char) p[0]; |
113 | q[1] = (char) p[1]; |
114 | q[2] = (char) p[2]; |
115 | q[3] = (char) p[3]; |
116 | |
117 | q[4] = (char) p[4]; |
118 | q[5] = (char) p[5]; |
119 | q[6] = (char) p[6]; |
120 | q[7] = (char) p[7]; |
121 | |
122 | q += 8; |
123 | p += 8; |
124 | } |
125 | |
126 | endP += 8; |
127 | |
128 | while (p < endP) |
129 | { |
130 | * q ++ = (char) * p ++; |
131 | } |
132 | } |
133 | else |
134 | { |
135 | length = WszWideCharToMultiByte(CP_UTF8, 0, pString, -1, pBuffer, (int) length + 1, NULL, NULL); |
136 | |
137 | if (length == 0) |
138 | { |
139 | return HRESULT_FROM_GetLastError(); |
140 | } |
141 | } |
142 | |
143 | return S_OK; |
144 | } |
145 | |
146 | |
147 | HRESULT Utf8_Unicode_Length(__in_z LPCSTR pString, __out bool * pAllAscii, __out DWORD * pLength) |
148 | { |
149 | CONTRACTL |
150 | { |
151 | NOTHROW; |
152 | GC_NOTRIGGER; |
153 | } |
154 | CONTRACTL_END; |
155 | |
156 | * pAllAscii = true; |
157 | |
158 | LPCSTR p = pString; |
159 | |
160 | while (true) |
161 | { |
162 | char ch = * p; |
163 | |
164 | // Single check for termination and non ASCII |
165 | if (((unsigned) (ch - 1)) >= 0x7F) |
166 | { |
167 | if (ch != 0) |
168 | { |
169 | * pAllAscii = false; |
170 | } |
171 | |
172 | break; |
173 | } |
174 | |
175 | p ++; |
176 | } |
177 | |
178 | if (* pAllAscii) |
179 | { |
180 | if ((p - pString) > MAX_LENGTH) |
181 | { |
182 | return COR_E_OVERFLOW; |
183 | } |
184 | |
185 | * pLength = (DWORD)(p - pString); |
186 | } |
187 | else |
188 | { |
189 | * pLength = WszMultiByteToWideChar(CP_UTF8, 0, pString, -1, NULL, 0); |
190 | |
191 | if (* pLength == 0) |
192 | { |
193 | return HRESULT_FROM_GetLastError(); |
194 | } |
195 | |
196 | // Remove the count of null terminator, to be consistent with the all-ASCII case. |
197 | --*pLength; |
198 | |
199 | if (* pLength > MAX_LENGTH) |
200 | { |
201 | return COR_E_OVERFLOW; |
202 | } |
203 | } |
204 | |
205 | return S_OK; |
206 | } |
207 | |
208 | |
209 | // UTF8 to Unicode |
210 | |
211 | HRESULT Utf8_Unicode(__in_z LPCSTR pString, bool allAscii, __out_z LPWSTR pBuffer, DWORD length) |
212 | { |
213 | CONTRACTL |
214 | { |
215 | NOTHROW; |
216 | GC_NOTRIGGER; |
217 | } |
218 | CONTRACTL_END; |
219 | |
220 | pBuffer[length] = 0; |
221 | |
222 | if (allAscii) |
223 | { |
224 | LPCSTR p = pString; |
225 | |
226 | LPWSTR q = pBuffer; |
227 | |
228 | LPCSTR endP = p + length - 8; |
229 | |
230 | // Unfold to optimize for long string: 4 chars per iteration |
231 | while (p < endP) |
232 | { |
233 | q[0] = (WCHAR) p[0]; |
234 | q[1] = (WCHAR) p[1]; |
235 | q[2] = (WCHAR) p[2]; |
236 | q[3] = (WCHAR) p[3]; |
237 | |
238 | q[4] = (WCHAR) p[4]; |
239 | q[5] = (WCHAR) p[5]; |
240 | q[6] = (WCHAR) p[6]; |
241 | q[7] = (WCHAR) p[7]; |
242 | |
243 | q += 8; |
244 | p += 8; |
245 | } |
246 | |
247 | endP += 8; |
248 | |
249 | while (p < endP) |
250 | { |
251 | * q ++ = (WCHAR) * p ++; |
252 | } |
253 | } |
254 | else |
255 | { |
256 | length = WszMultiByteToWideChar(CP_UTF8, 0, pString, -1, pBuffer, (int) length + 1); |
257 | |
258 | if (length == 0) |
259 | { |
260 | return HRESULT_FROM_GetLastError(); |
261 | } |
262 | } |
263 | |
264 | return S_OK; |
265 | } |
266 | |
267 | |
268 | HRESULT ConvertUnicode_Utf8(__in_z LPCWSTR pString, __out_z LPSTR * pBuffer) |
269 | { |
270 | bool allAscii; |
271 | DWORD length; |
272 | |
273 | HRESULT hr = Unicode_Utf8_Length(pString, & allAscii, & length); |
274 | |
275 | if (SUCCEEDED(hr)) |
276 | { |
277 | * pBuffer = new (nothrow) char[length + 1]; |
278 | |
279 | if (* pBuffer == NULL) |
280 | { |
281 | hr = E_OUTOFMEMORY; |
282 | } |
283 | else |
284 | { |
285 | hr = Unicode_Utf8(pString, allAscii, * pBuffer, length); |
286 | } |
287 | } |
288 | |
289 | return hr; |
290 | } |
291 | |
292 | |
293 | HRESULT ConvertUtf8_Unicode(__in_z LPCSTR pString, __out_z LPWSTR * pBuffer) |
294 | { |
295 | bool allAscii; |
296 | DWORD length; |
297 | |
298 | HRESULT hr = Utf8_Unicode_Length(pString, & allAscii, & length); |
299 | |
300 | if (SUCCEEDED(hr)) |
301 | { |
302 | * pBuffer = new (nothrow) WCHAR[length + 1]; |
303 | |
304 | if (* pBuffer == NULL) |
305 | { |
306 | hr = E_OUTOFMEMORY; |
307 | } |
308 | else |
309 | { |
310 | hr = Utf8_Unicode(pString, allAscii, * pBuffer, length); |
311 | } |
312 | } |
313 | |
314 | return hr; |
315 | } |
316 | |
317 | |
318 | #ifdef _MSC_VER |
319 | #pragma optimize("", on) |
320 | #endif // _MSC_VER |
321 | |
322 | } // namespace FString |
323 | |