1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4
5/*============================================================================
6**
7** Source: test4.c
8**
9** Purpose: Tests MultiByteToWideChar with a UTF-8 encoding
10**
11**
12**==========================================================================*/
13
14#include <palsuite.h>
15
16int __cdecl main(int argc, char *argv[])
17{
18 int ret;
19 int ret2;
20
21 if (PAL_Initialize(argc, argv))
22 {
23 return FAIL;
24 }
25
26 const char * const utf8Strings[] =
27 {
28 // Correct strings
29
30 // Empty string
31 "",
32 // 1 byte encoded 1 character long string
33 "A",
34 // 2 byte encoded 1 character long string
35 "\xC2\x80",
36 // 3 byte encoded 1 character long string
37 "\xE0\xA0\x80",
38 // 1 byte encoded characters only
39 "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
40 // valid 2 byte encoded characters only
41 "\xC2\x80\xC3\xBF\xC7\x81\xDF\xBF",
42 // valid 3 byte encoded characters only
43 "\xE0\xA0\x80\xE1\xB6\x88\xE1\x80\x80\xEF\xBF\xBF",
44 // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 1 byte char
45 "\x41\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF\x45",
46 // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 1 byte char, ending with 2 byte one
47 "\x41\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF",
48 // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 2 byte char, ending with 1 byte one
49 "\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF\x45",
50 // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 2 byte char
51 "\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF",
52 // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 1 byte char
53 "\x41\x42\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF\x45\x46",
54 // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 1 byte char, ending with 2 byte one
55 "\x41\x42\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF",
56 // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 2 byte char, ending with 1 byte one
57 "\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF\x45\x46",
58 // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 2 byte char
59 "\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF",
60 // surrogates
61 "\xF0\x90\x80\x80\xF0\x90\x89\x80\xF3\x80\x8E\xB0\xF4\x8F\xBF\xBF",
62
63 // Strings with errors
64 // Incomplete 2 byte encoded character 1 byte missing standalone
65 "\xC2",
66 // Incomplete 3 byte encoded character 1 byte missing standalone
67 "\xE0\xA0",
68 // Incomplete 3 byte encoded character 2 bytes missing standalone
69 "\xE0",
70 // Incomplete surrogate character 1 byte missing standalone
71 "\xF0\x90\x80",
72 // Incomplete surrogate character 2 bytes missing standalone
73 "\xF0\x90",
74 // Incomplete surrogate character 3 bytes missing standalone
75 "\xF0",
76 // Trailing byte with no lead byte standalone
77 "\x80",
78 // Incomplete 2 byte encoded character 1 byte missing between 1 byte chars
79 "\x41\xC2\x42",
80 // Incomplete 3 byte encoded character 1 byte missing between 1 byte chars
81 "\x41\xE0\xA0\x42",
82 // Incomplete 3 byte encoded character 2 bytes missing between 1 byte chars
83 "\x41\xE0\x42",
84 // Trailing byte with no lead byte between 1 byte chars
85 "\x41\x80\x42",
86 // Incomplete 2 byte encoded character 1 byte missing before 1 byte char
87 "\xC2\x42",
88 // Incomplete 3 byte encoded character 1 byte missing before 1 byte char
89 "\xE0\xA0\x42",
90 // Incomplete 3 byte encoded character 2 bytes missing before 1 byte char
91 "\xE0\x42",
92 // Trailing byte with no lead byte before 1 byte char
93 "\x80\x42",
94 // Incomplete 2 byte encoded character 1 byte missing after 1 byte char
95 "\x41\xC2",
96 // Incomplete 3 byte encoded character 1 byte missing after 1 byte char
97 "\x41\xE0\xA0",
98 // Incomplete 3 byte encoded character 2 bytes missing after 1 byte char
99 "\x41\xE0",
100 // Trailing byte with no lead byte after 1 byte char
101 "\x41\x80",
102 // Incomplete 2 byte encoded character 1 byte missing between 2 byte chars
103 "\xC2\x80\xC2\xC3\xBF",
104 // Incomplete 3 byte encoded character 1 byte missing between 2 byte chars
105 "\xC2\x80\xE0\xA0\xC3\xBF",
106 // Incomplete 3 byte encoded character 2 bytes missing between 2 byte chars
107 "\xC2\x80\xE0\xC3\xBF",
108 // Trailing byte with no lead byte between 2 byte chars
109 "\xC2\x80\x80\xC3\xBF",
110 // 2 byte encoded character in non-shortest form encodings (these are not allowed)
111 "\xC0\x80",
112 // 3 byte encoded character in non-shortest form encodings (these are not allowed)
113 "\xE0\x80\x80",
114 // 4 byte encoded character in non-shortest form encodings (these are not allowed)
115 "\xF0\x80\x80\x80",
116 };
117
118 const WCHAR * const unicodeStrings[] =
119 {
120 // Empty string
121 W(""),
122 // 1 byte encoded 1 character long string
123 W("A"),
124 // 2 byte encoded 1 character long string
125 W("\x0080"),
126 // 3 byte encoded 1 character long string
127 W("\x0800"),
128 // 1 byte encoded characters only
129 W("ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
130 // 2 byte encoded characters only
131 W("\x0080\x00FF\x01C1\x07FF"),
132 // valid 3 byte encoded characters only
133 W("\x0800\x1D88\x1000\xFFFF"),
134 // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 1 byte char
135 W("\x0041\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF\x0045"),
136 // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 1 byte char, ending with 2 byte one
137 W("\x0041\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF"),
138 // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 2 byte char, ending with 1 byte one
139 W("\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF\x0045"),
140 // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 2 byte char
141 W("\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF"),
142 // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 1 byte char
143 W("\x0041\x0042\x0080\x00FF\x0043\x0044\x01C1\x07FF\x0045\x0046"),
144 // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 1 byte char, ending with 2 byte one
145 W("\x0041\x0042\x0080\x00FF\x0043\x0044\x01C1\x07FF"),
146 // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 2 byte char, ending with 1 byte one
147 W("\x0080\x00FF\x0043\x0044\x01C1\x07FF\x0045\x0046"),
148 // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 2 byte char
149 W("\x0080\x00FF\x0043\x0044\x01C1\x07FF"),
150 // surrogates
151 W("\xD800\xDC00\xD800\xDE40\xDAC0\xDFB0\xDBFF\xDFFF"),
152
153 // Strings with errors
154 // Incomplete 2 byte encoded character standalone
155 W("\xFFFD"),
156 // Incomplete 3 byte encoded character 1 byte missing standalone
157 W("\xFFFD"),
158 // Incomplete 3 byte encoded character 2 bytes missing standalone
159 W("\xFFFD"),
160 // Incomplete surrogate character 1 byte missing standalone
161 W("\xFFFD"),
162 // Incomplete surrogate character 2 bytes missing standalone
163 W("\xFFFD"),
164 // Incomplete surrogate character 3 bytes missing standalone
165 W("\xFFFD"),
166 // Trailing byte with no lead byte standalone
167 W("\xFFFD"),
168 // Incomplete 2 byte encoded character 1 byte missing between 1 byte chars
169 W("\x0041\xFFFD\x0042"),
170 // Incomplete 3 byte encoded character 1 byte missing between 1 byte chars
171 W("\x0041\xFFFD\x0042"),
172 // Incomplete 3 byte encoded character 2 bytes missing between 1 byte chars
173 W("\x0041\xFFFD\x0042"),
174 // Trailing byte with no lead byte between 1 byte chars
175 W("\x0041\xFFFD\x0042"),
176 // Incomplete 2 byte encoded character 1 byte missing before 1 byte char
177 W("\xFFFD\x0042"),
178 // Incomplete 3 byte encoded character 1 byte missing before 1 byte char
179 W("\xFFFD\x0042"),
180 // Incomplete 3 byte encoded character 2 bytes missing before 1 byte char
181 W("\xFFFD\x0042"),
182 // Trailing byte with no lead byte before 1 byte char
183 W("\xFFFD\x0042"),
184 // Incomplete 2 byte encoded character 1 byte missing after 1 byte char
185 W("\x0041\xFFFD"),
186 // Incomplete 3 byte encoded character 1 byte missing after 1 byte char
187 W("\x0041\xFFFD"),
188 // Incomplete 3 byte encoded character 2 bytes missing after 1 byte char
189 W("\x0041\xFFFD"),
190 // Trailing byte with no lead byte after 1 byte char
191 W("\x0041\xFFFD"),
192 // Incomplete 2 byte encoded character 1 byte missing between 2 byte chars
193 W("\x0080\xFFFD\x00FF"),
194 // Incomplete 3 byte encoded character 1 byte missing between 2 byte chars
195 W("\x0080\xFFFD\x00FF"),
196 // Incomplete 3 byte encoded character 2 bytes missing between 2 byte chars
197 W("\x0080\xFFFD\x00FF"),
198 // Trailing byte with no lead byte between 2 byte chars
199 W("\x0080\xFFFD\x00FF"),
200 // 2 byte encoded character in non-shortest form encodings (these are not allowed)
201 W("\xFFFD\xFFFD"),
202 // 3 byte encoded character in non-shortest form encodings (these are not allowed)
203 W("\xFFFD\xFFFD"),
204 // 4 byte encoded character in non-shortest form encodings (these are not allowed)
205 W("\xFFFD\xFFFD\xFFFD"),
206 };
207
208 for (int i = 0; i < (sizeof(utf8Strings) / sizeof(utf8Strings[0])); i++)
209 {
210 ret = MultiByteToWideChar(CP_UTF8, 0, utf8Strings[i], -1, NULL, 0);
211 WCHAR* wideBuffer = (WCHAR*)malloc(ret * sizeof(WCHAR));
212 ret2 = MultiByteToWideChar(CP_UTF8, 0, utf8Strings[i], -1, wideBuffer, ret);
213 if (ret != ret2)
214 {
215 Fail("MultiByteToWideChar string %d: returned different string length for empty and real dest buffers!\n"
216 "Got %d for the empty one, %d for real one.\n", i, ret2, ret);
217 }
218
219 if (wcscmp(wideBuffer, unicodeStrings[i]) != 0)
220 {
221 Fail("MultiByteToWideChar string %d: the resulting string doesn't match the expected one!\n", i);
222 }
223
224 free(wideBuffer);
225 }
226
227 PAL_Terminate();
228
229 return PASS;
230}
231