1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | |
5 | /*============================================================================ |
6 | ** |
7 | ** Source: test4.c |
8 | ** |
9 | ** Purpose: Tests MultiByteToWideChar with a UTF-8 encoding |
10 | ** |
11 | ** |
12 | **==========================================================================*/ |
13 | |
14 | #include <palsuite.h> |
15 | |
16 | int __cdecl main(int argc, char *argv[]) |
17 | { |
18 | int ret; |
19 | int ret2; |
20 | |
21 | if (PAL_Initialize(argc, argv)) |
22 | { |
23 | return FAIL; |
24 | } |
25 | |
26 | const char * const utf8Strings[] = |
27 | { |
28 | // Correct strings |
29 | |
30 | // Empty string |
31 | "" , |
32 | // 1 byte encoded 1 character long string |
33 | "A" , |
34 | // 2 byte encoded 1 character long string |
35 | "\xC2\x80" , |
36 | // 3 byte encoded 1 character long string |
37 | "\xE0\xA0\x80" , |
38 | // 1 byte encoded characters only |
39 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" , |
40 | // valid 2 byte encoded characters only |
41 | "\xC2\x80\xC3\xBF\xC7\x81\xDF\xBF" , |
42 | // valid 3 byte encoded characters only |
43 | "\xE0\xA0\x80\xE1\xB6\x88\xE1\x80\x80\xEF\xBF\xBF" , |
44 | // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 1 byte char |
45 | "\x41\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF\x45" , |
46 | // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 1 byte char, ending with 2 byte one |
47 | "\x41\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF" , |
48 | // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 2 byte char, ending with 1 byte one |
49 | "\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF\x45" , |
50 | // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 2 byte char |
51 | "\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF" , |
52 | // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 1 byte char |
53 | "\x41\x42\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF\x45\x46" , |
54 | // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 1 byte char, ending with 2 byte one |
55 | "\x41\x42\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF" , |
56 | // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 2 byte char, ending with 1 byte one |
57 | "\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF\x45\x46" , |
58 | // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 2 byte char |
59 | "\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF" , |
60 | // surrogates |
61 | "\xF0\x90\x80\x80\xF0\x90\x89\x80\xF3\x80\x8E\xB0\xF4\x8F\xBF\xBF" , |
62 | |
63 | // Strings with errors |
64 | // Incomplete 2 byte encoded character 1 byte missing standalone |
65 | "\xC2" , |
66 | // Incomplete 3 byte encoded character 1 byte missing standalone |
67 | "\xE0\xA0" , |
68 | // Incomplete 3 byte encoded character 2 bytes missing standalone |
69 | "\xE0" , |
70 | // Incomplete surrogate character 1 byte missing standalone |
71 | "\xF0\x90\x80" , |
72 | // Incomplete surrogate character 2 bytes missing standalone |
73 | "\xF0\x90" , |
74 | // Incomplete surrogate character 3 bytes missing standalone |
75 | "\xF0" , |
76 | // Trailing byte with no lead byte standalone |
77 | "\x80" , |
78 | // Incomplete 2 byte encoded character 1 byte missing between 1 byte chars |
79 | "\x41\xC2\x42" , |
80 | // Incomplete 3 byte encoded character 1 byte missing between 1 byte chars |
81 | "\x41\xE0\xA0\x42" , |
82 | // Incomplete 3 byte encoded character 2 bytes missing between 1 byte chars |
83 | "\x41\xE0\x42" , |
84 | // Trailing byte with no lead byte between 1 byte chars |
85 | "\x41\x80\x42" , |
86 | // Incomplete 2 byte encoded character 1 byte missing before 1 byte char |
87 | "\xC2\x42" , |
88 | // Incomplete 3 byte encoded character 1 byte missing before 1 byte char |
89 | "\xE0\xA0\x42" , |
90 | // Incomplete 3 byte encoded character 2 bytes missing before 1 byte char |
91 | "\xE0\x42" , |
92 | // Trailing byte with no lead byte before 1 byte char |
93 | "\x80\x42" , |
94 | // Incomplete 2 byte encoded character 1 byte missing after 1 byte char |
95 | "\x41\xC2" , |
96 | // Incomplete 3 byte encoded character 1 byte missing after 1 byte char |
97 | "\x41\xE0\xA0" , |
98 | // Incomplete 3 byte encoded character 2 bytes missing after 1 byte char |
99 | "\x41\xE0" , |
100 | // Trailing byte with no lead byte after 1 byte char |
101 | "\x41\x80" , |
102 | // Incomplete 2 byte encoded character 1 byte missing between 2 byte chars |
103 | "\xC2\x80\xC2\xC3\xBF" , |
104 | // Incomplete 3 byte encoded character 1 byte missing between 2 byte chars |
105 | "\xC2\x80\xE0\xA0\xC3\xBF" , |
106 | // Incomplete 3 byte encoded character 2 bytes missing between 2 byte chars |
107 | "\xC2\x80\xE0\xC3\xBF" , |
108 | // Trailing byte with no lead byte between 2 byte chars |
109 | "\xC2\x80\x80\xC3\xBF" , |
110 | // 2 byte encoded character in non-shortest form encodings (these are not allowed) |
111 | "\xC0\x80" , |
112 | // 3 byte encoded character in non-shortest form encodings (these are not allowed) |
113 | "\xE0\x80\x80" , |
114 | // 4 byte encoded character in non-shortest form encodings (these are not allowed) |
115 | "\xF0\x80\x80\x80" , |
116 | }; |
117 | |
118 | const WCHAR * const unicodeStrings[] = |
119 | { |
120 | // Empty string |
121 | W("" ), |
122 | // 1 byte encoded 1 character long string |
123 | W("A" ), |
124 | // 2 byte encoded 1 character long string |
125 | W("\x0080" ), |
126 | // 3 byte encoded 1 character long string |
127 | W("\x0800" ), |
128 | // 1 byte encoded characters only |
129 | W("ABCDEFGHIJKLMNOPQRSTUVWXYZ" ), |
130 | // 2 byte encoded characters only |
131 | W("\x0080\x00FF\x01C1\x07FF" ), |
132 | // valid 3 byte encoded characters only |
133 | W("\x0800\x1D88\x1000\xFFFF" ), |
134 | // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 1 byte char |
135 | W("\x0041\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF\x0045" ), |
136 | // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 1 byte char, ending with 2 byte one |
137 | W("\x0041\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF" ), |
138 | // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 2 byte char, ending with 1 byte one |
139 | W("\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF\x0045" ), |
140 | // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 2 byte char |
141 | W("\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF" ), |
142 | // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 1 byte char |
143 | W("\x0041\x0042\x0080\x00FF\x0043\x0044\x01C1\x07FF\x0045\x0046" ), |
144 | // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 1 byte char, ending with 2 byte one |
145 | W("\x0041\x0042\x0080\x00FF\x0043\x0044\x01C1\x07FF" ), |
146 | // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 2 byte char, ending with 1 byte one |
147 | W("\x0080\x00FF\x0043\x0044\x01C1\x07FF\x0045\x0046" ), |
148 | // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 2 byte char |
149 | W("\x0080\x00FF\x0043\x0044\x01C1\x07FF" ), |
150 | // surrogates |
151 | W("\xD800\xDC00\xD800\xDE40\xDAC0\xDFB0\xDBFF\xDFFF" ), |
152 | |
153 | // Strings with errors |
154 | // Incomplete 2 byte encoded character standalone |
155 | W("\xFFFD" ), |
156 | // Incomplete 3 byte encoded character 1 byte missing standalone |
157 | W("\xFFFD" ), |
158 | // Incomplete 3 byte encoded character 2 bytes missing standalone |
159 | W("\xFFFD" ), |
160 | // Incomplete surrogate character 1 byte missing standalone |
161 | W("\xFFFD" ), |
162 | // Incomplete surrogate character 2 bytes missing standalone |
163 | W("\xFFFD" ), |
164 | // Incomplete surrogate character 3 bytes missing standalone |
165 | W("\xFFFD" ), |
166 | // Trailing byte with no lead byte standalone |
167 | W("\xFFFD" ), |
168 | // Incomplete 2 byte encoded character 1 byte missing between 1 byte chars |
169 | W("\x0041\xFFFD\x0042" ), |
170 | // Incomplete 3 byte encoded character 1 byte missing between 1 byte chars |
171 | W("\x0041\xFFFD\x0042" ), |
172 | // Incomplete 3 byte encoded character 2 bytes missing between 1 byte chars |
173 | W("\x0041\xFFFD\x0042" ), |
174 | // Trailing byte with no lead byte between 1 byte chars |
175 | W("\x0041\xFFFD\x0042" ), |
176 | // Incomplete 2 byte encoded character 1 byte missing before 1 byte char |
177 | W("\xFFFD\x0042" ), |
178 | // Incomplete 3 byte encoded character 1 byte missing before 1 byte char |
179 | W("\xFFFD\x0042" ), |
180 | // Incomplete 3 byte encoded character 2 bytes missing before 1 byte char |
181 | W("\xFFFD\x0042" ), |
182 | // Trailing byte with no lead byte before 1 byte char |
183 | W("\xFFFD\x0042" ), |
184 | // Incomplete 2 byte encoded character 1 byte missing after 1 byte char |
185 | W("\x0041\xFFFD" ), |
186 | // Incomplete 3 byte encoded character 1 byte missing after 1 byte char |
187 | W("\x0041\xFFFD" ), |
188 | // Incomplete 3 byte encoded character 2 bytes missing after 1 byte char |
189 | W("\x0041\xFFFD" ), |
190 | // Trailing byte with no lead byte after 1 byte char |
191 | W("\x0041\xFFFD" ), |
192 | // Incomplete 2 byte encoded character 1 byte missing between 2 byte chars |
193 | W("\x0080\xFFFD\x00FF" ), |
194 | // Incomplete 3 byte encoded character 1 byte missing between 2 byte chars |
195 | W("\x0080\xFFFD\x00FF" ), |
196 | // Incomplete 3 byte encoded character 2 bytes missing between 2 byte chars |
197 | W("\x0080\xFFFD\x00FF" ), |
198 | // Trailing byte with no lead byte between 2 byte chars |
199 | W("\x0080\xFFFD\x00FF" ), |
200 | // 2 byte encoded character in non-shortest form encodings (these are not allowed) |
201 | W("\xFFFD\xFFFD" ), |
202 | // 3 byte encoded character in non-shortest form encodings (these are not allowed) |
203 | W("\xFFFD\xFFFD" ), |
204 | // 4 byte encoded character in non-shortest form encodings (these are not allowed) |
205 | W("\xFFFD\xFFFD\xFFFD" ), |
206 | }; |
207 | |
208 | for (int i = 0; i < (sizeof(utf8Strings) / sizeof(utf8Strings[0])); i++) |
209 | { |
210 | ret = MultiByteToWideChar(CP_UTF8, 0, utf8Strings[i], -1, NULL, 0); |
211 | WCHAR* wideBuffer = (WCHAR*)malloc(ret * sizeof(WCHAR)); |
212 | ret2 = MultiByteToWideChar(CP_UTF8, 0, utf8Strings[i], -1, wideBuffer, ret); |
213 | if (ret != ret2) |
214 | { |
215 | Fail("MultiByteToWideChar string %d: returned different string length for empty and real dest buffers!\n" |
216 | "Got %d for the empty one, %d for real one.\n" , i, ret2, ret); |
217 | } |
218 | |
219 | if (wcscmp(wideBuffer, unicodeStrings[i]) != 0) |
220 | { |
221 | Fail("MultiByteToWideChar string %d: the resulting string doesn't match the expected one!\n" , i); |
222 | } |
223 | |
224 | free(wideBuffer); |
225 | } |
226 | |
227 | PAL_Terminate(); |
228 | |
229 | return PASS; |
230 | } |
231 | |