1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | |
5 | /*============================================================================ |
6 | ** |
7 | ** Source: test4.c |
8 | ** |
9 | ** Purpose: Tests WideCharToMultiByte with UTF-8 encoding |
10 | ** |
11 | ** |
12 | **==========================================================================*/ |
13 | |
14 | #include <palsuite.h> |
15 | |
16 | int __cdecl main(int argc, char *argv[]) |
17 | { |
18 | int ret; |
19 | int ret2; |
20 | |
21 | if (PAL_Initialize(argc, argv)) |
22 | { |
23 | return FAIL; |
24 | } |
25 | |
26 | const WCHAR * const unicodeStrings[] = |
27 | { |
28 | // Correct strings |
29 | |
30 | // Empty string |
31 | W("" ), |
32 | // 1 byte encoded 1 character long string |
33 | W("A" ), |
34 | // 2 byte encoded 1 character long string |
35 | W("\x0080" ), |
36 | // 3 byte encoded 1 character long string |
37 | W("\x0800" ), |
38 | // 1 byte encoded characters only |
39 | W("ABCDEFGHIJKLMNOPQRSTUVWXYZ" ), |
40 | // 2 byte encoded characters only |
41 | W("\x0080\x00FF\x01C1\x07FF" ), |
42 | // valid 3 byte encoded characters only |
43 | W("\x0800\x1D88\x1000\xFFFF" ), |
44 | // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 1 byte char |
45 | W("\x0041\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF\x0045" ), |
46 | // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 1 byte char, ending with 2 byte one |
47 | W("\x0041\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF" ), |
48 | // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 2 byte char, ending with 1 byte one |
49 | W("\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF\x0045" ), |
50 | // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 2 byte char |
51 | W("\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF" ), |
52 | // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 1 byte char |
53 | W("\x0041\x0042\x0080\x00FF\x0043\x0044\x01C1\x07FF\x0045\x0046" ), |
54 | // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 1 byte char, ending with 2 byte one |
55 | W("\x0041\x0042\x0080\x00FF\x0043\x0044\x01C1\x07FF" ), |
56 | // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 2 byte char, ending with 1 byte one |
57 | W("\x0080\x00FF\x0043\x0044\x01C1\x07FF\x0045\x0046" ), |
58 | // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 2 byte char |
59 | W("\x0080\x00FF\x0043\x0044\x01C1\x07FF" ), |
60 | // Surrogates |
61 | W("\xD800\xDC00\xD800\xDE40\xDAC0\xDFB0\xDBFF\xDFFF" ), |
62 | |
63 | // Strings with errors |
64 | |
65 | // Single high surrogate |
66 | W("\xD800" ), |
67 | // Single low surrogate |
68 | W("\xDC00" ), |
69 | // Character followed by single high surrogate |
70 | W("\x0041\xD800" ), |
71 | // Character followed by single low surrogate |
72 | W("\x0041\xDC00" ), |
73 | // Single high surrogate between two characters |
74 | W("\x0041\xD800\x0042" ), |
75 | // Single low surrogate between two characters |
76 | W("\x0041\xDC00\x0042" ), |
77 | }; |
78 | |
79 | const char * const utf8Strings[] = |
80 | { |
81 | // Correct strings |
82 | |
83 | // Empty string |
84 | "" , |
85 | // 1 byte encoded 1 character long string |
86 | "A" , |
87 | // 2 byte encoded 1 character long string |
88 | "\xC2\x80" , |
89 | // 3 byte encoded 1 character long string |
90 | "\xE0\xA0\x80" , |
91 | // 1 byte encoded characters only |
92 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" , |
93 | // valid 2 byte encoded characters only |
94 | "\xC2\x80\xC3\xBF\xC7\x81\xDF\xBF" , |
95 | // valid 3 byte encoded characters only |
96 | "\xE0\xA0\x80\xE1\xB6\x88\xE1\x80\x80\xEF\xBF\xBF" , |
97 | // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 1 byte char |
98 | "\x41\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF\x45" , |
99 | // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 1 byte char, ending with 2 byte one |
100 | "\x41\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF" , |
101 | // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 2 byte char, ending with 1 byte one |
102 | "\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF\x45" , |
103 | // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 2 byte char |
104 | "\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF" , |
105 | // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 1 byte char |
106 | "\x41\x42\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF\x45\x46" , |
107 | // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 1 byte char, ending with 2 byte one |
108 | "\x41\x42\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF" , |
109 | // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 2 byte char, ending with 1 byte one |
110 | "\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF\x45\x46" , |
111 | // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 2 byte char |
112 | "\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF" , |
113 | // Surrogates |
114 | "\xF0\x90\x80\x80\xF0\x90\x89\x80\xF3\x80\x8E\xB0\xF4\x8F\xBF\xBF" , |
115 | |
116 | // Strings with errors |
117 | |
118 | // Single high surrogate |
119 | "\xEF\xBF\xBD" , |
120 | // Single low surrogate |
121 | "\xEF\xBF\xBD" , |
122 | // Character followed by single high surrogate |
123 | "\x41\xEF\xBF\xBD" , |
124 | // Character followed by single low surrogate |
125 | "\x41\xEF\xBF\xBD" , |
126 | // Single high surrogate between two characters |
127 | "\x41\xEF\xBF\xBD\x42" , |
128 | // Single low surrogate between two characters |
129 | "\x41\xEF\xBF\xBD\x42" , |
130 | }; |
131 | |
132 | for (int i = 0; i < (sizeof(unicodeStrings) / sizeof(unicodeStrings[0])); i++) |
133 | { |
134 | ret = WideCharToMultiByte(CP_UTF8, 0, unicodeStrings[i], -1, NULL, 0, NULL, NULL); |
135 | CHAR* utf8Buffer = (CHAR*)malloc(ret * sizeof(CHAR)); |
136 | ret2 = WideCharToMultiByte(CP_UTF8, 0, unicodeStrings[i], -1, utf8Buffer, ret, NULL, NULL); |
137 | if (ret != ret2) |
138 | { |
139 | Fail("WideCharToMultiByte string %d: returned different string length for empty and real dest buffers!\n" |
140 | "Got %d for the empty one, %d for real one.\n" , i, ret2, ret); |
141 | } |
142 | |
143 | if (strcmp(utf8Buffer, utf8Strings[i]) != 0) |
144 | { |
145 | Fail("WideCharToMultiByte string %d: the resulting string doesn't match the expected one!\n" , i); |
146 | } |
147 | |
148 | free(utf8Buffer); |
149 | } |
150 | |
151 | PAL_Terminate(); |
152 | |
153 | return PASS; |
154 | } |
155 | |