1// Licensed to the .NET Foundation under one or more agreements.
2// The .NET Foundation licenses this file to you under the MIT license.
3// See the LICENSE file in the project root for more information.
4
5/*============================================================================
6**
7** Source: test4.c
8**
** Purpose: Tests WideCharToMultiByte with UTF-8 encoding
10**
11**
12**==========================================================================*/
13
14#include <palsuite.h>
15
16int __cdecl main(int argc, char *argv[])
17{
18 int ret;
19 int ret2;
20
21 if (PAL_Initialize(argc, argv))
22 {
23 return FAIL;
24 }
25
26 const WCHAR * const unicodeStrings[] =
27 {
28 // Correct strings
29
30 // Empty string
31 W(""),
32 // 1 byte encoded 1 character long string
33 W("A"),
34 // 2 byte encoded 1 character long string
35 W("\x0080"),
36 // 3 byte encoded 1 character long string
37 W("\x0800"),
38 // 1 byte encoded characters only
39 W("ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
40 // 2 byte encoded characters only
41 W("\x0080\x00FF\x01C1\x07FF"),
42 // valid 3 byte encoded characters only
43 W("\x0800\x1D88\x1000\xFFFF"),
44 // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 1 byte char
45 W("\x0041\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF\x0045"),
46 // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 1 byte char, ending with 2 byte one
47 W("\x0041\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF"),
48 // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 2 byte char, ending with 1 byte one
49 W("\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF\x0045"),
50 // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 2 byte char
51 W("\x0080\x0042\x00FF\x0043\x01C1\x0044\x07FF"),
52 // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 1 byte char
53 W("\x0041\x0042\x0080\x00FF\x0043\x0044\x01C1\x07FF\x0045\x0046"),
54 // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 1 byte char, ending with 2 byte one
55 W("\x0041\x0042\x0080\x00FF\x0043\x0044\x01C1\x07FF"),
56 // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 2 byte char, ending with 1 byte one
57 W("\x0080\x00FF\x0043\x0044\x01C1\x07FF\x0045\x0046"),
58 // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 2 byte char
59 W("\x0080\x00FF\x0043\x0044\x01C1\x07FF"),
60 // Surrogates
61 W("\xD800\xDC00\xD800\xDE40\xDAC0\xDFB0\xDBFF\xDFFF"),
62
63 // Strings with errors
64
65 // Single high surrogate
66 W("\xD800"),
67 // Single low surrogate
68 W("\xDC00"),
69 // Character followed by single high surrogate
70 W("\x0041\xD800"),
71 // Character followed by single low surrogate
72 W("\x0041\xDC00"),
73 // Single high surrogate between two characters
74 W("\x0041\xD800\x0042"),
75 // Single low surrogate between two characters
76 W("\x0041\xDC00\x0042"),
77 };
78
79 const char * const utf8Strings[] =
80 {
81 // Correct strings
82
83 // Empty string
84 "",
85 // 1 byte encoded 1 character long string
86 "A",
87 // 2 byte encoded 1 character long string
88 "\xC2\x80",
89 // 3 byte encoded 1 character long string
90 "\xE0\xA0\x80",
91 // 1 byte encoded characters only
92 "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
93 // valid 2 byte encoded characters only
94 "\xC2\x80\xC3\xBF\xC7\x81\xDF\xBF",
95 // valid 3 byte encoded characters only
96 "\xE0\xA0\x80\xE1\xB6\x88\xE1\x80\x80\xEF\xBF\xBF",
97 // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 1 byte char
98 "\x41\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF\x45",
99 // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 1 byte char, ending with 2 byte one
100 "\x41\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF",
101 // 1 byte and 2 byte encoded characters interleaved 1:1 starting with 2 byte char, ending with 1 byte one
102 "\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF\x45",
103 // 1 byte and 2 byte encoded characters interleaved 1:1 starting and ending with 2 byte char
104 "\xC2\x80\x42\xC3\xBF\x43\xC7\x81\x44\xDF\xBF",
105 // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 1 byte char
106 "\x41\x42\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF\x45\x46",
107 // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 1 byte char, ending with 2 byte one
108 "\x41\x42\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF",
109 // 1 byte and 2 byte encoded characters interleaved 2:2 starting with 2 byte char, ending with 1 byte one
110 "\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF\x45\x46",
111 // 1 byte and 2 byte encoded characters interleaved 2:2 starting and ending with 2 byte char
112 "\xC2\x80\xC3\xBF\x43\x44\xC7\x81\xDF\xBF",
113 // Surrogates
114 "\xF0\x90\x80\x80\xF0\x90\x89\x80\xF3\x80\x8E\xB0\xF4\x8F\xBF\xBF",
115
116 // Strings with errors
117
118 // Single high surrogate
119 "\xEF\xBF\xBD",
120 // Single low surrogate
121 "\xEF\xBF\xBD",
122 // Character followed by single high surrogate
123 "\x41\xEF\xBF\xBD",
124 // Character followed by single low surrogate
125 "\x41\xEF\xBF\xBD",
126 // Single high surrogate between two characters
127 "\x41\xEF\xBF\xBD\x42",
128 // Single low surrogate between two characters
129 "\x41\xEF\xBF\xBD\x42",
130 };
131
132 for (int i = 0; i < (sizeof(unicodeStrings) / sizeof(unicodeStrings[0])); i++)
133 {
134 ret = WideCharToMultiByte(CP_UTF8, 0, unicodeStrings[i], -1, NULL, 0, NULL, NULL);
135 CHAR* utf8Buffer = (CHAR*)malloc(ret * sizeof(CHAR));
136 ret2 = WideCharToMultiByte(CP_UTF8, 0, unicodeStrings[i], -1, utf8Buffer, ret, NULL, NULL);
137 if (ret != ret2)
138 {
139 Fail("WideCharToMultiByte string %d: returned different string length for empty and real dest buffers!\n"
140 "Got %d for the empty one, %d for real one.\n", i, ret2, ret);
141 }
142
143 if (strcmp(utf8Buffer, utf8Strings[i]) != 0)
144 {
145 Fail("WideCharToMultiByte string %d: the resulting string doesn't match the expected one!\n", i);
146 }
147
148 free(utf8Buffer);
149 }
150
151 PAL_Terminate();
152
153 return PASS;
154}
155