1 | // |
2 | // TextConverterTest.cpp |
3 | // |
4 | // Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. |
5 | // and Contributors. |
6 | // |
7 | // SPDX-License-Identifier: BSL-1.0 |
8 | // |
9 | |
10 | |
11 | #include "TextConverterTest.h" |
12 | #include "Poco/CppUnit/TestCaller.h" |
13 | #include "Poco/CppUnit/TestSuite.h" |
14 | #include "Poco/TextConverter.h" |
15 | #include "Poco/ASCIIEncoding.h" |
16 | #include "Poco/Latin1Encoding.h" |
17 | #include "Poco/Latin2Encoding.h" |
18 | #include "Poco/Latin9Encoding.h" |
19 | #include "Poco/Windows1250Encoding.h" |
20 | #include "Poco/Windows1251Encoding.h" |
21 | #include "Poco/Windows1252Encoding.h" |
22 | #include "Poco/UTF8Encoding.h" |
23 | |
24 | #ifdef POCO_COMPILER_MSVC |
25 | #pragma warning(push) |
26 | #pragma warning(disable : 4267) |
27 | #endif // POCO_COMPILER_MSVC |
28 | |
29 | using namespace Poco; |
30 | |
31 | |
32 | TextConverterTest::TextConverterTest(const std::string& rName): CppUnit::TestCase(rName) |
33 | { |
34 | } |
35 | |
36 | |
37 | TextConverterTest::~TextConverterTest() |
38 | { |
39 | } |
40 | |
41 | |
42 | void TextConverterTest::testIdentityASCII() |
43 | { |
44 | ASCIIEncoding encoding; |
45 | TextConverter converter(encoding, encoding); |
46 | |
47 | std::string empty; |
48 | std::string result0; |
49 | int errors = converter.convert(empty, result0); |
50 | assertTrue (result0 == empty); |
51 | assertTrue (errors == 0); |
52 | |
53 | std::string fooBar = "foo bar" ; |
54 | std::string result1; |
55 | errors = converter.convert(fooBar, result1); |
56 | assertTrue (result1 == fooBar); |
57 | assertTrue (errors == 0); |
58 | |
59 | std::string result2; |
60 | errors = converter.convert(fooBar.data(), (int) fooBar.length(), result2); |
61 | assertTrue (result2 == fooBar); |
62 | assertTrue (errors == 0); |
63 | |
64 | std::string result3; |
65 | errors = converter.convert("" , 0, result3); |
66 | assertTrue (result3.empty()); |
67 | assertTrue (errors == 0); |
68 | |
69 | std::string x = "x" ; |
70 | std::string result4; |
71 | errors = converter.convert(x, result4); |
72 | assertTrue (result4 == x); |
73 | assertTrue (errors == 0); |
74 | |
75 | std::string result5; |
76 | errors = converter.convert("x" , 1, result5); |
77 | assertTrue (result5 == x); |
78 | assertTrue (errors == 0); |
79 | } |
80 | |
81 | |
82 | void TextConverterTest::testIdentityUTF8() |
83 | { |
84 | UTF8Encoding encoding; |
85 | TextConverter converter(encoding, encoding); |
86 | |
87 | std::string empty; |
88 | std::string result0; |
89 | int errors = converter.convert(empty, result0); |
90 | assertTrue (result0 == empty); |
91 | assertTrue (errors == 0); |
92 | |
93 | std::string fooBar = "foo bar" ; |
94 | std::string result1; |
95 | errors = converter.convert(fooBar, result1); |
96 | assertTrue (result1 == fooBar); |
97 | assertTrue (errors == 0); |
98 | |
99 | std::string result2; |
100 | errors = converter.convert(fooBar.data(), (int) fooBar.length(), result2); |
101 | assertTrue (result2 == fooBar); |
102 | assertTrue (errors == 0); |
103 | |
104 | std::string result3; |
105 | errors = converter.convert("" , 0, result3); |
106 | assertTrue (result3.empty()); |
107 | assertTrue (errors == 0); |
108 | |
109 | const unsigned char greek[] = {0x20, 0xce, 0xba, 0xe1, 0xbd, 0xb9, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5, 0x20, 0x00}; |
110 | std::string text((const char*) greek); |
111 | |
112 | std::string result4; |
113 | errors = converter.convert(text, result4); |
114 | assertTrue (result4 == text); |
115 | assertTrue (errors == 0); |
116 | |
117 | std::string result5; |
118 | errors = converter.convert((char*) greek, 13, result5); |
119 | assertTrue (result5 == text); |
120 | assertTrue (errors == 0); |
121 | |
122 | std::string x = "x" ; |
123 | std::string result6; |
124 | errors = converter.convert(x, result6); |
125 | assertTrue (result6 == x); |
126 | assertTrue (errors == 0); |
127 | |
128 | std::string result7; |
129 | errors = converter.convert("x" , 1, result7); |
130 | assertTrue (result7 == x); |
131 | assertTrue (errors == 0); |
132 | |
133 | std::string utfChar((char*) greek + 1, 2); |
134 | std::string result8; |
135 | errors = converter.convert(utfChar, result8); |
136 | assertTrue (result8 == utfChar); |
137 | assertTrue (errors == 0); |
138 | |
139 | std::string result9; |
140 | errors = converter.convert((char*) greek + 1, 2, result9); |
141 | assertTrue (result9 == utfChar); |
142 | assertTrue (errors == 0); |
143 | } |
144 | |
145 | |
146 | void TextConverterTest::testUTF8toASCII() |
147 | { |
148 | UTF8Encoding utf8Encoding; |
149 | ASCIIEncoding asciiEncoding; |
150 | TextConverter converter(utf8Encoding, asciiEncoding); |
151 | |
152 | const unsigned char greek[] = {0x20, 0xce, 0xba, 0xe1, 0xbd, 0xb9, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5, 0x20, 0x41, 0x42, 0x00}; |
153 | std::string text((const char*) greek); |
154 | std::string result0; |
155 | int errors = converter.convert(text, result0); |
156 | assertTrue (result0 == " ????? AB" ); |
157 | assertTrue (errors == 0); |
158 | |
159 | std::string result1; |
160 | errors = converter.convert("abcde" , 5, result1); |
161 | assertTrue (result1 == "abcde" ); |
162 | } |
163 | |
164 | |
165 | void TextConverterTest::testLatin1toUTF8() |
166 | { |
167 | Latin1Encoding latin1Encoding; |
168 | UTF8Encoding utf8Encoding; |
169 | TextConverter converter(latin1Encoding, utf8Encoding); |
170 | |
171 | const unsigned char latin1Chars[] = {'g', 252, 'n', 't', 'e', 'r', 0}; |
172 | const unsigned char utf8Chars[] = {'g', 195, 188, 'n', 't', 'e', 'r', 0}; |
173 | std::string latin1Text((const char*) latin1Chars); |
174 | std::string utf8Text((const char*) utf8Chars); |
175 | |
176 | std::string result0; |
177 | int errors = converter.convert(latin1Text, result0); |
178 | assertTrue (result0 == utf8Text); |
179 | assertTrue (errors == 0); |
180 | assertEqual((long) result0.size(), 7); |
181 | |
182 | std::string result1; |
183 | errors = converter.convert(latin1Chars, 6, result1); |
184 | assertTrue (result1 == utf8Text); |
185 | assertTrue (errors == 0); |
186 | } |
187 | |
188 | |
189 | void TextConverterTest::testLatin2toUTF8() |
190 | { |
191 | Latin2Encoding latinEncoding; |
192 | UTF8Encoding utf8Encoding; |
193 | TextConverter converter(latinEncoding, utf8Encoding); |
194 | |
195 | const unsigned char latinChars[26] = { 0xb5, 0xb9, 0xe8, 0xbb, 0xbe, 0xfd, 0xe1, 0xed, 0xe9, 0xfa, 0xe4, 0xf4, |
196 | 0x20, 0xa5, 0xa9, 0xc8, 0xab, 0xae, 0xdd, 0xc1, 0xcd, 0xc9, 0xda, 0xc4, 0xd4, 0x00 }; |
197 | const unsigned char utf8Chars[] = "ľščťžýáíéúäô ĽŠČŤŽÝÁÍÉÚÄÔ" ; |
198 | std::string latinText((const char*) latinChars); |
199 | std::string utf8Text((const char*) utf8Chars); |
200 | |
201 | std::string result0; |
202 | int errors = converter.convert(latinText, result0); |
203 | assertEqual (result0, utf8Text); |
204 | assertEqual ((long) errors, 0); |
205 | assertEqual((long) result0.size(), 49); |
206 | |
207 | std::string result1; |
208 | errors = converter.convert(latinChars, 25, result1); |
209 | assertEqual (result1, utf8Text); |
210 | assertEqual ((long) errors, 0); |
211 | assertEqual((long) result1.size(), 49); |
212 | } |
213 | |
214 | |
215 | void TextConverterTest::testLatin9toUTF8() |
216 | { |
217 | Latin9Encoding latinEncoding; |
218 | UTF8Encoding utf8Encoding; |
219 | TextConverter converter(latinEncoding, utf8Encoding); |
220 | |
221 | const unsigned char latinChars[26] = { 0x3f, 0xa8, 0x3f, 0x3f, 0xb8, 0xfd, 0xe1, 0xed, 0xe9, 0xfa, 0xe4, 0xf4, |
222 | 0x20, 0x3f, 0xa6, 0x3f, 0x3f, 0xb4, 0xdd, 0xc1, 0xcd, 0xc9, 0xda, 0xc4, 0xd4, 0x00 }; |
223 | const unsigned char utf8Chars[] = "?š??žýáíéúäô ?Š??ŽÝÁÍÉÚÄÔ" ; |
224 | std::string latinText((const char*) latinChars); |
225 | std::string utf8Text((const char*) utf8Chars); |
226 | |
227 | std::string result0; |
228 | int errors = converter.convert(latinText, result0); |
229 | assertEqual (result0, utf8Text); |
230 | assertEqual ((long) errors, 0); |
231 | assertEqual(result0.size(), 43); |
232 | |
233 | std::string result1; |
234 | errors = converter.convert(latinChars, 25, result1); |
235 | assertEqual(result1, utf8Text); |
236 | assertEqual((long) errors, 0); |
237 | assertEqual((long) result1.size(), 43); |
238 | } |
239 | |
240 | |
241 | void TextConverterTest::testCP1250toUTF8() |
242 | { |
243 | Windows1250Encoding latinEncoding; |
244 | UTF8Encoding utf8Encoding; |
245 | TextConverter converter(latinEncoding, utf8Encoding); |
246 | |
247 | const unsigned char latinChars[26] = { 0xbe, 0x9a, 0xe8, 0x9d, 0x9e, 0xfd, 0xe1, 0xed, 0xe9, 0xfa, 0xe4, 0xf4, |
248 | 0x20, 0xbc, 0x8a, 0xc8, 0x8d, 0x8e, 0xdd, 0xc1, 0xcd, 0xc9, 0xda, 0xc4, 0xd4, 0x00 }; |
249 | const unsigned char utf8Chars[] = "ľščťžýáíéúäô ĽŠČŤŽÝÁÍÉÚÄÔ" ; |
250 | std::string latinText((const char*) latinChars); |
251 | std::string utf8Text((const char*) utf8Chars); |
252 | |
253 | std::string result0; |
254 | int errors = converter.convert(latinText, result0); |
255 | assertEqual (result0, utf8Text); |
256 | assertEqual ((long) errors, 0); |
257 | assertEqual((long) result0.size(), 49); |
258 | |
259 | std::string result1; |
260 | errors = converter.convert(latinChars, 25, result1); |
261 | assertEqual(result1, utf8Text); |
262 | assertEqual((long) errors, 0); |
263 | assertEqual((long) result1.size(), 49); |
264 | } |
265 | |
266 | |
267 | void TextConverterTest::testCP1251toUTF8() |
268 | { |
269 | Windows1251Encoding latinEncoding; |
270 | UTF8Encoding utf8Encoding; |
271 | TextConverter converter(latinEncoding, utf8Encoding); |
272 | |
273 | const unsigned char latinChars[32] = { 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x00 }; |
274 | const unsigned char utf8Chars[] = "бвгдежзийклмнопрстуфхцчшщъыьэюя" ; |
275 | std::string latinText((const char*) latinChars); |
276 | std::string utf8Text((const char*) utf8Chars); |
277 | |
278 | std::string result0; |
279 | int errors = converter.convert(latinText, result0); |
280 | assertEqual (result0, utf8Text); |
281 | assertEqual ((long) errors, 0); |
282 | assertEqual((long) result0.size(), 62); |
283 | |
284 | std::string result1; |
285 | errors = converter.convert(latinChars, 31, result1); |
286 | assertEqual (result1, utf8Text); |
287 | assertEqual ((long) errors, 0); |
288 | assertEqual((long) result1.size(), 62); |
289 | } |
290 | |
291 | |
292 | void TextConverterTest::testCP1252toUTF8() |
293 | { |
294 | Windows1252Encoding latinEncoding; |
295 | UTF8Encoding utf8Encoding; |
296 | TextConverter converter(latinEncoding, utf8Encoding); |
297 | |
298 | const unsigned char latinChars[26] = { 0x3f, 0x9a, 0x3f, 0x3f, 0x9e, 0xfd, 0xe1, 0xed, 0xe9, 0xfa, 0xe4, 0xf4, |
299 | 0x20, 0x3f, 0x8a, 0x3f, 0x3f, 0x8e, 0xdd, 0xc1, 0xcd, 0xc9, 0xda, 0xc4, 0xd4, 0x00 }; |
300 | const unsigned char utf8Chars[] = "?š??žýáíéúäô ?Š??ŽÝÁÍÉÚÄÔ" ; |
301 | std::string latinText((const char*) latinChars); |
302 | std::string utf8Text((const char*) utf8Chars); |
303 | |
304 | std::string result0; |
305 | int errors = converter.convert(latinText, result0); |
306 | assertEqual(result0, utf8Text); |
307 | assertEqual((long) errors, 0); |
308 | assertEqual((long) result0.size(), 43); |
309 | |
310 | std::string result1; |
311 | errors = converter.convert(latinChars, 25, result1); |
312 | assertEqual(result1, utf8Text); |
313 | assertEqual((long) errors, 0); |
314 | assertEqual((long) result1.size(), 43); |
315 | } |
316 | |
317 | |
318 | void TextConverterTest::testErrors() |
319 | { |
320 | UTF8Encoding utf8Encoding; |
321 | Latin1Encoding latin1Encoding; |
322 | TextConverter converter(utf8Encoding, latin1Encoding); |
323 | |
324 | const unsigned char badChars[] = {'a', 'b', 255, 'c', 254, 0}; |
325 | std::string badText((const char*) badChars); |
326 | |
327 | std::string result; |
328 | int errors = converter.convert(badText, result); |
329 | assertTrue (errors == 2); |
330 | } |
331 | |
332 | |
333 | void TextConverterTest::setUp() |
334 | { |
335 | } |
336 | |
337 | |
338 | void TextConverterTest::tearDown() |
339 | { |
340 | } |
341 | |
342 | |
343 | CppUnit::Test* TextConverterTest::suite() |
344 | { |
345 | CppUnit::TestSuite* pSuite = new CppUnit::TestSuite("TextConverterTest" ); |
346 | |
347 | CppUnit_addTest(pSuite, TextConverterTest, testIdentityASCII); |
348 | CppUnit_addTest(pSuite, TextConverterTest, testIdentityUTF8); |
349 | CppUnit_addTest(pSuite, TextConverterTest, testUTF8toASCII); |
350 | CppUnit_addTest(pSuite, TextConverterTest, testLatin1toUTF8); |
351 | CppUnit_addTest(pSuite, TextConverterTest, testLatin2toUTF8); |
352 | CppUnit_addTest(pSuite, TextConverterTest, testLatin9toUTF8); |
353 | CppUnit_addTest(pSuite, TextConverterTest, testCP1250toUTF8); |
354 | CppUnit_addTest(pSuite, TextConverterTest, testCP1251toUTF8); |
355 | CppUnit_addTest(pSuite, TextConverterTest, testCP1252toUTF8); |
356 | CppUnit_addTest(pSuite, TextConverterTest, testErrors); |
357 | |
358 | return pSuite; |
359 | } |
360 | |
361 | #ifdef POCO_COMPILER_MSVC |
362 | #pragma warning(pop) |
363 | #endif // POCO_COMPILER_MSVC |
364 | |