| 1 | // |
| 2 | // TextConverterTest.cpp |
| 3 | // |
| 4 | // Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. |
| 5 | // and Contributors. |
| 6 | // |
| 7 | // SPDX-License-Identifier: BSL-1.0 |
| 8 | // |
| 9 | |
| 10 | |
| 11 | #include "TextConverterTest.h" |
| 12 | #include "Poco/CppUnit/TestCaller.h" |
| 13 | #include "Poco/CppUnit/TestSuite.h" |
| 14 | #include "Poco/TextConverter.h" |
| 15 | #include "Poco/ASCIIEncoding.h" |
| 16 | #include "Poco/Latin1Encoding.h" |
| 17 | #include "Poco/Latin2Encoding.h" |
| 18 | #include "Poco/Latin9Encoding.h" |
| 19 | #include "Poco/Windows1250Encoding.h" |
| 20 | #include "Poco/Windows1251Encoding.h" |
| 21 | #include "Poco/Windows1252Encoding.h" |
| 22 | #include "Poco/UTF8Encoding.h" |
| 23 | |
| 24 | #ifdef POCO_COMPILER_MSVC |
| 25 | #pragma warning(push) |
| 26 | #pragma warning(disable : 4267) |
| 27 | #endif // POCO_COMPILER_MSVC |
| 28 | |
| 29 | using namespace Poco; |
| 30 | |
| 31 | |
| 32 | TextConverterTest::TextConverterTest(const std::string& rName): CppUnit::TestCase(rName) |
| 33 | { |
| 34 | } |
| 35 | |
| 36 | |
| 37 | TextConverterTest::~TextConverterTest() |
| 38 | { |
| 39 | } |
| 40 | |
| 41 | |
| 42 | void TextConverterTest::testIdentityASCII() |
| 43 | { |
| 44 | ASCIIEncoding encoding; |
| 45 | TextConverter converter(encoding, encoding); |
| 46 | |
| 47 | std::string empty; |
| 48 | std::string result0; |
| 49 | int errors = converter.convert(empty, result0); |
| 50 | assertTrue (result0 == empty); |
| 51 | assertTrue (errors == 0); |
| 52 | |
| 53 | std::string fooBar = "foo bar" ; |
| 54 | std::string result1; |
| 55 | errors = converter.convert(fooBar, result1); |
| 56 | assertTrue (result1 == fooBar); |
| 57 | assertTrue (errors == 0); |
| 58 | |
| 59 | std::string result2; |
| 60 | errors = converter.convert(fooBar.data(), (int) fooBar.length(), result2); |
| 61 | assertTrue (result2 == fooBar); |
| 62 | assertTrue (errors == 0); |
| 63 | |
| 64 | std::string result3; |
| 65 | errors = converter.convert("" , 0, result3); |
| 66 | assertTrue (result3.empty()); |
| 67 | assertTrue (errors == 0); |
| 68 | |
| 69 | std::string x = "x" ; |
| 70 | std::string result4; |
| 71 | errors = converter.convert(x, result4); |
| 72 | assertTrue (result4 == x); |
| 73 | assertTrue (errors == 0); |
| 74 | |
| 75 | std::string result5; |
| 76 | errors = converter.convert("x" , 1, result5); |
| 77 | assertTrue (result5 == x); |
| 78 | assertTrue (errors == 0); |
| 79 | } |
| 80 | |
| 81 | |
| 82 | void TextConverterTest::testIdentityUTF8() |
| 83 | { |
| 84 | UTF8Encoding encoding; |
| 85 | TextConverter converter(encoding, encoding); |
| 86 | |
| 87 | std::string empty; |
| 88 | std::string result0; |
| 89 | int errors = converter.convert(empty, result0); |
| 90 | assertTrue (result0 == empty); |
| 91 | assertTrue (errors == 0); |
| 92 | |
| 93 | std::string fooBar = "foo bar" ; |
| 94 | std::string result1; |
| 95 | errors = converter.convert(fooBar, result1); |
| 96 | assertTrue (result1 == fooBar); |
| 97 | assertTrue (errors == 0); |
| 98 | |
| 99 | std::string result2; |
| 100 | errors = converter.convert(fooBar.data(), (int) fooBar.length(), result2); |
| 101 | assertTrue (result2 == fooBar); |
| 102 | assertTrue (errors == 0); |
| 103 | |
| 104 | std::string result3; |
| 105 | errors = converter.convert("" , 0, result3); |
| 106 | assertTrue (result3.empty()); |
| 107 | assertTrue (errors == 0); |
| 108 | |
| 109 | const unsigned char greek[] = {0x20, 0xce, 0xba, 0xe1, 0xbd, 0xb9, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5, 0x20, 0x00}; |
| 110 | std::string text((const char*) greek); |
| 111 | |
| 112 | std::string result4; |
| 113 | errors = converter.convert(text, result4); |
| 114 | assertTrue (result4 == text); |
| 115 | assertTrue (errors == 0); |
| 116 | |
| 117 | std::string result5; |
| 118 | errors = converter.convert((char*) greek, 13, result5); |
| 119 | assertTrue (result5 == text); |
| 120 | assertTrue (errors == 0); |
| 121 | |
| 122 | std::string x = "x" ; |
| 123 | std::string result6; |
| 124 | errors = converter.convert(x, result6); |
| 125 | assertTrue (result6 == x); |
| 126 | assertTrue (errors == 0); |
| 127 | |
| 128 | std::string result7; |
| 129 | errors = converter.convert("x" , 1, result7); |
| 130 | assertTrue (result7 == x); |
| 131 | assertTrue (errors == 0); |
| 132 | |
| 133 | std::string utfChar((char*) greek + 1, 2); |
| 134 | std::string result8; |
| 135 | errors = converter.convert(utfChar, result8); |
| 136 | assertTrue (result8 == utfChar); |
| 137 | assertTrue (errors == 0); |
| 138 | |
| 139 | std::string result9; |
| 140 | errors = converter.convert((char*) greek + 1, 2, result9); |
| 141 | assertTrue (result9 == utfChar); |
| 142 | assertTrue (errors == 0); |
| 143 | } |
| 144 | |
| 145 | |
| 146 | void TextConverterTest::testUTF8toASCII() |
| 147 | { |
| 148 | UTF8Encoding utf8Encoding; |
| 149 | ASCIIEncoding asciiEncoding; |
| 150 | TextConverter converter(utf8Encoding, asciiEncoding); |
| 151 | |
| 152 | const unsigned char greek[] = {0x20, 0xce, 0xba, 0xe1, 0xbd, 0xb9, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5, 0x20, 0x41, 0x42, 0x00}; |
| 153 | std::string text((const char*) greek); |
| 154 | std::string result0; |
| 155 | int errors = converter.convert(text, result0); |
| 156 | assertTrue (result0 == " ????? AB" ); |
| 157 | assertTrue (errors == 0); |
| 158 | |
| 159 | std::string result1; |
| 160 | errors = converter.convert("abcde" , 5, result1); |
| 161 | assertTrue (result1 == "abcde" ); |
| 162 | } |
| 163 | |
| 164 | |
| 165 | void TextConverterTest::testLatin1toUTF8() |
| 166 | { |
| 167 | Latin1Encoding latin1Encoding; |
| 168 | UTF8Encoding utf8Encoding; |
| 169 | TextConverter converter(latin1Encoding, utf8Encoding); |
| 170 | |
| 171 | const unsigned char latin1Chars[] = {'g', 252, 'n', 't', 'e', 'r', 0}; |
| 172 | const unsigned char utf8Chars[] = {'g', 195, 188, 'n', 't', 'e', 'r', 0}; |
| 173 | std::string latin1Text((const char*) latin1Chars); |
| 174 | std::string utf8Text((const char*) utf8Chars); |
| 175 | |
| 176 | std::string result0; |
| 177 | int errors = converter.convert(latin1Text, result0); |
| 178 | assertTrue (result0 == utf8Text); |
| 179 | assertTrue (errors == 0); |
| 180 | assertEqual((long) result0.size(), 7); |
| 181 | |
| 182 | std::string result1; |
| 183 | errors = converter.convert(latin1Chars, 6, result1); |
| 184 | assertTrue (result1 == utf8Text); |
| 185 | assertTrue (errors == 0); |
| 186 | } |
| 187 | |
| 188 | |
| 189 | void TextConverterTest::testLatin2toUTF8() |
| 190 | { |
| 191 | Latin2Encoding latinEncoding; |
| 192 | UTF8Encoding utf8Encoding; |
| 193 | TextConverter converter(latinEncoding, utf8Encoding); |
| 194 | |
| 195 | const unsigned char latinChars[26] = { 0xb5, 0xb9, 0xe8, 0xbb, 0xbe, 0xfd, 0xe1, 0xed, 0xe9, 0xfa, 0xe4, 0xf4, |
| 196 | 0x20, 0xa5, 0xa9, 0xc8, 0xab, 0xae, 0xdd, 0xc1, 0xcd, 0xc9, 0xda, 0xc4, 0xd4, 0x00 }; |
| 197 | const unsigned char utf8Chars[] = "ľščťžýáíéúäô ĽŠČŤŽÝÁÍÉÚÄÔ" ; |
| 198 | std::string latinText((const char*) latinChars); |
| 199 | std::string utf8Text((const char*) utf8Chars); |
| 200 | |
| 201 | std::string result0; |
| 202 | int errors = converter.convert(latinText, result0); |
| 203 | assertEqual (result0, utf8Text); |
| 204 | assertEqual ((long) errors, 0); |
| 205 | assertEqual((long) result0.size(), 49); |
| 206 | |
| 207 | std::string result1; |
| 208 | errors = converter.convert(latinChars, 25, result1); |
| 209 | assertEqual (result1, utf8Text); |
| 210 | assertEqual ((long) errors, 0); |
| 211 | assertEqual((long) result1.size(), 49); |
| 212 | } |
| 213 | |
| 214 | |
| 215 | void TextConverterTest::testLatin9toUTF8() |
| 216 | { |
| 217 | Latin9Encoding latinEncoding; |
| 218 | UTF8Encoding utf8Encoding; |
| 219 | TextConverter converter(latinEncoding, utf8Encoding); |
| 220 | |
| 221 | const unsigned char latinChars[26] = { 0x3f, 0xa8, 0x3f, 0x3f, 0xb8, 0xfd, 0xe1, 0xed, 0xe9, 0xfa, 0xe4, 0xf4, |
| 222 | 0x20, 0x3f, 0xa6, 0x3f, 0x3f, 0xb4, 0xdd, 0xc1, 0xcd, 0xc9, 0xda, 0xc4, 0xd4, 0x00 }; |
| 223 | const unsigned char utf8Chars[] = "?š??žýáíéúäô ?Š??ŽÝÁÍÉÚÄÔ" ; |
| 224 | std::string latinText((const char*) latinChars); |
| 225 | std::string utf8Text((const char*) utf8Chars); |
| 226 | |
| 227 | std::string result0; |
| 228 | int errors = converter.convert(latinText, result0); |
| 229 | assertEqual (result0, utf8Text); |
| 230 | assertEqual ((long) errors, 0); |
| 231 | assertEqual(result0.size(), 43); |
| 232 | |
| 233 | std::string result1; |
| 234 | errors = converter.convert(latinChars, 25, result1); |
| 235 | assertEqual(result1, utf8Text); |
| 236 | assertEqual((long) errors, 0); |
| 237 | assertEqual((long) result1.size(), 43); |
| 238 | } |
| 239 | |
| 240 | |
| 241 | void TextConverterTest::testCP1250toUTF8() |
| 242 | { |
| 243 | Windows1250Encoding latinEncoding; |
| 244 | UTF8Encoding utf8Encoding; |
| 245 | TextConverter converter(latinEncoding, utf8Encoding); |
| 246 | |
| 247 | const unsigned char latinChars[26] = { 0xbe, 0x9a, 0xe8, 0x9d, 0x9e, 0xfd, 0xe1, 0xed, 0xe9, 0xfa, 0xe4, 0xf4, |
| 248 | 0x20, 0xbc, 0x8a, 0xc8, 0x8d, 0x8e, 0xdd, 0xc1, 0xcd, 0xc9, 0xda, 0xc4, 0xd4, 0x00 }; |
| 249 | const unsigned char utf8Chars[] = "ľščťžýáíéúäô ĽŠČŤŽÝÁÍÉÚÄÔ" ; |
| 250 | std::string latinText((const char*) latinChars); |
| 251 | std::string utf8Text((const char*) utf8Chars); |
| 252 | |
| 253 | std::string result0; |
| 254 | int errors = converter.convert(latinText, result0); |
| 255 | assertEqual (result0, utf8Text); |
| 256 | assertEqual ((long) errors, 0); |
| 257 | assertEqual((long) result0.size(), 49); |
| 258 | |
| 259 | std::string result1; |
| 260 | errors = converter.convert(latinChars, 25, result1); |
| 261 | assertEqual(result1, utf8Text); |
| 262 | assertEqual((long) errors, 0); |
| 263 | assertEqual((long) result1.size(), 49); |
| 264 | } |
| 265 | |
| 266 | |
| 267 | void TextConverterTest::testCP1251toUTF8() |
| 268 | { |
| 269 | Windows1251Encoding latinEncoding; |
| 270 | UTF8Encoding utf8Encoding; |
| 271 | TextConverter converter(latinEncoding, utf8Encoding); |
| 272 | |
| 273 | const unsigned char latinChars[32] = { 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x00 }; |
| 274 | const unsigned char utf8Chars[] = "бвгдежзийклмнопрстуфхцчшщъыьэюя" ; |
| 275 | std::string latinText((const char*) latinChars); |
| 276 | std::string utf8Text((const char*) utf8Chars); |
| 277 | |
| 278 | std::string result0; |
| 279 | int errors = converter.convert(latinText, result0); |
| 280 | assertEqual (result0, utf8Text); |
| 281 | assertEqual ((long) errors, 0); |
| 282 | assertEqual((long) result0.size(), 62); |
| 283 | |
| 284 | std::string result1; |
| 285 | errors = converter.convert(latinChars, 31, result1); |
| 286 | assertEqual (result1, utf8Text); |
| 287 | assertEqual ((long) errors, 0); |
| 288 | assertEqual((long) result1.size(), 62); |
| 289 | } |
| 290 | |
| 291 | |
| 292 | void TextConverterTest::testCP1252toUTF8() |
| 293 | { |
| 294 | Windows1252Encoding latinEncoding; |
| 295 | UTF8Encoding utf8Encoding; |
| 296 | TextConverter converter(latinEncoding, utf8Encoding); |
| 297 | |
| 298 | const unsigned char latinChars[26] = { 0x3f, 0x9a, 0x3f, 0x3f, 0x9e, 0xfd, 0xe1, 0xed, 0xe9, 0xfa, 0xe4, 0xf4, |
| 299 | 0x20, 0x3f, 0x8a, 0x3f, 0x3f, 0x8e, 0xdd, 0xc1, 0xcd, 0xc9, 0xda, 0xc4, 0xd4, 0x00 }; |
| 300 | const unsigned char utf8Chars[] = "?š??žýáíéúäô ?Š??ŽÝÁÍÉÚÄÔ" ; |
| 301 | std::string latinText((const char*) latinChars); |
| 302 | std::string utf8Text((const char*) utf8Chars); |
| 303 | |
| 304 | std::string result0; |
| 305 | int errors = converter.convert(latinText, result0); |
| 306 | assertEqual(result0, utf8Text); |
| 307 | assertEqual((long) errors, 0); |
| 308 | assertEqual((long) result0.size(), 43); |
| 309 | |
| 310 | std::string result1; |
| 311 | errors = converter.convert(latinChars, 25, result1); |
| 312 | assertEqual(result1, utf8Text); |
| 313 | assertEqual((long) errors, 0); |
| 314 | assertEqual((long) result1.size(), 43); |
| 315 | } |
| 316 | |
| 317 | |
| 318 | void TextConverterTest::testErrors() |
| 319 | { |
| 320 | UTF8Encoding utf8Encoding; |
| 321 | Latin1Encoding latin1Encoding; |
| 322 | TextConverter converter(utf8Encoding, latin1Encoding); |
| 323 | |
| 324 | const unsigned char badChars[] = {'a', 'b', 255, 'c', 254, 0}; |
| 325 | std::string badText((const char*) badChars); |
| 326 | |
| 327 | std::string result; |
| 328 | int errors = converter.convert(badText, result); |
| 329 | assertTrue (errors == 2); |
| 330 | } |
| 331 | |
| 332 | |
| 333 | void TextConverterTest::setUp() |
| 334 | { |
| 335 | } |
| 336 | |
| 337 | |
| 338 | void TextConverterTest::tearDown() |
| 339 | { |
| 340 | } |
| 341 | |
| 342 | |
| 343 | CppUnit::Test* TextConverterTest::suite() |
| 344 | { |
| 345 | CppUnit::TestSuite* pSuite = new CppUnit::TestSuite("TextConverterTest" ); |
| 346 | |
| 347 | CppUnit_addTest(pSuite, TextConverterTest, testIdentityASCII); |
| 348 | CppUnit_addTest(pSuite, TextConverterTest, testIdentityUTF8); |
| 349 | CppUnit_addTest(pSuite, TextConverterTest, testUTF8toASCII); |
| 350 | CppUnit_addTest(pSuite, TextConverterTest, testLatin1toUTF8); |
| 351 | CppUnit_addTest(pSuite, TextConverterTest, testLatin2toUTF8); |
| 352 | CppUnit_addTest(pSuite, TextConverterTest, testLatin9toUTF8); |
| 353 | CppUnit_addTest(pSuite, TextConverterTest, testCP1250toUTF8); |
| 354 | CppUnit_addTest(pSuite, TextConverterTest, testCP1251toUTF8); |
| 355 | CppUnit_addTest(pSuite, TextConverterTest, testCP1252toUTF8); |
| 356 | CppUnit_addTest(pSuite, TextConverterTest, testErrors); |
| 357 | |
| 358 | return pSuite; |
| 359 | } |
| 360 | |
| 361 | #ifdef POCO_COMPILER_MSVC |
| 362 | #pragma warning(pop) |
| 363 | #endif // POCO_COMPILER_MSVC |
| 364 | |