| 1 | // | 
|---|
| 2 | // TextConverterTest.cpp | 
|---|
| 3 | // | 
|---|
| 4 | // Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. | 
|---|
| 5 | // and Contributors. | 
|---|
| 6 | // | 
|---|
| 7 | // SPDX-License-Identifier:	BSL-1.0 | 
|---|
| 8 | // | 
|---|
| 9 |  | 
|---|
| 10 |  | 
|---|
| 11 | #include "TextConverterTest.h" | 
|---|
| 12 | #include "Poco/CppUnit/TestCaller.h" | 
|---|
| 13 | #include "Poco/CppUnit/TestSuite.h" | 
|---|
| 14 | #include "Poco/TextConverter.h" | 
|---|
| 15 | #include "Poco/ASCIIEncoding.h" | 
|---|
| 16 | #include "Poco/Latin1Encoding.h" | 
|---|
| 17 | #include "Poco/Latin2Encoding.h" | 
|---|
| 18 | #include "Poco/Latin9Encoding.h" | 
|---|
| 19 | #include "Poco/Windows1250Encoding.h" | 
|---|
| 20 | #include "Poco/Windows1251Encoding.h" | 
|---|
| 21 | #include "Poco/Windows1252Encoding.h" | 
|---|
| 22 | #include "Poco/UTF8Encoding.h" | 
|---|
| 23 |  | 
|---|
| 24 | #ifdef POCO_COMPILER_MSVC | 
|---|
| 25 | #pragma warning(push) | 
|---|
| 26 | #pragma warning(disable : 4267) | 
|---|
| 27 | #endif // POCO_COMPILER_MSVC | 
|---|
| 28 |  | 
|---|
| 29 | using namespace Poco; | 
|---|
| 30 |  | 
|---|
| 31 |  | 
|---|
| 32 | TextConverterTest::TextConverterTest(const std::string& rName): CppUnit::TestCase(rName) | 
|---|
| 33 | { | 
|---|
| 34 | } | 
|---|
| 35 |  | 
|---|
| 36 |  | 
|---|
| 37 | TextConverterTest::~TextConverterTest() | 
|---|
| 38 | { | 
|---|
| 39 | } | 
|---|
| 40 |  | 
|---|
| 41 |  | 
|---|
| 42 | void TextConverterTest::testIdentityASCII() | 
|---|
| 43 | { | 
|---|
| 44 | ASCIIEncoding encoding; | 
|---|
| 45 | TextConverter converter(encoding, encoding); | 
|---|
| 46 |  | 
|---|
| 47 | std::string empty; | 
|---|
| 48 | std::string result0; | 
|---|
| 49 | int errors = converter.convert(empty, result0); | 
|---|
| 50 | assertTrue (result0 == empty); | 
|---|
| 51 | assertTrue (errors == 0); | 
|---|
| 52 |  | 
|---|
| 53 | std::string fooBar = "foo bar"; | 
|---|
| 54 | std::string result1; | 
|---|
| 55 | errors = converter.convert(fooBar, result1); | 
|---|
| 56 | assertTrue (result1 == fooBar); | 
|---|
| 57 | assertTrue (errors == 0); | 
|---|
| 58 |  | 
|---|
| 59 | std::string result2; | 
|---|
| 60 | errors = converter.convert(fooBar.data(), (int) fooBar.length(), result2); | 
|---|
| 61 | assertTrue (result2 == fooBar); | 
|---|
| 62 | assertTrue (errors == 0); | 
|---|
| 63 |  | 
|---|
| 64 | std::string result3; | 
|---|
| 65 | errors = converter.convert( "", 0, result3); | 
|---|
| 66 | assertTrue (result3.empty()); | 
|---|
| 67 | assertTrue (errors == 0); | 
|---|
| 68 |  | 
|---|
| 69 | std::string x = "x"; | 
|---|
| 70 | std::string result4; | 
|---|
| 71 | errors = converter.convert(x, result4); | 
|---|
| 72 | assertTrue (result4 == x); | 
|---|
| 73 | assertTrue (errors == 0); | 
|---|
| 74 |  | 
|---|
| 75 | std::string result5; | 
|---|
| 76 | errors = converter.convert( "x", 1, result5); | 
|---|
| 77 | assertTrue (result5 == x); | 
|---|
| 78 | assertTrue (errors == 0); | 
|---|
| 79 | } | 
|---|
| 80 |  | 
|---|
| 81 |  | 
|---|
| 82 | void TextConverterTest::testIdentityUTF8() | 
|---|
| 83 | { | 
|---|
| 84 | UTF8Encoding encoding; | 
|---|
| 85 | TextConverter converter(encoding, encoding); | 
|---|
| 86 |  | 
|---|
| 87 | std::string empty; | 
|---|
| 88 | std::string result0; | 
|---|
| 89 | int errors = converter.convert(empty, result0); | 
|---|
| 90 | assertTrue (result0 == empty); | 
|---|
| 91 | assertTrue (errors == 0); | 
|---|
| 92 |  | 
|---|
| 93 | std::string fooBar = "foo bar"; | 
|---|
| 94 | std::string result1; | 
|---|
| 95 | errors = converter.convert(fooBar, result1); | 
|---|
| 96 | assertTrue (result1 == fooBar); | 
|---|
| 97 | assertTrue (errors == 0); | 
|---|
| 98 |  | 
|---|
| 99 | std::string result2; | 
|---|
| 100 | errors = converter.convert(fooBar.data(), (int) fooBar.length(), result2); | 
|---|
| 101 | assertTrue (result2 == fooBar); | 
|---|
| 102 | assertTrue (errors == 0); | 
|---|
| 103 |  | 
|---|
| 104 | std::string result3; | 
|---|
| 105 | errors = converter.convert( "", 0, result3); | 
|---|
| 106 | assertTrue (result3.empty()); | 
|---|
| 107 | assertTrue (errors == 0); | 
|---|
| 108 |  | 
|---|
| 109 | const unsigned char greek[] = {0x20, 0xce, 0xba, 0xe1, 0xbd, 0xb9, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5, 0x20, 0x00}; | 
|---|
| 110 | std::string text((const char*) greek); | 
|---|
| 111 |  | 
|---|
| 112 | std::string result4; | 
|---|
| 113 | errors = converter.convert(text, result4); | 
|---|
| 114 | assertTrue (result4 == text); | 
|---|
| 115 | assertTrue (errors == 0); | 
|---|
| 116 |  | 
|---|
| 117 | std::string result5; | 
|---|
| 118 | errors = converter.convert((char*) greek, 13, result5); | 
|---|
| 119 | assertTrue (result5 == text); | 
|---|
| 120 | assertTrue (errors == 0); | 
|---|
| 121 |  | 
|---|
| 122 | std::string x = "x"; | 
|---|
| 123 | std::string result6; | 
|---|
| 124 | errors = converter.convert(x, result6); | 
|---|
| 125 | assertTrue (result6 == x); | 
|---|
| 126 | assertTrue (errors == 0); | 
|---|
| 127 |  | 
|---|
| 128 | std::string result7; | 
|---|
| 129 | errors = converter.convert( "x", 1, result7); | 
|---|
| 130 | assertTrue (result7 == x); | 
|---|
| 131 | assertTrue (errors == 0); | 
|---|
| 132 |  | 
|---|
| 133 | std::string utfChar((char*) greek + 1, 2); | 
|---|
| 134 | std::string result8; | 
|---|
| 135 | errors = converter.convert(utfChar, result8); | 
|---|
| 136 | assertTrue (result8 == utfChar); | 
|---|
| 137 | assertTrue (errors == 0); | 
|---|
| 138 |  | 
|---|
| 139 | std::string result9; | 
|---|
| 140 | errors = converter.convert((char*) greek + 1, 2, result9); | 
|---|
| 141 | assertTrue (result9 == utfChar); | 
|---|
| 142 | assertTrue (errors == 0); | 
|---|
| 143 | } | 
|---|
| 144 |  | 
|---|
| 145 |  | 
|---|
| 146 | void TextConverterTest::testUTF8toASCII() | 
|---|
| 147 | { | 
|---|
| 148 | UTF8Encoding utf8Encoding; | 
|---|
| 149 | ASCIIEncoding asciiEncoding; | 
|---|
| 150 | TextConverter converter(utf8Encoding, asciiEncoding); | 
|---|
| 151 |  | 
|---|
| 152 | const unsigned char greek[] = {0x20, 0xce, 0xba, 0xe1, 0xbd, 0xb9, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5, 0x20, 0x41, 0x42, 0x00}; | 
|---|
| 153 | std::string text((const char*) greek); | 
|---|
| 154 | std::string result0; | 
|---|
| 155 | int errors = converter.convert(text, result0); | 
|---|
| 156 | assertTrue (result0 == " ????? AB"); | 
|---|
| 157 | assertTrue (errors == 0); | 
|---|
| 158 |  | 
|---|
| 159 | std::string result1; | 
|---|
| 160 | errors = converter.convert( "abcde", 5, result1); | 
|---|
| 161 | assertTrue (result1 == "abcde"); | 
|---|
| 162 | } | 
|---|
| 163 |  | 
|---|
| 164 |  | 
|---|
| 165 | void TextConverterTest::testLatin1toUTF8() | 
|---|
| 166 | { | 
|---|
| 167 | Latin1Encoding latin1Encoding; | 
|---|
| 168 | UTF8Encoding utf8Encoding; | 
|---|
| 169 | TextConverter converter(latin1Encoding, utf8Encoding); | 
|---|
| 170 |  | 
|---|
| 171 | const unsigned char latin1Chars[] = {'g', 252, 'n', 't', 'e', 'r', 0}; | 
|---|
| 172 | const unsigned char utf8Chars[]   = {'g', 195, 188, 'n', 't', 'e', 'r', 0}; | 
|---|
| 173 | std::string latin1Text((const char*) latin1Chars); | 
|---|
| 174 | std::string utf8Text((const char*) utf8Chars); | 
|---|
| 175 |  | 
|---|
| 176 | std::string result0; | 
|---|
| 177 | int errors = converter.convert(latin1Text, result0); | 
|---|
| 178 | assertTrue (result0 == utf8Text); | 
|---|
| 179 | assertTrue (errors == 0); | 
|---|
| 180 | assertEqual((long) result0.size(), 7); | 
|---|
| 181 |  | 
|---|
| 182 | std::string result1; | 
|---|
| 183 | errors = converter.convert(latin1Chars, 6, result1); | 
|---|
| 184 | assertTrue (result1 == utf8Text); | 
|---|
| 185 | assertTrue (errors == 0); | 
|---|
| 186 | } | 
|---|
| 187 |  | 
|---|
| 188 |  | 
|---|
| 189 | void TextConverterTest::testLatin2toUTF8() | 
|---|
| 190 | { | 
|---|
| 191 | Latin2Encoding latinEncoding; | 
|---|
| 192 | UTF8Encoding utf8Encoding; | 
|---|
| 193 | TextConverter converter(latinEncoding, utf8Encoding); | 
|---|
| 194 |  | 
|---|
| 195 | const unsigned char latinChars[26] = { 	0xb5, 0xb9, 0xe8, 0xbb, 0xbe, 0xfd, 0xe1, 0xed, 0xe9, 0xfa, 0xe4, 0xf4, | 
|---|
| 196 | 0x20, 0xa5, 0xa9, 0xc8, 0xab, 0xae, 0xdd, 0xc1, 0xcd, 0xc9, 0xda, 0xc4, 0xd4, 0x00 }; | 
|---|
| 197 | const unsigned char utf8Chars[] = "ľščťžýáíéúäô ĽŠČŤŽÝÁÍÉÚÄÔ"; | 
|---|
| 198 | std::string latinText((const char*) latinChars); | 
|---|
| 199 | std::string utf8Text((const char*) utf8Chars); | 
|---|
| 200 |  | 
|---|
| 201 | std::string result0; | 
|---|
| 202 | int errors = converter.convert(latinText, result0); | 
|---|
| 203 | assertEqual (result0, utf8Text); | 
|---|
| 204 | assertEqual ((long) errors, 0); | 
|---|
| 205 | assertEqual((long) result0.size(), 49); | 
|---|
| 206 |  | 
|---|
| 207 | std::string result1; | 
|---|
| 208 | errors = converter.convert(latinChars, 25, result1); | 
|---|
| 209 | assertEqual (result1, utf8Text); | 
|---|
| 210 | assertEqual ((long) errors, 0); | 
|---|
| 211 | assertEqual((long) result1.size(), 49); | 
|---|
| 212 | } | 
|---|
| 213 |  | 
|---|
| 214 |  | 
|---|
| 215 | void TextConverterTest::testLatin9toUTF8() | 
|---|
| 216 | { | 
|---|
| 217 | Latin9Encoding latinEncoding; | 
|---|
| 218 | UTF8Encoding utf8Encoding; | 
|---|
| 219 | TextConverter converter(latinEncoding, utf8Encoding); | 
|---|
| 220 |  | 
|---|
| 221 | const unsigned char latinChars[26] = { 	0x3f, 0xa8, 0x3f, 0x3f, 0xb8, 0xfd, 0xe1, 0xed, 0xe9, 0xfa, 0xe4, 0xf4, | 
|---|
| 222 | 0x20, 0x3f, 0xa6, 0x3f, 0x3f, 0xb4, 0xdd, 0xc1, 0xcd, 0xc9, 0xda, 0xc4, 0xd4, 0x00 }; | 
|---|
| 223 | const unsigned char utf8Chars[] = "?š??žýáíéúäô ?Š??ŽÝÁÍÉÚÄÔ"; | 
|---|
| 224 | std::string latinText((const char*) latinChars); | 
|---|
| 225 | std::string utf8Text((const char*) utf8Chars); | 
|---|
| 226 |  | 
|---|
| 227 | std::string result0; | 
|---|
| 228 | int errors = converter.convert(latinText, result0); | 
|---|
| 229 | assertEqual (result0, utf8Text); | 
|---|
| 230 | assertEqual ((long) errors, 0); | 
|---|
| 231 | assertEqual(result0.size(), 43); | 
|---|
| 232 |  | 
|---|
| 233 | std::string result1; | 
|---|
| 234 | errors = converter.convert(latinChars, 25, result1); | 
|---|
| 235 | assertEqual(result1, utf8Text); | 
|---|
| 236 | assertEqual((long) errors, 0); | 
|---|
| 237 | assertEqual((long) result1.size(), 43); | 
|---|
| 238 | } | 
|---|
| 239 |  | 
|---|
| 240 |  | 
|---|
| 241 | void TextConverterTest::testCP1250toUTF8() | 
|---|
| 242 | { | 
|---|
| 243 | Windows1250Encoding latinEncoding; | 
|---|
| 244 | UTF8Encoding utf8Encoding; | 
|---|
| 245 | TextConverter converter(latinEncoding, utf8Encoding); | 
|---|
| 246 |  | 
|---|
| 247 | const unsigned char latinChars[26] = { 	0xbe, 0x9a, 0xe8, 0x9d, 0x9e, 0xfd, 0xe1, 0xed, 0xe9, 0xfa, 0xe4, 0xf4, | 
|---|
| 248 | 0x20, 0xbc, 0x8a, 0xc8, 0x8d, 0x8e, 0xdd, 0xc1, 0xcd, 0xc9, 0xda, 0xc4, 0xd4, 0x00 }; | 
|---|
| 249 | const unsigned char utf8Chars[] = "ľščťžýáíéúäô ĽŠČŤŽÝÁÍÉÚÄÔ"; | 
|---|
| 250 | std::string latinText((const char*) latinChars); | 
|---|
| 251 | std::string utf8Text((const char*) utf8Chars); | 
|---|
| 252 |  | 
|---|
| 253 | std::string result0; | 
|---|
| 254 | int errors = converter.convert(latinText, result0); | 
|---|
| 255 | assertEqual (result0, utf8Text); | 
|---|
| 256 | assertEqual ((long) errors, 0); | 
|---|
| 257 | assertEqual((long) result0.size(), 49); | 
|---|
| 258 |  | 
|---|
| 259 | std::string result1; | 
|---|
| 260 | errors = converter.convert(latinChars, 25, result1); | 
|---|
| 261 | assertEqual(result1, utf8Text); | 
|---|
| 262 | assertEqual((long) errors, 0); | 
|---|
| 263 | assertEqual((long) result1.size(), 49); | 
|---|
| 264 | } | 
|---|
| 265 |  | 
|---|
| 266 |  | 
|---|
| 267 | void TextConverterTest::testCP1251toUTF8() | 
|---|
| 268 | { | 
|---|
| 269 | Windows1251Encoding latinEncoding; | 
|---|
| 270 | UTF8Encoding utf8Encoding; | 
|---|
| 271 | TextConverter converter(latinEncoding, utf8Encoding); | 
|---|
| 272 |  | 
|---|
| 273 | const unsigned char latinChars[32] = { 	0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 0x00 }; | 
|---|
| 274 | const unsigned char utf8Chars[] = "бвгдежзийклмнопрстуфхцчшщъыьэюя"; | 
|---|
| 275 | std::string latinText((const char*) latinChars); | 
|---|
| 276 | std::string utf8Text((const char*) utf8Chars); | 
|---|
| 277 |  | 
|---|
| 278 | std::string result0; | 
|---|
| 279 | int errors = converter.convert(latinText, result0); | 
|---|
| 280 | assertEqual (result0, utf8Text); | 
|---|
| 281 | assertEqual ((long) errors, 0); | 
|---|
| 282 | assertEqual((long) result0.size(), 62); | 
|---|
| 283 |  | 
|---|
| 284 | std::string result1; | 
|---|
| 285 | errors = converter.convert(latinChars, 31, result1); | 
|---|
| 286 | assertEqual (result1, utf8Text); | 
|---|
| 287 | assertEqual ((long) errors, 0); | 
|---|
| 288 | assertEqual((long) result1.size(), 62); | 
|---|
| 289 | } | 
|---|
| 290 |  | 
|---|
| 291 |  | 
|---|
| 292 | void TextConverterTest::testCP1252toUTF8() | 
|---|
| 293 | { | 
|---|
| 294 | Windows1252Encoding latinEncoding; | 
|---|
| 295 | UTF8Encoding utf8Encoding; | 
|---|
| 296 | TextConverter converter(latinEncoding, utf8Encoding); | 
|---|
| 297 |  | 
|---|
| 298 | const unsigned char latinChars[26] = { 	0x3f, 0x9a, 0x3f, 0x3f, 0x9e, 0xfd, 0xe1, 0xed, 0xe9, 0xfa, 0xe4, 0xf4, | 
|---|
| 299 | 0x20, 0x3f, 0x8a, 0x3f, 0x3f, 0x8e, 0xdd, 0xc1, 0xcd, 0xc9, 0xda, 0xc4, 0xd4, 0x00 }; | 
|---|
| 300 | const unsigned char utf8Chars[] = "?š??žýáíéúäô ?Š??ŽÝÁÍÉÚÄÔ"; | 
|---|
| 301 | std::string latinText((const char*) latinChars); | 
|---|
| 302 | std::string utf8Text((const char*) utf8Chars); | 
|---|
| 303 |  | 
|---|
| 304 | std::string result0; | 
|---|
| 305 | int errors = converter.convert(latinText, result0); | 
|---|
| 306 | assertEqual(result0, utf8Text); | 
|---|
| 307 | assertEqual((long) errors, 0); | 
|---|
| 308 | assertEqual((long) result0.size(), 43); | 
|---|
| 309 |  | 
|---|
| 310 | std::string result1; | 
|---|
| 311 | errors = converter.convert(latinChars, 25, result1); | 
|---|
| 312 | assertEqual(result1, utf8Text); | 
|---|
| 313 | assertEqual((long) errors, 0); | 
|---|
| 314 | assertEqual((long) result1.size(), 43); | 
|---|
| 315 | } | 
|---|
| 316 |  | 
|---|
| 317 |  | 
|---|
| 318 | void TextConverterTest::testErrors() | 
|---|
| 319 | { | 
|---|
| 320 | UTF8Encoding utf8Encoding; | 
|---|
| 321 | Latin1Encoding latin1Encoding; | 
|---|
| 322 | TextConverter converter(utf8Encoding, latin1Encoding); | 
|---|
| 323 |  | 
|---|
| 324 | const unsigned char badChars[] = {'a', 'b', 255, 'c', 254, 0}; | 
|---|
| 325 | std::string badText((const char*) badChars); | 
|---|
| 326 |  | 
|---|
| 327 | std::string result; | 
|---|
| 328 | int errors = converter.convert(badText, result); | 
|---|
| 329 | assertTrue (errors == 2); | 
|---|
| 330 | } | 
|---|
| 331 |  | 
|---|
| 332 |  | 
|---|
| 333 | void TextConverterTest::setUp() | 
|---|
| 334 | { | 
|---|
| 335 | } | 
|---|
| 336 |  | 
|---|
| 337 |  | 
|---|
| 338 | void TextConverterTest::tearDown() | 
|---|
| 339 | { | 
|---|
| 340 | } | 
|---|
| 341 |  | 
|---|
| 342 |  | 
|---|
| 343 | CppUnit::Test* TextConverterTest::suite() | 
|---|
| 344 | { | 
|---|
| 345 | CppUnit::TestSuite* pSuite = new CppUnit::TestSuite( "TextConverterTest"); | 
|---|
| 346 |  | 
|---|
| 347 | CppUnit_addTest(pSuite, TextConverterTest, testIdentityASCII); | 
|---|
| 348 | CppUnit_addTest(pSuite, TextConverterTest, testIdentityUTF8); | 
|---|
| 349 | CppUnit_addTest(pSuite, TextConverterTest, testUTF8toASCII); | 
|---|
| 350 | CppUnit_addTest(pSuite, TextConverterTest, testLatin1toUTF8); | 
|---|
| 351 | CppUnit_addTest(pSuite, TextConverterTest, testLatin2toUTF8); | 
|---|
| 352 | CppUnit_addTest(pSuite, TextConverterTest, testLatin9toUTF8); | 
|---|
| 353 | CppUnit_addTest(pSuite, TextConverterTest, testCP1250toUTF8); | 
|---|
| 354 | CppUnit_addTest(pSuite, TextConverterTest, testCP1251toUTF8); | 
|---|
| 355 | CppUnit_addTest(pSuite, TextConverterTest, testCP1252toUTF8); | 
|---|
| 356 | CppUnit_addTest(pSuite, TextConverterTest, testErrors); | 
|---|
| 357 |  | 
|---|
| 358 | return pSuite; | 
|---|
| 359 | } | 
|---|
| 360 |  | 
|---|
| 361 | #ifdef POCO_COMPILER_MSVC | 
|---|
| 362 | #pragma warning(pop) | 
|---|
| 363 | #endif // POCO_COMPILER_MSVC | 
|---|
| 364 |  | 
|---|