| 1 | // |
| 2 | // UTF16Encoding.h |
| 3 | // |
| 4 | // Library: Foundation |
| 5 | // Package: Text |
| 6 | // Module: UTF16Encoding |
| 7 | // |
| 8 | // Definition of the UTF16Encoding class. |
| 9 | // |
| 10 | // Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH. |
| 11 | // and Contributors. |
| 12 | // |
| 13 | // SPDX-License-Identifier: BSL-1.0 |
| 14 | // |
| 15 | |
| 16 | |
| 17 | #ifndef Foundation_UTF16Encoding_INCLUDED |
| 18 | #define Foundation_UTF16Encoding_INCLUDED |
| 19 | |
| 20 | |
| 21 | #include "Poco/Foundation.h" |
| 22 | #include "Poco/TextEncoding.h" |
| 23 | |
| 24 | |
| 25 | namespace Poco { |
| 26 | |
| 27 | |
| 28 | class Foundation_API UTF16Encoding: public TextEncoding |
| 29 | /// UTF-16 text encoding, as defined in RFC 2781. |
| 30 | /// |
| 31 | /// When converting from UTF-16 to Unicode, surrogates are |
| 32 | /// reported as they are - in other words, surrogate pairs |
| 33 | /// are not combined into one Unicode character. |
| 34 | /// When converting from Unicode to UTF-16, however, characters |
| 35 | /// outside the 16-bit range are converted into a low and |
| 36 | /// high surrogate. |
| 37 | { |
| 38 | public: |
| 39 | enum ByteOrderType |
| 40 | { |
| 41 | BIG_ENDIAN_BYTE_ORDER, |
| 42 | LITTLE_ENDIAN_BYTE_ORDER, |
| 43 | NATIVE_BYTE_ORDER |
| 44 | }; |
| 45 | |
| 46 | UTF16Encoding(ByteOrderType byteOrder = NATIVE_BYTE_ORDER); |
| 47 | /// Creates and initializes the encoding for the given byte order. |
| 48 | |
| 49 | UTF16Encoding(int byteOrderMark); |
| 50 | /// Creates and initializes the encoding for the byte-order |
| 51 | /// indicated by the given byte-order mark, which is the Unicode |
| 52 | /// character 0xFEFF. |
| 53 | |
| 54 | ~UTF16Encoding(); |
| 55 | |
| 56 | ByteOrderType getByteOrder() const; |
| 57 | /// Returns the byte-order currently in use. |
| 58 | |
| 59 | void setByteOrder(ByteOrderType byteOrder); |
| 60 | /// Sets the byte order. |
| 61 | |
| 62 | void setByteOrder(int byteOrderMark); |
| 63 | /// Sets the byte order according to the given |
| 64 | /// byte order mark, which is the Unicode |
| 65 | /// character 0xFEFF. |
| 66 | |
| 67 | const char* canonicalName() const; |
| 68 | bool isA(const std::string& encodingName) const; |
| 69 | const CharacterMap& characterMap() const; |
| 70 | int convert(const unsigned char* bytes) const; |
| 71 | int convert(int ch, unsigned char* bytes, int length) const; |
| 72 | int queryConvert(const unsigned char* bytes, int length) const; |
| 73 | int sequenceLength(const unsigned char* bytes, int length) const; |
| 74 | |
| 75 | private: |
| 76 | bool _flipBytes; |
| 77 | static const char* _names[]; |
| 78 | static const CharacterMap _charMap; |
| 79 | }; |
| 80 | |
| 81 | |
| 82 | } // namespace Poco |
| 83 | |
| 84 | |
| 85 | #endif // Foundation_UTF16Encoding_INCLUDED |
| 86 | |