| 1 | // |
| 2 | // DoubleByteEncoding.h |
| 3 | // |
| 4 | // Library: Encodings |
| 5 | // Package: Encodings |
| 6 | // Module: DoubleByteEncoding |
| 7 | // |
| 8 | // Definition of the DoubleByteEncoding class. |
| 9 | // |
| 10 | // Copyright (c) 2018, Applied Informatics Software Engineering GmbH. |
| 11 | // and Contributors. |
| 12 | // |
| 13 | // SPDX-License-Identifier: BSL-1.0 |
| 14 | // |
| 15 | |
| 16 | |
| 17 | #ifndef Encodings_DoubleByteEncoding_INCLUDED |
| 18 | #define Encodings_DoubleByteEncoding_INCLUDED |
| 19 | |
| 20 | |
| 21 | #include "Poco/Encodings.h" |
| 22 | #include "Poco/TextEncoding.h" |
| 23 | |
| 24 | |
| 25 | namespace Poco { |
| 26 | |
| 27 | |
| 28 | class Encodings_API DoubleByteEncoding: public TextEncoding |
| 29 | /// This abstract class is a base class for various double-byte character |
| 30 | /// set (DBCS) encodings. |
| 31 | /// |
| 32 | /// Double-byte encodings are variants of multi-byte encodings |
| 33 | /// where each (Unicode) code point is represented by one or |
| 34 | /// two bytes. Unicode code points are restricted to the |
| 35 | /// Basic Multilingual Plane. |
| 36 | /// |
| 37 | /// Subclasses must provide encoding names, a static CharacterMap, as well |
| 38 | /// as static Mapping and reverse Mapping tables, and provide these to the |
| 39 | /// DoubleByteEncoding constructor. |
| 40 | { |
| 41 | public: |
| 42 | struct Mapping |
| 43 | { |
| 44 | Poco::UInt16 from; |
| 45 | Poco::UInt16 to; |
| 46 | }; |
| 47 | |
| 48 | // TextEncoding interface |
| 49 | const char* canonicalName() const; |
| 50 | bool isA(const std::string& encodingName) const; |
| 51 | const CharacterMap& characterMap() const; |
| 52 | int convert(const unsigned char* bytes) const; |
| 53 | int convert(int ch, unsigned char* bytes, int length) const; |
| 54 | int queryConvert(const unsigned char* bytes, int length) const; |
| 55 | int sequenceLength(const unsigned char* bytes, int length) const; |
| 56 | |
| 57 | protected: |
| 58 | DoubleByteEncoding(const char** names, const TextEncoding::CharacterMap& charMap, const Mapping mappingTable[], std::size_t mappingTableSize, const Mapping reverseMappingTable[], std::size_t reverseMappingTableSize); |
| 59 | /// Creates a DoubleByteEncoding using the given mapping and reverse-mapping tables. |
| 60 | /// |
| 61 | /// names must be a static array declared in the derived class, |
| 62 | /// containing the names of this encoding, declared as: |
| 63 | /// |
| 64 | /// const char* MyEncoding::_names[] = |
| 65 | /// { |
| 66 | /// "myencoding", |
| 67 | /// "MyEncoding", |
| 68 | /// NULL |
| 69 | /// }; |
| 70 | /// |
| 71 | /// The first entry in names must be the canonical name. |
| 72 | /// |
| 73 | /// charMap must be a static CharacterMap giving information about double-byte |
| 74 | /// character sequences. |
| 75 | /// |
| 76 | /// For each mappingTable item, from must be a value in range 0x0100 to |
| 77 | /// 0xFFFF for double-byte mappings, with the most significant (upper) byte |
| 78 | /// representing the first byte in the sequence and the lower byte |
| 79 | /// representing the second byte in the sequence. |
| 80 | /// |
| 81 | /// For each reverseMappingTable item, from must be a Unicode code point from the |
| 82 | /// Basic Multilingual Plane, and to is a one-byte or two-byte sequence. |
| 83 | /// As with mappingTable, a one-byte sequence is in range 0x00 to 0xFF, and a |
| 84 | /// two-byte sequence is in range 0x0100 to 0xFFFF. |
| 85 | /// |
| 86 | /// Unicode code points are restricted to the Basic Multilingual Plane |
| 87 | /// (code points 0x0000 to 0xFFFF). |
| 88 | /// |
| 89 | /// Items in both tables must be sorted by from, in ascending order. |
| 90 | |
| 91 | ~DoubleByteEncoding(); |
| 92 | /// Destroys the DoubleByteEncoding. |
| 93 | |
| 94 | int map(Poco::UInt16 encoded) const; |
| 95 | /// Maps a double-byte encoded character to its Unicode code point. |
| 96 | /// |
| 97 | /// Returns the Unicode code point, or -1 if the encoded character is bad |
| 98 | /// and cannot be mapped. |
| 99 | |
| 100 | int reverseMap(int cp) const; |
| 101 | /// Maps a Unicode code point to its double-byte representation. |
| 102 | /// |
| 103 | /// Returns -1 if the code point cannot be mapped, otherwise |
| 104 | /// a value in range 0 to 0xFF for single-byte mappings, or |
| 105 | /// 0x0100 to 0xFFFF for double-byte mappings. |
| 106 | |
| 107 | private: |
| 108 | DoubleByteEncoding(); |
| 109 | |
| 110 | const char** _names; |
| 111 | const TextEncoding::CharacterMap& _charMap; |
| 112 | const Mapping* _mappingTable; |
| 113 | const std::size_t _mappingTableSize; |
| 114 | const Mapping* _reverseMappingTable; |
| 115 | const std::size_t _reverseMappingTableSize; |
| 116 | }; |
| 117 | |
| 118 | |
| 119 | } // namespace Poco |
| 120 | |
| 121 | |
| 122 | #endif // Encodings_DoubleByteEncoding_INCLUDED |
| 123 | |