1 | // |
2 | // DoubleByteEncoding.h |
3 | // |
4 | // Library: Encodings |
5 | // Package: Encodings |
6 | // Module: DoubleByteEncoding |
7 | // |
8 | // Definition of the DoubleByteEncoding class. |
9 | // |
10 | // Copyright (c) 2018, Applied Informatics Software Engineering GmbH. |
11 | // and Contributors. |
12 | // |
13 | // SPDX-License-Identifier: BSL-1.0 |
14 | // |
15 | |
16 | |
17 | #ifndef Encodings_DoubleByteEncoding_INCLUDED |
18 | #define Encodings_DoubleByteEncoding_INCLUDED |
19 | |
20 | |
21 | #include "Poco/Encodings.h" |
22 | #include "Poco/TextEncoding.h" |
23 | |
24 | |
25 | namespace Poco { |
26 | |
27 | |
28 | class Encodings_API DoubleByteEncoding: public TextEncoding |
29 | /// This abstract class is a base class for various double-byte character |
30 | /// set (DBCS) encodings. |
31 | /// |
32 | /// Double-byte encodings are variants of multi-byte encodings |
33 | /// where each (Unicode) code point is represented by one or |
34 | /// two bytes. Unicode code points are restricted to the |
35 | /// Basic Multilingual Plane. |
36 | /// |
37 | /// Subclasses must provide encoding names, a static CharacterMap, as well |
38 | /// as static Mapping and reverse Mapping tables, and provide these to the |
39 | /// DoubleByteEncoding constructor. |
40 | { |
41 | public: |
42 | struct Mapping |
43 | { |
44 | Poco::UInt16 from; |
45 | Poco::UInt16 to; |
46 | }; |
47 | |
48 | // TextEncoding |
49 | const char* canonicalName() const; |
50 | bool isA(const std::string& encodingName) const; |
51 | const CharacterMap& characterMap() const; |
52 | int convert(const unsigned char* bytes) const; |
53 | int convert(int ch, unsigned char* bytes, int length) const; |
54 | int queryConvert(const unsigned char* bytes, int length) const; |
55 | int sequenceLength(const unsigned char* bytes, int length) const; |
56 | |
57 | protected: |
58 | DoubleByteEncoding(const char** names, const TextEncoding::CharacterMap& charMap, const Mapping mappingTable[], std::size_t mappingTableSize, const Mapping reverseMappingTable[], std::size_t reverseMappingTableSize); |
59 | /// Creates a DoubleByteEncoding using the given mapping and reverse-mapping tables. |
60 | /// |
61 | /// names must be a static array declared in the derived class, |
62 | /// containing the names of this encoding, declared as: |
63 | /// |
64 | /// const char* MyEncoding::_names[] = |
65 | /// { |
66 | /// "myencoding", |
67 | /// "MyEncoding", |
68 | /// NULL |
69 | /// }; |
70 | /// |
71 | /// The first entry in names must be the canonical name. |
72 | /// |
73 | /// charMap must be a static CharacterMap giving information about double-byte |
74 | /// character sequences. |
75 | /// |
76 | /// For each mappingTable item, from must be a value in range 0x0100 to |
77 | /// 0xFFFF for double-byte mappings, with the most significant (upper) byte |
78 | /// representing the first character in the sequence and the lower byte |
79 | /// representing the second character in the sequence. |
80 | /// |
81 | /// For each reverseMappingTable item, from must be a Unicode code point from the |
82 | /// Basic Multilingual Plane, and to is a one-byte or two-byte sequence. |
83 | /// As with mappingTable, a one-byte sequence is in range 0x00 to 0xFF, and a |
84 | /// two-byte sequence is in range 0x0100 to 0xFFFF. |
85 | /// |
86 | /// Unicode code points are restricted to the Basic Multilingual Plane |
87 | /// (code points 0x0000 to 0xFFFF). |
88 | /// |
89 | /// Items in both tables must be sorted by from, in ascending order. |
90 | |
91 | ~DoubleByteEncoding(); |
92 | /// Destroys the DoubleByteEncoding. |
93 | |
94 | int map(Poco::UInt16 encoded) const; |
95 | /// Maps a double-byte encoded character to its Unicode code point. |
96 | /// |
97 | /// Returns the Unicode code point, or -1 if the encoded character is bad |
98 | /// and cannot be mapped. |
99 | |
100 | int reverseMap(int cp) const; |
101 | /// Maps a Unicode code point to its double-byte representation. |
102 | /// |
103 | /// Returns -1 if the code point cannot be mapped, otherwise |
104 | /// a value in range 0 to 0xFF for single-byte mappings, or |
105 | /// 0x0100 to 0xFFFF for double-byte mappings. |
106 | |
107 | private: |
108 | DoubleByteEncoding(); |
109 | |
110 | const char** _names; |
111 | const TextEncoding::CharacterMap& _charMap; |
112 | const Mapping* _mappingTable; |
113 | const std::size_t _mappingTableSize; |
114 | const Mapping* _reverseMappingTable; |
115 | const std::size_t _reverseMappingTableSize; |
116 | }; |
117 | |
118 | |
119 | } // namespace Poco |
120 | |
121 | |
122 | #endif // Encodings_DoubleByteEncoding_INCLUDED |
123 | |