1//
2// DoubleByteEncoding.h
3//
4// Library: Encodings
5// Package: Encodings
6// Module: DoubleByteEncoding
7//
8// Definition of the DoubleByteEncoding class.
9//
10// Copyright (c) 2018, Applied Informatics Software Engineering GmbH.
11// and Contributors.
12//
13// SPDX-License-Identifier: BSL-1.0
14//
15
16
17#ifndef Encodings_DoubleByteEncoding_INCLUDED
18#define Encodings_DoubleByteEncoding_INCLUDED
19
20
21#include "Poco/Encodings.h"
22#include "Poco/TextEncoding.h"
23
24
25namespace Poco {
26
27
28class Encodings_API DoubleByteEncoding: public TextEncoding
29 /// This abstract class is a base class for various double-byte character
30 /// set (DBCS) encodings.
31 ///
32 /// Double-byte encodings are variants of multi-byte encodings
33 /// where (Unicode) each code point is represented by one or
34 /// two bytes. Unicode code points are restricted to the
35 /// Basic Multilingual Plane.
36 ///
37 /// Subclasses must provide encoding names, a static CharacterMap, as well
38 /// as static Mapping and reverse Mapping tables, and provide these to the
39 /// DoubleByteEncoding constructor.
40{
41public:
42 struct Mapping
43 {
44 Poco::UInt16 from;
45 Poco::UInt16 to;
46 };
47
48 // TextEncoding
49 const char* canonicalName() const;
50 bool isA(const std::string& encodingName) const;
51 const CharacterMap& characterMap() const;
52 int convert(const unsigned char* bytes) const;
53 int convert(int ch, unsigned char* bytes, int length) const;
54 int queryConvert(const unsigned char* bytes, int length) const;
55 int sequenceLength(const unsigned char* bytes, int length) const;
56
57protected:
58 DoubleByteEncoding(const char** names, const TextEncoding::CharacterMap& charMap, const Mapping mappingTable[], std::size_t mappingTableSize, const Mapping reverseMappingTable[], std::size_t reverseMappingTableSize);
59 /// Creates a DoubleByteEncoding using the given mapping and reverse-mapping tables.
60 ///
61 /// names must be a static array declared in the derived class,
62 /// containing the names of this encoding, declared as:
63 ///
64 /// const char* MyEncoding::_names[] =
65 /// {
66 /// "myencoding",
67 /// "MyEncoding",
68 /// NULL
69 /// };
70 ///
71 /// The first entry in names must be the canonical name.
72 ///
73 /// charMap must be a static CharacterMap giving information about double-byte
74 /// character sequences.
75 ///
76 /// For each mappingTable item, from must be a value in range 0x0100 to
77 // 0xFFFF for double-byte mappings, which the most significant (upper) byte
78 /// representing the first character in the sequence and the lower byte
79 /// representing the second character in the sequence.
80 ///
81 /// For each reverseMappingTable item, from must be Unicode code point from the
82 /// Basic Multilingual Plane, and to is a one-byte or two-byte sequence.
83 /// As with mappingTable, a one-byte sequence is in range 0x00 to 0xFF, and a
84 /// two-byte sequence is in range 0x0100 to 0xFFFF.
85 ///
86 /// Unicode code points are restricted to the Basic Multilingual Plane
87 /// (code points 0x0000 to 0xFFFF).
88 ///
89 /// Items in both tables must be sorted by from, in ascending order.
90
91 ~DoubleByteEncoding();
92 /// Destroys the DoubleByteEncoding.
93
94 int map(Poco::UInt16 encoded) const;
95 /// Maps a double-byte encoded character to its Unicode code point.
96 ///
97 /// Returns the Unicode code point, or -1 if the encoded character is bad
98 /// and cannot be mapped.
99
100 int reverseMap(int cp) const;
101 /// Maps a Unicode code point to its double-byte representation.
102 ///
103 /// Returns -1 if the code point cannot be mapped, otherwise
104 /// a value in range 0 to 0xFF for single-byte mappings, or
105 /// 0x0100 to 0xFFFF for double-byte mappings.
106
107private:
108 DoubleByteEncoding();
109
110 const char** _names;
111 const TextEncoding::CharacterMap& _charMap;
112 const Mapping* _mappingTable;
113 const std::size_t _mappingTableSize;
114 const Mapping* _reverseMappingTable;
115 const std::size_t _reverseMappingTableSize;
116};
117
118
119} // namespace Poco
120
121
122#endif // Encodings_DoubleByteEncoding_INCLUDED
123