1//
2// UTF16Encoding.cpp
3//
4// Library: Foundation
5// Package: Text
6// Module: UTF16Encoding
7//
8// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH.
9// and Contributors.
10//
11// SPDX-License-Identifier: BSL-1.0
12//
13
14
15#include "Poco/UTF16Encoding.h"
16#include "Poco/ByteOrder.h"
17#include "Poco/String.h"
18
19
20namespace Poco {
21
22
23const char* UTF16Encoding::_names[] =
24{
25 "UTF-16",
26 "UTF16",
27 NULL
28};
29
30
31const TextEncoding::CharacterMap UTF16Encoding::_charMap =
32{
33 /* 00 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
34 /* 10 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
35 /* 20 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
36 /* 30 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
37 /* 40 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
38 /* 50 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
39 /* 60 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
40 /* 70 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
41 /* 80 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
42 /* 90 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
43 /* a0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
44 /* b0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
45 /* c0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
46 /* d0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
47 /* e0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
48 /* f0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
49};
50
51
52UTF16Encoding::UTF16Encoding(ByteOrderType byteOrder)
53{
54 setByteOrder(byteOrder);
55}
56
57
58UTF16Encoding::UTF16Encoding(int byteOrderMark)
59{
60 setByteOrder(byteOrderMark);
61}
62
63
64UTF16Encoding::~UTF16Encoding()
65{
66}
67
68
69UTF16Encoding::ByteOrderType UTF16Encoding::getByteOrder() const
70{
71#if defined(POCO_ARCH_BIG_ENDIAN)
72 return _flipBytes ? LITTLE_ENDIAN_BYTE_ORDER : BIG_ENDIAN_BYTE_ORDER;
73#else
74 return _flipBytes ? BIG_ENDIAN_BYTE_ORDER : LITTLE_ENDIAN_BYTE_ORDER;
75#endif
76}
77
78
79void UTF16Encoding::setByteOrder(ByteOrderType byteOrder)
80{
81#if defined(POCO_ARCH_BIG_ENDIAN)
82 _flipBytes = byteOrder == LITTLE_ENDIAN_BYTE_ORDER;
83#else
84 _flipBytes = byteOrder == BIG_ENDIAN_BYTE_ORDER;;
85#endif
86}
87
88
89void UTF16Encoding::setByteOrder(int byteOrderMark)
90{
91 _flipBytes = byteOrderMark != 0xFEFF;
92}
93
94
95const char* UTF16Encoding::canonicalName() const
96{
97 return _names[0];
98}
99
100
101bool UTF16Encoding::isA(const std::string& encodingName) const
102{
103 for (const char** name = _names; *name; ++name)
104 {
105 if (Poco::icompare(encodingName, *name) == 0)
106 return true;
107 }
108 return false;
109}
110
111
112const TextEncoding::CharacterMap& UTF16Encoding::characterMap() const
113{
114 return _charMap;
115}
116
117
118int UTF16Encoding::convert(const unsigned char* bytes) const
119{
120 UInt16 uc;
121 unsigned char* p = (unsigned char*) &uc;
122 *p++ = *bytes++;
123 *p++ = *bytes++;
124
125 if (_flipBytes)
126 {
127 ByteOrder::flipBytes(uc);
128 }
129
130 if (uc >= 0xd800 && uc < 0xdc00)
131 {
132 UInt16 uc2;
133 p = (unsigned char*) &uc2;
134 *p++ = *bytes++;
135 *p++ = *bytes++;
136
137 if (_flipBytes)
138 {
139 ByteOrder::flipBytes(uc2);
140 }
141 if (uc2 >= 0xdc00 && uc2 < 0xe000)
142 {
143 return ((uc & 0x3ff) << 10) + (uc2 & 0x3ff) + 0x10000;
144 }
145 else
146 {
147 return -1;
148 }
149 }
150 else
151 {
152 return uc;
153 }
154}
155
156
157int UTF16Encoding::convert(int ch, unsigned char* bytes, int length) const
158{
159 if (ch <= 0xFFFF)
160 {
161 if (bytes && length >= 2)
162 {
163 UInt16 ch1 = _flipBytes ? ByteOrder::flipBytes((UInt16) ch) : (UInt16) ch;
164 unsigned char* p = (unsigned char*) &ch1;
165 *bytes++ = *p++;
166 *bytes++ = *p++;
167 }
168 return 2;
169 }
170 else
171 {
172 if (bytes && length >= 4)
173 {
174 int ch1 = ch - 0x10000;
175 UInt16 w1 = 0xD800 + ((ch1 >> 10) & 0x3FF);
176 UInt16 w2 = 0xDC00 + (ch1 & 0x3FF);
177 if (_flipBytes)
178 {
179 w1 = ByteOrder::flipBytes(w1);
180 w2 = ByteOrder::flipBytes(w2);
181 }
182 unsigned char* p = (unsigned char*) &w1;
183 *bytes++ = *p++;
184 *bytes++ = *p++;
185 p = (unsigned char*) &w2;
186 *bytes++ = *p++;
187 *bytes++ = *p++;
188 }
189 return 4;
190 }
191}
192
193
194int UTF16Encoding::queryConvert(const unsigned char* bytes, int length) const
195{
196 int ret = -2;
197
198 if (length >= 2)
199 {
200 UInt16 uc;
201 unsigned char* p = (unsigned char*) &uc;
202 *p++ = *bytes++;
203 *p++ = *bytes++;
204 if (_flipBytes)
205 ByteOrder::flipBytes(uc);
206 if (uc >= 0xd800 && uc < 0xdc00)
207 {
208 if (length >= 4)
209 {
210 UInt16 uc2;
211 p = (unsigned char*) &uc2;
212 *p++ = *bytes++;
213 *p++ = *bytes++;
214 if (_flipBytes)
215 ByteOrder::flipBytes(uc2);
216 if (uc2 >= 0xdc00 && uc < 0xe000)
217 {
218 ret = ((uc & 0x3ff) << 10) + (uc2 & 0x3ff) + 0x10000;
219 }
220 else
221 {
222 ret = -1; // Malformed sequence
223 }
224 }
225 else
226 {
227 ret = -4; // surrogate pair, four bytes needed
228 }
229 }
230 else
231 {
232 ret = uc;
233 }
234 }
235
236 return ret;
237}
238
239
240int UTF16Encoding::sequenceLength(const unsigned char* bytes, int length) const
241{
242 int ret = -2;
243
244 if (_flipBytes)
245 {
246 if (length >= 1)
247 {
248 unsigned char c = *bytes;
249 if (c >= 0xd8 && c < 0xdc)
250 ret = 4;
251 else
252 ret = 2;
253 }
254 }
255 else
256 {
257 if (length >= 2)
258 {
259 UInt16 uc;
260 unsigned char* p = (unsigned char*) &uc;
261 *p++ = *bytes++;
262 *p++ = *bytes++;
263 if (uc >= 0xd800 && uc < 0xdc00)
264 ret = 4;
265 else
266 ret = 2;
267 }
268 }
269 return ret;
270}
271
272
273} // namespace Poco
274