1// [Blend2D]
2// 2D Vector Graphics Powered by a JIT Compiler.
3//
4// [License]
5// Zlib - See LICENSE.md file in the package.
6
7#ifndef BLEND2D_BLUNICODE_P_H
8#define BLEND2D_BLUNICODE_P_H
9
10#include "./blsupport_p.h"
11
12//! \cond INTERNAL
13//! \addtogroup blend2d_internal
14//! \{
15
16// ============================================================================
17// [Unicode Data]
18// ============================================================================
19
//! Lookup table of 256 entries that is indexed by a byte value, see `blUtf8CharSize()`.
//!
//! NOTE(review): presumably each entry holds the length of the UTF-8 sequence
//! introduced by that byte - the table itself is defined elsewhere, confirm there.
BL_HIDDEN extern const uint8_t blUtf8SizeData[256];
21
22// ============================================================================
23// [Unicode Constants]
24// ============================================================================
25
//! Special unicode characters and code-point range boundaries.
enum BLCharCode : uint32_t {
  //! Native Byte-Order-Mark.
  BL_CHAR_BOM = 0xFEFFu,
  //! Last code-point.
  BL_CHAR_MAX = 0x10FFFFu,

  //! Replacement character.
  BL_CHAR_REPLACEMENT = 0xFFFDu,

  //! First char in Mongolian 'free variation selectors' FVS1..FVS3.
  BL_CHAR_FVS1 = 0x180Bu,
  //! Last char in Mongolian 'free variation selectors' FVS1..FVS3.
  BL_CHAR_FVS3 = 0x180Du,

  //! First char in 'variation selectors' VS1..VS16.
  BL_CHAR_VS1 = 0xFE00u,
  //! Last char in 'variation selectors' VS1..VS16.
  BL_CHAR_VS16 = 0xFE0Fu,

  //! First char in 'variation selectors supplement' VS17..VS256.
  BL_CHAR_VS17 = 0xE0100u,
  //! Last char in 'variation selectors supplement' VS17..VS256.
  BL_CHAR_VS256 = 0xE01EFu,

  //! First surrogate code-point.
  BL_CHAR_SURROGATE_FIRST = 0xD800u,
  //! Last surrogate code-point.
  BL_CHAR_SURROGATE_LAST = 0xDFFFu,

  //! First high-surrogate code-point (the surrogate range starts with high surrogates).
  BL_CHAR_HI_SURROGATE_FIRST = BL_CHAR_SURROGATE_FIRST,
  //! Last high-surrogate code-point.
  BL_CHAR_HI_SURROGATE_LAST = 0xDBFFu,

  //! First low-surrogate code-point.
  BL_CHAR_LO_SURROGATE_FIRST = 0xDC00u,
  //! Last low-surrogate code-point (the surrogate range ends with low surrogates).
  BL_CHAR_LO_SURROGATE_LAST = BL_CHAR_SURROGATE_LAST
};
51
52//! Flags that can be used to parametrize unicode I/O iterators.
53enum BLUnicodeIOFlags : uint32_t {
54 BL_UNICODE_IO_UNALIGNED = 0x00000001u,
55 BL_UNICODE_IO_BYTE_SWAP = 0x00000002u,
56 BL_UNICODE_IO_STRICT = 0x00000004u,
57 BL_UNICODE_IO_CALC_INDEX = 0x00000008u,
58
59 BL_UNICODE_IO_BYTE_ORDER_LE = BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_LE ? 0 : BL_UNICODE_IO_BYTE_SWAP,
60 BL_UNICODE_IO_BYTE_ORDER_BE = BL_BYTE_ORDER_NATIVE == BL_BYTE_ORDER_BE ? 0 : BL_UNICODE_IO_BYTE_SWAP
61};
62
63// ============================================================================
64// [Unicode Utilities]
65// ============================================================================
66
67namespace {
68
69template<typename T>
70BL_INLINE uint32_t blUtf8CharSize(const T& c) noexcept {
71 typedef typename std::make_unsigned<T>::type U;
72 return blUtf8SizeData[U(c)];
73}
74
//! Tests whether `c` is either an ASCII byte `[0x00..0x7F]` or a byte in the
//! range `[0xC2..0xF4]`, which are the only bytes that can start a well-formed
//! UTF-8 sequence.
template<typename T>
constexpr bool blIsValidUtf8(const T& c) noexcept {
  using U = typename std::make_unsigned<T>::type;

  // The range check relies on unsigned wrap-around. When `U` is narrower than
  // `int` the subtraction is performed in `int` (integral promotion) and would
  // yield a negative number for bytes below 194, so the difference must be
  // narrowed back to `U` before it's compared.
  return U(c) < 128 || U(U(c) - U(194)) < U(245 - 194);
}
80
//! Tests whether `x` is an ASCII letter `[A-Za-z]`.
//!
//! Setting bit 0x20 folds upper-case letters onto lower-case ones, so only
//! a single range has to be checked.
template<typename T>
constexpr bool blIsAsciiAlpha(const T& x) noexcept {
  return !(T(x | 0x20) < T('a') || T(x | 0x20) > T('z'));
}
83
//! Tests whether `x` is an ASCII decimal digit `[0-9]`.
template<typename T>
constexpr bool blIsAsciiDigit(const T& x) noexcept {
  return T('0') <= x && x <= T('9');
}
86
//! Tests whether `x` is an ASCII letter `[A-Za-z]` or a decimal digit `[0-9]`.
template<typename T>
constexpr bool blIsAsciiAlnum(const T& x) noexcept {
  return blIsAsciiAlpha(x) || (T('0') <= x && x <= T('9'));
}
89
//! Converts an ASCII upper-case letter `[A-Z]` to lower-case, any other value
//! is returned unchanged.
template<typename T>
constexpr T blAsciiToLower(const T& x) noexcept {
  return (T('A') <= x && x <= T('Z')) ? T(x + T('a' - 'A')) : x;
}
92
//! Converts an ASCII lower-case letter `[a-z]` to upper-case, any other value
//! is returned unchanged.
template<typename T>
constexpr T blAsciiToUpper(const T& x) noexcept {
  return (T('a') <= x && x <= T('z')) ? T(x - T('a' - 'A')) : x;
}
95
96//! Tests whether the unicode character `uc` is high or low surrogate.
97template<typename T>
98constexpr bool blIsSurrogate(const T& uc) noexcept { return uc >= BL_CHAR_SURROGATE_FIRST && uc <= BL_CHAR_SURROGATE_LAST; }
99
100//! Tests whether the unicode character `uc` is a high (leading) surrogate.
101template<typename T>
102constexpr bool blIsHiSurrogate(const T& uc) noexcept { return uc >= BL_CHAR_HI_SURROGATE_FIRST && uc <= BL_CHAR_HI_SURROGATE_LAST; }
103
104//! Tests whether the unicode character `uc` is a low (trailing) surrogate.
105template<typename T>
106constexpr bool blIsLoSurrogate(const T& uc) noexcept { return uc >= BL_CHAR_LO_SURROGATE_FIRST && uc <= BL_CHAR_LO_SURROGATE_LAST; }
107
108//! Composes `hi` and `lo` surrogates into a unicode code-point.
109template<typename T>
110constexpr uint32_t blCharFromSurrogate(const T& hi, const T& lo) noexcept {
111 return (uint32_t(hi) << 10) + uint32_t(lo) - uint32_t((BL_CHAR_SURROGATE_FIRST << 10) + BL_CHAR_LO_SURROGATE_FIRST - 0x10000u);
112}
113
114//! Decomposes a unicode code-point into `hi` and `lo` surrogates.
115template<typename T>
116BL_INLINE void blCharToSurrogate(uint32_t uc, T& hi, T& lo) noexcept {
117 uc -= 0x10000u;
118 hi = T(BL_CHAR_HI_SURROGATE_FIRST | (uc >> 10));
119 lo = T(BL_CHAR_LO_SURROGATE_FIRST | (uc & 0x3FFu));
120}
121
122} // {anonymous}
123
124// ============================================================================
125// [Unicode Validation]
126// ============================================================================
127
128struct BLUnicodeValidationState {
129 size_t utf8Index;
130 size_t utf16Index;
131 size_t utf32Index;
132
133 BL_INLINE void reset() noexcept {
134 utf8Index = 0;
135 utf16Index = 0;
136 utf32Index = 0;
137 }
138
139 BL_INLINE bool hasSMP() const noexcept { return utf16Index != utf32Index; }
140};
141
//! Validates `sizeInBytes` bytes of `data` as text in the given `encoding`.
//!
//! NOTE(review): the implementation is not part of this header - presumably
//! `state` receives the UTF-8, UTF-16, and UTF-32 indexes reached by the
//! validator (also on failure); confirm against the implementation.
BL_HIDDEN BLResult blValidateUnicode(const void* data, size_t sizeInBytes, uint32_t encoding, BLUnicodeValidationState& state) noexcept;
143
144static BL_INLINE BLResult blValidateUtf8(const char* data, size_t size, BLUnicodeValidationState& state) noexcept {
145 return blValidateUnicode(data, size, BL_TEXT_ENCODING_UTF8, state);
146}
147
148static BL_INLINE BLResult blValidateUtf16(const uint16_t* data, size_t size, BLUnicodeValidationState& state) noexcept {
149 return blValidateUnicode(data, size * 2u, BL_TEXT_ENCODING_UTF16, state);
150}
151
152static BL_INLINE BLResult blValidateUtf32(const uint32_t* data, size_t size, BLUnicodeValidationState& state) noexcept {
153 return blValidateUnicode(data, size * 4u, BL_TEXT_ENCODING_UTF32, state);
154}
155
156// ============================================================================
157// [Conversion]
158// ============================================================================
159
160struct BLUnicodeConversionState {
161 size_t dstIndex;
162 size_t srcIndex;
163
164 BL_INLINE void reset() noexcept {
165 this->dstIndex = 0;
166 this->srcIndex = 0;
167 }
168};
169
//! Converts a string from one encoding to another.
//!
//! The convert function works at a byte level. All sizes here, including those
//! stored in `BLUnicodeConversionState`, are in bytes. So for example to convert
//! a single UTF-16 BMP character the source size must be 2, etc...
BL_HIDDEN BLResult blConvertUnicode(
  void* dst, size_t dstSizeInBytes, uint32_t dstEncoding,
  const void* src, size_t srcSizeInBytes, uint32_t srcEncoding, BLUnicodeConversionState& state) noexcept;
178
179// ============================================================================
180// [BLUtf8Reader]
181// ============================================================================
182
183//! UTF-8 reader.
184class BLUtf8Reader {
185public:
186 enum : uint32_t { kCharSize = 1 };
187
188 //! Current pointer.
189 const char* _ptr;
190 //! End of input.
191 const char* _end;
192 //! `index() - _utf32IndexSubtract` yields the current `utf32Index`.
193 size_t _utf32IndexSubtract;
194 //! Number of surrogates is required to calculate `utf16Index`.
195 size_t _utf16SurrogateCount;
196
197 BL_INLINE BLUtf8Reader(const void* data, size_t byteSize) noexcept {
198 reset(data, byteSize);
199 }
200
201 BL_INLINE void reset(const void* data, size_t byteSize) noexcept {
202 _ptr = static_cast<const char*>(data);
203 _end = static_cast<const char*>(data) + byteSize;
204 _utf32IndexSubtract = 0;
205 _utf16SurrogateCount = 0;
206 }
207
208 BL_INLINE bool hasNext() const noexcept { return _ptr != _end; }
209 BL_INLINE size_t remainingByteSize() const noexcept { return (size_t)(_end - _ptr); }
210
211 BL_INLINE size_t byteIndex(const void* start) const noexcept { return (size_t)(_ptr - static_cast<const char*>(start)); }
212 BL_INLINE size_t utf8Index(const void* start) const noexcept { return byteIndex(start); }
213 BL_INLINE size_t utf16Index(const void* start) const noexcept { return utf32Index(start) + _utf16SurrogateCount; }
214 BL_INLINE size_t utf32Index(const void* start) const noexcept { return byteIndex(start) - _utf32IndexSubtract; }
215 BL_INLINE size_t nativeIndex(const void* start) const noexcept { return utf8Index(start); }
216
217 template<uint32_t Flags = 0>
218 BL_INLINE BLResult next(uint32_t& uc) noexcept {
219 size_t ucSizeInBytes;
220 return next<Flags>(uc, ucSizeInBytes);
221 }
222
223 template<uint32_t Flags = 0>
224 BL_INLINE BLResult next(uint32_t& uc, size_t& ucSizeInBytes) noexcept {
225 BL_ASSERT(hasNext());
226
227 uc = blMemReadU8(_ptr);
228 ucSizeInBytes = 1;
229
230 _ptr++;
231 if (uc < 0x80u) {
232 // 1-Byte UTF-8 Sequence -> [0x00..0x7F].
233 // ...nothing to do...
234 }
235 else {
236 // Start of MultiByte.
237 const uint32_t kMultiByte = 0xC2u;
238
239 uc -= kMultiByte;
240 if (uc < 0xE0u - kMultiByte) {
241 // 2-Byte UTF-8 Sequence -> [0x80-0x7FF].
242 _ptr++;
243 ucSizeInBytes = 2;
244
245 // Truncated input.
246 if (BL_UNLIKELY(_ptr > _end))
247 goto TruncatedString;
248
249 // All consecutive bytes must be '10xxxxxx'.
250 uint32_t b1 = blMemReadU8(_ptr - 1) ^ 0x80u;
251 uc = ((uc + kMultiByte - 0xC0u) << 6) + b1;
252
253 if (BL_UNLIKELY(b1 > 0x3Fu))
254 goto InvalidString;
255
256 // 2-Byte UTF-8 maps to one UTF-16 or UTF-32 code-point, so subtract 1.
257 if (Flags & BL_UNICODE_IO_CALC_INDEX) _utf32IndexSubtract += 1;
258 }
259 else if (uc < 0xF0u - kMultiByte) {
260 // 3-Byte UTF-8 Sequence -> [0x800-0xFFFF].
261 _ptr += 2;
262 ucSizeInBytes = 3;
263
264 // Truncated input.
265 if (BL_UNLIKELY(_ptr > _end))
266 goto TruncatedString;
267
268 uint32_t b1 = blMemReadU8(_ptr - 2) ^ 0x80u;
269 uint32_t b2 = blMemReadU8(_ptr - 1) ^ 0x80u;
270 uc = ((uc + kMultiByte - 0xE0u) << 12) + (b1 << 6) + b2;
271
272 // 1. All consecutive bytes must be '10xxxxxx'.
273 // 2. Refuse overlong UTF-8.
274 if (BL_UNLIKELY((b1 | b2) > 0x3Fu || uc < 0x800u))
275 goto InvalidString;
276
277 // 3-Byte UTF-8 maps to one UTF-16 or UTF-32 code-point, so subtract 2.
278 if (Flags & BL_UNICODE_IO_CALC_INDEX) _utf32IndexSubtract += 2;
279 }
280 else {
281 // 4-Byte UTF-8 Sequence -> [0x010000-0x10FFFF].
282 _ptr += 3;
283 ucSizeInBytes = 4;
284
285 // Truncated input.
286 if (BL_UNLIKELY(_ptr > _end)) {
287 // If this happens we want to report a correct error, bytes 0xF5
288 // and above are always invalid and normally caught later.
289 if (uc >= 0xF5u - kMultiByte)
290 goto InvalidString;
291 else
292 goto TruncatedString;
293 }
294
295 uint32_t b1 = blMemReadU8(_ptr - 3) ^ 0x80u;
296 uint32_t b2 = blMemReadU8(_ptr - 2) ^ 0x80u;
297 uint32_t b3 = blMemReadU8(_ptr - 1) ^ 0x80u;
298 uc = ((uc + kMultiByte - 0xF0u) << 18) + (b1 << 12) + (b2 << 6) + b3;
299
300 // 1. All consecutive bytes must be '10xxxxxx'.
301 // 2. Refuse overlong UTF-8.
302 // 3. Make sure the final character is <= U+10FFFF.
303 if (BL_UNLIKELY((b1 | b2 | b3) > 0x3Fu || uc < 0x010000u || uc > BL_CHAR_MAX))
304 goto InvalidString;
305
306 // 4-Byte UTF-8 maps to one UTF-16 or UTF-32 code-point, so subtract 3.
307 if (Flags & BL_UNICODE_IO_CALC_INDEX) _utf32IndexSubtract += 3;
308 if (Flags & BL_UNICODE_IO_CALC_INDEX) _utf16SurrogateCount += 1;
309 }
310 }
311 return BL_SUCCESS;
312
313InvalidString:
314 _ptr -= ucSizeInBytes;
315 return blTraceError(BL_ERROR_INVALID_STRING);
316
317TruncatedString:
318 _ptr -= ucSizeInBytes;
319 return blTraceError(BL_ERROR_DATA_TRUNCATED);
320 }
321
322 BL_INLINE void skipOneUnit() noexcept {
323 BL_ASSERT(hasNext());
324 _ptr++;
325 }
326
327 template<uint32_t Flags = 0>
328 BL_INLINE BLResult validate() noexcept {
329 BLResult result = BL_SUCCESS;
330 while (hasNext()) {
331 uint32_t uc;
332 result = next<Flags>(uc);
333 if (result)
334 break;
335 }
336 return result;
337 }
338};
339
340// ============================================================================
341// [BLUtf16Reader]
342// ============================================================================
343
344//! UTF-16 reader.
345class BLUtf16Reader {
346public:
347 enum : uint32_t { kCharSize = 2 };
348
349 const char* _ptr;
350 const char* _end;
351
352 size_t _utf8IndexAdd;
353 size_t _utf16SurrogateCount;
354
355 BL_INLINE BLUtf16Reader(const void* data, size_t byteSize) noexcept {
356 reset(data, byteSize);
357 }
358
359 // --------------------------------------------------------------------------
360 // [Reset]
361 // --------------------------------------------------------------------------
362
363 BL_INLINE void reset(const void* data, size_t byteSize) noexcept {
364 _ptr = static_cast<const char*>(data);
365 _end = static_cast<const char*>(data) + blAlignDown(byteSize, 2);
366 _utf8IndexAdd = 0;
367 _utf16SurrogateCount = 0;
368 }
369
370 // --------------------------------------------------------------------------
371 // [Accessors]
372 // --------------------------------------------------------------------------
373
374 BL_INLINE bool hasNext() const noexcept { return _ptr != _end; }
375 BL_INLINE size_t remainingByteSize() const noexcept { return (size_t)(_end - _ptr); }
376
377 BL_INLINE size_t byteIndex(const void* start) const noexcept { return (size_t)(_ptr - static_cast<const char*>(start)); }
378 BL_INLINE size_t utf8Index(const void* start) const noexcept { return utf16Index(start) + _utf8IndexAdd; }
379 BL_INLINE size_t utf16Index(const void* start) const noexcept { return byteIndex(start) / 2u; }
380 BL_INLINE size_t utf32Index(const void* start) const noexcept { return utf16Index(start) - _utf16SurrogateCount; }
381 BL_INLINE size_t nativeIndex(const void* start) const noexcept { return utf16Index(start); }
382
383 // --------------------------------------------------------------------------
384 // [Iterator]
385 // --------------------------------------------------------------------------
386
387 template<uint32_t Flags = 0>
388 BL_INLINE BLResult next(uint32_t& uc) noexcept {
389 size_t ucSizeInBytes;
390 return next<Flags>(uc, ucSizeInBytes);
391 }
392
393 template<uint32_t Flags = 0>
394 BL_INLINE BLResult next(uint32_t& uc, size_t& ucSizeInBytes) noexcept {
395 BL_ASSERT(hasNext());
396
397 uc = readU16<Flags>(_ptr);
398 _ptr += 2;
399
400 if (blIsSurrogate(uc)) {
401 if (BL_LIKELY(blIsHiSurrogate(uc))) {
402 if (BL_LIKELY(_ptr != _end)) {
403 uint32_t lo = readU16<Flags>(_ptr);
404 if (BL_LIKELY(blIsLoSurrogate(lo))) {
405 uc = blCharFromSurrogate(uc, lo);
406 _ptr += 2;
407
408 // Add two to `_utf8IndexAdd` as two surrogates count as 2, so we
409 // have to add 2 more to have UTF-8 length of a valid surrogate.
410 if (Flags & BL_UNICODE_IO_CALC_INDEX) _utf8IndexAdd += 2;
411 if (Flags & BL_UNICODE_IO_CALC_INDEX) _utf16SurrogateCount += 1;
412
413 ucSizeInBytes = 4;
414 return BL_SUCCESS;
415 }
416 else {
417 if (Flags & BL_UNICODE_IO_STRICT)
418 goto InvalidString;
419 }
420 }
421 else {
422 if (Flags & BL_UNICODE_IO_STRICT)
423 goto TruncatedString;
424 }
425 }
426 else {
427 if (Flags & BL_UNICODE_IO_STRICT)
428 goto InvalidString;
429 }
430 }
431
432 // Either not surrogate or fallback in non-strict mode.
433 if (Flags & BL_UNICODE_IO_CALC_INDEX)
434 _utf8IndexAdd += size_t(uc >= 0x0080u) + size_t(uc >= 0x0800u);
435
436 ucSizeInBytes = 2;
437 return BL_SUCCESS;
438
439InvalidString:
440 _ptr -= 2;
441 return blTraceError(BL_ERROR_INVALID_STRING);
442
443TruncatedString:
444 _ptr -= 2;
445 return blTraceError(BL_ERROR_DATA_TRUNCATED);
446 }
447
448 BL_INLINE void skipOneUnit() noexcept {
449 BL_ASSERT(hasNext());
450 _ptr += 2;
451 }
452
453 // --------------------------------------------------------------------------
454 // [Validator]
455 // --------------------------------------------------------------------------
456
457 template<uint32_t Flags = 0>
458 BL_INLINE BLResult validate() noexcept {
459 BLResult result = BL_SUCCESS;
460 while (hasNext()) {
461 uint32_t uc;
462 result = next<Flags>(uc);
463 if (result)
464 break;
465 }
466 return result;
467 }
468
469 // --------------------------------------------------------------------------
470 // [Utilities]
471 // --------------------------------------------------------------------------
472
473 template<uint32_t Flags = 0>
474 static BL_INLINE uint32_t readU16(const char* ptr) noexcept {
475 constexpr uint32_t kByteOrder = Flags & BL_UNICODE_IO_BYTE_SWAP ? BL_BYTE_ORDER_SWAPPED : BL_BYTE_ORDER_NATIVE;
476 constexpr uint32_t kAlignment = Flags & BL_UNICODE_IO_UNALIGNED ? 1 : 2;
477 return blMemReadU16<kByteOrder, kAlignment>(ptr);
478 }
479};
480
481// ============================================================================
482// [BLUtf32Reader]
483// ============================================================================
484
485//! UTF-32 reader.
486class BLUtf32Reader {
487public:
488 enum : uint32_t { kCharSize = 4 };
489
490 const char* _ptr;
491 const char* _end;
492
493 size_t _utf8IndexAdd;
494 size_t _utf16SurrogateCount;
495
496 BL_INLINE BLUtf32Reader(const void* data, size_t byteSize) noexcept {
497 reset(data, byteSize);
498 }
499
500 BL_INLINE void reset(const void* data, size_t byteSize) noexcept {
501 _ptr = static_cast<const char*>(data);
502 _end = static_cast<const char*>(data) + blAlignDown(byteSize, 4);
503 _utf8IndexAdd = 0;
504 _utf16SurrogateCount = 0;
505 }
506
507 BL_INLINE bool hasNext() const noexcept { return _ptr != _end; }
508 BL_INLINE size_t remainingByteSize() const noexcept { return (size_t)(_end - _ptr); }
509
510 BL_INLINE size_t byteIndex(const void* start) const noexcept { return (size_t)(_ptr - static_cast<const char*>(start)); }
511 BL_INLINE size_t utf8Index(const void* start) const noexcept { return utf32Index(start) + _utf16SurrogateCount + _utf8IndexAdd; }
512 BL_INLINE size_t utf16Index(const void* start) const noexcept { return utf32Index(start) + _utf16SurrogateCount; }
513 BL_INLINE size_t utf32Index(const void* start) const noexcept { return byteIndex(start) / 4u; }
514 BL_INLINE size_t nativeIndex(const void* start) const noexcept { return utf32Index(start); }
515
516 template<uint32_t Flags = 0>
517 BL_INLINE BLResult next(uint32_t& uc) noexcept {
518 size_t ucSizeInBytes;
519 return next<Flags>(uc, ucSizeInBytes);
520 }
521
522 template<uint32_t Flags = 0>
523 BL_INLINE BLResult next(uint32_t& uc, size_t& ucSizeInBytes) noexcept {
524 BL_ASSERT(hasNext());
525
526 uc = readU32<Flags>(_ptr);
527 if (BL_UNLIKELY(uc > BL_CHAR_MAX))
528 return blTraceError(BL_ERROR_INVALID_STRING);
529
530 if (Flags & BL_UNICODE_IO_STRICT) {
531 if (BL_UNLIKELY(blIsSurrogate(uc)))
532 return blTraceError(BL_ERROR_INVALID_STRING);
533 }
534
535 if (Flags & BL_UNICODE_IO_CALC_INDEX) _utf8IndexAdd += size_t(uc >= 0x800u) + size_t(uc >= 0x80u);
536 if (Flags & BL_UNICODE_IO_CALC_INDEX) _utf16SurrogateCount += size_t(uc >= 0x10000u);
537
538 _ptr += 4;
539 ucSizeInBytes = 4;
540 return BL_SUCCESS;
541 }
542
543 BL_INLINE void skipOneUnit() noexcept {
544 BL_ASSERT(hasNext());
545 _ptr += 4;
546 }
547
548 template<uint32_t Flags = 0>
549 BL_INLINE BLResult validate() noexcept {
550 BLResult result = BL_SUCCESS;
551 while (hasNext()) {
552 uint32_t uc;
553 result = next<Flags>(uc);
554 if (result)
555 break;
556 }
557 return result;
558 }
559
560 template<uint32_t Flags = 0>
561 static BL_INLINE uint32_t readU32(const char* ptr) noexcept {
562 constexpr uint32_t kByteOrder = Flags & BL_UNICODE_IO_BYTE_SWAP ? BL_BYTE_ORDER_SWAPPED : BL_BYTE_ORDER_NATIVE;
563 constexpr uint32_t kAlignment = Flags & BL_UNICODE_IO_UNALIGNED ? 1 : 4;
564 return blMemReadU32<kByteOrder, kAlignment>(ptr);
565 }
566};
567
568// ============================================================================
569// [BLUtf8Writer]
570// ============================================================================
571
572//! UTF8 writer.
573class BLUtf8Writer {
574public:
575 typedef char CharType;
576
577 char* _ptr;
578 char* _end;
579
580 BL_INLINE BLUtf8Writer(char* dst, size_t size) noexcept {
581 reset(dst, size);
582 }
583
584 BL_INLINE void reset(char* dst, size_t size) noexcept {
585 _ptr = dst;
586 _end = dst + size;
587 }
588
589 BL_INLINE size_t index(const char* start) const noexcept {
590 return (size_t)(_ptr - start);
591 }
592
593 BL_INLINE bool atEnd() const noexcept { return _ptr == _end; }
594 BL_INLINE size_t remainingSize() const noexcept { return (size_t)(_end - _ptr); }
595
596 BL_INLINE BLResult write(uint32_t uc) noexcept {
597 if (uc <= 0x7F)
598 return writeByte(uc);
599 else if (uc <= 0x7FFu)
600 return write2Bytes(uc);
601 else if (uc <= 0xFFFFu)
602 return write3Bytes(uc);
603 else
604 return write4Bytes(uc);
605 }
606
607 BL_INLINE BLResult writeUnsafe(uint32_t uc) noexcept {
608 if (uc <= 0x7F)
609 return writeByteUnsafe(uc);
610 else if (uc <= 0x7FFu)
611 return write2BytesUnsafe(uc);
612 else if (uc <= 0xFFFFu)
613 return write3BytesUnsafe(uc);
614 else
615 return write4BytesUnsafe(uc);
616 }
617
618 BL_INLINE BLResult writeByte(uint32_t uc) noexcept {
619 BL_ASSERT(uc <= 0x7Fu);
620 if (BL_UNLIKELY(atEnd()))
621 return blTraceError(BL_ERROR_NO_SPACE_LEFT);
622
623 _ptr[0] = char(uint8_t(uc));
624 _ptr++;
625 return BL_SUCCESS;
626 }
627
628 BL_INLINE BLResult writeByteUnsafe(uint32_t uc) noexcept {
629 BL_ASSERT(remainingSize() >= 1);
630 _ptr[0] = char(uint8_t(uc));
631 _ptr++;
632 return BL_SUCCESS;
633 }
634
635 BL_INLINE BLResult write2Bytes(uint32_t uc) noexcept {
636 BL_ASSERT(uc >= 0x80u && uc <= 0x7FFu);
637
638 _ptr += 2;
639 if (BL_UNLIKELY(_ptr > _end)) {
640 _ptr -= 2;
641 return blTraceError(BL_ERROR_NO_SPACE_LEFT);
642 }
643
644 _ptr[-2] = char(uint8_t(0xC0u | (uc >> 6)));
645 _ptr[-1] = char(uint8_t(0x80u | (uc & 63)));
646 return BL_SUCCESS;
647 }
648
649 BL_INLINE BLResult write2BytesUnsafe(uint32_t uc) noexcept {
650 BL_ASSERT(remainingSize() >= 2);
651 BL_ASSERT(uc >= 0x80u && uc <= 0x7FFu);
652
653 _ptr[0] = char(uint8_t(0xC0u | (uc >> 6)));
654 _ptr[1] = char(uint8_t(0x80u | (uc & 63)));
655
656 _ptr += 2;
657 return BL_SUCCESS;
658 }
659
660 BL_INLINE BLResult write3Bytes(uint32_t uc) noexcept {
661 BL_ASSERT(uc >= 0x800u && uc <= 0xFFFFu);
662
663 _ptr += 3;
664 if (BL_UNLIKELY(_ptr > _end)) {
665 _ptr -= 3;
666 return blTraceError(BL_ERROR_NO_SPACE_LEFT);
667 }
668
669 _ptr[-3] = char(uint8_t(0xE0u | ((uc >> 12) )));
670 _ptr[-2] = char(uint8_t(0x80u | ((uc >> 6) & 63)));
671 _ptr[-1] = char(uint8_t(0x80u | ((uc ) & 63)));
672 return BL_SUCCESS;
673 }
674
675 BL_INLINE BLResult write3BytesUnsafe(uint32_t uc) noexcept {
676 BL_ASSERT(remainingSize() >= 3);
677 BL_ASSERT(uc >= 0x800u && uc <= 0xFFFFu);
678
679 _ptr[0] = char(uint8_t(0xE0u | ((uc >> 12) )));
680 _ptr[1] = char(uint8_t(0x80u | ((uc >> 6) & 63)));
681 _ptr[2] = char(uint8_t(0x80u | ((uc ) & 63)));
682
683 _ptr += 3;
684 return BL_SUCCESS;
685 }
686
687 BL_INLINE BLResult write4Bytes(uint32_t uc) noexcept {
688 BL_ASSERT(uc >= 0x10000u && uc <= 0x10FFFFu);
689
690 _ptr += 4;
691 if (BL_UNLIKELY(_ptr > _end)) {
692 _ptr -= 4;
693 return blTraceError(BL_ERROR_NO_SPACE_LEFT);
694 }
695
696 _ptr[-4] = char(uint8_t(0xF0u | ((uc >> 18) )));
697 _ptr[-3] = char(uint8_t(0x80u | ((uc >> 12) & 63)));
698 _ptr[-2] = char(uint8_t(0x80u | ((uc >> 6) & 63)));
699 _ptr[-1] = char(uint8_t(0x80u | ((uc ) & 63)));
700 return BL_SUCCESS;
701 }
702
703 BL_INLINE BLResult write4BytesUnsafe(uint32_t uc) noexcept {
704 BL_ASSERT(remainingSize() >= 4);
705 BL_ASSERT(uc >= 0x10000u && uc <= 0x10FFFFu);
706
707 _ptr[0] = char(uint8_t(0xF0u | ((uc >> 18) )));
708 _ptr[1] = char(uint8_t(0x80u | ((uc >> 12) & 63)));
709 _ptr[2] = char(uint8_t(0x80u | ((uc >> 6) & 63)));
710 _ptr[3] = char(uint8_t(0x80u | ((uc ) & 63)));
711
712 _ptr += 4;
713 return BL_SUCCESS;
714 }
715};
716
717// ============================================================================
718// [BLUtf16Writer]
719// ============================================================================
720
721//! UTF16 writer that can be parametrized by `ByteOrder` and `Alignment`.
722template<uint32_t ByteOrder = BL_BYTE_ORDER_NATIVE, uint32_t Alignment = 2>
723class BLUtf16Writer {
724public:
725 typedef uint16_t CharType;
726
727 uint16_t* _ptr;
728 uint16_t* _end;
729
730 BL_INLINE BLUtf16Writer(uint16_t* dst, size_t size) noexcept {
731 reset(dst, size);
732 }
733
734 BL_INLINE void reset(uint16_t* dst, size_t size) noexcept {
735 _ptr = dst;
736 _end = dst + size;
737 }
738
739 BL_INLINE size_t index(const uint16_t* start) const noexcept {
740 return (size_t)(_ptr - start);
741 }
742
743 BL_INLINE bool atEnd() const noexcept { return _ptr == _end; }
744 BL_INLINE size_t remainingSize() const noexcept { return (size_t)(_end - _ptr); }
745
746 BL_INLINE BLResult write(uint32_t uc) noexcept {
747 if (uc <= 0xFFFFu)
748 return writeBMP(uc);
749 else
750 return writeSMP(uc);
751 }
752
753 BL_INLINE BLResult writeBMP(uint32_t uc) noexcept {
754 BL_ASSERT(uc <= 0xFFFFu);
755
756 if (BL_UNLIKELY(atEnd()))
757 return blTraceError(BL_ERROR_NO_SPACE_LEFT);
758
759 _blMemWriteU16(_ptr, uc);
760 _ptr++;
761 return BL_SUCCESS;
762 }
763
764 BL_INLINE BLResult writeBMPUnsafe(uint32_t uc) noexcept {
765 BL_ASSERT(remainingSize() >= 1);
766
767 _blMemWriteU16(_ptr, uc);
768 _ptr++;
769 return BL_SUCCESS;
770 }
771
772 BL_INLINE BLResult writeSMP(uint32_t uc) noexcept {
773 BL_ASSERT(uc >= 0x10000u && uc <= 0x10FFFFu);
774
775 _ptr += 2;
776 if (BL_UNLIKELY(_ptr > _end)) {
777 _ptr -= 2;
778 return blTraceError(BL_ERROR_NO_SPACE_LEFT);
779 }
780
781 uint32_t hi, lo;
782 blCharToSurrogate(uc, hi, lo);
783
784 _blMemWriteU16(_ptr - 2, hi);
785 _blMemWriteU16(_ptr - 1, lo);
786 return BL_SUCCESS;
787 }
788
789 BL_INLINE BLResult writeSMPUnsafe(uint32_t uc) noexcept {
790 BL_ASSERT(remainingSize() >= 2);
791 BL_ASSERT(uc >= 0x10000u && uc <= 0x10FFFFu);
792
793 uint32_t hi, lo;
794 blCharToSurrogate(uc, hi, lo);
795
796 _blMemWriteU16(_ptr + 0, hi);
797 _blMemWriteU16(_ptr + 1, lo);
798
799 _ptr += 2;
800 return BL_SUCCESS;
801 }
802
803 // --------------------------------------------------------------------------
804 // [Utilities]
805 // --------------------------------------------------------------------------
806
807 static BL_INLINE void _blMemWriteU16(void* dst, uint32_t value) noexcept {
808 blMemWriteU16<ByteOrder, Alignment>(dst, value);
809 }
810};
811
812// ============================================================================
813// [BLUtf32Writer]
814// ============================================================================
815
816//! UTF32 writer that can be parametrized by `ByteOrder` and `Alignment`.
817template<uint32_t ByteOrder = BL_BYTE_ORDER_NATIVE, uint32_t Alignment = 4>
818class BLUtf32Writer {
819public:
820 typedef uint32_t CharType;
821
822 uint32_t* _ptr;
823 uint32_t* _end;
824
825 BL_INLINE BLUtf32Writer(uint32_t* dst, size_t size) noexcept {
826 reset(dst, size);
827 }
828
829 BL_INLINE void reset(uint32_t* dst, size_t size) noexcept {
830 _ptr = dst;
831 _end = dst + size;
832 }
833
834 BL_INLINE size_t index(const uint32_t* start) const noexcept {
835 return (size_t)(_ptr - start);
836 }
837
838 BL_INLINE bool atEnd() const noexcept { return _ptr == _end; }
839 BL_INLINE size_t remainingSize() const noexcept { return (size_t)(_end - _ptr); }
840
841 BL_INLINE BLResult write(uint32_t uc) noexcept {
842 if (BL_UNLIKELY(atEnd()))
843 return blTraceError(BL_ERROR_NO_SPACE_LEFT);
844
845 _blMemWriteU32(_ptr, uc);
846 _ptr++;
847 return BL_SUCCESS;
848 }
849
850 static BL_INLINE void _blMemWriteU32(void* dst, uint32_t value) noexcept {
851 blMemWriteU32<ByteOrder, Alignment>(dst, value);
852 }
853};
854
855//! \}
856//! \endcond
857
858#endif // BLEND2D_BLUNICODE_P_H
859