1 | // |
2 | // Unicode.cpp |
3 | // |
4 | // Library: Foundation |
5 | // Package: Text |
6 | // Module: Unicode |
7 | // |
8 | // Copyright (c) 2007, Applied Informatics Software Engineering GmbH. |
9 | // and Contributors. |
10 | // |
11 | // SPDX-License-Identifier: BSL-1.0 |
12 | // |
13 | |
14 | |
15 | #include "Poco/Unicode.h" |
16 | |
17 | |
18 | // |
19 | // PCRE Unicode character database (UCD) |
20 | // Taken from pcre_internal.h |
21 | // |
22 | |
23 | |
24 | typedef Poco::UInt8 pcre_uint8; |
25 | typedef Poco::UInt16 pcre_uint16; |
26 | typedef Poco::Int32 pcre_int32; |
27 | typedef Poco::UInt32 pcre_uint32; |
28 | |
29 | typedef struct { |
30 | pcre_uint8 script; /* ucp_Arabic, etc. */ |
31 | pcre_uint8 chartype; /* ucp_Cc, etc. (general categories) */ |
32 | pcre_uint8 gbprop; /* ucp_gbControl, etc. (grapheme break property) */ |
33 | pcre_uint8 caseset; /* offset to multichar other cases or zero */ |
34 | pcre_int32 other_case; /* offset to other case, or zero if none */ |
35 | } ucd_record; |
36 | |
37 | extern "C" const pcre_uint32 _pcre_ucd_caseless_sets[]; |
38 | extern "C" const ucd_record _pcre_ucd_records[]; |
39 | extern "C" const pcre_uint8 _pcre_ucd_stage1[]; |
40 | extern "C" const pcre_uint16 _pcre_ucd_stage2[]; |
41 | extern "C" const pcre_uint32 _pcre_ucp_gentype[]; |
42 | extern "C" const pcre_uint32 _pcre_ucp_gbtable[]; |
43 | |
// Number of code points covered by one block of the stage-2 index table.
#define UCD_BLOCK_SIZE 128

// Two-stage lookup of the ucd_record for code point ch:
//   - _pcre_ucd_stage1[ch / UCD_BLOCK_SIZE] yields a block number,
//   - _pcre_ucd_stage2[block * UCD_BLOCK_SIZE + ch % UCD_BLOCK_SIZE] yields
//     the index of the record in _pcre_ucd_records.
// The macro performs no range checking, so the caller must pass a value that
// is a valid index into the tables (in particular, not negative).
// Note: ch is evaluated more than once; do not pass expressions with side
// effects.
#define GET_UCD(ch) (_pcre_ucd_records + \
	_pcre_ucd_stage2[_pcre_ucd_stage1[(int)(ch) / UCD_BLOCK_SIZE] * \
	UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])

// Field accessors for the record belonging to ch.
#define UCD_CHARTYPE(ch)   GET_UCD(ch)->chartype
#define UCD_SCRIPT(ch)     GET_UCD(ch)->script
// General category, obtained by mapping the character type through
// _pcre_ucp_gentype.
#define UCD_CATEGORY(ch)   _pcre_ucp_gentype[UCD_CHARTYPE(ch)]
#define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop
#define UCD_CASESET(ch)    GET_UCD(ch)->caseset
// Code point of the other case of ch: ch plus the record's signed other_case
// offset (the offset is zero when there is no other case, giving ch itself).
// The argument is parenthesised before the cast so that compound arguments
// such as `a ? b : c` are converted as a whole rather than being split by
// operator precedence.
#define UCD_OTHERCASE(ch)  ((pcre_uint32)((int)(ch) + (int)(GET_UCD(ch)->other_case)))
55 | |
56 | |
57 | namespace Poco { |
58 | |
59 | |
60 | void Unicode::properties(int ch, CharacterProperties& props) |
61 | { |
62 | if (ch > UCP_MAX_CODEPOINT) ch = 0; |
63 | const ucd_record* ucd = GET_UCD(ch); |
64 | props.category = static_cast<CharacterCategory>(_pcre_ucp_gentype[ucd->chartype]); |
65 | props.type = static_cast<CharacterType>(ucd->chartype); |
66 | props.script = static_cast<Script>(ucd->script); |
67 | } |
68 | |
69 | |
70 | int Unicode::toLower(int ch) |
71 | { |
72 | if (isUpper(ch)) |
73 | return static_cast<int>(UCD_OTHERCASE(static_cast<unsigned>(ch))); |
74 | else |
75 | return ch; |
76 | } |
77 | |
78 | |
79 | int Unicode::toUpper(int ch) |
80 | { |
81 | if (isLower(ch)) |
82 | return static_cast<int>(UCD_OTHERCASE(static_cast<unsigned>(ch))); |
83 | else |
84 | return ch; |
85 | } |
86 | |
87 | |
88 | } // namespace Poco |
89 | |