| 1 | // |
| 2 | // Unicode.cpp |
| 3 | // |
| 4 | // Library: Foundation |
| 5 | // Package: Text |
| 6 | // Module: Unicode |
| 7 | // |
| 8 | // Copyright (c) 2007, Applied Informatics Software Engineering GmbH. |
| 9 | // and Contributors. |
| 10 | // |
| 11 | // SPDX-License-Identifier: BSL-1.0 |
| 12 | // |
| 13 | |
| 14 | |
| 15 | #include "Poco/Unicode.h" |
| 16 | |
| 17 | |
| 18 | // |
| 19 | // PCRE Unicode character database (UCD) |
| 20 | // Taken from pcre_internal.h |
| 21 | // |
| 22 | |
| 23 | |
| 24 | typedef Poco::UInt8 pcre_uint8; |
| 25 | typedef Poco::UInt16 pcre_uint16; |
| 26 | typedef Poco::Int32 pcre_int32; |
| 27 | typedef Poco::UInt32 pcre_uint32; |
| 28 | |
| 29 | typedef struct { |
| 30 | pcre_uint8 script; /* ucp_Arabic, etc. */ |
| 31 | pcre_uint8 chartype; /* ucp_Cc, etc. (general categories) */ |
| 32 | pcre_uint8 gbprop; /* ucp_gbControl, etc. (grapheme break property) */ |
| 33 | pcre_uint8 caseset; /* offset to multichar other cases or zero */ |
| 34 | pcre_int32 other_case; /* offset to other case, or zero if none */ |
| 35 | } ucd_record; |
| 36 | |
| 37 | extern "C" const pcre_uint32 _pcre_ucd_caseless_sets[]; |
| 38 | extern "C" const ucd_record _pcre_ucd_records[]; |
| 39 | extern "C" const pcre_uint8 _pcre_ucd_stage1[]; |
| 40 | extern "C" const pcre_uint16 _pcre_ucd_stage2[]; |
| 41 | extern "C" const pcre_uint32 _pcre_ucp_gentype[]; |
| 42 | extern "C" const pcre_uint32 _pcre_ucp_gbtable[]; |
| 43 | |
//
// Two-stage lookup into the Unicode character database tables.
//
// GET_UCD(ch) evaluates to a pointer to the ucd_record of code point ch:
// _pcre_ucd_stage1 is indexed by the block number (ch / UCD_BLOCK_SIZE),
// the value found there selects a block of UCD_BLOCK_SIZE entries in
// _pcre_ucd_stage2, and the entry at offset (ch % UCD_BLOCK_SIZE) in that
// block is the index into _pcre_ucd_records.
//
// The macros perform no range checking, so callers must validate ch first.
// Most of them expand their argument more than once, so the argument must
// be free of side effects. Every use of the parameter and every expansion
// is parenthesised so that arbitrary expressions can be passed safely.
//
#define UCD_BLOCK_SIZE 128
#define GET_UCD(ch) (_pcre_ucd_records + \
	_pcre_ucd_stage2[_pcre_ucd_stage1[(int)(ch) / UCD_BLOCK_SIZE] * \
	UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])

// Field accessors for the record of code point ch.
#define UCD_CHARTYPE(ch)   (GET_UCD(ch)->chartype)
#define UCD_SCRIPT(ch)     (GET_UCD(ch)->script)
#define UCD_CATEGORY(ch)   (_pcre_ucp_gentype[UCD_CHARTYPE(ch)])
#define UCD_GRAPHBREAK(ch) (GET_UCD(ch)->gbprop)
#define UCD_CASESET(ch)    (GET_UCD(ch)->caseset)

// Code point of the other case of ch: ch plus the record's signed
// other_case offset (the offset is zero when there is no other case).
#define UCD_OTHERCASE(ch)  ((pcre_uint32)((int)(ch) + (int)(GET_UCD(ch)->other_case)))
| 55 | |
| 56 | |
| 57 | namespace Poco { |
| 58 | |
| 59 | |
| 60 | void Unicode::properties(int ch, CharacterProperties& props) |
| 61 | { |
| 62 | if (ch > UCP_MAX_CODEPOINT) ch = 0; |
| 63 | const ucd_record* ucd = GET_UCD(ch); |
| 64 | props.category = static_cast<CharacterCategory>(_pcre_ucp_gentype[ucd->chartype]); |
| 65 | props.type = static_cast<CharacterType>(ucd->chartype); |
| 66 | props.script = static_cast<Script>(ucd->script); |
| 67 | } |
| 68 | |
| 69 | |
| 70 | int Unicode::toLower(int ch) |
| 71 | { |
| 72 | if (isUpper(ch)) |
| 73 | return static_cast<int>(UCD_OTHERCASE(static_cast<unsigned>(ch))); |
| 74 | else |
| 75 | return ch; |
| 76 | } |
| 77 | |
| 78 | |
| 79 | int Unicode::toUpper(int ch) |
| 80 | { |
| 81 | if (isLower(ch)) |
| 82 | return static_cast<int>(UCD_OTHERCASE(static_cast<unsigned>(ch))); |
| 83 | else |
| 84 | return ch; |
| 85 | } |
| 86 | |
| 87 | |
| 88 | } // namespace Poco |
| 89 | |