| 1 | // | 
|---|
| 2 | // Unicode.cpp | 
|---|
| 3 | // | 
|---|
| 4 | // Library: Foundation | 
|---|
| 5 | // Package: Text | 
|---|
| 6 | // Module:  Unicode | 
|---|
| 7 | // | 
|---|
| 8 | // Copyright (c) 2007, Applied Informatics Software Engineering GmbH. | 
|---|
| 9 | // and Contributors. | 
|---|
| 10 | // | 
|---|
| 11 | // SPDX-License-Identifier:	BSL-1.0 | 
|---|
| 12 | // | 
|---|
| 13 |  | 
|---|
| 14 |  | 
|---|
| 15 | #include "Poco/Unicode.h" | 
|---|
| 16 |  | 
|---|
| 17 |  | 
|---|
| 18 | // | 
|---|
| 19 | // PCRE Unicode character database (UCD) | 
|---|
| 20 | // Taken from pcre_internal.h | 
|---|
| 21 | // | 
|---|
| 22 |  | 
|---|
| 23 |  | 
|---|
| 24 | typedef Poco::UInt8 pcre_uint8; | 
|---|
| 25 | typedef Poco::UInt16 pcre_uint16; | 
|---|
| 26 | typedef Poco::Int32 pcre_int32; | 
|---|
| 27 | typedef Poco::UInt32 pcre_uint32; | 
|---|
| 28 |  | 
|---|
| 29 | typedef struct { | 
|---|
| 30 | pcre_uint8 script;     /* ucp_Arabic, etc. */ | 
|---|
| 31 | pcre_uint8 chartype;   /* ucp_Cc, etc. (general categories) */ | 
|---|
| 32 | pcre_uint8 gbprop;     /* ucp_gbControl, etc. (grapheme break property) */ | 
|---|
| 33 | pcre_uint8 caseset;    /* offset to multichar other cases or zero */ | 
|---|
| 34 | pcre_int32 other_case; /* offset to other case, or zero if none */ | 
|---|
| 35 | } ucd_record; | 
|---|
| 36 |  | 
|---|
| 37 | extern "C"const pcre_uint32 _pcre_ucd_caseless_sets[]; | 
|---|
| 38 | extern "C"const ucd_record  _pcre_ucd_records[]; | 
|---|
| 39 | extern "C"const pcre_uint8  _pcre_ucd_stage1[]; | 
|---|
| 40 | extern "C"const pcre_uint16 _pcre_ucd_stage2[]; | 
|---|
| 41 | extern "C"const pcre_uint32 _pcre_ucp_gentype[]; | 
|---|
| 42 | extern "C"const pcre_uint32 _pcre_ucp_gbtable[]; | 
|---|
| 43 |  | 
|---|
// Number of code points covered by one block of the two-stage lookup.
#define UCD_BLOCK_SIZE 128

// Yields a pointer to the ucd_record for code point ch.
//
// Stage 1 is indexed by the block number (ch / UCD_BLOCK_SIZE); its value
// selects a block in stage 2, which is indexed by the position of ch within
// the block (ch % UCD_BLOCK_SIZE) and holds the index into the record table.
//
// The macro performs no range checking and evaluates ch more than once, so
// callers must pass a valid, side-effect-free code point expression.
#define GET_UCD(ch) (_pcre_ucd_records + \
	_pcre_ucd_stage2[_pcre_ucd_stage1[(int)(ch) / UCD_BLOCK_SIZE] * \
	UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])

// Field accessors for the record belonging to code point ch.
#define UCD_CHARTYPE(ch)    GET_UCD(ch)->chartype
#define UCD_SCRIPT(ch)      GET_UCD(ch)->script
#define UCD_CATEGORY(ch)    _pcre_ucp_gentype[UCD_CHARTYPE(ch)]
#define UCD_GRAPHBREAK(ch)  GET_UCD(ch)->gbprop
#define UCD_CASESET(ch)     GET_UCD(ch)->caseset

// Code point of the other case of ch: ch plus the record's signed
// other_case offset (the offset is zero if there is no other case).
// The argument is parenthesized before the cast so that compound
// expressions (e.g. `c ? a : b`) are converted as a whole.
#define UCD_OTHERCASE(ch)   ((pcre_uint32)((int)(ch) + (int)(GET_UCD(ch)->other_case)))
|---|
| 55 |  | 
|---|
| 56 |  | 
|---|
| 57 | namespace Poco { | 
|---|
| 58 |  | 
|---|
| 59 |  | 
|---|
| 60 | void Unicode::properties(int ch, CharacterProperties& props) | 
|---|
| 61 | { | 
|---|
| 62 | if (ch > UCP_MAX_CODEPOINT) ch = 0; | 
|---|
| 63 | const ucd_record* ucd = GET_UCD(ch); | 
|---|
| 64 | props.category = static_cast<CharacterCategory>(_pcre_ucp_gentype[ucd->chartype]); | 
|---|
| 65 | props.type     = static_cast<CharacterType>(ucd->chartype); | 
|---|
| 66 | props.script   = static_cast<Script>(ucd->script); | 
|---|
| 67 | } | 
|---|
| 68 |  | 
|---|
| 69 |  | 
|---|
| 70 | int Unicode::toLower(int ch) | 
|---|
| 71 | { | 
|---|
| 72 | if (isUpper(ch)) | 
|---|
| 73 | return static_cast<int>(UCD_OTHERCASE(static_cast<unsigned>(ch))); | 
|---|
| 74 | else | 
|---|
| 75 | return ch; | 
|---|
| 76 | } | 
|---|
| 77 |  | 
|---|
| 78 |  | 
|---|
| 79 | int Unicode::toUpper(int ch) | 
|---|
| 80 | { | 
|---|
| 81 | if (isLower(ch)) | 
|---|
| 82 | return static_cast<int>(UCD_OTHERCASE(static_cast<unsigned>(ch))); | 
|---|
| 83 | else | 
|---|
| 84 | return ch; | 
|---|
| 85 | } | 
|---|
| 86 |  | 
|---|
| 87 |  | 
|---|
| 88 | } // namespace Poco | 
|---|
| 89 |  | 
|---|