1//
2// Unicode.cpp
3//
4// Library: Foundation
5// Package: Text
6// Module: Unicode
7//
8// Copyright (c) 2007, Applied Informatics Software Engineering GmbH.
9// and Contributors.
10//
11// SPDX-License-Identifier: BSL-1.0
12//
13
14
15#include "Poco/Unicode.h"
16
17
18//
19// PCRE Unicode character database (UCD)
20// Taken from pcre_internal.h
21//
22
23
// Fixed-width integer aliases under the type names used by the PCRE
// declarations below, mapped onto the corresponding Poco integer types.
typedef Poco::UInt8 pcre_uint8;
typedef Poco::UInt16 pcre_uint16;
typedef Poco::Int32 pcre_int32;
typedef Poco::UInt32 pcre_uint32;
28
// One entry of the PCRE Unicode character database. Records are reached
// through the GET_UCD() lookup and describe the properties of a code point.
// NOTE(review): _pcre_ucd_records is defined outside this file, so this layout
// presumably has to stay in sync with that definition -- confirm before editing.
typedef struct {
 pcre_uint8 script; /* ucp_Arabic, etc. */
 pcre_uint8 chartype; /* ucp_Cc, etc. (general categories) */
 pcre_uint8 gbprop; /* ucp_gbControl, etc. (grapheme break property) */
 pcre_uint8 caseset; /* offset to multichar other cases or zero */
 pcre_int32 other_case; /* offset to other case, or zero if none */
} ucd_record;
36
// Lookup tables with C linkage; they are only declared here and must be
// defined elsewhere in the program.
//  - _pcre_ucd_stage1, _pcre_ucd_stage2 and _pcre_ucd_records are combined by
//    GET_UCD() to find the ucd_record of a code point.
//  - _pcre_ucp_gentype is indexed by a record's chartype to obtain the
//    general category.
//  - _pcre_ucd_caseless_sets and _pcre_ucp_gbtable are declared but not
//    referenced by the code visible in this file.
extern "C" const pcre_uint32 _pcre_ucd_caseless_sets[];
extern "C" const ucd_record _pcre_ucd_records[];
extern "C" const pcre_uint8 _pcre_ucd_stage1[];
extern "C" const pcre_uint16 _pcre_ucd_stage2[];
extern "C" const pcre_uint32 _pcre_ucp_gentype[];
extern "C" const pcre_uint32 _pcre_ucp_gbtable[];
43
// Two-stage table lookup: code points are grouped into blocks of
// UCD_BLOCK_SIZE. _pcre_ucd_stage1 maps the block number of a code point to a
// block index in _pcre_ucd_stage2; the entry for the code point inside that
// block is the index of its record in _pcre_ucd_records.
//
// GET_UCD(ch) yields a pointer to the ucd_record of ch. No range checking is
// performed: the caller must pass a value that is a valid index into the
// tables (in particular, it must not be negative).
#define UCD_BLOCK_SIZE 128
#define GET_UCD(ch) (_pcre_ucd_records + \
	_pcre_ucd_stage2[_pcre_ucd_stage1[(int)(ch) / UCD_BLOCK_SIZE] * \
	UCD_BLOCK_SIZE + (int)(ch) % UCD_BLOCK_SIZE])

// Field accessors for the record of ch. UCD_CATEGORY additionally maps the
// record's chartype through _pcre_ucp_gentype.
#define UCD_CHARTYPE(ch) GET_UCD(ch)->chartype
#define UCD_SCRIPT(ch) GET_UCD(ch)->script
#define UCD_CATEGORY(ch) _pcre_ucp_gentype[UCD_CHARTYPE(ch)]
#define UCD_GRAPHBREAK(ch) GET_UCD(ch)->gbprop
#define UCD_CASESET(ch) GET_UCD(ch)->caseset
// ch plus the record's signed other_case offset (zero offset if there is no
// other case), as an unsigned 32-bit value. The parameter is parenthesized
// before the cast so that compound arguments (e.g. a conditional expression)
// are converted as a whole.
#define UCD_OTHERCASE(ch) ((pcre_uint32)((int)(ch) + (int)(GET_UCD(ch)->other_case)))
55
56
57namespace Poco {
58
59
60void Unicode::properties(int ch, CharacterProperties& props)
61{
62 if (ch > UCP_MAX_CODEPOINT) ch = 0;
63 const ucd_record* ucd = GET_UCD(ch);
64 props.category = static_cast<CharacterCategory>(_pcre_ucp_gentype[ucd->chartype]);
65 props.type = static_cast<CharacterType>(ucd->chartype);
66 props.script = static_cast<Script>(ucd->script);
67}
68
69
70int Unicode::toLower(int ch)
71{
72 if (isUpper(ch))
73 return static_cast<int>(UCD_OTHERCASE(static_cast<unsigned>(ch)));
74 else
75 return ch;
76}
77
78
79int Unicode::toUpper(int ch)
80{
81 if (isLower(ch))
82 return static_cast<int>(UCD_OTHERCASE(static_cast<unsigned>(ch)));
83 else
84 return ch;
85}
86
87
88} // namespace Poco
89