1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ********************************************************************** |
5 | * Copyright (c) 2001-2011, International Business Machines |
6 | * Corporation and others. All Rights Reserved. |
7 | ********************************************************************** |
8 | * Date Name Description |
9 | * 11/19/2001 aliu Creation. |
10 | ********************************************************************** |
11 | */ |
12 | |
13 | #include "unicode/utypes.h" |
14 | |
15 | #if !UCONFIG_NO_TRANSLITERATION |
16 | |
17 | #include "unicode/utf16.h" |
18 | #include "esctrn.h" |
19 | #include "util.h" |
20 | |
21 | U_NAMESPACE_BEGIN |
22 | |
23 | static const UChar UNIPRE[] = {85,43,0}; // "U+" |
24 | static const UChar BS_u[] = {92,117,0}; // "\\u" |
25 | static const UChar BS_U[] = {92,85,0}; // "\\U" |
26 | static const UChar XMLPRE[] = {38,35,120,0}; // "&#x" |
27 | static const UChar XML10PRE[] = {38,35,0}; // "&#" |
28 | static const UChar PERLPRE[] = {92,120,123,0}; // "\\x{" |
29 | static const UChar SEMI[] = {59,0}; // ";" |
30 | static const UChar RBRACE[] = {125,0}; // "}" |
31 | |
32 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator) |
33 | |
34 | /** |
35 | * Factory methods |
36 | */ |
37 | static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) { |
38 | // Unicode: "U+10FFFF" hex, min=4, max=6 |
39 | return new EscapeTransliterator(ID, UnicodeString(TRUE, UNIPRE, 2), UnicodeString(), 16, 4, TRUE, NULL); |
40 | } |
41 | static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) { |
42 | // Java: "\\uFFFF" hex, min=4, max=4 |
43 | return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, FALSE, NULL); |
44 | } |
45 | static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) { |
46 | // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8 |
47 | return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, TRUE, |
48 | new EscapeTransliterator(UnicodeString(), UnicodeString(TRUE, BS_U, 2), UnicodeString(), 16, 8, TRUE, NULL)); |
49 | } |
50 | static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) { |
51 | // XML: "" hex, min=1, max=6 |
52 | return new EscapeTransliterator(ID, UnicodeString(TRUE, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, TRUE, NULL); |
53 | } |
54 | static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) { |
55 | // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex") |
56 | return new EscapeTransliterator(ID, UnicodeString(TRUE, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, TRUE, NULL); |
57 | } |
58 | static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) { |
59 | // Perl: "\\x{263A}" hex, min=1, max=6 |
60 | return new EscapeTransliterator(ID, UnicodeString(TRUE, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, TRUE, NULL); |
61 | } |
62 | |
63 | /** |
64 | * Registers standard variants with the system. Called by |
65 | * Transliterator during initialization. |
66 | */ |
67 | void EscapeTransliterator::registerIDs() { |
68 | Token t = integerToken(0); |
69 | |
70 | Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode" ), _createEscUnicode, t); |
71 | |
72 | Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java" ), _createEscJava, t); |
73 | |
74 | Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C" ), _createEscC, t); |
75 | |
76 | Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML" ), _createEscXML, t); |
77 | |
78 | Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10" ), _createEscXML10, t); |
79 | |
80 | Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl" ), _createEscPerl, t); |
81 | |
82 | Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex" ), _createEscJava, t); |
83 | } |
84 | |
85 | /** |
86 | * Constructs an escape transliterator with the given ID and |
87 | * parameters. See the class member documentation for details. |
88 | */ |
89 | EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID, |
90 | const UnicodeString& _prefix, const UnicodeString& _suffix, |
91 | int32_t _radix, int32_t _minDigits, |
92 | UBool _grokSupplementals, |
93 | EscapeTransliterator* adoptedSupplementalHandler) : |
94 | Transliterator(newID, NULL) |
95 | { |
96 | this->prefix = _prefix; |
97 | this->suffix = _suffix; |
98 | this->radix = _radix; |
99 | this->minDigits = _minDigits; |
100 | this->grokSupplementals = _grokSupplementals; |
101 | this->supplementalHandler = adoptedSupplementalHandler; |
102 | } |
103 | |
104 | /** |
105 | * Copy constructor. |
106 | */ |
107 | EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) : |
108 | Transliterator(o), |
109 | prefix(o.prefix), |
110 | suffix(o.suffix), |
111 | radix(o.radix), |
112 | minDigits(o.minDigits), |
113 | grokSupplementals(o.grokSupplementals) { |
114 | supplementalHandler = (o.supplementalHandler != 0) ? |
115 | new EscapeTransliterator(*o.supplementalHandler) : NULL; |
116 | } |
117 | |
/**
 * Destructor. Releases the supplemental handler owned by this object;
 * the pointer may be NULL, in which case delete does nothing.
 */
EscapeTransliterator::~EscapeTransliterator() {
    delete supplementalHandler;
}
121 | |
122 | /** |
123 | * Transliterator API. |
124 | */ |
125 | EscapeTransliterator* EscapeTransliterator::clone() const { |
126 | return new EscapeTransliterator(*this); |
127 | } |
128 | |
129 | /** |
130 | * Implements {@link Transliterator#handleTransliterate}. |
131 | */ |
132 | void EscapeTransliterator::handleTransliterate(Replaceable& text, |
133 | UTransPosition& pos, |
134 | UBool /*isIncremental*/) const |
135 | { |
136 | /* TODO: Verify that isIncremental can be ignored */ |
137 | int32_t start = pos.start; |
138 | int32_t limit = pos.limit; |
139 | |
140 | UnicodeString buf(prefix); |
141 | int32_t prefixLen = prefix.length(); |
142 | UBool redoPrefix = FALSE; |
143 | |
144 | while (start < limit) { |
145 | int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start); |
146 | int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1; |
147 | |
148 | if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) { |
149 | buf.truncate(0); |
150 | buf.append(supplementalHandler->prefix); |
151 | ICU_Utility::appendNumber(buf, c, supplementalHandler->radix, |
152 | supplementalHandler->minDigits); |
153 | buf.append(supplementalHandler->suffix); |
154 | redoPrefix = TRUE; |
155 | } else { |
156 | if (redoPrefix) { |
157 | buf.truncate(0); |
158 | buf.append(prefix); |
159 | redoPrefix = FALSE; |
160 | } else { |
161 | buf.truncate(prefixLen); |
162 | } |
163 | ICU_Utility::appendNumber(buf, c, radix, minDigits); |
164 | buf.append(suffix); |
165 | } |
166 | |
167 | text.handleReplaceBetween(start, start + charLen, buf); |
168 | start += buf.length(); |
169 | limit += buf.length() - charLen; |
170 | } |
171 | |
172 | pos.contextLimit += limit - pos.limit; |
173 | pos.limit = limit; |
174 | pos.start = start; |
175 | } |
176 | |
177 | U_NAMESPACE_END |
178 | |
179 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |
180 | |
181 | //eof |
182 | |