esctrn.cpp source code [ClickHouse/contrib/icu/icu4c/source/i18n/esctrn.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (c) 2001-2011, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	* Date Name Description
9	* 11/19/2001 aliu Creation.
10	**********************************************************************
11	*/
12
13	#include "unicode/utypes.h"
14
15	#if !UCONFIG_NO_TRANSLITERATION
16
17	#include "unicode/utf16.h"
18	#include "esctrn.h"
19	#include "util.h"
20
21	U_NAMESPACE_BEGIN
22
23	static const UChar UNIPRE[] = {`85`,`43`,`0`}; // "U+"
24	static const UChar BS_u[] = {`92`,`117`,`0`}; // "\\u"
25	static const UChar BS_U[] = {`92`,`85`,`0`}; // "\\U"
26	static const UChar XMLPRE[] = {`38`,`35`,`120`,`0`}; // "&#x"
27	static const UChar XML10PRE[] = {`38`,`35`,`0`}; // "&#"
28	static const UChar PERLPRE[] = {`92`,`120`,`123`,`0`}; // "\\x{"
29	static const UChar SEMI[] = {`59`,`0`}; // ";"
30	static const UChar RBRACE[] = {`125`,`0`}; // "}"
31
32	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator)
33
34	/**
35	* Factory methods
36	*/
37	static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /context/) {
38	// Unicode: "U+10FFFF" hex, min=4, max=6
39	return new EscapeTransliterator (ID, UnicodeString (TRUE, UNIPRE, `2`), UnicodeString (), `16`, `4`, TRUE, NULL);
40	}
41	static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /context/) {
42	// Java: "\\uFFFF" hex, min=4, max=4
43	return new EscapeTransliterator (ID, UnicodeString (TRUE, BS_u, `2`), UnicodeString (), `16`, `4`, FALSE, NULL);
44	}
45	static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /context/) {
46	// C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
47	return new EscapeTransliterator (ID, UnicodeString (TRUE, BS_u, `2`), UnicodeString (), `16`, `4`, TRUE,
48	new EscapeTransliterator (UnicodeString (), UnicodeString (TRUE, BS_U, `2`), UnicodeString (), `16`, `8`, TRUE, NULL));
49	}
50	static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /context/) {
51	// XML: "􏿿" hex, min=1, max=6
52	return new EscapeTransliterator (ID, UnicodeString (TRUE, XMLPRE, `3`), UnicodeString (SEMI[`0`]), `16`, `1`, TRUE, NULL);
53	}
54	static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /context/) {
55	// XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
56	return new EscapeTransliterator (ID, UnicodeString (TRUE, XML10PRE, `2`), UnicodeString (SEMI[`0`]), `10`, `1`, TRUE, NULL);
57	}
58	static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /context/) {
59	// Perl: "\\x{263A}" hex, min=1, max=6
60	return new EscapeTransliterator (ID, UnicodeString (TRUE, PERLPRE, `3`), UnicodeString (RBRACE[`0`]), `16`, `1`, TRUE, NULL);
61	}
62
63	/**
64	* Registers standard variants with the system. Called by
65	* Transliterator during initialization.
66	*/
67	void EscapeTransliterator::registerIDs() {
68	Token t = integerToken(`0`);
69
70	Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t);
71
72	Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t);
73
74	Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t);
75
76	Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t);
77
78	Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t);
79
80	Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t);
81
82	Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t);
83	}
84
85	/**
86	* Constructs an escape transliterator with the given ID and
87	* parameters. See the class member documentation for details.
88	*/
89	EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID,
90	const UnicodeString& _prefix, const UnicodeString& _suffix,
91	int32_t _radix, int32_t _minDigits,
92	UBool _grokSupplementals,
93	EscapeTransliterator* adoptedSupplementalHandler) :
94	Transliterator (newID, NULL)
95	{
96	this->prefix = _prefix;
97	this->suffix = _suffix;
98	this->radix = _radix;
99	this->minDigits = _minDigits;
100	this->grokSupplementals = _grokSupplementals;
101	this->supplementalHandler = adoptedSupplementalHandler;
102	}
103
104	/**
105	* Copy constructor.
106	*/
107	EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) :
108	Transliterator (o),
109	prefix (o.prefix),
110	suffix (o.suffix),
111	radix(o.radix),
112	minDigits(o.minDigits),
113	grokSupplementals(o.grokSupplementals) {
114	supplementalHandler = (o.supplementalHandler != `0`) ?
115	new EscapeTransliterator (*o.supplementalHandler) : NULL;
116	}
117
118	EscapeTransliterator::~EscapeTransliterator() {
119	delete supplementalHandler;
120	}
121
122	/**
123	* Transliterator API.
124	*/
125	EscapeTransliterator* EscapeTransliterator::clone() const {
126	return new EscapeTransliterator (*this);
127	}
128
129	/**
130	* Implements {@link Transliterator#handleTransliterate}.
131	*/
132	void EscapeTransliterator::handleTransliterate(Replaceable& text,
133	UTransPosition& pos,
134	UBool /isIncremental/) const
135	{
136	/ TODO: Verify that isIncremental can be ignored /
137	int32_t start = pos.start;
138	int32_t limit = pos.limit;
139
140	UnicodeString buf(prefix);
141	int32_t prefixLen = prefix.length();
142	UBool redoPrefix = FALSE;
143
144	while (start < limit) {
145	int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start);
146	int32_t charLen = grokSupplementals ? U16_LENGTH(c) : `1`;
147
148	if ((c & `0xFFFF0000`) != `0` && supplementalHandler != NULL) {
149	buf.truncate(`0`);
150	buf.append(supplementalHandler->prefix);
151	ICU_Utility::appendNumber(buf, c, supplementalHandler->radix,
152	supplementalHandler->minDigits);
153	buf.append(supplementalHandler->suffix);
154	redoPrefix = TRUE;
155	} else {
156	if (redoPrefix) {
157	buf.truncate(`0`);
158	buf.append(prefix);
159	redoPrefix = FALSE;
160	} else {
161	buf.truncate(prefixLen);
162	}
163	ICU_Utility::appendNumber(buf, c, radix, minDigits);
164	buf.append(suffix);
165	}
166
167	text.handleReplaceBetween(start, start + charLen, buf);
168	start += buf.length();
169	limit += buf.length() - charLen;
170	}
171
172	pos.contextLimit += limit - pos.limit;
173	pos.limit = limit;
174	pos.start = start;
175	}
176
177	U_NAMESPACE_END
178
179	#endif /* #if !UCONFIG_NO_TRANSLITERATION */
180
181	//eof
182

Browse the source code of ClickHouse/contrib/icu/icu4c/source/i18n/esctrn.cpp