titletrn.cpp source code [ClickHouse/contrib/icu/icu4c/source/i18n/titletrn.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	**********************************************************************
5	* Copyright (C) 2001-2011, International Business Machines
6	* Corporation and others. All Rights Reserved.
7	**********************************************************************
8	* Date Name Description
9	* 05/24/01 aliu Creation.
10	**********************************************************************
11	*/
12
13	#include "unicode/utypes.h"
14
15	#if !UCONFIG_NO_TRANSLITERATION
16
17	#include "unicode/uchar.h"
18	#include "unicode/uniset.h"
19	#include "unicode/ustring.h"
20	#include "unicode/utf16.h"
21	#include "titletrn.h"
22	#include "umutex.h"
23	#include "ucase.h"
24	#include "cpputils.h"
25
26	U_NAMESPACE_BEGIN
27
// Expands to the ICU run-time type identification boilerplate for TitlecaseTransliterator.
// NOTE(review): presumably defines getStaticClassID()/getDynamicClassID(); the macro body is not visible here -- confirm in unicode/uobject.h.
28	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TitlecaseTransliterator)
29
30	TitlecaseTransliterator::TitlecaseTransliterator() :
31	CaseMapTransliterator (UNICODE_STRING("Any-Title", `9`), NULL)
32	{
33	// Need to look back 2 characters in the case of "can't"
34	setMaximumContextLength(`2`);
35	}
36
37	/**
38	* Destructor.
39	*/
40	TitlecaseTransliterator::~TitlecaseTransliterator() {
41	}
42
43	/**
44	* Copy constructor.
45	*/
46	TitlecaseTransliterator::TitlecaseTransliterator(const TitlecaseTransliterator& o) :
47	CaseMapTransliterator (o)
48	{
49	}
50
/**
 * Assignment operator.
 */
/*TitlecaseTransliterator& TitlecaseTransliterator::operator=(
                             const TitlecaseTransliterator& o) {
    CaseMapTransliterator::operator=(o);
    return *this;
}*/
59
60	/**
61	* Transliterator API.
62	*/
63	TitlecaseTransliterator* TitlecaseTransliterator::clone() const {
64	return new TitlecaseTransliterator (*this);
65	}
66
67	/**
68	* Implements {@link Transliterator#handleTransliterate}.
69	*/
70	void TitlecaseTransliterator::handleTransliterate(
71	Replaceable& text, UTransPosition& offsets,
72	UBool isIncremental) const
73	{
74	// TODO reimplement, see ustrcase.c
75	// using a real word break iterator
76	// instead of just looking for a transition between cased and uncased characters
77	// call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
78	// needs to take isIncremental into account because case mappings are context-sensitive
79	// also detect when lowercasing function did not finish because of context
80
81	if (offsets.start >= offsets.limit) {
82	return;
83	}
84
85	// case type: >0 cased (UCASE_LOWER etc.) ==0 uncased <0 case-ignorable
86	int32_t type;
87
88	// Our mode; we are either converting letter toTitle or
89	// toLower.
90	UBool doTitle = TRUE;
91
92	// Determine if there is a preceding context of cased case-ignorable,*
93	// in which case we want to start in toLower mode. If the
94	// prior context is anything else (including empty) then start
95	// in toTitle mode.
96	UChar32 c;
97	int32_t start;
98	for (start = offsets.start - `1`; start >= offsets.contextStart; start -= U16_LENGTH(c)) {
99	c = text.char32At(start);
100	type=ucase_getTypeOrIgnorable(c);
101	if(type>`0`) { // cased
102	doTitle=FALSE;
103	break;
104	} else if(type==`0`) { // uncased but not ignorable
105	break;
106	}
107	// else (type<0) case-ignorable: continue
108	}
109
110	// Convert things after a cased character toLower; things
111	// after an uncased, non-case-ignorable character toTitle. Case-ignorable
112	// characters are copied directly and do not change the mode.
113	UCaseContext csc;
114	uprv_memset(&csc, `0`, sizeof(csc));
115	csc.p = &text;
116	csc.start = offsets.contextStart;
117	csc.limit = offsets.contextLimit;
118
119	UnicodeString tmp;
120	const UChar *s;
121	int32_t textPos, delta, result;
122
123	for(textPos=offsets.start; textPos<offsets.limit;) {
124	csc.cpStart=textPos;
125	c=text.char32At(textPos);
126	csc.cpLimit=textPos+=U16_LENGTH(c);
127
128	type=ucase_getTypeOrIgnorable(c);
129	if(type>=`0`) { // not case-ignorable
130	if(doTitle) {
131	result=ucase_toFullTitle(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT);
132	} else {
133	result=ucase_toFullLower(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT);
134	}
135	doTitle = (UBool)(type==`0`); // doTitle=isUncased
136
137	if(csc.b1 && isIncremental) {
138	// fMap() tried to look beyond the context limit
139	// wait for more input
140	offsets.start=csc.cpStart;
141	return;
142	}
143
144	if(result>=`0`) {
145	// replace the current code point with its full case mapping result
146	// see UCASE_MAX_STRING_LENGTH
147	if(result<=UCASE_MAX_STRING_LENGTH) {
148	// string s[result]
149	tmp.setTo(FALSE, s, result);
150	delta=result-U16_LENGTH(c);
151	} else {
152	// single code point
153	tmp.setTo(result);
154	delta=tmp.length()-U16_LENGTH(c);
155	}
156	text.handleReplaceBetween(csc.cpStart, textPos, tmp);
157	if(delta!=`0`) {
158	textPos+=delta;
159	csc.limit=offsets.contextLimit+=delta;
160	offsets.limit+=delta;
161	}
162	}
163	}
164	}
165	offsets.start=textPos;
166	}
167
168	U_NAMESPACE_END
169
170	#endif /* #if !UCONFIG_NO_TRANSLITERATION */
171

Browse the source code of ClickHouse/contrib/icu/icu4c/source/i18n/titletrn.cpp