ucase.cpp source code [Godot/thirdparty/icu4c/common/ucase.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	*******************************************************************************
5	*
6	* Copyright (C) 2004-2014, International Business Machines
7	* Corporation and others. All Rights Reserved.
8	*
9	*******************************************************************************
10	* file name: ucase.cpp
11	* encoding: UTF-8
12	* tab size: 8 (not used)
13	* indentation:4
14	*
15	* created on: 2004aug30
16	* created by: Markus W. Scherer
17	*
18	* Low-level Unicode character/string case mapping code.
19	* Much code moved here (and modified) from uchar.c.
20	*/
21
22	#include "unicode/utypes.h"
23	#include "unicode/unistr.h"
24	#include "unicode/uset.h"
25	#include "unicode/utf16.h"
26	#include "cmemory.h"
27	#include "uassert.h"
28	#include "ucase.h"
29	#include "umutex.h"
30	#include "utrie2.h"
31
/* ucase_props_data.h is machine-generated by genprops/casepropsbuilder.cpp */
33	#define INCLUDED_FROM_UCASE_CPP
34	#include "ucase_props_data.h"
35
/* set of property starts for UnicodeSet ------------------------------------ */
37
38	static UBool U_CALLCONV
39	_enumPropertyStartsRange(const void context, UChar32 start, UChar32 /end/, uint32_t /value/*) {
40	/ add the start code point to the USet /
41	const USetAdder sa=(const* USetAdder *)context;
42	sa->add(sa->set, start);
43	return true;
44	}
45
46	U_CFUNC void U_EXPORT2
47	ucase_addPropertyStarts(const USetAdder sa, UErrorCode pErrorCode) {
48	if(U_FAILURE(*pErrorCode)) {
49	return;
50	}
51
52	/ add the start code point of each same-value range of the trie /
53	utrie2_enum(&ucase_props_singleton.trie, nullptr, _enumPropertyStartsRange, sa);
54
55	/ add code points with hardcoded properties, plus the ones following them /
56
57	/ (none right now, see comment below) /
58
59	/*
60	* Omit code points with hardcoded specialcasing properties
61	* because we do not build property UnicodeSets for them right now.
62	*/
63	}
64
/* data access primitives --------------------------------------------------- */
66
67	U_CAPI const struct UCaseProps * U_EXPORT2
68	ucase_getSingleton(int32_t pExceptionsLength, int32_t pUnfoldLength) {
69	*pExceptionsLength = UPRV_LENGTHOF(ucase_props_exceptions);
70	*pUnfoldLength = UPRV_LENGTHOF(ucase_props_unfold);
71	return &ucase_props_singleton;
72	}
73
74	U_CFUNC const UTrie2 * U_EXPORT2
75	ucase_getTrie() {
76	return &ucase_props_singleton.trie;
77	}
78
/*
 * Pointer into (csp)->exceptions at the index stored in the bits of props
 * above UCASE_EXC_SHIFT.
 */
#define GET_EXCEPTIONS(csp, props) ((csp)->exceptions+((props)>>UCASE_EXC_SHIFT))
80
/*
 * number of bits in an 8-bit integer value
 *
 * flagsOffset[b] is the count of 1-bits in b; SLOT_OFFSET() indexes this
 * table with the flag bits below a slot index.
 */
static const uint8_t flagsOffset[256]={
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
100
/* Nonzero if bit idx is set in flags, i.e. the optional slot idx is present. */
#define HAS_SLOT(flags, idx) ((flags)&(1<<(idx)))
/* Number of present slots that precede slot idx: count of set bits below bit idx. */
#define SLOT_OFFSET(flags, idx) flagsOffset[(flags)&((1<<(idx))-1)]

/*
 * Get the value of an optional-value slot where HAS_SLOT(excWord, idx).
 *
 * Slots are one uint16_t wide unless UCASE_EXC_DOUBLE_SLOTS is set in excWord,
 * in which case each slot is two uint16_t units, high half first.
 *
 * @param excWord (in) initial exceptions word
 * @param idx (in) desired slot index
 * @param pExc16 (in/out) const uint16_t * after excWord=*pExc16++;
 *               moved to the last uint16_t of the value, use +1 for beginning of next slot
 * @param value (out) int32_t or uint32_t output if hasSlot, otherwise not modified
 */
#define GET_SLOT_VALUE(excWord, idx, pExc16, value) UPRV_BLOCK_MACRO_BEGIN { \
    if(((excWord)&UCASE_EXC_DOUBLE_SLOTS)==0) { \
        (pExc16)+=SLOT_OFFSET(excWord, idx); \
        (value)=*(pExc16); \
    } else { \
        (pExc16)+=2*SLOT_OFFSET(excWord, idx); \
        (value)=*(pExc16)++; \
        (value)=((value)<<16)|*(pExc16); \
    } \
} UPRV_BLOCK_MACRO_END
123
/* simple case mappings ----------------------------------------------------- */
125
126	U_CAPI UChar32 U_EXPORT2
127	ucase_tolower(UChar32 c) {
128	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
129	if(!UCASE_HAS_EXCEPTION(props)) {
130	if(UCASE_IS_UPPER_OR_TITLE(props)) {
131	c+=UCASE_GET_DELTA(props);
132	}
133	} else {
134	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
135	uint16_t excWord=*pe++;
136	if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
137	int32_t delta;
138	GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
139	return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==`0` ? c+delta : c-delta;
140	}
141	if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
142	GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe, c);
143	}
144	}
145	return c;
146	}
147
148	U_CAPI UChar32 U_EXPORT2
149	ucase_toupper(UChar32 c) {
150	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
151	if(!UCASE_HAS_EXCEPTION(props)) {
152	if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
153	c+=UCASE_GET_DELTA(props);
154	}
155	} else {
156	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
157	uint16_t excWord=*pe++;
158	if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
159	int32_t delta;
160	GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
161	return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==`0` ? c+delta : c-delta;
162	}
163	if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
164	GET_SLOT_VALUE(excWord, UCASE_EXC_UPPER, pe, c);
165	}
166	}
167	return c;
168	}
169
170	U_CAPI UChar32 U_EXPORT2
171	ucase_totitle(UChar32 c) {
172	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
173	if(!UCASE_HAS_EXCEPTION(props)) {
174	if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
175	c+=UCASE_GET_DELTA(props);
176	}
177	} else {
178	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
179	uint16_t excWord=*pe++;
180	if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
181	int32_t delta;
182	GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
183	return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==`0` ? c+delta : c-delta;
184	}
185	int32_t idx;
186	if(HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
187	idx=UCASE_EXC_TITLE;
188	} else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
189	idx=UCASE_EXC_UPPER;
190	} else {
191	return c;
192	}
193	GET_SLOT_VALUE(excWord, idx, pe, c);
194	}
195	return c;
196	}
197
/* Fixed UTF-16 sequences: a base letter followed by U+0307 and optionally one more mark. */
static const char16_t iDot[2] = { 0x69, 0x307 };
static const char16_t jDot[2] = { 0x6a, 0x307 };
/* NOTE(review): declared with 3 elements but only 2 initializers; the third is implicitly 0. */
static const char16_t iOgonekDot[3] = { 0x12f, 0x307 };
static const char16_t iDotGrave[3] = { 0x69, 0x307, 0x300 };
static const char16_t iDotAcute[3] = { 0x69, 0x307, 0x301 };
static const char16_t iDotTilde[3] = { 0x69, 0x307, 0x303 };
204
205
206	U_CFUNC void U_EXPORT2
207	ucase_addCaseClosure(UChar32 c, const USetAdder *sa) {
208	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
209	if(!UCASE_HAS_EXCEPTION(props)) {
210	if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
211	/ add the one simple case mapping, no matter what type it is /
212	int32_t delta=UCASE_GET_DELTA(props);
213	if(delta!=`0`) {
214	sa->add(sa->set, c+delta);
215	}
216	}
217	} else {
218	/*
219	* c has exceptions, so there may be multiple simple and/or
220	* full case mappings. Add them all.
221	*/
222	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
223	uint16_t excWord=*pe++;
224	const uint16_t *pe0=pe;
225
226	// Hardcode the case closure of i and its relatives and ignore the
227	// data file data for these characters.
228	// The Turkic dotless i and dotted I with their case mapping conditions
229	// and case folding option make the related characters behave specially.
230	// This code matches their closure behavior to their case folding behavior.
231	if (excWord&UCASE_EXC_CONDITIONAL_FOLD) {
232	// These characters have Turkic case foldings. Hardcode their closure.
233	if (c == `0x49`) {
234	// Regular i and I are in one equivalence class.
235	sa->add(sa->set, `0x69`);
236	return;
237	} else if (c == `0x130`) {
238	// Dotted I is in a class with <0069 0307>
239	// (for canonical equivalence with <0049 0307>).
240	sa->addString(sa->set, iDot, `2`);
241	return;
242	}
243	} else if (c == `0x69`) {
244	sa->add(sa->set, `0x49`);
245	return;
246	} else if (c == `0x131`) {
247	// Dotless i is in a class by itself.
248	return;
249	}
250
251	/ add all simple case mappings /
252	for(int32_t idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
253	if(HAS_SLOT(excWord, idx)) {
254	pe=pe0;
255	UChar32 mapping;
256	GET_SLOT_VALUE(excWord, idx, pe, mapping);
257	sa->add(sa->set, mapping);
258	}
259	}
260	if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) {
261	pe=pe0;
262	int32_t delta;
263	GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
264	sa->add(sa->set, (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==`0` ? c+delta : c-delta);
265	}
266
267	/ get the closure string pointer & length /
268	const char16_t *closure;
269	int32_t closureLength;
270	if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
271	pe=pe0;
272	GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
273	closureLength&=UCASE_CLOSURE_MAX_LENGTH; / higher bits are reserved /
274	closure=(const char16_t )pe+`1`; /* behind this slot, unless there are full case mappings /
275	} else {
276	closureLength=`0`;
277	closure=nullptr;
278	}
279
280	/ add the full case folding /
281	if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
282	pe=pe0;
283	int32_t fullLength;
284	GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);
285
286	/ start of full case mapping strings /
287	++pe;
288
289	fullLength&=`0xffff`; / bits 16 and higher are reserved /
290
291	/ skip the lowercase result string /
292	pe+=fullLength&UCASE_FULL_LOWER;
293	fullLength>>=`4`;
294
295	/ add the full case folding string /
296	int32_t length=fullLength&`0xf`;
297	if(length!=`0`) {
298	sa->addString(sa->set, (const char16_t *)pe, length);
299	pe+=length;
300	}
301
302	/ skip the uppercase and titlecase strings /
303	fullLength>>=`4`;
304	pe+=fullLength&`0xf`;
305	fullLength>>=`4`;
306	pe+=fullLength;
307
308	closure=(const char16_t )pe; /* behind full case mappings /
309	}
310
311	/ add each code point in the closure string /
312	for(int32_t idx=`0`; idx<closureLength;) {
313	UChar32 mapping;
314	U16_NEXT_UNSAFE(closure, idx, mapping);
315	sa->add(sa->set, mapping);
316	}
317	}
318	}
319
320	namespace {
321
322	/**
323	* Add the simple case closure mapping,
324	* except if there is not actually an scf relationship between the two characters.
325	* TODO: Unicode should probably add the corresponding scf mappings.
326	* See https://crbug.com/v8/13377 and Unicode-internal PAG issue #23.
327	* If & when those scf mappings are added, we should be able to remove all of these exceptions.
328	*/
329	void addOneSimpleCaseClosure(UChar32 c, UChar32 t, const USetAdder *sa) {
330	switch (c) {
331	case `0x0390`:
332	if (t == `0x1FD3`) { return; }
333	break;
334	case `0x03B0`:
335	if (t == `0x1FE3`) { return; }
336	break;
337	case `0x1FD3`:
338	if (t == `0x0390`) { return; }
339	break;
340	case `0x1FE3`:
341	if (t == `0x03B0`) { return; }
342	break;
343	case `0xFB05`:
344	if (t == `0xFB06`) { return; }
345	break;
346	case `0xFB06`:
347	if (t == `0xFB05`) { return; }
348	break;
349	default:
350	break;
351	}
352	sa->add(sa->set, t);
353	}
354
355	} // namespace
356
357	U_CFUNC void U_EXPORT2
358	ucase_addSimpleCaseClosure(UChar32 c, const USetAdder *sa) {
359	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
360	if(!UCASE_HAS_EXCEPTION(props)) {
361	if(UCASE_GET_TYPE(props)!=UCASE_NONE) {
362	/ add the one simple case mapping, no matter what type it is /
363	int32_t delta=UCASE_GET_DELTA(props);
364	if(delta!=`0`) {
365	sa->add(sa->set, c+delta);
366	}
367	}
368	} else {
369	// c has exceptions. Add the mappings relevant for scf=Simple_Case_Folding.
370	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
371	uint16_t excWord=*pe++;
372	const uint16_t *pe0=pe;
373
374	// Hardcode the case closure of i and its relatives and ignore the
375	// data file data for these characters, like in ucase_addCaseClosure().
376	if (excWord&UCASE_EXC_CONDITIONAL_FOLD) {
377	// These characters have Turkic case foldings. Hardcode their closure.
378	if (c == `0x49`) {
379	// Regular i and I are in one equivalence class.
380	sa->add(sa->set, `0x69`);
381	return;
382	} else if (c == `0x130`) {
383	// For scf=Simple_Case_Folding, dotted I is in a class by itself.
384	return;
385	}
386	} else if (c == `0x69`) {
387	sa->add(sa->set, `0x49`);
388	return;
389	} else if (c == `0x131`) {
390	// Dotless i is in a class by itself.
391	return;
392	}
393
394	// Add all simple case mappings.
395	for(int32_t idx=UCASE_EXC_LOWER; idx<=UCASE_EXC_TITLE; ++idx) {
396	if(HAS_SLOT(excWord, idx)) {
397	pe=pe0;
398	UChar32 mapping;
399	GET_SLOT_VALUE(excWord, idx, pe, mapping);
400	addOneSimpleCaseClosure(c, mapping, sa);
401	}
402	}
403	if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) {
404	pe=pe0;
405	int32_t delta;
406	GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
407	UChar32 mapping = (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==`0` ? c+delta : c-delta;
408	addOneSimpleCaseClosure(c, mapping, sa);
409	}
410
411	/ get the closure string pointer & length /
412	const char16_t *closure;
413	int32_t closureLength;
414	if(HAS_SLOT(excWord, UCASE_EXC_CLOSURE)) {
415	pe=pe0;
416	GET_SLOT_VALUE(excWord, UCASE_EXC_CLOSURE, pe, closureLength);
417	closureLength&=UCASE_CLOSURE_MAX_LENGTH; / higher bits are reserved /
418	closure=(const char16_t )pe+`1`; /* behind this slot, unless there are full case mappings /
419	} else {
420	closureLength=`0`;
421	closure=nullptr;
422	}
423
424	// Skip the full case mappings.
425	if(closureLength > `0` && HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
426	pe=pe0;
427	int32_t fullLength;
428	GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, fullLength);
429
430	/ start of full case mapping strings /
431	++pe;
432
433	fullLength&=`0xffff`; / bits 16 and higher are reserved /
434
435	// Skip all 4 full case mappings.
436	pe+=fullLength&UCASE_FULL_LOWER;
437	fullLength>>=`4`;
438	pe+=fullLength&`0xf`;
439	fullLength>>=`4`;
440	pe+=fullLength&`0xf`;
441	fullLength>>=`4`;
442	pe+=fullLength;
443
444	closure=(const char16_t )pe; /* behind full case mappings /
445	}
446
447	// Add each code point in the closure string whose scf maps back to c.
448	for(int32_t idx=`0`; idx<closureLength;) {
449	UChar32 mapping;
450	U16_NEXT_UNSAFE(closure, idx, mapping);
451	addOneSimpleCaseClosure(c, mapping, sa);
452	}
453	}
454	}
455
/*
 * compare s, which has a length, with t, which has a maximum length or is NUL-terminated
 * must be length>0 and max>0 and length<=max
 *
 * @param s      string to look for, not NUL-terminated
 * @param length number of code units in s
 * @param t      candidate string; ends at max code units or at the first NUL
 * @param max    maximum number of code units in t
 * @return 0 if t is exactly s; 1 if t ends before s does; the difference of
 *         the first mismatching code units (s minus t) otherwise; or
 *         -(max-length) if t continues after matching all of s
 */
static inline int32_t
strcmpMax(const char16_t *s, int32_t length, const char16_t *t, int32_t max) {
    int32_t c1, c2;

    max-=length; /* we require length<=max, so no need to decrement max in the loop */
    do {
        c1=*s++;
        c2=*t++;
        if(c2==0) {
            return 1; /* reached the end of t but not of s */
        }
        c1-=c2;
        if(c1!=0) {
            return c1; /* return difference result */
        }
    } while(--length>0);
    /* ends with length==0 */

    /* max==0 is tested first so that t is not read past its maximum length */
    if(max==0 || *t==0) {
        return 0; /* equal to length of both strings */
    } else {
        return -max; /* return length difference */
    }
}
484
485	U_CFUNC UBool U_EXPORT2
486	ucase_addStringCaseClosure(const char16_t s, int32_t length, const* USetAdder *sa) {
487	int32_t i, start, limit, result, unfoldRows, unfoldRowWidth, unfoldStringWidth;
488
489	if(ucase_props_singleton.unfold==nullptr \|\| s==nullptr) {
490	return false; / no reverse case folding data, or no string /
491	}
492	if(length<=`1`) {
493	/ the string is too short to find any match /
494	/*
495	* more precise would be:
496	* if(!u_strHasMoreChar32Than(s, length, 1))
497	* but this does not make much practical difference because
498	* a single supplementary code point would just not be found
499	*/
500	return false;
501	}
502
503	const uint16_t *unfold=ucase_props_singleton.unfold;
504	unfoldRows=unfold[UCASE_UNFOLD_ROWS];
505	unfoldRowWidth=unfold[UCASE_UNFOLD_ROW_WIDTH];
506	unfoldStringWidth=unfold[UCASE_UNFOLD_STRING_WIDTH];
507	unfold+=unfoldRowWidth;
508
509	if(length>unfoldStringWidth) {
510	/ the string is too long to find any match /
511	return false;
512	}
513
514	/ do a binary search for the string /
515	start=`0`;
516	limit=unfoldRows;
517	while(start<limit) {
518	i=(start+limit)/`2`;
519	const char16_t p=reinterpret_cast<const* char16_t >(unfold+(iunfoldRowWidth));
520	result=strcmpMax(s, length, p, unfoldStringWidth);
521
522	if(result==`0`) {
523	/ found the string: add each code point, and its case closure /
524	UChar32 c;
525
526	for(i=unfoldStringWidth; i<unfoldRowWidth && p[i]!=`0`;) {
527	U16_NEXT_UNSAFE(p, i, c);
528	sa->add(sa->set, c);
529	ucase_addCaseClosure(c, sa);
530	}
531	return true;
532	} else if(result<`0`) {
533	limit=i;
534	} else / result>0 / {
535	start=i+`1`;
536	}
537	}
538
539	return false; / string not found /
540	}
541
542	U_NAMESPACE_BEGIN
543
544	FullCaseFoldingIterator::FullCaseFoldingIterator()
545	: unfold(reinterpret_cast<const char16_t *>(ucase_props_singleton.unfold)),
546	unfoldRows(unfold[UCASE_UNFOLD_ROWS]),
547	unfoldRowWidth(unfold[UCASE_UNFOLD_ROW_WIDTH]),
548	unfoldStringWidth(unfold[UCASE_UNFOLD_STRING_WIDTH]),
549	currentRow(`0`),
550	rowCpIndex(unfoldStringWidth) {
551	unfold+=unfoldRowWidth;
552	}
553
554	UChar32
555	FullCaseFoldingIterator::next(UnicodeString &full) {
556	// Advance past the last-delivered code point.
557	const char16_t p=unfold+(currentRowunfoldRowWidth);
558	if(rowCpIndex>=unfoldRowWidth \|\| p[rowCpIndex]==`0`) {
559	++currentRow;
560	p+=unfoldRowWidth;
561	rowCpIndex=unfoldStringWidth;
562	}
563	if(currentRow>=unfoldRows) { return U_SENTINEL; }
564	// Set "full" to the NUL-terminated string in the first unfold column.
565	int32_t length=unfoldStringWidth;
566	while(length>`0` && p[length-`1`]==`0`) { --length; }
567	full.setTo(false, p, length);
568	// Return the code point.
569	UChar32 c;
570	U16_NEXT_UNSAFE(p, rowCpIndex, c);
571	return c;
572	}
573
574	namespace LatinCase {
575
576	const int8_t TO_LOWER_NORMAL[LIMIT] = {
577	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
578	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
579	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
580	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
581
582	`0`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
583	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `0`, `0`, `0`, `0`, `0`,
584	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
585	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
586
587	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
588	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
589	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
590	`0`, `0`, `0`, `0`, `0`, EXC, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
591
592	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`,
593	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `0`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, EXC,
594	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
595	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
596
597	`1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`,
598	`1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`,
599	`1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`,
600	EXC, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`,
601
602	`0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, EXC, `1`, `0`, `1`, `0`, `1`, `0`,
603	`1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`,
604	`1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`,
605	`1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, -`121`, `1`, `0`, `1`, `0`, `1`, `0`, EXC
606	};
607
608	const int8_t TO_LOWER_TR_LT[LIMIT] = {
609	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
610	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
611	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
612	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
613
614	`0`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, EXC, EXC, `32`, `32`, `32`, `32`, `32`,
615	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `0`, `0`, `0`, `0`, `0`,
616	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
617	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
618
619	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
620	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
621	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
622	`0`, `0`, `0`, `0`, `0`, EXC, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
623
624	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, EXC, EXC, `32`, `32`,
625	`32`, `32`, `32`, `32`, `32`, `32`, `32`, `0`, `32`, `32`, `32`, `32`, `32`, `32`, `32`, EXC,
626	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
627	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
628
629	`1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`,
630	`1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`,
631	`1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, EXC, `0`, `1`, `0`, `1`, `0`, EXC, `0`,
632	EXC, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`,
633
634	`0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, EXC, `1`, `0`, `1`, `0`, `1`, `0`,
635	`1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`,
636	`1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`,
637	`1`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, -`121`, `1`, `0`, `1`, `0`, `1`, `0`, EXC
638	};
639
640	const int8_t TO_UPPER_NORMAL[LIMIT] = {
641	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
642	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
643	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
644	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
645
646	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
647	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
648	`0`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`,
649	-`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, `0`, `0`, `0`, `0`, `0`,
650
651	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
652	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
653	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
654	`0`, `0`, `0`, `0`, `0`, EXC, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
655
656	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
657	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, EXC,
658	-`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`,
659	-`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, `0`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, `121`,
660
661	`0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`,
662	`0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`,
663	`0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`,
664	`0`, EXC, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`,
665
666	-`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, EXC, `0`, -`1`, `0`, -`1`, `0`, -`1`,
667	`0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`,
668	`0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`,
669	`0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, `0`, -`1`, `0`, -`1`, `0`, -`1`, EXC
670	};
671
672	const int8_t TO_UPPER_TR[LIMIT] = {
673	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
674	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
675	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
676	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
677
678	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
679	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
680	`0`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, EXC, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`,
681	-`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, `0`, `0`, `0`, `0`, `0`,
682
683	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
684	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
685	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
686	`0`, `0`, `0`, `0`, `0`, EXC, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
687
688	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
689	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, EXC,
690	-`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`,
691	-`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, `0`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, -`32`, `121`,
692
693	`0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`,
694	`0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`,
695	`0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`,
696	`0`, EXC, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`,
697
698	-`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, EXC, `0`, -`1`, `0`, -`1`, `0`, -`1`,
699	`0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`,
700	`0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`,
701	`0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, -`1`, `0`, `0`, -`1`, `0`, -`1`, `0`, -`1`, EXC
702	};
703
704	} // namespace LatinCase
705
706	U_NAMESPACE_END
707
708	/* @return UCASE_NONE, UCASE_LOWER, UCASE_UPPER, UCASE_TITLE /
709	U_CAPI int32_t U_EXPORT2
710	ucase_getType(UChar32 c) {
711	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
712	return UCASE_GET_TYPE(props);
713	}
714
715	/* @return same as ucase_getType() and set bit 2 if c is case-ignorable /
716	U_CAPI int32_t U_EXPORT2
717	ucase_getTypeOrIgnorable(UChar32 c) {
718	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
719	return UCASE_GET_TYPE_AND_IGNORABLE(props);
720	}
721
722	/* @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT /
723	static inline int32_t
724	getDotType(UChar32 c) {
725	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
726	if(!UCASE_HAS_EXCEPTION(props)) {
727	return props&UCASE_DOT_MASK;
728	} else {
729	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
730	return (*pe>>UCASE_EXC_DOT_SHIFT)&UCASE_DOT_MASK;
731	}
732	}
733
734	U_CAPI UBool U_EXPORT2
735	ucase_isSoftDotted(UChar32 c) {
736	return (UBool)(getDotType(c)==UCASE_SOFT_DOTTED);
737	}
738
739	U_CAPI UBool U_EXPORT2
740	ucase_isCaseSensitive(UChar32 c) {
741	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
742	if(!UCASE_HAS_EXCEPTION(props)) {
743	return (UBool)((props&UCASE_SENSITIVE)!=`0`);
744	} else {
745	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
746	return (UBool)((*pe&UCASE_EXC_SENSITIVE)!=`0`);
747	}
748	}
749
/* string casing ------------------------------------------------------------ */
751
752	/*
753	* These internal functions form the core of string case mappings.
754	* They map single code points to result code points or strings and take
755	* all necessary conditions (context, locale ID, options) into account.
756	*
757	* They do not iterate over the source or write to the destination
758	* so that the same functions are useful for non-standard string storage,
759	* such as in a Replaceable (for Transliterator) or UTF-8/32 strings etc.
760	* For the same reason, the "surrounding text" context is passed in as a
761	* UCaseContextIterator which does not make any assumptions about
762	* the underlying storage.
763	*
764	* This section contains helper functions that check for conditions
765	* in the input text surrounding the current code point
766	* according to SpecialCasing.txt.
767	*
768	* Each helper function gets the index
769	* - after the current code point if it looks at following text
770	* - before the current code point if it looks at preceding text
771	*
772	* Unicode 3.2 UAX 21 "Case Mappings" defines the conditions as follows:
773	*
774	* Final_Sigma
775	* C is preceded by a sequence consisting of
776	* a cased letter and a case-ignorable sequence,
777	* and C is not followed by a sequence consisting of
778	* an ignorable sequence and then a cased letter.
779	*
780	* More_Above
781	* C is followed by one or more characters of combining class 230 (ABOVE)
782	* in the combining character sequence.
783	*
784	* After_Soft_Dotted
785	* The last preceding character with combining class of zero before C
786	* was Soft_Dotted,
787	* and there is no intervening combining character class 230 (ABOVE).
788	*
789	* Before_Dot
790	* C is followed by combining dot above (U+0307).
791	* Any sequence of characters with a combining class that is neither 0 nor 230
792	* may intervene between the current character and the combining dot above.
793	*
794	* The erratum from 2002-10-31 adds the condition
795	*
796	* After_I
797	* The last preceding base character was an uppercase I, and there is no
798	* intervening combining character class 230 (ABOVE).
799	*
800	* (See Jitterbug 2344 and the comments on After_I below.)
801	*
802	* Helper definitions in Unicode 3.2 UAX 21:
803	*
804	* D1. A character C is defined to be cased
805	* if it meets any of the following criteria:
806	*
807	* - The general category of C is Titlecase Letter (Lt)
808	* - In [CoreProps], C has one of the properties Uppercase, or Lowercase
809	* - Given D = NFD(C), then it is not the case that:
810	* D = UCD_lower(D) = UCD_upper(D) = UCD_title(D)
811	* (This third criterion does not add any characters to the list
812	* for Unicode 3.2. Ignored.)
813	*
814	* D2. A character C is defined to be case-ignorable
815	* if it meets either of the following criteria:
816	*
817	* - The general category of C is
818	* Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or
819	* Letter Modifier (Lm), or Symbol Modifier (Sk)
820	* - C is one of the following characters
821	* U+0027 APOSTROPHE
822	* U+00AD SOFT HYPHEN (SHY)
823	* U+2019 RIGHT SINGLE QUOTATION MARK
824	* (the preferred character for apostrophe)
825	*
826	* D3. A case-ignorable sequence is a sequence of
827	* zero or more case-ignorable characters.
828	*/
829
/*
 * Case-insensitive tests for the letters that occur in the language codes
 * recognized by ucase_getCaseLocale().
 * Each macro evaluates its argument twice: pass only expressions
 * without side effects.
 */
#define is_d(c) ((c)=='d' || (c)=='D')
#define is_e(c) ((c)=='e' || (c)=='E')
#define is_i(c) ((c)=='i' || (c)=='I')
#define is_l(c) ((c)=='l' || (c)=='L')
#define is_r(c) ((c)=='r' || (c)=='R')
#define is_t(c) ((c)=='t' || (c)=='T')
#define is_u(c) ((c)=='u' || (c)=='U')
#define is_y(c) ((c)=='y' || (c)=='Y')
#define is_z(c) ((c)=='z' || (c)=='Z')

/* separator? ('_', '-', or the terminating NUL of the locale ID) */
#define is_sep(c) ((c)=='_' || (c)=='-' || (c)==0)
842
843	/**
844	* Requires non-nullptr locale ID but otherwise does the equivalent of
845	* checking for language codes as if uloc_getLanguage() were called:
846	* Accepts both 2- and 3-letter codes and accepts case variants.
847	*/
848	U_CFUNC int32_t
849	ucase_getCaseLocale(const char *locale) {
850	/*
851	* This function used to use uloc_getLanguage(), but the current code
852	* removes the dependency of this low-level code on uloc implementation code
853	* and is faster because not the whole locale ID has to be
854	* examined and copied/transformed.
855	*
856	* Because this code does not want to depend on uloc, the caller must
857	* pass in a non-nullptr locale, i.e., may need to call uloc_getDefault().
858	*/
859	char c=*locale++;
860	// Fastpath for English "en" which is often used for default (=root locale) case mappings,
861	// and for Chinese "zh": Very common but no special case mapping behavior.
862	// Then check lowercase vs. uppercase to reduce the number of comparisons
863	// for other locales without special behavior.
864	if(c==`'e'`) {
865	/ el or ell? /
866	c=*locale++;
867	if(is_l(c)) {
868	c=*locale++;
869	if(is_l(c)) {
870	c=*locale;
871	}
872	if(is_sep(c)) {
873	return UCASE_LOC_GREEK;
874	}
875	}
876	// en, es, ... -> root
877	} else if(c==`'z'`) {
878	return UCASE_LOC_ROOT;
879	#if U_CHARSET_FAMILY==U_ASCII_FAMILY
880	} else if(c>=`'a'`) { // ASCII a-z = 0x61..0x7a, after A-Z
881	#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
882	} else if(c<=`'z'`) { // EBCDIC a-z = 0x81..0xa9 with two gaps, before A-Z
883	#else
884	# error Unknown charset family!
885	#endif
886	// lowercase c
887	if(c==`'t'`) {
888	/ tr or tur? /
889	c=*locale++;
890	if(is_u(c)) {
891	c=*locale++;
892	}
893	if(is_r(c)) {
894	c=*locale;
895	if(is_sep(c)) {
896	return UCASE_LOC_TURKISH;
897	}
898	}
899	} else if(c==`'a'`) {
900	/ az or aze? /
901	c=*locale++;
902	if(is_z(c)) {
903	c=*locale++;
904	if(is_e(c)) {
905	c=*locale;
906	}
907	if(is_sep(c)) {
908	return UCASE_LOC_TURKISH;
909	}
910	}
911	} else if(c==`'l'`) {
912	/ lt or lit? /
913	c=*locale++;
914	if(is_i(c)) {
915	c=*locale++;
916	}
917	if(is_t(c)) {
918	c=*locale;
919	if(is_sep(c)) {
920	return UCASE_LOC_LITHUANIAN;
921	}
922	}
923	} else if(c==`'n'`) {
924	/ nl or nld? /
925	c=*locale++;
926	if(is_l(c)) {
927	c=*locale++;
928	if(is_d(c)) {
929	c=*locale;
930	}
931	if(is_sep(c)) {
932	return UCASE_LOC_DUTCH;
933	}
934	}
935	} else if(c==`'h'`) {
936	/ hy or hye? not hyw /
937	c=*locale++;
938	if(is_y(c)) {
939	c=*locale++;
940	if(is_e(c)) {
941	c=*locale;
942	}
943	if(is_sep(c)) {
944	return UCASE_LOC_ARMENIAN;
945	}
946	}
947	}
948	} else {
949	// uppercase c
950	// Same code as for lowercase c but also check for 'E'.
951	if(c==`'T'`) {
952	/ tr or tur? /
953	c=*locale++;
954	if(is_u(c)) {
955	c=*locale++;
956	}
957	if(is_r(c)) {
958	c=*locale;
959	if(is_sep(c)) {
960	return UCASE_LOC_TURKISH;
961	}
962	}
963	} else if(c==`'A'`) {
964	/ az or aze? /
965	c=*locale++;
966	if(is_z(c)) {
967	c=*locale++;
968	if(is_e(c)) {
969	c=*locale;
970	}
971	if(is_sep(c)) {
972	return UCASE_LOC_TURKISH;
973	}
974	}
975	} else if(c==`'L'`) {
976	/ lt or lit? /
977	c=*locale++;
978	if(is_i(c)) {
979	c=*locale++;
980	}
981	if(is_t(c)) {
982	c=*locale;
983	if(is_sep(c)) {
984	return UCASE_LOC_LITHUANIAN;
985	}
986	}
987	} else if(c==`'E'`) {
988	/ el or ell? /
989	c=*locale++;
990	if(is_l(c)) {
991	c=*locale++;
992	if(is_l(c)) {
993	c=*locale;
994	}
995	if(is_sep(c)) {
996	return UCASE_LOC_GREEK;
997	}
998	}
999	} else if(c==`'N'`) {
1000	/ nl or nld? /
1001	c=*locale++;
1002	if(is_l(c)) {
1003	c=*locale++;
1004	if(is_d(c)) {
1005	c=*locale;
1006	}
1007	if(is_sep(c)) {
1008	return UCASE_LOC_DUTCH;
1009	}
1010	}
1011	} else if(c==`'H'`) {
1012	/ hy or hye? not hyw /
1013	c=*locale++;
1014	if(is_y(c)) {
1015	c=*locale++;
1016	if(is_e(c)) {
1017	c=*locale;
1018	}
1019	if(is_sep(c)) {
1020	return UCASE_LOC_ARMENIAN;
1021	}
1022	}
1023	}
1024	}
1025	return UCASE_LOC_ROOT;
1026	}
1027
1028	/*
1029	* Is followed by
1030	* {case-ignorable}* cased
1031	* ?
1032	* (dir determines looking forward/backward)
1033	* If a character is case-ignorable, it is skipped regardless of whether
1034	* it is also cased or not.
1035	*/
1036	static UBool
1037	isFollowedByCasedLetter(UCaseContextIterator iter, void* *context, int8_t dir) {
1038	UChar32 c;
1039
1040	if(iter==nullptr) {
1041	return false;
1042	}
1043
1044	for(/ dir!=0 sets direction /; (c=iter(context, dir))>=`0`; dir=`0`) {
1045	int32_t type=ucase_getTypeOrIgnorable(c);
1046	if(type&`4`) {
1047	/ case-ignorable, continue with the loop /
1048	} else if(type!=UCASE_NONE) {
1049	return true; / followed by cased letter /
1050	} else {
1051	return false; / uncased and not case-ignorable /
1052	}
1053	}
1054
1055	return false; / not followed by cased letter /
1056	}
1057
1058	/ Is preceded by Soft_Dotted character with no intervening cc=230 ? /
1059	static UBool
1060	isPrecededBySoftDotted(UCaseContextIterator iter, void* *context) {
1061	UChar32 c;
1062	int32_t dotType;
1063	int8_t dir;
1064
1065	if(iter==nullptr) {
1066	return false;
1067	}
1068
1069	for(dir=-`1`; (c=iter(context, dir))>=`0`; dir=`0`) {
1070	dotType=getDotType(c);
1071	if(dotType==UCASE_SOFT_DOTTED) {
1072	return true; / preceded by TYPE_i /
1073	} else if(dotType!=UCASE_OTHER_ACCENT) {
1074	return false; / preceded by different base character (not TYPE_i), or intervening cc==230 /
1075	}
1076	}
1077
1078	return false; / not preceded by TYPE_i /
1079	}
1080
1081	/*
1082	* See Jitterbug 2344:
1083	* The condition After_I for Turkic-lowercasing of U+0307 combining dot above
1084	* is checked in ICU 2.0, 2.1, 2.6 but was not in 2.2 & 2.4 because
1085	* we made those releases compatible with Unicode 3.2 which had not fixed
1086	* a related bug in SpecialCasing.txt.
1087	*
1088	* From the Jitterbug 2344 text:
1089	* ... this bug is listed as a Unicode erratum
1090	* from 2002-10-31 at http://www.unicode.org/uni2errata/UnicodeErrata.html
1091	* <quote>
1092	* There are two errors in SpecialCasing.txt.
1093	* 1. Missing semicolons on two lines. ... [irrelevant for ICU]
1094	* 2. An incorrect context definition. Correct as follows:
1095	* < 0307; ; 0307; 0307; tr After_Soft_Dotted; # COMBINING DOT ABOVE
1096	* < 0307; ; 0307; 0307; az After_Soft_Dotted; # COMBINING DOT ABOVE
1097	* ---
1098	* > 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
1099	* > 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
1100	* where the context After_I is defined as:
1101	* The last preceding base character was an uppercase I, and there is no
1102	* intervening combining character class 230 (ABOVE).
1103	* </quote>
1104	*
1105	* Note that SpecialCasing.txt even in Unicode 3.2 described the condition as:
1106	*
1107	* # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
1108	* # This matches the behavior of the canonically equivalent I-dot_above
1109	*
1110	* See also the description in this place in older versions of uchar.c (revision 1.100).
1111	*
1112	* Markus W. Scherer 2003-feb-15
1113	*/
1114
1115	/ Is preceded by base character 'I' with no intervening cc=230 ? /
1116	static UBool
1117	isPrecededBy_I(UCaseContextIterator iter, void* *context) {
1118	UChar32 c;
1119	int32_t dotType;
1120	int8_t dir;
1121
1122	if(iter==nullptr) {
1123	return false;
1124	}
1125
1126	for(dir=-`1`; (c=iter(context, dir))>=`0`; dir=`0`) {
1127	if(c==`0x49`) {
1128	return true; / preceded by I /
1129	}
1130	dotType=getDotType(c);
1131	if(dotType!=UCASE_OTHER_ACCENT) {
1132	return false; / preceded by different base character (not I), or intervening cc==230 /
1133	}
1134	}
1135
1136	return false; / not preceded by I /
1137	}
1138
1139	/ Is followed by one or more cc==230 ? /
1140	static UBool
1141	isFollowedByMoreAbove(UCaseContextIterator iter, void* *context) {
1142	UChar32 c;
1143	int32_t dotType;
1144	int8_t dir;
1145
1146	if(iter==nullptr) {
1147	return false;
1148	}
1149
1150	for(dir=`1`; (c=iter(context, dir))>=`0`; dir=`0`) {
1151	dotType=getDotType(c);
1152	if(dotType==UCASE_ABOVE) {
1153	return true; / at least one cc==230 following /
1154	} else if(dotType!=UCASE_OTHER_ACCENT) {
1155	return false; / next base character, no more cc==230 following /
1156	}
1157	}
1158
1159	return false; / no more cc==230 following /
1160	}
1161
1162	/ Is followed by a dot above (without cc==230 in between) ? /
1163	static UBool
1164	isFollowedByDotAbove(UCaseContextIterator iter, void* *context) {
1165	UChar32 c;
1166	int32_t dotType;
1167	int8_t dir;
1168
1169	if(iter==nullptr) {
1170	return false;
1171	}
1172
1173	for(dir=`1`; (c=iter(context, dir))>=`0`; dir=`0`) {
1174	if(c==`0x307`) {
1175	return true;
1176	}
1177	dotType=getDotType(c);
1178	if(dotType!=UCASE_OTHER_ACCENT) {
1179	return false; / next base character or cc==230 in between /
1180	}
1181	}
1182
1183	return false; / no dot above following /
1184	}
1185
1186	U_CAPI int32_t U_EXPORT2
1187	ucase_toFullLower(UChar32 c,
1188	UCaseContextIterator iter, void* *context,
1189	const char16_t **pString,
1190	int32_t loc) {
1191	// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
1192	U_ASSERT(c >= `0`);
1193	UChar32 result=c;
1194	// Reset the output pointer in case it was uninitialized.
1195	pString=nullptr*;
1196	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
1197	if(!UCASE_HAS_EXCEPTION(props)) {
1198	if(UCASE_IS_UPPER_OR_TITLE(props)) {
1199	result=c+UCASE_GET_DELTA(props);
1200	}
1201	} else {
1202	const uint16_t pe=GET_EXCEPTIONS(&ucase_props_singleton, props), pe2;
1203	uint16_t excWord=*pe++;
1204	int32_t full;
1205
1206	pe2=pe;
1207
1208	if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
1209	/ use hardcoded conditions and mappings /
1210
1211	/*
1212	* Test for conditional mappings first
1213	* (otherwise the unconditional default mappings are always taken),
1214	* then test for characters that have unconditional mappings in SpecialCasing.txt,
1215	* then get the UnicodeData.txt mappings.
1216	*/
1217	if( loc==UCASE_LOC_LITHUANIAN &&
1218	/ base characters, find accents above /
1219	(((c==`0x49` \|\| c==`0x4a` \|\| c==`0x12e`) &&
1220	isFollowedByMoreAbove(iter, context)) \|\|
1221	/ precomposed with accent above, no need to find one /
1222	(c==`0xcc` \|\| c==`0xcd` \|\| c==`0x128`))
1223	) {
1224	/*
1225	# Lithuanian
1226
1227	# Lithuanian retains the dot in a lowercase i when followed by accents.
1228
1229	# Introduce an explicit dot above when lowercasing capital I's and J's
1230	# whenever there are more accents above.
1231	# (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
1232
1233	0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
1234	004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
1235	012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
1236	00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
1237	00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
1238	0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
1239	*/
1240	switch(c) {
1241	case `0x49`: / LATIN CAPITAL LETTER I /
1242	*pString=iDot;
1243	return `2`;
1244	case `0x4a`: / LATIN CAPITAL LETTER J /
1245	*pString=jDot;
1246	return `2`;
1247	case `0x12e`: / LATIN CAPITAL LETTER I WITH OGONEK /
1248	*pString=iOgonekDot;
1249	return `2`;
1250	case `0xcc`: / LATIN CAPITAL LETTER I WITH GRAVE /
1251	*pString=iDotGrave;
1252	return `3`;
1253	case `0xcd`: / LATIN CAPITAL LETTER I WITH ACUTE /
1254	*pString=iDotAcute;
1255	return `3`;
1256	case `0x128`: / LATIN CAPITAL LETTER I WITH TILDE /
1257	*pString=iDotTilde;
1258	return `3`;
1259	default:
1260	return `0`; / will not occur /
1261	}
1262	/ # Turkish and Azeri /
1263	} else if(loc==UCASE_LOC_TURKISH && c==`0x130`) {
1264	/*
1265	# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
1266	# The following rules handle those cases.
1267
1268	0130; 0069; 0130; 0130; tr # LATIN CAPITAL LETTER I WITH DOT ABOVE
1269	0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
1270	*/
1271	return `0x69`;
1272	} else if(loc==UCASE_LOC_TURKISH && c==`0x307` && isPrecededBy_I(iter, context)) {
1273	/*
1274	# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
1275	# This matches the behavior of the canonically equivalent I-dot_above
1276
1277	0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
1278	0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
1279	*/
1280	return `0`; / remove the dot (continue without output) /
1281	} else if(loc==UCASE_LOC_TURKISH && c==`0x49` && !isFollowedByDotAbove(iter, context)) {
1282	/*
1283	# When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
1284
1285	0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
1286	0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
1287	*/
1288	return `0x131`;
1289	} else if(c==`0x130`) {
1290	/*
1291	# Preserve canonical equivalence for I with dot. Turkic is handled below.
1292
1293	0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1294	*/
1295	*pString=iDot;
1296	return `2`;
1297	} else if( c==`0x3a3` &&
1298	!isFollowedByCasedLetter(iter, context, `1`) &&
1299	isFollowedByCasedLetter(iter, context, -`1`) / -1=preceded /
1300	) {
1301	/ greek capital sigma maps depending on surrounding cased letters (see SpecialCasing.txt) /
1302	/*
1303	# Special case for final form of sigma
1304
1305	03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
1306	*/
1307	return `0x3c2`; / greek small final sigma /
1308	} else {
1309	/ no known conditional special case mapping, use a normal mapping /
1310	}
1311	} else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
1312	GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
1313	full&=UCASE_FULL_LOWER;
1314	if(full!=`0`) {
1315	/ set the output pointer to the lowercase mapping /
1316	pString=reinterpret_cast<const* char16_t *>(pe+`1`);
1317
1318	/ return the string length /
1319	return full;
1320	}
1321	}
1322
1323	if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
1324	int32_t delta;
1325	GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
1326	return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==`0` ? c+delta : c-delta;
1327	}
1328	if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
1329	GET_SLOT_VALUE(excWord, UCASE_EXC_LOWER, pe2, result);
1330	}
1331	}
1332
1333	return (result==c) ? ~result : result;
1334	}
1335
1336	/ internal /
1337	static int32_t
1338	toUpperOrTitle(UChar32 c,
1339	UCaseContextIterator iter, void* *context,
1340	const char16_t **pString,
1341	int32_t loc,
1342	UBool upperNotTitle) {
1343	// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
1344	U_ASSERT(c >= `0`);
1345	UChar32 result=c;
1346	// Reset the output pointer in case it was uninitialized.
1347	pString=nullptr*;
1348	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
1349	if(!UCASE_HAS_EXCEPTION(props)) {
1350	if(UCASE_GET_TYPE(props)==UCASE_LOWER) {
1351	result=c+UCASE_GET_DELTA(props);
1352	}
1353	} else {
1354	const uint16_t pe=GET_EXCEPTIONS(&ucase_props_singleton, props), pe2;
1355	uint16_t excWord=*pe++;
1356	int32_t full, idx;
1357
1358	pe2=pe;
1359
1360	if(excWord&UCASE_EXC_CONDITIONAL_SPECIAL) {
1361	/ use hardcoded conditions and mappings /
1362	if(loc==UCASE_LOC_TURKISH && c==`0x69`) {
1363	/*
1364	# Turkish and Azeri
1365
1366	# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
1367	# The following rules handle those cases.
1368
1369	# When uppercasing, i turns into a dotted capital I
1370
1371	0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
1372	0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
1373	*/
1374	return `0x130`;
1375	} else if(loc==UCASE_LOC_LITHUANIAN && c==`0x307` && isPrecededBySoftDotted(iter, context)) {
1376	/*
1377	# Lithuanian
1378
1379	# Lithuanian retains the dot in a lowercase i when followed by accents.
1380
1381	# Remove DOT ABOVE after "i" with upper or titlecase
1382
1383	0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
1384	*/
1385	return `0`; / remove the dot (continue without output) /
1386	} else if(c==`0x0587`) {
1387	// See ICU-13416:
1388	// և ligature ech-yiwn
1389	// uppercases to ԵՒ=ech+yiwn by default and in Western Armenian,
1390	// but to ԵՎ=ech+vew in Eastern Armenian.
1391	if(loc==UCASE_LOC_ARMENIAN) {
1392	*pString=upperNotTitle ? u"ԵՎ" : u"Եվ";
1393	} else {
1394	*pString=upperNotTitle ? u"ԵՒ" : u"Եւ";
1395	}
1396	return `2`;
1397	} else {
1398	/ no known conditional special case mapping, use a normal mapping /
1399	}
1400	} else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
1401	GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
1402
1403	/ start of full case mapping strings /
1404	++pe;
1405
1406	/ skip the lowercase and case-folding result strings /
1407	pe+=full&UCASE_FULL_LOWER;
1408	full>>=`4`;
1409	pe+=full&`0xf`;
1410	full>>=`4`;
1411
1412	if(upperNotTitle) {
1413	full&=`0xf`;
1414	} else {
1415	/ skip the uppercase result string /
1416	pe+=full&`0xf`;
1417	full=(full>>`4`)&`0xf`;
1418	}
1419
1420	if(full!=`0`) {
1421	/ set the output pointer to the result string /
1422	pString=reinterpret_cast<const* char16_t *>(pe);
1423
1424	/ return the string length /
1425	return full;
1426	}
1427	}
1428
1429	if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) {
1430	int32_t delta;
1431	GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
1432	return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==`0` ? c+delta : c-delta;
1433	}
1434	if(!upperNotTitle && HAS_SLOT(excWord, UCASE_EXC_TITLE)) {
1435	idx=UCASE_EXC_TITLE;
1436	} else if(HAS_SLOT(excWord, UCASE_EXC_UPPER)) {
1437	/ here, titlecase is same as uppercase /
1438	idx=UCASE_EXC_UPPER;
1439	} else {
1440	return ~c;
1441	}
1442	GET_SLOT_VALUE(excWord, idx, pe2, result);
1443	}
1444
1445	return (result==c) ? ~result : result;
1446	}
1447
1448	U_CAPI int32_t U_EXPORT2
1449	ucase_toFullUpper(UChar32 c,
1450	UCaseContextIterator iter, void* *context,
1451	const char16_t **pString,
1452	int32_t caseLocale) {
1453	return toUpperOrTitle(c, iter, context, pString, caseLocale, true);
1454	}
1455
1456	U_CAPI int32_t U_EXPORT2
1457	ucase_toFullTitle(UChar32 c,
1458	UCaseContextIterator iter, void* *context,
1459	const char16_t **pString,
1460	int32_t caseLocale) {
1461	return toUpperOrTitle(c, iter, context, pString, caseLocale, false);
1462	}
1463
/* case folding ------------------------------------------------------------- */
1465
1466	/*
1467	* Case folding is similar to lowercasing.
1468	* The result may be a simple mapping, i.e., a single code point, or
1469	* a full mapping, i.e., a string.
1470	* If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
1471	* then only the lowercase mapping is stored.
1472	*
1473	* Some special cases are hardcoded because their conditions cannot be
1474	* parsed and processed from CaseFolding.txt.
1475	*
1476	* Unicode 3.2 CaseFolding.txt specifies for its status field:
1477
1478	# C: common case folding, common mappings shared by both simple and full mappings.
1479	# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
1480	# S: simple case folding, mappings to single characters where different from F.
1481	# T: special case for uppercase I and dotted uppercase I
1482	# - For non-Turkic languages, this mapping is normally not used.
1483	# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
1484	#
1485	# Usage:
1486	# A. To do a simple case folding, use the mappings with status C + S.
1487	# B. To do a full case folding, use the mappings with status C + F.
1488	#
1489	# The mappings with status T can be used or omitted depending on the desired case-folding
1490	# behavior. (The default option is to exclude them.)
1491
1492	* Unicode 3.2 has 'T' mappings as follows:
1493
1494	0049; T; 0131; # LATIN CAPITAL LETTER I
1495	0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1496
1497	* while the default mappings for these code points are:
1498
1499	0049; C; 0069; # LATIN CAPITAL LETTER I
1500	0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
1501
1502	* U+0130 has no simple case folding (simple-case-folds to itself).
1503	*/
1504
1505	/ return the simple case folding mapping for c /
1506	U_CAPI UChar32 U_EXPORT2
1507	ucase_fold(UChar32 c, uint32_t options) {
1508	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
1509	if(!UCASE_HAS_EXCEPTION(props)) {
1510	if(UCASE_IS_UPPER_OR_TITLE(props)) {
1511	c+=UCASE_GET_DELTA(props);
1512	}
1513	} else {
1514	const uint16_t *pe=GET_EXCEPTIONS(&ucase_props_singleton, props);
1515	uint16_t excWord=*pe++;
1516	int32_t idx;
1517	if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
1518	/ special case folding mappings, hardcoded /
1519	if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
1520	/ default mappings /
1521	if(c==`0x49`) {
1522	/ 0049; C; 0069; # LATIN CAPITAL LETTER I /
1523	return `0x69`;
1524	} else if(c==`0x130`) {
1525	/ no simple case folding for U+0130 /
1526	return c;
1527	}
1528	} else {
1529	/ Turkic mappings /
1530	if(c==`0x49`) {
1531	/ 0049; T; 0131; # LATIN CAPITAL LETTER I /
1532	return `0x131`;
1533	} else if(c==`0x130`) {
1534	/ 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE /
1535	return `0x69`;
1536	}
1537	}
1538	}
1539	if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=`0`) {
1540	return c;
1541	}
1542	if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
1543	int32_t delta;
1544	GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta);
1545	return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==`0` ? c+delta : c-delta;
1546	}
1547	if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
1548	idx=UCASE_EXC_FOLD;
1549	} else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
1550	idx=UCASE_EXC_LOWER;
1551	} else {
1552	return c;
1553	}
1554	GET_SLOT_VALUE(excWord, idx, pe, c);
1555	}
1556	return c;
1557	}
1558
1559	/*
1560	* Issue for canonical caseless match (UAX #21):
1561	* Turkic casefolding (using "T" mappings in CaseFolding.txt) does not preserve
1562	* canonical equivalence, unlike default-option casefolding.
1563	* For example, I-grave and I + grave fold to strings that are not canonically
1564	* equivalent.
1565	* For more details, see the comment in unorm_compare() in unorm.cpp
1566	* and the intermediate prototype changes for Jitterbug 2021.
1567	* (For example, revision 1.104 of uchar.c and 1.4 of CaseFolding.txt.)
1568	*
1569	* This did not get fixed because it appears that it is not possible to fix
1570	* it for uppercase and lowercase characters (I-grave vs. i-grave)
1571	* together in a way that they still fold to common result strings.
1572	*/
1573
1574	U_CAPI int32_t U_EXPORT2
1575	ucase_toFullFolding(UChar32 c,
1576	const char16_t **pString,
1577	uint32_t options) {
1578	// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
1579	U_ASSERT(c >= `0`);
1580	UChar32 result=c;
1581	// Reset the output pointer in case it was uninitialized.
1582	pString=nullptr*;
1583	uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c);
1584	if(!UCASE_HAS_EXCEPTION(props)) {
1585	if(UCASE_IS_UPPER_OR_TITLE(props)) {
1586	result=c+UCASE_GET_DELTA(props);
1587	}
1588	} else {
1589	const uint16_t pe=GET_EXCEPTIONS(&ucase_props_singleton, props), pe2;
1590	uint16_t excWord=*pe++;
1591	int32_t full, idx;
1592
1593	pe2=pe;
1594
1595	if(excWord&UCASE_EXC_CONDITIONAL_FOLD) {
1596	/ use hardcoded conditions and mappings /
1597	if((options&_FOLD_CASE_OPTIONS_MASK)==U_FOLD_CASE_DEFAULT) {
1598	/ default mappings /
1599	if(c==`0x49`) {
1600	/ 0049; C; 0069; # LATIN CAPITAL LETTER I /
1601	return `0x69`;
1602	} else if(c==`0x130`) {
1603	/ 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE /
1604	*pString=iDot;
1605	return `2`;
1606	}
1607	} else {
1608	/ Turkic mappings /
1609	if(c==`0x49`) {
1610	/ 0049; T; 0131; # LATIN CAPITAL LETTER I /
1611	return `0x131`;
1612	} else if(c==`0x130`) {
1613	/ 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE /
1614	return `0x69`;
1615	}
1616	}
1617	} else if(HAS_SLOT(excWord, UCASE_EXC_FULL_MAPPINGS)) {
1618	GET_SLOT_VALUE(excWord, UCASE_EXC_FULL_MAPPINGS, pe, full);
1619
1620	/ start of full case mapping strings /
1621	++pe;
1622
1623	/ skip the lowercase result string /
1624	pe+=full&UCASE_FULL_LOWER;
1625	full=(full>>`4`)&`0xf`;
1626
1627	if(full!=`0`) {
1628	/ set the output pointer to the result string /
1629	pString=reinterpret_cast<const* char16_t *>(pe);
1630
1631	/ return the string length /
1632	return full;
1633	}
1634	}
1635
1636	if((excWord&UCASE_EXC_NO_SIMPLE_CASE_FOLDING)!=`0`) {
1637	return ~c;
1638	}
1639	if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) {
1640	int32_t delta;
1641	GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta);
1642	return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==`0` ? c+delta : c-delta;
1643	}
1644	if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) {
1645	idx=UCASE_EXC_FOLD;
1646	} else if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) {
1647	idx=UCASE_EXC_LOWER;
1648	} else {
1649	return ~c;
1650	}
1651	GET_SLOT_VALUE(excWord, idx, pe2, result);
1652	}
1653
1654	return (result==c) ? ~result : result;
1655	}
1656
/* case mapping properties API ---------------------------------------------- */
1658
/* public API (see uchar.h) */
1660
1661	U_CAPI UBool U_EXPORT2
1662	u_isULowercase(UChar32 c) {
1663	return (UBool)(UCASE_LOWER==ucase_getType(c));
1664	}
1665
1666	U_CAPI UBool U_EXPORT2
1667	u_isUUppercase(UChar32 c) {
1668	return (UBool)(UCASE_UPPER==ucase_getType(c));
1669	}
1670
1671	/ Transforms the Unicode character to its lower case equivalent./
1672	U_CAPI UChar32 U_EXPORT2
1673	u_tolower(UChar32 c) {
1674	return ucase_tolower(c);
1675	}
1676
1677	/ Transforms the Unicode character to its upper case equivalent./
1678	U_CAPI UChar32 U_EXPORT2
1679	u_toupper(UChar32 c) {
1680	return ucase_toupper(c);
1681	}
1682
1683	/ Transforms the Unicode character to its title case equivalent./
1684	U_CAPI UChar32 U_EXPORT2
1685	u_totitle(UChar32 c) {
1686	return ucase_totitle(c);
1687	}
1688
1689	/ return the simple case folding mapping for c /
1690	U_CAPI UChar32 U_EXPORT2
1691	u_foldCase(UChar32 c, uint32_t options) {
1692	return ucase_fold(c, options);
1693	}
1694
1695	U_CFUNC int32_t U_EXPORT2
1696	ucase_hasBinaryProperty(UChar32 c, UProperty which) {
1697	/ case mapping properties /
1698	const char16_t *resultString;
1699	switch(which) {
1700	case UCHAR_LOWERCASE:
1701	return (UBool)(UCASE_LOWER==ucase_getType(c));
1702	case UCHAR_UPPERCASE:
1703	return (UBool)(UCASE_UPPER==ucase_getType(c));
1704	case UCHAR_SOFT_DOTTED:
1705	return ucase_isSoftDotted(c);
1706	case UCHAR_CASE_SENSITIVE:
1707	return ucase_isCaseSensitive(c);
1708	case UCHAR_CASED:
1709	return (UBool)(UCASE_NONE!=ucase_getType(c));
1710	case UCHAR_CASE_IGNORABLE:
1711	return (UBool)(ucase_getTypeOrIgnorable(c)>>`2`);
1712	/*
1713	* Note: The following Changes_When_Xyz are defined as testing whether
1714	* the NFD form of the input changes when Xyz-case-mapped.
1715	* However, this simpler implementation of these properties,
1716	* ignoring NFD, passes the tests.
1717	* The implementation needs to be changed if the tests start failing.
1718	* When that happens, optimizations should be used to work with the
1719	* per-single-code point ucase_toFullXyz() functions unless
1720	* the NFD form has more than one code point,
1721	* and the property starts set needs to be the union of the
1722	* start sets for normalization and case mappings.
1723	*/
1724	case UCHAR_CHANGES_WHEN_LOWERCASED:
1725	return (UBool)(ucase_toFullLower(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=`0`);
1726	case UCHAR_CHANGES_WHEN_UPPERCASED:
1727	return (UBool)(ucase_toFullUpper(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=`0`);
1728	case UCHAR_CHANGES_WHEN_TITLECASED:
1729	return (UBool)(ucase_toFullTitle(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=`0`);
1730	/ case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c /
1731	case UCHAR_CHANGES_WHEN_CASEMAPPED:
1732	return (UBool)(
1733	ucase_toFullLower(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=`0` \|\|
1734	ucase_toFullUpper(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=`0` \|\|
1735	ucase_toFullTitle(c, nullptr, nullptr, &resultString, UCASE_LOC_ROOT)>=`0`);
1736	default:
1737	return false;
1738	}
1739	}
1740

Browse the source code of Godot/thirdparty/icu4c/common/ucase.cpp