ustrcase.cpp source code [Godot/thirdparty/icu4c/common/ustrcase.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	*******************************************************************************
5	*
6	* Copyright (C) 2001-2015, International Business Machines
7	* Corporation and others. All Rights Reserved.
8	*
9	*******************************************************************************
10	* file name: ustrcase.cpp
11	* encoding: UTF-8
12	* tab size: 8 (not used)
13	* indentation:4
14	*
15	* created on: 2002feb20
16	* created by: Markus W. Scherer
17	*
18	* Implementation file for string casing C API functions.
19	* Uses functions from uchar.c for basic functionality that requires access
20	* to the Unicode Character Database (uprops.dat).
21	*/
22
23	#include "unicode/utypes.h"
24	#include "unicode/brkiter.h"
25	#include "unicode/casemap.h"
26	#include "unicode/edits.h"
27	#include "unicode/stringoptions.h"
28	#include "unicode/ustring.h"
29	#include "unicode/ucasemap.h"
30	#include "unicode/ubrk.h"
31	#include "unicode/utf.h"
32	#include "unicode/utf16.h"
33	#include "cmemory.h"
34	#include "ucase.h"
35	#include "ucasemap_imp.h"
36	#include "ustr_imp.h"
37	#include "uassert.h"
38
39	/**
40	* Code point for COMBINING ACUTE ACCENT
41	* @internal
42	*/
43	#define ACUTE u'\u0301'
44
45	U_NAMESPACE_BEGIN
46
47	namespace {
48
49	int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
50	Edits *edits, UErrorCode &errorCode) {
51	if (U_SUCCESS(errorCode)) {
52	if (destIndex > destCapacity) {
53	errorCode = U_BUFFER_OVERFLOW_ERROR;
54	} else if (edits != nullptr) {
55	edits->copyErrorTo(errorCode);
56	}
57	}
58	return destIndex;
59	}
60
61	/ Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. /
62	inline int32_t
63	appendResult(char16_t *dest, int32_t destIndex, int32_t destCapacity,
64	int32_t result, const char16_t *s,
65	int32_t cpLength, uint32_t options, icu::Edits *edits) {
66	UChar32 c;
67	int32_t length;
68
69	/ decode the result /
70	if(result<`0`) {
71	/ (not) original code point /
72	if(edits!=nullptr) {
73	edits->addUnchanged(cpLength);
74	}
75	if(options & U_OMIT_UNCHANGED_TEXT) {
76	return destIndex;
77	}
78	c=~result;
79	if(destIndex<destCapacity && c<=`0xffff`) { // BMP slightly-fastpath
80	dest[destIndex++]=(char16_t)c;
81	return destIndex;
82	}
83	length=cpLength;
84	} else {
85	if(result<=UCASE_MAX_STRING_LENGTH) {
86	c=U_SENTINEL;
87	length=result;
88	} else if(destIndex<destCapacity && result<=`0xffff`) { // BMP slightly-fastpath
89	dest[destIndex++]=(char16_t)result;
90	if(edits!=nullptr) {
91	edits->addReplace(cpLength, `1`);
92	}
93	return destIndex;
94	} else {
95	c=result;
96	length=U16_LENGTH(c);
97	}
98	if(edits!=nullptr) {
99	edits->addReplace(cpLength, length);
100	}
101	}
102	if(length>(INT32_MAX-destIndex)) {
103	return -`1`; // integer overflow
104	}
105
106	if(destIndex<destCapacity) {
107	/ append the result /
108	if(c>=`0`) {
109	/ code point /
110	UBool isError=false;
111	U16_APPEND(dest, destIndex, destCapacity, c, isError);
112	if(isError) {
113	/ overflow, nothing written /
114	destIndex+=length;
115	}
116	} else {
117	/ string /
118	if((destIndex+length)<=destCapacity) {
119	while(length>`0`) {
120	dest[destIndex++]=*s++;
121	--length;
122	}
123	} else {
124	/ overflow /
125	destIndex+=length;
126	}
127	}
128	} else {
129	/ preflight /
130	destIndex+=length;
131	}
132	return destIndex;
133	}
134
/**
 * Appends a single UTF-16 code unit to dest.
 *
 * The unit is written only if destIndex is inside the buffer; otherwise the
 * call just counts it (preflighting).
 *
 * @param dest         destination buffer (not accessed when destIndex>=destCapacity)
 * @param destIndex    index at which to write
 * @param destCapacity capacity of dest in code units
 * @param c            the code unit to append
 * @return destIndex+1, or -1 if incrementing destIndex would overflow int32_t
 */
inline int32_t
appendUChar(char16_t *dest, int32_t destIndex, int32_t destCapacity, char16_t c) {
    if(destIndex<destCapacity) {
        dest[destIndex]=c;
    } else if(destIndex==INT32_MAX) {
        return -1;  // integer overflow
    }
    return destIndex+1;
}
144
145	int32_t
146	appendNonEmptyUnchanged(char16_t *dest, int32_t destIndex, int32_t destCapacity,
147	const char16_t s, int32_t length, uint32_t options, icu::Edits edits) {
148	if(edits!=nullptr) {
149	edits->addUnchanged(length);
150	}
151	if(options & U_OMIT_UNCHANGED_TEXT) {
152	return destIndex;
153	}
154	if(length>(INT32_MAX-destIndex)) {
155	return -`1`; // integer overflow
156	}
157	if((destIndex+length)<=destCapacity) {
158	u_memcpy(dest+destIndex, s, length);
159	}
160	return destIndex + length;
161	}
162
163	inline int32_t
164	appendUnchanged(char16_t *dest, int32_t destIndex, int32_t destCapacity,
165	const char16_t s, int32_t length, uint32_t options, icu::Edits edits) {
166	if (length <= `0`) {
167	return destIndex;
168	}
169	return appendNonEmptyUnchanged(dest, destIndex, destCapacity, s, length, options, edits);
170	}
171
172	UChar32 U_CALLCONV
173	utf16_caseContextIterator(void *context, int8_t dir) {
174	UCaseContext csc=(UCaseContext )context;
175	UChar32 c;
176
177	if(dir<`0`) {
178	/ reset for backward iteration /
179	csc->index=csc->cpStart;
180	csc->dir=dir;
181	} else if(dir>`0`) {
182	/ reset for forward iteration /
183	csc->index=csc->cpLimit;
184	csc->dir=dir;
185	} else {
186	/ continue current iteration direction /
187	dir=csc->dir;
188	}
189
190	if(dir<`0`) {
191	if(csc->start<csc->index) {
192	U16_PREV((const char16_t *)csc->p, csc->start, csc->index, c);
193	return c;
194	}
195	} else {
196	if(csc->index<csc->limit) {
197	U16_NEXT((const char16_t *)csc->p, csc->index, csc->limit, c);
198	return c;
199	}
200	}
201	return U_SENTINEL;
202	}
203
204	/**
205	* caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
206	* caseLocale < 0: Case-folds [srcStart..srcLimit[.
207	*/
208	int32_t toLower(int32_t caseLocale, uint32_t options,
209	char16_t *dest, int32_t destCapacity,
210	const char16_t src, UCaseContext csc, int32_t srcStart, int32_t srcLimit,
211	icu::Edits *edits, UErrorCode &errorCode) {
212	const int8_t *latinToLower;
213	if (caseLocale == UCASE_LOC_ROOT \|\|
214	(caseLocale >= `0` ?
215	!(caseLocale == UCASE_LOC_TURKISH \|\| caseLocale == UCASE_LOC_LITHUANIAN) :
216	(options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
217	latinToLower = LatinCase::TO_LOWER_NORMAL;
218	} else {
219	latinToLower = LatinCase::TO_LOWER_TR_LT;
220	}
221	const UTrie2 *trie = ucase_getTrie();
222	int32_t destIndex = `0`;
223	int32_t prev = srcStart;
224	int32_t srcIndex = srcStart;
225	for (;;) {
226	// fast path for simple cases
227	char16_t lead = `0`;
228	while (srcIndex < srcLimit) {
229	lead = src[srcIndex];
230	int32_t delta;
231	if (lead < LatinCase::LONG_S) {
232	int8_t d = latinToLower[lead];
233	if (d == LatinCase::EXC) { break; }
234	++srcIndex;
235	if (d == `0`) { continue; }
236	delta = d;
237	} else if (lead >= `0xd800`) {
238	break; // surrogate or higher
239	} else {
240	uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
241	if (UCASE_HAS_EXCEPTION(props)) { break; }
242	++srcIndex;
243	if (!UCASE_IS_UPPER_OR_TITLE(props) \|\| (delta = UCASE_GET_DELTA(props)) == `0`) {
244	continue;
245	}
246	}
247	lead += static_cast<char16_t>(delta);
248	destIndex = appendUnchanged(dest, destIndex, destCapacity,
249	src + prev, srcIndex - `1` - prev, options, edits);
250	if (destIndex >= `0`) {
251	destIndex = appendUChar(dest, destIndex, destCapacity, lead);
252	if (edits != nullptr) {
253	edits->addReplace(`1`, `1`);
254	}
255	}
256	if (destIndex < `0`) {
257	errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
258	return `0`;
259	}
260	prev = srcIndex;
261	}
262	if (srcIndex >= srcLimit) {
263	break;
264	}
265	// slow path
266	int32_t cpStart = srcIndex++;
267	char16_t trail;
268	UChar32 c;
269	if (U16_IS_LEAD(lead) && srcIndex < srcLimit && U16_IS_TRAIL(trail = src[srcIndex])) {
270	c = U16_GET_SUPPLEMENTARY(lead, trail);
271	++srcIndex;
272	} else {
273	c = lead;
274	}
275	const char16_t *s;
276	if (caseLocale >= `0`) {
277	csc->cpStart = cpStart;
278	csc->cpLimit = srcIndex;
279	c = ucase_toFullLower(c, utf16_caseContextIterator, csc, &s, caseLocale);
280	} else {
281	c = ucase_toFullFolding(c, &s, options);
282	}
283	if (c >= `0`) {
284	destIndex = appendUnchanged(dest, destIndex, destCapacity,
285	src + prev, cpStart - prev, options, edits);
286	if (destIndex >= `0`) {
287	destIndex = appendResult(dest, destIndex, destCapacity, c, s,
288	srcIndex - cpStart, options, edits);
289	}
290	if (destIndex < `0`) {
291	errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
292	return `0`;
293	}
294	prev = srcIndex;
295	}
296	}
297	destIndex = appendUnchanged(dest, destIndex, destCapacity,
298	src + prev, srcIndex - prev, options, edits);
299	if (destIndex < `0`) {
300	errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
301	return `0`;
302	}
303	return destIndex;
304	}
305
306	int32_t toUpper(int32_t caseLocale, uint32_t options,
307	char16_t *dest, int32_t destCapacity,
308	const char16_t src, UCaseContext csc, int32_t srcLength,
309	icu::Edits *edits, UErrorCode &errorCode) {
310	const int8_t *latinToUpper;
311	if (caseLocale == UCASE_LOC_TURKISH) {
312	latinToUpper = LatinCase::TO_UPPER_TR;
313	} else {
314	latinToUpper = LatinCase::TO_UPPER_NORMAL;
315	}
316	const UTrie2 *trie = ucase_getTrie();
317	int32_t destIndex = `0`;
318	int32_t prev = `0`;
319	int32_t srcIndex = `0`;
320	for (;;) {
321	// fast path for simple cases
322	char16_t lead = `0`;
323	while (srcIndex < srcLength) {
324	lead = src[srcIndex];
325	int32_t delta;
326	if (lead < LatinCase::LONG_S) {
327	int8_t d = latinToUpper[lead];
328	if (d == LatinCase::EXC) { break; }
329	++srcIndex;
330	if (d == `0`) { continue; }
331	delta = d;
332	} else if (lead >= `0xd800`) {
333	break; // surrogate or higher
334	} else {
335	uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
336	if (UCASE_HAS_EXCEPTION(props)) { break; }
337	++srcIndex;
338	if (UCASE_GET_TYPE(props) != UCASE_LOWER \|\| (delta = UCASE_GET_DELTA(props)) == `0`) {
339	continue;
340	}
341	}
342	lead += static_cast<char16_t>(delta);
343	destIndex = appendUnchanged(dest, destIndex, destCapacity,
344	src + prev, srcIndex - `1` - prev, options, edits);
345	if (destIndex >= `0`) {
346	destIndex = appendUChar(dest, destIndex, destCapacity, lead);
347	if (edits != nullptr) {
348	edits->addReplace(`1`, `1`);
349	}
350	}
351	if (destIndex < `0`) {
352	errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
353	return `0`;
354	}
355	prev = srcIndex;
356	}
357	if (srcIndex >= srcLength) {
358	break;
359	}
360	// slow path
361	int32_t cpStart;
362	csc->cpStart = cpStart = srcIndex++;
363	char16_t trail;
364	UChar32 c;
365	if (U16_IS_LEAD(lead) && srcIndex < srcLength && U16_IS_TRAIL(trail = src[srcIndex])) {
366	c = U16_GET_SUPPLEMENTARY(lead, trail);
367	++srcIndex;
368	} else {
369	c = lead;
370	}
371	csc->cpLimit = srcIndex;
372	const char16_t *s;
373	c = ucase_toFullUpper(c, utf16_caseContextIterator, csc, &s, caseLocale);
374	if (c >= `0`) {
375	destIndex = appendUnchanged(dest, destIndex, destCapacity,
376	src + prev, cpStart - prev, options, edits);
377	if (destIndex >= `0`) {
378	destIndex = appendResult(dest, destIndex, destCapacity, c, s,
379	srcIndex - cpStart, options, edits);
380	}
381	if (destIndex < `0`) {
382	errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
383	return `0`;
384	}
385	prev = srcIndex;
386	}
387	}
388	destIndex = appendUnchanged(dest, destIndex, destCapacity,
389	src + prev, srcIndex - prev, options, edits);
390	if (destIndex < `0`) {
391	errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
392	return `0`;
393	}
394	return destIndex;
395	}
396
397	} // namespace
398
399	U_NAMESPACE_END
400
401	U_NAMESPACE_USE
402
403	#if !UCONFIG_NO_BREAK_ITERATION
404
405	namespace {
406
407	/**
408	* Input: c is a letter I with or without acute accent.
409	* start is the index in src after c, and is less than segmentLimit.
410	* If a plain i/I is followed by a plain j/J,
411	* or an i/I with acute (precomposed or decomposed) is followed by a j/J with acute,
412	* then we output accordingly.
413	*
414	* @return the src index after the titlecased sequence, or the start index if no Dutch IJ
415	*/
416	int32_t maybeTitleDutchIJ(const char16_t *src, UChar32 c, int32_t start, int32_t segmentLimit,
417	char16_t *dest, int32_t &destIndex, int32_t destCapacity, uint32_t options,
418	icu::Edits *edits) {
419	U_ASSERT(start < segmentLimit);
420
421	int32_t index = start;
422	bool withAcute = false;
423
424	// If the conditions are met, then the following variables tell us what to output.
425	int32_t unchanged1 = `0`; // code units before the j, or the whole sequence (0..3)
426	bool doTitleJ = false; // true if the j needs to be titlecased
427	int32_t unchanged2 = `0`; // after the j (0 or 1)
428
429	// next character after the first letter
430	char16_t c2 = src[index++];
431
432	// Is the first letter an i/I with accent?
433	if (c == u`'I'`) {
434	if (c2 == ACUTE) {
435	withAcute = true;
436	unchanged1 = `1`;
437	if (index == segmentLimit) { return start; }
438	c2 = src[index++];
439	}
440	} else { // Í
441	withAcute = true;
442	}
443
444	// Is the next character a j/J?
445	if (c2 == u`'j'`) {
446	doTitleJ = true;
447	} else if (c2 == u`'J'`) {
448	++unchanged1;
449	} else {
450	return start;
451	}
452
453	// A plain i/I must be followed by a plain j/J.
454	// An i/I with acute must be followed by a j/J with acute.
455	if (withAcute) {
456	if (index == segmentLimit \|\| src[index++] != ACUTE) { return start; }
457	if (doTitleJ) {
458	unchanged2 = `1`;
459	} else {
460	++unchanged1;
461	}
462	}
463
464	// There must not be another combining mark.
465	if (index < segmentLimit) {
466	int32_t cp;
467	int32_t i = index;
468	U16_NEXT(src, i, segmentLimit, cp);
469	uint32_t typeMask = U_GET_GC_MASK(cp);
470	if ((typeMask & U_GC_M_MASK) != `0`) {
471	return start;
472	}
473	}
474
475	// Output the rest of the Dutch IJ.
476	destIndex = appendUnchanged(dest, destIndex, destCapacity, src + start, unchanged1, options, edits);
477	start += unchanged1;
478	if (doTitleJ) {
479	destIndex = appendUChar(dest, destIndex, destCapacity, u`'J'`);
480	if (edits != nullptr) {
481	edits->addReplace(`1`, `1`);
482	}
483	++start;
484	}
485	destIndex = appendUnchanged(dest, destIndex, destCapacity, src + start, unchanged2, options, edits);
486
487	U_ASSERT(start + unchanged2 == index);
488	return index;
489	}
490
491	} // namespace
492
493	U_CFUNC int32_t U_CALLCONV
494	ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter,
495	char16_t *dest, int32_t destCapacity,
496	const char16_t *src, int32_t srcLength,
497	icu::Edits *edits,
498	UErrorCode &errorCode) {
499	if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) {
500	return `0`;
501	}
502
503	/ set up local variables /
504	UCaseContext csc=UCASECONTEXT_INITIALIZER;
505	csc.p=(void *)src;
506	csc.limit=srcLength;
507	int32_t destIndex=`0`;
508	int32_t prev=`0`;
509	bool isFirstIndex=true;
510
511	/ titlecasing loop /
512	while(prev<srcLength) {
513	/ find next index where to titlecase /
514	int32_t index;
515	if(isFirstIndex) {
516	isFirstIndex=false;
517	index=iter->first();
518	} else {
519	index=iter->next();
520	}
521	if(index==UBRK_DONE \|\| index>srcLength) {
522	index=srcLength;
523	}
524
525	/*
526	* Segment [prev..index[ into 3 parts:
527	* a) skipped characters (copy as-is) [prev..titleStart[
528	* b) first letter (titlecase) [titleStart..titleLimit[
529	* c) subsequent characters (lowercase) [titleLimit..index[
530	*/
531	if(prev<index) {
532	// Find and copy skipped characters [prev..titleStart[
533	int32_t titleStart=prev;
534	int32_t titleLimit=prev;
535	UChar32 c;
536	U16_NEXT(src, titleLimit, index, c);
537	if ((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==`0`) {
538	// Adjust the titlecasing index to the next cased character,
539	// or to the next letter/number/symbol/private use.
540	// Stop with titleStart<titleLimit<=index
541	// if there is a character to be titlecased,
542	// or else stop with titleStart==titleLimit==index.
543	bool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != `0`;
544	while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) {
545	titleStart=titleLimit;
546	if(titleLimit==index) {
547	break;
548	}
549	U16_NEXT(src, titleLimit, index, c);
550	}
551	if (prev < titleStart) {
552	destIndex=appendUnchanged(dest, destIndex, destCapacity,
553	src+prev, titleStart-prev, options, edits);
554	if(destIndex<`0`) {
555	errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
556	return `0`;
557	}
558	}
559	}
560
561	if(titleStart<titleLimit) {
562	/ titlecase c which is from [titleStart..titleLimit[ /
563	csc.cpStart=titleStart;
564	csc.cpLimit=titleLimit;
565	const char16_t *s;
566	c=ucase_toFullTitle(c, utf16_caseContextIterator, &csc, &s, caseLocale);
567	destIndex=appendResult(dest, destIndex, destCapacity, c, s,
568	titleLimit-titleStart, options, edits);
569	if(destIndex<`0`) {
570	errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
571	return `0`;
572	}
573
574	/ Special case Dutch IJ titlecasing /
575	if (titleStart+`1` < index &&
576	caseLocale == UCASE_LOC_DUTCH) {
577	if (c < `0`) {
578	c = ~c;
579	}
580
581	if (c == u`'I'` \|\| c == u`'Í'`) {
582	titleLimit = maybeTitleDutchIJ(src, c, titleStart + `1`, index,
583	dest, destIndex, destCapacity, options,
584	edits);
585	}
586	}
587
588	/ lowercase [titleLimit..index[ /
589	if(titleLimit<index) {
590	if((options&U_TITLECASE_NO_LOWERCASE)==`0`) {
591	/ Normal operation: Lowercase the rest of the word. /
592	destIndex+=
593	toLower(
594	caseLocale, options,
595	(dest==nullptr) ? nullptr: dest+destIndex, destCapacity-destIndex,
596	src, &csc, titleLimit, index,
597	edits, errorCode);
598	if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
599	errorCode=U_ZERO_ERROR;
600	}
601	if(U_FAILURE(errorCode)) {
602	return destIndex;
603	}
604	} else {
605	/ Optionally just copy the rest of the word unchanged. /
606	destIndex=appendUnchanged(dest, destIndex, destCapacity,
607	src+titleLimit, index-titleLimit, options, edits);
608	if(destIndex<`0`) {
609	errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
610	return `0`;
611	}
612	}
613	}
614	}
615	}
616
617	prev=index;
618	}
619
620	return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
621	}
622
623	#endif // !UCONFIG_NO_BREAK_ITERATION
624
625	U_NAMESPACE_BEGIN
626	namespace GreekUpper {
627
628	// Data generated by prototype code, see
629	// https://icu.unicode.org/design/case/greek-upper
630	// TODO: Move this data into ucase.icu.
631	static const uint16_t data0370[] = {
632	// U+0370..03FF
633	`0x0370`,
634	`0x0370`,
635	`0x0372`,
636	`0x0372`,
637	`0`,
638	`0`,
639	`0x0376`,
640	`0x0376`,
641	`0`,
642	`0`,
643	`0x037A`,
644	`0x03FD`,
645	`0x03FE`,
646	`0x03FF`,
647	`0`,
648	`0x037F`,
649	`0`,
650	`0`,
651	`0`,
652	`0`,
653	`0`,
654	`0`,
655	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
656	`0`,
657	`0x0395` \| HAS_VOWEL \| HAS_ACCENT,
658	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
659	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
660	`0`,
661	`0x039F` \| HAS_VOWEL \| HAS_ACCENT,
662	`0`,
663	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT,
664	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
665	`0x0399` \| HAS_VOWEL \| HAS_ACCENT \| HAS_DIALYTIKA,
666	`0x0391` \| HAS_VOWEL,
667	`0x0392`,
668	`0x0393`,
669	`0x0394`,
670	`0x0395` \| HAS_VOWEL,
671	`0x0396`,
672	`0x0397` \| HAS_VOWEL,
673	`0x0398`,
674	`0x0399` \| HAS_VOWEL,
675	`0x039A`,
676	`0x039B`,
677	`0x039C`,
678	`0x039D`,
679	`0x039E`,
680	`0x039F` \| HAS_VOWEL,
681	`0x03A0`,
682	`0x03A1`,
683	`0`,
684	`0x03A3`,
685	`0x03A4`,
686	`0x03A5` \| HAS_VOWEL,
687	`0x03A6`,
688	`0x03A7`,
689	`0x03A8`,
690	`0x03A9` \| HAS_VOWEL,
691	`0x0399` \| HAS_VOWEL \| HAS_DIALYTIKA,
692	`0x03A5` \| HAS_VOWEL \| HAS_DIALYTIKA,
693	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
694	`0x0395` \| HAS_VOWEL \| HAS_ACCENT,
695	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
696	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
697	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT \| HAS_DIALYTIKA,
698	`0x0391` \| HAS_VOWEL,
699	`0x0392`,
700	`0x0393`,
701	`0x0394`,
702	`0x0395` \| HAS_VOWEL,
703	`0x0396`,
704	`0x0397` \| HAS_VOWEL,
705	`0x0398`,
706	`0x0399` \| HAS_VOWEL,
707	`0x039A`,
708	`0x039B`,
709	`0x039C`,
710	`0x039D`,
711	`0x039E`,
712	`0x039F` \| HAS_VOWEL,
713	`0x03A0`,
714	`0x03A1`,
715	`0x03A3`,
716	`0x03A3`,
717	`0x03A4`,
718	`0x03A5` \| HAS_VOWEL,
719	`0x03A6`,
720	`0x03A7`,
721	`0x03A8`,
722	`0x03A9` \| HAS_VOWEL,
723	`0x0399` \| HAS_VOWEL \| HAS_DIALYTIKA,
724	`0x03A5` \| HAS_VOWEL \| HAS_DIALYTIKA,
725	`0x039F` \| HAS_VOWEL \| HAS_ACCENT,
726	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT,
727	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
728	`0x03CF`,
729	`0x0392`,
730	`0x0398`,
731	`0x03D2`,
732	`0x03D2` \| HAS_ACCENT,
733	`0x03D2` \| HAS_DIALYTIKA,
734	`0x03A6`,
735	`0x03A0`,
736	`0x03CF`,
737	`0x03D8`,
738	`0x03D8`,
739	`0x03DA`,
740	`0x03DA`,
741	`0x03DC`,
742	`0x03DC`,
743	`0x03DE`,
744	`0x03DE`,
745	`0x03E0`,
746	`0x03E0`,
747	`0`,
748	`0`,
749	`0`,
750	`0`,
751	`0`,
752	`0`,
753	`0`,
754	`0`,
755	`0`,
756	`0`,
757	`0`,
758	`0`,
759	`0`,
760	`0`,
761	`0x039A`,
762	`0x03A1`,
763	`0x03F9`,
764	`0x037F`,
765	`0x03F4`,
766	`0x0395` \| HAS_VOWEL,
767	`0`,
768	`0x03F7`,
769	`0x03F7`,
770	`0x03F9`,
771	`0x03FA`,
772	`0x03FA`,
773	`0x03FC`,
774	`0x03FD`,
775	`0x03FE`,
776	`0x03FF`,
777	};
778
779	static const uint16_t data1F00[] = {
780	// U+1F00..1FFF
781	`0x0391` \| HAS_VOWEL,
782	`0x0391` \| HAS_VOWEL,
783	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
784	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
785	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
786	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
787	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
788	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
789	`0x0391` \| HAS_VOWEL,
790	`0x0391` \| HAS_VOWEL,
791	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
792	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
793	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
794	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
795	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
796	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
797	`0x0395` \| HAS_VOWEL,
798	`0x0395` \| HAS_VOWEL,
799	`0x0395` \| HAS_VOWEL \| HAS_ACCENT,
800	`0x0395` \| HAS_VOWEL \| HAS_ACCENT,
801	`0x0395` \| HAS_VOWEL \| HAS_ACCENT,
802	`0x0395` \| HAS_VOWEL \| HAS_ACCENT,
803	`0`,
804	`0`,
805	`0x0395` \| HAS_VOWEL,
806	`0x0395` \| HAS_VOWEL,
807	`0x0395` \| HAS_VOWEL \| HAS_ACCENT,
808	`0x0395` \| HAS_VOWEL \| HAS_ACCENT,
809	`0x0395` \| HAS_VOWEL \| HAS_ACCENT,
810	`0x0395` \| HAS_VOWEL \| HAS_ACCENT,
811	`0`,
812	`0`,
813	`0x0397` \| HAS_VOWEL,
814	`0x0397` \| HAS_VOWEL,
815	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
816	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
817	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
818	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
819	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
820	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
821	`0x0397` \| HAS_VOWEL,
822	`0x0397` \| HAS_VOWEL,
823	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
824	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
825	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
826	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
827	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
828	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
829	`0x0399` \| HAS_VOWEL,
830	`0x0399` \| HAS_VOWEL,
831	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
832	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
833	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
834	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
835	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
836	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
837	`0x0399` \| HAS_VOWEL,
838	`0x0399` \| HAS_VOWEL,
839	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
840	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
841	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
842	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
843	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
844	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
845	`0x039F` \| HAS_VOWEL,
846	`0x039F` \| HAS_VOWEL,
847	`0x039F` \| HAS_VOWEL \| HAS_ACCENT,
848	`0x039F` \| HAS_VOWEL \| HAS_ACCENT,
849	`0x039F` \| HAS_VOWEL \| HAS_ACCENT,
850	`0x039F` \| HAS_VOWEL \| HAS_ACCENT,
851	`0`,
852	`0`,
853	`0x039F` \| HAS_VOWEL,
854	`0x039F` \| HAS_VOWEL,
855	`0x039F` \| HAS_VOWEL \| HAS_ACCENT,
856	`0x039F` \| HAS_VOWEL \| HAS_ACCENT,
857	`0x039F` \| HAS_VOWEL \| HAS_ACCENT,
858	`0x039F` \| HAS_VOWEL \| HAS_ACCENT,
859	`0`,
860	`0`,
861	`0x03A5` \| HAS_VOWEL,
862	`0x03A5` \| HAS_VOWEL,
863	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT,
864	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT,
865	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT,
866	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT,
867	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT,
868	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT,
869	`0`,
870	`0x03A5` \| HAS_VOWEL,
871	`0`,
872	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT,
873	`0`,
874	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT,
875	`0`,
876	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT,
877	`0x03A9` \| HAS_VOWEL,
878	`0x03A9` \| HAS_VOWEL,
879	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
880	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
881	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
882	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
883	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
884	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
885	`0x03A9` \| HAS_VOWEL,
886	`0x03A9` \| HAS_VOWEL,
887	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
888	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
889	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
890	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
891	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
892	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
893	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
894	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
895	`0x0395` \| HAS_VOWEL \| HAS_ACCENT,
896	`0x0395` \| HAS_VOWEL \| HAS_ACCENT,
897	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
898	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
899	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
900	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
901	`0x039F` \| HAS_VOWEL \| HAS_ACCENT,
902	`0x039F` \| HAS_VOWEL \| HAS_ACCENT,
903	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT,
904	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT,
905	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
906	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
907	`0`,
908	`0`,
909	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
910	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
911	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
912	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
913	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
914	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
915	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
916	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
917	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
918	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
919	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
920	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
921	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
922	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
923	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
924	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
925	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
926	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
927	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
928	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
929	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
930	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
931	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
932	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
933	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
934	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
935	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
936	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
937	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
938	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
939	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
940	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
941	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
942	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
943	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
944	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
945	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
946	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
947	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
948	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
949	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
950	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
951	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
952	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
953	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
954	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
955	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
956	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
957	`0x0391` \| HAS_VOWEL,
958	`0x0391` \| HAS_VOWEL,
959	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
960	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
961	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
962	`0`,
963	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
964	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
965	`0x0391` \| HAS_VOWEL,
966	`0x0391` \| HAS_VOWEL,
967	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
968	`0x0391` \| HAS_VOWEL \| HAS_ACCENT,
969	`0x0391` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
970	`0`,
971	`0x0399` \| HAS_VOWEL,
972	`0`,
973	`0`,
974	`0`,
975	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
976	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
977	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
978	`0`,
979	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
980	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
981	`0x0395` \| HAS_VOWEL \| HAS_ACCENT,
982	`0x0395` \| HAS_VOWEL \| HAS_ACCENT,
983	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
984	`0x0397` \| HAS_VOWEL \| HAS_ACCENT,
985	`0x0397` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
986	`0`,
987	`0`,
988	`0`,
989	`0x0399` \| HAS_VOWEL,
990	`0x0399` \| HAS_VOWEL,
991	`0x0399` \| HAS_VOWEL \| HAS_ACCENT \| HAS_DIALYTIKA,
992	`0x0399` \| HAS_VOWEL \| HAS_ACCENT \| HAS_DIALYTIKA,
993	`0`,
994	`0`,
995	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
996	`0x0399` \| HAS_VOWEL \| HAS_ACCENT \| HAS_DIALYTIKA,
997	`0x0399` \| HAS_VOWEL,
998	`0x0399` \| HAS_VOWEL,
999	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
1000	`0x0399` \| HAS_VOWEL \| HAS_ACCENT,
1001	`0`,
1002	`0`,
1003	`0`,
1004	`0`,
1005	`0x03A5` \| HAS_VOWEL,
1006	`0x03A5` \| HAS_VOWEL,
1007	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT \| HAS_DIALYTIKA,
1008	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT \| HAS_DIALYTIKA,
1009	`0x03A1`,
1010	`0x03A1`,
1011	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT,
1012	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT \| HAS_DIALYTIKA,
1013	`0x03A5` \| HAS_VOWEL,
1014	`0x03A5` \| HAS_VOWEL,
1015	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT,
1016	`0x03A5` \| HAS_VOWEL \| HAS_ACCENT,
1017	`0x03A1`,
1018	`0`,
1019	`0`,
1020	`0`,
1021	`0`,
1022	`0`,
1023	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
1024	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
1025	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
1026	`0`,
1027	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
1028	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI \| HAS_ACCENT,
1029	`0x039F` \| HAS_VOWEL \| HAS_ACCENT,
1030	`0x039F` \| HAS_VOWEL \| HAS_ACCENT,
1031	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
1032	`0x03A9` \| HAS_VOWEL \| HAS_ACCENT,
1033	`0x03A9` \| HAS_VOWEL \| HAS_YPOGEGRAMMENI,
1034	`0`,
1035	`0`,
1036	`0`,
1037	};
1038
1039	// U+2126 Ohm sign
1040	static const uint16_t data2126 = `0x03A9` \| HAS_VOWEL;
1041
1042	uint32_t getLetterData(UChar32 c) {
1043	if (c < `0x370` \|\| `0x2126` < c \|\| (`0x3ff` < c && c < `0x1f00`)) {
1044	return `0`;
1045	} else if (c <= `0x3ff`) {
1046	return data0370[c - `0x370`];
1047	} else if (c <= `0x1fff`) {
1048	return data1F00[c - `0x1f00`];
1049	} else if (c == `0x2126`) {
1050	return data2126;
1051	} else {
1052	return `0`;
1053	}
1054	}
1055
1056	uint32_t getDiacriticData(UChar32 c) {
1057	switch (c) {
1058	case `0x0300`: // varia
1059	case `0x0301`: // tonos = oxia
1060	case `0x0342`: // perispomeni
1061	case `0x0302`: // circumflex can look like perispomeni
1062	case `0x0303`: // tilde can look like perispomeni
1063	case `0x0311`: // inverted breve can look like perispomeni
1064	return HAS_ACCENT;
1065	case `0x0308`: // dialytika = diaeresis
1066	return HAS_COMBINING_DIALYTIKA;
1067	case `0x0344`: // dialytika tonos
1068	return HAS_COMBINING_DIALYTIKA \| HAS_ACCENT;
1069	case `0x0345`: // ypogegrammeni = iota subscript
1070	return HAS_YPOGEGRAMMENI;
1071	case `0x0304`: // macron
1072	case `0x0306`: // breve
1073	case `0x0313`: // comma above
1074	case `0x0314`: // reversed comma above
1075	case `0x0343`: // koronis
1076	return HAS_OTHER_GREEK_DIACRITIC;
1077	default:
1078	return `0`;
1079	}
1080	}
1081
1082	UBool isFollowedByCasedLetter(const char16_t *s, int32_t i, int32_t length) {
1083	while (i < length) {
1084	UChar32 c;
1085	U16_NEXT(s, i, length, c);
1086	int32_t type = ucase_getTypeOrIgnorable(c);
1087	if ((type & UCASE_IGNORABLE) != `0`) {
1088	// Case-ignorable, continue with the loop.
1089	} else if (type != UCASE_NONE) {
1090	return true; // Followed by cased letter.
1091	} else {
1092	return false; // Uncased and not case-ignorable.
1093	}
1094	}
1095	return false; // Not followed by cased letter.
1096	}
1097
1098	/**
1099	* Greek string uppercasing with a state machine.
1100	* Probably simpler than a stateless function that has to figure out complex context-before
1101	* for each character.
1102	* TODO: Try to re-consolidate one way or another with the non-Greek function.
1103	*/
1104	int32_t toUpper(uint32_t options,
1105	char16_t *dest, int32_t destCapacity,
1106	const char16_t *src, int32_t srcLength,
1107	Edits *edits,
1108	UErrorCode &errorCode) {
1109	int32_t destIndex=`0`;
1110	uint32_t state = `0`;
1111	for (int32_t i = `0`; i < srcLength;) {
1112	int32_t nextIndex = i;
1113	UChar32 c;
1114	U16_NEXT(src, nextIndex, srcLength, c);
1115	uint32_t nextState = `0`;
1116	int32_t type = ucase_getTypeOrIgnorable(c);
1117	if ((type & UCASE_IGNORABLE) != `0`) {
1118	// c is case-ignorable
1119	nextState \|= (state & AFTER_CASED);
1120	} else if (type != UCASE_NONE) {
1121	// c is cased
1122	nextState \|= AFTER_CASED;
1123	}
1124	uint32_t data = getLetterData(c);
1125	if (data > `0`) {
1126	uint32_t upper = data & UPPER_MASK;
1127	// Add a dialytika to this iota or ypsilon vowel
1128	// if we removed a tonos from the previous vowel,
1129	// and that previous vowel did not also have (or gain) a dialytika.
1130	// Adding one only to the final vowel in a longer sequence
1131	// (which does not occur in normal writing) would require lookahead.
1132	// Set the same flag as for preserving an existing dialytika.
1133	if ((data & HAS_VOWEL) != `0` && (state & AFTER_VOWEL_WITH_ACCENT) != `0` &&
1134	(upper == `0x399` \|\| upper == `0x3A5`)) {
1135	data \|= HAS_DIALYTIKA;
1136	}
1137	int32_t numYpogegrammeni = `0`; // Map each one to a trailing, spacing, capital iota.
1138	if ((data & HAS_YPOGEGRAMMENI) != `0`) {
1139	numYpogegrammeni = `1`;
1140	}
1141	// Skip combining diacritics after this Greek letter.
1142	while (nextIndex < srcLength) {
1143	uint32_t diacriticData = getDiacriticData(src[nextIndex]);
1144	if (diacriticData != `0`) {
1145	data \|= diacriticData;
1146	if ((diacriticData & HAS_YPOGEGRAMMENI) != `0`) {
1147	++numYpogegrammeni;
1148	}
1149	++nextIndex;
1150	} else {
1151	break; // not a Greek diacritic
1152	}
1153	}
1154	if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
1155	nextState \|= AFTER_VOWEL_WITH_ACCENT;
1156	}
1157	// Map according to Greek rules.
1158	UBool addTonos = false;
1159	if (upper == `0x397` &&
1160	(data & HAS_ACCENT) != `0` &&
1161	numYpogegrammeni == `0` &&
1162	(state & AFTER_CASED) == `0` &&
1163	!isFollowedByCasedLetter(src, nextIndex, srcLength)) {
1164	// Keep disjunctive "or" with (only) a tonos.
1165	// We use the same "word boundary" conditions as for the Final_Sigma test.
1166	if (i == nextIndex) {
1167	upper = `0x389`; // Preserve the precomposed form.
1168	} else {
1169	addTonos = true;
1170	}
1171	} else if ((data & HAS_DIALYTIKA) != `0`) {
1172	// Preserve a vowel with dialytika in precomposed form if it exists.
1173	if (upper == `0x399`) {
1174	upper = `0x3AA`;
1175	data &= ~HAS_EITHER_DIALYTIKA;
1176	} else if (upper == `0x3A5`) {
1177	upper = `0x3AB`;
1178	data &= ~HAS_EITHER_DIALYTIKA;
1179	}
1180	}
1181
1182	UBool change;
1183	if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == `0`) {
1184	change = true; // common, simple usage
1185	} else {
1186	// Find out first whether we are changing the text.
1187	change = src[i] != upper \|\| numYpogegrammeni > `0`;
1188	int32_t i2 = i + `1`;
1189	if ((data & HAS_EITHER_DIALYTIKA) != `0`) {
1190	change \|= i2 >= nextIndex \|\| src[i2] != `0x308`;
1191	++i2;
1192	}
1193	if (addTonos) {
1194	change \|= i2 >= nextIndex \|\| src[i2] != `0x301`;
1195	++i2;
1196	}
1197	int32_t oldLength = nextIndex - i;
1198	int32_t newLength = (i2 - i) + numYpogegrammeni;
1199	change \|= oldLength != newLength;
1200	if (change) {
1201	if (edits != nullptr) {
1202	edits->addReplace(oldLength, newLength);
1203	}
1204	} else {
1205	if (edits != nullptr) {
1206	edits->addUnchanged(oldLength);
1207	}
1208	// Write unchanged text?
1209	change = (options & U_OMIT_UNCHANGED_TEXT) == `0`;
1210	}
1211	}
1212
1213	if (change) {
1214	destIndex=appendUChar(dest, destIndex, destCapacity, (char16_t)upper);
1215	if (destIndex >= `0` && (data & HAS_EITHER_DIALYTIKA) != `0`) {
1216	destIndex=appendUChar(dest, destIndex, destCapacity, `0x308`); // restore or add a dialytika
1217	}
1218	if (destIndex >= `0` && addTonos) {
1219	destIndex=appendUChar(dest, destIndex, destCapacity, `0x301`);
1220	}
1221	while (destIndex >= `0` && numYpogegrammeni > `0`) {
1222	destIndex=appendUChar(dest, destIndex, destCapacity, `0x399`);
1223	--numYpogegrammeni;
1224	}
1225	if(destIndex<`0`) {
1226	errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1227	return `0`;
1228	}
1229	}
1230	} else {
1231	const char16_t *s;
1232	c=ucase_toFullUpper(c, nullptr, nullptr, &s, UCASE_LOC_GREEK);
1233	destIndex = appendResult(dest, destIndex, destCapacity, c, s,
1234	nextIndex - i, options, edits);
1235	if (destIndex < `0`) {
1236	errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
1237	return `0`;
1238	}
1239	}
1240	i = nextIndex;
1241	state = nextState;
1242	}
1243
1244	return destIndex;
1245	}
1246
1247	} // namespace GreekUpper
1248	U_NAMESPACE_END
1249
/* functions available in the common library (for unistr_case.cpp) */
1251
1252	U_CFUNC int32_t U_CALLCONV
1253	ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
1254	char16_t *dest, int32_t destCapacity,
1255	const char16_t *src, int32_t srcLength,
1256	icu::Edits *edits,
1257	UErrorCode &errorCode) {
1258	UCaseContext csc=UCASECONTEXT_INITIALIZER;
1259	csc.p=(void *)src;
1260	csc.limit=srcLength;
1261	int32_t destIndex = toLower(
1262	caseLocale, options,
1263	dest, destCapacity,
1264	src, &csc, `0`, srcLength,
1265	edits, errorCode);
1266	return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
1267	}
1268
1269	U_CFUNC int32_t U_CALLCONV
1270	ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
1271	char16_t *dest, int32_t destCapacity,
1272	const char16_t *src, int32_t srcLength,
1273	icu::Edits *edits,
1274	UErrorCode &errorCode) {
1275	int32_t destIndex;
1276	if (caseLocale == UCASE_LOC_GREEK) {
1277	destIndex = GreekUpper::toUpper(options, dest, destCapacity,
1278	src, srcLength, edits, errorCode);
1279	} else {
1280	UCaseContext csc=UCASECONTEXT_INITIALIZER;
1281	csc.p=(void *)src;
1282	csc.limit=srcLength;
1283	destIndex = toUpper(
1284	caseLocale, options,
1285	dest, destCapacity,
1286	src, &csc, srcLength,
1287	edits, errorCode);
1288	}
1289	return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
1290	}
1291
1292	U_CFUNC int32_t U_CALLCONV
1293	ustrcase_internalFold(int32_t / caseLocale /, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
1294	char16_t *dest, int32_t destCapacity,
1295	const char16_t *src, int32_t srcLength,
1296	icu::Edits *edits,
1297	UErrorCode &errorCode) {
1298	int32_t destIndex = toLower(
1299	-`1`, options,
1300	dest, destCapacity,
1301	src, nullptr, `0`, srcLength,
1302	edits, errorCode);
1303	return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
1304	}
1305
1306	U_CFUNC int32_t
1307	ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
1308	char16_t *dest, int32_t destCapacity,
1309	const char16_t *src, int32_t srcLength,
1310	UStringCaseMapper *stringCaseMapper,
1311	icu::Edits *edits,
1312	UErrorCode &errorCode) {
1313	int32_t destLength;
1314
1315	/ check argument values /
1316	if(U_FAILURE(errorCode)) {
1317	return `0`;
1318	}
1319	if( destCapacity<`0` \|\|
1320	(dest==nullptr && destCapacity>`0`) \|\|
1321	src==nullptr \|\|
1322	srcLength<-`1`
1323	) {
1324	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
1325	return `0`;
1326	}
1327
1328	/ get the string length /
1329	if(srcLength==-`1`) {
1330	srcLength=u_strlen(src);
1331	}
1332
1333	/ check for overlapping source and destination /
1334	if( dest!=nullptr &&
1335	((src>=dest && src<(dest+destCapacity)) \|\|
1336	(dest>=src && dest<(src+srcLength)))
1337	) {
1338	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
1339	return `0`;
1340	}
1341
1342	if (edits != nullptr && (options & U_EDITS_NO_RESET) == `0`) {
1343	edits->reset();
1344	}
1345	destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
1346	dest, destCapacity, src, srcLength, edits, errorCode);
1347	return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
1348	}
1349
1350	U_CFUNC int32_t
1351	ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
1352	char16_t *dest, int32_t destCapacity,
1353	const char16_t *src, int32_t srcLength,
1354	UStringCaseMapper *stringCaseMapper,
1355	UErrorCode &errorCode) {
1356	char16_t buffer[`300`];
1357	char16_t *temp;
1358
1359	int32_t destLength;
1360
1361	/ check argument values /
1362	if(U_FAILURE(errorCode)) {
1363	return `0`;
1364	}
1365	if( destCapacity<`0` \|\|
1366	(dest==nullptr && destCapacity>`0`) \|\|
1367	src==nullptr \|\|
1368	srcLength<-`1`
1369	) {
1370	errorCode=U_ILLEGAL_ARGUMENT_ERROR;
1371	return `0`;
1372	}
1373
1374	/ get the string length /
1375	if(srcLength==-`1`) {
1376	srcLength=u_strlen(src);
1377	}
1378
1379	/ check for overlapping source and destination /
1380	if( dest!=nullptr &&
1381	((src>=dest && src<(dest+destCapacity)) \|\|
1382	(dest>=src && dest<(src+srcLength)))
1383	) {
1384	/ overlap: provide a temporary destination buffer and later copy the result /
1385	if(destCapacity<=UPRV_LENGTHOF(buffer)) {
1386	/ the stack buffer is large enough /
1387	temp=buffer;
1388	} else {
1389	/ allocate a buffer /
1390	temp=(char16_t )uprv_malloc(destCapacityU_SIZEOF_UCHAR);
1391	if(temp==nullptr) {
1392	errorCode=U_MEMORY_ALLOCATION_ERROR;
1393	return `0`;
1394	}
1395	}
1396	} else {
1397	temp=dest;
1398	}
1399
1400	destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
1401	temp, destCapacity, src, srcLength, nullptr, errorCode);
1402	if(temp!=dest) {
1403	/ copy the result string to the destination buffer /
1404	if (U_SUCCESS(errorCode) && `0` < destLength && destLength <= destCapacity) {
1405	u_memmove(dest, temp, destLength);
1406	}
1407	if(temp!=buffer) {
1408	uprv_free(temp);
1409	}
1410	}
1411
1412	return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
1413	}
1414
/* public API functions */
1416
1417	U_CAPI int32_t U_EXPORT2
1418	u_strFoldCase(char16_t *dest, int32_t destCapacity,
1419	const char16_t *src, int32_t srcLength,
1420	uint32_t options,
1421	UErrorCode *pErrorCode) {
1422	return ustrcase_mapWithOverlap(
1423	UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
1424	dest, destCapacity,
1425	src, srcLength,
1426	ustrcase_internalFold, *pErrorCode);
1427	}
1428
1429	U_NAMESPACE_BEGIN
1430
1431	int32_t CaseMap::fold(
1432	uint32_t options,
1433	const char16_t *src, int32_t srcLength,
1434	char16_t dest, int32_t destCapacity, Edits edits,
1435	UErrorCode &errorCode) {
1436	return ustrcase_map(
1437	UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
1438	dest, destCapacity,
1439	src, srcLength,
1440	ustrcase_internalFold, edits, errorCode);
1441	}
1442
1443	U_NAMESPACE_END
1444
/* case-insensitive string comparisons -------------------------------------- */
1446
1447	/*
1448	* This function is a copy of unorm_cmpEquivFold() minus the parts for
1449	* canonical equivalence.
1450	* Keep the functions in sync, and see there for how this works.
1451	* The duplication is for modularization:
1452	* It makes caseless (but not canonical caseless) matches independent of
1453	* the normalization code.
1454	*/
1455
/* stack element for previous-level source/decomposition pointers */
struct CmpEquivLevel {
    /* saved start, current position and limit of the suspended level */
    const char16_t *start, *s, *limit;
};
typedef struct CmpEquivLevel CmpEquivLevel;
1461
1462	/**
1463	* Internal implementation code comparing string with case fold.
1464	* This function is called from u_strcmpFold() and u_caseInsensitivePrefixMatch().
1465	*
1466	* @param s1 input string 1
1467	* @param length1 length of string 1, or -1 (NUL terminated)
1468	* @param s2 input string 2
1469	* @param length2 length of string 2, or -1 (NUL terminated)
1470	* @param options compare options
1471	* @param matchLen1 (output) length of partial prefix match in s1
1472	* @param matchLen2 (output) length of partial prefix match in s2
1473	* @param pErrorCode receives error status
1474	* @return The result of comparison
1475	*/
1476	static int32_t _cmpFold(
1477	const char16_t *s1, int32_t length1,
1478	const char16_t *s2, int32_t length2,
1479	uint32_t options,
1480	int32_t matchLen1, int32_t matchLen2,
1481	UErrorCode *pErrorCode) {
1482	int32_t cmpRes = `0`;
1483
1484	/ current-level start/limit - s1/s2 as current /
1485	const char16_t start1, start2, limit1, limit2;
1486
1487	/ points to the original start address /
1488	const char16_t org1, org2;
1489
1490	/ points to the end of match + 1 /
1491	const char16_t m1, m2;
1492
1493	/ case folding variables /
1494	const char16_t *p;
1495	int32_t length;
1496
1497	/ stacks of previous-level start/current/limit /
1498	CmpEquivLevel stack1[`2`], stack2[`2`];
1499
1500	/ case folding buffers, only use current-level start/limit /
1501	char16_t fold1[UCASE_MAX_STRING_LENGTH+`1`], fold2[UCASE_MAX_STRING_LENGTH+`1`];
1502
1503	/ track which is the current level per string /
1504	int32_t level1, level2;
1505
1506	/ current code units, and code points for lookups /
1507	UChar32 c1, c2, cp1, cp2;
1508
1509	/ no argument error checking because this itself is not an API /
1510
1511	/*
1512	* assume that at least the option U_COMPARE_IGNORE_CASE is set
1513	* otherwise this function would have to behave exactly as uprv_strCompare()
1514	*/
1515	if(U_FAILURE(*pErrorCode)) {
1516	return `0`;
1517	}
1518
1519	/ initialize /
1520	if(matchLen1) {
1521	U_ASSERT(matchLen2 !=nullptr);
1522	*matchLen1=`0`;
1523	*matchLen2=`0`;
1524	}
1525
1526	start1=m1=org1=s1;
1527	if(length1==-`1`) {
1528	limit1=nullptr;
1529	} else {
1530	limit1=s1+length1;
1531	}
1532
1533	start2=m2=org2=s2;
1534	if(length2==-`1`) {
1535	limit2=nullptr;
1536	} else {
1537	limit2=s2+length2;
1538	}
1539
1540	level1=level2=`0`;
1541	c1=c2=-`1`;
1542
1543	/ comparison loop /
1544	for(;;) {
1545	/*
1546	* here a code unit value of -1 means "get another code unit"
1547	* below it will mean "this source is finished"
1548	*/
1549
1550	if(c1<`0`) {
1551	/ get next code unit from string 1, post-increment /
1552	for(;;) {
1553	if(s1==limit1 \|\| ((c1=s1)==`0` && (limit1==nullptr* \|\| (options&_STRNCMP_STYLE)))) {
1554	if(level1==`0`) {
1555	c1=-`1`;
1556	break;
1557	}
1558	} else {
1559	++s1;
1560	break;
1561	}
1562
1563	/ reached end of level buffer, pop one level /
1564	do {
1565	--level1;
1566	start1=stack1[level1].start; /Not uninitialized/
1567	} while(start1==nullptr);
1568	s1=stack1[level1].s; /Not uninitialized/
1569	limit1=stack1[level1].limit; /Not uninitialized/
1570	}
1571	}
1572
1573	if(c2<`0`) {
1574	/ get next code unit from string 2, post-increment /
1575	for(;;) {
1576	if(s2==limit2 \|\| ((c2=s2)==`0` && (limit2==nullptr* \|\| (options&_STRNCMP_STYLE)))) {
1577	if(level2==`0`) {
1578	c2=-`1`;
1579	break;
1580	}
1581	} else {
1582	++s2;
1583	break;
1584	}
1585
1586	/ reached end of level buffer, pop one level /
1587	do {
1588	--level2;
1589	start2=stack2[level2].start; /Not uninitialized/
1590	} while(start2==nullptr);
1591	s2=stack2[level2].s; /Not uninitialized/
1592	limit2=stack2[level2].limit; /Not uninitialized/
1593	}
1594	}
1595
1596	/*
1597	* compare c1 and c2
1598	* either variable c1, c2 is -1 only if the corresponding string is finished
1599	*/
1600	if(c1==c2) {
1601	const char16_t next1, next2;
1602
1603	if(c1<`0`) {
1604	cmpRes=`0`; / c1==c2==-1 indicating end of strings /
1605	break;
1606	}
1607
1608	/*
1609	* Note: Move the match positions in both strings at the same time
1610	* only when corresponding code point(s) in the original strings
1611	* are fully consumed. For example, when comparing s1="Fust" and
1612	* s2="Fu\u00dfball", s2[2] is folded into "ss", and s1[2] matches
1613	* the first code point in the case-folded data. But the second "s"
1614	* has no matching code point in s1, so this implementation returns
1615	* 2 as the prefix match length ("Fu").
1616	*/
1617	next1=next2=nullptr;
1618	if(level1==`0`) {
1619	next1=s1;
1620	} else if(s1==limit1) {
1621	/ Note: This implementation only use a single level of stack.*
1622	* If this code needs to be changed to use multiple levels
1623	* of stacks, the code above should check if the current
1624	* code is at the end of all stacks.
1625	*/
1626	U_ASSERT(level1==`1`);
1627
1628	/ is s1 at the end of the current stack? /
1629	next1=stack1[`0`].s;
1630	}
1631
1632	if (next1!=nullptr) {
1633	if(level2==`0`) {
1634	next2=s2;
1635	} else if(s2==limit2) {
1636	U_ASSERT(level2==`1`);
1637
1638	/ is s2 at the end of the current stack? /
1639	next2=stack2[`0`].s;
1640	}
1641	if(next2!=nullptr) {
1642	m1=next1;
1643	m2=next2;
1644	}
1645	}
1646	c1=c2=-`1`; / make us fetch new code units /
1647	continue;
1648	} else if(c1<`0`) {
1649	cmpRes=-`1`; / string 1 ends before string 2 /
1650	break;
1651	} else if(c2<`0`) {
1652	cmpRes=`1`; / string 2 ends before string 1 /
1653	break;
1654	}
1655	/ c1!=c2 && c1>=0 && c2>=0 /
1656
1657	/ get complete code points for c1, c2 for lookups if either is a surrogate /
1658	cp1=c1;
1659	if(U_IS_SURROGATE(c1)) {
1660	char16_t c;
1661
1662	if(U_IS_SURROGATE_LEAD(c1)) {
1663	if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
1664	/ advance ++s1; only below if cp1 decomposes/case-folds /
1665	cp1=U16_GET_SUPPLEMENTARY(c1, c);
1666	}
1667	} else / isTrail(c1) / {
1668	if(start1<=(s1-`2`) && U16_IS_LEAD(c=*(s1-`2`))) {
1669	cp1=U16_GET_SUPPLEMENTARY(c, c1);
1670	}
1671	}
1672	}
1673
1674	cp2=c2;
1675	if(U_IS_SURROGATE(c2)) {
1676	char16_t c;
1677
1678	if(U_IS_SURROGATE_LEAD(c2)) {
1679	if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
1680	/ advance ++s2; only below if cp2 decomposes/case-folds /
1681	cp2=U16_GET_SUPPLEMENTARY(c2, c);
1682	}
1683	} else / isTrail(c2) / {
1684	if(start2<=(s2-`2`) && U16_IS_LEAD(c=*(s2-`2`))) {
1685	cp2=U16_GET_SUPPLEMENTARY(c, c2);
1686	}
1687	}
1688	}
1689
1690	/*
1691	* go down one level for each string
1692	* continue with the main loop as soon as there is a real change
1693	*/
1694
1695	if( level1==`0` &&
1696	(length=ucase_toFullFolding((UChar32)cp1, &p, options))>=`0`
1697	) {
1698	/ cp1 case-folds to the code point "length" or to p[length] /
1699	if(U_IS_SURROGATE(c1)) {
1700	if(U_IS_SURROGATE_LEAD(c1)) {
1701	/ advance beyond source surrogate pair if it case-folds /
1702	++s1;
1703	} else / isTrail(c1) / {
1704	/*
1705	* we got a supplementary code point when hitting its trail surrogate,
1706	* therefore the lead surrogate must have been the same as in the other string;
1707	* compare this decomposition with the lead surrogate in the other string
1708	* remember that this simulates bulk text replacement:
1709	* the decomposition would replace the entire code point
1710	*/
1711	--s2;
1712	--m2;
1713	c2=*(s2-`1`);
1714	}
1715	}
1716
1717	/ push current level pointers /
1718	stack1[`0`].start=start1;
1719	stack1[`0`].s=s1;
1720	stack1[`0`].limit=limit1;
1721	++level1;
1722
1723	/ copy the folding result to fold1[] /
1724	if(length<=UCASE_MAX_STRING_LENGTH) {
1725	u_memcpy(fold1, p, length);
1726	} else {
1727	int32_t i=`0`;
1728	U16_APPEND_UNSAFE(fold1, i, length);
1729	length=i;
1730	}
1731
1732	/ set next level pointers to case folding /
1733	start1=s1=fold1;
1734	limit1=fold1+length;
1735
1736	/ get ready to read from decomposition, continue with loop /
1737	c1=-`1`;
1738	continue;
1739	}
1740
1741	if( level2==`0` &&
1742	(length=ucase_toFullFolding((UChar32)cp2, &p, options))>=`0`
1743	) {
1744	/ cp2 case-folds to the code point "length" or to p[length] /
1745	if(U_IS_SURROGATE(c2)) {
1746	if(U_IS_SURROGATE_LEAD(c2)) {
1747	/ advance beyond source surrogate pair if it case-folds /
1748	++s2;
1749	} else / isTrail(c2) / {
1750	/*
1751	* we got a supplementary code point when hitting its trail surrogate,
1752	* therefore the lead surrogate must have been the same as in the other string;
1753	* compare this decomposition with the lead surrogate in the other string
1754	* remember that this simulates bulk text replacement:
1755	* the decomposition would replace the entire code point
1756	*/
1757	--s1;
1758	--m2;
1759	c1=*(s1-`1`);
1760	}
1761	}
1762
1763	/ push current level pointers /
1764	stack2[`0`].start=start2;
1765	stack2[`0`].s=s2;
1766	stack2[`0`].limit=limit2;
1767	++level2;
1768
1769	/ copy the folding result to fold2[] /
1770	if(length<=UCASE_MAX_STRING_LENGTH) {
1771	u_memcpy(fold2, p, length);
1772	} else {
1773	int32_t i=`0`;
1774	U16_APPEND_UNSAFE(fold2, i, length);
1775	length=i;
1776	}
1777
1778	/ set next level pointers to case folding /
1779	start2=s2=fold2;
1780	limit2=fold2+length;
1781
1782	/ get ready to read from decomposition, continue with loop /
1783	c2=-`1`;
1784	continue;
1785	}
1786
1787	/*
1788	* no decomposition/case folding, max level for both sides:
1789	* return difference result
1790	*
1791	* code point order comparison must not just return cp1-cp2
1792	* because when single surrogates are present then the surrogate pairs
1793	* that formed cp1 and cp2 may be from different string indexes
1794	*
1795	* example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
1796	* c1=d800 cp1=10001 c2=dc00 cp2=10000
1797	* cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
1798	*
1799	* therefore, use same fix-up as in ustring.c/uprv_strCompare()
1800	* except: uprv_strCompare() fetches c=s while this functions fetches c=s++
1801	* so we have slightly different pointer/start/limit comparisons here
1802	*/
1803
1804	if(c1>=`0xd800` && c2>=`0xd800` && (options&U_COMPARE_CODE_POINT_ORDER)) {
1805	/ subtract 0x2800 from BMP code points to make them smaller than supplementary ones /
1806	if(
1807	(c1<=`0xdbff` && s1!=limit1 && U16_IS_TRAIL(*s1)) \|\|
1808	(U16_IS_TRAIL(c1) && start1!=(s1-`1`) && U16_IS_LEAD(*(s1-`2`)))
1809	) {
1810	/ part of a surrogate pair, leave >=d800 /
1811	} else {
1812	/ BMP code point - may be surrogate code point - make <d800 /
1813	c1-=`0x2800`;
1814	}
1815
1816	if(
1817	(c2<=`0xdbff` && s2!=limit2 && U16_IS_TRAIL(*s2)) \|\|
1818	(U16_IS_TRAIL(c2) && start2!=(s2-`1`) && U16_IS_LEAD(*(s2-`2`)))
1819	) {
1820	/ part of a surrogate pair, leave >=d800 /
1821	} else {
1822	/ BMP code point - may be surrogate code point - make <d800 /
1823	c2-=`0x2800`;
1824	}
1825	}
1826
1827	cmpRes=c1-c2;
1828	break;
1829	}
1830
1831	if(matchLen1) {
1832	matchLen1=static_cast*<int32_t>(m1-org1);
1833	matchLen2=static_cast*<int32_t>(m2-org2);
1834	}
1835	return cmpRes;
1836	}
1837
1838	/ internal function /
1839	U_CFUNC int32_t
1840	u_strcmpFold(const char16_t *s1, int32_t length1,
1841	const char16_t *s2, int32_t length2,
1842	uint32_t options,
1843	UErrorCode *pErrorCode) {
1844	return _cmpFold(s1, length1, s2, length2, options, nullptr, nullptr, pErrorCode);
1845	}
1846
/* public API functions */
1848
1849	U_CAPI int32_t U_EXPORT2
1850	u_strCaseCompare(const char16_t *s1, int32_t length1,
1851	const char16_t *s2, int32_t length2,
1852	uint32_t options,
1853	UErrorCode *pErrorCode) {
1854	/ argument checking /
1855	if(pErrorCode==`0` \|\| U_FAILURE(*pErrorCode)) {
1856	return `0`;
1857	}
1858	if(s1==nullptr \|\| length1<-`1` \|\| s2==nullptr \|\| length2<-`1`) {
1859	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1860	return `0`;
1861	}
1862	return u_strcmpFold(s1, length1, s2, length2,
1863	options\|U_COMPARE_IGNORE_CASE,
1864	pErrorCode);
1865	}
1866
1867	U_CAPI int32_t U_EXPORT2
1868	u_strcasecmp(const char16_t s1, const* char16_t *s2, uint32_t options) {
1869	UErrorCode errorCode=U_ZERO_ERROR;
1870	return u_strcmpFold(s1, -`1`, s2, -`1`,
1871	options\|U_COMPARE_IGNORE_CASE,
1872	&errorCode);
1873	}
1874
1875	U_CAPI int32_t U_EXPORT2
1876	u_memcasecmp(const char16_t s1, const* char16_t *s2, int32_t length, uint32_t options) {
1877	UErrorCode errorCode=U_ZERO_ERROR;
1878	return u_strcmpFold(s1, length, s2, length,
1879	options\|U_COMPARE_IGNORE_CASE,
1880	&errorCode);
1881	}
1882
1883	U_CAPI int32_t U_EXPORT2
1884	u_strncasecmp(const char16_t s1, const* char16_t *s2, int32_t n, uint32_t options) {
1885	UErrorCode errorCode=U_ZERO_ERROR;
1886	return u_strcmpFold(s1, n, s2, n,
1887	options\|(U_COMPARE_IGNORE_CASE\|_STRNCMP_STYLE),
1888	&errorCode);
1889	}
1890
1891	/ internal API - detect length of shared prefix /
1892	U_CAPI void
1893	u_caseInsensitivePrefixMatch(const char16_t *s1, int32_t length1,
1894	const char16_t *s2, int32_t length2,
1895	uint32_t options,
1896	int32_t matchLen1, int32_t matchLen2,
1897	UErrorCode *pErrorCode) {
1898	_cmpFold(s1, length1, s2, length2, options,
1899	matchLen1, matchLen2, pErrorCode);
1900	}
1901

Browse the source code of Godot/thirdparty/icu4c/common/ustrcase.cpp