// ustring.cpp source code [Godot/thirdparty/icu4c/common/ustring.cpp]

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 1998-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
* File ustring.cpp
*
* Modification History:
*
*   Date        Name        Description
*   12/07/98    bertrand    Creation.
******************************************************************************
*/

#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "unicode/uchar.h"
#include "unicode/ustring.h"
#include "unicode/utf16.h"
#include "cstring.h"
#include "cwchar.h"
#include "cmemory.h"
#include "ustr_imp.h"

/* ANSI string.h - style functions ------------------------------------------ */

/* U+ffff is the highest BMP code point, the highest one that fits into a 16-bit char16_t */
#define U_BMP_MAX 0xffff

35	/ Forward binary string search functions ----------------------------------- /
36
37	/*
38	* Test if a substring match inside a string is at code point boundaries.
39	* All pointers refer to the same buffer.
40	* The limit pointer may be nullptr, all others must be real pointers.
41	*/
42	static inline UBool
43	isMatchAtCPBoundary(const char16_t start, const* char16_t match, const* char16_t matchLimit, const* char16_t *limit) {
44	if(U16_IS_TRAIL(match) && start!=match && U16_IS_LEAD((match-`1`))) {
45	/ the leading edge of the match is in the middle of a surrogate pair /
46	return false;
47	}
48	if(U16_IS_LEAD((matchLimit-`1`)) && matchLimit!=limit && U16_IS_TRAIL(matchLimit)) {
49	/ the trailing edge of the match is in the middle of a surrogate pair /
50	return false;
51	}
52	return true;
53	}
54
55	U_CAPI char16_t * U_EXPORT2
56	u_strFindFirst(const char16_t *s, int32_t length,
57	const char16_t *sub, int32_t subLength) {
58	const char16_t start, p, q, subLimit;
59	char16_t c, cs, cq;
60
61	if(sub==nullptr \|\| subLength<-`1`) {
62	return (char16_t *)s;
63	}
64	if(s==nullptr \|\| length<-`1`) {
65	return nullptr;
66	}
67
68	start=s;
69
70	if(length<`0` && subLength<`0`) {
71	/ both strings are NUL-terminated /
72	if((cs=*sub++)==`0`) {
73	return (char16_t *)s;
74	}
75	if(*sub==`0` && !U16_IS_SURROGATE(cs)) {
76	/ the substring consists of a single, non-surrogate BMP code point /
77	return u_strchr(s, cs);
78	}
79
80	while((c=*s++)!=`0`) {
81	if(c==cs) {
82	/ found first substring char16_t, compare rest /
83	p=s;
84	q=sub;
85	for(;;) {
86	if((cq=*q)==`0`) {
87	if(isMatchAtCPBoundary(start, s-`1`, p, nullptr)) {
88	return (char16_t )(s-`1`); /* well-formed match /
89	} else {
90	break; / no match because surrogate pair is split /
91	}
92	}
93	if((c=*p)==`0`) {
94	return nullptr; / no match, and none possible after s /
95	}
96	if(c!=cq) {
97	break; / no match /
98	}
99	++p;
100	++q;
101	}
102	}
103	}
104
105	/ not found /
106	return nullptr;
107	}
108
109	if(subLength<`0`) {
110	subLength=u_strlen(sub);
111	}
112	if(subLength==`0`) {
113	return (char16_t *)s;
114	}
115
116	/ get sub[0] to search for it fast /
117	cs=*sub++;
118	--subLength;
119	subLimit=sub+subLength;
120
121	if(subLength==`0` && !U16_IS_SURROGATE(cs)) {
122	/ the substring consists of a single, non-surrogate BMP code point /
123	return length<`0` ? u_strchr(s, cs) : u_memchr(s, cs, length);
124	}
125
126	if(length<`0`) {
127	/ s is NUL-terminated /
128	while((c=*s++)!=`0`) {
129	if(c==cs) {
130	/ found first substring char16_t, compare rest /
131	p=s;
132	q=sub;
133	for(;;) {
134	if(q==subLimit) {
135	if(isMatchAtCPBoundary(start, s-`1`, p, nullptr)) {
136	return (char16_t )(s-`1`); /* well-formed match /
137	} else {
138	break; / no match because surrogate pair is split /
139	}
140	}
141	if((c=*p)==`0`) {
142	return nullptr; / no match, and none possible after s /
143	}
144	if(c!=*q) {
145	break; / no match /
146	}
147	++p;
148	++q;
149	}
150	}
151	}
152	} else {
153	const char16_t limit, preLimit;
154
155	/ subLength was decremented above /
156	if(length<=subLength) {
157	return nullptr; / s is shorter than sub /
158	}
159
160	limit=s+length;
161
162	/ the substring must start before preLimit /
163	preLimit=limit-subLength;
164
165	while(s!=preLimit) {
166	c=*s++;
167	if(c==cs) {
168	/ found first substring char16_t, compare rest /
169	p=s;
170	q=sub;
171	for(;;) {
172	if(q==subLimit) {
173	if(isMatchAtCPBoundary(start, s-`1`, p, limit)) {
174	return (char16_t )(s-`1`); /* well-formed match /
175	} else {
176	break; / no match because surrogate pair is split /
177	}
178	}
179	if(p!=q) {
180	break; / no match /
181	}
182	++p;
183	++q;
184	}
185	}
186	}
187	}
188
189	/ not found /
190	return nullptr;
191	}
192
193	U_CAPI char16_t * U_EXPORT2
194	u_strstr(const char16_t s, const* char16_t *substring) {
195	return u_strFindFirst(s, -`1`, substring, -`1`);
196	}
197
198	U_CAPI char16_t * U_EXPORT2
199	u_strchr(const char16_t s, char16_t* c) {
200	if(U16_IS_SURROGATE(c)) {
201	/ make sure to not find half of a surrogate pair /
202	return u_strFindFirst(s, -`1`, &c, `1`);
203	} else {
204	char16_t cs;
205
206	/ trivial search for a BMP code point /
207	for(;;) {
208	if((cs=*s)==c) {
209	return (char16_t *)s;
210	}
211	if(cs==`0`) {
212	return nullptr;
213	}
214	++s;
215	}
216	}
217	}
218
219	U_CAPI char16_t * U_EXPORT2
220	u_strchr32(const char16_t *s, UChar32 c) {
221	if((uint32_t)c<=U_BMP_MAX) {
222	/ find BMP code point /
223	return u_strchr(s, (char16_t)c);
224	} else if((uint32_t)c<=UCHAR_MAX_VALUE) {
225	/ find supplementary code point as surrogate pair /
226	char16_t cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
227
228	while((cs=*s++)!=`0`) {
229	if(cs==lead && *s==trail) {
230	return (char16_t *)(s-`1`);
231	}
232	}
233	return nullptr;
234	} else {
235	/ not a Unicode code point, not findable /
236	return nullptr;
237	}
238	}
239
240	U_CAPI char16_t * U_EXPORT2
241	u_memchr(const char16_t s, char16_t* c, int32_t count) {
242	if(count<=`0`) {
243	return nullptr; / no string /
244	} else if(U16_IS_SURROGATE(c)) {
245	/ make sure to not find half of a surrogate pair /
246	return u_strFindFirst(s, count, &c, `1`);
247	} else {
248	/ trivial search for a BMP code point /
249	const char16_t *limit=s+count;
250	do {
251	if(*s==c) {
252	return (char16_t *)s;
253	}
254	} while(++s!=limit);
255	return nullptr;
256	}
257	}
258
259	U_CAPI char16_t * U_EXPORT2
260	u_memchr32(const char16_t *s, UChar32 c, int32_t count) {
261	if((uint32_t)c<=U_BMP_MAX) {
262	/ find BMP code point /
263	return u_memchr(s, (char16_t)c, count);
264	} else if(count<`2`) {
265	/ too short for a surrogate pair /
266	return nullptr;
267	} else if((uint32_t)c<=UCHAR_MAX_VALUE) {
268	/ find supplementary code point as surrogate pair /
269	const char16_t limit=s+count-`1`; /* -1 so that we do not need a separate check for the trail unit /
270	char16_t lead=U16_LEAD(c), trail=U16_TRAIL(c);
271
272	do {
273	if(s==lead && (s+`1`)==trail) {
274	return (char16_t *)s;
275	}
276	} while(++s!=limit);
277	return nullptr;
278	} else {
279	/ not a Unicode code point, not findable /
280	return nullptr;
281	}
282	}
283
284	/ Backward binary string search functions ---------------------------------- /
285
286	U_CAPI char16_t * U_EXPORT2
287	u_strFindLast(const char16_t *s, int32_t length,
288	const char16_t *sub, int32_t subLength) {
289	const char16_t start, limit, p, q, *subLimit;
290	char16_t c, cs;
291
292	if(sub==nullptr \|\| subLength<-`1`) {
293	return (char16_t *)s;
294	}
295	if(s==nullptr \|\| length<-`1`) {
296	return nullptr;
297	}
298
299	/*
300	* This implementation is more lazy than the one for u_strFindFirst():
301	* There is no special search code for NUL-terminated strings.
302	* It does not seem to be worth it for searching substrings to
303	* search forward and find all matches like in u_strrchr() and similar.
304	* Therefore, we simply get both string lengths and search backward.
305	*
306	* markus 2002oct23
307	*/
308
309	if(subLength<`0`) {
310	subLength=u_strlen(sub);
311	}
312	if(subLength==`0`) {
313	return (char16_t *)s;
314	}
315
316	/ get sub[subLength-1] to search for it fast /
317	subLimit=sub+subLength;
318	cs=*(--subLimit);
319	--subLength;
320
321	if(subLength==`0` && !U16_IS_SURROGATE(cs)) {
322	/ the substring consists of a single, non-surrogate BMP code point /
323	return length<`0` ? u_strrchr(s, cs) : u_memrchr(s, cs, length);
324	}
325
326	if(length<`0`) {
327	length=u_strlen(s);
328	}
329
330	/ subLength was decremented above /
331	if(length<=subLength) {
332	return nullptr; / s is shorter than sub /
333	}
334
335	start=s;
336	limit=s+length;
337
338	/ the substring must start no later than s+subLength /
339	s+=subLength;
340
341	while(s!=limit) {
342	c=*(--limit);
343	if(c==cs) {
344	/ found last substring char16_t, compare rest /
345	p=limit;
346	q=subLimit;
347	for(;;) {
348	if(q==sub) {
349	if(isMatchAtCPBoundary(start, p, limit+`1`, start+length)) {
350	return (char16_t )p; /* well-formed match /
351	} else {
352	break; / no match because surrogate pair is split /
353	}
354	}
355	if((--p)!=(--q)) {
356	break; / no match /
357	}
358	}
359	}
360	}
361
362	/ not found /
363	return nullptr;
364	}
365
366	U_CAPI char16_t * U_EXPORT2
367	u_strrstr(const char16_t s, const* char16_t *substring) {
368	return u_strFindLast(s, -`1`, substring, -`1`);
369	}
370
371	U_CAPI char16_t * U_EXPORT2
372	u_strrchr(const char16_t s, char16_t* c) {
373	if(U16_IS_SURROGATE(c)) {
374	/ make sure to not find half of a surrogate pair /
375	return u_strFindLast(s, -`1`, &c, `1`);
376	} else {
377	const char16_t result=nullptr*;
378	char16_t cs;
379
380	/ trivial search for a BMP code point /
381	for(;;) {
382	if((cs=*s)==c) {
383	result=s;
384	}
385	if(cs==`0`) {
386	return (char16_t *)result;
387	}
388	++s;
389	}
390	}
391	}
392
393	U_CAPI char16_t * U_EXPORT2
394	u_strrchr32(const char16_t *s, UChar32 c) {
395	if((uint32_t)c<=U_BMP_MAX) {
396	/ find BMP code point /
397	return u_strrchr(s, (char16_t)c);
398	} else if((uint32_t)c<=UCHAR_MAX_VALUE) {
399	/ find supplementary code point as surrogate pair /
400	const char16_t result=nullptr*;
401	char16_t cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
402
403	while((cs=*s++)!=`0`) {
404	if(cs==lead && *s==trail) {
405	result=s-`1`;
406	}
407	}
408	return (char16_t *)result;
409	} else {
410	/ not a Unicode code point, not findable /
411	return nullptr;
412	}
413	}
414
415	U_CAPI char16_t * U_EXPORT2
416	u_memrchr(const char16_t s, char16_t* c, int32_t count) {
417	if(count<=`0`) {
418	return nullptr; / no string /
419	} else if(U16_IS_SURROGATE(c)) {
420	/ make sure to not find half of a surrogate pair /
421	return u_strFindLast(s, count, &c, `1`);
422	} else {
423	/ trivial search for a BMP code point /
424	const char16_t *limit=s+count;
425	do {
426	if(*(--limit)==c) {
427	return (char16_t *)limit;
428	}
429	} while(s!=limit);
430	return nullptr;
431	}
432	}
433
434	U_CAPI char16_t * U_EXPORT2
435	u_memrchr32(const char16_t *s, UChar32 c, int32_t count) {
436	if((uint32_t)c<=U_BMP_MAX) {
437	/ find BMP code point /
438	return u_memrchr(s, (char16_t)c, count);
439	} else if(count<`2`) {
440	/ too short for a surrogate pair /
441	return nullptr;
442	} else if((uint32_t)c<=UCHAR_MAX_VALUE) {
443	/ find supplementary code point as surrogate pair /
444	const char16_t *limit=s+count-`1`;
445	char16_t lead=U16_LEAD(c), trail=U16_TRAIL(c);
446
447	do {
448	if(limit==trail && (limit-`1`)==lead) {
449	return (char16_t *)(limit-`1`);
450	}
451	} while(s!=--limit);
452	return nullptr;
453	} else {
454	/ not a Unicode code point, not findable /
455	return nullptr;
456	}
457	}
458
459	/ Tokenization functions --------------------------------------------------- /
460
461	/*
462	* Match each code point in a string against each code point in the matchSet.
463	* Return the index of the first string code point that
464	* is (polarity==true) or is not (false) contained in the matchSet.
465	* Return -(string length)-1 if there is no such code point.
466	*/
467	static int32_t
468	_matchFromSet(const char16_t string, const* char16_t *matchSet, UBool polarity) {
469	int32_t matchLen, matchBMPLen, strItr, matchItr;
470	UChar32 stringCh, matchCh;
471	char16_t c, c2;
472
473	/ first part of matchSet contains only BMP code points /
474	matchBMPLen = `0`;
475	while((c = matchSet[matchBMPLen]) != `0` && U16_IS_SINGLE(c)) {
476	++matchBMPLen;
477	}
478
479	/ second part of matchSet contains BMP and supplementary code points /
480	matchLen = matchBMPLen;
481	while(matchSet[matchLen] != `0`) {
482	++matchLen;
483	}
484
485	for(strItr = `0`; (c = string[strItr]) != `0`;) {
486	++strItr;
487	if(U16_IS_SINGLE(c)) {
488	if(polarity) {
489	for(matchItr = `0`; matchItr < matchLen; ++matchItr) {
490	if(c == matchSet[matchItr]) {
491	return strItr - `1`; / one matches /
492	}
493	}
494	} else {
495	for(matchItr = `0`; matchItr < matchLen; ++matchItr) {
496	if(c == matchSet[matchItr]) {
497	goto endloop;
498	}
499	}
500	return strItr - `1`; / none matches /
501	}
502	} else {
503	/*
504	* No need to check for string length before U16_IS_TRAIL
505	* because c2 could at worst be the terminating NUL.
506	*/
507	if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) {
508	++strItr;
509	stringCh = U16_GET_SUPPLEMENTARY(c, c2);
510	} else {
511	stringCh = c; / unpaired trail surrogate /
512	}
513
514	if(polarity) {
515	for(matchItr = matchBMPLen; matchItr < matchLen;) {
516	U16_NEXT(matchSet, matchItr, matchLen, matchCh);
517	if(stringCh == matchCh) {
518	return strItr - U16_LENGTH(stringCh); / one matches /
519	}
520	}
521	} else {
522	for(matchItr = matchBMPLen; matchItr < matchLen;) {
523	U16_NEXT(matchSet, matchItr, matchLen, matchCh);
524	if(stringCh == matchCh) {
525	goto endloop;
526	}
527	}
528	return strItr - U16_LENGTH(stringCh); / none matches /
529	}
530	}
531	endloop:
532	/ wish C had continue with labels like Java... /;
533	}
534
535	/ Didn't find it. /
536	return -strItr-`1`;
537	}
538
539	/ Search for a codepoint in a string that matches one of the matchSet codepoints. /
540	U_CAPI char16_t * U_EXPORT2
541	u_strpbrk(const char16_t string, const* char16_t *matchSet)
542	{
543	int32_t idx = _matchFromSet(string, matchSet, true);
544	if(idx >= `0`) {
545	return (char16_t *)string + idx;
546	} else {
547	return nullptr;
548	}
549	}
550
551	/ Search for a codepoint in a string that matches one of the matchSet codepoints. /
552	U_CAPI int32_t U_EXPORT2
553	u_strcspn(const char16_t string, const* char16_t *matchSet)
554	{
555	int32_t idx = _matchFromSet(string, matchSet, true);
556	if(idx >= `0`) {
557	return idx;
558	} else {
559	return -idx - `1`; / == u_strlen(string) /
560	}
561	}
562
563	/ Search for a codepoint in a string that does not match one of the matchSet codepoints. /
564	U_CAPI int32_t U_EXPORT2
565	u_strspn(const char16_t string, const* char16_t *matchSet)
566	{
567	int32_t idx = _matchFromSet(string, matchSet, false);
568	if(idx >= `0`) {
569	return idx;
570	} else {
571	return -idx - `1`; / == u_strlen(string) /
572	}
573	}
574
575	/ ----- Text manipulation functions --- /
576
577	U_CAPI char16_t* U_EXPORT2
578	u_strtok_r(char16_t *src,
579	const char16_t *delim,
580	char16_t **saveState)
581	{
582	char16_t *tokSource;
583	char16_t *nextToken;
584	uint32_t nonDelimIdx;
585
586	/ If saveState is nullptr, the user messed up. /
587	if (src != nullptr) {
588	tokSource = src;
589	saveState = src; /* Set to "src" in case there are no delimiters /
590	}
591	else if (*saveState) {
592	tokSource = *saveState;
593	}
594	else {
595	/ src == nullptr && saveState == nullptr /*
596	/ This shouldn't happen. We already finished tokenizing. /
597	return nullptr;
598	}
599
600	/ Skip initial delimiters /
601	nonDelimIdx = u_strspn(tokSource, delim);
602	tokSource = &tokSource[nonDelimIdx];
603
604	if (*tokSource) {
605	nextToken = u_strpbrk(tokSource, delim);
606	if (nextToken != nullptr) {
607	/ Create a token /
608	*(nextToken++) = `0`;
609	*saveState = nextToken;
610	return tokSource;
611	}
612	else if (*saveState) {
613	/ Return the last token /
614	saveState = nullptr*;
615	return tokSource;
616	}
617	}
618	else {
619	/ No tokens were found. Only delimiters were left. /
620	saveState = nullptr*;
621	}
622	return nullptr;
623	}
624
625	/ Miscellaneous functions -------------------------------------------------- /
626
627	U_CAPI char16_t* U_EXPORT2
628	u_strcat(char16_t *dst,
629	const char16_t *src)
630	{
631	char16_t anchor = dst; /* save a pointer to start of dst /
632
633	while(dst != `0`) { /* To end of first string /
634	++dst;
635	}
636	while(((dst++) = (src++)) != `0`) { / copy string 2 over /
637	}
638
639	return anchor;
640	}
641
642	U_CAPI char16_t* U_EXPORT2
643	u_strncat(char16_t *dst,
644	const char16_t *src,
645	int32_t n )
646	{
647	if(n > `0`) {
648	char16_t anchor = dst; /* save a pointer to start of dst /
649
650	while(dst != `0`) { /* To end of first string /
651	++dst;
652	}
653	while((dst = src) != `0`) { / copy string 2 over /
654	++dst;
655	if(--n == `0`) {
656	*dst = `0`;
657	break;
658	}
659	++src;
660	}
661
662	return anchor;
663	} else {
664	return dst;
665	}
666	}
667
668	/ ----- Text property functions --- /
669
670	U_CAPI int32_t U_EXPORT2
671	u_strcmp(const char16_t *s1,
672	const char16_t *s2)
673	{
674	char16_t c1, c2;
675
676	for(;;) {
677	c1=*s1++;
678	c2=*s2++;
679	if (c1 != c2 \|\| c1 == `0`) {
680	break;
681	}
682	}
683	return (int32_t)c1 - (int32_t)c2;
684	}
685
686	U_CFUNC int32_t U_EXPORT2
687	uprv_strCompare(const char16_t *s1, int32_t length1,
688	const char16_t *s2, int32_t length2,
689	UBool strncmpStyle, UBool codePointOrder) {
690	const char16_t start1, start2, limit1, limit2;
691	char16_t c1, c2;
692
693	/ setup for fix-up /
694	start1=s1;
695	start2=s2;
696
697	/ compare identical prefixes - they do not need to be fixed up /
698	if(length1<`0` && length2<`0`) {
699	/ strcmp style, both NUL-terminated /
700	if(s1==s2) {
701	return `0`;
702	}
703
704	for(;;) {
705	c1=*s1;
706	c2=*s2;
707	if(c1!=c2) {
708	break;
709	}
710	if(c1==`0`) {
711	return `0`;
712	}
713	++s1;
714	++s2;
715	}
716
717	/ setup for fix-up /
718	limit1=limit2=nullptr;
719	} else if(strncmpStyle) {
720	/ special handling for strncmp, assume length1==length2>=0 but also check for NUL /
721	if(s1==s2) {
722	return `0`;
723	}
724
725	limit1=start1+length1;
726
727	for(;;) {
728	/ both lengths are same, check only one limit /
729	if(s1==limit1) {
730	return `0`;
731	}
732
733	c1=*s1;
734	c2=*s2;
735	if(c1!=c2) {
736	break;
737	}
738	if(c1==`0`) {
739	return `0`;
740	}
741	++s1;
742	++s2;
743	}
744
745	/ setup for fix-up /
746	limit2=start2+length1; / use length1 here, too, to enforce assumption /
747	} else {
748	/ memcmp/UnicodeString style, both length-specified /
749	int32_t lengthResult;
750
751	if(length1<`0`) {
752	length1=u_strlen(s1);
753	}
754	if(length2<`0`) {
755	length2=u_strlen(s2);
756	}
757
758	/ limit1=start1+min(length1, length2) /
759	if(length1<length2) {
760	lengthResult=-`1`;
761	limit1=start1+length1;
762	} else if(length1==length2) {
763	lengthResult=`0`;
764	limit1=start1+length1;
765	} else / length1>length2 / {
766	lengthResult=`1`;
767	limit1=start1+length2;
768	}
769
770	if(s1==s2) {
771	return lengthResult;
772	}
773
774	for(;;) {
775	/ check pseudo-limit /
776	if(s1==limit1) {
777	return lengthResult;
778	}
779
780	c1=*s1;
781	c2=*s2;
782	if(c1!=c2) {
783	break;
784	}
785	++s1;
786	++s2;
787	}
788
789	/ setup for fix-up /
790	limit1=start1+length1;
791	limit2=start2+length2;
792	}
793
794	/ if both values are in or above the surrogate range, fix them up /
795	if(c1>=`0xd800` && c2>=`0xd800` && codePointOrder) {
796	/ subtract 0x2800 from BMP code points to make them smaller than supplementary ones /
797	if(
798	(c1<=`0xdbff` && (s1+`1`)!=limit1 && U16_IS_TRAIL(*(s1+`1`))) \|\|
799	(U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-`1`)))
800	) {
801	/ part of a surrogate pair, leave >=d800 /
802	} else {
803	/ BMP code point - may be surrogate code point - make <d800 /
804	c1-=`0x2800`;
805	}
806
807	if(
808	(c2<=`0xdbff` && (s2+`1`)!=limit2 && U16_IS_TRAIL(*(s2+`1`))) \|\|
809	(U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-`1`)))
810	) {
811	/ part of a surrogate pair, leave >=d800 /
812	} else {
813	/ BMP code point - may be surrogate code point - make <d800 /
814	c2-=`0x2800`;
815	}
816	}
817
818	/ now c1 and c2 are in the requested (code unit or code point) order /
819	return (int32_t)c1-(int32_t)c2;
820	}
821
822	/*
823	* Compare two strings as presented by UCharIterators.
824	* Use code unit or code point order.
825	* When the function returns, it is undefined where the iterators
826	* have stopped.
827	*/
828	U_CAPI int32_t U_EXPORT2
829	u_strCompareIter(UCharIterator iter1, UCharIterator iter2, UBool codePointOrder) {
830	UChar32 c1, c2;
831
832	/ argument checking /
833	if(iter1==nullptr \|\| iter2==nullptr) {
834	return `0`; / bad arguments /
835	}
836	if(iter1==iter2) {
837	return `0`; / identical iterators /
838	}
839
840	/ reset iterators to start? /
841	iter1->move(iter1, `0`, UITER_START);
842	iter2->move(iter2, `0`, UITER_START);
843
844	/ compare identical prefixes - they do not need to be fixed up /
845	for(;;) {
846	c1=iter1->next(iter1);
847	c2=iter2->next(iter2);
848	if(c1!=c2) {
849	break;
850	}
851	if(c1==-`1`) {
852	return `0`;
853	}
854	}
855
856	/ if both values are in or above the surrogate range, fix them up /
857	if(c1>=`0xd800` && c2>=`0xd800` && codePointOrder) {
858	/ subtract 0x2800 from BMP code points to make them smaller than supplementary ones /
859	if(
860	(c1<=`0xdbff` && U16_IS_TRAIL(iter1->current(iter1))) \|\|
861	(U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1))))
862	) {
863	/ part of a surrogate pair, leave >=d800 /
864	} else {
865	/ BMP code point - may be surrogate code point - make <d800 /
866	c1-=`0x2800`;
867	}
868
869	if(
870	(c2<=`0xdbff` && U16_IS_TRAIL(iter2->current(iter2))) \|\|
871	(U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2))))
872	) {
873	/ part of a surrogate pair, leave >=d800 /
874	} else {
875	/ BMP code point - may be surrogate code point - make <d800 /
876	c2-=`0x2800`;
877	}
878	}
879
880	/ now c1 and c2 are in the requested (code unit or code point) order /
881	return (int32_t)c1-(int32_t)c2;
882	}
883
#if 0
/*
 * u_strCompareIter() does not leave the iterators _on_ the different units.
 * This is possible but would cost a few extra indirect function calls to back
 * up if the last unit (c1 or c2 respectively) was >=0.
 *
 * Consistently leaving them _behind_ the different units is not an option
 * because the current "unit" is the end of the string if that is reached,
 * and in such a case the iterator does not move.
 * For example, when comparing "ab" with "abc", both iterators rest _on_ the end
 * of their strings. Calling previous() on each does not move them to where
 * the comparison fails.
 *
 * So the simplest semantics is to not define where the iterators end up.
 *
 * The following fragment is part of what would need to be done for backing up.
 * It is compiled out by the surrounding #if 0 and is kept for reference only.
 */
void fragment {
        /* iff a surrogate is part of a surrogate pair, leave >=d800 */
        if(c1<=0xdbff) {
            if(!U16_IS_TRAIL(iter1->current(iter1))) {
                /* lead surrogate code point - make <d800 */
                c1-=0x2800;
            }
        } else if(c1<=0xdfff) {
            int32_t idx=iter1->getIndex(iter1, UITER_CURRENT);
            iter1->previous(iter1); /* ==c1 */
            if(!U16_IS_LEAD(iter1->previous(iter1))) {
                /* trail surrogate code point - make <d800 */
                c1-=0x2800;
            }
            /* go back to behind where the difference is */
            iter1->move(iter1, idx, UITER_ZERO);
        } else /* 0xe000<=c1<=0xffff */ {
            /* BMP code point - make <d800 */
            c1-=0x2800;
        }
}
#endif

924	U_CAPI int32_t U_EXPORT2
925	u_strCompare(const char16_t *s1, int32_t length1,
926	const char16_t *s2, int32_t length2,
927	UBool codePointOrder) {
928	/ argument checking /
929	if(s1==nullptr \|\| length1<-`1` \|\| s2==nullptr \|\| length2<-`1`) {
930	return `0`;
931	}
932	return uprv_strCompare(s1, length1, s2, length2, false, codePointOrder);
933	}
934
935	/ String compare in code point order - u_strcmp() compares in code unit order. /
936	U_CAPI int32_t U_EXPORT2
937	u_strcmpCodePointOrder(const char16_t s1, const* char16_t *s2) {
938	return uprv_strCompare(s1, -`1`, s2, -`1`, false, true);
939	}
940
941	U_CAPI int32_t U_EXPORT2
942	u_strncmp(const char16_t *s1,
943	const char16_t *s2,
944	int32_t n)
945	{
946	if(n > `0`) {
947	int32_t rc;
948	for(;;) {
949	rc = (int32_t)s1 - (int32_t)s2;
950	if(rc != `0` \|\| *s1 == `0` \|\| --n == `0`) {
951	return rc;
952	}
953	++s1;
954	++s2;
955	}
956	} else {
957	return `0`;
958	}
959	}
960
961	U_CAPI int32_t U_EXPORT2
962	u_strncmpCodePointOrder(const char16_t s1, const* char16_t *s2, int32_t n) {
963	return uprv_strCompare(s1, n, s2, n, true, true);
964	}
965
966	U_CAPI char16_t* U_EXPORT2
967	u_strcpy(char16_t *dst,
968	const char16_t *src)
969	{
970	char16_t anchor = dst; /* save a pointer to start of dst /
971
972	while(((dst++) = (src++)) != `0`) { / copy string 2 over /
973	}
974
975	return anchor;
976	}
977
978	U_CAPI char16_t* U_EXPORT2
979	u_strncpy(char16_t *dst,
980	const char16_t *src,
981	int32_t n)
982	{
983	char16_t anchor = dst; /* save a pointer to start of dst /
984
985	/ copy string 2 over /
986	while(n > `0` && ((dst++) = (src++)) != `0`) {
987	--n;
988	}
989
990	return anchor;
991	}
992
993	U_CAPI int32_t U_EXPORT2
994	u_strlen(const char16_t *s)
995	{
996	#if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
997	return (int32_t)uprv_wcslen((const wchar_t *)s);
998	#else
999	const char16_t *t = s;
1000	while(*t != `0`) {
1001	++t;
1002	}
1003	return t - s;
1004	#endif
1005	}
1006
1007	U_CAPI int32_t U_EXPORT2
1008	u_countChar32(const char16_t *s, int32_t length) {
1009	int32_t count;
1010
1011	if(s==nullptr \|\| length<-`1`) {
1012	return `0`;
1013	}
1014
1015	count=`0`;
1016	if(length>=`0`) {
1017	while(length>`0`) {
1018	++count;
1019	if(U16_IS_LEAD(s) && length>=`2` && U16_IS_TRAIL((s+`1`))) {
1020	s+=`2`;
1021	length-=`2`;
1022	} else {
1023	++s;
1024	--length;
1025	}
1026	}
1027	} else / length==-1 / {
1028	char16_t c;
1029
1030	for(;;) {
1031	if((c=*s++)==`0`) {
1032	break;
1033	}
1034	++count;
1035
1036	/*
1037	* sufficient to look ahead one because of UTF-16;
1038	* safe to look ahead one because at worst that would be the terminating NUL
1039	*/
1040	if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
1041	++s;
1042	}
1043	}
1044	}
1045	return count;
1046	}
1047
1048	U_CAPI UBool U_EXPORT2
1049	u_strHasMoreChar32Than(const char16_t *s, int32_t length, int32_t number) {
1050
1051	if(number<`0`) {
1052	return true;
1053	}
1054	if(s==nullptr \|\| length<-`1`) {
1055	return false;
1056	}
1057
1058	if(length==-`1`) {
1059	/ s is NUL-terminated /
1060	char16_t c;
1061
1062	/ count code points until they exceed /
1063	for(;;) {
1064	if((c=*s++)==`0`) {
1065	return false;
1066	}
1067	if(number==`0`) {
1068	return true;
1069	}
1070	if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
1071	++s;
1072	}
1073	--number;
1074	}
1075	} else {
1076	/ length>=0 known /
1077	const char16_t *limit;
1078	int32_t maxSupplementary;
1079
1080	/ s contains at least (length+1)/2 code points: <=2 UChars per cp /
1081	if(((length+`1`)/`2`)>number) {
1082	return true;
1083	}
1084
1085	/ check if s does not even contain enough UChars /
1086	maxSupplementary=length-number;
1087	if(maxSupplementary<=`0`) {
1088	return false;
1089	}
1090	/ there are maxSupplementary=length-number more UChars than asked-for code points /
1091
1092	/*
1093	* count code points until they exceed and also check that there are
1094	* no more than maxSupplementary supplementary code points (char16_t pairs)
1095	*/
1096	limit=s+length;
1097	for(;;) {
1098	if(s==limit) {
1099	return false;
1100	}
1101	if(number==`0`) {
1102	return true;
1103	}
1104	if(U16_IS_LEAD(s++) && s!=limit && U16_IS_TRAIL(s)) {
1105	++s;
1106	if(--maxSupplementary<=`0`) {
1107	/ too many pairs - too few code points /
1108	return false;
1109	}
1110	}
1111	--number;
1112	}
1113	}
1114	}
1115
1116	U_CAPI char16_t * U_EXPORT2
1117	u_memcpy(char16_t dest, const* char16_t *src, int32_t count) {
1118	if(count > `0`) {
1119	uprv_memcpy(dest, src, (size_t)count*U_SIZEOF_UCHAR);
1120	}
1121	return dest;
1122	}
1123
1124	U_CAPI char16_t * U_EXPORT2
1125	u_memmove(char16_t dest, const* char16_t *src, int32_t count) {
1126	if(count > `0`) {
1127	uprv_memmove(dest, src, (size_t)count*U_SIZEOF_UCHAR);
1128	}
1129	return dest;
1130	}
1131
1132	U_CAPI char16_t * U_EXPORT2
1133	u_memset(char16_t dest, char16_t* c, int32_t count) {
1134	if(count > `0`) {
1135	char16_t *ptr = dest;
1136	char16_t *limit = dest + count;
1137
1138	while (ptr < limit) {
1139	*(ptr++) = c;
1140	}
1141	}
1142	return dest;
1143	}
1144
1145	U_CAPI int32_t U_EXPORT2
1146	u_memcmp(const char16_t buf1, const* char16_t *buf2, int32_t count) {
1147	if(count > `0`) {
1148	const char16_t *limit = buf1 + count;
1149	int32_t result;
1150
1151	while (buf1 < limit) {
1152	result = (int32_t)(uint16_t)buf1 - (int32_t)(uint16_t)buf2;
1153	if (result != `0`) {
1154	return result;
1155	}
1156	buf1++;
1157	buf2++;
1158	}
1159	}
1160	return `0`;
1161	}
1162
1163	U_CAPI int32_t U_EXPORT2
1164	u_memcmpCodePointOrder(const char16_t s1, const* char16_t *s2, int32_t count) {
1165	return uprv_strCompare(s1, count, s2, count, false, true);
1166	}
1167
1168	/ u_unescape & support fns ------------------------------------------------- /
1169
1170	/ This map must be in ASCENDING ORDER OF THE ESCAPE CODE /
1171	static const char16_t UNESCAPE_MAP[] = {
1172	/" 0x22, 0x22 /
1173	/' 0x27, 0x27 /
1174	/? 0x3F, 0x3F /
1175	/\ 0x5C, 0x5C /
1176	/a/ `0x61`, `0x07`,
1177	/b/ `0x62`, `0x08`,
1178	/e/ `0x65`, `0x1b`,
1179	/f/ `0x66`, `0x0c`,
1180	/n/ `0x6E`, `0x0a`,
1181	/r/ `0x72`, `0x0d`,
1182	/t/ `0x74`, `0x09`,
1183	/v/ `0x76`, `0x0b`
1184	};
1185	enum { UNESCAPE_MAP_LENGTH = UPRV_LENGTHOF(UNESCAPE_MAP) };
1186
1187	/ Convert one octal digit to a numeric value 0..7, or -1 on failure /
1188	static int32_t _digit8(char16_t c) {
1189	if (c >= u`'0'` && c <= u`'7'`) {
1190	return c - u`'0'`;
1191	}
1192	return -`1`;
1193	}
1194
1195	/ Convert one hex digit to a numeric value 0..F, or -1 on failure /
1196	static int32_t _digit16(char16_t c) {
1197	if (c >= u`'0'` && c <= u`'9'`) {
1198	return c - u`'0'`;
1199	}
1200	if (c >= u`'A'` && c <= u`'F'`) {
1201	return c - (u`'A'` - `10`);
1202	}
1203	if (c >= u`'a'` && c <= u`'f'`) {
1204	return c - (u`'a'` - `10`);
1205	}
1206	return -`1`;
1207	}
1208
1209	/ Parse a single escape sequence. Although this method deals in*
1210	* UChars, it does not use C++ or UnicodeString. This allows it to
1211	* be used from C contexts. */
1212	U_CAPI UChar32 U_EXPORT2
1213	u_unescapeAt(UNESCAPE_CHAR_AT charAt,
1214	int32_t *offset,
1215	int32_t length,
1216	void *context) {
1217
1218	int32_t start = *offset;
1219	UChar32 c;
1220	UChar32 result = `0`;
1221	int8_t n = `0`;
1222	int8_t minDig = `0`;
1223	int8_t maxDig = `0`;
1224	int8_t bitsPerDigit = `4`;
1225	int32_t dig;
1226	UBool braces = false;
1227
1228	/ Check that offset is in range /
1229	if (offset < `0` \|\| offset >= length) {
1230	goto err;
1231	}
1232
1233	/ Fetch first char16_t after '\\' /
1234	c = charAt((*offset)++, context);
1235
1236	/ Convert hexadecimal and octal escapes /
1237	switch (c) {
1238	case u`'u'`:
1239	minDig = maxDig = `4`;
1240	break;
1241	case u`'U'`:
1242	minDig = maxDig = `8`;
1243	break;
1244	case u`'x'`:
1245	minDig = `1`;
1246	if (offset < length && charAt(offset, context) == u`'{'`) {
1247	++(*offset);
1248	braces = true;
1249	maxDig = `8`;
1250	} else {
1251	maxDig = `2`;
1252	}
1253	break;
1254	default:
1255	dig = _digit8(c);
1256	if (dig >= `0`) {
1257	minDig = `1`;
1258	maxDig = `3`;
1259	n = `1`; / Already have first octal digit /
1260	bitsPerDigit = `3`;
1261	result = dig;
1262	}
1263	break;
1264	}
1265	if (minDig != `0`) {
1266	while (*offset < length && n < maxDig) {
1267	c = charAt(*offset, context);
1268	dig = (bitsPerDigit == `3`) ? _digit8(c) : _digit16(c);
1269	if (dig < `0`) {
1270	break;
1271	}
1272	result = (result << bitsPerDigit) \| dig;
1273	++(*offset);
1274	++n;
1275	}
1276	if (n < minDig) {
1277	goto err;
1278	}
1279	if (braces) {
1280	if (c != u`'}'`) {
1281	goto err;
1282	}
1283	++(*offset);
1284	}
1285	if (result < `0` \|\| result >= `0x110000`) {
1286	goto err;
1287	}
1288	/ If an escape sequence specifies a lead surrogate, see if*
1289	* there is a trail surrogate after it, either as an escape or
1290	* as a literal. If so, join them up into a supplementary.
1291	*/
1292	if (*offset < length && U16_IS_LEAD(result)) {
1293	int32_t ahead = *offset + `1`;
1294	c = charAt(*offset, context);
1295	if (c == u`'\\'` && ahead < length) {
1296	// Calling ourselves recursively may cause a stack overflow if
1297	// we have repeated escaped lead surrogates.
1298	// Limit the length to 11 ("x{0000DFFF}") after ahead.
1299	int32_t tailLimit = ahead + `11`;
1300	if (tailLimit > length) {
1301	tailLimit = length;
1302	}
1303	c = u_unescapeAt(charAt, &ahead, tailLimit, context);
1304	}
1305	if (U16_IS_TRAIL(c)) {
1306	*offset = ahead;
1307	result = U16_GET_SUPPLEMENTARY(result, c);
1308	}
1309	}
1310	return result;
1311	}
1312
1313	/ Convert C-style escapes in table /
1314	for (int32_t i=`0`; i<UNESCAPE_MAP_LENGTH; i+=`2`) {
1315	if (c == UNESCAPE_MAP[i]) {
1316	return UNESCAPE_MAP[i+`1`];
1317	} else if (c < UNESCAPE_MAP[i]) {
1318	break;
1319	}
1320	}
1321
1322	/ Map \cX to control-X: X & 0x1F /
1323	if (c == u`'c'` && *offset < length) {
1324	c = charAt((*offset)++, context);
1325	if (U16_IS_LEAD(c) && *offset < length) {
1326	char16_t c2 = charAt(*offset, context);
1327	if (U16_IS_TRAIL(c2)) {
1328	++(*offset);
1329	c = U16_GET_SUPPLEMENTARY(c, c2);
1330	}
1331	}
1332	return `0x1F` & c;
1333	}
1334
1335	/ If no special forms are recognized, then consider*
1336	* the backslash to generically escape the next character.
1337	* Deal with surrogate pairs. */
1338	if (U16_IS_LEAD(c) && *offset < length) {
1339	char16_t c2 = charAt(*offset, context);
1340	if (U16_IS_TRAIL(c2)) {
1341	++(*offset);
1342	return U16_GET_SUPPLEMENTARY(c, c2);
1343	}
1344	}
1345	return c;
1346
1347	err:
1348	/ Invalid escape sequence /
1349	offset = start; /* Reset to initial value /
1350	return (UChar32)`0xFFFFFFFF`;
1351	}
1352
1353	/ u_unescapeAt() callback to return a char16_t from a char* /
1354	static char16_t U_CALLCONV
1355	_charPtr_charAt(int32_t offset, void *context) {
1356	char16_t c16;
1357	/ It would be more efficient to access the invariant tables*
1358	* directly but there is no API for that. */
1359	u_charsToUChars(((char*) context) + offset, &c16, `1`);
1360	return c16;
1361	}
1362
1363	/ Append an escape-free segment of the text; used by u_unescape() /
1364	static void _appendUChars(char16_t *dest, int32_t destCapacity,
1365	const char *src, int32_t srcLen) {
1366	if (destCapacity < `0`) {
1367	destCapacity = `0`;
1368	}
1369	if (srcLen > destCapacity) {
1370	srcLen = destCapacity;
1371	}
1372	u_charsToUChars(src, dest, srcLen);
1373	}
1374
1375	/ Do an invariant conversion of char* -> char16_t, with escape parsing /*
1376	U_CAPI int32_t U_EXPORT2
1377	u_unescape(const char src, char16_t* *dest, int32_t destCapacity) {
1378	const char *segment = src;
1379	int32_t i = `0`;
1380	char c;
1381
1382	while ((c=*src) != `0`) {
1383	/ '\\' intentionally written as compiler-specific*
1384	* character constant to correspond to compiler-specific
1385	* char* constants. */
1386	if (c == `'\\'`) {
1387	int32_t lenParsed = `0`;
1388	UChar32 c32;
1389	if (src != segment) {
1390	if (dest != nullptr) {
1391	_appendUChars(dest + i, destCapacity - i,
1392	segment, (int32_t)(src - segment));
1393	}
1394	i += (int32_t)(src - segment);
1395	}
1396	++src; / advance past '\\' /
1397	c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), (void*)src);
1398	if (lenParsed == `0`) {
1399	goto err;
1400	}
1401	src += lenParsed; / advance past escape seq. /
1402	if (dest != nullptr && U16_LENGTH(c32) <= (destCapacity - i)) {
1403	U16_APPEND_UNSAFE(dest, i, c32);
1404	} else {
1405	i += U16_LENGTH(c32);
1406	}
1407	segment = src;
1408	} else {
1409	++src;
1410	}
1411	}
1412	if (src != segment) {
1413	if (dest != nullptr) {
1414	_appendUChars(dest + i, destCapacity - i,
1415	segment, (int32_t)(src - segment));
1416	}
1417	i += (int32_t)(src - segment);
1418	}
1419	if (dest != nullptr && i < destCapacity) {
1420	dest[i] = `0`;
1421	}
1422	return i;
1423
1424	err:
1425	if (dest != nullptr && destCapacity > `0`) {
1426	*dest = `0`;
1427	}
1428	return `0`;
1429	}
1430
/* NUL-termination of strings ----------------------------------------------- */
1432
/**
 * NUL-terminate a string no matter what its type.
 * Set warning and error codes accordingly.
 *
 * Does nothing when pErrorCode is nullptr or already holds a failure, or
 * when length is negative.  Otherwise:
 *   length <  destCapacity: writes the NUL and clears a pending
 *                           U_STRING_NOT_TERMINATED_WARNING;
 *   length == destCapacity: sets U_STRING_NOT_TERMINATED_WARNING;
 *   length >  destCapacity: sets U_BUFFER_OVERFLOW_ERROR.
 */
#define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode) UPRV_BLOCK_MACRO_BEGIN { \
    if((pErrorCode)!=nullptr && U_SUCCESS(*(pErrorCode))) { \
        /* not a public function, so no complete argument checking */ \
        \
        if((length)<0) { \
            /* assume that the caller handles this */ \
        } else if((length)<(destCapacity)) { \
            /* NUL-terminate the string, the NUL fits */ \
            (dest)[(length)]=0; \
            /* unset the not-terminated warning but leave all others */ \
            if(*(pErrorCode)==U_STRING_NOT_TERMINATED_WARNING) { \
                *(pErrorCode)=U_ZERO_ERROR; \
            } \
        } else if((length)==(destCapacity)) { \
            /* unable to NUL-terminate, but the string itself fit - set a warning code */ \
            *(pErrorCode)=U_STRING_NOT_TERMINATED_WARNING; \
        } else /* length>destCapacity */ { \
            /* even the string itself did not fit - set an error code */ \
            *(pErrorCode)=U_BUFFER_OVERFLOW_ERROR; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
1459
1460	U_CAPI char16_t U_EXPORT2
1461	u_asciiToUpper(char16_t c) {
1462	if (u`'a'` <= c && c <= u`'z'`) {
1463	c = c + u`'A'` - u`'a'`;
1464	}
1465	return c;
1466	}
1467
1468	U_CAPI int32_t U_EXPORT2
1469	u_terminateUChars(char16_t dest, int32_t destCapacity, int32_t length, UErrorCode pErrorCode) {
1470	__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
1471	return length;
1472	}
1473
1474	U_CAPI int32_t U_EXPORT2
1475	u_terminateChars(char dest, int32_t destCapacity, int32_t length, UErrorCode pErrorCode) {
1476	__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
1477	return length;
1478	}
1479
1480	U_CAPI int32_t U_EXPORT2
1481	u_terminateUChar32s(UChar32 dest, int32_t destCapacity, int32_t length, UErrorCode pErrorCode) {
1482	__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
1483	return length;
1484	}
1485
1486	U_CAPI int32_t U_EXPORT2
1487	u_terminateWChars(wchar_t dest, int32_t destCapacity, int32_t length, UErrorCode pErrorCode) {
1488	__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
1489	return length;
1490	}
1491
1492	// Compute the hash code for a string -------------------------------------- ***
1493
1494	// Moved here from uhash.c so that UnicodeString::hashCode() does not depend
1495	// on UHashtable code.
1496
/*
  Compute the hash by iterating sparsely over about 32 (up to 63)
  characters spaced evenly through the string.  For each character,
  multiply the previous hash value by a prime number and add the new
  character in, like a linear congruential random number generator,
  producing a pseudorandom deterministic value well distributed over
  the output range. [LIU]

  TYPE   element type of the string
  STR    pointer to the first element; a null pointer hashes to 0
  STRLEN number of elements
  DEREF  expression yielding the value to mix in for the current
         element; it may refer to the cursor variable p

  The macro expands to a block that ends in a return statement, so it
  must be the body of a function returning int32_t.
*/

#define STRING_HASH(TYPE, STR, STRLEN, DEREF) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t hash = 0; \
    const TYPE *p = (const TYPE *) STR; \
    if (p != nullptr) { \
        int32_t len = (int32_t)(STRLEN); \
        /* step is 1 for len < 64 and grows with the length beyond that */ \
        int32_t inc = ((len - 32) / 32) + 1; \
        const TYPE *limit = p + len; \
        while (p<limit) { \
            hash = (hash * 37) + DEREF; \
            p += inc; \
        } \
    } \
    return static_cast<int32_t>(hash); \
} UPRV_BLOCK_MACRO_END
1520
1521	/ Used by UnicodeString to compute its hashcode - Not public API. /
1522	U_CAPI int32_t U_EXPORT2
1523	ustr_hashUCharsN(const char16_t *str, int32_t length) {
1524	STRING_HASH(char16_t, str, length, *p);
1525	}
1526
1527	U_CAPI int32_t U_EXPORT2
1528	ustr_hashCharsN(const char *str, int32_t length) {
1529	STRING_HASH(uint8_t, str, length, *p);
1530	}
1531
1532	U_CAPI int32_t U_EXPORT2
1533	ustr_hashICharsN(const char *str, int32_t length) {
1534	STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p));
1535	}
1536

Browse the source code of Godot/thirdparty/icu4c/common/ustring.cpp