ustring.cpp source code [ClickHouse/contrib/icu/icu4c/source/common/ustring.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	******************************************************************************
5	*
6	* Copyright (C) 1998-2016, International Business Machines
7	* Corporation and others. All Rights Reserved.
8	*
9	******************************************************************************
10	*
11	* File ustring.cpp
12	*
13	* Modification History:
14	*
15	* Date Name Description
16	* 12/07/98 bertrand Creation.
17	******************************************************************************
18	*/
19
20	#include "unicode/utypes.h"
21	#include "unicode/putil.h"
22	#include "unicode/uchar.h"
23	#include "unicode/ustring.h"
24	#include "unicode/utf16.h"
25	#include "cstring.h"
26	#include "cwchar.h"
27	#include "cmemory.h"
28	#include "ustr_imp.h"
29
30	/ ANSI string.h - style functions ------------------------------------------ /
31
32	/ U+ffff is the highest BMP code point, the highest one that fits into a 16-bit UChar /
33	#define U_BMP_MAX 0xffff
34
35	/ Forward binary string search functions ----------------------------------- /
36
37	/*
38	* Test if a substring match inside a string is at code point boundaries.
39	* All pointers refer to the same buffer.
40	* The limit pointer may be NULL, all others must be real pointers.
41	*/
42	static inline UBool
43	isMatchAtCPBoundary(const UChar start, const* UChar match, const* UChar matchLimit, const* UChar *limit) {
44	if(U16_IS_TRAIL(match) && start!=match && U16_IS_LEAD((match-`1`))) {
45	/ the leading edge of the match is in the middle of a surrogate pair /
46	return FALSE;
47	}
48	if(U16_IS_LEAD((matchLimit-`1`)) && match!=limit && U16_IS_TRAIL(matchLimit)) {
49	/ the trailing edge of the match is in the middle of a surrogate pair /
50	return FALSE;
51	}
52	return TRUE;
53	}
54
55	U_CAPI UChar * U_EXPORT2
56	u_strFindFirst(const UChar *s, int32_t length,
57	const UChar *sub, int32_t subLength) {
58	const UChar start, p, q, subLimit;
59	UChar c, cs, cq;
60
61	if(sub==NULL \|\| subLength<-`1`) {
62	return (UChar *)s;
63	}
64	if(s==NULL \|\| length<-`1`) {
65	return NULL;
66	}
67
68	start=s;
69
70	if(length<`0` && subLength<`0`) {
71	/ both strings are NUL-terminated /
72	if((cs=*sub++)==`0`) {
73	return (UChar *)s;
74	}
75	if(*sub==`0` && !U16_IS_SURROGATE(cs)) {
76	/ the substring consists of a single, non-surrogate BMP code point /
77	return u_strchr(s, cs);
78	}
79
80	while((c=*s++)!=`0`) {
81	if(c==cs) {
82	/ found first substring UChar, compare rest /
83	p=s;
84	q=sub;
85	for(;;) {
86	if((cq=*q)==`0`) {
87	if(isMatchAtCPBoundary(start, s-`1`, p, NULL)) {
88	return (UChar )(s-`1`); /* well-formed match /
89	} else {
90	break; / no match because surrogate pair is split /
91	}
92	}
93	if((c=*p)==`0`) {
94	return NULL; / no match, and none possible after s /
95	}
96	if(c!=cq) {
97	break; / no match /
98	}
99	++p;
100	++q;
101	}
102	}
103	}
104
105	/ not found /
106	return NULL;
107	}
108
109	if(subLength<`0`) {
110	subLength=u_strlen(sub);
111	}
112	if(subLength==`0`) {
113	return (UChar *)s;
114	}
115
116	/ get sub[0] to search for it fast /
117	cs=*sub++;
118	--subLength;
119	subLimit=sub+subLength;
120
121	if(subLength==`0` && !U16_IS_SURROGATE(cs)) {
122	/ the substring consists of a single, non-surrogate BMP code point /
123	return length<`0` ? u_strchr(s, cs) : u_memchr(s, cs, length);
124	}
125
126	if(length<`0`) {
127	/ s is NUL-terminated /
128	while((c=*s++)!=`0`) {
129	if(c==cs) {
130	/ found first substring UChar, compare rest /
131	p=s;
132	q=sub;
133	for(;;) {
134	if(q==subLimit) {
135	if(isMatchAtCPBoundary(start, s-`1`, p, NULL)) {
136	return (UChar )(s-`1`); /* well-formed match /
137	} else {
138	break; / no match because surrogate pair is split /
139	}
140	}
141	if((c=*p)==`0`) {
142	return NULL; / no match, and none possible after s /
143	}
144	if(c!=*q) {
145	break; / no match /
146	}
147	++p;
148	++q;
149	}
150	}
151	}
152	} else {
153	const UChar limit, preLimit;
154
155	/ subLength was decremented above /
156	if(length<=subLength) {
157	return NULL; / s is shorter than sub /
158	}
159
160	limit=s+length;
161
162	/ the substring must start before preLimit /
163	preLimit=limit-subLength;
164
165	while(s!=preLimit) {
166	c=*s++;
167	if(c==cs) {
168	/ found first substring UChar, compare rest /
169	p=s;
170	q=sub;
171	for(;;) {
172	if(q==subLimit) {
173	if(isMatchAtCPBoundary(start, s-`1`, p, limit)) {
174	return (UChar )(s-`1`); /* well-formed match /
175	} else {
176	break; / no match because surrogate pair is split /
177	}
178	}
179	if(p!=q) {
180	break; / no match /
181	}
182	++p;
183	++q;
184	}
185	}
186	}
187	}
188
189	/ not found /
190	return NULL;
191	}
192
193	U_CAPI UChar * U_EXPORT2
194	u_strstr(const UChar s, const* UChar *substring) {
195	return u_strFindFirst(s, -`1`, substring, -`1`);
196	}
197
198	U_CAPI UChar * U_EXPORT2
199	u_strchr(const UChar *s, UChar c) {
200	if(U16_IS_SURROGATE(c)) {
201	/ make sure to not find half of a surrogate pair /
202	return u_strFindFirst(s, -`1`, &c, `1`);
203	} else {
204	UChar cs;
205
206	/ trivial search for a BMP code point /
207	for(;;) {
208	if((cs=*s)==c) {
209	return (UChar *)s;
210	}
211	if(cs==`0`) {
212	return NULL;
213	}
214	++s;
215	}
216	}
217	}
218
219	U_CAPI UChar * U_EXPORT2
220	u_strchr32(const UChar *s, UChar32 c) {
221	if((uint32_t)c<=U_BMP_MAX) {
222	/ find BMP code point /
223	return u_strchr(s, (UChar)c);
224	} else if((uint32_t)c<=UCHAR_MAX_VALUE) {
225	/ find supplementary code point as surrogate pair /
226	UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
227
228	while((cs=*s++)!=`0`) {
229	if(cs==lead && *s==trail) {
230	return (UChar *)(s-`1`);
231	}
232	}
233	return NULL;
234	} else {
235	/ not a Unicode code point, not findable /
236	return NULL;
237	}
238	}
239
240	U_CAPI UChar * U_EXPORT2
241	u_memchr(const UChar *s, UChar c, int32_t count) {
242	if(count<=`0`) {
243	return NULL; / no string /
244	} else if(U16_IS_SURROGATE(c)) {
245	/ make sure to not find half of a surrogate pair /
246	return u_strFindFirst(s, count, &c, `1`);
247	} else {
248	/ trivial search for a BMP code point /
249	const UChar *limit=s+count;
250	do {
251	if(*s==c) {
252	return (UChar *)s;
253	}
254	} while(++s!=limit);
255	return NULL;
256	}
257	}
258
259	U_CAPI UChar * U_EXPORT2
260	u_memchr32(const UChar *s, UChar32 c, int32_t count) {
261	if((uint32_t)c<=U_BMP_MAX) {
262	/ find BMP code point /
263	return u_memchr(s, (UChar)c, count);
264	} else if(count<`2`) {
265	/ too short for a surrogate pair /
266	return NULL;
267	} else if((uint32_t)c<=UCHAR_MAX_VALUE) {
268	/ find supplementary code point as surrogate pair /
269	const UChar limit=s+count-`1`; /* -1 so that we do not need a separate check for the trail unit /
270	UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
271
272	do {
273	if(s==lead && (s+`1`)==trail) {
274	return (UChar *)s;
275	}
276	} while(++s!=limit);
277	return NULL;
278	} else {
279	/ not a Unicode code point, not findable /
280	return NULL;
281	}
282	}
283
284	/ Backward binary string search functions ---------------------------------- /
285
286	U_CAPI UChar * U_EXPORT2
287	u_strFindLast(const UChar *s, int32_t length,
288	const UChar *sub, int32_t subLength) {
289	const UChar start, limit, p, q, *subLimit;
290	UChar c, cs;
291
292	if(sub==NULL \|\| subLength<-`1`) {
293	return (UChar *)s;
294	}
295	if(s==NULL \|\| length<-`1`) {
296	return NULL;
297	}
298
299	/*
300	* This implementation is more lazy than the one for u_strFindFirst():
301	* There is no special search code for NUL-terminated strings.
302	* It does not seem to be worth it for searching substrings to
303	* search forward and find all matches like in u_strrchr() and similar.
304	* Therefore, we simply get both string lengths and search backward.
305	*
306	* markus 2002oct23
307	*/
308
309	if(subLength<`0`) {
310	subLength=u_strlen(sub);
311	}
312	if(subLength==`0`) {
313	return (UChar *)s;
314	}
315
316	/ get sub[subLength-1] to search for it fast /
317	subLimit=sub+subLength;
318	cs=*(--subLimit);
319	--subLength;
320
321	if(subLength==`0` && !U16_IS_SURROGATE(cs)) {
322	/ the substring consists of a single, non-surrogate BMP code point /
323	return length<`0` ? u_strrchr(s, cs) : u_memrchr(s, cs, length);
324	}
325
326	if(length<`0`) {
327	length=u_strlen(s);
328	}
329
330	/ subLength was decremented above /
331	if(length<=subLength) {
332	return NULL; / s is shorter than sub /
333	}
334
335	start=s;
336	limit=s+length;
337
338	/ the substring must start no later than s+subLength /
339	s+=subLength;
340
341	while(s!=limit) {
342	c=*(--limit);
343	if(c==cs) {
344	/ found last substring UChar, compare rest /
345	p=limit;
346	q=subLimit;
347	for(;;) {
348	if(q==sub) {
349	if(isMatchAtCPBoundary(start, p, limit+`1`, start+length)) {
350	return (UChar )p; /* well-formed match /
351	} else {
352	break; / no match because surrogate pair is split /
353	}
354	}
355	if((--p)!=(--q)) {
356	break; / no match /
357	}
358	}
359	}
360	}
361
362	/ not found /
363	return NULL;
364	}
365
366	U_CAPI UChar * U_EXPORT2
367	u_strrstr(const UChar s, const* UChar *substring) {
368	return u_strFindLast(s, -`1`, substring, -`1`);
369	}
370
371	U_CAPI UChar * U_EXPORT2
372	u_strrchr(const UChar *s, UChar c) {
373	if(U16_IS_SURROGATE(c)) {
374	/ make sure to not find half of a surrogate pair /
375	return u_strFindLast(s, -`1`, &c, `1`);
376	} else {
377	const UChar *result=NULL;
378	UChar cs;
379
380	/ trivial search for a BMP code point /
381	for(;;) {
382	if((cs=*s)==c) {
383	result=s;
384	}
385	if(cs==`0`) {
386	return (UChar *)result;
387	}
388	++s;
389	}
390	}
391	}
392
393	U_CAPI UChar * U_EXPORT2
394	u_strrchr32(const UChar *s, UChar32 c) {
395	if((uint32_t)c<=U_BMP_MAX) {
396	/ find BMP code point /
397	return u_strrchr(s, (UChar)c);
398	} else if((uint32_t)c<=UCHAR_MAX_VALUE) {
399	/ find supplementary code point as surrogate pair /
400	const UChar *result=NULL;
401	UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
402
403	while((cs=*s++)!=`0`) {
404	if(cs==lead && *s==trail) {
405	result=s-`1`;
406	}
407	}
408	return (UChar *)result;
409	} else {
410	/ not a Unicode code point, not findable /
411	return NULL;
412	}
413	}
414
415	U_CAPI UChar * U_EXPORT2
416	u_memrchr(const UChar *s, UChar c, int32_t count) {
417	if(count<=`0`) {
418	return NULL; / no string /
419	} else if(U16_IS_SURROGATE(c)) {
420	/ make sure to not find half of a surrogate pair /
421	return u_strFindLast(s, count, &c, `1`);
422	} else {
423	/ trivial search for a BMP code point /
424	const UChar *limit=s+count;
425	do {
426	if(*(--limit)==c) {
427	return (UChar *)limit;
428	}
429	} while(s!=limit);
430	return NULL;
431	}
432	}
433
434	U_CAPI UChar * U_EXPORT2
435	u_memrchr32(const UChar *s, UChar32 c, int32_t count) {
436	if((uint32_t)c<=U_BMP_MAX) {
437	/ find BMP code point /
438	return u_memrchr(s, (UChar)c, count);
439	} else if(count<`2`) {
440	/ too short for a surrogate pair /
441	return NULL;
442	} else if((uint32_t)c<=UCHAR_MAX_VALUE) {
443	/ find supplementary code point as surrogate pair /
444	const UChar *limit=s+count-`1`;
445	UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
446
447	do {
448	if(limit==trail && (limit-`1`)==lead) {
449	return (UChar *)(limit-`1`);
450	}
451	} while(s!=--limit);
452	return NULL;
453	} else {
454	/ not a Unicode code point, not findable /
455	return NULL;
456	}
457	}
458
459	/ Tokenization functions --------------------------------------------------- /
460
461	/*
462	* Match each code point in a string against each code point in the matchSet.
463	* Return the index of the first string code point that
464	* is (polarity==TRUE) or is not (FALSE) contained in the matchSet.
465	* Return -(string length)-1 if there is no such code point.
466	*/
467	static int32_t
468	_matchFromSet(const UChar string, const* UChar *matchSet, UBool polarity) {
469	int32_t matchLen, matchBMPLen, strItr, matchItr;
470	UChar32 stringCh, matchCh;
471	UChar c, c2;
472
473	/ first part of matchSet contains only BMP code points /
474	matchBMPLen = `0`;
475	while((c = matchSet[matchBMPLen]) != `0` && U16_IS_SINGLE(c)) {
476	++matchBMPLen;
477	}
478
479	/ second part of matchSet contains BMP and supplementary code points /
480	matchLen = matchBMPLen;
481	while(matchSet[matchLen] != `0`) {
482	++matchLen;
483	}
484
485	for(strItr = `0`; (c = string[strItr]) != `0`;) {
486	++strItr;
487	if(U16_IS_SINGLE(c)) {
488	if(polarity) {
489	for(matchItr = `0`; matchItr < matchLen; ++matchItr) {
490	if(c == matchSet[matchItr]) {
491	return strItr - `1`; / one matches /
492	}
493	}
494	} else {
495	for(matchItr = `0`; matchItr < matchLen; ++matchItr) {
496	if(c == matchSet[matchItr]) {
497	goto endloop;
498	}
499	}
500	return strItr - `1`; / none matches /
501	}
502	} else {
503	/*
504	* No need to check for string length before U16_IS_TRAIL
505	* because c2 could at worst be the terminating NUL.
506	*/
507	if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) {
508	++strItr;
509	stringCh = U16_GET_SUPPLEMENTARY(c, c2);
510	} else {
511	stringCh = c; / unpaired trail surrogate /
512	}
513
514	if(polarity) {
515	for(matchItr = matchBMPLen; matchItr < matchLen;) {
516	U16_NEXT(matchSet, matchItr, matchLen, matchCh);
517	if(stringCh == matchCh) {
518	return strItr - U16_LENGTH(stringCh); / one matches /
519	}
520	}
521	} else {
522	for(matchItr = matchBMPLen; matchItr < matchLen;) {
523	U16_NEXT(matchSet, matchItr, matchLen, matchCh);
524	if(stringCh == matchCh) {
525	goto endloop;
526	}
527	}
528	return strItr - U16_LENGTH(stringCh); / none matches /
529	}
530	}
531	endloop:
532	/ wish C had continue with labels like Java... /;
533	}
534
535	/ Didn't find it. /
536	return -strItr-`1`;
537	}
538
539	/ Search for a codepoint in a string that matches one of the matchSet codepoints. /
540	U_CAPI UChar * U_EXPORT2
541	u_strpbrk(const UChar string, const* UChar *matchSet)
542	{
543	int32_t idx = _matchFromSet(string, matchSet, TRUE);
544	if(idx >= `0`) {
545	return (UChar *)string + idx;
546	} else {
547	return NULL;
548	}
549	}
550
551	/ Search for a codepoint in a string that matches one of the matchSet codepoints. /
552	U_CAPI int32_t U_EXPORT2
553	u_strcspn(const UChar string, const* UChar *matchSet)
554	{
555	int32_t idx = _matchFromSet(string, matchSet, TRUE);
556	if(idx >= `0`) {
557	return idx;
558	} else {
559	return -idx - `1`; / == u_strlen(string) /
560	}
561	}
562
563	/ Search for a codepoint in a string that does not match one of the matchSet codepoints. /
564	U_CAPI int32_t U_EXPORT2
565	u_strspn(const UChar string, const* UChar *matchSet)
566	{
567	int32_t idx = _matchFromSet(string, matchSet, FALSE);
568	if(idx >= `0`) {
569	return idx;
570	} else {
571	return -idx - `1`; / == u_strlen(string) /
572	}
573	}
574
575	/ ----- Text manipulation functions --- /
576
577	U_CAPI UChar* U_EXPORT2
578	u_strtok_r(UChar *src,
579	const UChar *delim,
580	UChar **saveState)
581	{
582	UChar *tokSource;
583	UChar *nextToken;
584	uint32_t nonDelimIdx;
585
586	/ If saveState is NULL, the user messed up. /
587	if (src != NULL) {
588	tokSource = src;
589	saveState = src; /* Set to "src" in case there are no delimiters /
590	}
591	else if (*saveState) {
592	tokSource = *saveState;
593	}
594	else {
595	/ src == NULL && saveState == NULL /*
596	/ This shouldn't happen. We already finished tokenizing. /
597	return NULL;
598	}
599
600	/ Skip initial delimiters /
601	nonDelimIdx = u_strspn(tokSource, delim);
602	tokSource = &tokSource[nonDelimIdx];
603
604	if (*tokSource) {
605	nextToken = u_strpbrk(tokSource, delim);
606	if (nextToken != NULL) {
607	/ Create a token /
608	*(nextToken++) = `0`;
609	*saveState = nextToken;
610	return tokSource;
611	}
612	else if (*saveState) {
613	/ Return the last token /
614	*saveState = NULL;
615	return tokSource;
616	}
617	}
618	else {
619	/ No tokens were found. Only delimiters were left. /
620	*saveState = NULL;
621	}
622	return NULL;
623	}
624
625	/ Miscellaneous functions -------------------------------------------------- /
626
627	U_CAPI UChar* U_EXPORT2
628	u_strcat(UChar *dst,
629	const UChar *src)
630	{
631	UChar anchor = dst; /* save a pointer to start of dst /
632
633	while(dst != `0`) { /* To end of first string /
634	++dst;
635	}
636	while(((dst++) = (src++)) != `0`) { / copy string 2 over /
637	}
638
639	return anchor;
640	}
641
642	U_CAPI UChar* U_EXPORT2
643	u_strncat(UChar *dst,
644	const UChar *src,
645	int32_t n )
646	{
647	if(n > `0`) {
648	UChar anchor = dst; /* save a pointer to start of dst /
649
650	while(dst != `0`) { /* To end of first string /
651	++dst;
652	}
653	while((dst = src) != `0`) { / copy string 2 over /
654	++dst;
655	if(--n == `0`) {
656	*dst = `0`;
657	break;
658	}
659	++src;
660	}
661
662	return anchor;
663	} else {
664	return dst;
665	}
666	}
667
668	/ ----- Text property functions --- /
669
670	U_CAPI int32_t U_EXPORT2
671	u_strcmp(const UChar *s1,
672	const UChar *s2)
673	{
674	UChar c1, c2;
675
676	for(;;) {
677	c1=*s1++;
678	c2=*s2++;
679	if (c1 != c2 \|\| c1 == `0`) {
680	break;
681	}
682	}
683	return (int32_t)c1 - (int32_t)c2;
684	}
685
686	U_CFUNC int32_t U_EXPORT2
687	uprv_strCompare(const UChar *s1, int32_t length1,
688	const UChar *s2, int32_t length2,
689	UBool strncmpStyle, UBool codePointOrder) {
690	const UChar start1, start2, limit1, limit2;
691	UChar c1, c2;
692
693	/ setup for fix-up /
694	start1=s1;
695	start2=s2;
696
697	/ compare identical prefixes - they do not need to be fixed up /
698	if(length1<`0` && length2<`0`) {
699	/ strcmp style, both NUL-terminated /
700	if(s1==s2) {
701	return `0`;
702	}
703
704	for(;;) {
705	c1=*s1;
706	c2=*s2;
707	if(c1!=c2) {
708	break;
709	}
710	if(c1==`0`) {
711	return `0`;
712	}
713	++s1;
714	++s2;
715	}
716
717	/ setup for fix-up /
718	limit1=limit2=NULL;
719	} else if(strncmpStyle) {
720	/ special handling for strncmp, assume length1==length2>=0 but also check for NUL /
721	if(s1==s2) {
722	return `0`;
723	}
724
725	limit1=start1+length1;
726
727	for(;;) {
728	/ both lengths are same, check only one limit /
729	if(s1==limit1) {
730	return `0`;
731	}
732
733	c1=*s1;
734	c2=*s2;
735	if(c1!=c2) {
736	break;
737	}
738	if(c1==`0`) {
739	return `0`;
740	}
741	++s1;
742	++s2;
743	}
744
745	/ setup for fix-up /
746	limit2=start2+length1; / use length1 here, too, to enforce assumption /
747	} else {
748	/ memcmp/UnicodeString style, both length-specified /
749	int32_t lengthResult;
750
751	if(length1<`0`) {
752	length1=u_strlen(s1);
753	}
754	if(length2<`0`) {
755	length2=u_strlen(s2);
756	}
757
758	/ limit1=start1+min(lenght1, length2) /
759	if(length1<length2) {
760	lengthResult=-`1`;
761	limit1=start1+length1;
762	} else if(length1==length2) {
763	lengthResult=`0`;
764	limit1=start1+length1;
765	} else / length1>length2 / {
766	lengthResult=`1`;
767	limit1=start1+length2;
768	}
769
770	if(s1==s2) {
771	return lengthResult;
772	}
773
774	for(;;) {
775	/ check pseudo-limit /
776	if(s1==limit1) {
777	return lengthResult;
778	}
779
780	c1=*s1;
781	c2=*s2;
782	if(c1!=c2) {
783	break;
784	}
785	++s1;
786	++s2;
787	}
788
789	/ setup for fix-up /
790	limit1=start1+length1;
791	limit2=start2+length2;
792	}
793
794	/ if both values are in or above the surrogate range, fix them up /
795	if(c1>=`0xd800` && c2>=`0xd800` && codePointOrder) {
796	/ subtract 0x2800 from BMP code points to make them smaller than supplementary ones /
797	if(
798	(c1<=`0xdbff` && (s1+`1`)!=limit1 && U16_IS_TRAIL(*(s1+`1`))) \|\|
799	(U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-`1`)))
800	) {
801	/ part of a surrogate pair, leave >=d800 /
802	} else {
803	/ BMP code point - may be surrogate code point - make <d800 /
804	c1-=`0x2800`;
805	}
806
807	if(
808	(c2<=`0xdbff` && (s2+`1`)!=limit2 && U16_IS_TRAIL(*(s2+`1`))) \|\|
809	(U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-`1`)))
810	) {
811	/ part of a surrogate pair, leave >=d800 /
812	} else {
813	/ BMP code point - may be surrogate code point - make <d800 /
814	c2-=`0x2800`;
815	}
816	}
817
818	/ now c1 and c2 are in the requested (code unit or code point) order /
819	return (int32_t)c1-(int32_t)c2;
820	}
821
822	/*
823	* Compare two strings as presented by UCharIterators.
824	* Use code unit or code point order.
825	* When the function returns, it is undefined where the iterators
826	* have stopped.
827	*/
828	U_CAPI int32_t U_EXPORT2
829	u_strCompareIter(UCharIterator iter1, UCharIterator iter2, UBool codePointOrder) {
830	UChar32 c1, c2;
831
832	/ argument checking /
833	if(iter1==NULL \|\| iter2==NULL) {
834	return `0`; / bad arguments /
835	}
836	if(iter1==iter2) {
837	return `0`; / identical iterators /
838	}
839
840	/ reset iterators to start? /
841	iter1->move(iter1, `0`, UITER_START);
842	iter2->move(iter2, `0`, UITER_START);
843
844	/ compare identical prefixes - they do not need to be fixed up /
845	for(;;) {
846	c1=iter1->next(iter1);
847	c2=iter2->next(iter2);
848	if(c1!=c2) {
849	break;
850	}
851	if(c1==-`1`) {
852	return `0`;
853	}
854	}
855
856	/ if both values are in or above the surrogate range, fix them up /
857	if(c1>=`0xd800` && c2>=`0xd800` && codePointOrder) {
858	/ subtract 0x2800 from BMP code points to make them smaller than supplementary ones /
859	if(
860	(c1<=`0xdbff` && U16_IS_TRAIL(iter1->current(iter1))) \|\|
861	(U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1))))
862	) {
863	/ part of a surrogate pair, leave >=d800 /
864	} else {
865	/ BMP code point - may be surrogate code point - make <d800 /
866	c1-=`0x2800`;
867	}
868
869	if(
870	(c2<=`0xdbff` && U16_IS_TRAIL(iter2->current(iter2))) \|\|
871	(U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2))))
872	) {
873	/ part of a surrogate pair, leave >=d800 /
874	} else {
875	/ BMP code point - may be surrogate code point - make <d800 /
876	c2-=`0x2800`;
877	}
878	}
879
880	/ now c1 and c2 are in the requested (code unit or code point) order /
881	return (int32_t)c1-(int32_t)c2;
882	}
883
884	#if 0
885	/*
886	* u_strCompareIter() does not leave the iterators _on_ the different units.
887	* This is possible but would cost a few extra indirect function calls to back
888	* up if the last unit (c1 or c2 respectively) was >=0.
889	*
890	* Consistently leaving them _behind_ the different units is not an option
891	* because the current "unit" is the end of the string if that is reached,
892	* and in such a case the iterator does not move.
893	* For example, when comparing "ab" with "abc", both iterators rest _on_ the end
894	* of their strings. Calling previous() on each does not move them to where
895	* the comparison fails.
896	*
897	* So the simplest semantics is to not define where the iterators end up.
898	*
899	* The following fragment is part of what would need to be done for backing up.
900	*/
901	void fragment {
902	/ iff a surrogate is part of a surrogate pair, leave >=d800 /
903	if(c1<=`0xdbff`) {
904	if(!U16_IS_TRAIL(iter1->current(iter1))) {
905	/ lead surrogate code point - make <d800 /
906	c1-=`0x2800`;
907	}
908	} else if(c1<=`0xdfff`) {
909	int32_t idx=iter1->getIndex(iter1, UITER_CURRENT);
910	iter1->previous(iter1); / ==c1 /
911	if(!U16_IS_LEAD(iter1->previous(iter1))) {
912	/ trail surrogate code point - make <d800 /
913	c1-=`0x2800`;
914	}
915	/ go back to behind where the difference is /
916	iter1->move(iter1, idx, UITER_ZERO);
917	} else / 0xe000<=c1<=0xffff / {
918	/ BMP code point - make <d800 /
919	c1-=`0x2800`;
920	}
921	}
922	#endif
923
924	U_CAPI int32_t U_EXPORT2
925	u_strCompare(const UChar *s1, int32_t length1,
926	const UChar *s2, int32_t length2,
927	UBool codePointOrder) {
928	/ argument checking /
929	if(s1==NULL \|\| length1<-`1` \|\| s2==NULL \|\| length2<-`1`) {
930	return `0`;
931	}
932	return uprv_strCompare(s1, length1, s2, length2, FALSE, codePointOrder);
933	}
934
935	/ String compare in code point order - u_strcmp() compares in code unit order. /
936	U_CAPI int32_t U_EXPORT2
937	u_strcmpCodePointOrder(const UChar s1, const* UChar *s2) {
938	return uprv_strCompare(s1, -`1`, s2, -`1`, FALSE, TRUE);
939	}
940
941	U_CAPI int32_t U_EXPORT2
942	u_strncmp(const UChar *s1,
943	const UChar *s2,
944	int32_t n)
945	{
946	if(n > `0`) {
947	int32_t rc;
948	for(;;) {
949	rc = (int32_t)s1 - (int32_t)s2;
950	if(rc != `0` \|\| *s1 == `0` \|\| --n == `0`) {
951	return rc;
952	}
953	++s1;
954	++s2;
955	}
956	} else {
957	return `0`;
958	}
959	}
960
961	U_CAPI int32_t U_EXPORT2
962	u_strncmpCodePointOrder(const UChar s1, const* UChar *s2, int32_t n) {
963	return uprv_strCompare(s1, n, s2, n, TRUE, TRUE);
964	}
965
966	U_CAPI UChar* U_EXPORT2
967	u_strcpy(UChar *dst,
968	const UChar *src)
969	{
970	UChar anchor = dst; /* save a pointer to start of dst /
971
972	while(((dst++) = (src++)) != `0`) { / copy string 2 over /
973	}
974
975	return anchor;
976	}
977
978	U_CAPI UChar* U_EXPORT2
979	u_strncpy(UChar *dst,
980	const UChar *src,
981	int32_t n)
982	{
983	UChar anchor = dst; /* save a pointer to start of dst /
984
985	/ copy string 2 over /
986	while(n > `0` && ((dst++) = (src++)) != `0`) {
987	--n;
988	}
989
990	return anchor;
991	}
992
993	U_CAPI int32_t U_EXPORT2
994	u_strlen(const UChar *s)
995	{
996	#if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
997	return (int32_t)uprv_wcslen((const wchar_t *)s);
998	#else
999	const UChar *t = s;
1000	while(*t != `0`) {
1001	++t;
1002	}
1003	return t - s;
1004	#endif
1005	}
1006
1007	U_CAPI int32_t U_EXPORT2
1008	u_countChar32(const UChar *s, int32_t length) {
1009	int32_t count;
1010
1011	if(s==NULL \|\| length<-`1`) {
1012	return `0`;
1013	}
1014
1015	count=`0`;
1016	if(length>=`0`) {
1017	while(length>`0`) {
1018	++count;
1019	if(U16_IS_LEAD(s) && length>=`2` && U16_IS_TRAIL((s+`1`))) {
1020	s+=`2`;
1021	length-=`2`;
1022	} else {
1023	++s;
1024	--length;
1025	}
1026	}
1027	} else / length==-1 / {
1028	UChar c;
1029
1030	for(;;) {
1031	if((c=*s++)==`0`) {
1032	break;
1033	}
1034	++count;
1035
1036	/*
1037	* sufficient to look ahead one because of UTF-16;
1038	* safe to look ahead one because at worst that would be the terminating NUL
1039	*/
1040	if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
1041	++s;
1042	}
1043	}
1044	}
1045	return count;
1046	}
1047
1048	U_CAPI UBool U_EXPORT2
1049	u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
1050
1051	if(number<`0`) {
1052	return TRUE;
1053	}
1054	if(s==NULL \|\| length<-`1`) {
1055	return FALSE;
1056	}
1057
1058	if(length==-`1`) {
1059	/ s is NUL-terminated /
1060	UChar c;
1061
1062	/ count code points until they exceed /
1063	for(;;) {
1064	if((c=*s++)==`0`) {
1065	return FALSE;
1066	}
1067	if(number==`0`) {
1068	return TRUE;
1069	}
1070	if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
1071	++s;
1072	}
1073	--number;
1074	}
1075	} else {
1076	/ length>=0 known /
1077	const UChar *limit;
1078	int32_t maxSupplementary;
1079
1080	/ s contains at least (length+1)/2 code points: <=2 UChars per cp /
1081	if(((length+`1`)/`2`)>number) {
1082	return TRUE;
1083	}
1084
1085	/ check if s does not even contain enough UChars /
1086	maxSupplementary=length-number;
1087	if(maxSupplementary<=`0`) {
1088	return FALSE;
1089	}
1090	/ there are maxSupplementary=length-number more UChars than asked-for code points /
1091
1092	/*
1093	* count code points until they exceed and also check that there are
1094	* no more than maxSupplementary supplementary code points (UChar pairs)
1095	*/
1096	limit=s+length;
1097	for(;;) {
1098	if(s==limit) {
1099	return FALSE;
1100	}
1101	if(number==`0`) {
1102	return TRUE;
1103	}
1104	if(U16_IS_LEAD(s++) && s!=limit && U16_IS_TRAIL(s)) {
1105	++s;
1106	if(--maxSupplementary<=`0`) {
1107	/ too many pairs - too few code points /
1108	return FALSE;
1109	}
1110	}
1111	--number;
1112	}
1113	}
1114	}
1115
1116	U_CAPI UChar * U_EXPORT2
1117	u_memcpy(UChar dest, const* UChar *src, int32_t count) {
1118	if(count > `0`) {
1119	uprv_memcpy(dest, src, (size_t)count*U_SIZEOF_UCHAR);
1120	}
1121	return dest;
1122	}
1123
1124	U_CAPI UChar * U_EXPORT2
1125	u_memmove(UChar dest, const* UChar *src, int32_t count) {
1126	if(count > `0`) {
1127	uprv_memmove(dest, src, (size_t)count*U_SIZEOF_UCHAR);
1128	}
1129	return dest;
1130	}
1131
1132	U_CAPI UChar * U_EXPORT2
1133	u_memset(UChar *dest, UChar c, int32_t count) {
1134	if(count > `0`) {
1135	UChar *ptr = dest;
1136	UChar *limit = dest + count;
1137
1138	while (ptr < limit) {
1139	*(ptr++) = c;
1140	}
1141	}
1142	return dest;
1143	}
1144
1145	U_CAPI int32_t U_EXPORT2
1146	u_memcmp(const UChar buf1, const* UChar *buf2, int32_t count) {
1147	if(count > `0`) {
1148	const UChar *limit = buf1 + count;
1149	int32_t result;
1150
1151	while (buf1 < limit) {
1152	result = (int32_t)(uint16_t)buf1 - (int32_t)(uint16_t)buf2;
1153	if (result != `0`) {
1154	return result;
1155	}
1156	buf1++;
1157	buf2++;
1158	}
1159	}
1160	return `0`;
1161	}
1162
1163	U_CAPI int32_t U_EXPORT2
1164	u_memcmpCodePointOrder(const UChar s1, const* UChar *s2, int32_t count) {
1165	return uprv_strCompare(s1, count, s2, count, FALSE, TRUE);
1166	}
1167
1168	/ u_unescape & support fns ------------------------------------------------- /
1169
1170	/ This map must be in ASCENDING ORDER OF THE ESCAPE CODE /
1171	static const UChar UNESCAPE_MAP[] = {
1172	/" 0x22, 0x22 /
1173	/' 0x27, 0x27 /
1174	/? 0x3F, 0x3F /
1175	/\ 0x5C, 0x5C /
1176	/a/ `0x61`, `0x07`,
1177	/b/ `0x62`, `0x08`,
1178	/e/ `0x65`, `0x1b`,
1179	/f/ `0x66`, `0x0c`,
1180	/n/ `0x6E`, `0x0a`,
1181	/r/ `0x72`, `0x0d`,
1182	/t/ `0x74`, `0x09`,
1183	/v/ `0x76`, `0x0b`
1184	};
1185	enum { UNESCAPE_MAP_LENGTH = UPRV_LENGTHOF(UNESCAPE_MAP) };
1186
1187	/ Convert one octal digit to a numeric value 0..7, or -1 on failure /
1188	static int8_t _digit8(UChar c) {
1189	if (c >= `0x0030` && c <= `0x0037`) {
1190	return (int8_t)(c - `0x0030`);
1191	}
1192	return -`1`;
1193	}
1194
1195	/ Convert one hex digit to a numeric value 0..F, or -1 on failure /
1196	static int8_t _digit16(UChar c) {
1197	if (c >= `0x0030` && c <= `0x0039`) {
1198	return (int8_t)(c - `0x0030`);
1199	}
1200	if (c >= `0x0041` && c <= `0x0046`) {
1201	return (int8_t)(c - (`0x0041` - `10`));
1202	}
1203	if (c >= `0x0061` && c <= `0x0066`) {
1204	return (int8_t)(c - (`0x0061` - `10`));
1205	}
1206	return -`1`;
1207	}
1208
1209	/ Parse a single escape sequence. Although this method deals in*
1210	* UChars, it does not use C++ or UnicodeString. This allows it to
1211	* be used from C contexts. */
1212	U_CAPI UChar32 U_EXPORT2
1213	u_unescapeAt(UNESCAPE_CHAR_AT charAt,
1214	int32_t *offset,
1215	int32_t length,
1216	void *context) {
1217
1218	int32_t start = *offset;
1219	UChar c;
1220	UChar32 result = `0`;
1221	int8_t n = `0`;
1222	int8_t minDig = `0`;
1223	int8_t maxDig = `0`;
1224	int8_t bitsPerDigit = `4`;
1225	int8_t dig;
1226	int32_t i;
1227	UBool braces = FALSE;
1228
1229	/ Check that offset is in range /
1230	if (offset < `0` \|\| offset >= length) {
1231	goto err;
1232	}
1233
1234	/ Fetch first UChar after '\\' /
1235	c = charAt((*offset)++, context);
1236
1237	/ Convert hexadecimal and octal escapes /
1238	switch (c) {
1239	case `0x0075` /'u'/:
1240	minDig = maxDig = `4`;
1241	break;
1242	case `0x0055` /'U'/:
1243	minDig = maxDig = `8`;
1244	break;
1245	case `0x0078` /'x'/:
1246	minDig = `1`;
1247	if (offset < length && charAt(offset, context) == `0x7B` /{/) {
1248	++(*offset);
1249	braces = TRUE;
1250	maxDig = `8`;
1251	} else {
1252	maxDig = `2`;
1253	}
1254	break;
1255	default:
1256	dig = _digit8(c);
1257	if (dig >= `0`) {
1258	minDig = `1`;
1259	maxDig = `3`;
1260	n = `1`; / Already have first octal digit /
1261	bitsPerDigit = `3`;
1262	result = dig;
1263	}
1264	break;
1265	}
1266	if (minDig != `0`) {
1267	while (*offset < length && n < maxDig) {
1268	c = charAt(*offset, context);
1269	dig = (int8_t)((bitsPerDigit == `3`) ? _digit8(c) : _digit16(c));
1270	if (dig < `0`) {
1271	break;
1272	}
1273	result = (result << bitsPerDigit) \| dig;
1274	++(*offset);
1275	++n;
1276	}
1277	if (n < minDig) {
1278	goto err;
1279	}
1280	if (braces) {
1281	if (c != `0x7D` /}/) {
1282	goto err;
1283	}
1284	++(*offset);
1285	}
1286	if (result < `0` \|\| result >= `0x110000`) {
1287	goto err;
1288	}
1289	/ If an escape sequence specifies a lead surrogate, see if*
1290	* there is a trail surrogate after it, either as an escape or
1291	* as a literal. If so, join them up into a supplementary.
1292	*/
1293	if (*offset < length && U16_IS_LEAD(result)) {
1294	int32_t ahead = *offset + `1`;
1295	c = charAt(*offset, context);
1296	if (c == `0x5C` /'\\'/ && ahead < length) {
1297	c = (UChar) u_unescapeAt(charAt, &ahead, length, context);
1298	}
1299	if (U16_IS_TRAIL(c)) {
1300	*offset = ahead;
1301	result = U16_GET_SUPPLEMENTARY(result, c);
1302	}
1303	}
1304	return result;
1305	}
1306
1307	/ Convert C-style escapes in table /
1308	for (i=`0`; i<UNESCAPE_MAP_LENGTH; i+=`2`) {
1309	if (c == UNESCAPE_MAP[i]) {
1310	return UNESCAPE_MAP[i+`1`];
1311	} else if (c < UNESCAPE_MAP[i]) {
1312	break;
1313	}
1314	}
1315
1316	/ Map \cX to control-X: X & 0x1F /
1317	if (c == `0x0063` /'c'/ && *offset < length) {
1318	c = charAt((*offset)++, context);
1319	if (U16_IS_LEAD(c) && *offset < length) {
1320	UChar c2 = charAt(*offset, context);
1321	if (U16_IS_TRAIL(c2)) {
1322	++(*offset);
1323	c = (UChar) U16_GET_SUPPLEMENTARY(c, c2); / [sic] /
1324	}
1325	}
1326	return `0x1F` & c;
1327	}
1328
1329	/ If no special forms are recognized, then consider*
1330	* the backslash to generically escape the next character.
1331	* Deal with surrogate pairs. */
1332	if (U16_IS_LEAD(c) && *offset < length) {
1333	UChar c2 = charAt(*offset, context);
1334	if (U16_IS_TRAIL(c2)) {
1335	++(*offset);
1336	return U16_GET_SUPPLEMENTARY(c, c2);
1337	}
1338	}
1339	return c;
1340
1341	err:
1342	/ Invalid escape sequence /
1343	offset = start; /* Reset to initial value /
1344	return (UChar32)`0xFFFFFFFF`;
1345	}
1346
1347	/ u_unescapeAt() callback to return a UChar from a char* /
1348	static UChar U_CALLCONV
1349	_charPtr_charAt(int32_t offset, void *context) {
1350	UChar c16;
1351	/ It would be more efficient to access the invariant tables*
1352	* directly but there is no API for that. */
1353	u_charsToUChars(((char*) context) + offset, &c16, `1`);
1354	return c16;
1355	}
1356
1357	/ Append an escape-free segment of the text; used by u_unescape() /
1358	static void _appendUChars(UChar *dest, int32_t destCapacity,
1359	const char *src, int32_t srcLen) {
1360	if (destCapacity < `0`) {
1361	destCapacity = `0`;
1362	}
1363	if (srcLen > destCapacity) {
1364	srcLen = destCapacity;
1365	}
1366	u_charsToUChars(src, dest, srcLen);
1367	}
1368
1369	/ Do an invariant conversion of char* -> UChar, with escape parsing /*
1370	U_CAPI int32_t U_EXPORT2
1371	u_unescape(const char src, UChar dest, int32_t destCapacity) {
1372	const char *segment = src;
1373	int32_t i = `0`;
1374	char c;
1375
1376	while ((c=*src) != `0`) {
1377	/ '\\' intentionally written as compiler-specific*
1378	* character constant to correspond to compiler-specific
1379	* char* constants. */
1380	if (c == `'\\'`) {
1381	int32_t lenParsed = `0`;
1382	UChar32 c32;
1383	if (src != segment) {
1384	if (dest != NULL) {
1385	_appendUChars(dest + i, destCapacity - i,
1386	segment, (int32_t)(src - segment));
1387	}
1388	i += (int32_t)(src - segment);
1389	}
1390	++src; / advance past '\\' /
1391	c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), (void*)src);
1392	if (lenParsed == `0`) {
1393	goto err;
1394	}
1395	src += lenParsed; / advance past escape seq. /
1396	if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) {
1397	U16_APPEND_UNSAFE(dest, i, c32);
1398	} else {
1399	i += U16_LENGTH(c32);
1400	}
1401	segment = src;
1402	} else {
1403	++src;
1404	}
1405	}
1406	if (src != segment) {
1407	if (dest != NULL) {
1408	_appendUChars(dest + i, destCapacity - i,
1409	segment, (int32_t)(src - segment));
1410	}
1411	i += (int32_t)(src - segment);
1412	}
1413	if (dest != NULL && i < destCapacity) {
1414	dest[i] = `0`;
1415	}
1416	return i;
1417
1418	err:
1419	if (dest != NULL && destCapacity > `0`) {
1420	*dest = `0`;
1421	}
1422	return `0`;
1423	}
1424
1425	/ NUL-termination of strings ----------------------------------------------- /
1426
1427	/**
1428	* NUL-terminate a string no matter what its type.
1429	* Set warning and error codes accordingly.
1430	*/
1431	#define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode) UPRV_BLOCK_MACRO_BEGIN { \
1432	if(pErrorCode!=NULL && U_SUCCESS(*pErrorCode)) { \
1433	/* not a public function, so no complete argument checking */ \
1434	\
1435	if(length<0) { \
1436	/* assume that the caller handles this */ \
1437	} else if(length<destCapacity) { \
1438	/* NUL-terminate the string, the NUL fits */ \
1439	dest[length]=0; \
1440	/* unset the not-terminated warning but leave all others */ \
1441	if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { \
1442	*pErrorCode=U_ZERO_ERROR; \
1443	} \
1444	} else if(length==destCapacity) { \
1445	/* unable to NUL-terminate, but the string itself fit - set a warning code */ \
1446	*pErrorCode=U_STRING_NOT_TERMINATED_WARNING; \
1447	} else /* length>destCapacity */ { \
1448	/* even the string itself did not fit - set an error code */ \
1449	*pErrorCode=U_BUFFER_OVERFLOW_ERROR; \
1450	} \
1451	} \
1452	} UPRV_BLOCK_MACRO_END
1453
1454	U_CAPI int32_t U_EXPORT2
1455	u_terminateUChars(UChar dest, int32_t destCapacity, int32_t length, UErrorCode pErrorCode) {
1456	__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
1457	return length;
1458	}
1459
1460	U_CAPI int32_t U_EXPORT2
1461	u_terminateChars(char dest, int32_t destCapacity, int32_t length, UErrorCode pErrorCode) {
1462	__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
1463	return length;
1464	}
1465
1466	U_CAPI int32_t U_EXPORT2
1467	u_terminateUChar32s(UChar32 dest, int32_t destCapacity, int32_t length, UErrorCode pErrorCode) {
1468	__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
1469	return length;
1470	}
1471
1472	U_CAPI int32_t U_EXPORT2
1473	u_terminateWChars(wchar_t dest, int32_t destCapacity, int32_t length, UErrorCode pErrorCode) {
1474	__TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
1475	return length;
1476	}
1477
1478	// Compute the hash code for a string -------------------------------------- ***
1479
1480	// Moved here from uhash.c so that UnicodeString::hashCode() does not depend
1481	// on UHashtable code.
1482
1483	/*
1484	Compute the hash by iterating sparsely over about 32 (up to 63)
1485	characters spaced evenly through the string. For each character,
1486	multiply the previous hash value by a prime number and add the new
1487	character in, like a linear congruential random number generator,
1488	producing a pseudorandom deterministic value well distributed over
1489	the output range. [LIU]
1490	*/
1491
1492	#define STRING_HASH(TYPE, STR, STRLEN, DEREF) UPRV_BLOCK_MACRO_BEGIN { \
1493	uint32_t hash = 0; \
1494	const TYPE p = (const TYPE) STR; \
1495	if (p != NULL) { \
1496	int32_t len = (int32_t)(STRLEN); \
1497	int32_t inc = ((len - 32) / 32) + 1; \
1498	const TYPE *limit = p + len; \
1499	while (p<limit) { \
1500	hash = (hash * 37) + DEREF; \
1501	p += inc; \
1502	} \
1503	} \
1504	return static_cast<int32_t>(hash); \
1505	} UPRV_BLOCK_MACRO_END
1506
1507	/ Used by UnicodeString to compute its hashcode - Not public API. /
1508	U_CAPI int32_t U_EXPORT2
1509	ustr_hashUCharsN(const UChar *str, int32_t length) {
1510	STRING_HASH(UChar, str, length, *p);
1511	}
1512
1513	U_CAPI int32_t U_EXPORT2
1514	ustr_hashCharsN(const char *str, int32_t length) {
1515	STRING_HASH(uint8_t, str, length, *p);
1516	}
1517
1518	U_CAPI int32_t U_EXPORT2
1519	ustr_hashICharsN(const char *str, int32_t length) {
1520	STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p));
1521	}
1522

Browse the source code of ClickHouse/contrib/icu/icu4c/source/common/ustring.cpp