uidna.cpp source code [ClickHouse/contrib/icu/icu4c/source/common/uidna.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	*******************************************************************************
5	*
6	* Copyright (C) 2003-2014, International Business Machines
7	* Corporation and others. All Rights Reserved.
8	*
9	*******************************************************************************
10	* file name: uidna.cpp
11	* encoding: UTF-8
12	* tab size: 8 (not used)
13	* indentation:4
14	*
15	* created on: 2003feb1
16	* created by: Ram Viswanadha
17	*/
18
19	#include "unicode/utypes.h"
20
21	#if !UCONFIG_NO_IDNA
22
23	#include "unicode/uidna.h"
24	#include "unicode/ustring.h"
25	#include "unicode/usprep.h"
26	#include "punycode.h"
27	#include "ustr_imp.h"
28	#include "cmemory.h"
29	#include "uassert.h"
30	#include "sprpimpl.h"
31
32	/ it is official IDNA ACE Prefix is "xn--" /
33	static const UChar ACE_PREFIX[] ={ `0x0078`,`0x006E`,`0x002d`,`0x002d` } ;
34	#define ACE_PREFIX_LENGTH 4
35
36	#define MAX_LABEL_LENGTH 63
37	/ The Max length of the labels should not be more than MAX_LABEL_LENGTH /
38	#define MAX_LABEL_BUFFER_SIZE 100
39
40	#define MAX_DOMAIN_NAME_LENGTH 255
41	/ The Max length of the domain names should not be more than MAX_DOMAIN_NAME_LENGTH /
42	#define MAX_IDN_BUFFER_SIZE MAX_DOMAIN_NAME_LENGTH+1
43
44	#define LOWER_CASE_DELTA 0x0020
45	#define HYPHEN 0x002D
46	#define FULL_STOP 0x002E
47	#define CAPITAL_A 0x0041
48	#define CAPITAL_Z 0x005A
49
50	inline static UChar
51	toASCIILower(UChar ch){
52	if(CAPITAL_A <= ch && ch <= CAPITAL_Z){
53	return ch + LOWER_CASE_DELTA;
54	}
55	return ch;
56	}
57
58	inline static UBool
59	startsWithPrefix(const UChar* src , int32_t srcLength){
60	if(srcLength < ACE_PREFIX_LENGTH){
61	return FALSE;
62	}
63
64	for(int8_t i=`0`; i< ACE_PREFIX_LENGTH; i++){
65	if(toASCIILower(src[i]) != ACE_PREFIX[i]){
66	return FALSE;
67	}
68	}
69	return TRUE;
70	}
71
72
73	inline static int32_t
74	compareCaseInsensitiveASCII(const UChar* s1, int32_t s1Len,
75	const UChar* s2, int32_t s2Len){
76
77	int32_t minLength;
78	int32_t lengthResult;
79
80	// are we comparing different lengths?
81	if(s1Len != s2Len) {
82	if(s1Len < s2Len) {
83	minLength = s1Len;
84	lengthResult = -`1`;
85	} else {
86	minLength = s2Len;
87	lengthResult = `1`;
88	}
89	} else {
90	// ok the lengths are equal
91	minLength = s1Len;
92	lengthResult = `0`;
93	}
94
95	UChar c1,c2;
96	int32_t rc;
97
98	for(int32_t i =`0`;/ no condition /;i++) {
99
100	/ If we reach the ends of both strings then they match /
101	if(i == minLength) {
102	return lengthResult;
103	}
104
105	c1 = s1[i];
106	c2 = s2[i];
107
108	/ Case-insensitive comparison /
109	if(c1!=c2) {
110	rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2);
111	if(rc!=`0`) {
112	lengthResult=rc;
113	break;
114	}
115	}
116	}
117	return lengthResult;
118	}
119
120
121	/**
122	* Ascertain if the given code point is a label separator as
123	* defined by the IDNA RFC
124	*
125	* @param ch The code point to be ascertained
126	* @return true if the char is a label separator
127	* @stable ICU 2.8
128	*/
129	static inline UBool isLabelSeparator(UChar ch){
130	switch(ch){
131	case `0x002e`:
132	case `0x3002`:
133	case `0xFF0E`:
134	case `0xFF61`:
135	return TRUE;
136	default:
137	return FALSE;
138	}
139	}
140
141	// returns the length of the label excluding the separator
142	// if limit == separator then the length returned does not include*
143	// the separtor.
144	static inline int32_t
145	getNextSeparator(UChar *src, int32_t srcLength,
146	UChar *limit, UBool done){
147	if(srcLength == -`1`){
148	int32_t i;
149	for(i=`0` ; ;i++){
150	if(src[i] == `0`){
151	limit = src + i; // point to null*
152	*done = TRUE;
153	return i;
154	}
155	if(isLabelSeparator(src[i])){
156	limit = src + (i+`1`); // go past the delimiter*
157	return i;
158
159	}
160	}
161	}else{
162	int32_t i;
163	for(i=`0`;i<srcLength;i++){
164	if(isLabelSeparator(src[i])){
165	limit = src + (i+`1`); // go past the delimiter*
166	return i;
167	}
168	}
169	// we have not found the delimiter
170	// if(i==srcLength)
171	*limit = src+srcLength;
172	*done = TRUE;
173
174	return i;
175	}
176	}
177	static inline UBool isLDHChar(UChar ch){
178	// high runner case
179	if(ch>`0x007A`){
180	return FALSE;
181	}
182	//[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
183	if( (ch==`0x002D`) \|\|
184	(`0x0030` <= ch && ch <= `0x0039`) \|\|
185	(`0x0041` <= ch && ch <= `0x005A`) \|\|
186	(`0x0061` <= ch && ch <= `0x007A`)
187	){
188	return TRUE;
189	}
190	return FALSE;
191	}
192
193	static int32_t
194	_internal_toASCII(const UChar* src, int32_t srcLength,
195	UChar* dest, int32_t destCapacity,
196	int32_t options,
197	UStringPrepProfile* nameprep,
198	UParseError* parseError,
199	UErrorCode* status)
200	{
201
202	// TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
203	UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE];
204	//initialize pointers to stack buffers
205	UChar b1 = b1Stack, b2 = b2Stack;
206	int32_t b1Len=`0`, b2Len,
207	b1Capacity = MAX_LABEL_BUFFER_SIZE,
208	b2Capacity = MAX_LABEL_BUFFER_SIZE ,
209	reqLength=`0`;
210
211	int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != `0`) ? USPREP_ALLOW_UNASSIGNED: `0`;
212	UBool* caseFlags = NULL;
213
214	// the source contains all ascii codepoints
215	UBool srcIsASCII = TRUE;
216	// assume the source contains all LDH codepoints
217	UBool srcIsLDH = TRUE;
218
219	int32_t j=`0`;
220
221	//get the options
222	UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != `0`);
223
224	int32_t failPos = -`1`;
225
226	if(srcLength == -`1`){
227	srcLength = u_strlen(src);
228	}
229
230	if(srcLength > b1Capacity){
231	b1 = (UChar) uprv_malloc(srcLength U_SIZEOF_UCHAR);
232	if(b1==NULL){
233	*status = U_MEMORY_ALLOCATION_ERROR;
234	goto CLEANUP;
235	}
236	b1Capacity = srcLength;
237	}
238
239	// step 1
240	for( j=`0`;j<srcLength;j++){
241	if(src[j] > `0x7F`){
242	srcIsASCII = FALSE;
243	}
244	b1[b1Len++] = src[j];
245	}
246
247	// step 2 is performed only if the source contains non ASCII
248	if(srcIsASCII == FALSE){
249
250	// step 2
251	b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
252
253	if(*status == U_BUFFER_OVERFLOW_ERROR){
254	// redo processing of string
255	// we do not have enough room so grow the buffer
256	if(b1 != b1Stack){
257	uprv_free(b1);
258	}
259	b1 = (UChar) uprv_malloc(b1Len U_SIZEOF_UCHAR);
260	if(b1==NULL){
261	*status = U_MEMORY_ALLOCATION_ERROR;
262	goto CLEANUP;
263	}
264
265	status = U_ZERO_ERROR; // reset error*
266
267	b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
268	}
269	}
270	// error bail out
271	if(U_FAILURE(*status)){
272	goto CLEANUP;
273	}
274	if(b1Len == `0`){
275	*status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
276	goto CLEANUP;
277	}
278
279	// for step 3 & 4
280	srcIsASCII = TRUE;
281	for( j=`0`;j<b1Len;j++){
282	// check if output of usprep_prepare is all ASCII
283	if(b1[j] > `0x7F`){
284	srcIsASCII = FALSE;
285	}else if(isLDHChar(b1[j])==FALSE){ // if the char is in ASCII range verify that it is an LDH character
286	srcIsLDH = FALSE;
287	failPos = j;
288	}
289	}
290	if(useSTD3ASCIIRules == TRUE){
291	// verify 3a and 3b
292	// 3(a) Verify the absence of non-LDH ASCII code points; that is, the
293	// absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
294	// 3(b) Verify the absence of leading and trailing hyphen-minus; that
295	// is, the absence of U+002D at the beginning and end of the
296	// sequence.
297	if( srcIsLDH == FALSE / source at this point should not contain anyLDH characters /
298	\|\| b1[`0`] == HYPHEN \|\| b1[b1Len-`1`] == HYPHEN){
299	*status = U_IDNA_STD3_ASCII_RULES_ERROR;
300
301	/ populate the parseError struct /
302	if(srcIsLDH==FALSE){
303	// failPos is always set the index of failure
304	uprv_syntaxError(b1,failPos, b1Len,parseError);
305	}else if(b1[`0`] == HYPHEN){
306	// fail position is 0
307	uprv_syntaxError(b1,`0`,b1Len,parseError);
308	}else{
309	// the last index in the source is always length-1
310	uprv_syntaxError(b1, (b1Len>`0`) ? b1Len-`1` : b1Len, b1Len,parseError);
311	}
312
313	goto CLEANUP;
314	}
315	}
316	// Step 4: if the source is ASCII then proceed to step 8
317	if(srcIsASCII){
318	if(b1Len <= destCapacity){
319	u_memmove(dest, b1, b1Len);
320	reqLength = b1Len;
321	}else{
322	reqLength = b1Len;
323	goto CLEANUP;
324	}
325	}else{
326	// step 5 : verify the sequence does not begin with ACE prefix
327	if(!startsWithPrefix(b1,b1Len)){
328
329	//step 6: encode the sequence with punycode
330
331	// do not preserve the case flags for now!
332	// TODO: Preserve the case while implementing the RFE
333	// caseFlags = (UBool) uprv_malloc(b1Len * sizeof(UBool));*
334	// uprv_memset(caseFlags,TRUE,b1Len);
335
336	b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status);
337
338	if(*status == U_BUFFER_OVERFLOW_ERROR){
339	// redo processing of string
340	/ we do not have enough room so grow the buffer/
341	b2 = (UChar) uprv_malloc(b2Len U_SIZEOF_UCHAR);
342	if(b2 == NULL){
343	*status = U_MEMORY_ALLOCATION_ERROR;
344	goto CLEANUP;
345	}
346
347	status = U_ZERO_ERROR; // reset error*
348
349	b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status);
350	}
351	//error bail out
352	if(U_FAILURE(*status)){
353	goto CLEANUP;
354	}
355	// TODO : Reconsider while implementing the case preserve RFE
356	// convert all codepoints to lower case ASCII
357	// toASCIILower(b2,b2Len);
358	reqLength = b2Len+ACE_PREFIX_LENGTH;
359
360	if(reqLength > destCapacity){
361	*status = U_BUFFER_OVERFLOW_ERROR;
362	goto CLEANUP;
363	}
364	//Step 7: prepend the ACE prefix
365	u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH);
366	//Step 6: copy the contents in b2 into dest
367	u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len);
368
369	}else{
370	*status = U_IDNA_ACE_PREFIX_ERROR;
371	//position of failure is 0
372	uprv_syntaxError(b1,`0`,b1Len,parseError);
373	goto CLEANUP;
374	}
375	}
376	// step 8: verify the length of label
377	if(reqLength > MAX_LABEL_LENGTH){
378	*status = U_IDNA_LABEL_TOO_LONG_ERROR;
379	}
380
381	CLEANUP:
382	if(b1 != b1Stack){
383	uprv_free(b1);
384	}
385	if(b2 != b2Stack){
386	uprv_free(b2);
387	}
388	uprv_free(caseFlags);
389
390	return u_terminateUChars(dest, destCapacity, reqLength, status);
391	}
392
393	static int32_t
394	_internal_toUnicode(const UChar* src, int32_t srcLength,
395	UChar* dest, int32_t destCapacity,
396	int32_t options,
397	UStringPrepProfile* nameprep,
398	UParseError* parseError,
399	UErrorCode* status)
400	{
401
402	//get the options
403	//UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0);
404	int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != `0`) ? USPREP_ALLOW_UNASSIGNED: `0`;
405
406	// TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too.
407	UChar b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE];
408
409	//initialize pointers to stack buffers
410	UChar b1 = b1Stack, b2 = b2Stack, b1Prime=NULL, b3=b3Stack;
411	int32_t b1Len = `0`, b2Len, b1PrimeLen, b3Len,
412	b1Capacity = MAX_LABEL_BUFFER_SIZE,
413	b2Capacity = MAX_LABEL_BUFFER_SIZE,
414	b3Capacity = MAX_LABEL_BUFFER_SIZE,
415	reqLength=`0`;
416
417	UBool* caseFlags = NULL;
418
419	UBool srcIsASCII = TRUE;
420	/UBool srcIsLDH = TRUE;*
421	int32_t failPos =0;/*
422
423	// step 1: find out if all the codepoints in src are ASCII
424	if(srcLength==-`1`){
425	srcLength = `0`;
426	for(;src[srcLength]!=`0`;){
427	if(src[srcLength]> `0x7f`){
428	srcIsASCII = FALSE;
429	}/else if(isLDHChar(src[srcLength])==FALSE){*
430	// here we do not assemble surrogates
431	// since we know that LDH code points
432	// are in the ASCII range only
433	srcIsLDH = FALSE;
434	failPos = srcLength;
435	}/*
436	srcLength++;
437	}
438	}else if(srcLength > `0`){
439	for(int32_t j=`0`; j<srcLength; j++){
440	if(src[j]> `0x7f`){
441	srcIsASCII = FALSE;
442	break;
443	}/else if(isLDHChar(src[j])==FALSE){*
444	// here we do not assemble surrogates
445	// since we know that LDH code points
446	// are in the ASCII range only
447	srcIsLDH = FALSE;
448	failPos = j;
449	}/*
450	}
451	}else{
452	return `0`;
453	}
454
455	if(srcIsASCII == FALSE){
456	// step 2: process the string
457	b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status);
458	if(*status == U_BUFFER_OVERFLOW_ERROR){
459	// redo processing of string
460	/ we do not have enough room so grow the buffer/
461	b1 = (UChar) uprv_malloc(b1Len U_SIZEOF_UCHAR);
462	if(b1==NULL){
463	*status = U_MEMORY_ALLOCATION_ERROR;
464	goto CLEANUP;
465	}
466
467	status = U_ZERO_ERROR; // reset error*
468
469	b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status);
470	}
471	//bail out on error
472	if(U_FAILURE(*status)){
473	goto CLEANUP;
474	}
475	}else{
476
477	//just point src to b1
478	b1 = (UChar*) src;
479	b1Len = srcLength;
480	}
481
482	// The RFC states that
483	// <quote>
484	// ToUnicode never fails. If any step fails, then the original input
485	// is returned immediately in that step.
486	// </quote>
487
488	//step 3: verify ACE Prefix
489	if(startsWithPrefix(b1,b1Len)){
490
491	//step 4: Remove the ACE Prefix
492	b1Prime = b1 + ACE_PREFIX_LENGTH;
493	b1PrimeLen = b1Len - ACE_PREFIX_LENGTH;
494
495	//step 5: Decode using punycode
496	b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status);
497
498	if(*status == U_BUFFER_OVERFLOW_ERROR){
499	// redo processing of string
500	/ we do not have enough room so grow the buffer/
501	b2 = (UChar) uprv_malloc(b2Len U_SIZEOF_UCHAR);
502	if(b2==NULL){
503	*status = U_MEMORY_ALLOCATION_ERROR;
504	goto CLEANUP;
505	}
506
507	status = U_ZERO_ERROR; // reset error*
508
509	b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status);
510	}
511
512
513	//step 6:Apply toASCII
514	b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status);
515
516	if(*status == U_BUFFER_OVERFLOW_ERROR){
517	// redo processing of string
518	/ we do not have enough room so grow the buffer/
519	b3 = (UChar) uprv_malloc(b3Len U_SIZEOF_UCHAR);
520	if(b3==NULL){
521	*status = U_MEMORY_ALLOCATION_ERROR;
522	goto CLEANUP;
523	}
524
525	status = U_ZERO_ERROR; // reset error*
526
527	b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status);
528
529	}
530	//bail out on error
531	if(U_FAILURE(*status)){
532	goto CLEANUP;
533	}
534
535	//step 7: verify
536	if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=`0`){
537	// Cause the original to be returned.
538	*status = U_IDNA_VERIFICATION_ERROR;
539	goto CLEANUP;
540	}
541
542	//step 8: return output of step 5
543	reqLength = b2Len;
544	if(b2Len <= destCapacity) {
545	u_memmove(dest, b2, b2Len);
546	}
547	}
548	else{
549	// See the start of this if statement for why this is commented out.
550	// verify that STD3 ASCII rules are satisfied
551	/if(useSTD3ASCIIRules == TRUE){*
552	if( srcIsLDH == FALSE // source contains some non-LDH characters
553	\|\| src[0] == HYPHEN \|\| src[srcLength-1] == HYPHEN){
554	*status = U_IDNA_STD3_ASCII_RULES_ERROR;
555
556	// populate the parseError struct
557	if(srcIsLDH==FALSE){
558	// failPos is always set the index of failure
559	uprv_syntaxError(src,failPos, srcLength,parseError);
560	}else if(src[0] == HYPHEN){
561	// fail position is 0
562	uprv_syntaxError(src,0,srcLength,parseError);
563	}else{
564	// the last index in the source is always length-1
565	uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError);
566	}
567
568	goto CLEANUP;
569	}
570	}/*
571	// just return the source
572	//copy the source to destination
573	if(srcLength <= destCapacity){
574	u_memmove(dest, src, srcLength);
575	}
576	reqLength = srcLength;
577	}
578
579
580	CLEANUP:
581
582	if(b1 != b1Stack && b1!=src){
583	uprv_free(b1);
584	}
585	if(b2 != b2Stack){
586	uprv_free(b2);
587	}
588	uprv_free(caseFlags);
589
590	// The RFC states that
591	// <quote>
592	// ToUnicode never fails. If any step fails, then the original input
593	// is returned immediately in that step.
594	// </quote>
595	// So if any step fails lets copy source to destination
596	if(U_FAILURE(*status)){
597	//copy the source to destination
598	if(dest && srcLength <= destCapacity){
599	// srcLength should have already been set earlier.
600	U_ASSERT(srcLength >= `0`);
601	u_memmove(dest, src, srcLength);
602	}
603	reqLength = srcLength;
604	*status = U_ZERO_ERROR;
605	}
606
607	return u_terminateUChars(dest, destCapacity, reqLength, status);
608	}
609
610	U_CAPI int32_t U_EXPORT2
611	uidna_toASCII(const UChar* src, int32_t srcLength,
612	UChar* dest, int32_t destCapacity,
613	int32_t options,
614	UParseError* parseError,
615	UErrorCode* status){
616
617	if(status == NULL \|\| U_FAILURE(*status)){
618	return `0`;
619	}
620	if((src==NULL) \|\| (srcLength < -`1`) \|\| (destCapacity<`0`) \|\| (!dest && destCapacity > `0`)){
621	*status = U_ILLEGAL_ARGUMENT_ERROR;
622	return `0`;
623	}
624
625	UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
626
627	if(U_FAILURE(*status)){
628	return -`1`;
629	}
630
631	int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
632
633	/ close the profile/
634	usprep_close(nameprep);
635
636	return retLen;
637	}
638
639	U_CAPI int32_t U_EXPORT2
640	uidna_toUnicode(const UChar* src, int32_t srcLength,
641	UChar* dest, int32_t destCapacity,
642	int32_t options,
643	UParseError* parseError,
644	UErrorCode* status){
645
646	if(status == NULL \|\| U_FAILURE(*status)){
647	return `0`;
648	}
649	if( (src==NULL) \|\| (srcLength < -`1`) \|\| (destCapacity<`0`) \|\| (!dest && destCapacity > `0`)){
650	*status = U_ILLEGAL_ARGUMENT_ERROR;
651	return `0`;
652	}
653
654	UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
655
656	if(U_FAILURE(*status)){
657	return -`1`;
658	}
659
660	int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status);
661
662	usprep_close(nameprep);
663
664	return retLen;
665	}
666
667
668	U_CAPI int32_t U_EXPORT2
669	uidna_IDNToASCII( const UChar *src, int32_t srcLength,
670	UChar* dest, int32_t destCapacity,
671	int32_t options,
672	UParseError *parseError,
673	UErrorCode *status){
674
675	if(status == NULL \|\| U_FAILURE(*status)){
676	return `0`;
677	}
678	if((src==NULL) \|\| (srcLength < -`1`) \|\| (destCapacity<`0`) \|\| (!dest && destCapacity > `0`)){
679	*status = U_ILLEGAL_ARGUMENT_ERROR;
680	return `0`;
681	}
682
683	int32_t reqLength = `0`;
684
685	UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
686
687	if(U_FAILURE(*status)){
688	return `0`;
689	}
690
691	//initialize pointers
692	UChar delimiter = (UChar)src;
693	UChar labelStart = (UChar)src;
694	UChar currentDest = (UChar) dest;
695	int32_t remainingLen = srcLength;
696	int32_t remainingDestCapacity = destCapacity;
697	int32_t labelLen = `0`, labelReqLength = `0`;
698	UBool done = FALSE;
699
700
701	for(;;){
702
703	labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
704	labelReqLength = `0`;
705	if(!(labelLen==`0` && done)){// make sure this is not a root label separator.
706
707	labelReqLength = _internal_toASCII( labelStart, labelLen,
708	currentDest, remainingDestCapacity,
709	options, nameprep,
710	parseError, status);
711
712	if(*status == U_BUFFER_OVERFLOW_ERROR){
713
714	status = U_ZERO_ERROR; // reset error*
715	remainingDestCapacity = `0`;
716	}
717	}
718
719
720	if(U_FAILURE(*status)){
721	break;
722	}
723
724	reqLength +=labelReqLength;
725	// adjust the destination pointer
726	if(labelReqLength < remainingDestCapacity){
727	currentDest = currentDest + labelReqLength;
728	remainingDestCapacity -= labelReqLength;
729	}else{
730	// should never occur
731	remainingDestCapacity = `0`;
732	}
733
734	if(done == TRUE){
735	break;
736	}
737
738	// add the label separator
739	if(remainingDestCapacity > `0`){
740	*currentDest++ = FULL_STOP;
741	remainingDestCapacity--;
742	}
743	reqLength++;
744
745	labelStart = delimiter;
746	if(remainingLen >`0` ){
747	remainingLen = (int32_t)(srcLength - (delimiter - src));
748	}
749
750	}
751
752	if(reqLength > MAX_DOMAIN_NAME_LENGTH){
753	*status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
754	}
755
756	usprep_close(nameprep);
757
758	return u_terminateUChars(dest, destCapacity, reqLength, status);
759	}
760
761	U_CAPI int32_t U_EXPORT2
762	uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
763	UChar* dest, int32_t destCapacity,
764	int32_t options,
765	UParseError* parseError,
766	UErrorCode* status){
767
768	if(status == NULL \|\| U_FAILURE(*status)){
769	return `0`;
770	}
771	if((src==NULL) \|\| (srcLength < -`1`) \|\| (destCapacity<`0`) \|\| (!dest && destCapacity > `0`)){
772	*status = U_ILLEGAL_ARGUMENT_ERROR;
773	return `0`;
774	}
775
776	int32_t reqLength = `0`;
777
778	UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status);
779
780	if(U_FAILURE(*status)){
781	return `0`;
782	}
783
784	//initialize pointers
785	UChar delimiter = (UChar)src;
786	UChar labelStart = (UChar)src;
787	UChar currentDest = (UChar) dest;
788	int32_t remainingLen = srcLength;
789	int32_t remainingDestCapacity = destCapacity;
790	int32_t labelLen = `0`, labelReqLength = `0`;
791	UBool done = FALSE;
792
793	for(;;){
794
795	labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done);
796
797	// The RFC states that
798	// <quote>
799	// ToUnicode never fails. If any step fails, then the original input
800	// is returned immediately in that step.
801	// </quote>
802	// _internal_toUnicode will copy the label.
803	/if(labelLen==0 && done==FALSE){*
804	*status = U_IDNA_ZERO_LENGTH_LABEL_ERROR;
805	break;
806	}/*
807
808	labelReqLength = _internal_toUnicode(labelStart, labelLen,
809	currentDest, remainingDestCapacity,
810	options, nameprep,
811	parseError, status);
812
813	if(*status == U_BUFFER_OVERFLOW_ERROR){
814	status = U_ZERO_ERROR; // reset error*
815	remainingDestCapacity = `0`;
816	}
817
818	if(U_FAILURE(*status)){
819	break;
820	}
821
822	reqLength +=labelReqLength;
823	// adjust the destination pointer
824	if(labelReqLength < remainingDestCapacity){
825	currentDest = currentDest + labelReqLength;
826	remainingDestCapacity -= labelReqLength;
827	}else{
828	// should never occur
829	remainingDestCapacity = `0`;
830	}
831
832	if(done == TRUE){
833	break;
834	}
835
836	// add the label separator
837	// Unlike the ToASCII operation we don't normalize the label separators
838	if(remainingDestCapacity > `0`){
839	currentDest++ = (labelStart + labelLen);
840	remainingDestCapacity--;
841	}
842	reqLength++;
843
844	labelStart = delimiter;
845	if(remainingLen >`0` ){
846	remainingLen = (int32_t)(srcLength - (delimiter - src));
847	}
848
849	}
850
851	if(reqLength > MAX_DOMAIN_NAME_LENGTH){
852	*status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR;
853	}
854
855	usprep_close(nameprep);
856
857	return u_terminateUChars(dest, destCapacity, reqLength, status);
858	}
859
860	U_CAPI int32_t U_EXPORT2
861	uidna_compare( const UChar *s1, int32_t length1,
862	const UChar *s2, int32_t length2,
863	int32_t options,
864	UErrorCode* status){
865
866	if(status == NULL \|\| U_FAILURE(*status)){
867	return -`1`;
868	}
869
870	UChar b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE];
871	UChar b1 = b1Stack, b2 = b2Stack;
872	int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE;
873	int32_t result=-`1`;
874
875	UParseError parseError;
876
877	b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status);
878	if(*status == U_BUFFER_OVERFLOW_ERROR){
879	// redo processing of string
880	b1 = (UChar) uprv_malloc(b1Len U_SIZEOF_UCHAR);
881	if(b1==NULL){
882	*status = U_MEMORY_ALLOCATION_ERROR;
883	goto CLEANUP;
884	}
885
886	status = U_ZERO_ERROR; // reset error*
887
888	b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status);
889
890	}
891
892	b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status);
893	if(*status == U_BUFFER_OVERFLOW_ERROR){
894	// redo processing of string
895	b2 = (UChar) uprv_malloc(b2Len U_SIZEOF_UCHAR);
896	if(b2==NULL){
897	*status = U_MEMORY_ALLOCATION_ERROR;
898	goto CLEANUP;
899	}
900
901	status = U_ZERO_ERROR; // reset error*
902
903	b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status);
904
905	}
906	// when toASCII is applied all label separators are replaced with FULL_STOP
907	result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len);
908
909	CLEANUP:
910	if(b1 != b1Stack){
911	uprv_free(b1);
912	}
913
914	if(b2 != b2Stack){
915	uprv_free(b2);
916	}
917
918	return result;
919	}
920
921	#endif /* #if !UCONFIG_NO_IDNA */
922

Browse the source code of ClickHouse/contrib/icu/icu4c/source/common/uidna.cpp