1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************* |
5 | * |
6 | * Copyright (C) 2003-2014, International Business Machines |
7 | * Corporation and others. All Rights Reserved. |
8 | * |
9 | ******************************************************************************* |
10 | * file name: uidna.cpp |
11 | * encoding: UTF-8 |
12 | * tab size: 8 (not used) |
13 | * indentation:4 |
14 | * |
15 | * created on: 2003feb1 |
16 | * created by: Ram Viswanadha |
17 | */ |
18 | |
19 | #include "unicode/utypes.h" |
20 | |
21 | #if !UCONFIG_NO_IDNA |
22 | |
23 | #include "unicode/uidna.h" |
24 | #include "unicode/ustring.h" |
25 | #include "unicode/usprep.h" |
26 | #include "punycode.h" |
27 | #include "ustr_imp.h" |
28 | #include "cmemory.h" |
29 | #include "uassert.h" |
30 | #include "sprpimpl.h" |
31 | |
/* The official IDNA ACE prefix is "xn--", stored here as UTF-16 code units */
static const char16_t ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ;
#define ACE_PREFIX_LENGTH 4

/* The length of a label should not be more than MAX_LABEL_LENGTH */
#define MAX_LABEL_LENGTH 63
/* Capacity, in code units, used for the per-label stack buffers in this file */
#define MAX_LABEL_BUFFER_SIZE 100

/* The length of a domain name should not be more than MAX_DOMAIN_NAME_LENGTH */
#define MAX_DOMAIN_NAME_LENGTH 255
/*
 * Room for a maximum-length domain name plus one extra code unit.
 * The expansion is parenthesized so that the macro keeps its value when it
 * is used inside a larger expression (e.g. 2 * MAX_IDN_BUFFER_SIZE).
 */
#define MAX_IDN_BUFFER_SIZE (MAX_DOMAIN_NAME_LENGTH+1)

/* ASCII code points referenced by the helpers below */
#define LOWER_CASE_DELTA 0x0020
#define HYPHEN      0x002D
#define FULL_STOP   0x002E
#define CAPITAL_A   0x0041
#define CAPITAL_Z   0x005A
49 | |
50 | inline static char16_t |
51 | toASCIILower(char16_t ch){ |
52 | if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ |
53 | return ch + LOWER_CASE_DELTA; |
54 | } |
55 | return ch; |
56 | } |
57 | |
58 | inline static UBool |
59 | startsWithPrefix(const char16_t* src , int32_t srcLength){ |
60 | if(srcLength < ACE_PREFIX_LENGTH){ |
61 | return false; |
62 | } |
63 | |
64 | for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ |
65 | if(toASCIILower(src[i]) != ACE_PREFIX[i]){ |
66 | return false; |
67 | } |
68 | } |
69 | return true; |
70 | } |
71 | |
72 | |
73 | inline static int32_t |
74 | compareCaseInsensitiveASCII(const char16_t* s1, int32_t s1Len, |
75 | const char16_t* s2, int32_t s2Len){ |
76 | |
77 | int32_t minLength; |
78 | int32_t lengthResult; |
79 | |
80 | // are we comparing different lengths? |
81 | if(s1Len != s2Len) { |
82 | if(s1Len < s2Len) { |
83 | minLength = s1Len; |
84 | lengthResult = -1; |
85 | } else { |
86 | minLength = s2Len; |
87 | lengthResult = 1; |
88 | } |
89 | } else { |
90 | // ok the lengths are equal |
91 | minLength = s1Len; |
92 | lengthResult = 0; |
93 | } |
94 | |
95 | char16_t c1,c2; |
96 | int32_t rc; |
97 | |
98 | for(int32_t i =0;/* no condition */;i++) { |
99 | |
100 | /* If we reach the ends of both strings then they match */ |
101 | if(i == minLength) { |
102 | return lengthResult; |
103 | } |
104 | |
105 | c1 = s1[i]; |
106 | c2 = s2[i]; |
107 | |
108 | /* Case-insensitive comparison */ |
109 | if(c1!=c2) { |
110 | rc=(int32_t)toASCIILower(c1)-(int32_t)toASCIILower(c2); |
111 | if(rc!=0) { |
112 | lengthResult=rc; |
113 | break; |
114 | } |
115 | } |
116 | } |
117 | return lengthResult; |
118 | } |
119 | |
120 | |
121 | /** |
122 | * Ascertain if the given code point is a label separator as |
123 | * defined by the IDNA RFC |
124 | * |
125 | * @param ch The code point to be ascertained |
126 | * @return true if the char is a label separator |
127 | * @stable ICU 2.8 |
128 | */ |
129 | static inline UBool isLabelSeparator(char16_t ch){ |
130 | switch(ch){ |
131 | case 0x002e: |
132 | case 0x3002: |
133 | case 0xFF0E: |
134 | case 0xFF61: |
135 | return true; |
136 | default: |
137 | return false; |
138 | } |
139 | } |
140 | |
141 | // returns the length of the label excluding the separator |
142 | // if *limit == separator then the length returned does not include |
143 | // the separtor. |
144 | static inline int32_t |
145 | getNextSeparator(char16_t *src, int32_t srcLength, |
146 | char16_t **limit, UBool *done){ |
147 | if(srcLength == -1){ |
148 | int32_t i; |
149 | for(i=0 ; ;i++){ |
150 | if(src[i] == 0){ |
151 | *limit = src + i; // point to null |
152 | *done = true; |
153 | return i; |
154 | } |
155 | if(isLabelSeparator(src[i])){ |
156 | *limit = src + (i+1); // go past the delimiter |
157 | return i; |
158 | |
159 | } |
160 | } |
161 | }else{ |
162 | int32_t i; |
163 | for(i=0;i<srcLength;i++){ |
164 | if(isLabelSeparator(src[i])){ |
165 | *limit = src + (i+1); // go past the delimiter |
166 | return i; |
167 | } |
168 | } |
169 | // we have not found the delimiter |
170 | // if(i==srcLength) |
171 | *limit = src+srcLength; |
172 | *done = true; |
173 | |
174 | return i; |
175 | } |
176 | } |
177 | static inline UBool isLDHChar(char16_t ch){ |
178 | // high runner case |
179 | if(ch>0x007A){ |
180 | return false; |
181 | } |
182 | //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A] |
183 | if( (ch==0x002D) || |
184 | (0x0030 <= ch && ch <= 0x0039) || |
185 | (0x0041 <= ch && ch <= 0x005A) || |
186 | (0x0061 <= ch && ch <= 0x007A) |
187 | ){ |
188 | return true; |
189 | } |
190 | return false; |
191 | } |
192 | |
193 | static int32_t |
194 | _internal_toASCII(const char16_t* src, int32_t srcLength, |
195 | char16_t* dest, int32_t destCapacity, |
196 | int32_t options, |
197 | UStringPrepProfile* nameprep, |
198 | UParseError* parseError, |
199 | UErrorCode* status) |
200 | { |
201 | |
202 | // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. |
203 | char16_t b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; |
204 | //initialize pointers to stack buffers |
205 | char16_t *b1 = b1Stack, *b2 = b2Stack; |
206 | int32_t b1Len=0, b2Len, |
207 | b1Capacity = MAX_LABEL_BUFFER_SIZE, |
208 | b2Capacity = MAX_LABEL_BUFFER_SIZE , |
209 | reqLength=0; |
210 | |
211 | int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; |
212 | UBool* caseFlags = nullptr; |
213 | |
214 | // the source contains all ascii codepoints |
215 | UBool srcIsASCII = true; |
216 | // assume the source contains all LDH codepoints |
217 | UBool srcIsLDH = true; |
218 | |
219 | int32_t j=0; |
220 | |
221 | //get the options |
222 | UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); |
223 | |
224 | int32_t failPos = -1; |
225 | |
226 | if(srcLength == -1){ |
227 | srcLength = u_strlen(src); |
228 | } |
229 | |
230 | if(srcLength > b1Capacity){ |
231 | b1 = (char16_t*) uprv_malloc(srcLength * U_SIZEOF_UCHAR); |
232 | if(b1==nullptr){ |
233 | *status = U_MEMORY_ALLOCATION_ERROR; |
234 | goto CLEANUP; |
235 | } |
236 | b1Capacity = srcLength; |
237 | } |
238 | |
239 | // step 1 |
240 | for( j=0;j<srcLength;j++){ |
241 | if(src[j] > 0x7F){ |
242 | srcIsASCII = false; |
243 | } |
244 | b1[b1Len++] = src[j]; |
245 | } |
246 | |
247 | // step 2 is performed only if the source contains non ASCII |
248 | if(srcIsASCII == false){ |
249 | |
250 | // step 2 |
251 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); |
252 | |
253 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
254 | // redo processing of string |
255 | // we do not have enough room so grow the buffer |
256 | if(b1 != b1Stack){ |
257 | uprv_free(b1); |
258 | } |
259 | b1 = (char16_t*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); |
260 | if(b1==nullptr){ |
261 | *status = U_MEMORY_ALLOCATION_ERROR; |
262 | goto CLEANUP; |
263 | } |
264 | |
265 | *status = U_ZERO_ERROR; // reset error |
266 | |
267 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); |
268 | } |
269 | } |
270 | // error bail out |
271 | if(U_FAILURE(*status)){ |
272 | goto CLEANUP; |
273 | } |
274 | if(b1Len == 0){ |
275 | *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; |
276 | goto CLEANUP; |
277 | } |
278 | |
279 | // for step 3 & 4 |
280 | srcIsASCII = true; |
281 | for( j=0;j<b1Len;j++){ |
282 | // check if output of usprep_prepare is all ASCII |
283 | if(b1[j] > 0x7F){ |
284 | srcIsASCII = false; |
285 | }else if(isLDHChar(b1[j])==false){ // if the char is in ASCII range verify that it is an LDH character |
286 | srcIsLDH = false; |
287 | failPos = j; |
288 | } |
289 | } |
290 | if(useSTD3ASCIIRules){ |
291 | // verify 3a and 3b |
292 | // 3(a) Verify the absence of non-LDH ASCII code points; that is, the |
293 | // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. |
294 | // 3(b) Verify the absence of leading and trailing hyphen-minus; that |
295 | // is, the absence of U+002D at the beginning and end of the |
296 | // sequence. |
297 | if( srcIsLDH == false /* source at this point should not contain anyLDH characters */ |
298 | || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ |
299 | *status = U_IDNA_STD3_ASCII_RULES_ERROR; |
300 | |
301 | /* populate the parseError struct */ |
302 | if(srcIsLDH==false){ |
303 | // failPos is always set the index of failure |
304 | uprv_syntaxError(b1,failPos, b1Len,parseError); |
305 | }else if(b1[0] == HYPHEN){ |
306 | // fail position is 0 |
307 | uprv_syntaxError(b1,0,b1Len,parseError); |
308 | }else{ |
309 | // the last index in the source is always length-1 |
310 | uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError); |
311 | } |
312 | |
313 | goto CLEANUP; |
314 | } |
315 | } |
316 | // Step 4: if the source is ASCII then proceed to step 8 |
317 | if(srcIsASCII){ |
318 | if(b1Len <= destCapacity){ |
319 | u_memmove(dest, b1, b1Len); |
320 | reqLength = b1Len; |
321 | }else{ |
322 | reqLength = b1Len; |
323 | goto CLEANUP; |
324 | } |
325 | }else{ |
326 | // step 5 : verify the sequence does not begin with ACE prefix |
327 | if(!startsWithPrefix(b1,b1Len)){ |
328 | |
329 | //step 6: encode the sequence with punycode |
330 | |
331 | // do not preserve the case flags for now! |
332 | // TODO: Preserve the case while implementing the RFE |
333 | // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); |
334 | // uprv_memset(caseFlags,true,b1Len); |
335 | |
336 | b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags, status); |
337 | |
338 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
339 | // redo processing of string |
340 | /* we do not have enough room so grow the buffer*/ |
341 | b2 = (char16_t*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); |
342 | if(b2 == nullptr){ |
343 | *status = U_MEMORY_ALLOCATION_ERROR; |
344 | goto CLEANUP; |
345 | } |
346 | |
347 | *status = U_ZERO_ERROR; // reset error |
348 | |
349 | b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags, status); |
350 | } |
351 | //error bail out |
352 | if(U_FAILURE(*status)){ |
353 | goto CLEANUP; |
354 | } |
355 | // TODO : Reconsider while implementing the case preserve RFE |
356 | // convert all codepoints to lower case ASCII |
357 | // toASCIILower(b2,b2Len); |
358 | reqLength = b2Len+ACE_PREFIX_LENGTH; |
359 | |
360 | if(reqLength > destCapacity){ |
361 | *status = U_BUFFER_OVERFLOW_ERROR; |
362 | goto CLEANUP; |
363 | } |
364 | //Step 7: prepend the ACE prefix |
365 | u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH); |
366 | //Step 6: copy the contents in b2 into dest |
367 | u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len); |
368 | |
369 | }else{ |
370 | *status = U_IDNA_ACE_PREFIX_ERROR; |
371 | //position of failure is 0 |
372 | uprv_syntaxError(b1,0,b1Len,parseError); |
373 | goto CLEANUP; |
374 | } |
375 | } |
376 | // step 8: verify the length of label |
377 | if(reqLength > MAX_LABEL_LENGTH){ |
378 | *status = U_IDNA_LABEL_TOO_LONG_ERROR; |
379 | } |
380 | |
381 | CLEANUP: |
382 | if(b1 != b1Stack){ |
383 | uprv_free(b1); |
384 | } |
385 | if(b2 != b2Stack){ |
386 | uprv_free(b2); |
387 | } |
388 | uprv_free(caseFlags); |
389 | |
390 | return u_terminateUChars(dest, destCapacity, reqLength, status); |
391 | } |
392 | |
393 | static int32_t |
394 | _internal_toUnicode(const char16_t* src, int32_t srcLength, |
395 | char16_t* dest, int32_t destCapacity, |
396 | int32_t options, |
397 | UStringPrepProfile* nameprep, |
398 | UParseError* parseError, |
399 | UErrorCode* status) |
400 | { |
401 | |
402 | //get the options |
403 | //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); |
404 | int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; |
405 | |
406 | // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. |
407 | char16_t b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; |
408 | |
409 | //initialize pointers to stack buffers |
410 | char16_t *b1 = b1Stack, *b2 = b2Stack, *b1Prime=nullptr, *b3=b3Stack; |
411 | int32_t b1Len = 0, b2Len, b1PrimeLen, b3Len, |
412 | b1Capacity = MAX_LABEL_BUFFER_SIZE, |
413 | b2Capacity = MAX_LABEL_BUFFER_SIZE, |
414 | b3Capacity = MAX_LABEL_BUFFER_SIZE, |
415 | reqLength=0; |
416 | |
417 | UBool* caseFlags = nullptr; |
418 | |
419 | UBool srcIsASCII = true; |
420 | /*UBool srcIsLDH = true; |
421 | int32_t failPos =0;*/ |
422 | |
423 | // step 1: find out if all the codepoints in src are ASCII |
424 | if(srcLength==-1){ |
425 | srcLength = 0; |
426 | for(;src[srcLength]!=0;){ |
427 | if(src[srcLength]> 0x7f){ |
428 | srcIsASCII = false; |
429 | }/*else if(isLDHChar(src[srcLength])==false){ |
430 | // here we do not assemble surrogates |
431 | // since we know that LDH code points |
432 | // are in the ASCII range only |
433 | srcIsLDH = false; |
434 | failPos = srcLength; |
435 | }*/ |
436 | srcLength++; |
437 | } |
438 | }else if(srcLength > 0){ |
439 | for(int32_t j=0; j<srcLength; j++){ |
440 | if(src[j]> 0x7f){ |
441 | srcIsASCII = false; |
442 | break; |
443 | }/*else if(isLDHChar(src[j])==false){ |
444 | // here we do not assemble surrogates |
445 | // since we know that LDH code points |
446 | // are in the ASCII range only |
447 | srcIsLDH = false; |
448 | failPos = j; |
449 | }*/ |
450 | } |
451 | }else{ |
452 | return 0; |
453 | } |
454 | |
455 | if(srcIsASCII == false){ |
456 | // step 2: process the string |
457 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, status); |
458 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
459 | // redo processing of string |
460 | /* we do not have enough room so grow the buffer*/ |
461 | b1 = (char16_t*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); |
462 | if(b1==nullptr){ |
463 | *status = U_MEMORY_ALLOCATION_ERROR; |
464 | goto CLEANUP; |
465 | } |
466 | |
467 | *status = U_ZERO_ERROR; // reset error |
468 | |
469 | b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, status); |
470 | } |
471 | //bail out on error |
472 | if(U_FAILURE(*status)){ |
473 | goto CLEANUP; |
474 | } |
475 | }else{ |
476 | |
477 | //just point src to b1 |
478 | b1 = (char16_t*) src; |
479 | b1Len = srcLength; |
480 | } |
481 | |
482 | // The RFC states that |
483 | // <quote> |
484 | // ToUnicode never fails. If any step fails, then the original input |
485 | // is returned immediately in that step. |
486 | // </quote> |
487 | |
488 | //step 3: verify ACE Prefix |
489 | if(startsWithPrefix(b1,b1Len)){ |
490 | |
491 | //step 4: Remove the ACE Prefix |
492 | b1Prime = b1 + ACE_PREFIX_LENGTH; |
493 | b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; |
494 | |
495 | //step 5: Decode using punycode |
496 | b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags,status); |
497 | |
498 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
499 | // redo processing of string |
500 | /* we do not have enough room so grow the buffer*/ |
501 | b2 = (char16_t*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); |
502 | if(b2==nullptr){ |
503 | *status = U_MEMORY_ALLOCATION_ERROR; |
504 | goto CLEANUP; |
505 | } |
506 | |
507 | *status = U_ZERO_ERROR; // reset error |
508 | |
509 | b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, status); |
510 | } |
511 | |
512 | |
513 | //step 6:Apply toASCII |
514 | b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, status); |
515 | |
516 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
517 | // redo processing of string |
518 | /* we do not have enough room so grow the buffer*/ |
519 | b3 = (char16_t*) uprv_malloc(b3Len * U_SIZEOF_UCHAR); |
520 | if(b3==nullptr){ |
521 | *status = U_MEMORY_ALLOCATION_ERROR; |
522 | goto CLEANUP; |
523 | } |
524 | |
525 | *status = U_ZERO_ERROR; // reset error |
526 | |
527 | b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError, status); |
528 | |
529 | } |
530 | //bail out on error |
531 | if(U_FAILURE(*status)){ |
532 | goto CLEANUP; |
533 | } |
534 | |
535 | //step 7: verify |
536 | if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ |
537 | // Cause the original to be returned. |
538 | *status = U_IDNA_VERIFICATION_ERROR; |
539 | goto CLEANUP; |
540 | } |
541 | |
542 | //step 8: return output of step 5 |
543 | reqLength = b2Len; |
544 | if(b2Len <= destCapacity) { |
545 | u_memmove(dest, b2, b2Len); |
546 | } |
547 | } |
548 | else{ |
549 | // See the start of this if statement for why this is commented out. |
550 | // verify that STD3 ASCII rules are satisfied |
551 | /*if(useSTD3ASCIIRules == true){ |
552 | if( srcIsLDH == false // source contains some non-LDH characters |
553 | || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ |
554 | *status = U_IDNA_STD3_ASCII_RULES_ERROR; |
555 | |
556 | // populate the parseError struct |
557 | if(srcIsLDH==false){ |
558 | // failPos is always set the index of failure |
559 | uprv_syntaxError(src,failPos, srcLength,parseError); |
560 | }else if(src[0] == HYPHEN){ |
561 | // fail position is 0 |
562 | uprv_syntaxError(src,0,srcLength,parseError); |
563 | }else{ |
564 | // the last index in the source is always length-1 |
565 | uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); |
566 | } |
567 | |
568 | goto CLEANUP; |
569 | } |
570 | }*/ |
571 | // just return the source |
572 | //copy the source to destination |
573 | if(srcLength <= destCapacity){ |
574 | u_memmove(dest, src, srcLength); |
575 | } |
576 | reqLength = srcLength; |
577 | } |
578 | |
579 | |
580 | CLEANUP: |
581 | |
582 | if(b1 != b1Stack && b1!=src){ |
583 | uprv_free(b1); |
584 | } |
585 | if(b2 != b2Stack){ |
586 | uprv_free(b2); |
587 | } |
588 | uprv_free(caseFlags); |
589 | |
590 | // The RFC states that |
591 | // <quote> |
592 | // ToUnicode never fails. If any step fails, then the original input |
593 | // is returned immediately in that step. |
594 | // </quote> |
595 | // So if any step fails lets copy source to destination |
596 | if(U_FAILURE(*status)){ |
597 | //copy the source to destination |
598 | if(dest && srcLength <= destCapacity){ |
599 | // srcLength should have already been set earlier. |
600 | U_ASSERT(srcLength >= 0); |
601 | u_memmove(dest, src, srcLength); |
602 | } |
603 | reqLength = srcLength; |
604 | *status = U_ZERO_ERROR; |
605 | } |
606 | |
607 | return u_terminateUChars(dest, destCapacity, reqLength, status); |
608 | } |
609 | |
610 | U_CAPI int32_t U_EXPORT2 |
611 | uidna_toASCII(const char16_t* src, int32_t srcLength, |
612 | char16_t* dest, int32_t destCapacity, |
613 | int32_t options, |
614 | UParseError* parseError, |
615 | UErrorCode* status){ |
616 | |
617 | if(status == nullptr || U_FAILURE(*status)){ |
618 | return 0; |
619 | } |
620 | if((src==nullptr) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ |
621 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
622 | return 0; |
623 | } |
624 | |
625 | UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
626 | |
627 | if(U_FAILURE(*status)){ |
628 | return -1; |
629 | } |
630 | |
631 | int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); |
632 | |
633 | /* close the profile*/ |
634 | usprep_close(nameprep); |
635 | |
636 | return retLen; |
637 | } |
638 | |
639 | U_CAPI int32_t U_EXPORT2 |
640 | uidna_toUnicode(const char16_t* src, int32_t srcLength, |
641 | char16_t* dest, int32_t destCapacity, |
642 | int32_t options, |
643 | UParseError* parseError, |
644 | UErrorCode* status){ |
645 | |
646 | if(status == nullptr || U_FAILURE(*status)){ |
647 | return 0; |
648 | } |
649 | if( (src==nullptr) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ |
650 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
651 | return 0; |
652 | } |
653 | |
654 | UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
655 | |
656 | if(U_FAILURE(*status)){ |
657 | return -1; |
658 | } |
659 | |
660 | int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); |
661 | |
662 | usprep_close(nameprep); |
663 | |
664 | return retLen; |
665 | } |
666 | |
667 | |
668 | U_CAPI int32_t U_EXPORT2 |
669 | uidna_IDNToASCII( const char16_t *src, int32_t srcLength, |
670 | char16_t* dest, int32_t destCapacity, |
671 | int32_t options, |
672 | UParseError *parseError, |
673 | UErrorCode *status){ |
674 | |
675 | if(status == nullptr || U_FAILURE(*status)){ |
676 | return 0; |
677 | } |
678 | if((src==nullptr) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ |
679 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
680 | return 0; |
681 | } |
682 | |
683 | int32_t reqLength = 0; |
684 | |
685 | UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
686 | |
687 | if(U_FAILURE(*status)){ |
688 | return 0; |
689 | } |
690 | |
691 | //initialize pointers |
692 | char16_t *delimiter = (char16_t*)src; |
693 | char16_t *labelStart = (char16_t*)src; |
694 | char16_t *currentDest = (char16_t*) dest; |
695 | int32_t remainingLen = srcLength; |
696 | int32_t remainingDestCapacity = destCapacity; |
697 | int32_t labelLen = 0, labelReqLength = 0; |
698 | UBool done = false; |
699 | |
700 | |
701 | for(;;){ |
702 | |
703 | labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); |
704 | labelReqLength = 0; |
705 | if(!(labelLen==0 && done)){// make sure this is not a root label separator. |
706 | |
707 | labelReqLength = _internal_toASCII( labelStart, labelLen, |
708 | currentDest, remainingDestCapacity, |
709 | options, nameprep, |
710 | parseError, status); |
711 | |
712 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
713 | |
714 | *status = U_ZERO_ERROR; // reset error |
715 | remainingDestCapacity = 0; |
716 | } |
717 | } |
718 | |
719 | |
720 | if(U_FAILURE(*status)){ |
721 | break; |
722 | } |
723 | |
724 | reqLength +=labelReqLength; |
725 | // adjust the destination pointer |
726 | if(labelReqLength < remainingDestCapacity){ |
727 | currentDest = currentDest + labelReqLength; |
728 | remainingDestCapacity -= labelReqLength; |
729 | }else{ |
730 | // should never occur |
731 | remainingDestCapacity = 0; |
732 | } |
733 | |
734 | if(done){ |
735 | break; |
736 | } |
737 | |
738 | // add the label separator |
739 | if(remainingDestCapacity > 0){ |
740 | *currentDest++ = FULL_STOP; |
741 | remainingDestCapacity--; |
742 | } |
743 | reqLength++; |
744 | |
745 | labelStart = delimiter; |
746 | if(remainingLen >0 ){ |
747 | remainingLen = (int32_t)(srcLength - (delimiter - src)); |
748 | } |
749 | |
750 | } |
751 | |
752 | if(reqLength > MAX_DOMAIN_NAME_LENGTH){ |
753 | *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; |
754 | } |
755 | |
756 | usprep_close(nameprep); |
757 | |
758 | return u_terminateUChars(dest, destCapacity, reqLength, status); |
759 | } |
760 | |
761 | U_CAPI int32_t U_EXPORT2 |
762 | uidna_IDNToUnicode( const char16_t* src, int32_t srcLength, |
763 | char16_t* dest, int32_t destCapacity, |
764 | int32_t options, |
765 | UParseError* parseError, |
766 | UErrorCode* status){ |
767 | |
768 | if(status == nullptr || U_FAILURE(*status)){ |
769 | return 0; |
770 | } |
771 | if((src==nullptr) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ |
772 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
773 | return 0; |
774 | } |
775 | |
776 | int32_t reqLength = 0; |
777 | |
778 | UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); |
779 | |
780 | if(U_FAILURE(*status)){ |
781 | return 0; |
782 | } |
783 | |
784 | //initialize pointers |
785 | char16_t *delimiter = (char16_t*)src; |
786 | char16_t *labelStart = (char16_t*)src; |
787 | char16_t *currentDest = (char16_t*) dest; |
788 | int32_t remainingLen = srcLength; |
789 | int32_t remainingDestCapacity = destCapacity; |
790 | int32_t labelLen = 0, labelReqLength = 0; |
791 | UBool done = false; |
792 | |
793 | for(;;){ |
794 | |
795 | labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); |
796 | |
797 | // The RFC states that |
798 | // <quote> |
799 | // ToUnicode never fails. If any step fails, then the original input |
800 | // is returned immediately in that step. |
801 | // </quote> |
802 | // _internal_toUnicode will copy the label. |
803 | /*if(labelLen==0 && done==false){ |
804 | *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; |
805 | break; |
806 | }*/ |
807 | |
808 | labelReqLength = _internal_toUnicode(labelStart, labelLen, |
809 | currentDest, remainingDestCapacity, |
810 | options, nameprep, |
811 | parseError, status); |
812 | |
813 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
814 | *status = U_ZERO_ERROR; // reset error |
815 | remainingDestCapacity = 0; |
816 | } |
817 | |
818 | if(U_FAILURE(*status)){ |
819 | break; |
820 | } |
821 | |
822 | reqLength +=labelReqLength; |
823 | // adjust the destination pointer |
824 | if(labelReqLength < remainingDestCapacity){ |
825 | currentDest = currentDest + labelReqLength; |
826 | remainingDestCapacity -= labelReqLength; |
827 | }else{ |
828 | // should never occur |
829 | remainingDestCapacity = 0; |
830 | } |
831 | |
832 | if(done){ |
833 | break; |
834 | } |
835 | |
836 | // add the label separator |
837 | // Unlike the ToASCII operation we don't normalize the label separators |
838 | if(remainingDestCapacity > 0){ |
839 | *currentDest++ = *(labelStart + labelLen); |
840 | remainingDestCapacity--; |
841 | } |
842 | reqLength++; |
843 | |
844 | labelStart = delimiter; |
845 | if(remainingLen >0 ){ |
846 | remainingLen = (int32_t)(srcLength - (delimiter - src)); |
847 | } |
848 | |
849 | } |
850 | |
851 | if(reqLength > MAX_DOMAIN_NAME_LENGTH){ |
852 | *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; |
853 | } |
854 | |
855 | usprep_close(nameprep); |
856 | |
857 | return u_terminateUChars(dest, destCapacity, reqLength, status); |
858 | } |
859 | |
860 | U_CAPI int32_t U_EXPORT2 |
861 | uidna_compare( const char16_t *s1, int32_t length1, |
862 | const char16_t *s2, int32_t length2, |
863 | int32_t options, |
864 | UErrorCode* status){ |
865 | |
866 | if(status == nullptr || U_FAILURE(*status)){ |
867 | return -1; |
868 | } |
869 | |
870 | char16_t b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; |
871 | char16_t *b1 = b1Stack, *b2 = b2Stack; |
872 | int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE; |
873 | int32_t result=-1; |
874 | |
875 | UParseError parseError; |
876 | |
877 | b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, status); |
878 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
879 | // redo processing of string |
880 | b1 = (char16_t*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); |
881 | if(b1==nullptr){ |
882 | *status = U_MEMORY_ALLOCATION_ERROR; |
883 | goto CLEANUP; |
884 | } |
885 | |
886 | *status = U_ZERO_ERROR; // reset error |
887 | |
888 | b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, status); |
889 | |
890 | } |
891 | |
892 | b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, status); |
893 | if(*status == U_BUFFER_OVERFLOW_ERROR){ |
894 | // redo processing of string |
895 | b2 = (char16_t*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); |
896 | if(b2==nullptr){ |
897 | *status = U_MEMORY_ALLOCATION_ERROR; |
898 | goto CLEANUP; |
899 | } |
900 | |
901 | *status = U_ZERO_ERROR; // reset error |
902 | |
903 | b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, status); |
904 | |
905 | } |
906 | // when toASCII is applied all label separators are replaced with FULL_STOP |
907 | result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); |
908 | |
909 | CLEANUP: |
910 | if(b1 != b1Stack){ |
911 | uprv_free(b1); |
912 | } |
913 | |
914 | if(b2 != b2Stack){ |
915 | uprv_free(b2); |
916 | } |
917 | |
918 | return result; |
919 | } |
920 | |
921 | #endif /* #if !UCONFIG_NO_IDNA */ |
922 | |