1 | /* Copyright The libuv project and contributors. All rights reserved. |
2 | * |
3 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
4 | * of this software and associated documentation files (the "Software"), to |
5 | * deal in the Software without restriction, including without limitation the |
6 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
7 | * sell copies of the Software, and to permit persons to whom the Software is |
8 | * furnished to do so, subject to the following conditions: |
9 | * |
10 | * The above copyright notice and this permission notice shall be included in |
11 | * all copies or substantial portions of the Software. |
12 | * |
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
16 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
17 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
18 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
19 | * IN THE SOFTWARE. |
20 | */ |
21 | |
22 | #include "task.h" |
23 | #include "../src/idna.c" |
24 | #include <string.h> |
25 | |
/* Feeds hand-written byte sequences to uv__utf8_decode1() and checks, for
 * every call, both the returned code point and how far the read cursor moved.
 * Rejected sequences are expected to yield (unsigned) -1.
 */
TEST_IMPL(utf8_decode1) {
  char buf[32];
  const char* end;
  const char* cur;
  int k;

  /* Every call below is given the end of the whole buffer as its limit. */
  end = buf + sizeof(buf);

  /* One-byte sequences: U+0000 and U+007F. */
  snprintf(buf, sizeof(buf), "%c\x7F", 0x00);
  cur = buf;
  ASSERT(uv__utf8_decode1(&cur, end) == 0);
  ASSERT(cur == buf + 1);
  ASSERT(uv__utf8_decode1(&cur, end) == 127);
  ASSERT(cur == buf + 2);

  /* Two-byte sequences: smallest and largest code point. */
  snprintf(buf, sizeof(buf), "\xC2\x80\xDF\xBF");
  cur = buf;
  ASSERT(uv__utf8_decode1(&cur, end) == 128);
  ASSERT(cur == buf + 2);
  ASSERT(uv__utf8_decode1(&cur, end) == 0x7FF);
  ASSERT(cur == buf + 4);

  /* Three-byte sequences: smallest and largest code point. */
  snprintf(buf, sizeof(buf), "\xE0\xA0\x80\xEF\xBF\xBF");
  cur = buf;
  ASSERT(uv__utf8_decode1(&cur, end) == 0x800);
  ASSERT(cur == buf + 3);
  ASSERT(uv__utf8_decode1(&cur, end) == 0xFFFF);
  ASSERT(cur == buf + 6);

  /* Four-byte sequences: U+10000 up to U+10FFFF. */
  snprintf(buf, sizeof(buf), "\xF0\x90\x80\x80\xF4\x8F\xBF\xBF");
  cur = buf;
  ASSERT(uv__utf8_decode1(&cur, end) == 0x10000);
  ASSERT(cur == buf + 4);
  ASSERT(uv__utf8_decode1(&cur, end) == 0x10FFFF);
  ASSERT(cur == buf + 8);

  /* Four-byte sequences beyond U+10FFFF are rejected; the cursor still
   * advances by four bytes per call.
   */
  snprintf(buf, sizeof(buf), "\xF4\x90\xC0\xC0\xF7\xBF\xBF\xBF");
  cur = buf;
  ASSERT(uv__utf8_decode1(&cur, end) == (unsigned) -1);
  ASSERT(cur == buf + 4);
  ASSERT(uv__utf8_decode1(&cur, end) == (unsigned) -1);
  ASSERT(cur == buf + 8);

  /* Overlong two-byte encodings are rejected. */
  snprintf(buf, sizeof(buf), "\xC0\x80\xC1\x80");
  cur = buf;
  ASSERT(uv__utf8_decode1(&cur, end) == (unsigned) -1);
  ASSERT(cur == buf + 2);
  ASSERT(uv__utf8_decode1(&cur, end) == (unsigned) -1);
  ASSERT(cur == buf + 4);

  /* Encoded surrogates are rejected. */
  snprintf(buf, sizeof(buf), "\xED\xA0\x80\xED\xA3\xBF");
  cur = buf;
  ASSERT(uv__utf8_decode1(&cur, end) == (unsigned) -1);
  ASSERT(cur == buf + 3);
  ASSERT(uv__utf8_decode1(&cur, end) == (unsigned) -1);
  ASSERT(cur == buf + 6);

  /* Bytes 0xF8..0xFF: each one is rejected on its own, one byte per call. */
  snprintf(buf, sizeof(buf), "\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
  cur = buf;

  for (k = 0; k < 8; k++) {
    ASSERT(uv__utf8_decode1(&cur, end) == (unsigned) -1);
    ASSERT(cur == buf + k + 1);
  }

  return 0;
}
98 | |
99 | /* Doesn't work on z/OS because that platform uses EBCDIC, not ASCII. */ |
100 | #ifndef __MVS__ |
101 | |
/* F(input, err): assert that uv__idna_toascii() returns `err` for `input`.
 *
 * `input` must be a string literal (it is pasted between two empty literals);
 * its terminating NUL is not part of the range handed to the encoder.
 * `err` is parenthesised so that any expression can be passed without its
 * operators re-associating with the `==` comparison.
 */
#define F(input, err)                                                         \
  do {                                                                        \
    char d[256] = {0};                                                        \
    static const char s[] = "" input "";                                      \
    ASSERT((err) ==                                                           \
           uv__idna_toascii(s, s + sizeof(s) - 1, d, d + sizeof(d)));         \
  } while (0)
108 | |
/* T(input, expected): run the string literal `input` through
 * uv__idna_toascii() and assert that
 *   - the returned count equals sizeof(expected), which includes the
 *     literal's terminating NUL, and
 *   - the output buffer starts with exactly those bytes.
 * The output is then encoded a second time and must come out unchanged.
 */
#define T(input, expected)                                                    \
  do {                                                                        \
    static const char src[] = "" input "";                                    \
    char first[256] = {0};                                                    \
    char second[256] = {0};                                                   \
    long len;                                                                 \
    len = uv__idna_toascii(src,                                               \
                           src + sizeof(src) - 1,                             \
                           first,                                             \
                           first + sizeof(first));                            \
    ASSERT(len == sizeof(expected));                                          \
    ASSERT(memcmp(first, expected, len) == 0);                                \
    /* Second pass: already-encoded text must be left as it is. */            \
    len = uv__idna_toascii(first,                                             \
                           first + strlen(first),                             \
                           second,                                            \
                           second + sizeof(second));                          \
    ASSERT(len == sizeof(expected));                                          \
    ASSERT(memcmp(second, expected, len) == 0);                               \
    ASSERT(memcmp(first, second, sizeof(second)) == 0);                       \
  } while (0)
124 | |
125 | TEST_IMPL(idna_toascii) { |
126 | /* Illegal inputs. */ |
127 | F("\xC0\x80\xC1\x80" , UV_EINVAL); /* Overlong UTF-8 sequence. */ |
128 | F("\xC0\x80\xC1\x80.com" , UV_EINVAL); /* Overlong UTF-8 sequence. */ |
129 | /* No conversion. */ |
130 | T("" , "" ); |
131 | T("." , "." ); |
132 | T(".com" , ".com" ); |
133 | T("example" , "example" ); |
134 | T("example-" , "example-" ); |
135 | T("straße.de" , "xn--strae-oqa.de" ); |
136 | /* Test cases adapted from punycode.js. Most are from RFC 3492. */ |
137 | T("foo.bar" , "foo.bar" ); |
138 | T("mañana.com" , "xn--maana-pta.com" ); |
139 | T("example.com." , "example.com." ); |
140 | T("bücher.com" , "xn--bcher-kva.com" ); |
141 | T("café.com" , "xn--caf-dma.com" ); |
142 | T("café.café.com" , "xn--caf-dma.xn--caf-dma.com" ); |
143 | T("☃-⌘.com" , "xn----dqo34k.com" ); |
144 | T("퐀☃-⌘.com" , "xn----dqo34kn65z.com" ); |
145 | T("💩.la" , "xn--ls8h.la" ); |
146 | T("mañana.com" , "xn--maana-pta.com" ); |
147 | T("mañana。com" , "xn--maana-pta.com" ); |
148 | T("mañana.com" , "xn--maana-pta.com" ); |
149 | T("mañana。com" , "xn--maana-pta.com" ); |
150 | T("ü" , "xn--tda" ); |
151 | T(".ü" , ".xn--tda" ); |
152 | T("ü.ü" , "xn--tda.xn--tda" ); |
153 | T("ü.ü." , "xn--tda.xn--tda." ); |
154 | T("üëäö♥" , "xn--4can8av2009b" ); |
155 | T("Willst du die Blüthe des frühen, die Früchte des späteren Jahres" , |
156 | "xn--Willst du die Blthe des frhen, " |
157 | "die Frchte des spteren Jahres-x9e96lkal" ); |
158 | T("ليهمابتكلموشعربي؟" , "xn--egbpdaj6bu4bxfgehfvwxn" ); |
159 | T("他们为什么不说中文" , "xn--ihqwcrb4cv8a8dqg056pqjye" ); |
160 | T("他們爲什麽不說中文" , "xn--ihqwctvzc91f659drss3x8bo0yb" ); |
161 | T("Pročprostěnemluvíčesky" , "xn--Proprostnemluvesky-uyb24dma41a" ); |
162 | T("למההםפשוטלאמדבריםעברית" , "xn--4dbcagdahymbxekheh6e0a7fei0b" ); |
163 | T("यहलोगहिन्दीक्योंनहींबोलसकतेहैं" , |
164 | "xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd" ); |
165 | T("なぜみんな日本語を話してくれないのか" , |
166 | "xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa" ); |
167 | T("세계의모든사람들이한국어를이해한다면얼마나좋을까" , |
168 | "xn--989aomsvi5e83db1d2a355cv1e0vak1d" |
169 | "wrv93d5xbh15a0dt30a5jpsd879ccm6fea98c" ); |
170 | T("почемужеонинеговорятпорусски" , "xn--b1abfaaepdrnnbgefbadotcwatmq2g4l" ); |
171 | T("PorquénopuedensimplementehablarenEspañol" , |
172 | "xn--PorqunopuedensimplementehablarenEspaol-fmd56a" ); |
173 | T("TạisaohọkhôngthểchỉnóitiếngViệt" , |
174 | "xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g" ); |
175 | T("3年B組金八先生" , "xn--3B-ww4c5e180e575a65lsy2b" ); |
176 | T("安室奈美恵-with-SUPER-MONKEYS" , |
177 | "xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n" ); |
178 | T("Hello-Another-Way-それぞれの場所" , |
179 | "xn--Hello-Another-Way--fc4qua05auwb3674vfr0b" ); |
180 | T("ひとつ屋根の下2" , "xn--2-u9tlzr9756bt3uc0v" ); |
181 | T("MajiでKoiする5秒前" , "xn--MajiKoi5-783gue6qz075azm5e" ); |
182 | T("パフィーdeルンバ" , "xn--de-jg4avhby1noc0d" ); |
183 | T("そのスピードで" , "xn--d9juau41awczczp" ); |
184 | T("-> $1.00 <-" , "-> $1.00 <-" ); |
185 | /* Test cases from https://unicode.org/reports/tr46/ */ |
186 | T("faß.de" , "xn--fa-hia.de" ); |
187 | T("βόλος.com" , "xn--nxasmm1c.com" ); |
188 | T("ශ්රී.com" , "xn--10cl1a0b660p.com" ); |
189 | T("نامهای.com" , "xn--mgba3gch31f060k.com" ); |
190 | return 0; |
191 | } |
192 | |
/* F() and T() are helpers for the idna_toascii test only; drop both so
 * neither short name leaks into code that follows.
 */
#undef F
#undef T
194 | |
195 | #endif /* __MVS__ */ |
196 | |