idna.c source code [TIC-80/vendor/libuv/src/idna.c]

/* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl>
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
15
/* Derived from https://github.com/bnoordhuis/punycode
 * but updated to support IDNA 2008.
 */
19
20	#include "uv.h"
21	#include "idna.h"
22	#include <string.h>
23
/* Decodes the remainder of a multi-byte UTF-8 sequence.
 *
 * |a| is the lead byte, which the caller has already consumed. |*p| points
 * at the byte following the lead byte and |pe| points one past the last
 * readable byte.
 *
 * Returns the decoded code point, or (unsigned) -1 when the sequence is
 * malformed: invalid lead byte, truncated input, invalid continuation byte,
 * overlong encoding, UTF-16 surrogate, or a value above U+10FFFF.
 *
 * |*p| is advanced past every continuation byte that was accepted. No byte
 * at or beyond |pe| is ever read; a truncated sequence is rejected before
 * any continuation byte is touched.
 */
static unsigned uv__utf8_decode1_slow(const char** p,
                                      const char* pe,
                                      unsigned a) {
  unsigned b;
  unsigned min;
  int need;

  if (a > 0xF7)
    return (unsigned) -1;  /* Not a valid lead byte. */

  /* The lead byte alone determines how many continuation bytes follow, the
   * payload bits it contributes and the smallest code point that may
   * legally use an encoding of this length.
   */
  if (a > 0xEF) {
    need = 3;
    min = 0x10000;
    a &= 7;
  } else if (a > 0xDF) {
    need = 2;
    min = 0x800;
    a &= 15;
  } else if (a > 0xBF) {
    need = 1;
    min = 0x80;
    a &= 31;
  } else {
    return (unsigned) -1;  /* Stray continuation byte. */
  }

  /* Bounds check: remaining bytes are |pe - *p|, not |*p - pe|. */
  if (pe - *p < need)
    return (unsigned) -1;  /* Truncated sequence. */

  for (; need > 0; need--) {
    b = (unsigned char) **p;

    /* Check each byte on its own; combining them (e.g. with XOR) lets
     * pairs of invalid bytes cancel each other out.
     */
    if (0x80 != (0xC0 & b))
      return (unsigned) -1;  /* Invalid continuation byte. */

    a = (a << 6) | (b & 63);
    (*p)++;
  }

  if (a < min)
    return (unsigned) -1;  /* Overlong sequence. */

  if (a > 0x10FFFF)
    return (unsigned) -1;  /* Four-byte sequence > U+10FFFF. */

  if (a >= 0xD800 && a <= 0xDFFF)
    return (unsigned) -1;  /* Surrogate pair. */

  return a;
}
87
/* Decodes a single UTF-8 encoded code point starting at |*p|.
 *
 * The caller must guarantee |*p < pe|: the first byte is read
 * unconditionally. |*p| is always advanced past that first byte; for
 * non-ASCII lead bytes the rest of the sequence is handled by
 * uv__utf8_decode1_slow(), which receives |pe| as the end of the input.
 *
 * Returns the code point, or whatever uv__utf8_decode1_slow() returns for a
 * multi-byte sequence ((unsigned) -1 on a decoding error).
 */
unsigned uv__utf8_decode1(const char** p, const char* pe) {
  unsigned a;

  /* Go through unsigned char so bytes >= 0x80 do not sign-extend. */
  a = (unsigned char) *(*p)++;

  if (a < 128)
    return a;  /* ASCII, common case. */

  return uv__utf8_decode1_slow(p, pe, a);
}
98
99	#define foreach_codepoint(c, p, pe) \
100	for (; (void) (p <= pe && (c = uv__utf8_decode1(p, pe))), p <= pe;)
101
102	static int uv__idna_toascii_label(const char* s, const char* se,
103	char** d, char* de) {
104	static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789";
105	const char* ss;
106	unsigned c;
107	unsigned h;
108	unsigned k;
109	unsigned n;
110	unsigned m;
111	unsigned q;
112	unsigned t;
113	unsigned x;
114	unsigned y;
115	unsigned bias;
116	unsigned delta;
117	unsigned todo;
118	int first;
119
120	h = `0`;
121	ss = s;
122	todo = `0`;
123
124	foreach_codepoint(c, &s, se) {
125	if (c < `128`)
126	h++;
127	else if (c == (unsigned) -`1`)
128	return UV_EINVAL;
129	else
130	todo++;
131	}
132
133	if (todo > `0`) {
134	if (d < de) (*d)++ = `'x'`;
135	if (d < de) (*d)++ = `'n'`;
136	if (d < de) (*d)++ = `'-'`;
137	if (d < de) (*d)++ = `'-'`;
138	}
139
140	x = `0`;
141	s = ss;
142	foreach_codepoint(c, &s, se) {
143	if (c > `127`)
144	continue;
145
146	if (*d < de)
147	(d)++ = c;
148
149	if (++x == h)
150	break; / Visited all ASCII characters. /
151	}
152
153	if (todo == `0`)
154	return h;
155
156	/ Only write separator when we've written ASCII characters first. /
157	if (h > `0`)
158	if (*d < de)
159	(d)++ = `'-'`;
160
161	n = `128`;
162	bias = `72`;
163	delta = `0`;
164	first = `1`;
165
166	while (todo > `0`) {
167	m = -`1`;
168	s = ss;
169	foreach_codepoint(c, &s, se)
170	if (c >= n)
171	if (c < m)
172	m = c;
173
174	x = m - n;
175	y = h + `1`;
176
177	if (x > ~delta / y)
178	return UV_E2BIG; / Overflow. /
179
180	delta += x * y;
181	n = m;
182
183	s = ss;
184	foreach_codepoint(c, &s, se) {
185	if (c < n)
186	if (++delta == `0`)
187	return UV_E2BIG; / Overflow. /
188
189	if (c != n)
190	continue;
191
192	for (k = `36`, q = delta; / empty /; k += `36`) {
193	t = `1`;
194
195	if (k > bias)
196	t = k - bias;
197
198	if (t > `26`)
199	t = `26`;
200
201	if (q < t)
202	break;
203
204	/ TODO(bnoordhuis) Since 1 <= t <= 26 and therefore*
205	* 10 <= y <= 35, we can optimize the long division
206	* into a table-based reciprocal multiplication.
207	*/
208	x = q - t;
209	y = `36` - t; / 10 <= y <= 35 since 1 <= t <= 26. /
210	q = x / y;
211	t = t + x % y; / 1 <= t <= 35 because of y. /
212
213	if (*d < de)
214	(d)++ = alphabet[t];
215	}
216
217	if (*d < de)
218	(d)++ = alphabet[q];
219
220	delta /= `2`;
221
222	if (first) {
223	delta /= `350`;
224	first = `0`;
225	}
226
227	/ No overflow check is needed because \|delta\| was just*
228	* divided by 2 and \|delta+delta >= delta + delta/h\|.
229	*/
230	h++;
231	delta += delta / h;
232
233	for (bias = `0`; delta > `35` * `26` / `2`; bias += `36`)
234	delta /= `35`;
235
236	bias += `36` * delta / (delta + `38`);
237	delta = `0`;
238	todo--;
239	}
240
241	delta++;
242	n++;
243	}
244
245	return `0`;
246	}
247
248	#undef foreach_codepoint
249
250	long uv__idna_toascii(const char* s, const char* se, char* d, char* de) {
251	const char* si;
252	const char* st;
253	unsigned c;
254	char* ds;
255	int rc;
256
257	ds = d;
258
259	for (si = s; si < se; / empty /) {
260	st = si;
261	c = uv__utf8_decode1(&si, se);
262
263	if (c != `'.'`)
264	if (c != `0x3002`) / 。 /
265	if (c != `0xFF0E`) / ． /
266	if (c != `0xFF61`) / ｡ /
267	continue;
268
269	rc = uv__idna_toascii_label(s, st, &d, de);
270
271	if (rc < `0`)
272	return rc;
273
274	if (d < de)
275	*d++ = `'.'`;
276
277	s = si;
278	}
279
280	if (s < se) {
281	rc = uv__idna_toascii_label(s, se, &d, de);
282
283	if (rc < `0`)
284	return rc;
285	}
286
287	if (d < de)
288	*d++ = `'\0'`;
289
290	return d - ds; / Number of bytes written. /
291	}
292

Browse the source code of TIC-80/vendor/libuv/src/idna.c