urlapi.c source code [Curl/lib/urlapi.c]

/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at https://curl.haxx.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ***************************************************************************/
22
23	#include "curl_setup.h"
24
25	#include "urldata.h"
26	#include "urlapi-int.h"
27	#include "strcase.h"
28	#include "dotdot.h"
29	#include "url.h"
30	#include "escape.h"
31	#include "curl_ctype.h"
32	#include "inet_pton.h"
33
/* The last 3 #include files should be in this order */
35	#include "curl_printf.h"
36	#include "curl_memory.h"
37	#include "memdebug.h"
38
/* MSDOS/Windows style drive prefix, eg c: in c:foo
 *
 * Evaluates to non-zero when 'str' begins with an ASCII letter followed by
 * a colon. The argument is evaluated several times, so it must not have
 * side effects. It is parenthesized so that expressions such as 'p + 1'
 * can be passed safely. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
44
/* MSDOS/Windows style drive prefix, optionally with
 * a '|' instead of ':', followed by a slash or NUL
 *
 * The slash may be either '/' or '\\'. The argument is evaluated several
 * times, so it must not have side effects. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
52
/* Internal representation of CURLU. Point to URL-encoded strings.
 *
 * Every char pointer member is either NULL or a heap allocation that is
 * released by free_urlhandle(). */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;
  char *path;
  char *query;
  char *fragment;

  char *scratch; /* temporary scratch area */
  char *temppath; /* temporary path pointer */
  long portnum; /* the numerical version */
};
70
/* scheme used when CURLU_DEFAULT_SCHEME is set and none was given */
#define DEFAULT_SCHEME "https"
72
73	static void free_urlhandle(struct Curl_URL *u)
74	{
75	free(u->scheme);
76	free(u->user);
77	free(u->password);
78	free(u->options);
79	free(u->host);
80	free(u->zoneid);
81	free(u->port);
82	free(u->path);
83	free(u->query);
84	free(u->fragment);
85	free(u->scratch);
86	free(u->temppath);
87	}
88
89	/ move the full contents of one handle onto another and*
90	free the original /*
91	static void mv_urlhandle(struct Curl_URL *from,
92	struct Curl_URL *to)
93	{
94	free_urlhandle(to);
95	to = from;
96	free(from);
97	}
98
/*
 * Find the separator at the end of the host name, or the '?' in cases like
 * http://www.url.com?id=2380
 *
 * Returns a pointer into 'url': the first '/' or '?' found after the "//"
 * (or from the start of the string when there is no "//"), whichever comes
 * first. If neither exists, the pointer to the terminating zero is
 * returned.
 */
static const char *find_host_sep(const char *url)
{
  const char *sep;
  const char *query;

  /* Find the start of the hostname */
  sep = strstr(url, "//");
  if(!sep)
    sep = url;
  else
    sep += 2;

  query = strchr(sep, '?');
  sep = strchr(sep, '/');

  if(!sep)
    sep = url + strlen(url);

  if(!query)
    query = url + strlen(url);

  return sep < query ? sep : query;
}
126
/*
 * Decide in an encoding-independent manner whether a character in an
 * URL must be escaped. The same criterion must be used in strlen_url()
 * and strcpy_url().
 *
 * Returns TRUE for any byte that is neither a control character, a
 * whitespace character nor a graphical character according to the
 * ISCNTRL/ISSPACE/ISGRAPH classification macros.
 */
static bool urlchar_needs_escaping(int c)
{
  return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
}
136
137	/*
138	* strlen_url() returns the length of the given URL if the spaces within the
139	* URL were properly URL encoded.
140	* URL encoding should be skipped for host names, otherwise IDN resolution
141	* will fail.
142	*/
143	static size_t strlen_url(const char *url, bool relative)
144	{
145	const unsigned char *ptr;
146	size_t newlen = `0`;
147	bool left = TRUE; / left side of the ? /
148	const unsigned char host_sep = (const* unsigned char *) url;
149
150	if(!relative)
151	host_sep = (const unsigned char *) find_host_sep(url);
152
153	for(ptr = (unsigned char )url; ptr; ptr++) {
154
155	if(ptr < host_sep) {
156	++newlen;
157	continue;
158	}
159
160	switch(*ptr) {
161	case `'?'`:
162	left = FALSE;
163	/ FALLTHROUGH /
164	default:
165	if(urlchar_needs_escaping(*ptr))
166	newlen += `2`;
167	newlen++;
168	break;
169	case `' '`:
170	if(left)
171	newlen += `3`;
172	else
173	newlen++;
174	break;
175	}
176	}
177	return newlen;
178	}
179
180	/ strcpy_url() copies a url to a output buffer and URL-encodes the spaces in*
181	* the source URL accordingly.
182	* URL encoding should be skipped for host names, otherwise IDN resolution
183	* will fail.
184	*/
185	static void strcpy_url(char output, const* char *url, bool relative)
186	{
187	/ we must add this with whitespace-replacing /
188	bool left = TRUE;
189	const unsigned char *iptr;
190	char *optr = output;
191	const unsigned char host_sep = (const* unsigned char *) url;
192
193	if(!relative)
194	host_sep = (const unsigned char *) find_host_sep(url);
195
196	for(iptr = (unsigned char )url; /* read from here /
197	iptr; /* until zero byte /
198	iptr++) {
199
200	if(iptr < host_sep) {
201	optr++ = iptr;
202	continue;
203	}
204
205	switch(*iptr) {
206	case `'?'`:
207	left = FALSE;
208	/ FALLTHROUGH /
209	default:
210	if(urlchar_needs_escaping(*iptr)) {
211	msnprintf(optr, `4`, "%%%02x", *iptr);
212	optr += `3`;
213	}
214	else
215	optr++=iptr;
216	break;
217	case `' '`:
218	if(left) {
219	optr++=`'%'`; /* add a '%' /
220	optr++=`'2'`; /* add a '2' /
221	optr++=`'0'`; /* add a '0' /
222	}
223	else
224	optr++=`'+'`; /* add a '+' here /
225	break;
226	}
227	}
228	optr = `0`; /* zero terminate output buffer /
229
230	}
231
232	/*
233	* Returns true if the given URL is absolute (as opposed to relative) within
234	* the buffer size. Returns the scheme in the buffer if TRUE and 'buf' is
235	* non-NULL.
236	*/
237	bool Curl_is_absolute_url(const char url, char* *buf, size_t buflen)
238	{
239	size_t i;
240	#ifdef WIN32
241	if(STARTS_WITH_DRIVE_PREFIX(url))
242	return FALSE;
243	#endif
244	for(i = `0`; i < buflen && url[i]; ++i) {
245	char s = url[i];
246	if((s == `':'`) && (url[i + `1`] == `'/'`)) {
247	if(buf)
248	buf[i] = `0`;
249	return TRUE;
250	}
251	/ RFC 3986 3.1 explains:*
252	scheme = ALPHA ( ALPHA / DIGIT / "+" / "-" / "." )*
253	*/
254	else if(ISALNUM(s) \|\| (s == `'+'`) \|\| (s == `'-'`) \|\| (s == `'.'`) ) {
255	if(buf)
256	buf[i] = (char)TOLOWER(s);
257	}
258	else
259	break;
260	}
261	return FALSE;
262	}
263
264	/*
265	* Concatenate a relative URL to a base URL making it absolute.
266	* URL-encodes any spaces.
267	* The returned pointer must be freed by the caller unless NULL
268	* (returns NULL on out of memory).
269	*/
270	static char concat_url(const* char base, const* char *relurl)
271	{
272	/***
273	TRY to append this new path to the old URL
274	to the right of the host part. Oh crap, this is doomed to cause
275	problems in the future...
276	*/
277	char *newest;
278	char *protsep;
279	char *pathsep;
280	size_t newlen;
281	bool host_changed = FALSE;
282
283	const char *useurl = relurl;
284	size_t urllen;
285
286	/ we must make our own copy of the URL to play with, as it may*
287	point to read-only data /*
288	char *url_clone = strdup(base);
289
290	if(!url_clone)
291	return NULL; / skip out of this NOW /
292
293	/ protsep points to the start of the host name /
294	protsep = strstr(url_clone, "//");
295	if(!protsep)
296	protsep = url_clone;
297	else
298	protsep += `2`; / pass the slashes /
299
300	if(`'/'` != relurl[`0`]) {
301	int level = `0`;
302
303	/ First we need to find out if there's a ?-letter in the URL,*
304	and cut it and the right-side of that off /*
305	pathsep = strchr(protsep, `'?'`);
306	if(pathsep)
307	*pathsep = `0`;
308
309	/ we have a relative path to append to the last slash if there's one*
310	available, or if the new URL is just a query string (starts with a
311	'?') we append the new one at the end of the entire currently worked
312	out URL /*
313	if(useurl[`0`] != `'?'`) {
314	pathsep = strrchr(protsep, `'/'`);
315	if(pathsep)
316	*pathsep = `0`;
317	}
318
319	/ Check if there's any slash after the host name, and if so, remember*
320	that position instead /*
321	pathsep = strchr(protsep, `'/'`);
322	if(pathsep)
323	protsep = pathsep + `1`;
324	else
325	protsep = NULL;
326
327	/ now deal with one "./" or any amount of "../" in the newurl*
328	and act accordingly /*
329
330	if((useurl[`0`] == `'.'`) && (useurl[`1`] == `'/'`))
331	useurl += `2`; / just skip the "./" /
332
333	while((useurl[`0`] == `'.'`) &&
334	(useurl[`1`] == `'.'`) &&
335	(useurl[`2`] == `'/'`)) {
336	level++;
337	useurl += `3`; / pass the "../" /
338	}
339
340	if(protsep) {
341	while(level--) {
342	/ cut off one more level from the right of the original URL /
343	pathsep = strrchr(protsep, `'/'`);
344	if(pathsep)
345	*pathsep = `0`;
346	else {
347	*protsep = `0`;
348	break;
349	}
350	}
351	}
352	}
353	else {
354	/ We got a new absolute path for this server /
355
356	if(relurl[`1`] == `'/'`) {
357	/ the new URL starts with //, just keep the protocol part from the*
358	original one /*
359	*protsep = `0`;
360	useurl = &relurl[`2`]; / we keep the slashes from the original, so we*
361	skip the new ones /*
362	host_changed = TRUE;
363	}
364	else {
365	/ cut off the original URL from the first slash, or deal with URLs*
366	without slash /*
367	pathsep = strchr(protsep, `'/'`);
368	if(pathsep) {
369	/ When people use badly formatted URLs, such as*
370	"http://www.url.com?dir=/home/daniel" we must not use the first
371	slash, if there's a ?-letter before it! /*
372	char *sep = strchr(protsep, `'?'`);
373	if(sep && (sep < pathsep))
374	pathsep = sep;
375	*pathsep = `0`;
376	}
377	else {
378	/ There was no slash. Now, since we might be operating on a badly*
379	formatted URL, such as "http://www.url.com?id=2380" which doesn't
380	use a slash separator as it is supposed to, we need to check for a
381	?-letter as well! /*
382	pathsep = strchr(protsep, `'?'`);
383	if(pathsep)
384	*pathsep = `0`;
385	}
386	}
387	}
388
389	/ If the new part contains a space, this is a mighty stupid redirect*
390	but we still make an effort to do "right". To the left of a '?'
391	letter we replace each space with %20 while it is replaced with '+'
392	on the right side of the '?' letter.
393	*/
394	newlen = strlen_url(useurl, !host_changed);
395
396	urllen = strlen(url_clone);
397
398	newest = malloc(urllen + `1` + / possible slash /
399	newlen + `1` / zero byte /);
400
401	if(!newest) {
402	free(url_clone); / don't leak this /
403	return NULL;
404	}
405
406	/ copy over the root url part /
407	memcpy(newest, url_clone, urllen);
408
409	/ check if we need to append a slash /
410	if((`'/'` == useurl[`0`]) \|\| (protsep && !*protsep) \|\| (`'?'` == useurl[`0`]))
411	;
412	else
413	newest[urllen++]=`'/'`;
414
415	/ then append the new piece on the right side /
416	strcpy_url(&newest[urllen], useurl, !host_changed);
417
418	free(url_clone);
419
420	return newest;
421	}
422
423	/*
424	* parse_hostname_login()
425	*
426	* Parse the login details (user name, password and options) from the URL and
427	* strip them out of the host name
428	*
429	*/
430	static CURLUcode parse_hostname_login(struct Curl_URL *u,
431	const struct Curl_handler *h,
432	char **hostname,
433	unsigned int flags)
434	{
435	CURLUcode result = CURLUE_OK;
436	CURLcode ccode;
437	char *userp = NULL;
438	char *passwdp = NULL;
439	char *optionsp = NULL;
440
441	/ At this point, we're hoping all the other special cases have*
442	* been taken care of, so conn->host.name is at most
443	* [user[:password][;options]]@]hostname
444	*
445	* We need somewhere to put the embedded details, so do that first.
446	*/
447
448	char ptr = strchr(hostname, `'@'`);
449	char login = hostname;
450
451	if(!ptr)
452	goto out;
453
454	/ We will now try to extract the*
455	* possible login information in a string like:
456	* ftp://user:password@ftp.my.site:8021/README */
457	*hostname = ++ptr;
458
459	/ We could use the login information in the URL so extract it. Only parse*
460	options if the handler says we should. Note that 'h' might be NULL! /*
461	ccode = Curl_parse_login_details(login, ptr - login - `1`,
462	&userp, &passwdp,
463	(h && (h->flags & PROTOPT_URLOPTIONS)) ?
464	&optionsp:NULL);
465	if(ccode) {
466	result = CURLUE_MALFORMED_INPUT;
467	goto out;
468	}
469
470	if(userp) {
471	if(flags & CURLU_DISALLOW_USER) {
472	/ Option DISALLOW_USER is set and url contains username. /
473	result = CURLUE_USER_NOT_ALLOWED;
474	goto out;
475	}
476
477	u->user = userp;
478	}
479
480	if(passwdp)
481	u->password = passwdp;
482
483	if(optionsp)
484	u->options = optionsp;
485
486	return CURLUE_OK;
487	out:
488
489	free(userp);
490	free(passwdp);
491	free(optionsp);
492
493	return result;
494	}
495
496	UNITTEST CURLUcode Curl_parse_port(struct Curl_URL u, char* *hostname)
497	{
498	char *portptr = NULL;
499	char endbracket;
500	int len;
501
502	/*
503	* Find the end of an IPv6 address, either on the ']' ending bracket or
504	* a percent-encoded zone index.
505	*/
506	if(`1` == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
507	&endbracket, &len)) {
508	if(`']'` == endbracket)
509	portptr = &hostname[len];
510	else if(`'%'` == endbracket) {
511	int zonelen = len;
512	if(`1` == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
513	if(`']'` != endbracket)
514	return CURLUE_MALFORMED_INPUT;
515	portptr = &hostname[--zonelen + len + `1`];
516	}
517	else
518	return CURLUE_MALFORMED_INPUT;
519	}
520	else
521	return CURLUE_MALFORMED_INPUT;
522
523	/ this is a RFC2732-style specified IP-address /
524	if(portptr && *portptr) {
525	if(*portptr != `':'`)
526	return CURLUE_MALFORMED_INPUT;
527	}
528	else
529	portptr = NULL;
530	}
531	else
532	portptr = strchr(hostname, `':'`);
533
534	if(portptr) {
535	char *rest;
536	long port;
537	char portbuf[`7`];
538
539	/ Browser behavior adaptation. If there's a colon with no digits after,*
540	just cut off the name there which makes us ignore the colon and just
541	use the default port. Firefox, Chrome and Safari all do that. /*
542	if(!portptr[`1`]) {
543	*portptr = `'\0'`;
544	return CURLUE_OK;
545	}
546
547	if(!ISDIGIT(portptr[`1`]))
548	return CURLUE_BAD_PORT_NUMBER;
549
550	port = strtol(portptr + `1`, &rest, `10`); / Port number must be decimal /
551
552	if((port <= `0`) \|\| (port > `0xffff`))
553	/ Single unix standard says port numbers are 16 bits long, but we don't*
554	treat port zero as OK. /*
555	return CURLUE_BAD_PORT_NUMBER;
556
557	if(rest[`0`])
558	return CURLUE_BAD_PORT_NUMBER;
559
560	portptr++ = `'\0'`; /* cut off the name there /
561	*rest = `0`;
562	/ generate a new port number string to get rid of leading zeroes etc /
563	msnprintf(portbuf, sizeof(portbuf), "%ld", port);
564	u->portnum = port;
565	u->port = strdup(portbuf);
566	if(!u->port)
567	return CURLUE_OUT_OF_MEMORY;
568	}
569
570	return CURLUE_OK;
571	}
572
573	/ scan for byte values < 31 or 127 /
574	static CURLUcode junkscan(char *part)
575	{
576	if(part) {
577	static const char badbytes[]={
578	/ / `0x01`, `0x02`, `0x03`, `0x04`, `0x05`, `0x06`, `0x07`,
579	`0x08`, `0x09`, `0x0a`, `0x0b`, `0x0c`, `0x0d`, `0x0e`, `0x0f`,
580	`0x10`, `0x11`, `0x12`, `0x13`, `0x14`, `0x15`, `0x16`, `0x17`,
581	`0x18`, `0x19`, `0x1a`, `0x1b`, `0x1c`, `0x1d`, `0x1e`, `0x1f`,
582	`0x7f`,
583	`0x00` / zero terminate /
584	};
585	size_t n = strlen(part);
586	size_t nfine = strcspn(part, badbytes);
587	if(nfine != n)
588	/ since we don't know which part is scanned, return a generic error*
589	code /*
590	return CURLUE_MALFORMED_INPUT;
591	}
592	return CURLUE_OK;
593	}
594
595	static CURLUcode hostname_check(struct Curl_URL u, char* *hostname)
596	{
597	size_t len;
598	size_t hlen = strlen(hostname);
599
600	if(hostname[`0`] == `'['`) {
601	#ifdef ENABLE_IPV6
602	char dest[`16`]; / fits a binary IPv6 address /
603	#endif
604	const char *l = "0123456789abcdefABCDEF:.";
605	if(hlen < `5`) / '[::1]' is the shortest possible valid string /
606	return CURLUE_MALFORMED_INPUT;
607	hostname++;
608	hlen -= `2`;
609
610	if(hostname[hlen] != `']'`)
611	return CURLUE_MALFORMED_INPUT;
612
613	/ only valid letters are ok /
614	len = strspn(hostname, l);
615	if(hlen != len) {
616	hlen = len;
617	if(hostname[len] == `'%'`) {
618	/ this could now be '%[zone id]' /
619	char zoneid[`16`];
620	int i = `0`;
621	char *h = &hostname[len + `1`];
622	/ pass '25' if present and is a url encoded percent sign /
623	if(!strncmp(h, "25", `2`) && h[`2`] && (h[`2`] != `']'`))
624	h += `2`;
625	while(h && (h != `']'`) && (i < `15`))
626	zoneid[i++] = *h++;
627	if(!i \|\| (`']'` != *h))
628	return CURLUE_MALFORMED_INPUT;
629	zoneid[i] = `0`;
630	u->zoneid = strdup(zoneid);
631	if(!u->zoneid)
632	return CURLUE_OUT_OF_MEMORY;
633	hostname[len] = `']'`; / insert end bracket /
634	hostname[len + `1`] = `0`; / terminate the hostname /
635	}
636	else
637	return CURLUE_MALFORMED_INPUT;
638	/ hostname is fine /
639	}
640	#ifdef ENABLE_IPV6
641	hostname[hlen] = `0`; / end the address there /
642	if(`1` != Curl_inet_pton(AF_INET6, hostname, dest))
643	return CURLUE_MALFORMED_INPUT;
644	hostname[hlen] = `']'`; / restore ending bracket /
645	#endif
646	}
647	else {
648	/ letters from the second string is not ok /
649	len = strcspn(hostname, " ");
650	if(hlen != len)
651	/ hostname with bad content /
652	return CURLUE_MALFORMED_INPUT;
653	}
654	if(!hostname[`0`])
655	return CURLUE_NO_HOST;
656	return CURLUE_OK;
657	}
658
/* non-zero for the characters that end the host name part: '/', '?', '#' */
#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
660
661	static CURLUcode seturl(const char url, CURLU u, unsigned int flags)
662	{
663	char *path;
664	bool path_alloced = FALSE;
665	char *hostname;
666	char *query = NULL;
667	char *fragment = NULL;
668	CURLUcode result;
669	bool url_has_scheme = FALSE;
670	char schemebuf[MAX_SCHEME_LEN + `1`];
671	char *schemep = NULL;
672	size_t schemelen = `0`;
673	size_t urllen;
674	const struct Curl_handler *h = NULL;
675
676	if(!url)
677	return CURLUE_MALFORMED_INPUT;
678
679	/*************************************************************
680	* Parse the URL.
681	************************************************************/
682	/ allocate scratch area /
683	urllen = strlen(url);
684	if(urllen > CURL_MAX_INPUT_LENGTH)
685	/ excessive input length /
686	return CURLUE_MALFORMED_INPUT;
687
688	path = u->scratch = malloc(urllen * `2` + `2`);
689	if(!path)
690	return CURLUE_OUT_OF_MEMORY;
691
692	hostname = &path[urllen + `1`];
693	hostname[`0`] = `0`;
694
695	if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
696	url_has_scheme = TRUE;
697	schemelen = strlen(schemebuf);
698	}
699
700	/ handle the file: scheme /
701	if(url_has_scheme && strcasecompare(schemebuf, "file")) {
702	/ path has been allocated large enough to hold this /
703	strcpy(path, &url[`5`]);
704
705	hostname = NULL; / no host for file: URLs /
706	u->scheme = strdup("file");
707	if(!u->scheme)
708	return CURLUE_OUT_OF_MEMORY;
709
710	/ Extra handling URLs with an authority component (i.e. that start with*
711	* "file://")
712	*
713	* We allow omitted hostname (e.g. file:/<path>) -- valid according to
714	* RFC 8089, but not the (current) WHAT-WG URL spec.
715	*/
716	if(path[`0`] == `'/'` && path[`1`] == `'/'`) {
717	/ swallow the two slashes /
718	char *ptr = &path[`2`];
719
720	/*
721	* According to RFC 8089, a file: URL can be reliably dereferenced if:
722	*
723	* o it has no/blank hostname, or
724	*
725	* o the hostname matches "localhost" (case-insensitively), or
726	*
727	* o the hostname is a FQDN that resolves to this machine.
728	*
729	* For brevity, we only consider URLs with empty, "localhost", or
730	* "127.0.0.1" hostnames as local.
731	*
732	* Additionally, there is an exception for URLs with a Windows drive
733	* letter in the authority (which was accidentally omitted from RFC 8089
734	* Appendix E, but believe me, it was meant to be there. --MK)
735	*/
736	if(ptr[`0`] != `'/'` && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
737	/ the URL includes a host name, it must match "localhost" or*
738	"127.0.0.1" to be valid /*
739	if(!checkprefix("localhost/", ptr) &&
740	!checkprefix("127.0.0.1/", ptr)) {
741	/ Invalid file://hostname/, expected localhost or 127.0.0.1 or*
742	none /*
743	return CURLUE_MALFORMED_INPUT;
744	}
745	ptr += `9`; / now points to the slash after the host /
746	}
747
748	path = ptr;
749	}
750
751	#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
752	/ Don't allow Windows drive letters when not in Windows.*
753	* This catches both "file:/c:" and "file:c:" */
754	if((`'/'` == path[`0`] && STARTS_WITH_URL_DRIVE_PREFIX(&path[`1`])) \|\|
755	STARTS_WITH_URL_DRIVE_PREFIX(path)) {
756	/ File drive letters are only accepted in MSDOS/Windows /
757	return CURLUE_MALFORMED_INPUT;
758	}
759	#else
760	/ If the path starts with a slash and a drive letter, ditch the slash /
761	if(`'/'` == path[`0`] && STARTS_WITH_URL_DRIVE_PREFIX(&path[`1`])) {
762	/ This cannot be done with strcpy, as the memory chunks overlap! /
763	memmove(path, &path[`1`], strlen(&path[`1`]) + `1`);
764	}
765	#endif
766
767	}
768	else {
769	/ clear path /
770	const char *p;
771	const char *hostp;
772	size_t len;
773	path[`0`] = `0`;
774
775	if(url_has_scheme) {
776	int i = `0`;
777	p = &url[schemelen + `1`];
778	while(p && (*p == `'/'`) && (i < `4`)) {
779	p++;
780	i++;
781	}
782	if((i < `1`) \|\| (i>`3`))
783	/ less than one or more than three slashes /
784	return CURLUE_MALFORMED_INPUT;
785
786	schemep = schemebuf;
787	if(!Curl_builtin_scheme(schemep) &&
788	!(flags & CURLU_NON_SUPPORT_SCHEME))
789	return CURLUE_UNSUPPORTED_SCHEME;
790
791	if(junkscan(schemep))
792	return CURLUE_MALFORMED_INPUT;
793
794	}
795	else {
796	/ no scheme! /
797
798	if(!(flags & (CURLU_DEFAULT_SCHEME\|CURLU_GUESS_SCHEME)))
799	return CURLUE_MALFORMED_INPUT;
800	if(flags & CURLU_DEFAULT_SCHEME)
801	schemep = (char *) DEFAULT_SCHEME;
802
803	/*
804	* The URL was badly formatted, let's try without scheme specified.
805	*/
806	p = url;
807	}
808	hostp = p; / host name starts here /
809
810	while(p && !HOSTNAME_END(p)) / find end of host name /
811	p++;
812
813	len = p - hostp;
814	if(len) {
815	memcpy(hostname, hostp, len);
816	hostname[len] = `0`;
817	}
818	else {
819	if(!(flags & CURLU_NO_AUTHORITY))
820	return CURLUE_MALFORMED_INPUT;
821	}
822
823	if((flags & CURLU_GUESS_SCHEME) && !schemep) {
824	/ legacy curl-style guess based on host name /
825	if(checkprefix("ftp.", hostname))
826	schemep = (char *)"ftp";
827	else if(checkprefix("dict.", hostname))
828	schemep = (char *)"dict";
829	else if(checkprefix("ldap.", hostname))
830	schemep = (char *)"ldap";
831	else if(checkprefix("imap.", hostname))
832	schemep = (char *)"imap";
833	else if(checkprefix("smtp.", hostname))
834	schemep = (char *)"smtp";
835	else if(checkprefix("pop3.", hostname))
836	schemep = (char *)"pop3";
837	else
838	schemep = (char *)"http";
839	}
840
841	len = strlen(p);
842	memcpy(path, p, len);
843	path[len] = `0`;
844
845	u->scheme = strdup(schemep);
846	if(!u->scheme)
847	return CURLUE_OUT_OF_MEMORY;
848	}
849
850	/ if this is a known scheme, get some details /
851	h = Curl_builtin_scheme(u->scheme);
852
853	if(junkscan(path))
854	return CURLUE_MALFORMED_INPUT;
855
856	if((flags & CURLU_URLENCODE) && path[`0`]) {
857	/ worst case output length is 3x the original! /
858	char newp = malloc(strlen(path) `3`);
859	if(!newp)
860	return CURLUE_OUT_OF_MEMORY;
861	path_alloced = TRUE;
862	strcpy_url(newp, path, TRUE); / consider it relative /
863	u->temppath = path = newp;
864	}
865
866	fragment = strchr(path, `'#'`);
867	if(fragment) {
868	*fragment++ = `0`;
869	if(fragment[`0`]) {
870	u->fragment = strdup(fragment);
871	if(!u->fragment)
872	return CURLUE_OUT_OF_MEMORY;
873	}
874	}
875
876	query = strchr(path, `'?'`);
877	if(query) {
878	*query++ = `0`;
879	/ done even if the query part is a blank string /
880	u->query = strdup(query);
881	if(!u->query)
882	return CURLUE_OUT_OF_MEMORY;
883	}
884
885	if(!path[`0`])
886	/ if there's no path left set, unset /
887	path = NULL;
888	else {
889	if(!(flags & CURLU_PATH_AS_IS)) {
890	/ remove ../ and ./ sequences according to RFC3986 /
891	char *newp = Curl_dedotdotify(path);
892	if(!newp)
893	return CURLUE_OUT_OF_MEMORY;
894
895	if(strcmp(newp, path)) {
896	/ if we got a new version /
897	if(path_alloced)
898	Curl_safefree(u->temppath);
899	u->temppath = path = newp;
900	path_alloced = TRUE;
901	}
902	else
903	free(newp);
904	}
905
906	u->path = path_alloced?path:strdup(path);
907	if(!u->path)
908	return CURLUE_OUT_OF_MEMORY;
909	u->temppath = NULL; / used now /
910	}
911
912	if(hostname) {
913	/*
914	* Parse the login details and strip them out of the host name.
915	*/
916	if(junkscan(hostname))
917	return CURLUE_MALFORMED_INPUT;
918
919	result = parse_hostname_login(u, h, &hostname, flags);
920	if(result)
921	return result;
922
923	result = Curl_parse_port(u, hostname);
924	if(result)
925	return result;
926
927	if(`0` == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
928	/ Skip hostname check, it's allowed to be empty. /
929	}
930	else {
931	result = hostname_check(u, hostname);
932	if(result)
933	return result;
934	}
935
936	u->host = strdup(hostname);
937	if(!u->host)
938	return CURLUE_OUT_OF_MEMORY;
939	}
940
941	Curl_safefree(u->scratch);
942	Curl_safefree(u->temppath);
943
944	return CURLUE_OK;
945	}
946
947	/*
948	* Parse the URL and set the relevant members of the Curl_URL struct.
949	*/
950	static CURLUcode parseurl(const char url, CURLU u, unsigned int flags)
951	{
952	CURLUcode result = seturl(url, u, flags);
953	if(result) {
954	free_urlhandle(u);
955	memset(u, `0`, sizeof(struct Curl_URL));
956	}
957	return result;
958	}
959
960	/*
961	*/
962	CURLU curl_url(void*)
963	{
964	return calloc(sizeof(struct Curl_URL), `1`);
965	}
966
967	void curl_url_cleanup(CURLU *u)
968	{
969	if(u) {
970	free_urlhandle(u);
971	free(u);
972	}
973	}
974
975	#define DUP(dest, src, name) \
976	if(src->name) { \
977	dest->name = strdup(src->name); \
978	if(!dest->name) \
979	goto fail; \
980	}
981
982	CURLU curl_url_dup(CURLU in)
983	{
984	struct Curl_URL u = calloc(sizeof(struct* Curl_URL), `1`);
985	if(u) {
986	DUP(u, in, scheme);
987	DUP(u, in, user);
988	DUP(u, in, password);
989	DUP(u, in, options);
990	DUP(u, in, host);
991	DUP(u, in, port);
992	DUP(u, in, path);
993	DUP(u, in, query);
994	DUP(u, in, fragment);
995	u->portnum = in->portnum;
996	}
997	return u;
998	fail:
999	curl_url_cleanup(u);
1000	return NULL;
1001	}
1002
1003	CURLUcode curl_url_get(CURLU *u, CURLUPart what,
1004	char *part, unsigned* int flags)
1005	{
1006	char *ptr;
1007	CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1008	char portbuf[`7`];
1009	bool urldecode = (flags & CURLU_URLDECODE)?`1`:`0`;
1010	bool plusdecode = FALSE;
1011	(void)flags;
1012	if(!u)
1013	return CURLUE_BAD_HANDLE;
1014	if(!part)
1015	return CURLUE_BAD_PARTPOINTER;
1016	*part = NULL;
1017
1018	switch(what) {
1019	case CURLUPART_SCHEME:
1020	ptr = u->scheme;
1021	ifmissing = CURLUE_NO_SCHEME;
1022	urldecode = FALSE; / never for schemes /
1023	break;
1024	case CURLUPART_USER:
1025	ptr = u->user;
1026	ifmissing = CURLUE_NO_USER;
1027	break;
1028	case CURLUPART_PASSWORD:
1029	ptr = u->password;
1030	ifmissing = CURLUE_NO_PASSWORD;
1031	break;
1032	case CURLUPART_OPTIONS:
1033	ptr = u->options;
1034	ifmissing = CURLUE_NO_OPTIONS;
1035	break;
1036	case CURLUPART_HOST:
1037	ptr = u->host;
1038	ifmissing = CURLUE_NO_HOST;
1039	break;
1040	case CURLUPART_ZONEID:
1041	ptr = u->zoneid;
1042	break;
1043	case CURLUPART_PORT:
1044	ptr = u->port;
1045	ifmissing = CURLUE_NO_PORT;
1046	urldecode = FALSE; / never for port /
1047	if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1048	/ there's no stored port number, but asked to deliver*
1049	a default one for the scheme /*
1050	const struct Curl_handler *h =
1051	Curl_builtin_scheme(u->scheme);
1052	if(h) {
1053	msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport);
1054	ptr = portbuf;
1055	}
1056	}
1057	else if(ptr && u->scheme) {
1058	/ there is a stored port number, but ask to inhibit if*
1059	it matches the default one for the scheme /*
1060	const struct Curl_handler *h =
1061	Curl_builtin_scheme(u->scheme);
1062	if(h && (h->defport == u->portnum) &&
1063	(flags & CURLU_NO_DEFAULT_PORT))
1064	ptr = NULL;
1065	}
1066	break;
1067	case CURLUPART_PATH:
1068	ptr = u->path;
1069	if(!ptr) {
1070	ptr = u->path = strdup("/");
1071	if(!u->path)
1072	return CURLUE_OUT_OF_MEMORY;
1073	}
1074	break;
1075	case CURLUPART_QUERY:
1076	ptr = u->query;
1077	ifmissing = CURLUE_NO_QUERY;
1078	plusdecode = urldecode;
1079	break;
1080	case CURLUPART_FRAGMENT:
1081	ptr = u->fragment;
1082	ifmissing = CURLUE_NO_FRAGMENT;
1083	break;
1084	case CURLUPART_URL: {
1085	char *url;
1086	char *scheme;
1087	char *options = u->options;
1088	char *port = u->port;
1089	char *allochost = NULL;
1090	if(u->scheme && strcasecompare("file", u->scheme)) {
1091	url = aprintf("file://%s%s%s",
1092	u->path,
1093	u->fragment? "#": "",
1094	u->fragment? u->fragment : "");
1095	}
1096	else if(!u->host)
1097	return CURLUE_NO_HOST;
1098	else {
1099	const struct Curl_handler *h = NULL;
1100	if(u->scheme)
1101	scheme = u->scheme;
1102	else if(flags & CURLU_DEFAULT_SCHEME)
1103	scheme = (char *) DEFAULT_SCHEME;
1104	else
1105	return CURLUE_NO_SCHEME;
1106
1107	h = Curl_builtin_scheme(scheme);
1108	if(!port && (flags & CURLU_DEFAULT_PORT)) {
1109	/ there's no stored port number, but asked to deliver*
1110	a default one for the scheme /*
1111	if(h) {
1112	msnprintf(portbuf, sizeof(portbuf), "%ld", h->defport);
1113	port = portbuf;
1114	}
1115	}
1116	else if(port) {
1117	/ there is a stored port number, but asked to inhibit if it matches*
1118	the default one for the scheme /*
1119	if(h && (h->defport == u->portnum) &&
1120	(flags & CURLU_NO_DEFAULT_PORT))
1121	port = NULL;
1122	}
1123
1124	if(h && !(h->flags & PROTOPT_URLOPTIONS))
1125	options = NULL;
1126
1127	if((u->host[`0`] == `'['`) && u->zoneid) {
1128	/ make it '[ host %25 zoneid ]' /
1129	size_t hostlen = strlen(u->host);
1130	size_t alen = hostlen + `3` + strlen(u->zoneid) + `1`;
1131	allochost = malloc(alen);
1132	if(!allochost)
1133	return CURLUE_OUT_OF_MEMORY;
1134	memcpy(allochost, u->host, hostlen - `1`);
1135	msnprintf(&allochost[hostlen - `1`], alen - hostlen + `1`,
1136	"%%25%s]", u->zoneid);
1137	}
1138
1139	url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1140	scheme,
1141	u->user ? u->user : "",
1142	u->password ? ":": "",
1143	u->password ? u->password : "",
1144	options ? ";" : "",
1145	options ? options : "",
1146	(u->user \|\| u->password \|\| options) ? "@": "",
1147	allochost ? allochost : u->host,
1148	port ? ":": "",
1149	port ? port : "",
1150	(u->path && (u->path[`0`] != `'/'`)) ? "/": "",
1151	u->path ? u->path : "/",
1152	(u->query && u->query[`0`]) ? "?": "",
1153	(u->query && u->query[`0`]) ? u->query : "",
1154	u->fragment? "#": "",
1155	u->fragment? u->fragment : "");
1156	free(allochost);
1157	}
1158	if(!url)
1159	return CURLUE_OUT_OF_MEMORY;
1160	*part = url;
1161	return CURLUE_OK;
1162	}
1163	default:
1164	ptr = NULL;
1165	break;
1166	}
1167	if(ptr) {
1168	*part = strdup(ptr);
1169	if(!*part)
1170	return CURLUE_OUT_OF_MEMORY;
1171	if(plusdecode) {
1172	/ convert + to space /
1173	char *plus;
1174	for(plus = part; plus; ++plus) {
1175	if(*plus == `'+'`)
1176	*plus = `' '`;
1177	}
1178	}
1179	if(urldecode) {
1180	char *decoded;
1181	size_t dlen;
1182	CURLcode res = Curl_urldecode(NULL, *part, `0`, &decoded, &dlen, TRUE);
1183	free(*part);
1184	if(res) {
1185	*part = NULL;
1186	return CURLUE_URLDECODE;
1187	}
1188	*part = decoded;
1189	}
1190	return CURLUE_OK;
1191	}
1192	else
1193	return ifmissing;
1194	}
1195
1196	CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1197	const char part, unsigned* int flags)
1198	{
1199	char **storep = NULL;
1200	long port = `0`;
1201	bool urlencode = (flags & CURLU_URLENCODE)? `1` : `0`;
1202	bool plusencode = FALSE;
1203	bool urlskipslash = FALSE;
1204	bool appendquery = FALSE;
1205	bool equalsencode = FALSE;
1206
1207	if(!u)
1208	return CURLUE_BAD_HANDLE;
1209	if(!part) {
1210	/ setting a part to NULL clears it /
1211	switch(what) {
1212	case CURLUPART_URL:
1213	break;
1214	case CURLUPART_SCHEME:
1215	storep = &u->scheme;
1216	break;
1217	case CURLUPART_USER:
1218	storep = &u->user;
1219	break;
1220	case CURLUPART_PASSWORD:
1221	storep = &u->password;
1222	break;
1223	case CURLUPART_OPTIONS:
1224	storep = &u->options;
1225	break;
1226	case CURLUPART_HOST:
1227	storep = &u->host;
1228	break;
1229	case CURLUPART_ZONEID:
1230	storep = &u->zoneid;
1231	break;
1232	case CURLUPART_PORT:
1233	u->portnum = `0`;
1234	storep = &u->port;
1235	break;
1236	case CURLUPART_PATH:
1237	storep = &u->path;
1238	break;
1239	case CURLUPART_QUERY:
1240	storep = &u->query;
1241	break;
1242	case CURLUPART_FRAGMENT:
1243	storep = &u->fragment;
1244	break;
1245	default:
1246	return CURLUE_UNKNOWN_PART;
1247	}
1248	if(storep && *storep) {
1249	free(*storep);
1250	*storep = NULL;
1251	}
1252	return CURLUE_OK;
1253	}
1254
1255	switch(what) {
1256	case CURLUPART_SCHEME:
1257	if(strlen(part) > MAX_SCHEME_LEN)
1258	/ too long /
1259	return CURLUE_MALFORMED_INPUT;
1260	if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1261	/ verify that it is a fine scheme /
1262	!Curl_builtin_scheme(part))
1263	return CURLUE_UNSUPPORTED_SCHEME;
1264	storep = &u->scheme;
1265	urlencode = FALSE; / never /
1266	break;
1267	case CURLUPART_USER:
1268	storep = &u->user;
1269	break;
1270	case CURLUPART_PASSWORD:
1271	storep = &u->password;
1272	break;
1273	case CURLUPART_OPTIONS:
1274	storep = &u->options;
1275	break;
1276	case CURLUPART_HOST:
1277	storep = &u->host;
1278	free(u->zoneid);
1279	u->zoneid = NULL;
1280	break;
1281	case CURLUPART_ZONEID:
1282	storep = &u->zoneid;
1283	break;
1284	case CURLUPART_PORT:
1285	{
1286	char *endp;
1287	urlencode = FALSE; / never /
1288	port = strtol(part, &endp, `10`); / Port number must be decimal /
1289	if((port <= `0`) \|\| (port > `0xffff`))
1290	return CURLUE_BAD_PORT_NUMBER;
1291	if(*endp)
1292	/ weirdly provided number, not good! /
1293	return CURLUE_MALFORMED_INPUT;
1294	storep = &u->port;
1295	}
1296	break;
1297	case CURLUPART_PATH:
1298	urlskipslash = TRUE;
1299	storep = &u->path;
1300	break;
1301	case CURLUPART_QUERY:
1302	plusencode = urlencode;
1303	appendquery = (flags & CURLU_APPENDQUERY)?`1`:`0`;
1304	equalsencode = appendquery;
1305	storep = &u->query;
1306	break;
1307	case CURLUPART_FRAGMENT:
1308	storep = &u->fragment;
1309	break;
1310	case CURLUPART_URL: {
1311	/*
1312	* Allow a new URL to replace the existing (if any) contents.
1313	*
1314	* If the existing contents is enough for a URL, allow a relative URL to
1315	* replace it.
1316	*/
1317	CURLUcode result;
1318	char *oldurl;
1319	char *redired_url;
1320	CURLU *handle2;
1321
1322	if(Curl_is_absolute_url(part, NULL, MAX_SCHEME_LEN + `1`)) {
1323	handle2 = curl_url();
1324	if(!handle2)
1325	return CURLUE_OUT_OF_MEMORY;
1326	result = parseurl(part, handle2, flags);
1327	if(!result)
1328	mv_urlhandle(handle2, u);
1329	else
1330	curl_url_cleanup(handle2);
1331	return result;
1332	}
1333	/ extract the full "old" URL to do the redirect on /
1334	result = curl_url_get(u, CURLUPART_URL, &oldurl, flags);
1335	if(result) {
1336	/ couldn't get the old URL, just use the new! /
1337	handle2 = curl_url();
1338	if(!handle2)
1339	return CURLUE_OUT_OF_MEMORY;
1340	result = parseurl(part, handle2, flags);
1341	if(!result)
1342	mv_urlhandle(handle2, u);
1343	else
1344	curl_url_cleanup(handle2);
1345	return result;
1346	}
1347
1348	/ apply the relative part to create a new URL /
1349	redired_url = concat_url(oldurl, part);
1350	free(oldurl);
1351	if(!redired_url)
1352	return CURLUE_OUT_OF_MEMORY;
1353
1354	/ now parse the new URL /
1355	handle2 = curl_url();
1356	if(!handle2) {
1357	free(redired_url);
1358	return CURLUE_OUT_OF_MEMORY;
1359	}
1360	result = parseurl(redired_url, handle2, flags);
1361	free(redired_url);
1362	if(!result)
1363	mv_urlhandle(handle2, u);
1364	else
1365	curl_url_cleanup(handle2);
1366	return result;
1367	}
1368	default:
1369	return CURLUE_UNKNOWN_PART;
1370	}
1371	DEBUGASSERT(storep);
1372	{
1373	const char *newp = part;
1374	size_t nalloc = strlen(part);
1375
1376	if(nalloc > CURL_MAX_INPUT_LENGTH)
1377	/ excessive input length /
1378	return CURLUE_MALFORMED_INPUT;
1379
1380	if(urlencode) {
1381	const unsigned char *i;
1382	char *o;
1383	bool free_part = FALSE;
1384	char enc = malloc(nalloc `3` + `1`); / for worst case! /
1385	if(!enc)
1386	return CURLUE_OUT_OF_MEMORY;
1387	if(plusencode) {
1388	/ space to plus /
1389	i = (const unsigned char *)part;
1390	for(o = enc; *i; ++o, ++i)
1391	o = (i == `' '`) ? `'+'` : *i;
1392	o = `0`; /* zero terminate /
1393	part = strdup(enc);
1394	if(!part) {
1395	free(enc);
1396	return CURLUE_OUT_OF_MEMORY;
1397	}
1398	free_part = TRUE;
1399	}
1400	for(i = (const unsigned char )part, o = enc; i; i++) {
1401	if(Curl_isunreserved(*i) \|\|
1402	((*i == `'/'`) && urlskipslash) \|\|
1403	((*i == `'='`) && equalsencode) \|\|
1404	((*i == `'+'`) && plusencode)) {
1405	if((*i == `'='`) && equalsencode)
1406	/ only skip the first equals sign /
1407	equalsencode = FALSE;
1408	o = i;
1409	o++;
1410	}
1411	else {
1412	msnprintf(o, `4`, "%%%02x", *i);
1413	o += `3`;
1414	}
1415	}
1416	o = `0`; /* zero terminate /
1417	newp = enc;
1418	if(free_part)
1419	free((char *)part);
1420	}
1421	else {
1422	char *p;
1423	newp = strdup(part);
1424	if(!newp)
1425	return CURLUE_OUT_OF_MEMORY;
1426	p = (char *)newp;
1427	while(*p) {
1428	/ make sure percent encoded are lower case /
1429	if((*p == `'%'`) && ISXDIGIT(p[`1`]) && ISXDIGIT(p[`2`]) &&
1430	(ISUPPER(p[`1`]) \|\| ISUPPER(p[`2`]))) {
1431	p[`1`] = (char)TOLOWER(p[`1`]);
1432	p[`2`] = (char)TOLOWER(p[`2`]);
1433	p += `3`;
1434	}
1435	else
1436	p++;
1437	}
1438	}
1439
1440	if(appendquery) {
1441	/ Append the string onto the old query. Add a '&' separator if none is*
1442	present at the end of the exsting query already /*
1443	size_t querylen = u->query ? strlen(u->query) : `0`;
1444	bool addamperand = querylen && (u->query[querylen -`1`] != `'&'`);
1445	if(querylen) {
1446	size_t newplen = strlen(newp);
1447	char *p = malloc(querylen + addamperand + newplen + `1`);
1448	if(!p) {
1449	free((char *)newp);
1450	return CURLUE_OUT_OF_MEMORY;
1451	}
1452	strcpy(p, u->query); / original query /
1453	if(addamperand)
1454	p[querylen] = `'&'`; / ampersand /
1455	strcpy(&p[querylen + addamperand], newp); / new suffix /
1456	free((char *)newp);
1457	free(*storep);
1458	*storep = p;
1459	return CURLUE_OK;
1460	}
1461	}
1462
1463	if(what == CURLUPART_HOST) {
1464	if(`0` == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) {
1465	/ Skip hostname check, it's allowed to be empty. /
1466	}
1467	else {
1468	if(hostname_check(u, (char *)newp)) {
1469	free((char *)newp);
1470	return CURLUE_MALFORMED_INPUT;
1471	}
1472	}
1473	}
1474
1475	free(*storep);
1476	storep = (char* *)newp;
1477	}
1478	/ set after the string, to make it not assigned if the allocation above*
1479	fails /*
1480	if(port)
1481	u->portnum = port;
1482	return CURLUE_OK;
1483	}
1484

Browse the source code of Curl/lib/urlapi.c