1/***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24
25#include "curl_setup.h"
26
27#include "urldata.h"
28#include "urlapi-int.h"
29#include "strcase.h"
30#include "url.h"
31#include "escape.h"
32#include "curl_ctype.h"
33#include "inet_pton.h"
34#include "inet_ntop.h"
35#include "strdup.h"
36#include "idn.h"
37#include "curl_memrchr.h"
38
39/* The last 3 #include files should be in this order */
40#include "curl_printf.h"
41#include "curl_memory.h"
42#include "memdebug.h"
43
/* MSDOS/Windows style drive prefix, e.g. the "c:" in "c:foo".
 *
 * The argument is evaluated more than once, so it must not have side
 * effects. It is fully parenthesized so that expressions such as 'p + 1'
 * can be passed safely. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  (((('a' <= (str)[0]) && ((str)[0] <= 'z')) || \
    (('A' <= (str)[0]) && ((str)[0] <= 'Z'))) && \
   ((str)[1] == ':'))

/* MSDOS/Windows style drive prefix as found in URLs: a letter followed by
 * ':' (or '|' instead of ':'), which in turn must be followed by a slash, a
 * backslash or the terminating NUL.
 *
 * The argument is evaluated more than once, so it must not have side
 * effects. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  (((('a' <= (str)[0]) && ((str)[0] <= 'z')) || \
    (('A' <= (str)[0]) && ((str)[0] <= 'Z'))) && \
   (((str)[1] == ':') || ((str)[1] == '|')) && \
   (((str)[2] == '/') || ((str)[2] == '\\') || ((str)[2] == 0)))
57
/* Upper bound for the length of a URL scheme name. A scheme is never URL
   encoded, so this limit applies to the raw characters before the colon. */
#define MAX_SCHEME_LEN 40
60
/*
 * IPv6 addresses are still parsed when ENABLE_IPV6 is not defined. Provide a
 * file-local stand-in for AF_INET6 in that case, but only if the system
 * headers did not already define one.
 */
#ifndef ENABLE_IPV6
#ifndef AF_INET6
#define AF_INET6 (AF_INET + 1)
#endif
#endif
69
/*
 * Internal representation of a CURLU handle.
 *
 * Every string member is either NULL or a separately allocated string that
 * free_urlhandle() releases. The strings hold the URL-encoded form of each
 * component.
 */
struct Curl_URL {
  char *scheme;   /* scheme name, without the colon */
  char *user;     /* user name from the login part */
  char *password; /* password from the login part */
  char *options;  /* login options; IMAP only? */
  char *host;     /* host name or bracketed IPv6 address */
  char *zoneid;   /* zone id of a numerical IPv6 address */
  char *port;     /* port number as a decimal string */
  char *path;
  char *query;
  char *fragment;
  long portnum;   /* 'port' in numerical form */
};
84
/* Scheme assumed for scheme-less URLs when CURLU_DEFAULT_SCHEME is set */
#define DEFAULT_SCHEME "https"
86
87static void free_urlhandle(struct Curl_URL *u)
88{
89 free(u->scheme);
90 free(u->user);
91 free(u->password);
92 free(u->options);
93 free(u->host);
94 free(u->zoneid);
95 free(u->port);
96 free(u->path);
97 free(u->query);
98 free(u->fragment);
99}
100
/*
 * Return a pointer to the character that ends the host name part of 'url':
 * the first '/' or '?' found after the "//" that introduces the authority
 * (or after the start of the string when there is no "//"). The '?' matters
 * for URLs like http://www.example.com?id=2380
 *
 * If neither separator exists, the returned pointer is the terminating null
 * byte of 'url'.
 */
static const char *find_host_sep(const char *url)
{
  const char *end = url + strlen(url);
  const char *host = strstr(url, "//");
  const char *slash;
  const char *qmark;

  /* the host name begins right after the double slash, if there is one */
  host = host ? host + 2 : url;

  slash = strchr(host, '/');
  qmark = strchr(host, '?');

  /* a missing separator counts as being located at the end of the string */
  if(!slash)
    slash = end;
  if(!qmark)
    qmark = end;

  /* whichever comes first terminates the host name */
  return (slash < qmark) ? slash : qmark;
}
128
/*
 * Tell whether a byte in a URL must be percent-encoded: true for every byte
 * that is neither a control character, nor white space, nor a graphical
 * character according to the ISCNTRL/ISSPACE/ISGRAPH macros.
 */
#define urlchar_needs_escaping(c) \
  (!ISCNTRL(c) && !ISSPACE(c) && !ISGRAPH(c))
133
134static const char hexdigits[] = "0123456789abcdef";
135/* urlencode_str() writes data into an output dynbuf and URL-encodes the
136 * spaces in the source URL accordingly.
137 *
138 * URL encoding should be skipped for host names, otherwise IDN resolution
139 * will fail.
140 */
141static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
142 size_t len, bool relative,
143 bool query)
144{
145 /* we must add this with whitespace-replacing */
146 bool left = !query;
147 const unsigned char *iptr;
148 const unsigned char *host_sep = (const unsigned char *) url;
149
150 if(!relative)
151 host_sep = (const unsigned char *) find_host_sep(url);
152
153 for(iptr = (unsigned char *)url; /* read from here */
154 len; iptr++, len--) {
155
156 if(iptr < host_sep) {
157 if(Curl_dyn_addn(s: o, mem: iptr, len: 1))
158 return CURLUE_OUT_OF_MEMORY;
159 continue;
160 }
161
162 if(*iptr == ' ') {
163 if(left) {
164 if(Curl_dyn_addn(s: o, mem: "%20", len: 3))
165 return CURLUE_OUT_OF_MEMORY;
166 }
167 else {
168 if(Curl_dyn_addn(s: o, mem: "+", len: 1))
169 return CURLUE_OUT_OF_MEMORY;
170 }
171 continue;
172 }
173
174 if(*iptr == '?')
175 left = FALSE;
176
177 if(urlchar_needs_escaping(*iptr)) {
178 char out[3]={'%'};
179 out[1] = hexdigits[*iptr>>4];
180 out[2] = hexdigits[*iptr & 0xf];
181 if(Curl_dyn_addn(s: o, mem: out, len: 3))
182 return CURLUE_OUT_OF_MEMORY;
183 }
184 else {
185 if(Curl_dyn_addn(s: o, mem: iptr, len: 1))
186 return CURLUE_OUT_OF_MEMORY;
187 }
188 }
189
190 return CURLUE_OK;
191}
192
193/*
194 * Returns the length of the scheme if the given URL is absolute (as opposed
195 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
196 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
197 *
198 * If 'guess_scheme' is TRUE, it means the URL might be provided without
199 * scheme.
200 */
201size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
202 bool guess_scheme)
203{
204 int i = 0;
205 DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
206 (void)buflen; /* only used in debug-builds */
207 if(buf)
208 buf[0] = 0; /* always leave a defined value in buf */
209#ifdef WIN32
210 if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
211 return 0;
212#endif
213 if(ISALPHA(url[0]))
214 for(i = 1; i < MAX_SCHEME_LEN; ++i) {
215 char s = url[i];
216 if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
217 /* RFC 3986 3.1 explains:
218 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
219 */
220 }
221 else {
222 break;
223 }
224 }
225 if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
226 /* If this does not guess scheme, the scheme always ends with the colon so
227 that this also detects data: URLs etc. In guessing mode, data: could
228 be the host name "data" with a specified port number. */
229
230 /* the length of the scheme is the name part only */
231 size_t len = i;
232 if(buf) {
233 buf[i] = 0;
234 while(i--) {
235 buf[i] = Curl_raw_tolower(in: url[i]);
236 }
237 }
238 return len;
239 }
240 return 0;
241}
242
243/*
244 * Concatenate a relative URL to a base URL making it absolute.
245 * URL-encodes any spaces.
246 * The returned pointer must be freed by the caller unless NULL
247 * (returns NULL on out of memory).
248 *
249 * Note that this function destroys the 'base' string.
250 */
251static char *concat_url(char *base, const char *relurl)
252{
253 /***
254 TRY to append this new path to the old URL
255 to the right of the host part. Oh crap, this is doomed to cause
256 problems in the future...
257 */
258 struct dynbuf newest;
259 char *protsep;
260 char *pathsep;
261 bool host_changed = FALSE;
262 const char *useurl = relurl;
263
264 /* protsep points to the start of the host name */
265 protsep = strstr(haystack: base, needle: "//");
266 if(!protsep)
267 protsep = base;
268 else
269 protsep += 2; /* pass the slashes */
270
271 if('/' != relurl[0]) {
272 int level = 0;
273
274 /* First we need to find out if there's a ?-letter in the URL,
275 and cut it and the right-side of that off */
276 pathsep = strchr(s: protsep, c: '?');
277 if(pathsep)
278 *pathsep = 0;
279
280 /* we have a relative path to append to the last slash if there's one
281 available, or if the new URL is just a query string (starts with a
282 '?') we append the new one at the end of the entire currently worked
283 out URL */
284 if(useurl[0] != '?') {
285 pathsep = strrchr(s: protsep, c: '/');
286 if(pathsep)
287 *pathsep = 0;
288 }
289
290 /* Check if there's any slash after the host name, and if so, remember
291 that position instead */
292 pathsep = strchr(s: protsep, c: '/');
293 if(pathsep)
294 protsep = pathsep + 1;
295 else
296 protsep = NULL;
297
298 /* now deal with one "./" or any amount of "../" in the newurl
299 and act accordingly */
300
301 if((useurl[0] == '.') && (useurl[1] == '/'))
302 useurl += 2; /* just skip the "./" */
303
304 while((useurl[0] == '.') &&
305 (useurl[1] == '.') &&
306 (useurl[2] == '/')) {
307 level++;
308 useurl += 3; /* pass the "../" */
309 }
310
311 if(protsep) {
312 while(level--) {
313 /* cut off one more level from the right of the original URL */
314 pathsep = strrchr(s: protsep, c: '/');
315 if(pathsep)
316 *pathsep = 0;
317 else {
318 *protsep = 0;
319 break;
320 }
321 }
322 }
323 }
324 else {
325 /* We got a new absolute path for this server */
326
327 if(relurl[1] == '/') {
328 /* the new URL starts with //, just keep the protocol part from the
329 original one */
330 *protsep = 0;
331 useurl = &relurl[2]; /* we keep the slashes from the original, so we
332 skip the new ones */
333 host_changed = TRUE;
334 }
335 else {
336 /* cut off the original URL from the first slash, or deal with URLs
337 without slash */
338 pathsep = strchr(s: protsep, c: '/');
339 if(pathsep) {
340 /* When people use badly formatted URLs, such as
341 "http://www.example.com?dir=/home/daniel" we must not use the first
342 slash, if there's a ?-letter before it! */
343 char *sep = strchr(s: protsep, c: '?');
344 if(sep && (sep < pathsep))
345 pathsep = sep;
346 *pathsep = 0;
347 }
348 else {
349 /* There was no slash. Now, since we might be operating on a badly
350 formatted URL, such as "http://www.example.com?id=2380" which
351 doesn't use a slash separator as it is supposed to, we need to check
352 for a ?-letter as well! */
353 pathsep = strchr(s: protsep, c: '?');
354 if(pathsep)
355 *pathsep = 0;
356 }
357 }
358 }
359
360 Curl_dyn_init(s: &newest, CURL_MAX_INPUT_LENGTH);
361
362 /* copy over the root url part */
363 if(Curl_dyn_add(s: &newest, str: base))
364 return NULL;
365
366 /* check if we need to append a slash */
367 if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
368 ;
369 else {
370 if(Curl_dyn_addn(s: &newest, mem: "/", len: 1))
371 return NULL;
372 }
373
374 /* then append the new piece on the right side */
375 urlencode_str(o: &newest, url: useurl, len: strlen(s: useurl), relative: !host_changed, FALSE);
376
377 return Curl_dyn_ptr(s: &newest);
378}
379
380/* scan for byte values <= 31, 127 and sometimes space */
381static CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags)
382{
383 static const char badbytes[]={
384 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
385 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
386 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
387 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
388 0x7f, 0x00 /* null-terminate */
389 };
390 size_t n = strlen(s: url);
391 size_t nfine;
392
393 if(n > CURL_MAX_INPUT_LENGTH)
394 /* excessive input length */
395 return CURLUE_MALFORMED_INPUT;
396
397 nfine = strcspn(s: url, reject: badbytes);
398 if((nfine != n) ||
399 (!(flags & CURLU_ALLOW_SPACE) && strchr(s: url, c: ' ')))
400 return CURLUE_MALFORMED_INPUT;
401
402 *urllen = n;
403 return CURLUE_OK;
404}
405
406/*
407 * parse_hostname_login()
408 *
409 * Parse the login details (user name, password and options) from the URL and
410 * strip them out of the host name
411 *
412 */
413static CURLUcode parse_hostname_login(struct Curl_URL *u,
414 const char *login,
415 size_t len,
416 unsigned int flags,
417 size_t *offset) /* to the host name */
418{
419 CURLUcode result = CURLUE_OK;
420 CURLcode ccode;
421 char *userp = NULL;
422 char *passwdp = NULL;
423 char *optionsp = NULL;
424 const struct Curl_handler *h = NULL;
425
426 /* At this point, we assume all the other special cases have been taken
427 * care of, so the host is at most
428 *
429 * [user[:password][;options]]@]hostname
430 *
431 * We need somewhere to put the embedded details, so do that first.
432 */
433 char *ptr;
434
435 DEBUGASSERT(login);
436
437 *offset = 0;
438 ptr = memchr(s: login, c: '@', n: len);
439 if(!ptr)
440 goto out;
441
442 /* We will now try to extract the
443 * possible login information in a string like:
444 * ftp://user:password@ftp.my.site:8021/README */
445 ptr++;
446
447 /* if this is a known scheme, get some details */
448 if(u->scheme)
449 h = Curl_builtin_scheme(scheme: u->scheme, CURL_ZERO_TERMINATED);
450
451 /* We could use the login information in the URL so extract it. Only parse
452 options if the handler says we should. Note that 'h' might be NULL! */
453 ccode = Curl_parse_login_details(login, len: ptr - login - 1,
454 userptr: &userp, passwdptr: &passwdp,
455 optionsptr: (h && (h->flags & PROTOPT_URLOPTIONS)) ?
456 &optionsp:NULL);
457 if(ccode) {
458 result = CURLUE_BAD_LOGIN;
459 goto out;
460 }
461
462 if(userp) {
463 if(flags & CURLU_DISALLOW_USER) {
464 /* Option DISALLOW_USER is set and url contains username. */
465 result = CURLUE_USER_NOT_ALLOWED;
466 goto out;
467 }
468 free(u->user);
469 u->user = userp;
470 }
471
472 if(passwdp) {
473 free(u->password);
474 u->password = passwdp;
475 }
476
477 if(optionsp) {
478 free(u->options);
479 u->options = optionsp;
480 }
481
482 /* the host name starts at this offset */
483 *offset = ptr - login;
484 return CURLUE_OK;
485
486out:
487
488 free(userp);
489 free(passwdp);
490 free(optionsp);
491 u->user = NULL;
492 u->password = NULL;
493 u->options = NULL;
494
495 return result;
496}
497
498UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
499 bool has_scheme)
500{
501 char *portptr;
502 char *hostname = Curl_dyn_ptr(s: host);
503 /*
504 * Find the end of an IPv6 address on the ']' ending bracket.
505 */
506 if(hostname[0] == '[') {
507 portptr = strchr(s: hostname, c: ']');
508 if(!portptr)
509 return CURLUE_BAD_IPV6;
510 portptr++;
511 /* this is a RFC2732-style specified IP-address */
512 if(*portptr) {
513 if(*portptr != ':')
514 return CURLUE_BAD_PORT_NUMBER;
515 }
516 else
517 portptr = NULL;
518 }
519 else
520 portptr = strchr(s: hostname, c: ':');
521
522 if(portptr) {
523 char *rest;
524 long port;
525 size_t keep = portptr - hostname;
526
527 /* Browser behavior adaptation. If there's a colon with no digits after,
528 just cut off the name there which makes us ignore the colon and just
529 use the default port. Firefox, Chrome and Safari all do that.
530
531 Don't do it if the URL has no scheme, to make something that looks like
532 a scheme not work!
533 */
534 Curl_dyn_setlen(s: host, set: keep);
535 portptr++;
536 if(!*portptr)
537 return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
538
539 if(!ISDIGIT(*portptr))
540 return CURLUE_BAD_PORT_NUMBER;
541
542 port = strtol(nptr: portptr, endptr: &rest, base: 10); /* Port number must be decimal */
543
544 if(port > 0xffff)
545 return CURLUE_BAD_PORT_NUMBER;
546
547 if(rest[0])
548 return CURLUE_BAD_PORT_NUMBER;
549
550 u->portnum = port;
551 /* generate a new port number string to get rid of leading zeroes etc */
552 free(u->port);
553 u->port = aprintf(format: "%ld", port);
554 if(!u->port)
555 return CURLUE_OUT_OF_MEMORY;
556 }
557
558 return CURLUE_OK;
559}
560
561/* this assumes 'hostname' now starts with [ */
562static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
563 size_t hlen) /* length of hostname */
564{
565 size_t len;
566 DEBUGASSERT(*hostname == '[');
567 if(hlen < 4) /* '[::]' is the shortest possible valid string */
568 return CURLUE_BAD_IPV6;
569 hostname++;
570 hlen -= 2;
571
572 /* only valid IPv6 letters are ok */
573 len = strspn(s: hostname, accept: "0123456789abcdefABCDEF:.");
574
575 if(hlen != len) {
576 hlen = len;
577 if(hostname[len] == '%') {
578 /* this could now be '%[zone id]' */
579 char zoneid[16];
580 int i = 0;
581 char *h = &hostname[len + 1];
582 /* pass '25' if present and is a url encoded percent sign */
583 if(!strncmp(s1: h, s2: "25", n: 2) && h[2] && (h[2] != ']'))
584 h += 2;
585 while(*h && (*h != ']') && (i < 15))
586 zoneid[i++] = *h++;
587 if(!i || (']' != *h))
588 return CURLUE_BAD_IPV6;
589 zoneid[i] = 0;
590 u->zoneid = strdup(zoneid);
591 if(!u->zoneid)
592 return CURLUE_OUT_OF_MEMORY;
593 hostname[len] = ']'; /* insert end bracket */
594 hostname[len + 1] = 0; /* terminate the hostname */
595 }
596 else
597 return CURLUE_BAD_IPV6;
598 /* hostname is fine */
599 }
600
601 /* Check the IPv6 address. */
602 {
603 char dest[16]; /* fits a binary IPv6 address */
604 char norm[MAX_IPADR_LEN];
605 hostname[hlen] = 0; /* end the address there */
606 if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
607 return CURLUE_BAD_IPV6;
608
609 /* check if it can be done shorter */
610 if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
611 (strlen(s: norm) < hlen)) {
612 strcpy(dest: hostname, src: norm);
613 hlen = strlen(s: norm);
614 hostname[hlen + 1] = 0;
615 }
616 hostname[hlen] = ']'; /* restore ending bracket */
617 }
618 return CURLUE_OK;
619}
620
621static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
622 size_t hlen) /* length of hostname */
623{
624 size_t len;
625 DEBUGASSERT(hostname);
626
627 if(!hlen)
628 return CURLUE_NO_HOST;
629 else if(hostname[0] == '[')
630 return ipv6_parse(u, hostname, hlen);
631 else {
632 /* letters from the second string are not ok */
633 len = strcspn(s: hostname, reject: " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
634 if(hlen != len)
635 /* hostname with bad content */
636 return CURLUE_BAD_HOSTNAME;
637 }
638 return CURLUE_OK;
639}
640
641/*
642 * Handle partial IPv4 numerical addresses and different bases, like
643 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
644 *
645 * If the given input string is syntactically wrong IPv4 or any part for
646 * example is too big, this function returns HOST_NAME.
647 *
648 * Output the "normalized" version of that input string in plain quad decimal
649 * integers.
650 *
651 * Returns the host type.
652 */
653
654#define HOST_ERROR -1 /* out of memory */
655#define HOST_BAD -2 /* bad IPv4 address */
656
657#define HOST_NAME 1
658#define HOST_IPV4 2
659#define HOST_IPV6 3
660
661static int ipv4_normalize(struct dynbuf *host)
662{
663 bool done = FALSE;
664 int n = 0;
665 const char *c = Curl_dyn_ptr(s: host);
666 unsigned long parts[4] = {0, 0, 0, 0};
667 CURLcode result = CURLE_OK;
668
669 if(*c == '[')
670 return HOST_IPV6;
671
672 while(!done) {
673 char *endp;
674 unsigned long l;
675 if(!ISDIGIT(*c))
676 /* most importantly this doesn't allow a leading plus or minus */
677 return HOST_NAME;
678 l = strtoul(nptr: c, endptr: &endp, base: 0);
679
680 parts[n] = l;
681 c = endp;
682
683 switch(*c) {
684 case '.':
685 if(n == 3)
686 return HOST_NAME;
687 n++;
688 c++;
689 break;
690
691 case '\0':
692 done = TRUE;
693 break;
694
695 default:
696 return HOST_NAME;
697 }
698
699 /* overflow */
700 if((l == ULONG_MAX) && (errno == ERANGE))
701 return HOST_NAME;
702
703#if SIZEOF_LONG > 4
704 /* a value larger than 32 bits */
705 if(l > UINT_MAX)
706 return HOST_NAME;
707#endif
708 }
709
710 switch(n) {
711 case 0: /* a -- 32 bits */
712 Curl_dyn_reset(s: host);
713
714 result = Curl_dyn_addf(s: host, fmt: "%u.%u.%u.%u",
715 parts[0] >> 24, (parts[0] >> 16) & 0xff,
716 (parts[0] >> 8) & 0xff, parts[0] & 0xff);
717 break;
718 case 1: /* a.b -- 8.24 bits */
719 if((parts[0] > 0xff) || (parts[1] > 0xffffff))
720 return HOST_NAME;
721 Curl_dyn_reset(s: host);
722 result = Curl_dyn_addf(s: host, fmt: "%u.%u.%u.%u",
723 parts[0], (parts[1] >> 16) & 0xff,
724 (parts[1] >> 8) & 0xff, parts[1] & 0xff);
725 break;
726 case 2: /* a.b.c -- 8.8.16 bits */
727 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
728 return HOST_NAME;
729 Curl_dyn_reset(s: host);
730 result = Curl_dyn_addf(s: host, fmt: "%u.%u.%u.%u",
731 parts[0], parts[1], (parts[2] >> 8) & 0xff,
732 parts[2] & 0xff);
733 break;
734 case 3: /* a.b.c.d -- 8.8.8.8 bits */
735 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
736 (parts[3] > 0xff))
737 return HOST_NAME;
738 Curl_dyn_reset(s: host);
739 result = Curl_dyn_addf(s: host, fmt: "%u.%u.%u.%u",
740 parts[0], parts[1], parts[2], parts[3]);
741 break;
742 }
743 if(result)
744 return HOST_ERROR;
745 return HOST_IPV4;
746}
747
748/* if necessary, replace the host content with a URL decoded version */
749static CURLUcode urldecode_host(struct dynbuf *host)
750{
751 char *per = NULL;
752 const char *hostname = Curl_dyn_ptr(s: host);
753 per = strchr(s: hostname, c: '%');
754 if(!per)
755 /* nothing to decode */
756 return CURLUE_OK;
757 else {
758 /* encoded */
759 size_t dlen;
760 char *decoded;
761 CURLcode result = Curl_urldecode(string: hostname, length: 0, ostring: &decoded, olen: &dlen,
762 ctrl: REJECT_CTRL);
763 if(result)
764 return CURLUE_BAD_HOSTNAME;
765 Curl_dyn_reset(s: host);
766 result = Curl_dyn_addn(s: host, mem: decoded, len: dlen);
767 free(decoded);
768 if(result)
769 return CURLUE_OUT_OF_MEMORY;
770 }
771
772 return CURLUE_OK;
773}
774
775static CURLUcode parse_authority(struct Curl_URL *u,
776 const char *auth, size_t authlen,
777 unsigned int flags,
778 struct dynbuf *host,
779 bool has_scheme)
780{
781 size_t offset;
782 CURLUcode result;
783
784 /*
785 * Parse the login details and strip them out of the host name.
786 */
787 result = parse_hostname_login(u, login: auth, len: authlen, flags, offset: &offset);
788 if(result)
789 goto out;
790
791 if(Curl_dyn_addn(s: host, mem: auth + offset, len: authlen - offset)) {
792 result = CURLUE_OUT_OF_MEMORY;
793 goto out;
794 }
795
796 result = Curl_parse_port(u, host, has_scheme);
797 if(result)
798 goto out;
799
800 if(!Curl_dyn_len(s: host))
801 return CURLUE_NO_HOST;
802
803 switch(ipv4_normalize(host)) {
804 case HOST_IPV4:
805 break;
806 case HOST_IPV6:
807 result = ipv6_parse(u, hostname: Curl_dyn_ptr(s: host), hlen: Curl_dyn_len(s: host));
808 break;
809 case HOST_NAME:
810 result = urldecode_host(host);
811 if(!result)
812 result = hostname_check(u, hostname: Curl_dyn_ptr(s: host), hlen: Curl_dyn_len(s: host));
813 break;
814 case HOST_ERROR:
815 result = CURLUE_OUT_OF_MEMORY;
816 break;
817 case HOST_BAD:
818 default:
819 result = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
820 break;
821 }
822
823out:
824 return result;
825}
826
827CURLUcode Curl_url_set_authority(CURLU *u, const char *authority,
828 unsigned int flags)
829{
830 CURLUcode result;
831 struct dynbuf host;
832
833 DEBUGASSERT(authority);
834 Curl_dyn_init(s: &host, CURL_MAX_INPUT_LENGTH);
835
836 result = parse_authority(u, auth: authority, authlen: strlen(s: authority), flags,
837 host: &host, has_scheme: !!u->scheme);
838 if(result)
839 Curl_dyn_free(s: &host);
840 else {
841 free(u->host);
842 u->host = Curl_dyn_ptr(s: &host);
843 }
844 return result;
845}
846
847/*
848 * "Remove Dot Segments"
849 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
850 */
851
852/*
853 * dedotdotify()
854 * @unittest: 1395
855 *
856 * This function gets a null-terminated path with dot and dotdot sequences
857 * passed in and strips them off according to the rules in RFC 3986 section
858 * 5.2.4.
859 *
860 * The function handles a query part ('?' + stuff) appended but it expects
861 * that fragments ('#' + stuff) have already been cut off.
862 *
863 * RETURNS
864 *
865 * Zero for success and 'out' set to an allocated dedotdotified string.
866 */
867UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
868UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
869{
870 char *outptr;
871 const char *endp = &input[clen];
872 char *out;
873
874 *outp = NULL;
875 /* the path always starts with a slash, and a slash has not dot */
876 if((clen < 2) || !memchr(s: input, c: '.', n: clen))
877 return 0;
878
879 out = malloc(clen + 1);
880 if(!out)
881 return 1; /* out of memory */
882
883 *out = 0; /* null-terminates, for inputs like "./" */
884 outptr = out;
885
886 do {
887 bool dotdot = TRUE;
888 if(*input == '.') {
889 /* A. If the input buffer begins with a prefix of "../" or "./", then
890 remove that prefix from the input buffer; otherwise, */
891
892 if(!strncmp(s1: "./", s2: input, n: 2)) {
893 input += 2;
894 clen -= 2;
895 }
896 else if(!strncmp(s1: "../", s2: input, n: 3)) {
897 input += 3;
898 clen -= 3;
899 }
900 /* D. if the input buffer consists only of "." or "..", then remove
901 that from the input buffer; otherwise, */
902
903 else if(!strcmp(s1: ".", s2: input) || !strcmp(s1: "..", s2: input) ||
904 !strncmp(s1: ".?", s2: input, n: 2) || !strncmp(s1: "..?", s2: input, n: 3)) {
905 *out = 0;
906 break;
907 }
908 else
909 dotdot = FALSE;
910 }
911 else if(*input == '/') {
912 /* B. if the input buffer begins with a prefix of "/./" or "/.", where
913 "." is a complete path segment, then replace that prefix with "/" in
914 the input buffer; otherwise, */
915 if(!strncmp(s1: "/./", s2: input, n: 3)) {
916 input += 2;
917 clen -= 2;
918 }
919 else if(!strcmp(s1: "/.", s2: input) || !strncmp(s1: "/.?", s2: input, n: 3)) {
920 *outptr++ = '/';
921 *outptr = 0;
922 break;
923 }
924
925 /* C. if the input buffer begins with a prefix of "/../" or "/..",
926 where ".." is a complete path segment, then replace that prefix with
927 "/" in the input buffer and remove the last segment and its
928 preceding "/" (if any) from the output buffer; otherwise, */
929
930 else if(!strncmp(s1: "/../", s2: input, n: 4)) {
931 input += 3;
932 clen -= 3;
933 /* remove the last segment from the output buffer */
934 while(outptr > out) {
935 outptr--;
936 if(*outptr == '/')
937 break;
938 }
939 *outptr = 0; /* null-terminate where it stops */
940 }
941 else if(!strcmp(s1: "/..", s2: input) || !strncmp(s1: "/..?", s2: input, n: 4)) {
942 /* remove the last segment from the output buffer */
943 while(outptr > out) {
944 outptr--;
945 if(*outptr == '/')
946 break;
947 }
948 *outptr++ = '/';
949 *outptr = 0; /* null-terminate where it stops */
950 break;
951 }
952 else
953 dotdot = FALSE;
954 }
955 else
956 dotdot = FALSE;
957
958 if(!dotdot) {
959 /* E. move the first path segment in the input buffer to the end of
960 the output buffer, including the initial "/" character (if any) and
961 any subsequent characters up to, but not including, the next "/"
962 character or the end of the input buffer. */
963
964 do {
965 *outptr++ = *input++;
966 clen--;
967 } while(*input && (*input != '/') && (*input != '?'));
968 *outptr = 0;
969 }
970
971 /* continue until end of path */
972 } while(input < endp);
973
974 *outp = out;
975 return 0; /* success */
976}
977
978static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
979{
980 const char *path;
981 size_t pathlen;
982 char *query = NULL;
983 char *fragment = NULL;
984 char schemebuf[MAX_SCHEME_LEN + 1];
985 size_t schemelen = 0;
986 size_t urllen;
987 CURLUcode result = CURLUE_OK;
988 size_t fraglen = 0;
989 struct dynbuf host;
990
991 DEBUGASSERT(url);
992
993 Curl_dyn_init(s: &host, CURL_MAX_INPUT_LENGTH);
994
995 result = junkscan(url, urllen: &urllen, flags);
996 if(result)
997 goto fail;
998
999 schemelen = Curl_is_absolute_url(url, buf: schemebuf, buflen: sizeof(schemebuf),
1000 guess_scheme: flags & (CURLU_GUESS_SCHEME|
1001 CURLU_DEFAULT_SCHEME));
1002
1003 /* handle the file: scheme */
1004 if(schemelen && !strcmp(s1: schemebuf, s2: "file")) {
1005 bool uncpath = FALSE;
1006 if(urllen <= 6) {
1007 /* file:/ is not enough to actually be a complete file: URL */
1008 result = CURLUE_BAD_FILE_URL;
1009 goto fail;
1010 }
1011
1012 /* path has been allocated large enough to hold this */
1013 path = (char *)&url[5];
1014 pathlen = urllen - 5;
1015
1016 u->scheme = strdup("file");
1017 if(!u->scheme) {
1018 result = CURLUE_OUT_OF_MEMORY;
1019 goto fail;
1020 }
1021
1022 /* Extra handling URLs with an authority component (i.e. that start with
1023 * "file://")
1024 *
1025 * We allow omitted hostname (e.g. file:/<path>) -- valid according to
1026 * RFC 8089, but not the (current) WHAT-WG URL spec.
1027 */
1028 if(path[0] == '/' && path[1] == '/') {
1029 /* swallow the two slashes */
1030 const char *ptr = &path[2];
1031
1032 /*
1033 * According to RFC 8089, a file: URL can be reliably dereferenced if:
1034 *
1035 * o it has no/blank hostname, or
1036 *
1037 * o the hostname matches "localhost" (case-insensitively), or
1038 *
1039 * o the hostname is a FQDN that resolves to this machine, or
1040 *
1041 * o it is an UNC String transformed to an URI (Windows only, RFC 8089
1042 * Appendix E.3).
1043 *
1044 * For brevity, we only consider URLs with empty, "localhost", or
1045 * "127.0.0.1" hostnames as local, otherwise as an UNC String.
1046 *
1047 * Additionally, there is an exception for URLs with a Windows drive
1048 * letter in the authority (which was accidentally omitted from RFC 8089
1049 * Appendix E, but believe me, it was meant to be there. --MK)
1050 */
1051 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
1052 /* the URL includes a host name, it must match "localhost" or
1053 "127.0.0.1" to be valid */
1054 if(checkprefix("localhost/", ptr) ||
1055 checkprefix("127.0.0.1/", ptr)) {
1056 ptr += 9; /* now points to the slash after the host */
1057 }
1058 else {
1059#if defined(WIN32)
1060 size_t len;
1061
1062 /* the host name, NetBIOS computer name, can not contain disallowed
1063 chars, and the delimiting slash character must be appended to the
1064 host name */
1065 path = strpbrk(ptr, "/\\:*?\"<>|");
1066 if(!path || *path != '/') {
1067 result = CURLUE_BAD_FILE_URL;
1068 goto fail;
1069 }
1070
1071 len = path - ptr;
1072 if(len) {
1073 if(Curl_dyn_addn(&host, ptr, len)) {
1074 result = CURLUE_OUT_OF_MEMORY;
1075 goto fail;
1076 }
1077 uncpath = TRUE;
1078 }
1079
1080 ptr -= 2; /* now points to the // before the host in UNC */
1081#else
1082 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1083 none */
1084 result = CURLUE_BAD_FILE_URL;
1085 goto fail;
1086#endif
1087 }
1088 }
1089
1090 path = ptr;
1091 pathlen = urllen - (ptr - url);
1092 }
1093
1094 if(!uncpath)
1095 /* no host for file: URLs by default */
1096 Curl_dyn_reset(s: &host);
1097
1098#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
1099 /* Don't allow Windows drive letters when not in Windows.
1100 * This catches both "file:/c:" and "file:c:" */
1101 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1102 STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1103 /* File drive letters are only accepted in MSDOS/Windows */
1104 result = CURLUE_BAD_FILE_URL;
1105 goto fail;
1106 }
1107#else
1108 /* If the path starts with a slash and a drive letter, ditch the slash */
1109 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1110 /* This cannot be done with strcpy, as the memory chunks overlap! */
1111 path++;
1112 pathlen--;
1113 }
1114#endif
1115
1116 }
1117 else {
1118 /* clear path */
1119 const char *schemep = NULL;
1120 const char *hostp;
1121 size_t hostlen;
1122
1123 if(schemelen) {
1124 int i = 0;
1125 const char *p = &url[schemelen + 1];
1126 while((*p == '/') && (i < 4)) {
1127 p++;
1128 i++;
1129 }
1130
1131 schemep = schemebuf;
1132 if(!Curl_builtin_scheme(scheme: schemep, CURL_ZERO_TERMINATED) &&
1133 !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1134 result = CURLUE_UNSUPPORTED_SCHEME;
1135 goto fail;
1136 }
1137
1138 if((i < 1) || (i > 3)) {
1139 /* less than one or more than three slashes */
1140 result = CURLUE_BAD_SLASHES;
1141 goto fail;
1142 }
1143 hostp = p; /* host name starts here */
1144 }
1145 else {
1146 /* no scheme! */
1147
1148 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1149 result = CURLUE_BAD_SCHEME;
1150 goto fail;
1151 }
1152 if(flags & CURLU_DEFAULT_SCHEME)
1153 schemep = DEFAULT_SCHEME;
1154
1155 /*
1156 * The URL was badly formatted, let's try without scheme specified.
1157 */
1158 hostp = url;
1159 }
1160
1161 if(schemep) {
1162 u->scheme = strdup(schemep);
1163 if(!u->scheme) {
1164 result = CURLUE_OUT_OF_MEMORY;
1165 goto fail;
1166 }
1167 }
1168
1169 /* find the end of the host name + port number */
1170 hostlen = strcspn(s: hostp, reject: "/?#");
1171 path = &hostp[hostlen];
1172
1173 /* this pathlen also contains the query and the fragment */
1174 pathlen = urllen - (path - url);
1175 if(hostlen) {
1176
1177 result = parse_authority(u, auth: hostp, authlen: hostlen, flags, host: &host, has_scheme: schemelen);
1178 if(result)
1179 goto fail;
1180
1181 if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1182 const char *hostname = Curl_dyn_ptr(s: &host);
1183 /* legacy curl-style guess based on host name */
1184 if(checkprefix("ftp.", hostname))
1185 schemep = "ftp";
1186 else if(checkprefix("dict.", hostname))
1187 schemep = "dict";
1188 else if(checkprefix("ldap.", hostname))
1189 schemep = "ldap";
1190 else if(checkprefix("imap.", hostname))
1191 schemep = "imap";
1192 else if(checkprefix("smtp.", hostname))
1193 schemep = "smtp";
1194 else if(checkprefix("pop3.", hostname))
1195 schemep = "pop3";
1196 else
1197 schemep = "http";
1198
1199 u->scheme = strdup(schemep);
1200 if(!u->scheme) {
1201 result = CURLUE_OUT_OF_MEMORY;
1202 goto fail;
1203 }
1204 }
1205 }
1206 else if(flags & CURLU_NO_AUTHORITY) {
1207 /* allowed to be empty. */
1208 if(Curl_dyn_add(s: &host, str: "")) {
1209 result = CURLUE_OUT_OF_MEMORY;
1210 goto fail;
1211 }
1212 }
1213 else {
1214 result = CURLUE_NO_HOST;
1215 goto fail;
1216 }
1217 }
1218
1219 fragment = strchr(s: path, c: '#');
1220 if(fragment) {
1221 fraglen = pathlen - (fragment - path);
1222 if(fraglen > 1) {
1223 /* skip the leading '#' in the copy but include the terminating null */
1224 if(flags & CURLU_URLENCODE) {
1225 struct dynbuf enc;
1226 Curl_dyn_init(s: &enc, CURL_MAX_INPUT_LENGTH);
1227 if(urlencode_str(o: &enc, url: fragment + 1, len: fraglen, TRUE, FALSE)) {
1228 result = CURLUE_OUT_OF_MEMORY;
1229 goto fail;
1230 }
1231 u->fragment = Curl_dyn_ptr(s: &enc);
1232 }
1233 else {
1234 u->fragment = Curl_memdup(src: fragment + 1, buffer_length: fraglen);
1235 if(!u->fragment) {
1236 result = CURLUE_OUT_OF_MEMORY;
1237 goto fail;
1238 }
1239 }
1240 }
1241 /* after this, pathlen still contains the query */
1242 pathlen -= fraglen;
1243 }
1244
1245 DEBUGASSERT(pathlen < urllen);
1246 query = memchr(s: path, c: '?', n: pathlen);
1247 if(query) {
1248 size_t qlen = fragment ? (size_t)(fragment - query) :
1249 pathlen - (query - path);
1250 pathlen -= qlen;
1251 if(qlen > 1) {
1252 if(flags & CURLU_URLENCODE) {
1253 struct dynbuf enc;
1254 Curl_dyn_init(s: &enc, CURL_MAX_INPUT_LENGTH);
1255 /* skip the leading question mark */
1256 if(urlencode_str(o: &enc, url: query + 1, len: qlen - 1, TRUE, TRUE)) {
1257 result = CURLUE_OUT_OF_MEMORY;
1258 goto fail;
1259 }
1260 u->query = Curl_dyn_ptr(s: &enc);
1261 }
1262 else {
1263 u->query = Curl_memdup(src: query + 1, buffer_length: qlen);
1264 if(!u->query) {
1265 result = CURLUE_OUT_OF_MEMORY;
1266 goto fail;
1267 }
1268 u->query[qlen - 1] = 0;
1269 }
1270 }
1271 else {
1272 /* single byte query */
1273 u->query = strdup("");
1274 if(!u->query) {
1275 result = CURLUE_OUT_OF_MEMORY;
1276 goto fail;
1277 }
1278 }
1279 }
1280
1281 if(pathlen && (flags & CURLU_URLENCODE)) {
1282 struct dynbuf enc;
1283 Curl_dyn_init(s: &enc, CURL_MAX_INPUT_LENGTH);
1284 if(urlencode_str(o: &enc, url: path, len: pathlen, TRUE, FALSE)) {
1285 result = CURLUE_OUT_OF_MEMORY;
1286 goto fail;
1287 }
1288 pathlen = Curl_dyn_len(s: &enc);
1289 path = u->path = Curl_dyn_ptr(s: &enc);
1290 }
1291
1292 if(pathlen <= 1) {
1293 /* there is no path left or just the slash, unset */
1294 path = NULL;
1295 }
1296 else {
1297 if(!u->path) {
1298 u->path = Curl_memdup(src: path, buffer_length: pathlen + 1);
1299 if(!u->path) {
1300 result = CURLUE_OUT_OF_MEMORY;
1301 goto fail;
1302 }
1303 u->path[pathlen] = 0;
1304 path = u->path;
1305 }
1306 else if(flags & CURLU_URLENCODE)
1307 /* it might have encoded more than just the path so cut it */
1308 u->path[pathlen] = 0;
1309
1310 if(!(flags & CURLU_PATH_AS_IS)) {
1311 /* remove ../ and ./ sequences according to RFC3986 */
1312 char *dedot;
1313 int err = dedotdotify(input: (char *)path, clen: pathlen, outp: &dedot);
1314 if(err) {
1315 result = CURLUE_OUT_OF_MEMORY;
1316 goto fail;
1317 }
1318 if(dedot) {
1319 free(u->path);
1320 u->path = dedot;
1321 }
1322 }
1323 }
1324
1325 u->host = Curl_dyn_ptr(s: &host);
1326
1327 return result;
1328fail:
1329 Curl_dyn_free(s: &host);
1330 free_urlhandle(u);
1331 return result;
1332}
1333
1334/*
1335 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1336 */
1337static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1338 unsigned int flags)
1339{
1340 CURLUcode result;
1341 CURLU tmpurl;
1342 memset(s: &tmpurl, c: 0, n: sizeof(tmpurl));
1343 result = parseurl(url, u: &tmpurl, flags);
1344 if(!result) {
1345 free_urlhandle(u);
1346 *u = tmpurl;
1347 }
1348 return result;
1349}
1350
1351/*
1352 */
1353CURLU *curl_url(void)
1354{
1355 return calloc(sizeof(struct Curl_URL), 1);
1356}
1357
1358void curl_url_cleanup(CURLU *u)
1359{
1360 if(u) {
1361 free_urlhandle(u);
1362 free(u);
1363 }
1364}
1365
/*
 * DUP() copies the string member 'name' from the struct 'src' points to
 * into the struct 'dest' points to, but only when the source member is set.
 * If strdup() fails it jumps to a label called 'fail', which the function
 * using the macro must provide.
 *
 * The pointer arguments are parenthesized so that any pointer expression
 * (such as '&object') can be passed.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1374
1375CURLU *curl_url_dup(const CURLU *in)
1376{
1377 struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
1378 if(u) {
1379 DUP(u, in, scheme);
1380 DUP(u, in, user);
1381 DUP(u, in, password);
1382 DUP(u, in, options);
1383 DUP(u, in, host);
1384 DUP(u, in, port);
1385 DUP(u, in, path);
1386 DUP(u, in, query);
1387 DUP(u, in, fragment);
1388 DUP(u, in, zoneid);
1389 u->portnum = in->portnum;
1390 }
1391 return u;
1392fail:
1393 curl_url_cleanup(u);
1394 return NULL;
1395}
1396
1397CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
1398 char **part, unsigned int flags)
1399{
1400 const char *ptr;
1401 CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1402 char portbuf[7];
1403 bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1404 bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1405 bool punycode = FALSE;
1406 bool depunyfy = FALSE;
1407 bool plusdecode = FALSE;
1408 (void)flags;
1409 if(!u)
1410 return CURLUE_BAD_HANDLE;
1411 if(!part)
1412 return CURLUE_BAD_PARTPOINTER;
1413 *part = NULL;
1414
1415 switch(what) {
1416 case CURLUPART_SCHEME:
1417 ptr = u->scheme;
1418 ifmissing = CURLUE_NO_SCHEME;
1419 urldecode = FALSE; /* never for schemes */
1420 break;
1421 case CURLUPART_USER:
1422 ptr = u->user;
1423 ifmissing = CURLUE_NO_USER;
1424 break;
1425 case CURLUPART_PASSWORD:
1426 ptr = u->password;
1427 ifmissing = CURLUE_NO_PASSWORD;
1428 break;
1429 case CURLUPART_OPTIONS:
1430 ptr = u->options;
1431 ifmissing = CURLUE_NO_OPTIONS;
1432 break;
1433 case CURLUPART_HOST:
1434 ptr = u->host;
1435 ifmissing = CURLUE_NO_HOST;
1436 punycode = (flags & CURLU_PUNYCODE)?1:0;
1437 depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1438 break;
1439 case CURLUPART_ZONEID:
1440 ptr = u->zoneid;
1441 ifmissing = CURLUE_NO_ZONEID;
1442 break;
1443 case CURLUPART_PORT:
1444 ptr = u->port;
1445 ifmissing = CURLUE_NO_PORT;
1446 urldecode = FALSE; /* never for port */
1447 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1448 /* there's no stored port number, but asked to deliver
1449 a default one for the scheme */
1450 const struct Curl_handler *h =
1451 Curl_builtin_scheme(scheme: u->scheme, CURL_ZERO_TERMINATED);
1452 if(h) {
1453 msnprintf(buffer: portbuf, maxlength: sizeof(portbuf), format: "%u", h->defport);
1454 ptr = portbuf;
1455 }
1456 }
1457 else if(ptr && u->scheme) {
1458 /* there is a stored port number, but ask to inhibit if
1459 it matches the default one for the scheme */
1460 const struct Curl_handler *h =
1461 Curl_builtin_scheme(scheme: u->scheme, CURL_ZERO_TERMINATED);
1462 if(h && (h->defport == u->portnum) &&
1463 (flags & CURLU_NO_DEFAULT_PORT))
1464 ptr = NULL;
1465 }
1466 break;
1467 case CURLUPART_PATH:
1468 ptr = u->path;
1469 if(!ptr)
1470 ptr = "/";
1471 break;
1472 case CURLUPART_QUERY:
1473 ptr = u->query;
1474 ifmissing = CURLUE_NO_QUERY;
1475 plusdecode = urldecode;
1476 break;
1477 case CURLUPART_FRAGMENT:
1478 ptr = u->fragment;
1479 ifmissing = CURLUE_NO_FRAGMENT;
1480 break;
1481 case CURLUPART_URL: {
1482 char *url;
1483 char *scheme;
1484 char *options = u->options;
1485 char *port = u->port;
1486 char *allochost = NULL;
1487 punycode = (flags & CURLU_PUNYCODE)?1:0;
1488 depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1489 if(u->scheme && strcasecompare("file", u->scheme)) {
1490 url = aprintf(format: "file://%s%s%s",
1491 u->path,
1492 u->fragment? "#": "",
1493 u->fragment? u->fragment : "");
1494 }
1495 else if(!u->host)
1496 return CURLUE_NO_HOST;
1497 else {
1498 const struct Curl_handler *h = NULL;
1499 if(u->scheme)
1500 scheme = u->scheme;
1501 else if(flags & CURLU_DEFAULT_SCHEME)
1502 scheme = (char *) DEFAULT_SCHEME;
1503 else
1504 return CURLUE_NO_SCHEME;
1505
1506 h = Curl_builtin_scheme(scheme, CURL_ZERO_TERMINATED);
1507 if(!port && (flags & CURLU_DEFAULT_PORT)) {
1508 /* there's no stored port number, but asked to deliver
1509 a default one for the scheme */
1510 if(h) {
1511 msnprintf(buffer: portbuf, maxlength: sizeof(portbuf), format: "%u", h->defport);
1512 port = portbuf;
1513 }
1514 }
1515 else if(port) {
1516 /* there is a stored port number, but asked to inhibit if it matches
1517 the default one for the scheme */
1518 if(h && (h->defport == u->portnum) &&
1519 (flags & CURLU_NO_DEFAULT_PORT))
1520 port = NULL;
1521 }
1522
1523 if(h && !(h->flags & PROTOPT_URLOPTIONS))
1524 options = NULL;
1525
1526 if(u->host[0] == '[') {
1527 if(u->zoneid) {
1528 /* make it '[ host %25 zoneid ]' */
1529 struct dynbuf enc;
1530 size_t hostlen = strlen(s: u->host);
1531 Curl_dyn_init(s: &enc, CURL_MAX_INPUT_LENGTH);
1532 if(Curl_dyn_addf(s: &enc, fmt: "%.*s%%25%s]", (int)hostlen - 1, u->host,
1533 u->zoneid))
1534 return CURLUE_OUT_OF_MEMORY;
1535 allochost = Curl_dyn_ptr(s: &enc);
1536 }
1537 }
1538 else if(urlencode) {
1539 allochost = curl_easy_escape(NULL, string: u->host, length: 0);
1540 if(!allochost)
1541 return CURLUE_OUT_OF_MEMORY;
1542 }
1543 else if(punycode) {
1544 if(!Curl_is_ASCII_name(hostname: u->host)) {
1545#ifndef USE_IDN
1546 return CURLUE_LACKS_IDN;
1547#else
1548 CURLcode result = Curl_idn_decode(u->host, &allochost);
1549 if(result)
1550 return (result == CURLE_OUT_OF_MEMORY) ?
1551 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1552#endif
1553 }
1554 }
1555 else if(depunyfy) {
1556 if(Curl_is_ASCII_name(hostname: u->host) && !strncmp(s1: "xn--", s2: u->host, n: 4)) {
1557#ifndef USE_IDN
1558 return CURLUE_LACKS_IDN;
1559#else
1560 CURLcode result = Curl_idn_encode(u->host, &allochost);
1561 if(result)
1562 /* this is the most likely error */
1563 return (result == CURLE_OUT_OF_MEMORY) ?
1564 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1565#endif
1566 }
1567 }
1568
1569 url = aprintf(format: "%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1570 scheme,
1571 u->user ? u->user : "",
1572 u->password ? ":": "",
1573 u->password ? u->password : "",
1574 options ? ";" : "",
1575 options ? options : "",
1576 (u->user || u->password || options) ? "@": "",
1577 allochost ? allochost : u->host,
1578 port ? ":": "",
1579 port ? port : "",
1580 u->path ? u->path : "/",
1581 (u->query && u->query[0]) ? "?": "",
1582 (u->query && u->query[0]) ? u->query : "",
1583 u->fragment? "#": "",
1584 u->fragment? u->fragment : "");
1585 free(allochost);
1586 }
1587 if(!url)
1588 return CURLUE_OUT_OF_MEMORY;
1589 *part = url;
1590 return CURLUE_OK;
1591 }
1592 default:
1593 ptr = NULL;
1594 break;
1595 }
1596 if(ptr) {
1597 size_t partlen = strlen(s: ptr);
1598 size_t i = 0;
1599 *part = Curl_memdup(src: ptr, buffer_length: partlen + 1);
1600 if(!*part)
1601 return CURLUE_OUT_OF_MEMORY;
1602 if(plusdecode) {
1603 /* convert + to space */
1604 char *plus = *part;
1605 for(i = 0; i < partlen; ++plus, i++) {
1606 if(*plus == '+')
1607 *plus = ' ';
1608 }
1609 }
1610 if(urldecode) {
1611 char *decoded;
1612 size_t dlen;
1613 /* this unconditional rejection of control bytes is documented
1614 API behavior */
1615 CURLcode res = Curl_urldecode(string: *part, length: 0, ostring: &decoded, olen: &dlen, ctrl: REJECT_CTRL);
1616 free(*part);
1617 if(res) {
1618 *part = NULL;
1619 return CURLUE_URLDECODE;
1620 }
1621 *part = decoded;
1622 partlen = dlen;
1623 }
1624 if(urlencode) {
1625 struct dynbuf enc;
1626 Curl_dyn_init(s: &enc, CURL_MAX_INPUT_LENGTH);
1627 if(urlencode_str(o: &enc, url: *part, len: partlen, TRUE,
1628 query: what == CURLUPART_QUERY))
1629 return CURLUE_OUT_OF_MEMORY;
1630 free(*part);
1631 *part = Curl_dyn_ptr(s: &enc);
1632 }
1633 else if(punycode) {
1634 if(!Curl_is_ASCII_name(hostname: u->host)) {
1635#ifndef USE_IDN
1636 return CURLUE_LACKS_IDN;
1637#else
1638 char *allochost;
1639 CURLcode result = Curl_idn_decode(*part, &allochost);
1640 if(result)
1641 return (result == CURLE_OUT_OF_MEMORY) ?
1642 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1643 free(*part);
1644 *part = allochost;
1645#endif
1646 }
1647 }
1648 else if(depunyfy) {
1649 if(Curl_is_ASCII_name(hostname: u->host) && !strncmp(s1: "xn--", s2: u->host, n: 4)) {
1650#ifndef USE_IDN
1651 return CURLUE_LACKS_IDN;
1652#else
1653 char *allochost;
1654 CURLcode result = Curl_idn_encode(*part, &allochost);
1655 if(result)
1656 return (result == CURLE_OUT_OF_MEMORY) ?
1657 CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1658 free(*part);
1659 *part = allochost;
1660#endif
1661 }
1662 }
1663
1664 return CURLUE_OK;
1665 }
1666 else
1667 return ifmissing;
1668}
1669
1670CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1671 const char *part, unsigned int flags)
1672{
1673 char **storep = NULL;
1674 long port = 0;
1675 bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1676 bool plusencode = FALSE;
1677 bool urlskipslash = FALSE;
1678 bool leadingslash = FALSE;
1679 bool appendquery = FALSE;
1680 bool equalsencode = FALSE;
1681 size_t nalloc;
1682
1683 if(!u)
1684 return CURLUE_BAD_HANDLE;
1685 if(!part) {
1686 /* setting a part to NULL clears it */
1687 switch(what) {
1688 case CURLUPART_URL:
1689 break;
1690 case CURLUPART_SCHEME:
1691 storep = &u->scheme;
1692 break;
1693 case CURLUPART_USER:
1694 storep = &u->user;
1695 break;
1696 case CURLUPART_PASSWORD:
1697 storep = &u->password;
1698 break;
1699 case CURLUPART_OPTIONS:
1700 storep = &u->options;
1701 break;
1702 case CURLUPART_HOST:
1703 storep = &u->host;
1704 break;
1705 case CURLUPART_ZONEID:
1706 storep = &u->zoneid;
1707 break;
1708 case CURLUPART_PORT:
1709 u->portnum = 0;
1710 storep = &u->port;
1711 break;
1712 case CURLUPART_PATH:
1713 storep = &u->path;
1714 break;
1715 case CURLUPART_QUERY:
1716 storep = &u->query;
1717 break;
1718 case CURLUPART_FRAGMENT:
1719 storep = &u->fragment;
1720 break;
1721 default:
1722 return CURLUE_UNKNOWN_PART;
1723 }
1724 if(storep && *storep) {
1725 Curl_safefree(*storep);
1726 }
1727 else if(!storep) {
1728 free_urlhandle(u);
1729 memset(s: u, c: 0, n: sizeof(struct Curl_URL));
1730 }
1731 return CURLUE_OK;
1732 }
1733
1734 nalloc = strlen(s: part);
1735 if(nalloc > CURL_MAX_INPUT_LENGTH)
1736 /* excessive input length */
1737 return CURLUE_MALFORMED_INPUT;
1738
1739 switch(what) {
1740 case CURLUPART_SCHEME: {
1741 size_t plen = strlen(s: part);
1742 const char *s = part;
1743 if((plen > MAX_SCHEME_LEN) || (plen < 1))
1744 /* too long or too short */
1745 return CURLUE_BAD_SCHEME;
1746 if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1747 /* verify that it is a fine scheme */
1748 !Curl_builtin_scheme(scheme: part, CURL_ZERO_TERMINATED))
1749 return CURLUE_UNSUPPORTED_SCHEME;
1750 storep = &u->scheme;
1751 urlencode = FALSE; /* never */
1752 if(ISALPHA(*s)) {
1753 /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
1754 while(--plen) {
1755 if(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.'))
1756 s++; /* fine */
1757 else
1758 return CURLUE_BAD_SCHEME;
1759 }
1760 }
1761 else
1762 return CURLUE_BAD_SCHEME;
1763 break;
1764 }
1765 case CURLUPART_USER:
1766 storep = &u->user;
1767 break;
1768 case CURLUPART_PASSWORD:
1769 storep = &u->password;
1770 break;
1771 case CURLUPART_OPTIONS:
1772 storep = &u->options;
1773 break;
1774 case CURLUPART_HOST:
1775 storep = &u->host;
1776 Curl_safefree(u->zoneid);
1777 break;
1778 case CURLUPART_ZONEID:
1779 storep = &u->zoneid;
1780 break;
1781 case CURLUPART_PORT:
1782 {
1783 char *endp;
1784 urlencode = FALSE; /* never */
1785 port = strtol(nptr: part, endptr: &endp, base: 10); /* Port number must be decimal */
1786 if((port <= 0) || (port > 0xffff))
1787 return CURLUE_BAD_PORT_NUMBER;
1788 if(*endp)
1789 /* weirdly provided number, not good! */
1790 return CURLUE_BAD_PORT_NUMBER;
1791 storep = &u->port;
1792 }
1793 break;
1794 case CURLUPART_PATH:
1795 urlskipslash = TRUE;
1796 leadingslash = TRUE; /* enforce */
1797 storep = &u->path;
1798 break;
1799 case CURLUPART_QUERY:
1800 plusencode = urlencode;
1801 appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1802 equalsencode = appendquery;
1803 storep = &u->query;
1804 break;
1805 case CURLUPART_FRAGMENT:
1806 storep = &u->fragment;
1807 break;
1808 case CURLUPART_URL: {
1809 /*
1810 * Allow a new URL to replace the existing (if any) contents.
1811 *
1812 * If the existing contents is enough for a URL, allow a relative URL to
1813 * replace it.
1814 */
1815 CURLUcode result;
1816 char *oldurl;
1817 char *redired_url;
1818
1819 if(!nalloc)
1820 /* a blank URL is not a valid URL */
1821 return CURLUE_MALFORMED_INPUT;
1822
1823 /* if the new thing is absolute or the old one is not
1824 * (we could not get an absolute url in 'oldurl'),
1825 * then replace the existing with the new. */
1826 if(Curl_is_absolute_url(url: part, NULL, buflen: 0,
1827 guess_scheme: flags & (CURLU_GUESS_SCHEME|
1828 CURLU_DEFAULT_SCHEME))
1829 || curl_url_get(u, what: CURLUPART_URL, part: &oldurl, flags)) {
1830 return parseurl_and_replace(url: part, u, flags);
1831 }
1832
1833 /* apply the relative part to create a new URL
1834 * and replace the existing one with it. */
1835 redired_url = concat_url(base: oldurl, relurl: part);
1836 free(oldurl);
1837 if(!redired_url)
1838 return CURLUE_OUT_OF_MEMORY;
1839
1840 result = parseurl_and_replace(url: redired_url, u, flags);
1841 free(redired_url);
1842 return result;
1843 }
1844 default:
1845 return CURLUE_UNKNOWN_PART;
1846 }
1847 DEBUGASSERT(storep);
1848 {
1849 const char *newp;
1850 struct dynbuf enc;
1851 Curl_dyn_init(s: &enc, toobig: nalloc * 3 + 1 + leadingslash);
1852
1853 if(leadingslash && (part[0] != '/')) {
1854 CURLcode result = Curl_dyn_addn(s: &enc, mem: "/", len: 1);
1855 if(result)
1856 return CURLUE_OUT_OF_MEMORY;
1857 }
1858 if(urlencode) {
1859 const unsigned char *i;
1860
1861 for(i = (const unsigned char *)part; *i; i++) {
1862 CURLcode result;
1863 if((*i == ' ') && plusencode) {
1864 result = Curl_dyn_addn(s: &enc, mem: "+", len: 1);
1865 if(result)
1866 return CURLUE_OUT_OF_MEMORY;
1867 }
1868 else if(ISUNRESERVED(*i) ||
1869 ((*i == '/') && urlskipslash) ||
1870 ((*i == '=') && equalsencode)) {
1871 if((*i == '=') && equalsencode)
1872 /* only skip the first equals sign */
1873 equalsencode = FALSE;
1874 result = Curl_dyn_addn(s: &enc, mem: i, len: 1);
1875 if(result)
1876 return CURLUE_OUT_OF_MEMORY;
1877 }
1878 else {
1879 char out[3]={'%'};
1880 out[1] = hexdigits[*i>>4];
1881 out[2] = hexdigits[*i & 0xf];
1882 result = Curl_dyn_addn(s: &enc, mem: out, len: 3);
1883 if(result)
1884 return CURLUE_OUT_OF_MEMORY;
1885 }
1886 }
1887 }
1888 else {
1889 char *p;
1890 CURLcode result = Curl_dyn_add(s: &enc, str: part);
1891 if(result)
1892 return CURLUE_OUT_OF_MEMORY;
1893 p = Curl_dyn_ptr(s: &enc);
1894 while(*p) {
1895 /* make sure percent encoded are lower case */
1896 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1897 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1898 p[1] = Curl_raw_tolower(in: p[1]);
1899 p[2] = Curl_raw_tolower(in: p[2]);
1900 p += 3;
1901 }
1902 else
1903 p++;
1904 }
1905 }
1906 newp = Curl_dyn_ptr(s: &enc);
1907
1908 if(appendquery) {
1909 /* Append the 'newp' string onto the old query. Add a '&' separator if
1910 none is present at the end of the existing query already */
1911
1912 size_t querylen = u->query ? strlen(s: u->query) : 0;
1913 bool addamperand = querylen && (u->query[querylen -1] != '&');
1914 if(querylen) {
1915 struct dynbuf qbuf;
1916 Curl_dyn_init(s: &qbuf, CURL_MAX_INPUT_LENGTH);
1917
1918 if(Curl_dyn_addn(s: &qbuf, mem: u->query, len: querylen)) /* add original query */
1919 goto nomem;
1920
1921 if(addamperand) {
1922 if(Curl_dyn_addn(s: &qbuf, mem: "&", len: 1))
1923 goto nomem;
1924 }
1925 if(Curl_dyn_add(s: &qbuf, str: newp))
1926 goto nomem;
1927 Curl_dyn_free(s: &enc);
1928 free(*storep);
1929 *storep = Curl_dyn_ptr(s: &qbuf);
1930 return CURLUE_OK;
1931nomem:
1932 Curl_dyn_free(s: &enc);
1933 return CURLUE_OUT_OF_MEMORY;
1934 }
1935 }
1936
1937 if(what == CURLUPART_HOST) {
1938 size_t n = strlen(s: newp);
1939 if(!n && (flags & CURLU_NO_AUTHORITY)) {
1940 /* Skip hostname check, it's allowed to be empty. */
1941 }
1942 else {
1943 if(!n || hostname_check(u, hostname: (char *)newp, hlen: n)) {
1944 Curl_dyn_free(s: &enc);
1945 return CURLUE_BAD_HOSTNAME;
1946 }
1947 }
1948 }
1949
1950 free(*storep);
1951 *storep = (char *)newp;
1952 }
1953 /* set after the string, to make it not assigned if the allocation above
1954 fails */
1955 if(port)
1956 u->portnum = port;
1957 return CURLUE_OK;
1958}
1959