1/***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) 1998 - 2021, Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ***************************************************************************/
22
23#include "curl_setup.h"
24
25#include "urldata.h"
26#include "urlapi-int.h"
27#include "strcase.h"
28#include "dotdot.h"
29#include "url.h"
30#include "escape.h"
31#include "curl_ctype.h"
32#include "inet_pton.h"
33
34/* The last 3 #include files should be in this order */
35#include "curl_printf.h"
36#include "curl_memory.h"
37#include "memdebug.h"
38
/* MSDOS/Windows style drive prefix, eg c: in c:foo
 *
 * Evaluates to non-zero when 'str' starts with an ASCII letter immediately
 * followed by a colon. The argument is parenthesized so that an expression
 * such as 'ptr + 1' can be passed safely. */
#define STARTS_WITH_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':'))
44
/* Drive prefix as it may show up inside a URL: an ASCII letter, then either
 * ':' or '|', and after that a forward slash, a backslash or the end of the
 * string (NUL). */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  (((((str)[0] >= 'a') && ((str)[0] <= 'z')) || \
    (((str)[0] >= 'A') && ((str)[0] <= 'Z'))) && \
   (((str)[1] == ':') || ((str)[1] == '|')) && \
   (((str)[2] == '/') || ((str)[2] == '\\') || ((str)[2] == 0)))
52
/*
 * Internal representation of CURLU.
 *
 * The component members point to URL-encoded strings; free_urlhandle()
 * releases every one of them with free(). A NULL member means the component
 * is not set.
 */
struct Curl_URL {
  char *scheme;   /* URL scheme, without the "://" */
  char *user;     /* user name from the login part */
  char *password; /* password from the login part */
  char *options;  /* login options (IMAP only?) */
  char *host;     /* host name or numerical address */
  char *zoneid;   /* zone id, for numerical IPv6 addresses */
  char *port;     /* port number as a string */
  char *path;     /* path part */
  char *query;    /* query part, without the leading '?' */
  char *fragment; /* fragment part, without the leading '#' */

  char *scratch;  /* temporary scratch area used while parsing */
  char *temppath; /* temporary path pointer used while parsing */
  long portnum;   /* the numerical version of 'port' */
};
70
/* the scheme used when CURLU_DEFAULT_SCHEME is set and none is present */
#define DEFAULT_SCHEME "https"
72
73static void free_urlhandle(struct Curl_URL *u)
74{
75 free(u->scheme);
76 free(u->user);
77 free(u->password);
78 free(u->options);
79 free(u->host);
80 free(u->zoneid);
81 free(u->port);
82 free(u->path);
83 free(u->query);
84 free(u->fragment);
85 free(u->scratch);
86 free(u->temppath);
87}
88
89/* move the full contents of one handle onto another and
90 free the original */
91static void mv_urlhandle(struct Curl_URL *from,
92 struct Curl_URL *to)
93{
94 free_urlhandle(to);
95 *to = *from;
96 free(from);
97}
98
/*
 * Return a pointer to the character that ends the host name part of 'url':
 * the first '/' or '?' found after the leading "//" (or from the start of the
 * string when there is no "//"). The '?' case covers URLs like
 * http://www.url.com?id=2380
 *
 * When neither character is present, the returned pointer points at the
 * terminating zero byte.
 */
static const char *find_host_sep(const char *url)
{
  /* locate where the host name begins */
  const char *dslash = strstr(url, "//");
  const char *host = dslash ? dslash + 2 : url;

  /* first '/' or '?', whichever comes first, or the end of the string */
  return host + strcspn(host, "/?");
}
126
127/*
128 * Decide in an encoding-independent manner whether a character in an
129 * URL must be escaped. The same criterion must be used in strlen_url()
130 * and strcpy_url().
131 */
132static bool urlchar_needs_escaping(int c)
133{
134 return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
135}
136
137/*
138 * strlen_url() returns the length of the given URL if the spaces within the
139 * URL were properly URL encoded.
140 * URL encoding should be skipped for host names, otherwise IDN resolution
141 * will fail.
142 */
143static size_t strlen_url(const char *url, bool relative)
144{
145 const unsigned char *ptr;
146 size_t newlen = 0;
147 bool left = TRUE; /* left side of the ? */
148 const unsigned char *host_sep = (const unsigned char *) url;
149
150 if(!relative)
151 host_sep = (const unsigned char *) find_host_sep(url);
152
153 for(ptr = (unsigned char *)url; *ptr; ptr++) {
154
155 if(ptr < host_sep) {
156 ++newlen;
157 continue;
158 }
159
160 switch(*ptr) {
161 case '?':
162 left = FALSE;
163 /* FALLTHROUGH */
164 default:
165 if(urlchar_needs_escaping(*ptr))
166 newlen += 2;
167 newlen++;
168 break;
169 case ' ':
170 if(left)
171 newlen += 3;
172 else
173 newlen++;
174 break;
175 }
176 }
177 return newlen;
178}
179
180/* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in
181 * the source URL accordingly.
182 * URL encoding should be skipped for host names, otherwise IDN resolution
183 * will fail.
184 */
185static void strcpy_url(char *output, const char *url, bool relative)
186{
187 /* we must add this with whitespace-replacing */
188 bool left = TRUE;
189 const unsigned char *iptr;
190 char *optr = output;
191 const unsigned char *host_sep = (const unsigned char *) url;
192
193 if(!relative)
194 host_sep = (const unsigned char *) find_host_sep(url);
195
196 for(iptr = (unsigned char *)url; /* read from here */
197 *iptr; /* until zero byte */
198 iptr++) {
199
200 if(iptr < host_sep) {
201 *optr++ = *iptr;
202 continue;
203 }
204
205 switch(*iptr) {
206 case '?':
207 left = FALSE;
208 /* FALLTHROUGH */
209 default:
210 if(urlchar_needs_escaping(*iptr)) {
211 msnprintf(optr, 4, "%%%02x", *iptr);
212 optr += 3;
213 }
214 else
215 *optr++=*iptr;
216 break;
217 case ' ':
218 if(left) {
219 *optr++='%'; /* add a '%' */
220 *optr++='2'; /* add a '2' */
221 *optr++='0'; /* add a '0' */
222 }
223 else
224 *optr++='+'; /* add a '+' here */
225 break;
226 }
227 }
228 *optr = 0; /* null-terminate output buffer */
229
230}
231
232/*
233 * Returns true if the given URL is absolute (as opposed to relative) within
234 * the buffer size. Returns the scheme in the buffer if TRUE and 'buf' is
235 * non-NULL.
236 */
237bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
238{
239 size_t i;
240#ifdef WIN32
241 if(STARTS_WITH_DRIVE_PREFIX(url))
242 return FALSE;
243#endif
244 for(i = 0; i < buflen && url[i]; ++i) {
245 char s = url[i];
246 if((s == ':') && (url[i + 1] == '/')) {
247 if(buf)
248 buf[i] = 0;
249 return TRUE;
250 }
251 /* RFC 3986 3.1 explains:
252 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
253 */
254 else if(ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') ) {
255 if(buf)
256 buf[i] = (char)TOLOWER(s);
257 }
258 else
259 break;
260 }
261 return FALSE;
262}
263
264/*
265 * Concatenate a relative URL to a base URL making it absolute.
266 * URL-encodes any spaces.
267 * The returned pointer must be freed by the caller unless NULL
268 * (returns NULL on out of memory).
269 */
270static char *concat_url(const char *base, const char *relurl)
271{
272 /***
273 TRY to append this new path to the old URL
274 to the right of the host part. Oh crap, this is doomed to cause
275 problems in the future...
276 */
277 char *newest;
278 char *protsep;
279 char *pathsep;
280 size_t newlen;
281 bool host_changed = FALSE;
282
283 const char *useurl = relurl;
284 size_t urllen;
285
286 /* we must make our own copy of the URL to play with, as it may
287 point to read-only data */
288 char *url_clone = strdup(base);
289
290 if(!url_clone)
291 return NULL; /* skip out of this NOW */
292
293 /* protsep points to the start of the host name */
294 protsep = strstr(url_clone, "//");
295 if(!protsep)
296 protsep = url_clone;
297 else
298 protsep += 2; /* pass the slashes */
299
300 if('/' != relurl[0]) {
301 int level = 0;
302
303 /* First we need to find out if there's a ?-letter in the URL,
304 and cut it and the right-side of that off */
305 pathsep = strchr(protsep, '?');
306 if(pathsep)
307 *pathsep = 0;
308
309 /* we have a relative path to append to the last slash if there's one
310 available, or if the new URL is just a query string (starts with a
311 '?') we append the new one at the end of the entire currently worked
312 out URL */
313 if(useurl[0] != '?') {
314 pathsep = strrchr(protsep, '/');
315 if(pathsep)
316 *pathsep = 0;
317 }
318
319 /* Check if there's any slash after the host name, and if so, remember
320 that position instead */
321 pathsep = strchr(protsep, '/');
322 if(pathsep)
323 protsep = pathsep + 1;
324 else
325 protsep = NULL;
326
327 /* now deal with one "./" or any amount of "../" in the newurl
328 and act accordingly */
329
330 if((useurl[0] == '.') && (useurl[1] == '/'))
331 useurl += 2; /* just skip the "./" */
332
333 while((useurl[0] == '.') &&
334 (useurl[1] == '.') &&
335 (useurl[2] == '/')) {
336 level++;
337 useurl += 3; /* pass the "../" */
338 }
339
340 if(protsep) {
341 while(level--) {
342 /* cut off one more level from the right of the original URL */
343 pathsep = strrchr(protsep, '/');
344 if(pathsep)
345 *pathsep = 0;
346 else {
347 *protsep = 0;
348 break;
349 }
350 }
351 }
352 }
353 else {
354 /* We got a new absolute path for this server */
355
356 if(relurl[1] == '/') {
357 /* the new URL starts with //, just keep the protocol part from the
358 original one */
359 *protsep = 0;
360 useurl = &relurl[2]; /* we keep the slashes from the original, so we
361 skip the new ones */
362 host_changed = TRUE;
363 }
364 else {
365 /* cut off the original URL from the first slash, or deal with URLs
366 without slash */
367 pathsep = strchr(protsep, '/');
368 if(pathsep) {
369 /* When people use badly formatted URLs, such as
370 "http://www.url.com?dir=/home/daniel" we must not use the first
371 slash, if there's a ?-letter before it! */
372 char *sep = strchr(protsep, '?');
373 if(sep && (sep < pathsep))
374 pathsep = sep;
375 *pathsep = 0;
376 }
377 else {
378 /* There was no slash. Now, since we might be operating on a badly
379 formatted URL, such as "http://www.url.com?id=2380" which doesn't
380 use a slash separator as it is supposed to, we need to check for a
381 ?-letter as well! */
382 pathsep = strchr(protsep, '?');
383 if(pathsep)
384 *pathsep = 0;
385 }
386 }
387 }
388
389 /* If the new part contains a space, this is a mighty stupid redirect
390 but we still make an effort to do "right". To the left of a '?'
391 letter we replace each space with %20 while it is replaced with '+'
392 on the right side of the '?' letter.
393 */
394 newlen = strlen_url(useurl, !host_changed);
395
396 urllen = strlen(url_clone);
397
398 newest = malloc(urllen + 1 + /* possible slash */
399 newlen + 1 /* zero byte */);
400
401 if(!newest) {
402 free(url_clone); /* don't leak this */
403 return NULL;
404 }
405
406 /* copy over the root url part */
407 memcpy(newest, url_clone, urllen);
408
409 /* check if we need to append a slash */
410 if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
411 ;
412 else
413 newest[urllen++]='/';
414
415 /* then append the new piece on the right side */
416 strcpy_url(&newest[urllen], useurl, !host_changed);
417
418 free(url_clone);
419
420 return newest;
421}
422
423/*
424 * parse_hostname_login()
425 *
426 * Parse the login details (user name, password and options) from the URL and
427 * strip them out of the host name
428 *
429 */
430static CURLUcode parse_hostname_login(struct Curl_URL *u,
431 char **hostname,
432 unsigned int flags)
433{
434 CURLUcode result = CURLUE_OK;
435 CURLcode ccode;
436 char *userp = NULL;
437 char *passwdp = NULL;
438 char *optionsp = NULL;
439 const struct Curl_handler *h = NULL;
440
441 /* At this point, we're hoping all the other special cases have
442 * been taken care of, so conn->host.name is at most
443 * [user[:password][;options]]@]hostname
444 *
445 * We need somewhere to put the embedded details, so do that first.
446 */
447
448 char *ptr = strchr(*hostname, '@');
449 char *login = *hostname;
450
451 if(!ptr)
452 goto out;
453
454 /* We will now try to extract the
455 * possible login information in a string like:
456 * ftp://user:password@ftp.my.site:8021/README */
457 *hostname = ++ptr;
458
459 /* if this is a known scheme, get some details */
460 if(u->scheme)
461 h = Curl_builtin_scheme(u->scheme);
462
463 /* We could use the login information in the URL so extract it. Only parse
464 options if the handler says we should. Note that 'h' might be NULL! */
465 ccode = Curl_parse_login_details(login, ptr - login - 1,
466 &userp, &passwdp,
467 (h && (h->flags & PROTOPT_URLOPTIONS)) ?
468 &optionsp:NULL);
469 if(ccode) {
470 result = CURLUE_MALFORMED_INPUT;
471 goto out;
472 }
473
474 if(userp) {
475 if(flags & CURLU_DISALLOW_USER) {
476 /* Option DISALLOW_USER is set and url contains username. */
477 result = CURLUE_USER_NOT_ALLOWED;
478 goto out;
479 }
480
481 u->user = userp;
482 }
483
484 if(passwdp)
485 u->password = passwdp;
486
487 if(optionsp)
488 u->options = optionsp;
489
490 return CURLUE_OK;
491 out:
492
493 free(userp);
494 free(passwdp);
495 free(optionsp);
496
497 return result;
498}
499
500UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
501 bool has_scheme)
502{
503 char *portptr = NULL;
504 char endbracket;
505 int len;
506
507 /*
508 * Find the end of an IPv6 address, either on the ']' ending bracket or
509 * a percent-encoded zone index.
510 */
511 if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
512 &endbracket, &len)) {
513 if(']' == endbracket)
514 portptr = &hostname[len];
515 else if('%' == endbracket) {
516 int zonelen = len;
517 if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
518 if(']' != endbracket)
519 return CURLUE_MALFORMED_INPUT;
520 portptr = &hostname[--zonelen + len + 1];
521 }
522 else
523 return CURLUE_MALFORMED_INPUT;
524 }
525 else
526 return CURLUE_MALFORMED_INPUT;
527
528 /* this is a RFC2732-style specified IP-address */
529 if(portptr && *portptr) {
530 if(*portptr != ':')
531 return CURLUE_MALFORMED_INPUT;
532 }
533 else
534 portptr = NULL;
535 }
536 else
537 portptr = strchr(hostname, ':');
538
539 if(portptr) {
540 char *rest;
541 long port;
542 char portbuf[7];
543
544 /* Browser behavior adaptation. If there's a colon with no digits after,
545 just cut off the name there which makes us ignore the colon and just
546 use the default port. Firefox, Chrome and Safari all do that.
547
548 Don't do it if the URL has no scheme, to make something that looks like
549 a scheme not work!
550 */
551 if(!portptr[1]) {
552 *portptr = '\0';
553 return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
554 }
555
556 if(!ISDIGIT(portptr[1]))
557 return CURLUE_BAD_PORT_NUMBER;
558
559 port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */
560
561 if((port <= 0) || (port > 0xffff))
562 /* Single unix standard says port numbers are 16 bits long, but we don't
563 treat port zero as OK. */
564 return CURLUE_BAD_PORT_NUMBER;
565
566 if(rest[0])
567 return CURLUE_BAD_PORT_NUMBER;
568
569 *portptr++ = '\0'; /* cut off the name there */
570 *rest = 0;
571 /* generate a new port number string to get rid of leading zeroes etc */
572 msnprintf(portbuf, sizeof(portbuf), "%ld", port);
573 u->portnum = port;
574 u->port = strdup(portbuf);
575 if(!u->port)
576 return CURLUE_OUT_OF_MEMORY;
577 }
578
579 return CURLUE_OK;
580}
581
582/* scan for byte values < 31 or 127 */
583static bool junkscan(const char *part, unsigned int flags)
584{
585 if(part) {
586 static const char badbytes[]={
587 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
588 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
589 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
590 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
591 0x7f, 0x00 /* null-terminate */
592 };
593 size_t n = strlen(part);
594 size_t nfine = strcspn(part, badbytes);
595 if(nfine != n)
596 /* since we don't know which part is scanned, return a generic error
597 code */
598 return TRUE;
599 if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
600 return TRUE;
601 }
602 return FALSE;
603}
604
605static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
606{
607 size_t len;
608 size_t hlen = strlen(hostname);
609
610 if(hostname[0] == '[') {
611#ifdef ENABLE_IPV6
612 char dest[16]; /* fits a binary IPv6 address */
613#endif
614 const char *l = "0123456789abcdefABCDEF:.";
615 if(hlen < 4) /* '[::]' is the shortest possible valid string */
616 return CURLUE_MALFORMED_INPUT;
617 hostname++;
618 hlen -= 2;
619
620 if(hostname[hlen] != ']')
621 return CURLUE_MALFORMED_INPUT;
622
623 /* only valid letters are ok */
624 len = strspn(hostname, l);
625 if(hlen != len) {
626 hlen = len;
627 if(hostname[len] == '%') {
628 /* this could now be '%[zone id]' */
629 char zoneid[16];
630 int i = 0;
631 char *h = &hostname[len + 1];
632 /* pass '25' if present and is a url encoded percent sign */
633 if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
634 h += 2;
635 while(*h && (*h != ']') && (i < 15))
636 zoneid[i++] = *h++;
637 if(!i || (']' != *h))
638 return CURLUE_MALFORMED_INPUT;
639 zoneid[i] = 0;
640 u->zoneid = strdup(zoneid);
641 if(!u->zoneid)
642 return CURLUE_OUT_OF_MEMORY;
643 hostname[len] = ']'; /* insert end bracket */
644 hostname[len + 1] = 0; /* terminate the hostname */
645 }
646 else
647 return CURLUE_MALFORMED_INPUT;
648 /* hostname is fine */
649 }
650#ifdef ENABLE_IPV6
651 hostname[hlen] = 0; /* end the address there */
652 if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
653 return CURLUE_MALFORMED_INPUT;
654 hostname[hlen] = ']'; /* restore ending bracket */
655#endif
656 }
657 else {
658 /* letters from the second string is not ok */
659 len = strcspn(hostname, " ");
660 if(hlen != len)
661 /* hostname with bad content */
662 return CURLUE_MALFORMED_INPUT;
663 }
664 if(!hostname[0])
665 return CURLUE_NO_HOST;
666 return CURLUE_OK;
667}
668
/* non-zero for the characters that end the host name part when a URL is
   parsed: the start of the path, the query or the fragment */
#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
670
671/*
672 * Handle partial IPv4 numerical addresses and different bases, like
673 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
674 *
675 * If the given input string is syntactically wrong or any part for example is
676 * too big, this function returns FALSE and doesn't create any output.
677 *
678 * Output the "normalized" version of that input string in plain quad decimal
679 * integers and return TRUE.
680 */
681static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
682{
683 bool done = FALSE;
684 int n = 0;
685 const char *c = hostname;
686 unsigned long parts[4] = {0, 0, 0, 0};
687
688 while(!done) {
689 char *endp;
690 unsigned long l;
691 if((*c < '0') || (*c > '9'))
692 /* most importantly this doesn't allow a leading plus or minus */
693 return FALSE;
694 l = strtoul(c, &endp, 0);
695
696 /* overflow or nothing parsed at all */
697 if(((l == ULONG_MAX) && (errno == ERANGE)) || (endp == c))
698 return FALSE;
699
700#if SIZEOF_LONG > 4
701 /* a value larger than 32 bits */
702 if(l > UINT_MAX)
703 return FALSE;
704#endif
705
706 parts[n] = l;
707 c = endp;
708
709 switch (*c) {
710 case '.' :
711 if(n == 3)
712 return FALSE;
713 n++;
714 c++;
715 break;
716
717 case '\0':
718 done = TRUE;
719 break;
720
721 default:
722 return FALSE;
723 }
724 }
725
726 /* this is deemed a valid IPv4 numerical address */
727
728 switch(n) {
729 case 0: /* a -- 32 bits */
730 msnprintf(outp, olen, "%u.%u.%u.%u",
731 parts[0] >> 24, (parts[0] >> 16) & 0xff,
732 (parts[0] >> 8) & 0xff, parts[0] & 0xff);
733 break;
734 case 1: /* a.b -- 8.24 bits */
735 if((parts[0] > 0xff) || (parts[1] > 0xffffff))
736 return FALSE;
737 msnprintf(outp, olen, "%u.%u.%u.%u",
738 parts[0], (parts[1] >> 16) & 0xff,
739 (parts[1] >> 8) & 0xff, parts[1] & 0xff);
740 break;
741 case 2: /* a.b.c -- 8.8.16 bits */
742 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
743 return FALSE;
744 msnprintf(outp, olen, "%u.%u.%u.%u",
745 parts[0], parts[1], (parts[2] >> 8) & 0xff,
746 parts[2] & 0xff);
747 break;
748 case 3: /* a.b.c.d -- 8.8.8.8 bits */
749 if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
750 (parts[3] > 0xff))
751 return FALSE;
752 msnprintf(outp, olen, "%u.%u.%u.%u",
753 parts[0], parts[1], parts[2], parts[3]);
754 break;
755 }
756 return TRUE;
757}
758
759static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
760{
761 char *path;
762 bool path_alloced = FALSE;
763 char *hostname;
764 char *query = NULL;
765 char *fragment = NULL;
766 CURLUcode result;
767 bool url_has_scheme = FALSE;
768 char schemebuf[MAX_SCHEME_LEN + 1];
769 const char *schemep = NULL;
770 size_t schemelen = 0;
771 size_t urllen;
772
773 DEBUGASSERT(url);
774
775 /*************************************************************
776 * Parse the URL.
777 ************************************************************/
778 /* allocate scratch area */
779 urllen = strlen(url);
780 if(urllen > CURL_MAX_INPUT_LENGTH)
781 /* excessive input length */
782 return CURLUE_MALFORMED_INPUT;
783
784 path = u->scratch = malloc(urllen * 2 + 2);
785 if(!path)
786 return CURLUE_OUT_OF_MEMORY;
787
788 hostname = &path[urllen + 1];
789 hostname[0] = 0;
790
791 if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
792 url_has_scheme = TRUE;
793 schemelen = strlen(schemebuf);
794 }
795
796 /* handle the file: scheme */
797 if(url_has_scheme && strcasecompare(schemebuf, "file")) {
798 /* path has been allocated large enough to hold this */
799 strcpy(path, &url[5]);
800
801 hostname = NULL; /* no host for file: URLs */
802 u->scheme = strdup("file");
803 if(!u->scheme)
804 return CURLUE_OUT_OF_MEMORY;
805
806 /* Extra handling URLs with an authority component (i.e. that start with
807 * "file://")
808 *
809 * We allow omitted hostname (e.g. file:/<path>) -- valid according to
810 * RFC 8089, but not the (current) WHAT-WG URL spec.
811 */
812 if(path[0] == '/' && path[1] == '/') {
813 /* swallow the two slashes */
814 char *ptr = &path[2];
815
816 /*
817 * According to RFC 8089, a file: URL can be reliably dereferenced if:
818 *
819 * o it has no/blank hostname, or
820 *
821 * o the hostname matches "localhost" (case-insensitively), or
822 *
823 * o the hostname is a FQDN that resolves to this machine.
824 *
825 * For brevity, we only consider URLs with empty, "localhost", or
826 * "127.0.0.1" hostnames as local.
827 *
828 * Additionally, there is an exception for URLs with a Windows drive
829 * letter in the authority (which was accidentally omitted from RFC 8089
830 * Appendix E, but believe me, it was meant to be there. --MK)
831 */
832 if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
833 /* the URL includes a host name, it must match "localhost" or
834 "127.0.0.1" to be valid */
835 if(!checkprefix("localhost/", ptr) &&
836 !checkprefix("127.0.0.1/", ptr)) {
837 /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
838 none */
839 return CURLUE_MALFORMED_INPUT;
840 }
841 ptr += 9; /* now points to the slash after the host */
842 }
843
844 path = ptr;
845 }
846
847#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
848 /* Don't allow Windows drive letters when not in Windows.
849 * This catches both "file:/c:" and "file:c:" */
850 if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
851 STARTS_WITH_URL_DRIVE_PREFIX(path)) {
852 /* File drive letters are only accepted in MSDOS/Windows */
853 return CURLUE_MALFORMED_INPUT;
854 }
855#else
856 /* If the path starts with a slash and a drive letter, ditch the slash */
857 if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
858 /* This cannot be done with strcpy, as the memory chunks overlap! */
859 memmove(path, &path[1], strlen(&path[1]) + 1);
860 }
861#endif
862
863 }
864 else {
865 /* clear path */
866 const char *p;
867 const char *hostp;
868 size_t len;
869 path[0] = 0;
870
871 if(url_has_scheme) {
872 int i = 0;
873 p = &url[schemelen + 1];
874 while(p && (*p == '/') && (i < 4)) {
875 p++;
876 i++;
877 }
878 if((i < 1) || (i>3))
879 /* less than one or more than three slashes */
880 return CURLUE_MALFORMED_INPUT;
881
882 schemep = schemebuf;
883 if(!Curl_builtin_scheme(schemep) &&
884 !(flags & CURLU_NON_SUPPORT_SCHEME))
885 return CURLUE_UNSUPPORTED_SCHEME;
886
887 if(junkscan(schemep, flags))
888 return CURLUE_MALFORMED_INPUT;
889 }
890 else {
891 /* no scheme! */
892
893 if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))
894 return CURLUE_MALFORMED_INPUT;
895 if(flags & CURLU_DEFAULT_SCHEME)
896 schemep = DEFAULT_SCHEME;
897
898 /*
899 * The URL was badly formatted, let's try without scheme specified.
900 */
901 p = url;
902 }
903 hostp = p; /* host name starts here */
904
905 while(*p && !HOSTNAME_END(*p)) /* find end of host name */
906 p++;
907
908 len = p - hostp;
909 if(len) {
910 memcpy(hostname, hostp, len);
911 hostname[len] = 0;
912 }
913 else {
914 if(!(flags & CURLU_NO_AUTHORITY))
915 return CURLUE_MALFORMED_INPUT;
916 }
917
918 len = strlen(p);
919 memcpy(path, p, len);
920 path[len] = 0;
921
922 if(schemep) {
923 u->scheme = strdup(schemep);
924 if(!u->scheme)
925 return CURLUE_OUT_OF_MEMORY;
926 }
927 }
928
929 if(junkscan(path, flags))
930 return CURLUE_MALFORMED_INPUT;
931
932 if((flags & CURLU_URLENCODE) && path[0]) {
933 /* worst case output length is 3x the original! */
934 char *newp = malloc(strlen(path) * 3);
935 if(!newp)
936 return CURLUE_OUT_OF_MEMORY;
937 path_alloced = TRUE;
938 strcpy_url(newp, path, TRUE); /* consider it relative */
939 u->temppath = path = newp;
940 }
941
942 fragment = strchr(path, '#');
943 if(fragment) {
944 *fragment++ = 0;
945 if(fragment[0]) {
946 u->fragment = strdup(fragment);
947 if(!u->fragment)
948 return CURLUE_OUT_OF_MEMORY;
949 }
950 }
951
952 query = strchr(path, '?');
953 if(query) {
954 *query++ = 0;
955 /* done even if the query part is a blank string */
956 u->query = strdup(query);
957 if(!u->query)
958 return CURLUE_OUT_OF_MEMORY;
959 }
960
961 if(!path[0])
962 /* if there's no path left set, unset */
963 path = NULL;
964 else {
965 if(!(flags & CURLU_PATH_AS_IS)) {
966 /* remove ../ and ./ sequences according to RFC3986 */
967 char *newp = Curl_dedotdotify(path);
968 if(!newp)
969 return CURLUE_OUT_OF_MEMORY;
970
971 if(strcmp(newp, path)) {
972 /* if we got a new version */
973 if(path_alloced)
974 Curl_safefree(u->temppath);
975 u->temppath = path = newp;
976 path_alloced = TRUE;
977 }
978 else
979 free(newp);
980 }
981
982 u->path = path_alloced?path:strdup(path);
983 if(!u->path)
984 return CURLUE_OUT_OF_MEMORY;
985 u->temppath = NULL; /* used now */
986 }
987
988 if(hostname) {
989 char normalized_ipv4[sizeof("255.255.255.255") + 1];
990 /*
991 * Parse the login details and strip them out of the host name.
992 */
993 if(junkscan(hostname, flags))
994 return CURLUE_MALFORMED_INPUT;
995
996 result = parse_hostname_login(u, &hostname, flags);
997 if(result)
998 return result;
999
1000 result = Curl_parse_port(u, hostname, url_has_scheme);
1001 if(result)
1002 return result;
1003
1004 if(0 == strlen(hostname) && (flags & CURLU_NO_AUTHORITY)) {
1005 /* Skip hostname check, it's allowed to be empty. */
1006 }
1007 else {
1008 result = hostname_check(u, hostname);
1009 if(result)
1010 return result;
1011 }
1012
1013 if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
1014 u->host = strdup(normalized_ipv4);
1015 else
1016 u->host = strdup(hostname);
1017 if(!u->host)
1018 return CURLUE_OUT_OF_MEMORY;
1019
1020 if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1021 /* legacy curl-style guess based on host name */
1022 if(checkprefix("ftp.", hostname))
1023 schemep = "ftp";
1024 else if(checkprefix("dict.", hostname))
1025 schemep = "dict";
1026 else if(checkprefix("ldap.", hostname))
1027 schemep = "ldap";
1028 else if(checkprefix("imap.", hostname))
1029 schemep = "imap";
1030 else if(checkprefix("smtp.", hostname))
1031 schemep = "smtp";
1032 else if(checkprefix("pop3.", hostname))
1033 schemep = "pop3";
1034 else
1035 schemep = "http";
1036
1037 u->scheme = strdup(schemep);
1038 if(!u->scheme)
1039 return CURLUE_OUT_OF_MEMORY;
1040 }
1041 }
1042
1043 Curl_safefree(u->scratch);
1044 Curl_safefree(u->temppath);
1045
1046 return CURLUE_OK;
1047}
1048
1049/*
1050 * Parse the URL and set the relevant members of the Curl_URL struct.
1051 */
1052static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
1053{
1054 CURLUcode result = seturl(url, u, flags);
1055 if(result) {
1056 free_urlhandle(u);
1057 memset(u, 0, sizeof(struct Curl_URL));
1058 }
1059 return result;
1060}
1061
1062/*
1063 */
1064CURLU *curl_url(void)
1065{
1066 return calloc(sizeof(struct Curl_URL), 1);
1067}
1068
1069void curl_url_cleanup(CURLU *u)
1070{
1071 if(u) {
1072 free_urlhandle(u);
1073 free(u);
1074 }
1075}
1076
/*
 * Duplicate the string member 'name' from the struct pointed to by 'src'
 * into the struct pointed to by 'dest'. A NULL member is left untouched in
 * 'dest'. On allocation failure, this jumps to a 'fail' label that the
 * function using the macro must provide.
 *
 * The pointer arguments are parenthesized so that expressions such as
 * '&handle' can be passed.
 */
#define DUP(dest, src, name)                    \
  do {                                          \
    if((src)->name) {                           \
      (dest)->name = strdup((src)->name);       \
      if(!(dest)->name)                         \
        goto fail;                              \
    }                                           \
  } while(0)
1085
1086CURLU *curl_url_dup(CURLU *in)
1087{
1088 struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
1089 if(u) {
1090 DUP(u, in, scheme);
1091 DUP(u, in, user);
1092 DUP(u, in, password);
1093 DUP(u, in, options);
1094 DUP(u, in, host);
1095 DUP(u, in, port);
1096 DUP(u, in, path);
1097 DUP(u, in, query);
1098 DUP(u, in, fragment);
1099 u->portnum = in->portnum;
1100 }
1101 return u;
1102 fail:
1103 curl_url_cleanup(u);
1104 return NULL;
1105}
1106
1107CURLUcode curl_url_get(CURLU *u, CURLUPart what,
1108 char **part, unsigned int flags)
1109{
1110 char *ptr;
1111 CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1112 char portbuf[7];
1113 bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1114 bool plusdecode = FALSE;
1115 (void)flags;
1116 if(!u)
1117 return CURLUE_BAD_HANDLE;
1118 if(!part)
1119 return CURLUE_BAD_PARTPOINTER;
1120 *part = NULL;
1121
1122 switch(what) {
1123 case CURLUPART_SCHEME:
1124 ptr = u->scheme;
1125 ifmissing = CURLUE_NO_SCHEME;
1126 urldecode = FALSE; /* never for schemes */
1127 break;
1128 case CURLUPART_USER:
1129 ptr = u->user;
1130 ifmissing = CURLUE_NO_USER;
1131 break;
1132 case CURLUPART_PASSWORD:
1133 ptr = u->password;
1134 ifmissing = CURLUE_NO_PASSWORD;
1135 break;
1136 case CURLUPART_OPTIONS:
1137 ptr = u->options;
1138 ifmissing = CURLUE_NO_OPTIONS;
1139 break;
1140 case CURLUPART_HOST:
1141 ptr = u->host;
1142 ifmissing = CURLUE_NO_HOST;
1143 break;
1144 case CURLUPART_ZONEID:
1145 ptr = u->zoneid;
1146 break;
1147 case CURLUPART_PORT:
1148 ptr = u->port;
1149 ifmissing = CURLUE_NO_PORT;
1150 urldecode = FALSE; /* never for port */
1151 if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1152 /* there's no stored port number, but asked to deliver
1153 a default one for the scheme */
1154 const struct Curl_handler *h =
1155 Curl_builtin_scheme(u->scheme);
1156 if(h) {
1157 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1158 ptr = portbuf;
1159 }
1160 }
1161 else if(ptr && u->scheme) {
1162 /* there is a stored port number, but ask to inhibit if
1163 it matches the default one for the scheme */
1164 const struct Curl_handler *h =
1165 Curl_builtin_scheme(u->scheme);
1166 if(h && (h->defport == u->portnum) &&
1167 (flags & CURLU_NO_DEFAULT_PORT))
1168 ptr = NULL;
1169 }
1170 break;
1171 case CURLUPART_PATH:
1172 ptr = u->path;
1173 if(!ptr) {
1174 ptr = u->path = strdup("/");
1175 if(!u->path)
1176 return CURLUE_OUT_OF_MEMORY;
1177 }
1178 break;
1179 case CURLUPART_QUERY:
1180 ptr = u->query;
1181 ifmissing = CURLUE_NO_QUERY;
1182 plusdecode = urldecode;
1183 break;
1184 case CURLUPART_FRAGMENT:
1185 ptr = u->fragment;
1186 ifmissing = CURLUE_NO_FRAGMENT;
1187 break;
1188 case CURLUPART_URL: {
1189 char *url;
1190 char *scheme;
1191 char *options = u->options;
1192 char *port = u->port;
1193 char *allochost = NULL;
1194 if(u->scheme && strcasecompare("file", u->scheme)) {
1195 url = aprintf("file://%s%s%s",
1196 u->path,
1197 u->fragment? "#": "",
1198 u->fragment? u->fragment : "");
1199 }
1200 else if(!u->host)
1201 return CURLUE_NO_HOST;
1202 else {
1203 const struct Curl_handler *h = NULL;
1204 if(u->scheme)
1205 scheme = u->scheme;
1206 else if(flags & CURLU_DEFAULT_SCHEME)
1207 scheme = (char *) DEFAULT_SCHEME;
1208 else
1209 return CURLUE_NO_SCHEME;
1210
1211 h = Curl_builtin_scheme(scheme);
1212 if(!port && (flags & CURLU_DEFAULT_PORT)) {
1213 /* there's no stored port number, but asked to deliver
1214 a default one for the scheme */
1215 if(h) {
1216 msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1217 port = portbuf;
1218 }
1219 }
1220 else if(port) {
1221 /* there is a stored port number, but asked to inhibit if it matches
1222 the default one for the scheme */
1223 if(h && (h->defport == u->portnum) &&
1224 (flags & CURLU_NO_DEFAULT_PORT))
1225 port = NULL;
1226 }
1227
1228 if(h && !(h->flags & PROTOPT_URLOPTIONS))
1229 options = NULL;
1230
1231 if((u->host[0] == '[') && u->zoneid) {
1232 /* make it '[ host %25 zoneid ]' */
1233 size_t hostlen = strlen(u->host);
1234 size_t alen = hostlen + 3 + strlen(u->zoneid) + 1;
1235 allochost = malloc(alen);
1236 if(!allochost)
1237 return CURLUE_OUT_OF_MEMORY;
1238 memcpy(allochost, u->host, hostlen - 1);
1239 msnprintf(&allochost[hostlen - 1], alen - hostlen + 1,
1240 "%%25%s]", u->zoneid);
1241 }
1242
1243 url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1244 scheme,
1245 u->user ? u->user : "",
1246 u->password ? ":": "",
1247 u->password ? u->password : "",
1248 options ? ";" : "",
1249 options ? options : "",
1250 (u->user || u->password || options) ? "@": "",
1251 allochost ? allochost : u->host,
1252 port ? ":": "",
1253 port ? port : "",
1254 (u->path && (u->path[0] != '/')) ? "/": "",
1255 u->path ? u->path : "/",
1256 (u->query && u->query[0]) ? "?": "",
1257 (u->query && u->query[0]) ? u->query : "",
1258 u->fragment? "#": "",
1259 u->fragment? u->fragment : "");
1260 free(allochost);
1261 }
1262 if(!url)
1263 return CURLUE_OUT_OF_MEMORY;
1264 *part = url;
1265 return CURLUE_OK;
1266 }
1267 default:
1268 ptr = NULL;
1269 break;
1270 }
1271 if(ptr) {
1272 *part = strdup(ptr);
1273 if(!*part)
1274 return CURLUE_OUT_OF_MEMORY;
1275 if(plusdecode) {
1276 /* convert + to space */
1277 char *plus;
1278 for(plus = *part; *plus; ++plus) {
1279 if(*plus == '+')
1280 *plus = ' ';
1281 }
1282 }
1283 if(urldecode) {
1284 char *decoded;
1285 size_t dlen;
1286 /* this unconditional rejection of control bytes is documented
1287 API behavior */
1288 CURLcode res = Curl_urldecode(NULL, *part, 0, &decoded, &dlen,
1289 REJECT_CTRL);
1290 free(*part);
1291 if(res) {
1292 *part = NULL;
1293 return CURLUE_URLDECODE;
1294 }
1295 *part = decoded;
1296 }
1297 return CURLUE_OK;
1298 }
1299 else
1300 return ifmissing;
1301}
1302
1303CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1304 const char *part, unsigned int flags)
1305{
1306 char **storep = NULL;
1307 long port = 0;
1308 bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1309 bool plusencode = FALSE;
1310 bool urlskipslash = FALSE;
1311 bool appendquery = FALSE;
1312 bool equalsencode = FALSE;
1313
1314 if(!u)
1315 return CURLUE_BAD_HANDLE;
1316 if(!part) {
1317 /* setting a part to NULL clears it */
1318 switch(what) {
1319 case CURLUPART_URL:
1320 break;
1321 case CURLUPART_SCHEME:
1322 storep = &u->scheme;
1323 break;
1324 case CURLUPART_USER:
1325 storep = &u->user;
1326 break;
1327 case CURLUPART_PASSWORD:
1328 storep = &u->password;
1329 break;
1330 case CURLUPART_OPTIONS:
1331 storep = &u->options;
1332 break;
1333 case CURLUPART_HOST:
1334 storep = &u->host;
1335 break;
1336 case CURLUPART_ZONEID:
1337 storep = &u->zoneid;
1338 break;
1339 case CURLUPART_PORT:
1340 u->portnum = 0;
1341 storep = &u->port;
1342 break;
1343 case CURLUPART_PATH:
1344 storep = &u->path;
1345 break;
1346 case CURLUPART_QUERY:
1347 storep = &u->query;
1348 break;
1349 case CURLUPART_FRAGMENT:
1350 storep = &u->fragment;
1351 break;
1352 default:
1353 return CURLUE_UNKNOWN_PART;
1354 }
1355 if(storep && *storep) {
1356 Curl_safefree(*storep);
1357 }
1358 return CURLUE_OK;
1359 }
1360
1361 switch(what) {
1362 case CURLUPART_SCHEME:
1363 if(strlen(part) > MAX_SCHEME_LEN)
1364 /* too long */
1365 return CURLUE_MALFORMED_INPUT;
1366 if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
1367 /* verify that it is a fine scheme */
1368 !Curl_builtin_scheme(part))
1369 return CURLUE_UNSUPPORTED_SCHEME;
1370 storep = &u->scheme;
1371 urlencode = FALSE; /* never */
1372 break;
1373 case CURLUPART_USER:
1374 storep = &u->user;
1375 break;
1376 case CURLUPART_PASSWORD:
1377 storep = &u->password;
1378 break;
1379 case CURLUPART_OPTIONS:
1380 storep = &u->options;
1381 break;
1382 case CURLUPART_HOST:
1383 storep = &u->host;
1384 Curl_safefree(u->zoneid);
1385 break;
1386 case CURLUPART_ZONEID:
1387 storep = &u->zoneid;
1388 break;
1389 case CURLUPART_PORT:
1390 {
1391 char *endp;
1392 urlencode = FALSE; /* never */
1393 port = strtol(part, &endp, 10); /* Port number must be decimal */
1394 if((port <= 0) || (port > 0xffff))
1395 return CURLUE_BAD_PORT_NUMBER;
1396 if(*endp)
1397 /* weirdly provided number, not good! */
1398 return CURLUE_MALFORMED_INPUT;
1399 storep = &u->port;
1400 }
1401 break;
1402 case CURLUPART_PATH:
1403 urlskipslash = TRUE;
1404 storep = &u->path;
1405 break;
1406 case CURLUPART_QUERY:
1407 plusencode = urlencode;
1408 appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1409 equalsencode = appendquery;
1410 storep = &u->query;
1411 break;
1412 case CURLUPART_FRAGMENT:
1413 storep = &u->fragment;
1414 break;
1415 case CURLUPART_URL: {
1416 /*
1417 * Allow a new URL to replace the existing (if any) contents.
1418 *
1419 * If the existing contents is enough for a URL, allow a relative URL to
1420 * replace it.
1421 */
1422 CURLUcode result;
1423 char *oldurl;
1424 char *redired_url;
1425 CURLU *handle2;
1426
1427 if(Curl_is_absolute_url(part, NULL, MAX_SCHEME_LEN + 1)) {
1428 handle2 = curl_url();
1429 if(!handle2)
1430 return CURLUE_OUT_OF_MEMORY;
1431 result = parseurl(part, handle2, flags);
1432 if(!result)
1433 mv_urlhandle(handle2, u);
1434 else
1435 curl_url_cleanup(handle2);
1436 return result;
1437 }
1438 /* extract the full "old" URL to do the redirect on */
1439 result = curl_url_get(u, CURLUPART_URL, &oldurl, flags);
1440 if(result) {
1441 /* couldn't get the old URL, just use the new! */
1442 handle2 = curl_url();
1443 if(!handle2)
1444 return CURLUE_OUT_OF_MEMORY;
1445 result = parseurl(part, handle2, flags);
1446 if(!result)
1447 mv_urlhandle(handle2, u);
1448 else
1449 curl_url_cleanup(handle2);
1450 return result;
1451 }
1452
1453 /* apply the relative part to create a new URL */
1454 redired_url = concat_url(oldurl, part);
1455 free(oldurl);
1456 if(!redired_url)
1457 return CURLUE_OUT_OF_MEMORY;
1458
1459 /* now parse the new URL */
1460 handle2 = curl_url();
1461 if(!handle2) {
1462 free(redired_url);
1463 return CURLUE_OUT_OF_MEMORY;
1464 }
1465 result = parseurl(redired_url, handle2, flags);
1466 free(redired_url);
1467 if(!result)
1468 mv_urlhandle(handle2, u);
1469 else
1470 curl_url_cleanup(handle2);
1471 return result;
1472 }
1473 default:
1474 return CURLUE_UNKNOWN_PART;
1475 }
1476 DEBUGASSERT(storep);
1477 {
1478 const char *newp = part;
1479 size_t nalloc = strlen(part);
1480
1481 if(nalloc > CURL_MAX_INPUT_LENGTH)
1482 /* excessive input length */
1483 return CURLUE_MALFORMED_INPUT;
1484
1485 if(urlencode) {
1486 const unsigned char *i;
1487 char *o;
1488 char *enc = malloc(nalloc * 3 + 1); /* for worst case! */
1489 if(!enc)
1490 return CURLUE_OUT_OF_MEMORY;
1491 for(i = (const unsigned char *)part, o = enc; *i; i++) {
1492 if((*i == ' ') && plusencode) {
1493 *o = '+';
1494 o++;
1495 }
1496 else if(Curl_isunreserved(*i) ||
1497 ((*i == '/') && urlskipslash) ||
1498 ((*i == '=') && equalsencode)) {
1499 if((*i == '=') && equalsencode)
1500 /* only skip the first equals sign */
1501 equalsencode = FALSE;
1502 *o = *i;
1503 o++;
1504 }
1505 else {
1506 msnprintf(o, 4, "%%%02x", *i);
1507 o += 3;
1508 }
1509 }
1510 *o = 0; /* null-terminate */
1511 newp = enc;
1512 }
1513 else {
1514 char *p;
1515 newp = strdup(part);
1516 if(!newp)
1517 return CURLUE_OUT_OF_MEMORY;
1518 p = (char *)newp;
1519 while(*p) {
1520 /* make sure percent encoded are lower case */
1521 if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1522 (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1523 p[1] = (char)TOLOWER(p[1]);
1524 p[2] = (char)TOLOWER(p[2]);
1525 p += 3;
1526 }
1527 else
1528 p++;
1529 }
1530 }
1531
1532 if(appendquery) {
1533 /* Append the string onto the old query. Add a '&' separator if none is
1534 present at the end of the exsting query already */
1535 size_t querylen = u->query ? strlen(u->query) : 0;
1536 bool addamperand = querylen && (u->query[querylen -1] != '&');
1537 if(querylen) {
1538 size_t newplen = strlen(newp);
1539 char *p = malloc(querylen + addamperand + newplen + 1);
1540 if(!p) {
1541 free((char *)newp);
1542 return CURLUE_OUT_OF_MEMORY;
1543 }
1544 strcpy(p, u->query); /* original query */
1545 if(addamperand)
1546 p[querylen] = '&'; /* ampersand */
1547 strcpy(&p[querylen + addamperand], newp); /* new suffix */
1548 free((char *)newp);
1549 free(*storep);
1550 *storep = p;
1551 return CURLUE_OK;
1552 }
1553 }
1554
1555 if(what == CURLUPART_HOST) {
1556 if(0 == strlen(newp) && (flags & CURLU_NO_AUTHORITY)) {
1557 /* Skip hostname check, it's allowed to be empty. */
1558 }
1559 else {
1560 if(hostname_check(u, (char *)newp)) {
1561 free((char *)newp);
1562 return CURLUE_MALFORMED_INPUT;
1563 }
1564 }
1565 }
1566
1567 free(*storep);
1568 *storep = (char *)newp;
1569 }
1570 /* set after the string, to make it not assigned if the allocation above
1571 fails */
1572 if(port)
1573 u->portnum = port;
1574 return CURLUE_OK;
1575}
1576