1/***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.haxx.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ***************************************************************************/
22#include "tool_setup.h"
23
24#define ENABLE_CURLX_PRINTF
25/* use our own printf() functions */
26#include "curlx.h"
27#include "tool_cfgable.h"
28#include "tool_doswin.h"
29#include "tool_urlglob.h"
30#include "tool_vms.h"
31
32#include "memdebug.h" /* keep this as LAST include */
33
/*
 * GLOBERROR() stores an error message and the URL column it refers to in the
 * 'glob' object that is in scope at the point of use, and then evaluates to
 * 'code'. It is meant to be used as "return GLOBERROR(msg, column, code);".
 *
 * The arguments and the whole expansion are parenthesized so the macro
 * behaves as one expression wherever it is used.
 */
#define GLOBERROR(string, column, code) \
  (glob->error = (string), glob->pos = (column), (code))
36
37static CURLcode glob_fixed(URLGlob *glob, char *fixed, size_t len)
38{
39 URLPattern *pat = &glob->pattern[glob->size];
40 pat->type = UPTSet;
41 pat->content.Set.size = 1;
42 pat->content.Set.ptr_s = 0;
43 pat->globindex = -1;
44
45 pat->content.Set.elements = malloc(sizeof(char *));
46
47 if(!pat->content.Set.elements)
48 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
49
50 pat->content.Set.elements[0] = malloc(len + 1);
51 if(!pat->content.Set.elements[0])
52 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
53
54 memcpy(pat->content.Set.elements[0], fixed, len);
55 pat->content.Set.elements[0][len] = 0;
56
57 return CURLE_OK;
58}
59
/*
 * multiply()
 *
 * Multiplies *amount by 'with' using unsigned long arithmetic and stores the
 * product back in *amount.
 *
 * Returns 0 on success. Returns 1, leaving *amount untouched, if the product
 * does not fit in an unsigned long. Multiplying by zero stores zero and
 * succeeds.
 */
static int multiply(unsigned long *amount, long with)
{
  const unsigned long factor = (unsigned long)with;
  unsigned long product;

  if(factor == 0) {
    *amount = 0;
    return 0;
  }

  product = *amount * factor;
  if(product / factor != *amount)
    return 1; /* the product wrapped around, it did not fit */

  *amount = product;
  return 0;
}
76
77static CURLcode glob_set(URLGlob *glob, char **patternp,
78 size_t *posp, unsigned long *amount,
79 int globindex)
80{
81 /* processes a set expression with the point behind the opening '{'
82 ','-separated elements are collected until the next closing '}'
83 */
84 URLPattern *pat;
85 bool done = FALSE;
86 char *buf = glob->glob_buffer;
87 char *pattern = *patternp;
88 char *opattern = pattern;
89 size_t opos = *posp-1;
90
91 pat = &glob->pattern[glob->size];
92 /* patterns 0,1,2,... correspond to size=1,3,5,... */
93 pat->type = UPTSet;
94 pat->content.Set.size = 0;
95 pat->content.Set.ptr_s = 0;
96 pat->content.Set.elements = NULL;
97 pat->globindex = globindex;
98
99 while(!done) {
100 switch (*pattern) {
101 case '\0': /* URL ended while set was still open */
102 return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT);
103
104 case '{':
105 case '[': /* no nested expressions at this time */
106 return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT);
107
108 case '}': /* set element completed */
109 if(opattern == pattern)
110 return GLOBERROR("empty string within braces", *posp,
111 CURLE_URL_MALFORMAT);
112
113 /* add 1 to size since it'll be incremented below */
114 if(multiply(amount, pat->content.Set.size + 1))
115 return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT);
116
117 /* FALLTHROUGH */
118 case ',':
119
120 *buf = '\0';
121 if(pat->content.Set.elements) {
122 char **new_arr = realloc(pat->content.Set.elements,
123 (pat->content.Set.size + 1) * sizeof(char *));
124 if(!new_arr)
125 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
126
127 pat->content.Set.elements = new_arr;
128 }
129 else
130 pat->content.Set.elements = malloc(sizeof(char *));
131
132 if(!pat->content.Set.elements)
133 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
134
135 pat->content.Set.elements[pat->content.Set.size] =
136 strdup(glob->glob_buffer);
137 if(!pat->content.Set.elements[pat->content.Set.size])
138 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY);
139 ++pat->content.Set.size;
140
141 if(*pattern == '}') {
142 pattern++; /* pass the closing brace */
143 done = TRUE;
144 continue;
145 }
146
147 buf = glob->glob_buffer;
148 ++pattern;
149 ++(*posp);
150 break;
151
152 case ']': /* illegal closing bracket */
153 return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT);
154
155 case '\\': /* escaped character, skip '\' */
156 if(pattern[1]) {
157 ++pattern;
158 ++(*posp);
159 }
160 /* FALLTHROUGH */
161 default:
162 *buf++ = *pattern++; /* copy character to set element */
163 ++(*posp);
164 }
165 }
166
167 *patternp = pattern; /* return with the new position */
168 return CURLE_OK;
169}
170
171static CURLcode glob_range(URLGlob *glob, char **patternp,
172 size_t *posp, unsigned long *amount,
173 int globindex)
174{
175 /* processes a range expression with the point behind the opening '['
176 - char range: e.g. "a-z]", "B-Q]"
177 - num range: e.g. "0-9]", "17-2000]"
178 - num range with leading zeros: e.g. "001-999]"
179 expression is checked for well-formedness and collected until the next ']'
180 */
181 URLPattern *pat;
182 int rc;
183 char *pattern = *patternp;
184 char *c;
185
186 pat = &glob->pattern[glob->size];
187 pat->globindex = globindex;
188
189 if(ISALPHA(*pattern)) {
190 /* character range detected */
191 char min_c;
192 char max_c;
193 char end_c;
194 unsigned long step = 1;
195
196 pat->type = UPTCharRange;
197
198 rc = sscanf(pattern, "%c-%c%c", &min_c, &max_c, &end_c);
199
200 if(rc == 3) {
201 if(end_c == ':') {
202 char *endp;
203 errno = 0;
204 step = strtoul(&pattern[4], &endp, 10);
205 if(errno || &pattern[4] == endp || *endp != ']')
206 step = 0;
207 else
208 pattern = endp + 1;
209 }
210 else if(end_c != ']')
211 /* then this is wrong */
212 rc = 0;
213 else
214 /* end_c == ']' */
215 pattern += 4;
216 }
217
218 *posp += (pattern - *patternp);
219
220 if(rc != 3 || !step || step > (unsigned)INT_MAX ||
221 (min_c == max_c && step != 1) ||
222 (min_c != max_c && (min_c > max_c || step > (unsigned)(max_c - min_c) ||
223 (max_c - min_c) > ('z' - 'a'))))
224 /* the pattern is not well-formed */
225 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
226
227 /* if there was a ":[num]" thing, use that as step or else use 1 */
228 pat->content.CharRange.step = (int)step;
229 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c;
230 pat->content.CharRange.max_c = max_c;
231
232 if(multiply(amount, ((pat->content.CharRange.max_c -
233 pat->content.CharRange.min_c) /
234 pat->content.CharRange.step + 1)))
235 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
236 }
237 else if(ISDIGIT(*pattern)) {
238 /* numeric range detected */
239 unsigned long min_n;
240 unsigned long max_n = 0;
241 unsigned long step_n = 0;
242 char *endp;
243
244 pat->type = UPTNumRange;
245 pat->content.NumRange.padlength = 0;
246
247 if(*pattern == '0') {
248 /* leading zero specified, count them! */
249 c = pattern;
250 while(ISDIGIT(*c)) {
251 c++;
252 ++pat->content.NumRange.padlength; /* padding length is set for all
253 instances of this pattern */
254 }
255 }
256
257 errno = 0;
258 min_n = strtoul(pattern, &endp, 10);
259 if(errno || (endp == pattern))
260 endp = NULL;
261 else {
262 if(*endp != '-')
263 endp = NULL;
264 else {
265 pattern = endp + 1;
266 while(*pattern && ISBLANK(*pattern))
267 pattern++;
268 if(!ISDIGIT(*pattern)) {
269 endp = NULL;
270 goto fail;
271 }
272 errno = 0;
273 max_n = strtoul(pattern, &endp, 10);
274 if(errno)
275 /* overflow */
276 endp = NULL;
277 else if(*endp == ':') {
278 pattern = endp + 1;
279 errno = 0;
280 step_n = strtoul(pattern, &endp, 10);
281 if(errno)
282 /* over/underflow situation */
283 endp = NULL;
284 }
285 else
286 step_n = 1;
287 if(endp && (*endp == ']')) {
288 pattern = endp + 1;
289 }
290 else
291 endp = NULL;
292 }
293 }
294
295 fail:
296 *posp += (pattern - *patternp);
297
298 if(!endp || !step_n ||
299 (min_n == max_n && step_n != 1) ||
300 (min_n != max_n && (min_n > max_n || step_n > (max_n - min_n))))
301 /* the pattern is not well-formed */
302 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT);
303
304 /* typecasting to ints are fine here since we make sure above that we
305 are within 31 bits */
306 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n;
307 pat->content.NumRange.max_n = max_n;
308 pat->content.NumRange.step = step_n;
309
310 if(multiply(amount, ((pat->content.NumRange.max_n -
311 pat->content.NumRange.min_n) /
312 pat->content.NumRange.step + 1)))
313 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT);
314 }
315 else
316 return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT);
317
318 *patternp = pattern;
319 return CURLE_OK;
320}
321
322static bool peek_ipv6(const char *str, size_t *skip)
323{
324 /*
325 * Scan for a potential IPv6 literal.
326 * - Valid globs contain a hyphen and <= 1 colon.
327 * - IPv6 literals contain no hyphens and >= 2 colons.
328 */
329 size_t i = 0;
330 size_t colons = 0;
331 if(str[i++] != '[') {
332 return FALSE;
333 }
334 for(;;) {
335 const char c = str[i++];
336 if(ISALNUM(c) || c == '.' || c == '%') {
337 /* ok */
338 }
339 else if(c == ':') {
340 colons++;
341 }
342 else if(c == ']') {
343 *skip = i;
344 return colons >= 2 ? TRUE : FALSE;
345 }
346 else {
347 return FALSE;
348 }
349 }
350}
351
352static CURLcode glob_parse(URLGlob *glob, char *pattern,
353 size_t pos, unsigned long *amount)
354{
355 /* processes a literal string component of a URL
356 special characters '{' and '[' branch to set/range processing functions
357 */
358 CURLcode res = CURLE_OK;
359 int globindex = 0; /* count "actual" globs */
360
361 *amount = 1;
362
363 while(*pattern && !res) {
364 char *buf = glob->glob_buffer;
365 size_t sublen = 0;
366 while(*pattern && *pattern != '{') {
367 if(*pattern == '[') {
368 /* skip over IPv6 literals and [] */
369 size_t skip = 0;
370 if(!peek_ipv6(pattern, &skip) && (pattern[1] == ']'))
371 skip = 2;
372 if(skip) {
373 memcpy(buf, pattern, skip);
374 buf += skip;
375 pattern += skip;
376 sublen += skip;
377 continue;
378 }
379 break;
380 }
381 if(*pattern == '}' || *pattern == ']')
382 return GLOBERROR("unmatched close brace/bracket", pos,
383 CURLE_URL_MALFORMAT);
384
385 /* only allow \ to escape known "special letters" */
386 if(*pattern == '\\' &&
387 (*(pattern + 1) == '{' || *(pattern + 1) == '[' ||
388 *(pattern + 1) == '}' || *(pattern + 1) == ']') ) {
389
390 /* escape character, skip '\' */
391 ++pattern;
392 ++pos;
393 }
394 *buf++ = *pattern++; /* copy character to literal */
395 ++pos;
396 sublen++;
397 }
398 if(sublen) {
399 /* we got a literal string, add it as a single-item list */
400 *buf = '\0';
401 res = glob_fixed(glob, glob->glob_buffer, sublen);
402 }
403 else {
404 switch (*pattern) {
405 case '\0': /* done */
406 break;
407
408 case '{':
409 /* process set pattern */
410 pattern++;
411 pos++;
412 res = glob_set(glob, &pattern, &pos, amount, globindex++);
413 break;
414
415 case '[':
416 /* process range pattern */
417 pattern++;
418 pos++;
419 res = glob_range(glob, &pattern, &pos, amount, globindex++);
420 break;
421 }
422 }
423
424 if(++glob->size >= GLOB_PATTERN_NUM)
425 return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT);
426 }
427 return res;
428}
429
430CURLcode glob_url(URLGlob **glob, char *url, unsigned long *urlnum,
431 FILE *error)
432{
433 /*
434 * We can deal with any-size, just make a buffer with the same length
435 * as the specified URL!
436 */
437 URLGlob *glob_expand;
438 unsigned long amount = 0;
439 char *glob_buffer;
440 CURLcode res;
441
442 *glob = NULL;
443
444 glob_buffer = malloc(strlen(url) + 1);
445 if(!glob_buffer)
446 return CURLE_OUT_OF_MEMORY;
447 glob_buffer[0] = 0;
448
449 glob_expand = calloc(1, sizeof(URLGlob));
450 if(!glob_expand) {
451 Curl_safefree(glob_buffer);
452 return CURLE_OUT_OF_MEMORY;
453 }
454 glob_expand->urllen = strlen(url);
455 glob_expand->glob_buffer = glob_buffer;
456
457 res = glob_parse(glob_expand, url, 1, &amount);
458 if(!res)
459 *urlnum = amount;
460 else {
461 if(error && glob_expand->error) {
462 char text[512];
463 const char *t;
464 if(glob_expand->pos) {
465 msnprintf(text, sizeof(text), "%s in URL position %zu:\n%s\n%*s^",
466 glob_expand->error,
467 glob_expand->pos, url, glob_expand->pos - 1, " ");
468 t = text;
469 }
470 else
471 t = glob_expand->error;
472
473 /* send error description to the error-stream */
474 fprintf(error, "curl: (%d) %s\n", res, t);
475 }
476 /* it failed, we cleanup */
477 glob_cleanup(glob_expand);
478 *urlnum = 1;
479 return res;
480 }
481
482 *glob = glob_expand;
483 return CURLE_OK;
484}
485
486void glob_cleanup(URLGlob* glob)
487{
488 size_t i;
489 int elem;
490
491 if(!glob)
492 return;
493
494 for(i = 0; i < glob->size; i++) {
495 if((glob->pattern[i].type == UPTSet) &&
496 (glob->pattern[i].content.Set.elements)) {
497 for(elem = glob->pattern[i].content.Set.size - 1;
498 elem >= 0;
499 --elem) {
500 Curl_safefree(glob->pattern[i].content.Set.elements[elem]);
501 }
502 Curl_safefree(glob->pattern[i].content.Set.elements);
503 }
504 }
505 Curl_safefree(glob->glob_buffer);
506 Curl_safefree(glob);
507}
508
509CURLcode glob_next_url(char **globbed, URLGlob *glob)
510{
511 URLPattern *pat;
512 size_t i;
513 size_t len;
514 size_t buflen = glob->urllen + 1;
515 char *buf = glob->glob_buffer;
516
517 *globbed = NULL;
518
519 if(!glob->beenhere)
520 glob->beenhere = 1;
521 else {
522 bool carry = TRUE;
523
524 /* implement a counter over the index ranges of all patterns, starting
525 with the rightmost pattern */
526 for(i = 0; carry && (i < glob->size); i++) {
527 carry = FALSE;
528 pat = &glob->pattern[glob->size - 1 - i];
529 switch(pat->type) {
530 case UPTSet:
531 if((pat->content.Set.elements) &&
532 (++pat->content.Set.ptr_s == pat->content.Set.size)) {
533 pat->content.Set.ptr_s = 0;
534 carry = TRUE;
535 }
536 break;
537 case UPTCharRange:
538 pat->content.CharRange.ptr_c =
539 (char)(pat->content.CharRange.step +
540 (int)((unsigned char)pat->content.CharRange.ptr_c));
541 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) {
542 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c;
543 carry = TRUE;
544 }
545 break;
546 case UPTNumRange:
547 pat->content.NumRange.ptr_n += pat->content.NumRange.step;
548 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) {
549 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n;
550 carry = TRUE;
551 }
552 break;
553 default:
554 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
555 return CURLE_FAILED_INIT;
556 }
557 }
558 if(carry) { /* first pattern ptr has run into overflow, done! */
559 return CURLE_OK;
560 }
561 }
562
563 for(i = 0; i < glob->size; ++i) {
564 pat = &glob->pattern[i];
565 switch(pat->type) {
566 case UPTSet:
567 if(pat->content.Set.elements) {
568 msnprintf(buf, buflen, "%s",
569 pat->content.Set.elements[pat->content.Set.ptr_s]);
570 len = strlen(buf);
571 buf += len;
572 buflen -= len;
573 }
574 break;
575 case UPTCharRange:
576 if(buflen) {
577 *buf++ = pat->content.CharRange.ptr_c;
578 *buf = '\0';
579 buflen--;
580 }
581 break;
582 case UPTNumRange:
583 msnprintf(buf, buflen, "%0*lu",
584 pat->content.NumRange.padlength,
585 pat->content.NumRange.ptr_n);
586 len = strlen(buf);
587 buf += len;
588 buflen -= len;
589 break;
590 default:
591 printf("internal error: invalid pattern type (%d)\n", (int)pat->type);
592 return CURLE_FAILED_INIT;
593 }
594 }
595
596 *globbed = strdup(glob->glob_buffer);
597 if(!*globbed)
598 return CURLE_OUT_OF_MEMORY;
599
600 return CURLE_OK;
601}
602
603CURLcode glob_match_url(char **result, char *filename, URLGlob *glob)
604{
605 char *target;
606 size_t allocsize;
607 char numbuf[18];
608 char *appendthis = (char *)"";
609 size_t appendlen = 0;
610 size_t stringlen = 0;
611
612 *result = NULL;
613
614 /* We cannot use the glob_buffer for storage here since the filename may
615 * be longer than the URL we use. We allocate a good start size, then
616 * we need to realloc in case of need.
617 */
618 allocsize = strlen(filename) + 1; /* make it at least one byte to store the
619 trailing zero */
620 target = malloc(allocsize);
621 if(!target)
622 return CURLE_OUT_OF_MEMORY;
623
624 while(*filename) {
625 if(*filename == '#' && ISDIGIT(filename[1])) {
626 char *ptr = filename;
627 unsigned long num = strtoul(&filename[1], &filename, 10);
628 URLPattern *pat = NULL;
629
630 if(num < glob->size) {
631 unsigned long i;
632 num--; /* make it zero based */
633 /* find the correct glob entry */
634 for(i = 0; i<glob->size; i++) {
635 if(glob->pattern[i].globindex == (int)num) {
636 pat = &glob->pattern[i];
637 break;
638 }
639 }
640 }
641
642 if(pat) {
643 switch(pat->type) {
644 case UPTSet:
645 if(pat->content.Set.elements) {
646 appendthis = pat->content.Set.elements[pat->content.Set.ptr_s];
647 appendlen =
648 strlen(pat->content.Set.elements[pat->content.Set.ptr_s]);
649 }
650 break;
651 case UPTCharRange:
652 numbuf[0] = pat->content.CharRange.ptr_c;
653 numbuf[1] = 0;
654 appendthis = numbuf;
655 appendlen = 1;
656 break;
657 case UPTNumRange:
658 msnprintf(numbuf, sizeof(numbuf), "%0*lu",
659 pat->content.NumRange.padlength,
660 pat->content.NumRange.ptr_n);
661 appendthis = numbuf;
662 appendlen = strlen(numbuf);
663 break;
664 default:
665 fprintf(stderr, "internal error: invalid pattern type (%d)\n",
666 (int)pat->type);
667 Curl_safefree(target);
668 return CURLE_FAILED_INIT;
669 }
670 }
671 else {
672 /* #[num] out of range, use the #[num] in the output */
673 filename = ptr;
674 appendthis = filename++;
675 appendlen = 1;
676 }
677 }
678 else {
679 appendthis = filename++;
680 appendlen = 1;
681 }
682 if(appendlen + stringlen >= allocsize) {
683 char *newstr;
684 /* we append a single byte to allow for the trailing byte to be appended
685 at the end of this function outside the while() loop */
686 allocsize = (appendlen + stringlen) * 2;
687 newstr = realloc(target, allocsize + 1);
688 if(!newstr) {
689 Curl_safefree(target);
690 return CURLE_OUT_OF_MEMORY;
691 }
692 target = newstr;
693 }
694 memcpy(&target[stringlen], appendthis, appendlen);
695 stringlen += appendlen;
696 }
697 target[stringlen]= '\0';
698
699#if defined(MSDOS) || defined(WIN32)
700 {
701 char *sanitized;
702 SANITIZEcode sc = sanitize_file_name(&sanitized, target,
703 (SANITIZE_ALLOW_PATH |
704 SANITIZE_ALLOW_RESERVED));
705 Curl_safefree(target);
706 if(sc)
707 return CURLE_URL_MALFORMAT;
708 target = sanitized;
709 }
710#endif /* MSDOS || WIN32 */
711
712 *result = target;
713 return CURLE_OK;
714}
715