1 | /*************************************************************************** |
2 | * _ _ ____ _ |
3 | * Project ___| | | | _ \| | |
4 | * / __| | | | |_) | | |
5 | * | (__| |_| | _ <| |___ |
6 | * \___|\___/|_| \_\_____| |
7 | * |
8 | * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al. |
9 | * |
10 | * This software is licensed as described in the file COPYING, which |
11 | * you should have received as part of this distribution. The terms |
12 | * are also available at https://curl.se/docs/copyright.html. |
13 | * |
14 | * You may opt to use, copy, modify, merge, publish, distribute and/or sell |
15 | * copies of the Software, and permit persons to whom the Software is |
16 | * furnished to do so, under the terms of the COPYING file. |
17 | * |
18 | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY |
19 | * KIND, either express or implied. |
20 | * |
21 | ***************************************************************************/ |
22 | /* |
23 | A brief summary of the date string formats this parser groks: |
24 | |
25 | RFC 2616 3.3.1 |
26 | |
27 | Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 |
28 | Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 |
29 | Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format |
30 | |
31 | we support dates without week day name: |
32 | |
33 | 06 Nov 1994 08:49:37 GMT |
34 | 06-Nov-94 08:49:37 GMT |
35 | Nov 6 08:49:37 1994 |
36 | |
37 | without the time zone: |
38 | |
39 | 06 Nov 1994 08:49:37 |
40 | 06-Nov-94 08:49:37 |
41 | |
42 | weird order: |
43 | |
44 | 1994 Nov 6 08:49:37 (GNU date fails) |
45 | GMT 08:49:37 06-Nov-94 Sunday |
46 | 94 6 Nov 08:49:37 (GNU date fails) |
47 | |
48 | time left out: |
49 | |
50 | 1994 Nov 6 |
51 | 06-Nov-94 |
52 | Sun Nov 6 94 |
53 | |
54 | unusual separators: |
55 | |
56 | 1994.Nov.6 |
57 | Sun/Nov/6/94/GMT |
58 | |
59 | commonly used time zone names: |
60 | |
61 | Sun, 06 Nov 1994 08:49:37 CET |
62 | 06 Nov 1994 08:49:37 EST |
63 | |
64 | time zones specified using RFC822 style: |
65 | |
66 | Sun, 12 Sep 2004 15:05:58 -0700 |
67 | Sat, 11 Sep 2004 21:32:11 +0200 |
68 | |
69 | compact numerical date strings: |
70 | |
71 | 20040912 15:05:58 -0700 |
72 | 20040911 +0200 |
73 | |
74 | */ |
75 | |
76 | #include "curl_setup.h" |
77 | |
78 | #include <limits.h> |
79 | |
80 | #include <curl/curl.h> |
81 | #include "strcase.h" |
82 | #include "warnless.h" |
83 | #include "parsedate.h" |
84 | |
/*
 * parsedate()
 *
 * Forward declaration; the implementation (or a stub when date parsing is
 * compiled out) follows later in this file.
 *
 * Returns:
 *
 * PARSEDATE_OK     - a fine conversion
 * PARSEDATE_FAIL   - failed to convert
 * PARSEDATE_LATER  - time overflow at the far end of time_t
 * PARSEDATE_SOONER - time underflow at the low end of time_t
 */

static int parsedate(const char *date, time_t *output);

/* The negative value is parenthesized so that the macro stays a single
   operand no matter what expression it gets expanded into. */
#define PARSEDATE_OK     0
#define PARSEDATE_FAIL   (-1)
#define PARSEDATE_LATER  1
#define PARSEDATE_SOONER 2
102 | |
#if !defined(CURL_DISABLE_PARSEDATE) || !defined(CURL_DISABLE_FTP) || \
  !defined(CURL_DISABLE_FILE)
/* Three-letter English day and month abbreviations. They have external
   linkage because the FTP and FILE code uses them as well. */

/* index 0 is Monday, index 6 is Sunday */
const char * const Curl_wkday[] = {
  "Mon",
  "Tue",
  "Wed",
  "Thu",
  "Fri",
  "Sat",
  "Sun"
};

/* index 0 is January, index 11 is December */
const char * const Curl_month[] = {
  "Jan", "Feb", "Mar",
  "Apr", "May", "Jun",
  "Jul", "Aug", "Sep",
  "Oct", "Nov", "Dec"
};
#endif
112 | |
113 | #ifndef CURL_DISABLE_PARSEDATE |
/* Full English day names, in the same Monday-first order as the
   three-letter table. */
static const char * const weekday[] = {
  "Monday",
  "Tuesday",
  "Wednesday",
  "Thursday",
  "Friday",
  "Saturday",
  "Sunday"
};
117 | |
/* One entry in the table of known time zone names. */
struct tzinfo {
  char name[5]; /* zone abbreviation: up to four letters plus the NUL */
  int offset;   /* +/- in minutes; added to the parsed time to reach GMT */
};

/* Here's a bunch of frequently used time zone names. These were supported
   by the old getdate parser. */

/* Adjustment, in minutes, for daylight saving time. Parenthesized and
   combined with an explicit '+' below, so that the entries do not depend on
   the macro happening to expand into a binary minus. */
#define tDAYZONE (-60)

static const struct tzinfo tz[]= {
  {"GMT", 0},               /* Greenwich Mean */
  {"UT", 0},                /* Universal Time */
  {"UTC", 0},               /* Universal (Coordinated) */
  {"WET", 0},               /* Western European */
  {"BST", 0 + tDAYZONE},    /* British Summer */
  {"WAT", 60},              /* West Africa */
  {"AST", 240},             /* Atlantic Standard */
  {"ADT", 240 + tDAYZONE},  /* Atlantic Daylight */
  {"EST", 300},             /* Eastern Standard */
  {"EDT", 300 + tDAYZONE},  /* Eastern Daylight */
  {"CST", 360},             /* Central Standard */
  {"CDT", 360 + tDAYZONE},  /* Central Daylight */
  {"MST", 420},             /* Mountain Standard */
  {"MDT", 420 + tDAYZONE},  /* Mountain Daylight */
  {"PST", 480},             /* Pacific Standard */
  {"PDT", 480 + tDAYZONE},  /* Pacific Daylight */
  {"YST", 540},             /* Yukon Standard */
  {"YDT", 540 + tDAYZONE},  /* Yukon Daylight */
  {"HST", 600},             /* Hawaii Standard */
  {"HDT", 600 + tDAYZONE},  /* Hawaii Daylight */
  {"CAT", 600},             /* Central Alaska */
  {"AHST", 600},            /* Alaska-Hawaii Standard */
  {"NT", 660},              /* Nome */
  {"IDLW", 720},            /* International Date Line West */
  {"CET", -60},             /* Central European */
  {"MET", -60},             /* Middle European */
  {"MEWT", -60},            /* Middle European Winter */
  {"MEST", -60 + tDAYZONE}, /* Middle European Summer */
  {"CEST", -60 + tDAYZONE}, /* Central European Summer */
  {"MESZ", -60 + tDAYZONE}, /* Middle European Summer */
  {"FWT", -60},             /* French Winter */
  {"FST", -60 + tDAYZONE},  /* French Summer */
  {"EET", -120},            /* Eastern Europe, USSR Zone 1 */
  {"WAST", -420},           /* West Australian Standard */
  {"WADT", -420 + tDAYZONE}, /* West Australian Daylight */
  {"CCT", -480},            /* China Coast, USSR Zone 7 */
  {"JST", -540},            /* Japan Standard, USSR Zone 8 */
  {"EAST", -600},           /* Eastern Australian Standard */
  {"EADT", -600 + tDAYZONE}, /* Eastern Australian Daylight */
  {"GST", -600},            /* Guam Standard, USSR Zone 9 */
  {"NZT", -720},            /* New Zealand */
  {"NZST", -720},           /* New Zealand Standard */
  {"NZDT", -720 + tDAYZONE}, /* New Zealand Daylight */
  {"IDLE", -720},           /* International Date Line East */
  /* Next up: Military timezone names. RFC822 allowed these, but (as noted in
     RFC 1123) had their signs wrong. Here we use the correct signs to match
     actual military usage.

     NOTE(review): in this table a positive offset denotes a zone behind GMT
     (compare EST = 300). Please confirm that the signs of A-M and N-Y below
     are the intended ones under that convention.
  */
  {"A", 1 * 60},            /* Alpha */
  {"B", 2 * 60},            /* Bravo */
  {"C", 3 * 60},            /* Charlie */
  {"D", 4 * 60},            /* Delta */
  {"E", 5 * 60},            /* Echo */
  {"F", 6 * 60},            /* Foxtrot */
  {"G", 7 * 60},            /* Golf */
  {"H", 8 * 60},            /* Hotel */
  {"I", 9 * 60},            /* India */
  /* "J", Juliet is not used as a timezone, to indicate the observer's local
     time */
  {"K", 10 * 60},           /* Kilo */
  {"L", 11 * 60},           /* Lima */
  {"M", 12 * 60},           /* Mike */
  {"N", -1 * 60},           /* November */
  {"O", -2 * 60},           /* Oscar */
  {"P", -3 * 60},           /* Papa */
  {"Q", -4 * 60},           /* Quebec */
  {"R", -5 * 60},           /* Romeo */
  {"S", -6 * 60},           /* Sierra */
  {"T", -7 * 60},           /* Tango */
  {"U", -8 * 60},           /* Uniform */
  {"V", -9 * 60},           /* Victor */
  {"W", -10 * 60},          /* Whiskey */
  {"X", -11 * 60},          /* X-ray */
  {"Y", -12 * 60},          /* Yankee */
  {"Z", 0},                 /* Zulu, zero meridian, a.k.a. UTC */
};
203 | |
204 | /* returns: |
205 | -1 no day |
206 | 0 monday - 6 sunday |
207 | */ |
208 | |
209 | static int checkday(const char *check, size_t len) |
210 | { |
211 | int i; |
212 | const char * const *what; |
213 | bool found = FALSE; |
214 | if(len > 3) |
215 | what = &weekday[0]; |
216 | else |
217 | what = &Curl_wkday[0]; |
218 | for(i = 0; i<7; i++) { |
219 | if(strcasecompare(check, what[0])) { |
220 | found = TRUE; |
221 | break; |
222 | } |
223 | what++; |
224 | } |
225 | return found?i:-1; |
226 | } |
227 | |
228 | static int checkmonth(const char *check) |
229 | { |
230 | int i; |
231 | const char * const *what; |
232 | bool found = FALSE; |
233 | |
234 | what = &Curl_month[0]; |
235 | for(i = 0; i<12; i++) { |
236 | if(strcasecompare(check, what[0])) { |
237 | found = TRUE; |
238 | break; |
239 | } |
240 | what++; |
241 | } |
242 | return found?i:-1; /* return the offset or -1, no real offset is -1 */ |
243 | } |
244 | |
245 | /* return the time zone offset between GMT and the input one, in number |
246 | of seconds or -1 if the timezone wasn't found/legal */ |
247 | |
248 | static int checktz(const char *check) |
249 | { |
250 | unsigned int i; |
251 | const struct tzinfo *what; |
252 | bool found = FALSE; |
253 | |
254 | what = tz; |
255 | for(i = 0; i< sizeof(tz)/sizeof(tz[0]); i++) { |
256 | if(strcasecompare(check, what->name)) { |
257 | found = TRUE; |
258 | break; |
259 | } |
260 | what++; |
261 | } |
262 | return found?what->offset*60:-1; |
263 | } |
264 | |
/*
 * skip() advances *date to the next letter or digit, or to the terminating
 * zero if there is none. The pointer is left alone when it already points
 * at a letter or digit.
 */
static void skip(const char **date)
{
  const char *p = *date;

  while(*p && !ISALNUM(*p))
    p++;

  *date = p;
}
271 | |
/* What the date parser assumes the next stand-alone number to be. */
enum assume {
  DATE_MDAY, /* a day of the month */
  DATE_YEAR, /* a year */
  DATE_TIME  /* a time */
};
277 | |
/*
 * time2epoch: convert a broken-down GMT time stamp to seconds since the
 * epoch. Works like mktime() but for GMT only.
 *
 * 'mon' is 0-11 and indexes a 12 entry table without any range check, 'mday'
 * is 1-based and 'year' is the full year. No validation is done here.
 */
static time_t time2epoch(int sec, int min, int hour,
                         int mday, int mon, int year)
{
  /* number of days in a non-leap year that precede each month */
  static const int days_before_month[12] = {
    0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
  };
  /* in January and February this year's leap day is still ahead, so only
     count the leap days up to and including the previous year */
  const int last_leap_year = (mon <= 1) ? year - 1 : year;
  /* leap days since the epoch: those up to 'last_leap_year' minus those up
     to 1969 */
  const int leaps = ((last_leap_year / 4) - (last_leap_year / 100)
                     + (last_leap_year / 400)
                     - (1969 / 4) + (1969 / 100) - (1969 / 400));
  time_t stamp;

  /* whole days since 1970-01-01 */
  stamp = (time_t)(year - 1970) * 365;
  stamp = stamp + leaps;
  stamp = stamp + days_before_month[mon];
  stamp = stamp + mday - 1;

  /* then refine to hours, minutes and finally seconds */
  stamp = stamp * 24 + hour;
  stamp = stamp * 60 + min;
  stamp = stamp * 60 + sec;

  return stamp;
}
294 | |
295 | /* |
296 | * parsedate() |
297 | * |
298 | * Returns: |
299 | * |
300 | * PARSEDATE_OK - a fine conversion |
301 | * PARSEDATE_FAIL - failed to convert |
302 | * PARSEDATE_LATER - time overflow at the far end of time_t |
303 | * PARSEDATE_SOONER - time underflow at the low end of time_t |
304 | */ |
305 | |
306 | static int parsedate(const char *date, time_t *output) |
307 | { |
308 | time_t t = 0; |
309 | int wdaynum = -1; /* day of the week number, 0-6 (mon-sun) */ |
310 | int monnum = -1; /* month of the year number, 0-11 */ |
311 | int mdaynum = -1; /* day of month, 1 - 31 */ |
312 | int hournum = -1; |
313 | int minnum = -1; |
314 | int secnum = -1; |
315 | int yearnum = -1; |
316 | int tzoff = -1; |
317 | enum assume dignext = DATE_MDAY; |
318 | const char *indate = date; /* save the original pointer */ |
319 | int part = 0; /* max 6 parts */ |
320 | |
321 | while(*date && (part < 6)) { |
322 | bool found = FALSE; |
323 | |
324 | skip(&date); |
325 | |
326 | if(ISALPHA(*date)) { |
327 | /* a name coming up */ |
328 | char buf[32]="" ; |
329 | size_t len; |
330 | if(sscanf(date, "%31[ABCDEFGHIJKLMNOPQRSTUVWXYZ" |
331 | "abcdefghijklmnopqrstuvwxyz]" , buf)) |
332 | len = strlen(buf); |
333 | else |
334 | len = 0; |
335 | |
336 | if(wdaynum == -1) { |
337 | wdaynum = checkday(buf, len); |
338 | if(wdaynum != -1) |
339 | found = TRUE; |
340 | } |
341 | if(!found && (monnum == -1)) { |
342 | monnum = checkmonth(buf); |
343 | if(monnum != -1) |
344 | found = TRUE; |
345 | } |
346 | |
347 | if(!found && (tzoff == -1)) { |
348 | /* this just must be a time zone string */ |
349 | tzoff = checktz(buf); |
350 | if(tzoff != -1) |
351 | found = TRUE; |
352 | } |
353 | |
354 | if(!found) |
355 | return PARSEDATE_FAIL; /* bad string */ |
356 | |
357 | date += len; |
358 | } |
359 | else if(ISDIGIT(*date)) { |
360 | /* a digit */ |
361 | int val; |
362 | char *end; |
363 | int len = 0; |
364 | if((secnum == -1) && |
365 | (3 == sscanf(date, "%02d:%02d:%02d%n" , |
366 | &hournum, &minnum, &secnum, &len))) { |
367 | /* time stamp! */ |
368 | date += len; |
369 | } |
370 | else if((secnum == -1) && |
371 | (2 == sscanf(date, "%02d:%02d%n" , &hournum, &minnum, &len))) { |
372 | /* time stamp without seconds */ |
373 | date += len; |
374 | secnum = 0; |
375 | } |
376 | else { |
377 | long lval; |
378 | int error; |
379 | int old_errno; |
380 | |
381 | old_errno = errno; |
382 | errno = 0; |
383 | lval = strtol(date, &end, 10); |
384 | error = errno; |
385 | if(errno != old_errno) |
386 | errno = old_errno; |
387 | |
388 | if(error) |
389 | return PARSEDATE_FAIL; |
390 | |
391 | #if LONG_MAX != INT_MAX |
392 | if((lval > (long)INT_MAX) || (lval < (long)INT_MIN)) |
393 | return PARSEDATE_FAIL; |
394 | #endif |
395 | |
396 | val = curlx_sltosi(lval); |
397 | |
398 | if((tzoff == -1) && |
399 | ((end - date) == 4) && |
400 | (val <= 1400) && |
401 | (indate< date) && |
402 | ((date[-1] == '+' || date[-1] == '-'))) { |
403 | /* four digits and a value less than or equal to 1400 (to take into |
404 | account all sorts of funny time zone diffs) and it is preceded |
405 | with a plus or minus. This is a time zone indication. 1400 is |
406 | picked since +1300 is frequently used and +1400 is mentioned as |
407 | an edge number in the document "ISO C 200X Proposal: Timezone |
408 | Functions" at http://david.tribble.com/text/c0xtimezone.html If |
409 | anyone has a more authoritative source for the exact maximum time |
410 | zone offsets, please speak up! */ |
411 | found = TRUE; |
412 | tzoff = (val/100 * 60 + val%100)*60; |
413 | |
414 | /* the + and - prefix indicates the local time compared to GMT, |
415 | this we need their reversed math to get what we want */ |
416 | tzoff = date[-1]=='+'?-tzoff:tzoff; |
417 | } |
418 | |
419 | if(((end - date) == 8) && |
420 | (yearnum == -1) && |
421 | (monnum == -1) && |
422 | (mdaynum == -1)) { |
423 | /* 8 digits, no year, month or day yet. This is YYYYMMDD */ |
424 | found = TRUE; |
425 | yearnum = val/10000; |
426 | monnum = (val%10000)/100-1; /* month is 0 - 11 */ |
427 | mdaynum = val%100; |
428 | } |
429 | |
430 | if(!found && (dignext == DATE_MDAY) && (mdaynum == -1)) { |
431 | if((val > 0) && (val<32)) { |
432 | mdaynum = val; |
433 | found = TRUE; |
434 | } |
435 | dignext = DATE_YEAR; |
436 | } |
437 | |
438 | if(!found && (dignext == DATE_YEAR) && (yearnum == -1)) { |
439 | yearnum = val; |
440 | found = TRUE; |
441 | if(yearnum < 100) { |
442 | if(yearnum > 70) |
443 | yearnum += 1900; |
444 | else |
445 | yearnum += 2000; |
446 | } |
447 | if(mdaynum == -1) |
448 | dignext = DATE_MDAY; |
449 | } |
450 | |
451 | if(!found) |
452 | return PARSEDATE_FAIL; |
453 | |
454 | date = end; |
455 | } |
456 | } |
457 | |
458 | part++; |
459 | } |
460 | |
461 | if(-1 == secnum) |
462 | secnum = minnum = hournum = 0; /* no time, make it zero */ |
463 | |
464 | if((-1 == mdaynum) || |
465 | (-1 == monnum) || |
466 | (-1 == yearnum)) |
467 | /* lacks vital info, fail */ |
468 | return PARSEDATE_FAIL; |
469 | |
470 | #ifdef HAVE_TIME_T_UNSIGNED |
471 | if(yearnum < 1970) { |
472 | /* only positive numbers cannot return earlier */ |
473 | *output = TIME_T_MIN; |
474 | return PARSEDATE_SOONER; |
475 | } |
476 | #endif |
477 | |
478 | #if (SIZEOF_TIME_T < 5) |
479 | |
480 | #ifdef HAVE_TIME_T_UNSIGNED |
481 | /* an unsigned 32 bit time_t can only hold dates to 2106 */ |
482 | if(yearnum > 2105) { |
483 | *output = TIME_T_MAX; |
484 | return PARSEDATE_LATER; |
485 | } |
486 | #else |
487 | /* a signed 32 bit time_t can only hold dates to the beginning of 2038 */ |
488 | if(yearnum > 2037) { |
489 | *output = TIME_T_MAX; |
490 | return PARSEDATE_LATER; |
491 | } |
492 | if(yearnum < 1903) { |
493 | *output = TIME_T_MIN; |
494 | return PARSEDATE_SOONER; |
495 | } |
496 | #endif |
497 | |
498 | #else |
499 | /* The Gregorian calendar was introduced 1582 */ |
500 | if(yearnum < 1583) |
501 | return PARSEDATE_FAIL; |
502 | #endif |
503 | |
504 | if((mdaynum > 31) || (monnum > 11) || |
505 | (hournum > 23) || (minnum > 59) || (secnum > 60)) |
506 | return PARSEDATE_FAIL; /* clearly an illegal date */ |
507 | |
508 | /* time2epoch() returns a time_t. time_t is often 32 bits, sometimes even on |
509 | architectures that feature 64 bit 'long' but ultimately time_t is the |
510 | correct data type to use. |
511 | */ |
512 | t = time2epoch(secnum, minnum, hournum, mdaynum, monnum, yearnum); |
513 | |
514 | /* Add the time zone diff between local time zone and GMT. */ |
515 | if(tzoff == -1) |
516 | tzoff = 0; |
517 | |
518 | if((tzoff > 0) && (t > TIME_T_MAX - tzoff)) { |
519 | *output = TIME_T_MAX; |
520 | return PARSEDATE_LATER; /* time_t overflow */ |
521 | } |
522 | |
523 | t += tzoff; |
524 | |
525 | *output = t; |
526 | |
527 | return PARSEDATE_OK; |
528 | } |
529 | #else |
530 | /* disabled */ |
531 | static int parsedate(const char *date, time_t *output) |
532 | { |
533 | (void)date; |
534 | *output = 0; |
535 | return PARSEDATE_OK; /* a lie */ |
536 | } |
537 | #endif |
538 | |
539 | time_t curl_getdate(const char *p, const time_t *now) |
540 | { |
541 | time_t parsed = -1; |
542 | int rc = parsedate(p, &parsed); |
543 | (void)now; /* legacy argument from the past that we ignore */ |
544 | |
545 | if(rc == PARSEDATE_OK) { |
546 | if(parsed == -1) |
547 | /* avoid returning -1 for a working scenario */ |
548 | parsed++; |
549 | return parsed; |
550 | } |
551 | /* everything else is fail */ |
552 | return -1; |
553 | } |
554 | |
555 | /* Curl_getdate_capped() differs from curl_getdate() in that this will return |
556 | TIME_T_MAX in case the parsed time value was too big, instead of an |
557 | error. */ |
558 | |
559 | time_t Curl_getdate_capped(const char *p) |
560 | { |
561 | time_t parsed = -1; |
562 | int rc = parsedate(p, &parsed); |
563 | |
564 | switch(rc) { |
565 | case PARSEDATE_OK: |
566 | if(parsed == -1) |
567 | /* avoid returning -1 for a working scenario */ |
568 | parsed++; |
569 | return parsed; |
570 | case PARSEDATE_LATER: |
571 | /* this returns the maximum time value */ |
572 | return parsed; |
573 | default: |
574 | return -1; /* everything else is fail */ |
575 | } |
576 | /* UNREACHABLE */ |
577 | } |
578 | |
579 | /* |
580 | * Curl_gmtime() is a gmtime() replacement for portability. Do not use the |
581 | * gmtime_r() or gmtime() functions anywhere else but here. |
582 | * |
583 | */ |
584 | |
585 | CURLcode Curl_gmtime(time_t intime, struct tm *store) |
586 | { |
587 | const struct tm *tm; |
588 | #ifdef HAVE_GMTIME_R |
589 | /* thread-safe version */ |
590 | tm = (struct tm *)gmtime_r(&intime, store); |
591 | #else |
592 | /* !checksrc! disable BANNEDFUNC 1 */ |
593 | tm = gmtime(&intime); |
594 | if(tm) |
595 | *store = *tm; /* copy the pointed struct to the local copy */ |
596 | #endif |
597 | |
598 | if(!tm) |
599 | return CURLE_BAD_FUNCTION_ARGUMENT; |
600 | return CURLE_OK; |
601 | } |
602 | |