parsedate.c source code [ClickHouse/contrib/curl/lib/parsedate.c]

/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at https://curl.haxx.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ***************************************************************************/
22	/*
23	A brief summary of the date string formats this parser groks:
24
25	RFC 2616 3.3.1
26
27	Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
28	Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
29	Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
30
31	we support dates without week day name:
32
33	06 Nov 1994 08:49:37 GMT
34	06-Nov-94 08:49:37 GMT
35	Nov 6 08:49:37 1994
36
37	without the time zone:
38
39	06 Nov 1994 08:49:37
40	06-Nov-94 08:49:37
41
42	weird order:
43
44	1994 Nov 6 08:49:37 (GNU date fails)
45	GMT 08:49:37 06-Nov-94 Sunday
46	94 6 Nov 08:49:37 (GNU date fails)
47
48	time left out:
49
50	1994 Nov 6
51	06-Nov-94
52	Sun Nov 6 94
53
54	unusual separators:
55
56	1994.Nov.6
57	Sun/Nov/6/94/GMT
58
59	commonly used time zone names:
60
61	Sun, 06 Nov 1994 08:49:37 CET
62	06 Nov 1994 08:49:37 EST
63
64	time zones specified using RFC822 style:
65
66	Sun, 12 Sep 2004 15:05:58 -0700
67	Sat, 11 Sep 2004 21:32:11 +0200
68
69	compact numerical date strings:
70
71	20040912 15:05:58 -0700
72	20040911 +0200
73
74	*/
75
76	#include "curl_setup.h"
77
78	#include <limits.h>
79
80	#include <curl/curl.h>
81	#include "strcase.h"
82	#include "warnless.h"
83	#include "parsedate.h"
84
/*
 * parsedate()
 *
 * Converts the date string 'date' and stores the result in '*output'.
 *
 * Returns:
 *
 * PARSEDATE_OK     - a fine conversion
 * PARSEDATE_FAIL   - failed to convert
 * PARSEDATE_LATER  - time overflow at the far end of time_t
 * PARSEDATE_SOONER - time underflow at the low end of time_t
 */

static int parsedate(const char *date, time_t *output);

#define PARSEDATE_OK     0
#define PARSEDATE_FAIL   (-1) /* parenthesized so it is safe in expressions */
#define PARSEDATE_LATER  1
#define PARSEDATE_SOONER 2
102
#if !defined(CURL_DISABLE_PARSEDATE) || !defined(CURL_DISABLE_FTP) || \
  !defined(CURL_DISABLE_FILE)
/* These names are also used by FTP and FILE code */

/* Three-letter weekday abbreviations, index 0 is Monday and 6 is Sunday */
const char * const Curl_wkday[] =
{"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"};

/* Three-letter month abbreviations, index 0 is January and 11 is December */
const char * const Curl_month[]=
{ "Jan", "Feb", "Mar", "Apr", "May", "Jun",
  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
#endif
112
113	#ifndef CURL_DISABLE_PARSEDATE
/* Full weekday names, in the same Monday-first order as Curl_wkday[] */
static const char * const weekday[] =
{ "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday" };
117
/* One named time zone: 'name' is at most four letters plus the terminating
   NUL, 'offset' is the number of minutes to add to a time expressed in that
   zone to get GMT. */
struct tzinfo {
  char name[5];
  int offset; /* +/- in minutes */
};

/* Here's a bunch of frequently used time zone names. These were supported
   by the old getdate parser. */
#define tDAYZONE (-60)       /* offset for daylight savings time */
static const struct tzinfo tz[]= {
  {"GMT", 0},              /* Greenwich Mean */
  {"UT",  0},              /* Universal Time */
  {"UTC", 0},              /* Universal (Coordinated) */
  {"WET", 0},              /* Western European */
  {"BST", 0 + tDAYZONE},   /* British Summer */
  {"WAT", 60},             /* West Africa */
  {"AST", 240},            /* Atlantic Standard */
  {"ADT", 240 + tDAYZONE}, /* Atlantic Daylight */
  {"EST", 300},            /* Eastern Standard */
  {"EDT", 300 + tDAYZONE}, /* Eastern Daylight */
  {"CST", 360},            /* Central Standard */
  {"CDT", 360 + tDAYZONE}, /* Central Daylight */
  {"MST", 420},            /* Mountain Standard */
  {"MDT", 420 + tDAYZONE}, /* Mountain Daylight */
  {"PST", 480},            /* Pacific Standard */
  {"PDT", 480 + tDAYZONE}, /* Pacific Daylight */
  {"YST", 540},            /* Yukon Standard */
  {"YDT", 540 + tDAYZONE}, /* Yukon Daylight */
  {"HST", 600},            /* Hawaii Standard */
  {"HDT", 600 + tDAYZONE}, /* Hawaii Daylight */
  {"CAT", 600},            /* Central Alaska */
  {"AHST", 600},           /* Alaska-Hawaii Standard */
  {"NT",  660},            /* Nome */
  {"IDLW", 720},           /* International Date Line West */
  {"CET", -60},            /* Central European */
  {"MET", -60},            /* Middle European */
  {"MEWT", -60},           /* Middle European Winter */
  {"MEST", -60 + tDAYZONE}, /* Middle European Summer */
  {"CEST", -60 + tDAYZONE}, /* Central European Summer */
  {"MESZ", -60 + tDAYZONE}, /* Middle European Summer */
  {"FWT", -60},            /* French Winter */
  {"FST", -60 + tDAYZONE}, /* French Summer */
  {"EET", -120},           /* Eastern Europe, USSR Zone 1 */
  {"WAST", -420},          /* West Australian Standard */
  {"WADT", -420 + tDAYZONE}, /* West Australian Daylight */
  {"CCT", -480},           /* China Coast, USSR Zone 7 */
  {"JST", -540},           /* Japan Standard, USSR Zone 8 */
  {"EAST", -600},          /* Eastern Australian Standard */
  {"EADT", -600 + tDAYZONE}, /* Eastern Australian Daylight */
  {"GST", -600},           /* Guam Standard, USSR Zone 9 */
  {"NZT", -720},           /* New Zealand */
  {"NZST", -720},          /* New Zealand Standard */
  {"NZDT", -720 + tDAYZONE}, /* New Zealand Daylight */
  {"IDLE", -720},          /* International Date Line East */
  /* Next up: Military timezone names. RFC822 allowed these, but (as noted in
     RFC 1123) had their signs wrong. Here we use the correct signs to match
     actual military usage.
   */
  {"A",  1 * 60},          /* Alpha */
  {"B",  2 * 60},          /* Bravo */
  {"C",  3 * 60},          /* Charlie */
  {"D",  4 * 60},          /* Delta */
  {"E",  5 * 60},          /* Echo */
  {"F",  6 * 60},          /* Foxtrot */
  {"G",  7 * 60},          /* Golf */
  {"H",  8 * 60},          /* Hotel */
  {"I",  9 * 60},          /* India */
  /* "J", Juliet is not used as a timezone, to indicate the observer's local
     time */
  {"K", 10 * 60},          /* Kilo */
  {"L", 11 * 60},          /* Lima */
  {"M", 12 * 60},          /* Mike */
  {"N",  -1 * 60},         /* November */
  {"O",  -2 * 60},         /* Oscar */
  {"P",  -3 * 60},         /* Papa */
  {"Q",  -4 * 60},         /* Quebec */
  {"R",  -5 * 60},         /* Romeo */
  {"S",  -6 * 60},         /* Sierra */
  {"T",  -7 * 60},         /* Tango */
  {"U",  -8 * 60},         /* Uniform */
  {"V",  -9 * 60},         /* Victor */
  {"W", -10 * 60},         /* Whiskey */
  {"X", -11 * 60},         /* X-ray */
  {"Y", -12 * 60},         /* Yankee */
  {"Z", 0},                /* Zulu, zero meridian, a.k.a. UTC */
};
203
204	/ returns:*
205	-1 no day
206	0 monday - 6 sunday
207	*/
208
209	static int checkday(const char *check, size_t len)
210	{
211	int i;
212	const char * const *what;
213	bool found = FALSE;
214	if(len > `3`)
215	what = &weekday[`0`];
216	else
217	what = &Curl_wkday[`0`];
218	for(i = `0`; i<`7`; i++) {
219	if(strcasecompare(check, what[`0`])) {
220	found = TRUE;
221	break;
222	}
223	what++;
224	}
225	return found?i:-`1`;
226	}
227
228	static int checkmonth(const char *check)
229	{
230	int i;
231	const char * const *what;
232	bool found = FALSE;
233
234	what = &Curl_month[`0`];
235	for(i = `0`; i<`12`; i++) {
236	if(strcasecompare(check, what[`0`])) {
237	found = TRUE;
238	break;
239	}
240	what++;
241	}
242	return found?i:-`1`; / return the offset or -1, no real offset is -1 /
243	}
244
245	/ return the time zone offset between GMT and the input one, in number*
246	of seconds or -1 if the timezone wasn't found/legal /*
247
248	static int checktz(const char *check)
249	{
250	unsigned int i;
251	const struct tzinfo *what;
252	bool found = FALSE;
253
254	what = tz;
255	for(i = `0`; i< sizeof(tz)/sizeof(tz[`0`]); i++) {
256	if(strcasecompare(check, what->name)) {
257	found = TRUE;
258	break;
259	}
260	what++;
261	}
262	return found?what->offset*`60`:-`1`;
263	}
264
/*
 * skip() advances '*date' past everything that is not a letter or a digit,
 * stopping at the first alphanumeric character or at the terminating NUL.
 * The caller's pointer is updated in place.
 */
static void skip(const char **date)
{
  /* skip everything that aren't letters or digits */
  while(**date && !ISALNUM(**date))
    (*date)++;
}
271
/* What the parser assumes the next plain number in the date string is */
enum assume {
  DATE_MDAY, /* day of the month */
  DATE_YEAR, /* year */
  DATE_TIME  /* time of day */
};
277
/* this is a clone of 'struct tm' but with all fields we don't need or use
   cut out */
struct my_tm {
  int tm_sec;
  int tm_min;
  int tm_hour;
  int tm_mday;
  int tm_mon;  /* 0 - 11 */
  int tm_year; /* full year */
};

/* struct tm to time since epoch in GMT time zone.
 * This is similar to the standard mktime function but for GMT only, and
 * doesn't suffer from the various bugs and portability problems that
 * some systems' implementations have.
 *
 * 'tm' holds the broken-down GMT time (full year, month 0 - 11, day of month
 * counted from 1). A month outside 0 - 11 is folded into that range and the
 * surplus carried into the year: month 12 is January of the following year,
 * month -1 is December of the previous year.
 *
 * The number of seconds since 1970-01-01 00:00:00 is stored in '*t'. There
 * is no return value and no overflow detection; the caller is expected to
 * have range checked the year first.
 */
static void my_timegm(struct my_tm *tm, time_t *t)
{
  static const int month_days_cumulative[12] =
    { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
  int month, year, leap_days;

  year = tm->tm_year;
  month = tm->tm_mon;
  if(month < 0) {
    /* months before January belong to earlier years */
    year -= (11 - month) / 12;
    month = 11 - (11 - month) % 12;
  }
  else if(month >= 12) {
    /* months after December belong to later years */
    year += month / 12;
    month = month % 12;
  }

  /* Count the leap days between 1970 and the date. For January and February
     the current year's leap day has not happened yet, so count up to the
     previous year. Use the normalized month here, not tm->tm_mon. */
  leap_days = year - (month <= 1);
  leap_days = ((leap_days / 4) - (leap_days / 100) + (leap_days / 400)
               - (1969 / 4) + (1969 / 100) - (1969 / 400));

  *t = ((((time_t) (year - 1970) * 365
          + leap_days + month_days_cumulative[month] + tm->tm_mday - 1) * 24
         + tm->tm_hour) * 60 + tm->tm_min) * 60 + tm->tm_sec;
}
321
322	/*
323	* parsedate()
324	*
325	* Returns:
326	*
327	* PARSEDATE_OK - a fine conversion
328	* PARSEDATE_FAIL - failed to convert
329	* PARSEDATE_LATER - time overflow at the far end of time_t
330	* PARSEDATE_SOONER - time underflow at the low end of time_t
331	*/
332
333	static int parsedate(const char date, time_t output)
334	{
335	time_t t = `0`;
336	int wdaynum = -`1`; / day of the week number, 0-6 (mon-sun) /
337	int monnum = -`1`; / month of the year number, 0-11 /
338	int mdaynum = -`1`; / day of month, 1 - 31 /
339	int hournum = -`1`;
340	int minnum = -`1`;
341	int secnum = -`1`;
342	int yearnum = -`1`;
343	int tzoff = -`1`;
344	struct my_tm tm;
345	enum assume dignext = DATE_MDAY;
346	const char indate = date; /* save the original pointer /
347	int part = `0`; / max 6 parts /
348
349	while(*date && (part < `6`)) {
350	bool found = FALSE;
351
352	skip(&date);
353
354	if(ISALPHA(*date)) {
355	/ a name coming up /
356	char buf[`32`]="";
357	size_t len;
358	if(sscanf(date, "%31[ABCDEFGHIJKLMNOPQRSTUVWXYZ"
359	"abcdefghijklmnopqrstuvwxyz]", buf))
360	len = strlen(buf);
361	else
362	len = `0`;
363
364	if(wdaynum == -`1`) {
365	wdaynum = checkday(buf, len);
366	if(wdaynum != -`1`)
367	found = TRUE;
368	}
369	if(!found && (monnum == -`1`)) {
370	monnum = checkmonth(buf);
371	if(monnum != -`1`)
372	found = TRUE;
373	}
374
375	if(!found && (tzoff == -`1`)) {
376	/ this just must be a time zone string /
377	tzoff = checktz(buf);
378	if(tzoff != -`1`)
379	found = TRUE;
380	}
381
382	if(!found)
383	return PARSEDATE_FAIL; / bad string /
384
385	date += len;
386	}
387	else if(ISDIGIT(*date)) {
388	/ a digit /
389	int val;
390	char *end;
391	int len = `0`;
392	if((secnum == -`1`) &&
393	(`3` == sscanf(date, "%02d:%02d:%02d%n",
394	&hournum, &minnum, &secnum, &len))) {
395	/ time stamp! /
396	date += len;
397	}
398	else if((secnum == -`1`) &&
399	(`2` == sscanf(date, "%02d:%02d%n", &hournum, &minnum, &len))) {
400	/ time stamp without seconds /
401	date += len;
402	secnum = `0`;
403	}
404	else {
405	long lval;
406	int error;
407	int old_errno;
408
409	old_errno = errno;
410	errno = `0`;
411	lval = strtol(date, &end, `10`);
412	error = errno;
413	if(errno != old_errno)
414	errno = old_errno;
415
416	if(error)
417	return PARSEDATE_FAIL;
418
419	#if LONG_MAX != INT_MAX
420	if((lval > (long)INT_MAX) \|\| (lval < (long)INT_MIN))
421	return PARSEDATE_FAIL;
422	#endif
423
424	val = curlx_sltosi(lval);
425
426	if((tzoff == -`1`) &&
427	((end - date) == `4`) &&
428	(val <= `1400`) &&
429	(indate< date) &&
430	((date[-`1`] == `'+'` \|\| date[-`1`] == `'-'`))) {
431	/ four digits and a value less than or equal to 1400 (to take into*
432	account all sorts of funny time zone diffs) and it is preceded
433	with a plus or minus. This is a time zone indication. 1400 is
434	picked since +1300 is frequently used and +1400 is mentioned as
435	an edge number in the document "ISO C 200X Proposal: Timezone
436	Functions" at http://david.tribble.com/text/c0xtimezone.html If
437	anyone has a more authoritative source for the exact maximum time
438	zone offsets, please speak up! /*
439	found = TRUE;
440	tzoff = (val/`100` * `60` + val%`100`)*`60`;
441
442	/ the + and - prefix indicates the local time compared to GMT,*
443	this we need their reversed math to get what we want /*
444	tzoff = date[-`1`]==`'+'`?-tzoff:tzoff;
445	}
446
447	if(((end - date) == `8`) &&
448	(yearnum == -`1`) &&
449	(monnum == -`1`) &&
450	(mdaynum == -`1`)) {
451	/ 8 digits, no year, month or day yet. This is YYYYMMDD /
452	found = TRUE;
453	yearnum = val/`10000`;
454	monnum = (val%`10000`)/`100`-`1`; / month is 0 - 11 /
455	mdaynum = val%`100`;
456	}
457
458	if(!found && (dignext == DATE_MDAY) && (mdaynum == -`1`)) {
459	if((val > `0`) && (val<`32`)) {
460	mdaynum = val;
461	found = TRUE;
462	}
463	dignext = DATE_YEAR;
464	}
465
466	if(!found && (dignext == DATE_YEAR) && (yearnum == -`1`)) {
467	yearnum = val;
468	found = TRUE;
469	if(yearnum < `100`) {
470	if(yearnum > `70`)
471	yearnum += `1900`;
472	else
473	yearnum += `2000`;
474	}
475	if(mdaynum == -`1`)
476	dignext = DATE_MDAY;
477	}
478
479	if(!found)
480	return PARSEDATE_FAIL;
481
482	date = end;
483	}
484	}
485
486	part++;
487	}
488
489	if(-`1` == secnum)
490	secnum = minnum = hournum = `0`; / no time, make it zero /
491
492	if((-`1` == mdaynum) \|\|
493	(-`1` == monnum) \|\|
494	(-`1` == yearnum))
495	/ lacks vital info, fail /
496	return PARSEDATE_FAIL;
497
498	#ifdef HAVE_TIME_T_UNSIGNED
499	if(yearnum < `1970`) {
500	/ only positive numbers cannot return earlier /
501	*output = TIME_T_MIN;
502	return PARSEDATE_SOONER;
503	}
504	#endif
505
506	#if (SIZEOF_TIME_T < 5)
507
508	#ifdef HAVE_TIME_T_UNSIGNED
509	/ an unsigned 32 bit time_t can only hold dates to 2106 /
510	if(yearnum > `2105`) {
511	*output = TIME_T_MAX;
512	return PARSEDATE_LATER;
513	}
514	#else
515	/ a signed 32 bit time_t can only hold dates to the beginning of 2038 /
516	if(yearnum > `2037`) {
517	*output = TIME_T_MAX;
518	return PARSEDATE_LATER;
519	}
520	if(yearnum < `1903`) {
521	*output = TIME_T_MIN;
522	return PARSEDATE_SOONER;
523	}
524	#endif
525
526	#else
527	/ The Gregorian calendar was introduced 1582 /
528	if(yearnum < `1583`)
529	return PARSEDATE_FAIL;
530	#endif
531
532	if((mdaynum > `31`) \|\| (monnum > `11`) \|\|
533	(hournum > `23`) \|\| (minnum > `59`) \|\| (secnum > `60`))
534	return PARSEDATE_FAIL; / clearly an illegal date /
535
536	tm.tm_sec = secnum;
537	tm.tm_min = minnum;
538	tm.tm_hour = hournum;
539	tm.tm_mday = mdaynum;
540	tm.tm_mon = monnum;
541	tm.tm_year = yearnum;
542
543	/ my_timegm() returns a time_t. time_t is often 32 bits, sometimes even on*
544	architectures that feature 64 bit 'long' but ultimately time_t is the
545	correct data type to use.
546	*/
547	my_timegm(&tm, &t);
548
549	/ Add the time zone diff between local time zone and GMT. /
550	if(tzoff == -`1`)
551	tzoff = `0`;
552
553	if((tzoff > `0`) && (t > TIME_T_MAX - tzoff)) {
554	*output = TIME_T_MAX;
555	return PARSEDATE_LATER; / time_t overflow /
556	}
557
558	t += tzoff;
559
560	*output = t;
561
562	return PARSEDATE_OK;
563	}
564	#else
565	/ disabled /
566	static int parsedate(const char date, time_t output)
567	{
568	(void)date;
569	*output = `0`;
570	return PARSEDATE_OK; / a lie /
571	}
572	#endif
573
574	time_t curl_getdate(const char p, const* time_t *now)
575	{
576	time_t parsed = -`1`;
577	int rc = parsedate(p, &parsed);
578	(void)now; / legacy argument from the past that we ignore /
579
580	if(rc == PARSEDATE_OK) {
581	if(parsed == -`1`)
582	/ avoid returning -1 for a working scenario /
583	parsed++;
584	return parsed;
585	}
586	/ everything else is fail /
587	return -`1`;
588	}
589
590	/ Curl_getdate_capped() differs from curl_getdate() in that this will return*
591	TIME_T_MAX in case the parsed time value was too big, instead of an
592	error. /*
593
594	time_t Curl_getdate_capped(const char *p)
595	{
596	time_t parsed = -`1`;
597	int rc = parsedate(p, &parsed);
598
599	switch(rc) {
600	case PARSEDATE_OK:
601	if(parsed == -`1`)
602	/ avoid returning -1 for a working scenario /
603	parsed++;
604	return parsed;
605	case PARSEDATE_LATER:
606	/ this returns the maximum time value /
607	return parsed;
608	default:
609	return -`1`; / everything else is fail /
610	}
611	/ UNREACHABLE /
612	}
613
614	/*
615	* Curl_gmtime() is a gmtime() replacement for portability. Do not use the
616	* gmtime_r() or gmtime() functions anywhere else but here.
617	*
618	*/
619
620	CURLcode Curl_gmtime(time_t intime, struct tm *store)
621	{
622	const struct tm *tm;
623	#ifdef HAVE_GMTIME_R
624	/ thread-safe version /
625	tm = (struct tm *)gmtime_r(&intime, store);
626	#else
627	tm = gmtime(&intime);
628	if(tm)
629	store = tm; / copy the pointed struct to the local copy /
630	#endif
631
632	if(!tm)
633	return CURLE_BAD_FUNCTION_ARGUMENT;
634	return CURLE_OK;
635	}
636

Browse the source code of ClickHouse/contrib/curl/lib/parsedate.c