1#include <common/DateLUTImpl.h>
2#include <Common/StringUtils/StringUtils.h>
3
4#include <IO/ReadBuffer.h>
5#include <IO/ReadHelpers.h>
6#include <IO/WriteHelpers.h>
7#include <IO/parseDateTimeBestEffort.h>
8
9#include <limits>
10
11namespace DB
12{
13
14namespace ErrorCodes
15{
16 extern const int LOGICAL_ERROR;
17 extern const int CANNOT_PARSE_DATETIME;
18}
19
20
21namespace
22{
23
24inline size_t readDigits(char * res, size_t max_chars, ReadBuffer & in)
25{
26 size_t num_chars = 0;
27 while (!in.eof() && isNumericASCII(*in.position()) && num_chars < max_chars)
28 {
29 res[num_chars] = *in.position() - '0';
30 ++num_chars;
31 ++in.position();
32 }
33 return num_chars;
34}
35
36inline size_t readAlpha(char * res, size_t max_chars, ReadBuffer & in)
37{
38 size_t num_chars = 0;
39 while (!in.eof() && isAlphaASCII(*in.position()) && num_chars < max_chars)
40 {
41 res[num_chars] = *in.position();
42 ++num_chars;
43 ++in.position();
44 }
45 return num_chars;
46}
47
#if defined(__PPC__)
#if !__clang__
/// Save the current diagnostic state so that the `#pragma GCC diagnostic pop` further down
/// in this file restores exactly this state instead of popping without a matching push.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
#endif
53
/// One step of the compile-time unrolled conversion: adds `src[digit]` weighted by
/// `power_of_ten` to `res`, then proceeds to the more significant digit on its left.
/// `src` holds digit values (0..9), not characters.
template <size_t digit, size_t power_of_ten, typename T>
inline void readDecimalNumberImpl(T & res, const char * src)
{
    if constexpr (digit == 0)
    {
        /// Most significant digit: the recursion ends here.
        res += src[0] * power_of_ten;
    }
    else
    {
        res += src[digit] * power_of_ten;
        readDecimalNumberImpl<digit - 1, power_of_ten * 10>(res, src);
    }
}

/// Interprets the first `num_digits` entries of `src` as a decimal number
/// (most significant digit first) and ADDS it to `res`.
/// The caller is responsible for `res` holding the desired starting value (normally zero).
template <size_t num_digits, typename T>
inline void readDecimalNumber(T & res, const char * src)
{
    constexpr size_t least_significant = num_digits - 1;
    readDecimalNumberImpl<least_significant, 1>(res, src);
}
67
/// Appends `num_digits` decimal digits taken from `src` (most significant first) to the
/// decimal representation already held in `res`, i.e. computes res * 10^num_digits + number.
/// `src` holds digit values (0..9), not characters. With `num_digits == 0` `res` is left unchanged.
///
/// Implemented as a plain loop: the previous macro-based version hid a `break` inside the macro
/// and undefined the wrong macro name, leaking the helper macro into the rest of the file.
template <typename T>
inline void readDecimalNumber(T & res, size_t num_digits, const char * src)
{
    for (size_t i = 0; i < num_digits; ++i)
        res = static_cast<T>(res * 10 + src[i]);
}
85
86struct DateTimeSubsecondPart
87{
88 Int64 value;
89 UInt8 digits;
90};
91
92template <typename ReturnType>
93ReturnType parseDateTimeBestEffortImpl(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, DateTimeSubsecondPart * fractional = nullptr)
94{
95 auto on_error = [](const std::string & message [[maybe_unused]], int code [[maybe_unused]])
96 {
97 if constexpr (std::is_same_v<ReturnType, void>)
98 throw Exception(message, code);
99 else
100 return false;
101 };
102
103 UInt16 year = 0;
104 UInt8 month = 0;
105 UInt8 day_of_month = 0;
106 UInt8 hour = 0;
107 UInt8 minute = 0;
108 UInt8 second = 0;
109
110 bool has_time = false;
111
112 bool has_time_zone_offset = false;
113 bool time_zone_offset_negative = false;
114 UInt8 time_zone_offset_hour = 0;
115 UInt8 time_zone_offset_minute = 0;
116
117 bool is_pm = false;
118
119 auto read_alpha_month = [&month] (const auto & alpha)
120 {
121 if (0 == strncasecmp(alpha, "Jan", 3)) month = 1;
122 else if (0 == strncasecmp(alpha, "Feb", 3)) month = 2;
123 else if (0 == strncasecmp(alpha, "Mar", 3)) month = 3;
124 else if (0 == strncasecmp(alpha, "Apr", 3)) month = 4;
125 else if (0 == strncasecmp(alpha, "May", 3)) month = 5;
126 else if (0 == strncasecmp(alpha, "Jun", 3)) month = 6;
127 else if (0 == strncasecmp(alpha, "Jul", 3)) month = 7;
128 else if (0 == strncasecmp(alpha, "Aug", 3)) month = 8;
129 else if (0 == strncasecmp(alpha, "Sep", 3)) month = 9;
130 else if (0 == strncasecmp(alpha, "Oct", 3)) month = 10;
131 else if (0 == strncasecmp(alpha, "Nov", 3)) month = 11;
132 else if (0 == strncasecmp(alpha, "Dec", 3)) month = 12;
133 else
134 return false;
135 return true;
136 };
137
138 while (!in.eof())
139 {
140 char digits[std::numeric_limits<UInt64>::digits10];
141
142 size_t num_digits = 0;
143
144 if (!year || !has_time)
145 {
146 num_digits = readDigits(digits, sizeof(digits), in);
147
148 if (num_digits == 10 && !year && !has_time)
149 {
150 /// This is unix timestamp.
151 readDecimalNumber<10>(res, digits);
152 return ReturnType(true);
153 }
154 else if (num_digits == 9 && !year && !has_time)
155 {
156 /// This is unix timestamp.
157 readDecimalNumber<9>(res, digits);
158 return ReturnType(true);
159 }
160 else if (num_digits == 14 && !year && !has_time)
161 {
162 /// This is YYYYMMDDhhmmss
163 readDecimalNumber<4>(year, digits);
164 readDecimalNumber<2>(month, digits + 4);
165 readDecimalNumber<2>(day_of_month, digits + 6);
166 readDecimalNumber<2>(hour, digits + 8);
167 readDecimalNumber<2>(minute, digits + 10);
168 readDecimalNumber<2>(second, digits + 12);
169 has_time = true;
170 }
171 else if (num_digits == 8 && !year)
172 {
173 /// This is YYYYMMDD
174 readDecimalNumber<4>(year, digits);
175 readDecimalNumber<2>(month, digits + 4);
176 readDecimalNumber<2>(day_of_month, digits + 6);
177 }
178 else if (num_digits == 6)
179 {
180 /// This is YYYYMM
181 if (!year && !month)
182 {
183 readDecimalNumber<4>(year, digits);
184 readDecimalNumber<2>(month, digits + 4);
185 }
186 else if (!has_time)
187 {
188 readDecimalNumber<2>(hour, digits);
189 readDecimalNumber<2>(minute, digits + 2);
190 readDecimalNumber<2>(second, digits + 4);
191 has_time = true;
192 }
193 else
194 return on_error("Cannot read DateTime: ambiguous 6 digits, it can be YYYYMM or hhmmss", ErrorCodes::CANNOT_PARSE_DATETIME);
195 }
196 else if (num_digits == 4 && !year)
197 {
198 /// YYYY
199 /// YYYY*MM
200 /// YYYY*MM*DD
201 /// YYYY*M
202 /// YYYY*M*DD
203 /// YYYY*M*D
204
205 readDecimalNumber<4>(year, digits);
206
207 if (!in.eof())
208 {
209 char delimiter_after_year = *in.position();
210
211 if (delimiter_after_year < 0x20
212 || delimiter_after_year == ','
213 || delimiter_after_year == ';'
214 || delimiter_after_year == '\''
215 || delimiter_after_year == '"')
216 break;
217
218 if (month)
219 continue;
220
221 ++in.position();
222
223 num_digits = readDigits(digits, sizeof(digits), in);
224
225 if (num_digits == 2)
226 readDecimalNumber<2>(month, digits);
227 else if (num_digits == 1)
228 readDecimalNumber<1>(month, digits);
229 else if (delimiter_after_year == ' ')
230 continue;
231 else
232 return on_error("Cannot read DateTime: unexpected number of decimal digits after year: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME);
233
234 /// Only the same delimiter.
235 if (!day_of_month && checkChar(delimiter_after_year, in))
236 {
237 num_digits = readDigits(digits, sizeof(digits), in);
238
239 if (num_digits == 2)
240 readDecimalNumber<2>(day_of_month, digits);
241 else if (num_digits == 1)
242 readDecimalNumber<1>(day_of_month, digits);
243 else if (delimiter_after_year == ' ')
244 continue;
245 else
246 return on_error("Cannot read DateTime: unexpected number of decimal digits after year and month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME);
247 }
248 }
249 }
250 else if (num_digits == 2 || num_digits == 1)
251 {
252 /// hh:mm:ss
253 /// hh:mm
254 /// hh - only if already have day of month
255 /// DD/MM/YYYY
256 /// DD/MM/YY
257 /// DD.MM.YYYY
258 /// DD.MM.YY
259 /// DD-MM-YYYY
260 /// DD-MM-YY
261 /// DD
262
263 UInt8 hour_or_day_of_month = 0;
264 if (num_digits == 2)
265 readDecimalNumber<2>(hour_or_day_of_month, digits);
266 else if (num_digits == 1) //-V547
267 readDecimalNumber<1>(hour_or_day_of_month, digits);
268 else
269 return on_error("Cannot read DateTime: logical error, unexpected branch in code", ErrorCodes::LOGICAL_ERROR);
270
271 if (checkChar(':', in))
272 {
273 if (has_time)
274 return on_error("Cannot read DateTime: time component is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME);
275
276 hour = hour_or_day_of_month;
277 has_time = true;
278
279 num_digits = readDigits(digits, sizeof(digits), in);
280
281 if (num_digits == 2)
282 readDecimalNumber<2>(minute, digits);
283 else if (num_digits == 1)
284 readDecimalNumber<1>(minute, digits);
285 else
286 return on_error("Cannot read DateTime: unexpected number of decimal digits after hour: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME);
287
288 if (checkChar(':', in))
289 {
290 num_digits = readDigits(digits, sizeof(digits), in);
291
292 if (num_digits == 2)
293 readDecimalNumber<2>(second, digits);
294 else if (num_digits == 1)
295 readDecimalNumber<1>(second, digits);
296 else
297 return on_error("Cannot read DateTime: unexpected number of decimal digits after hour and minute: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME);
298 }
299 }
300 else if (checkChar('/', in) || checkChar('.', in) || checkChar('-', in))
301 {
302 if (day_of_month)
303 return on_error("Cannot read DateTime: day of month is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME);
304
305 if (month)
306 return on_error("Cannot read DateTime: month is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME);
307
308 day_of_month = hour_or_day_of_month;
309
310 num_digits = readDigits(digits, sizeof(digits), in);
311
312 if (num_digits == 2)
313 readDecimalNumber<2>(month, digits);
314 else if (num_digits == 1)
315 readDecimalNumber<1>(month, digits);
316 else if (num_digits == 0)
317 {
318 /// Month in alphabetical form
319
320 char alpha[9]; /// The longest month name: September
321 size_t num_alpha = readAlpha(alpha, sizeof(alpha), in);
322
323 if (num_alpha < 3)
324 return on_error("Cannot read DateTime: unexpected number of alphabetical characters after day of month: " + toString(num_alpha), ErrorCodes::CANNOT_PARSE_DATETIME);
325
326 if (!read_alpha_month(alpha))
327 return on_error("Cannot read DateTime: alphabetical characters after day of month don't look like month: " + std::string(alpha, 3), ErrorCodes::CANNOT_PARSE_DATETIME);
328 }
329 else
330 return on_error("Cannot read DateTime: unexpected number of decimal digits after day of month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME);
331
332 if (checkChar('/', in) || checkChar('.', in) || checkChar('-', in))
333 {
334 if (year)
335 return on_error("Cannot read DateTime: year component is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME);
336
337 num_digits = readDigits(digits, sizeof(digits), in);
338
339 if (num_digits == 4)
340 readDecimalNumber<4>(year, digits);
341 else if (num_digits == 2)
342 {
343 readDecimalNumber<2>(year, digits);
344
345 if (year >= 70)
346 year += 1900;
347 else
348 year += 2000;
349 }
350 else
351 return on_error("Cannot read DateTime: unexpected number of decimal digits after day of month and month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME);
352 }
353 }
354 else
355 {
356 if (day_of_month)
357 hour = hour_or_day_of_month;
358 else
359 day_of_month = hour_or_day_of_month;
360 }
361 }
362 else if (num_digits != 0)
363 return on_error("Cannot read DateTime: unexpected number of decimal digits: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME);
364 }
365
366 if (num_digits == 0)
367 {
368 char c = *in.position();
369
370 if (c == ' ' || c == 'T')
371 {
372 ++in.position();
373 }
374 else if (c == 'Z')
375 {
376 ++in.position();
377 has_time_zone_offset = true;
378 }
379 else if (c == '.') /// We don't support comma (ISO 8601:2004) for fractional part of second to not mess up with CSV separator.
380 {
381 if (!has_time)
382 return on_error("Cannot read DateTime: unexpected point symbol", ErrorCodes::CANNOT_PARSE_DATETIME);
383
384 ++in.position();
385 num_digits = readDigits(digits, sizeof(digits), in);
386 if (fractional)
387 {
388 using FractionalType = typename std::decay<decltype(fractional->value)>::type;
389 // Reading more decimal digits than fits into FractionalType would case an
390 // overflow, so it is better to skip all digits from the right side that do not
391 // fit into result type. To provide less precise value rather than bogus one.
392 num_digits = std::min(static_cast<size_t>(std::numeric_limits<FractionalType>::digits10), num_digits);
393
394 fractional->digits = num_digits;
395 readDecimalNumber(fractional->value, num_digits, digits);
396 }
397 }
398 else if (c == '+' || c == '-')
399 {
400 ++in.position();
401 has_time_zone_offset = true;
402 if (c == '-')
403 time_zone_offset_negative = true;
404
405 num_digits = readDigits(digits, sizeof(digits), in);
406
407 if (num_digits == 4)
408 {
409 readDecimalNumber<2>(time_zone_offset_hour, digits);
410 readDecimalNumber<2>(time_zone_offset_minute, digits + 2);
411 }
412 else if (num_digits == 3)
413 {
414 readDecimalNumber<1>(time_zone_offset_hour, digits);
415 readDecimalNumber<2>(time_zone_offset_minute, digits + 1);
416 }
417 else if (num_digits == 2)
418 {
419 readDecimalNumber<2>(time_zone_offset_hour, digits);
420 }
421 else if (num_digits == 1)
422 {
423 readDecimalNumber<1>(time_zone_offset_hour, digits);
424 }
425 else
426 return on_error("Cannot read DateTime: unexpected number of decimal digits for time zone offset: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME);
427
428 if (num_digits < 3 && checkChar(':', in))
429 {
430 num_digits = readDigits(digits, sizeof(digits), in);
431
432 if (num_digits == 2)
433 {
434 readDecimalNumber<2>(time_zone_offset_minute, digits);
435 }
436 else if (num_digits == 1)
437 {
438 readDecimalNumber<1>(time_zone_offset_minute, digits);
439 }
440 else
441 return on_error("Cannot read DateTime: unexpected number of decimal digits for time zone offset in minutes: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME);
442 }
443 }
444 else
445 {
446 char alpha[3];
447
448 size_t num_alpha = readAlpha(alpha, sizeof(alpha), in);
449
450 if (!num_alpha)
451 {
452 break;
453 }
454 else if (num_alpha == 1)
455 {
456 return on_error("Cannot read DateTime: unexpected alphabetical character", ErrorCodes::CANNOT_PARSE_DATETIME);
457 }
458 else if (num_alpha == 2)
459 {
460 if (alpha[1] == 'M' || alpha[1] == 'm')
461 {
462 if (alpha[0] == 'A' || alpha[0] == 'a')
463 {
464 }
465 else if (alpha[0] == 'P' || alpha[0] == 'p')
466 {
467 is_pm = true;
468 }
469 else
470 return on_error("Cannot read DateTime: unexpected word", ErrorCodes::CANNOT_PARSE_DATETIME);
471 }
472 else
473 return on_error("Cannot read DateTime: unexpected word", ErrorCodes::CANNOT_PARSE_DATETIME);
474 }
475 else if (num_alpha == 3)
476 {
477 bool has_day_of_week = false;
478
479 if (read_alpha_month(alpha))
480 {
481 }
482 else if (0 == strncasecmp(alpha, "UTC", 3)) has_time_zone_offset = true;
483 else if (0 == strncasecmp(alpha, "GMT", 3)) has_time_zone_offset = true;
484 else if (0 == strncasecmp(alpha, "MSK", 3)) { has_time_zone_offset = true; time_zone_offset_hour = 3; }
485 else if (0 == strncasecmp(alpha, "MSD", 3)) { has_time_zone_offset = true; time_zone_offset_hour = 4; }
486
487 else if (0 == strncasecmp(alpha, "Mon", 3)) has_day_of_week = true;
488 else if (0 == strncasecmp(alpha, "Tue", 3)) has_day_of_week = true;
489 else if (0 == strncasecmp(alpha, "Wed", 3)) has_day_of_week = true;
490 else if (0 == strncasecmp(alpha, "Thu", 3)) has_day_of_week = true;
491 else if (0 == strncasecmp(alpha, "Fri", 3)) has_day_of_week = true;
492 else if (0 == strncasecmp(alpha, "Sat", 3)) has_day_of_week = true;
493 else if (0 == strncasecmp(alpha, "Sun", 3)) has_day_of_week = true;
494
495 else
496 return on_error("Cannot read DateTime: unexpected word", ErrorCodes::CANNOT_PARSE_DATETIME);
497
498 while (!in.eof() && isAlphaASCII(*in.position()))
499 ++in.position();
500
501 /// For RFC 2822
502 if (has_day_of_week)
503 checkChar(',', in);
504 }
505 else
506 return on_error("Cannot read DateTime: logical error, unexpected branch in code", ErrorCodes::LOGICAL_ERROR);
507 }
508 }
509 }
510
511 if (!year)
512 year = 2000;
513 if (!month)
514 month = 1;
515 if (!day_of_month)
516 day_of_month = 1;
517
518 if (is_pm && hour < 12)
519 hour += 12;
520
521 auto adjust_time_zone = [&]
522 {
523 if (time_zone_offset_hour)
524 {
525 if (time_zone_offset_negative)
526 res += time_zone_offset_hour * 3600;
527 else
528 res -= time_zone_offset_hour * 3600;
529 }
530
531 if (time_zone_offset_minute)
532 {
533 if (time_zone_offset_negative)
534 res += time_zone_offset_minute * 60;
535 else
536 res -= time_zone_offset_minute * 60;
537 }
538 };
539
540 if (has_time_zone_offset)
541 {
542 res = utc_time_zone.makeDateTime(year, month, day_of_month, hour, minute, second);
543 adjust_time_zone();
544 }
545 else
546 {
547 res = local_time_zone.makeDateTime(year, month, day_of_month, hour, minute, second);
548 }
549
550 return ReturnType(true);
551}
552
553template <typename ReturnType>
554ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone)
555{
556 time_t whole;
557 DateTimeSubsecondPart subsecond = {0, 0}; // needs to be explicitly initialized sine it could be missing from input string
558 if (!parseDateTimeBestEffortImpl<bool>(whole, in, local_time_zone, utc_time_zone, &subsecond))
559 return ReturnType(false);
560
561 DateTime64::NativeType fractional = subsecond.value;
562 if (scale < subsecond.digits)
563 {
564 fractional /= common::exp10_i64(subsecond.digits - scale);
565 }
566 else if (scale > subsecond.digits)
567 {
568 fractional *= common::exp10_i64(scale - subsecond.digits);
569 }
570
571 res = DecimalUtils::decimalFromComponents<DateTime64>(whole, fractional, scale);
572 return ReturnType(true);
573}
574
575}
576
577#if defined(__PPC__)
578#if !__clang__
579#pragma GCC diagnostic pop
580#endif
581#endif
582
583void parseDateTimeBestEffort(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone)
584{
585 parseDateTimeBestEffortImpl<void>(res, in, local_time_zone, utc_time_zone);
586}
587
588bool tryParseDateTimeBestEffort(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone)
589{
590 return parseDateTimeBestEffortImpl<bool>(res, in, local_time_zone, utc_time_zone);
591}
592
593void parseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone)
594{
595 return parseDateTime64BestEffortImpl<void>(res, scale, in, local_time_zone, utc_time_zone);
596}
597
598bool tryParseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone)
599{
600 return parseDateTime64BestEffortImpl<bool>(res, scale, in, local_time_zone, utc_time_zone);
601}
602
603}
604