1#include "duckdb/function/scalar/strftime_format.hpp"
2#include "duckdb/common/string_util.hpp"
3#include "duckdb/common/to_string.hpp"
4#include "duckdb/common/types/cast_helpers.hpp"
5#include "duckdb/common/types/date.hpp"
6#include "duckdb/common/types/time.hpp"
7#include "duckdb/common/types/timestamp.hpp"
8
9namespace duckdb {
10
11idx_t StrfTimepecifierSize(StrTimeSpecifier specifier) {
12 switch (specifier) {
13 case StrTimeSpecifier::ABBREVIATED_WEEKDAY_NAME:
14 case StrTimeSpecifier::ABBREVIATED_MONTH_NAME:
15 return 3;
16 case StrTimeSpecifier::WEEKDAY_DECIMAL:
17 return 1;
18 case StrTimeSpecifier::DAY_OF_MONTH_PADDED:
19 case StrTimeSpecifier::MONTH_DECIMAL_PADDED:
20 case StrTimeSpecifier::YEAR_WITHOUT_CENTURY_PADDED:
21 case StrTimeSpecifier::HOUR_24_PADDED:
22 case StrTimeSpecifier::HOUR_12_PADDED:
23 case StrTimeSpecifier::MINUTE_PADDED:
24 case StrTimeSpecifier::SECOND_PADDED:
25 case StrTimeSpecifier::AM_PM:
26 case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST:
27 case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST:
28 return 2;
29 case StrTimeSpecifier::MICROSECOND_PADDED:
30 return 6;
31 case StrTimeSpecifier::MILLISECOND_PADDED:
32 return 3;
33 case StrTimeSpecifier::DAY_OF_YEAR_PADDED:
34 return 3;
35 default:
36 return 0;
37 }
38}
39
40void StrTimeFormat::AddLiteral(string literal) {
41 constant_size += literal.size();
42 literals.push_back(x: std::move(literal));
43}
44
45void StrTimeFormat::AddFormatSpecifier(string preceding_literal, StrTimeSpecifier specifier) {
46 AddLiteral(literal: std::move(preceding_literal));
47 specifiers.push_back(x: specifier);
48}
49
50void StrfTimeFormat::AddFormatSpecifier(string preceding_literal, StrTimeSpecifier specifier) {
51 is_date_specifier.push_back(x: IsDateSpecifier(specifier));
52 idx_t specifier_size = StrfTimepecifierSize(specifier);
53 if (specifier_size == 0) {
54 // variable length specifier
55 var_length_specifiers.push_back(x: specifier);
56 } else {
57 // constant size specifier
58 constant_size += specifier_size;
59 }
60 StrTimeFormat::AddFormatSpecifier(preceding_literal: std::move(preceding_literal), specifier);
61}
62
63idx_t StrfTimeFormat::GetSpecifierLength(StrTimeSpecifier specifier, date_t date, dtime_t time, int32_t utc_offset,
64 const char *tz_name) {
65 switch (specifier) {
66 case StrTimeSpecifier::FULL_WEEKDAY_NAME:
67 return Date::DAY_NAMES[Date::ExtractISODayOfTheWeek(date) % 7].GetSize();
68 case StrTimeSpecifier::FULL_MONTH_NAME:
69 return Date::MONTH_NAMES[Date::ExtractMonth(date) - 1].GetSize();
70 case StrTimeSpecifier::YEAR_DECIMAL: {
71 auto year = Date::ExtractYear(date);
72 // Be consistent with WriteStandardSpecifier
73 if (0 <= year && year <= 9999) {
74 return 4;
75 } else {
76 return NumericHelper::SignedLength<int32_t, uint32_t>(value: year);
77 }
78 }
79 case StrTimeSpecifier::MONTH_DECIMAL: {
80 idx_t len = 1;
81 auto month = Date::ExtractMonth(date);
82 len += month >= 10;
83 return len;
84 }
85 case StrTimeSpecifier::UTC_OFFSET:
86 // ±HH or ±HH:MM
87 return (utc_offset % 60) ? 6 : 3;
88 case StrTimeSpecifier::TZ_NAME:
89 if (tz_name) {
90 return strlen(s: tz_name);
91 }
92 // empty for now
93 return 0;
94 case StrTimeSpecifier::HOUR_24_DECIMAL:
95 case StrTimeSpecifier::HOUR_12_DECIMAL:
96 case StrTimeSpecifier::MINUTE_DECIMAL:
97 case StrTimeSpecifier::SECOND_DECIMAL: {
98 // time specifiers
99 idx_t len = 1;
100 int32_t hour, min, sec, msec;
101 Time::Convert(time, out_hour&: hour, out_min&: min, out_sec&: sec, out_micros&: msec);
102 switch (specifier) {
103 case StrTimeSpecifier::HOUR_24_DECIMAL:
104 len += hour >= 10;
105 break;
106 case StrTimeSpecifier::HOUR_12_DECIMAL:
107 hour = hour % 12;
108 if (hour == 0) {
109 hour = 12;
110 }
111 len += hour >= 10;
112 break;
113 case StrTimeSpecifier::MINUTE_DECIMAL:
114 len += min >= 10;
115 break;
116 case StrTimeSpecifier::SECOND_DECIMAL:
117 len += sec >= 10;
118 break;
119 default:
120 throw InternalException("Time specifier mismatch");
121 }
122 return len;
123 }
124 case StrTimeSpecifier::DAY_OF_MONTH:
125 return NumericHelper::UnsignedLength<uint32_t>(value: Date::ExtractDay(date));
126 case StrTimeSpecifier::DAY_OF_YEAR_DECIMAL:
127 return NumericHelper::UnsignedLength<uint32_t>(value: Date::ExtractDayOfTheYear(date));
128 case StrTimeSpecifier::YEAR_WITHOUT_CENTURY:
129 return NumericHelper::UnsignedLength<uint32_t>(value: AbsValue(a: Date::ExtractYear(date)) % 100);
130 default:
131 throw InternalException("Unimplemented specifier for GetSpecifierLength");
132 }
133}
134
135//! Returns the total length of the date formatted by this format specifier
136idx_t StrfTimeFormat::GetLength(date_t date, dtime_t time, int32_t utc_offset, const char *tz_name) {
137 idx_t size = constant_size;
138 if (!var_length_specifiers.empty()) {
139 for (auto &specifier : var_length_specifiers) {
140 size += GetSpecifierLength(specifier, date, time, utc_offset, tz_name);
141 }
142 }
143 return size;
144}
145
146char *StrfTimeFormat::WriteString(char *target, const string_t &str) {
147 idx_t size = str.GetSize();
148 memcpy(dest: target, src: str.GetData(), n: size);
149 return target + size;
150}
151
152// write a value in the range of 0..99 unpadded (e.g. "1", "2", ... "98", "99")
153char *StrfTimeFormat::Write2(char *target, uint8_t value) {
154 D_ASSERT(value < 100);
155 if (value >= 10) {
156 return WritePadded2(target, value);
157 } else {
158 *target = char(uint8_t('0') + value);
159 return target + 1;
160 }
161}
162
163// write a value in the range of 0..99 padded to 2 digits
164char *StrfTimeFormat::WritePadded2(char *target, uint32_t value) {
165 D_ASSERT(value < 100);
166 auto index = static_cast<unsigned>(value * 2);
167 *target++ = duckdb_fmt::internal::data::digits[index];
168 *target++ = duckdb_fmt::internal::data::digits[index + 1];
169 return target;
170}
171
172// write a value in the range of 0..999 padded
173char *StrfTimeFormat::WritePadded3(char *target, uint32_t value) {
174 D_ASSERT(value < 1000);
175 if (value >= 100) {
176 WritePadded2(target: target + 1, value: value % 100);
177 *target = char(uint8_t('0') + value / 100);
178 return target + 3;
179 } else {
180 *target = '0';
181 target++;
182 return WritePadded2(target, value);
183 }
184}
185
186// write a value in the range of 0..999999 padded to 6 digits
187char *StrfTimeFormat::WritePadded(char *target, uint32_t value, size_t padding) {
188 D_ASSERT(padding % 2 == 0);
189 for (size_t i = 0; i < padding / 2; i++) {
190 int decimals = value % 100;
191 WritePadded2(target: target + padding - 2 * (i + 1), value: decimals);
192 value /= 100;
193 }
194 return target + padding;
195}
196
197bool StrfTimeFormat::IsDateSpecifier(StrTimeSpecifier specifier) {
198 switch (specifier) {
199 case StrTimeSpecifier::ABBREVIATED_WEEKDAY_NAME:
200 case StrTimeSpecifier::FULL_WEEKDAY_NAME:
201 case StrTimeSpecifier::DAY_OF_YEAR_PADDED:
202 case StrTimeSpecifier::DAY_OF_YEAR_DECIMAL:
203 case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST:
204 case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST:
205 case StrTimeSpecifier::WEEKDAY_DECIMAL:
206 return true;
207 default:
208 return false;
209 }
210}
211
212char *StrfTimeFormat::WriteDateSpecifier(StrTimeSpecifier specifier, date_t date, char *target) {
213 switch (specifier) {
214 case StrTimeSpecifier::ABBREVIATED_WEEKDAY_NAME: {
215 auto dow = Date::ExtractISODayOfTheWeek(date);
216 target = WriteString(target, str: Date::DAY_NAMES_ABBREVIATED[dow % 7]);
217 break;
218 }
219 case StrTimeSpecifier::FULL_WEEKDAY_NAME: {
220 auto dow = Date::ExtractISODayOfTheWeek(date);
221 target = WriteString(target, str: Date::DAY_NAMES[dow % 7]);
222 break;
223 }
224 case StrTimeSpecifier::WEEKDAY_DECIMAL: {
225 auto dow = Date::ExtractISODayOfTheWeek(date);
226 *target = char('0' + uint8_t(dow % 7));
227 target++;
228 break;
229 }
230 case StrTimeSpecifier::DAY_OF_YEAR_PADDED: {
231 int32_t doy = Date::ExtractDayOfTheYear(date);
232 target = WritePadded3(target, value: doy);
233 break;
234 }
235 case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST:
236 target = WritePadded2(target, value: Date::ExtractWeekNumberRegular(date, monday_first: true));
237 break;
238 case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST:
239 target = WritePadded2(target, value: Date::ExtractWeekNumberRegular(date, monday_first: false));
240 break;
241 case StrTimeSpecifier::DAY_OF_YEAR_DECIMAL: {
242 uint32_t doy = Date::ExtractDayOfTheYear(date);
243 target += NumericHelper::UnsignedLength<uint32_t>(value: doy);
244 NumericHelper::FormatUnsigned(value: doy, ptr: target);
245 break;
246 }
247 default:
248 throw InternalException("Unimplemented date specifier for strftime");
249 }
250 return target;
251}
252
253char *StrfTimeFormat::WriteStandardSpecifier(StrTimeSpecifier specifier, int32_t data[], const char *tz_name,
254 size_t tz_len, char *target) {
255 // data contains [0] year, [1] month, [2] day, [3] hour, [4] minute, [5] second, [6] msec, [7] utc
256 switch (specifier) {
257 case StrTimeSpecifier::DAY_OF_MONTH_PADDED:
258 target = WritePadded2(target, value: data[2]);
259 break;
260 case StrTimeSpecifier::ABBREVIATED_MONTH_NAME: {
261 auto &month_name = Date::MONTH_NAMES_ABBREVIATED[data[1] - 1];
262 return WriteString(target, str: month_name);
263 }
264 case StrTimeSpecifier::FULL_MONTH_NAME: {
265 auto &month_name = Date::MONTH_NAMES[data[1] - 1];
266 return WriteString(target, str: month_name);
267 }
268 case StrTimeSpecifier::MONTH_DECIMAL_PADDED:
269 target = WritePadded2(target, value: data[1]);
270 break;
271 case StrTimeSpecifier::YEAR_WITHOUT_CENTURY_PADDED:
272 target = WritePadded2(target, value: AbsValue(a: data[0]) % 100);
273 break;
274 case StrTimeSpecifier::YEAR_DECIMAL:
275 if (data[0] >= 0 && data[0] <= 9999) {
276 target = WritePadded(target, value: data[0], padding: 4);
277 } else {
278 int32_t year = data[0];
279 if (data[0] < 0) {
280 *target = '-';
281 year = -year;
282 target++;
283 }
284 auto len = NumericHelper::UnsignedLength<uint32_t>(value: year);
285 NumericHelper::FormatUnsigned(value: year, ptr: target + len);
286 target += len;
287 }
288 break;
289 case StrTimeSpecifier::HOUR_24_PADDED: {
290 target = WritePadded2(target, value: data[3]);
291 break;
292 }
293 case StrTimeSpecifier::HOUR_12_PADDED: {
294 int hour = data[3] % 12;
295 if (hour == 0) {
296 hour = 12;
297 }
298 target = WritePadded2(target, value: hour);
299 break;
300 }
301 case StrTimeSpecifier::AM_PM:
302 *target++ = data[3] >= 12 ? 'P' : 'A';
303 *target++ = 'M';
304 break;
305 case StrTimeSpecifier::MINUTE_PADDED: {
306 target = WritePadded2(target, value: data[4]);
307 break;
308 }
309 case StrTimeSpecifier::SECOND_PADDED:
310 target = WritePadded2(target, value: data[5]);
311 break;
312 case StrTimeSpecifier::MICROSECOND_PADDED:
313 target = WritePadded(target, value: data[6], padding: 6);
314 break;
315 case StrTimeSpecifier::MILLISECOND_PADDED:
316 target = WritePadded3(target, value: data[6] / 1000);
317 break;
318 case StrTimeSpecifier::UTC_OFFSET: {
319 *target++ = (data[7] < 0) ? '-' : '+';
320
321 auto offset = abs(x: data[7]);
322 auto offset_hours = offset / Interval::MINS_PER_HOUR;
323 auto offset_minutes = offset % Interval::MINS_PER_HOUR;
324 target = WritePadded2(target, value: offset_hours);
325 if (offset_minutes) {
326 *target++ = ':';
327 target = WritePadded2(target, value: offset_minutes);
328 }
329 break;
330 }
331 case StrTimeSpecifier::TZ_NAME:
332 if (tz_name) {
333 memcpy(dest: target, src: tz_name, n: tz_len);
334 target += strlen(s: tz_name);
335 }
336 break;
337 case StrTimeSpecifier::DAY_OF_MONTH: {
338 target = Write2(target, value: data[2] % 100);
339 break;
340 }
341 case StrTimeSpecifier::MONTH_DECIMAL: {
342 target = Write2(target, value: data[1]);
343 break;
344 }
345 case StrTimeSpecifier::YEAR_WITHOUT_CENTURY: {
346 target = Write2(target, value: AbsValue(a: data[0]) % 100);
347 break;
348 }
349 case StrTimeSpecifier::HOUR_24_DECIMAL: {
350 target = Write2(target, value: data[3]);
351 break;
352 }
353 case StrTimeSpecifier::HOUR_12_DECIMAL: {
354 int hour = data[3] % 12;
355 if (hour == 0) {
356 hour = 12;
357 }
358 target = Write2(target, value: hour);
359 break;
360 }
361 case StrTimeSpecifier::MINUTE_DECIMAL: {
362 target = Write2(target, value: data[4]);
363 break;
364 }
365 case StrTimeSpecifier::SECOND_DECIMAL: {
366 target = Write2(target, value: data[5]);
367 break;
368 }
369 default:
370 throw InternalException("Unimplemented specifier for WriteStandardSpecifier in strftime");
371 }
372 return target;
373}
374
375void StrfTimeFormat::FormatString(date_t date, int32_t data[8], const char *tz_name, char *target) {
376 D_ASSERT(specifiers.size() + 1 == literals.size());
377 idx_t i;
378 for (i = 0; i < specifiers.size(); i++) {
379 // first copy the current literal
380 memcpy(dest: target, src: literals[i].c_str(), n: literals[i].size());
381 target += literals[i].size();
382 // now copy the specifier
383 if (is_date_specifier[i]) {
384 target = WriteDateSpecifier(specifier: specifiers[i], date, target);
385 } else {
386 auto tz_len = tz_name ? strlen(s: tz_name) : 0;
387 target = WriteStandardSpecifier(specifier: specifiers[i], data, tz_name, tz_len, target);
388 }
389 }
390 // copy the final literal into the target
391 memcpy(dest: target, src: literals[i].c_str(), n: literals[i].size());
392}
393
394void StrfTimeFormat::FormatString(date_t date, dtime_t time, char *target) {
395 int32_t data[8]; // year, month, day, hour, min, sec, µs, offset
396 Date::Convert(date, out_year&: data[0], out_month&: data[1], out_day&: data[2]);
397 Time::Convert(time, out_hour&: data[3], out_min&: data[4], out_sec&: data[5], out_micros&: data[6]);
398 data[7] = 0;
399
400 FormatString(date, data, tz_name: nullptr, target);
401}
402
403string StrfTimeFormat::Format(timestamp_t timestamp, const string &format_str) {
404 StrfTimeFormat format;
405 format.ParseFormatSpecifier(format_string: format_str, format);
406
407 auto date = Timestamp::GetDate(timestamp);
408 auto time = Timestamp::GetTime(timestamp);
409
410 auto len = format.GetLength(date, time, utc_offset: 0, tz_name: nullptr);
411 auto result = make_unsafe_uniq_array<char>(n: len);
412 format.FormatString(date, time, target: result.get());
413 return string(result.get(), len);
414}
415
416string StrTimeFormat::ParseFormatSpecifier(const string &format_string, StrTimeFormat &format) {
417 if (format_string.empty()) {
418 return "Empty format string";
419 }
420 format.format_specifier = format_string;
421 format.specifiers.clear();
422 format.literals.clear();
423 format.numeric_width.clear();
424 format.constant_size = 0;
425 idx_t pos = 0;
426 string current_literal;
427 for (idx_t i = 0; i < format_string.size(); i++) {
428 if (format_string[i] == '%') {
429 if (i + 1 == format_string.size()) {
430 return "Trailing format character %";
431 }
432 if (i > pos) {
433 // push the previous string to the current literal
434 current_literal += format_string.substr(pos: pos, n: i - pos);
435 }
436 char format_char = format_string[++i];
437 if (format_char == '%') {
438 // special case: %%
439 // set the pos for the next literal and continue
440 pos = i;
441 continue;
442 }
443 StrTimeSpecifier specifier;
444 if (format_char == '-' && i + 1 < format_string.size()) {
445 format_char = format_string[++i];
446 switch (format_char) {
447 case 'd':
448 specifier = StrTimeSpecifier::DAY_OF_MONTH;
449 break;
450 case 'm':
451 specifier = StrTimeSpecifier::MONTH_DECIMAL;
452 break;
453 case 'y':
454 specifier = StrTimeSpecifier::YEAR_WITHOUT_CENTURY;
455 break;
456 case 'H':
457 specifier = StrTimeSpecifier::HOUR_24_DECIMAL;
458 break;
459 case 'I':
460 specifier = StrTimeSpecifier::HOUR_12_DECIMAL;
461 break;
462 case 'M':
463 specifier = StrTimeSpecifier::MINUTE_DECIMAL;
464 break;
465 case 'S':
466 specifier = StrTimeSpecifier::SECOND_DECIMAL;
467 break;
468 case 'j':
469 specifier = StrTimeSpecifier::DAY_OF_YEAR_DECIMAL;
470 break;
471 default:
472 return "Unrecognized format for strftime/strptime: %-" + string(1, format_char);
473 }
474 } else {
475 switch (format_char) {
476 case 'a':
477 specifier = StrTimeSpecifier::ABBREVIATED_WEEKDAY_NAME;
478 break;
479 case 'A':
480 specifier = StrTimeSpecifier::FULL_WEEKDAY_NAME;
481 break;
482 case 'w':
483 specifier = StrTimeSpecifier::WEEKDAY_DECIMAL;
484 break;
485 case 'd':
486 specifier = StrTimeSpecifier::DAY_OF_MONTH_PADDED;
487 break;
488 case 'h':
489 case 'b':
490 specifier = StrTimeSpecifier::ABBREVIATED_MONTH_NAME;
491 break;
492 case 'B':
493 specifier = StrTimeSpecifier::FULL_MONTH_NAME;
494 break;
495 case 'm':
496 specifier = StrTimeSpecifier::MONTH_DECIMAL_PADDED;
497 break;
498 case 'y':
499 specifier = StrTimeSpecifier::YEAR_WITHOUT_CENTURY_PADDED;
500 break;
501 case 'Y':
502 specifier = StrTimeSpecifier::YEAR_DECIMAL;
503 break;
504 case 'H':
505 specifier = StrTimeSpecifier::HOUR_24_PADDED;
506 break;
507 case 'I':
508 specifier = StrTimeSpecifier::HOUR_12_PADDED;
509 break;
510 case 'p':
511 specifier = StrTimeSpecifier::AM_PM;
512 break;
513 case 'M':
514 specifier = StrTimeSpecifier::MINUTE_PADDED;
515 break;
516 case 'S':
517 specifier = StrTimeSpecifier::SECOND_PADDED;
518 break;
519 case 'f':
520 specifier = StrTimeSpecifier::MICROSECOND_PADDED;
521 break;
522 case 'g':
523 specifier = StrTimeSpecifier::MILLISECOND_PADDED;
524 break;
525 case 'z':
526 specifier = StrTimeSpecifier::UTC_OFFSET;
527 break;
528 case 'Z':
529 specifier = StrTimeSpecifier::TZ_NAME;
530 break;
531 case 'j':
532 specifier = StrTimeSpecifier::DAY_OF_YEAR_PADDED;
533 break;
534 case 'U':
535 specifier = StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST;
536 break;
537 case 'W':
538 specifier = StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST;
539 break;
540 case 'c':
541 case 'x':
542 case 'X':
543 case 'T': {
544 string subformat;
545 if (format_char == 'c') {
546 // %c: Locale’s appropriate date and time representation.
547 // we push the ISO timestamp representation here
548 subformat = "%Y-%m-%d %H:%M:%S";
549 } else if (format_char == 'x') {
550 // %x - Locale’s appropriate date representation.
551 // we push the ISO date format here
552 subformat = "%Y-%m-%d";
553 } else if (format_char == 'X' || format_char == 'T') {
554 // %X - Locale’s appropriate time representation.
555 // we push the ISO time format here
556 subformat = "%H:%M:%S";
557 }
558 // parse the subformat in a separate format specifier
559 StrfTimeFormat locale_format;
560 string error = StrTimeFormat::ParseFormatSpecifier(format_string: subformat, format&: locale_format);
561 D_ASSERT(error.empty());
562 // add the previous literal to the first literal of the subformat
563 locale_format.literals[0] = std::move(current_literal) + locale_format.literals[0];
564 current_literal = "";
565 // now push the subformat into the current format specifier
566 for (idx_t i = 0; i < locale_format.specifiers.size(); i++) {
567 format.AddFormatSpecifier(preceding_literal: std::move(locale_format.literals[i]), specifier: locale_format.specifiers[i]);
568 }
569 pos = i + 1;
570 continue;
571 }
572 default:
573 return "Unrecognized format for strftime/strptime: %" + string(1, format_char);
574 }
575 }
576 format.AddFormatSpecifier(preceding_literal: std::move(current_literal), specifier);
577 current_literal = "";
578 pos = i + 1;
579 }
580 }
581 // add the final literal
582 if (pos < format_string.size()) {
583 current_literal += format_string.substr(pos: pos, n: format_string.size() - pos);
584 }
585 format.AddLiteral(literal: std::move(current_literal));
586 return string();
587}
588
589void StrfTimeFormat::ConvertDateVector(Vector &input, Vector &result, idx_t count) {
590 D_ASSERT(input.GetType().id() == LogicalTypeId::DATE);
591 D_ASSERT(result.GetType().id() == LogicalTypeId::VARCHAR);
592 UnaryExecutor::ExecuteWithNulls<date_t, string_t>(input, result, count,
593 fun: [&](date_t input, ValidityMask &mask, idx_t idx) {
594 if (Date::IsFinite(date: input)) {
595 dtime_t time(0);
596 idx_t len = GetLength(date: input, time, utc_offset: 0, tz_name: nullptr);
597 string_t target = StringVector::EmptyString(vector&: result, len);
598 FormatString(date: input, time, target: target.GetDataWriteable());
599 target.Finalize();
600 return target;
601 } else {
602 mask.SetInvalid(idx);
603 return string_t();
604 }
605 });
606}
607
608void StrfTimeFormat::ConvertTimestampVector(Vector &input, Vector &result, idx_t count) {
609 D_ASSERT(input.GetType().id() == LogicalTypeId::TIMESTAMP || input.GetType().id() == LogicalTypeId::TIMESTAMP_TZ);
610 D_ASSERT(result.GetType().id() == LogicalTypeId::VARCHAR);
611 UnaryExecutor::ExecuteWithNulls<timestamp_t, string_t>(
612 input, result, count, fun: [&](timestamp_t input, ValidityMask &mask, idx_t idx) {
613 if (Timestamp::IsFinite(timestamp: input)) {
614 date_t date;
615 dtime_t time;
616 Timestamp::Convert(date: input, out_date&: date, out_time&: time);
617 idx_t len = GetLength(date, time, utc_offset: 0, tz_name: nullptr);
618 string_t target = StringVector::EmptyString(vector&: result, len);
619 FormatString(date, time, target: target.GetDataWriteable());
620 target.Finalize();
621 return target;
622 } else {
623 mask.SetInvalid(idx);
624 return string_t();
625 }
626 });
627}
628
629void StrpTimeFormat::AddFormatSpecifier(string preceding_literal, StrTimeSpecifier specifier) {
630 numeric_width.push_back(x: NumericSpecifierWidth(specifier));
631 StrTimeFormat::AddFormatSpecifier(preceding_literal: std::move(preceding_literal), specifier);
632}
633
634int StrpTimeFormat::NumericSpecifierWidth(StrTimeSpecifier specifier) {
635 switch (specifier) {
636 case StrTimeSpecifier::WEEKDAY_DECIMAL:
637 return 1;
638 case StrTimeSpecifier::DAY_OF_MONTH_PADDED:
639 case StrTimeSpecifier::DAY_OF_MONTH:
640 case StrTimeSpecifier::MONTH_DECIMAL_PADDED:
641 case StrTimeSpecifier::MONTH_DECIMAL:
642 case StrTimeSpecifier::YEAR_WITHOUT_CENTURY_PADDED:
643 case StrTimeSpecifier::YEAR_WITHOUT_CENTURY:
644 case StrTimeSpecifier::HOUR_24_PADDED:
645 case StrTimeSpecifier::HOUR_24_DECIMAL:
646 case StrTimeSpecifier::HOUR_12_PADDED:
647 case StrTimeSpecifier::HOUR_12_DECIMAL:
648 case StrTimeSpecifier::MINUTE_PADDED:
649 case StrTimeSpecifier::MINUTE_DECIMAL:
650 case StrTimeSpecifier::SECOND_PADDED:
651 case StrTimeSpecifier::SECOND_DECIMAL:
652 case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST:
653 case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST:
654 return 2;
655 case StrTimeSpecifier::MILLISECOND_PADDED:
656 case StrTimeSpecifier::DAY_OF_YEAR_PADDED:
657 case StrTimeSpecifier::DAY_OF_YEAR_DECIMAL:
658 return 3;
659 case StrTimeSpecifier::YEAR_DECIMAL:
660 return 4;
661 case StrTimeSpecifier::MICROSECOND_PADDED:
662 return 6;
663 default:
664 return -1;
665 }
666}
667
//! Which half of the day an AM/PM marker selected while parsing (NONE until a marker is parsed)
enum class TimeSpecifierAMOrPM : uint8_t {
	TIME_SPECIFIER_NONE = 0,
	TIME_SPECIFIER_AM = 1,
	TIME_SPECIFIER_PM = 2
};
669
670int32_t StrpTimeFormat::TryParseCollection(const char *data, idx_t &pos, idx_t size, const string_t collection[],
671 idx_t collection_count) {
672 for (idx_t c = 0; c < collection_count; c++) {
673 auto &entry = collection[c];
674 auto entry_data = entry.GetData();
675 auto entry_size = entry.GetSize();
676 // check if this entry matches
677 if (pos + entry_size > size) {
678 // too big: can't match
679 continue;
680 }
681 // compare the characters
682 idx_t i;
683 for (i = 0; i < entry_size; i++) {
684 if (std::tolower(c: entry_data[i]) != std::tolower(c: data[pos + i])) {
685 break;
686 }
687 }
688 if (i == entry_size) {
689 // full match
690 pos += entry_size;
691 return c;
692 }
693 }
694 return -1;
695}
696
697//! Parses a timestamp using the given specifier
698bool StrpTimeFormat::Parse(string_t str, ParseResult &result) {
699 auto &result_data = result.data;
700 auto &error_message = result.error_message;
701 auto &error_position = result.error_position;
702
703 // initialize the result
704 result_data[0] = 1900;
705 result_data[1] = 1;
706 result_data[2] = 1;
707 result_data[3] = 0;
708 result_data[4] = 0;
709 result_data[5] = 0;
710 result_data[6] = 0;
711 result_data[7] = 0;
712
713 auto data = str.GetData();
714 idx_t size = str.GetSize();
715 // skip leading spaces
716 while (StringUtil::CharacterIsSpace(c: *data)) {
717 data++;
718 size--;
719 }
720 idx_t pos = 0;
721 TimeSpecifierAMOrPM ampm = TimeSpecifierAMOrPM::TIME_SPECIFIER_NONE;
722
723 // Year offset state (Year+W/j)
724 auto offset_specifier = StrTimeSpecifier::WEEKDAY_DECIMAL;
725 uint64_t weekno = 0;
726 uint64_t weekday = 0;
727 uint64_t yearday = 0;
728
729 for (idx_t i = 0;; i++) {
730 D_ASSERT(i < literals.size());
731 // first compare the literal
732 const auto &literal = literals[i];
733 for (size_t l = 0; l < literal.size();) {
734 // Match runs of spaces to runs of spaces.
735 if (StringUtil::CharacterIsSpace(c: literal[l])) {
736 if (!StringUtil::CharacterIsSpace(c: data[pos])) {
737 error_message = "Space does not match, expected " + literals[i];
738 error_position = pos;
739 return false;
740 }
741 for (++pos; pos < size && StringUtil::CharacterIsSpace(c: data[pos]); ++pos) {
742 continue;
743 }
744 for (++l; l < literal.size() && StringUtil::CharacterIsSpace(c: literal[l]); ++l) {
745 continue;
746 }
747 continue;
748 }
749 // literal does not match
750 if (data[pos++] != literal[l++]) {
751 error_message = "Literal does not match, expected " + literal;
752 error_position = pos;
753 return false;
754 }
755 }
756 if (i == specifiers.size()) {
757 break;
758 }
759 // now parse the specifier
760 if (numeric_width[i] > 0) {
761 // numeric specifier: parse a number
762 uint64_t number = 0;
763 size_t start_pos = pos;
764 size_t end_pos = start_pos + numeric_width[i];
765 while (pos < size && pos < end_pos && StringUtil::CharacterIsDigit(c: data[pos])) {
766 number = number * 10 + data[pos] - '0';
767 pos++;
768 }
769 if (pos == start_pos) {
770 // expected a number here
771 error_message = "Expected a number";
772 error_position = start_pos;
773 return false;
774 }
775 switch (specifiers[i]) {
776 case StrTimeSpecifier::DAY_OF_MONTH_PADDED:
777 case StrTimeSpecifier::DAY_OF_MONTH:
778 if (number < 1 || number > 31) {
779 error_message = "Day out of range, expected a value between 1 and 31";
780 error_position = start_pos;
781 return false;
782 }
783 // day of the month
784 result_data[2] = number;
785 offset_specifier = specifiers[i];
786 break;
787 case StrTimeSpecifier::MONTH_DECIMAL_PADDED:
788 case StrTimeSpecifier::MONTH_DECIMAL:
789 if (number < 1 || number > 12) {
790 error_message = "Month out of range, expected a value between 1 and 12";
791 error_position = start_pos;
792 return false;
793 }
794 // month number
795 result_data[1] = number;
796 offset_specifier = specifiers[i];
797 break;
798 case StrTimeSpecifier::YEAR_WITHOUT_CENTURY_PADDED:
799 case StrTimeSpecifier::YEAR_WITHOUT_CENTURY:
800 // year without century..
801 // Python uses 69 as a crossover point (i.e. >= 69 is 19.., < 69 is 20..)
802 if (number >= 100) {
803 // %y only supports numbers between [0..99]
804 error_message = "Year without century out of range, expected a value between 0 and 99";
805 error_position = start_pos;
806 return false;
807 }
808 if (number >= 69) {
809 result_data[0] = int32_t(1900 + number);
810 } else {
811 result_data[0] = int32_t(2000 + number);
812 }
813 break;
814 case StrTimeSpecifier::YEAR_DECIMAL:
815 // year as full number
816 result_data[0] = number;
817 break;
818 case StrTimeSpecifier::HOUR_24_PADDED:
819 case StrTimeSpecifier::HOUR_24_DECIMAL:
820 if (number >= 24) {
821 error_message = "Hour out of range, expected a value between 0 and 23";
822 error_position = start_pos;
823 return false;
824 }
825 // hour as full number
826 result_data[3] = number;
827 break;
828 case StrTimeSpecifier::HOUR_12_PADDED:
829 case StrTimeSpecifier::HOUR_12_DECIMAL:
830 if (number < 1 || number > 12) {
831 error_message = "Hour12 out of range, expected a value between 1 and 12";
832 error_position = start_pos;
833 return false;
834 }
835 // 12-hour number: start off by just storing the number
836 result_data[3] = number;
837 break;
838 case StrTimeSpecifier::MINUTE_PADDED:
839 case StrTimeSpecifier::MINUTE_DECIMAL:
840 if (number >= 60) {
841 error_message = "Minutes out of range, expected a value between 0 and 59";
842 error_position = start_pos;
843 return false;
844 }
845 // minutes
846 result_data[4] = number;
847 break;
848 case StrTimeSpecifier::SECOND_PADDED:
849 case StrTimeSpecifier::SECOND_DECIMAL:
850 if (number >= 60) {
851 error_message = "Seconds out of range, expected a value between 0 and 59";
852 error_position = start_pos;
853 return false;
854 }
855 // seconds
856 result_data[5] = number;
857 break;
858 case StrTimeSpecifier::MICROSECOND_PADDED:
859 D_ASSERT(number < 1000000ULL); // enforced by the length of the number
860 // milliseconds
861 result_data[6] = number;
862 break;
863 case StrTimeSpecifier::MILLISECOND_PADDED:
864 D_ASSERT(number < 1000ULL); // enforced by the length of the number
865 // milliseconds
866 result_data[6] = number * 1000;
867 break;
868 case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST:
869 case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST:
870 // m/d overrides WU/w but does not conflict
871 switch (offset_specifier) {
872 case StrTimeSpecifier::DAY_OF_MONTH_PADDED:
873 case StrTimeSpecifier::DAY_OF_MONTH:
874 case StrTimeSpecifier::MONTH_DECIMAL_PADDED:
875 case StrTimeSpecifier::MONTH_DECIMAL:
876 // Just validate, don't use
877 break;
878 case StrTimeSpecifier::WEEKDAY_DECIMAL:
879 // First offset specifier
880 offset_specifier = specifiers[i];
881 break;
882 default:
883 error_message = "Multiple year offsets specified";
884 error_position = start_pos;
885 return false;
886 }
887 if (number > 53) {
888 error_message = "Week out of range, expected a value between 0 and 53";
889 error_position = start_pos;
890 return false;
891 }
892 weekno = number;
893 break;
894 case StrTimeSpecifier::WEEKDAY_DECIMAL:
895 if (number > 6) {
896 error_message = "Weekday out of range, expected a value between 0 and 6";
897 error_position = start_pos;
898 return false;
899 }
900 weekday = number;
901 break;
902 case StrTimeSpecifier::DAY_OF_YEAR_PADDED:
903 case StrTimeSpecifier::DAY_OF_YEAR_DECIMAL:
904 // m/d overrides j but does not conflict
905 switch (offset_specifier) {
906 case StrTimeSpecifier::DAY_OF_MONTH_PADDED:
907 case StrTimeSpecifier::DAY_OF_MONTH:
908 case StrTimeSpecifier::MONTH_DECIMAL_PADDED:
909 case StrTimeSpecifier::MONTH_DECIMAL:
910 // Just validate, don't use
911 break;
912 case StrTimeSpecifier::WEEKDAY_DECIMAL:
913 // First offset specifier
914 offset_specifier = specifiers[i];
915 break;
916 default:
917 error_message = "Multiple year offsets specified";
918 error_position = start_pos;
919 return false;
920 }
921 if (number < 1 || number > 366) {
922 error_message = "Year day out of range, expected a value between 1 and 366";
923 error_position = start_pos;
924 return false;
925 }
926 yearday = number;
927 break;
928 default:
929 throw NotImplementedException("Unsupported specifier for strptime");
930 }
931 } else {
932 switch (specifiers[i]) {
933 case StrTimeSpecifier::AM_PM: {
934 // parse the next 2 characters
935 if (pos + 2 > size) {
936 // no characters left to parse
937 error_message = "Expected AM/PM";
938 error_position = pos;
939 return false;
940 }
941 char pa_char = char(std::tolower(c: data[pos]));
942 char m_char = char(std::tolower(c: data[pos + 1]));
943 if (m_char != 'm') {
944 error_message = "Expected AM/PM";
945 error_position = pos;
946 return false;
947 }
948 if (pa_char == 'p') {
949 ampm = TimeSpecifierAMOrPM::TIME_SPECIFIER_PM;
950 } else if (pa_char == 'a') {
951 ampm = TimeSpecifierAMOrPM::TIME_SPECIFIER_AM;
952 } else {
953 error_message = "Expected AM/PM";
954 error_position = pos;
955 return false;
956 }
957 pos += 2;
958 break;
959 }
960 // we parse weekday names, but we don't use them as information
961 case StrTimeSpecifier::ABBREVIATED_WEEKDAY_NAME:
962 if (TryParseCollection(data, pos, size, collection: Date::DAY_NAMES_ABBREVIATED, collection_count: 7) < 0) {
963 error_message = "Expected an abbreviated day name (Mon, Tue, Wed, Thu, Fri, Sat, Sun)";
964 error_position = pos;
965 return false;
966 }
967 break;
968 case StrTimeSpecifier::FULL_WEEKDAY_NAME:
969 if (TryParseCollection(data, pos, size, collection: Date::DAY_NAMES, collection_count: 7) < 0) {
970 error_message = "Expected a full day name (Monday, Tuesday, etc...)";
971 error_position = pos;
972 return false;
973 }
974 break;
975 case StrTimeSpecifier::ABBREVIATED_MONTH_NAME: {
976 int32_t month = TryParseCollection(data, pos, size, collection: Date::MONTH_NAMES_ABBREVIATED, collection_count: 12);
977 if (month < 0) {
978 error_message = "Expected an abbreviated month name (Jan, Feb, Mar, etc..)";
979 error_position = pos;
980 return false;
981 }
982 result_data[1] = month + 1;
983 break;
984 }
985 case StrTimeSpecifier::FULL_MONTH_NAME: {
986 int32_t month = TryParseCollection(data, pos, size, collection: Date::MONTH_NAMES, collection_count: 12);
987 if (month < 0) {
988 error_message = "Expected a full month name (January, February, etc...)";
989 error_position = pos;
990 return false;
991 }
992 result_data[1] = month + 1;
993 break;
994 }
995 case StrTimeSpecifier::UTC_OFFSET: {
996 int hour_offset, minute_offset;
997 if (!Timestamp::TryParseUTCOffset(str: data, pos, len: size, hour_offset, minute_offset)) {
998 error_message = "Expected +HH[MM] or -HH[MM]";
999 error_position = pos;
1000 return false;
1001 }
1002 result_data[7] = hour_offset * Interval::MINS_PER_HOUR + minute_offset;
1003 break;
1004 }
1005 case StrTimeSpecifier::TZ_NAME: {
1006 // skip leading spaces
1007 while (pos < size && StringUtil::CharacterIsSpace(c: data[pos])) {
1008 pos++;
1009 }
1010 const auto tz_begin = data + pos;
1011 // stop when we encounter a non-tz character
1012 while (pos < size && Timestamp::CharacterIsTimeZone(c: data[pos])) {
1013 pos++;
1014 }
1015 const auto tz_end = data + pos;
1016 // Can't fully validate without a list - caller's responsibility.
1017 // But tz must not be empty.
1018 if (tz_end == tz_begin) {
1019 error_message = "Empty Time Zone name";
1020 error_position = tz_begin - data;
1021 return false;
1022 }
1023 result.tz.assign(first: tz_begin, last: tz_end);
1024 break;
1025 }
1026 default:
1027 throw NotImplementedException("Unsupported specifier for strptime");
1028 }
1029 }
1030 }
1031 // skip trailing spaces
1032 while (pos < size && StringUtil::CharacterIsSpace(c: data[pos])) {
1033 pos++;
1034 }
1035 if (pos != size) {
1036 error_message = "Full specifier did not match: trailing characters";
1037 error_position = pos;
1038 return false;
1039 }
1040 if (ampm != TimeSpecifierAMOrPM::TIME_SPECIFIER_NONE) {
1041 if (result_data[3] > 12) {
1042 error_message =
1043 "Invalid hour: " + to_string(val: result_data[3]) + " AM/PM, expected an hour within the range [0..12]";
1044 return false;
1045 }
1046 // adjust the hours based on the AM or PM specifier
1047 if (ampm == TimeSpecifierAMOrPM::TIME_SPECIFIER_AM) {
1048 // AM: 12AM=0, 1AM=1, 2AM=2, ..., 11AM=11
1049 if (result_data[3] == 12) {
1050 result_data[3] = 0;
1051 }
1052 } else {
1053 // PM: 12PM=12, 1PM=13, 2PM=14, ..., 11PM=23
1054 if (result_data[3] != 12) {
1055 result_data[3] += 12;
1056 }
1057 }
1058 }
1059 switch (offset_specifier) {
1060 case StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST:
1061 case StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST: {
1062 // Adjust weekday to be 0-based for the week type
1063 weekday = (weekday + 7 - int(offset_specifier == StrTimeSpecifier::WEEK_NUMBER_PADDED_MON_FIRST)) % 7;
1064 // Get the start of week 1, move back 7 days and then weekno * 7 + weekday gives the date
1065 const auto jan1 = Date::FromDate(year: result_data[0], month: 1, day: 1);
1066 auto yeardate = Date::GetMondayOfCurrentWeek(date: jan1);
1067 yeardate -= int(offset_specifier == StrTimeSpecifier::WEEK_NUMBER_PADDED_SUN_FIRST);
1068 // Is there a week 0?
1069 yeardate -= 7 * int(yeardate >= jan1);
1070 yeardate += weekno * 7 + weekday;
1071 Date::Convert(date: yeardate, out_year&: result_data[0], out_month&: result_data[1], out_day&: result_data[2]);
1072 break;
1073 }
1074 case StrTimeSpecifier::DAY_OF_YEAR_PADDED:
1075 case StrTimeSpecifier::DAY_OF_YEAR_DECIMAL: {
1076 auto yeardate = Date::FromDate(year: result_data[0], month: 1, day: 1);
1077 yeardate += yearday - 1;
1078 Date::Convert(date: yeardate, out_year&: result_data[0], out_month&: result_data[1], out_day&: result_data[2]);
1079 break;
1080 }
1081 case StrTimeSpecifier::DAY_OF_MONTH_PADDED:
1082 case StrTimeSpecifier::DAY_OF_MONTH:
1083 case StrTimeSpecifier::MONTH_DECIMAL_PADDED:
1084 case StrTimeSpecifier::MONTH_DECIMAL:
1085 // m/d overrides UWw/j
1086 break;
1087 default:
1088 D_ASSERT(offset_specifier == StrTimeSpecifier::WEEKDAY_DECIMAL);
1089 break;
1090 }
1091
1092 return true;
1093}
1094
1095StrpTimeFormat::ParseResult StrpTimeFormat::Parse(const string &format_string, const string &text) {
1096 StrpTimeFormat format;
1097 format.format_specifier = format_string;
1098 string error = StrTimeFormat::ParseFormatSpecifier(format_string, format);
1099 if (!error.empty()) {
1100 throw InvalidInputException("Failed to parse format specifier %s: %s", format_string, error);
1101 }
1102 StrpTimeFormat::ParseResult result;
1103 if (!format.Parse(str: text, result)) {
1104 throw InvalidInputException("Failed to parse string \"%s\" with format specifier \"%s\"", text, format_string);
1105 }
1106 return result;
1107}
1108
1109string StrpTimeFormat::FormatStrpTimeError(const string &input, idx_t position) {
1110 if (position == DConstants::INVALID_INDEX) {
1111 return string();
1112 }
1113 return input + "\n" + string(position, ' ') + "^";
1114}
1115
1116date_t StrpTimeFormat::ParseResult::ToDate() {
1117 return Date::FromDate(year: data[0], month: data[1], day: data[2]);
1118}
1119
1120bool StrpTimeFormat::ParseResult::TryToDate(date_t &result) {
1121 return Date::TryFromDate(year: data[0], month: data[1], day: data[2], result);
1122}
1123
1124timestamp_t StrpTimeFormat::ParseResult::ToTimestamp() {
1125 date_t date = Date::FromDate(year: data[0], month: data[1], day: data[2]);
1126 const auto hour_offset = data[7] / Interval::MINS_PER_HOUR;
1127 const auto mins_offset = data[7] % Interval::MINS_PER_HOUR;
1128 dtime_t time = Time::FromTime(hour: data[3] - hour_offset, minute: data[4] - mins_offset, second: data[5], microseconds: data[6]);
1129 return Timestamp::FromDatetime(date, time);
1130}
1131
1132bool StrpTimeFormat::ParseResult::TryToTimestamp(timestamp_t &result) {
1133 date_t date;
1134 if (!TryToDate(result&: date)) {
1135 return false;
1136 }
1137 const auto hour_offset = data[7] / Interval::MINS_PER_HOUR;
1138 const auto mins_offset = data[7] % Interval::MINS_PER_HOUR;
1139 dtime_t time = Time::FromTime(hour: data[3] - hour_offset, minute: data[4] - mins_offset, second: data[5], microseconds: data[6]);
1140 return Timestamp::TryFromDatetime(date, time, result);
1141}
1142
1143string StrpTimeFormat::ParseResult::FormatError(string_t input, const string &format_specifier) {
1144 return StringUtil::Format(fmt_str: "Could not parse string \"%s\" according to format specifier \"%s\"\n%s\nError: %s",
1145 params: input.GetString(), params: format_specifier,
1146 params: FormatStrpTimeError(input: input.GetString(), position: error_position), params: error_message);
1147}
1148
1149bool StrpTimeFormat::TryParseDate(string_t input, date_t &result, string &error_message) {
1150 ParseResult parse_result;
1151 if (!Parse(str: input, result&: parse_result)) {
1152 error_message = parse_result.FormatError(input, format_specifier);
1153 return false;
1154 }
1155 return parse_result.TryToDate(result);
1156}
1157
1158bool StrpTimeFormat::TryParseTimestamp(string_t input, timestamp_t &result, string &error_message) {
1159 ParseResult parse_result;
1160 if (!Parse(str: input, result&: parse_result)) {
1161 error_message = parse_result.FormatError(input, format_specifier);
1162 return false;
1163 }
1164 return parse_result.TryToTimestamp(result);
1165}
1166
1167date_t StrpTimeFormat::ParseDate(string_t input) {
1168 ParseResult result;
1169 if (!Parse(str: input, result)) {
1170 throw InvalidInputException(result.FormatError(input, format_specifier));
1171 }
1172 return result.ToDate();
1173}
1174
1175timestamp_t StrpTimeFormat::ParseTimestamp(string_t input) {
1176 ParseResult result;
1177 if (!Parse(str: input, result)) {
1178 throw InvalidInputException(result.FormatError(input, format_specifier));
1179 }
1180 return result.ToTimestamp();
1181}
1182
1183} // namespace duckdb
1184