1#include <DataTypes/DataTypeString.h>
2#include <DataTypes/DataTypeDate.h>
3#include <DataTypes/DataTypeDateTime.h>
4#include <DataTypes/DataTypeDateTime64.h>
5#include <Columns/ColumnString.h>
6
7#include <Functions/IFunctionImpl.h>
8#include <Functions/FunctionHelpers.h>
9#include <Functions/FunctionFactory.h>
10#include <Functions/DateTimeTransforms.h>
11#include <Functions/extractTimeZoneFromFunctionArguments.h>
12
13#include <IO/WriteHelpers.h>
14
15#include <common/DateLUTImpl.h>
16#include <common/find_symbols.h>
17#include <Core/DecimalFunctions.h>
18
19#include <type_traits>
20
21
22namespace DB
23{
24
25namespace ErrorCodes
26{
27 extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
28 extern const int ILLEGAL_TYPE_OF_ARGUMENT;
29 extern const int NOT_IMPLEMENTED;
30 extern const int ILLEGAL_COLUMN;
31 extern const int BAD_ARGUMENTS;
32}
33
34namespace
35{
36// in private namespace to avoid GCC 9 error: "explicit specialization in non-namespace scope"
37template <typename DataType> struct ActionaValueTypeMap {};
38template <> struct ActionaValueTypeMap<DataTypeDate> { using ActionValueType = UInt16; };
39template <> struct ActionaValueTypeMap<DataTypeDateTime> { using ActionValueType = UInt32; };
40// TODO(vnemkov): once there is support for Int64 in LUT, make that Int64.
41// TODO(vnemkov): to add sub-second format instruction, make that DateTime64 and do some math in Action<T>.
42template <> struct ActionaValueTypeMap<DataTypeDateTime64> { using ActionValueType = UInt32; };
43}
44
45/** formatDateTime(time, 'pattern')
46 * Performs formatting of time, according to provided pattern.
47 *
48 * This function is optimized with an assumption, that the resulting strings are fixed width.
49 * (This assumption is fulfilled for currently supported formatting options).
50 *
51 * It is implemented in two steps.
52 * At first step, it creates a pattern of zeros, literal characters, whitespaces, etc.
53 * and quickly fills resulting character array (string column) with this pattern.
54 * At second step, it walks across the resulting character array and modifies/replaces specific charaters,
55 * by calling some functions by pointers and shifting cursor by specified amount.
56 *
57 * Advantages:
58 * - memcpy is mostly unrolled;
59 * - low number of arithmetic ops due to pre-filled pattern;
60 * - for somewhat reason, function by pointer call is faster than switch/case.
61 *
62 * Possible further optimization options:
63 * - slightly interleave first and second step for better cache locality
64 * (but it has no sense when character array fits in L1d cache);
65 * - avoid indirect function calls and inline functions with JIT compilation.
66 *
67 * Performance on Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz:
68 *
69 * WITH formatDateTime(now() + number, '%H:%M:%S') AS x SELECT count() FROM system.numbers WHERE NOT ignore(x);
70 * - 97 million rows per second per core;
71 *
72 * WITH formatDateTime(toDateTime('2018-01-01 00:00:00') + number, '%F %T') AS x SELECT count() FROM system.numbers WHERE NOT ignore(x)
73 * - 71 million rows per second per core;
74 *
75 * select count() from (select formatDateTime(t, '%m/%d/%Y %H:%M:%S') from (select toDateTime('2018-01-01 00:00:00')+number as t from numbers(100000000)));
76 * - 53 million rows per second per core;
77 *
78 * select count() from (select formatDateTime(t, 'Hello %Y World') from (select toDateTime('2018-01-01 00:00:00')+number as t from numbers(100000000)));
79 * - 138 million rows per second per core;
80 *
81 * PS. We can make this function to return FixedString. Currently it returns String.
82 */
83class FunctionFormatDateTime : public IFunction
84{
85private:
86 /// Time is either UInt32 for DateTime or UInt16 for Date.
87 template <typename Time>
88 class Action
89 {
90 public:
91 using Func = void (*)(char *, Time, const DateLUTImpl &);
92
93 Func func;
94 size_t shift;
95
96 Action(Func func_, size_t shift_ = 0) : func(func_), shift(shift_) {}
97
98 void perform(char *& target, Time source, const DateLUTImpl & timezone)
99 {
100 func(target, source, timezone);
101 target += shift;
102 }
103
104 private:
105 template <typename T>
106 static inline void writeNumber2(char * p, T v)
107 {
108 memcpy(p, &digits100[v * 2], 2);
109 }
110
111 template <typename T>
112 static inline void writeNumber3(char * p, T v)
113 {
114 writeNumber2(p, v / 10);
115 p[2] += v % 10;
116 }
117
118 template <typename T>
119 static inline void writeNumber4(char * p, T v)
120 {
121 writeNumber2(p, v / 100);
122 writeNumber2(p + 2, v % 100);
123 }
124
125 public:
126 static void noop(char *, Time, const DateLUTImpl &)
127 {
128 }
129
130 static void century(char * target, Time source, const DateLUTImpl & timezone)
131 {
132 auto year = ToYearImpl::execute(source, timezone);
133 auto century = year / 100;
134 writeNumber2(target, century);
135 }
136
137 static void dayOfMonth(char * target, Time source, const DateLUTImpl & timezone)
138 {
139 writeNumber2(target, ToDayOfMonthImpl::execute(source, timezone));
140 }
141
142 static void americanDate(char * target, Time source, const DateLUTImpl & timezone)
143 {
144 writeNumber2(target, ToMonthImpl::execute(source, timezone));
145 writeNumber2(target + 3, ToDayOfMonthImpl::execute(source, timezone));
146 writeNumber2(target + 6, ToYearImpl::execute(source, timezone) % 100);
147 }
148
149 static void dayOfMonthSpacePadded(char * target, Time source, const DateLUTImpl & timezone)
150 {
151 auto day = ToDayOfMonthImpl::execute(source, timezone);
152 if (day < 10)
153 target[1] += day;
154 else
155 writeNumber2(target, day);
156 }
157
158 static void ISO8601Date(char * target, Time source, const DateLUTImpl & timezone)
159 {
160 writeNumber4(target, ToYearImpl::execute(source, timezone));
161 writeNumber2(target + 5, ToMonthImpl::execute(source, timezone));
162 writeNumber2(target + 8, ToDayOfMonthImpl::execute(source, timezone));
163 }
164
165 static void dayOfYear(char * target, Time source, const DateLUTImpl & timezone)
166 {
167 writeNumber3(target, ToDayOfYearImpl::execute(source, timezone));
168 }
169
170 static void month(char * target, Time source, const DateLUTImpl & timezone)
171 {
172 writeNumber2(target, ToMonthImpl::execute(source, timezone));
173 }
174
175 static void dayOfWeek(char * target, Time source, const DateLUTImpl & timezone)
176 {
177 *target += ToDayOfWeekImpl::execute(source, timezone);
178 }
179
180 static void dayOfWeek0To6(char * target, Time source, const DateLUTImpl & timezone)
181 {
182 auto day = ToDayOfWeekImpl::execute(source, timezone);
183 *target += (day == 7 ? 0 : day);
184 }
185
186 static void ISO8601Week(char * target, Time source, const DateLUTImpl & timezone)
187 {
188 writeNumber2(target, ToISOWeekImpl::execute(source, timezone));
189 }
190
191 static void year2(char * target, Time source, const DateLUTImpl & timezone)
192 {
193 writeNumber2(target, ToYearImpl::execute(source, timezone) % 100);
194 }
195
196 static void year4(char * target, Time source, const DateLUTImpl & timezone)
197 {
198 writeNumber4(target, ToYearImpl::execute(source, timezone));
199 }
200
201 static void hour24(char * target, Time source, const DateLUTImpl & timezone)
202 {
203 writeNumber2(target, ToHourImpl::execute(source, timezone));
204 }
205
206 static void hour12(char * target, Time source, const DateLUTImpl & timezone)
207 {
208 auto x = ToHourImpl::execute(source, timezone);
209 writeNumber2(target, x == 0 ? 12 : (x > 12 ? x - 12 : x));
210 }
211
212 static void minute(char * target, Time source, const DateLUTImpl & timezone)
213 {
214 writeNumber2(target, ToMinuteImpl::execute(source, timezone));
215 }
216
217 static void AMPM(char * target, Time source, const DateLUTImpl & timezone)
218 {
219 auto hour = ToHourImpl::execute(source, timezone);
220 if (hour >= 12)
221 *target = 'P';
222 }
223
224 static void hhmm24(char * target, Time source, const DateLUTImpl & timezone)
225 {
226 writeNumber2(target, ToHourImpl::execute(source, timezone));
227 writeNumber2(target + 3, ToMinuteImpl::execute(source, timezone));
228 }
229
230 static void second(char * target, Time source, const DateLUTImpl & timezone)
231 {
232 writeNumber2(target, ToSecondImpl::execute(source, timezone));
233 }
234
235 static void ISO8601Time(char * target, Time source, const DateLUTImpl & timezone)
236 {
237 writeNumber2(target, ToHourImpl::execute(source, timezone));
238 writeNumber2(target + 3, ToMinuteImpl::execute(source, timezone));
239 writeNumber2(target + 6, ToSecondImpl::execute(source, timezone));
240 }
241 };
242
243public:
244 static constexpr auto name = "formatDateTime";
245
246 static FunctionPtr create(const Context &) { return std::make_shared<FunctionFormatDateTime>(); }
247
248 String getName() const override
249 {
250 return name;
251 }
252
253 bool useDefaultImplementationForConstants() const override { return true; }
254
255 ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
256
257 bool isVariadic() const override { return true; }
258 size_t getNumberOfArguments() const override { return 0; }
259
260 DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
261 {
262 if (arguments.size() != 2 && arguments.size() != 3)
263 throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
264 + toString(arguments.size()) + ", should be 2 or 3",
265 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
266
267 if (!WhichDataType(arguments[0].type).isDateOrDateTime())
268 throw Exception("Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() +
269 ". Should be a date or a date with time", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
270
271 if (!WhichDataType(arguments[1].type).isString())
272 throw Exception("Illegal type " + arguments[1].type->getName() + " of 2 argument of function " + getName() + ". Must be String.",
273 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
274
275 if (arguments.size() == 3)
276 {
277 if (!WhichDataType(arguments[2].type).isString())
278 throw Exception("Illegal type " + arguments[2].type->getName() + " of 3 argument of function " + getName() + ". Must be String.",
279 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
280 }
281
282 return std::make_shared<DataTypeString>();
283 }
284
285 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
286 {
287 if (!executeType<DataTypeDate>(block, arguments, result)
288 && !executeType<DataTypeDateTime>(block, arguments, result)
289 && !executeType<DataTypeDateTime64>(block, arguments, result))
290 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
291 + " of function " + getName() + ", must be Date or DateTime",
292 ErrorCodes::ILLEGAL_COLUMN);
293 }
294
295 template <typename DataType>
296 bool executeType(Block & block, const ColumnNumbers & arguments, size_t result)
297 {
298 auto * times = checkAndGetColumn<typename DataType::ColumnType>(block.getByPosition(arguments[0]).column.get());
299 if (!times)
300 return false;
301
302 const ColumnConst * pattern_column = checkAndGetColumnConst<ColumnString>(block.getByPosition(arguments[1]).column.get());
303 if (!pattern_column)
304 throw Exception("Illegal column " + block.getByPosition(arguments[1]).column->getName()
305 + " of second ('format') argument of function " + getName()
306 + ". Must be constant string.",
307 ErrorCodes::ILLEGAL_COLUMN);
308
309 String pattern = pattern_column->getValue<String>();
310
311 using T = typename ActionaValueTypeMap<DataType>::ActionValueType;
312 std::vector<Action<T>> instructions;
313 String pattern_to_fill = parsePattern(pattern, instructions);
314 size_t result_size = pattern_to_fill.size();
315
316 const DateLUTImpl * time_zone_tmp = nullptr;
317 if (arguments.size() == 3)
318 time_zone_tmp = &extractTimeZoneFromFunctionArguments(block, arguments, 2, 0);
319 else
320 time_zone_tmp = &DateLUT::instance();
321
322 const DateLUTImpl & time_zone = *time_zone_tmp;
323
324 const auto & vec = times->getData();
325
326 UInt32 scale [[maybe_unused]] = 0;
327 if constexpr (std::is_same_v<DataType, DataTypeDateTime64>)
328 {
329 scale = vec.getScale();
330 }
331
332 auto col_res = ColumnString::create();
333 auto & dst_data = col_res->getChars();
334 auto & dst_offsets = col_res->getOffsets();
335 dst_data.resize(vec.size() * (result_size + 1));
336 dst_offsets.resize(vec.size());
337
338 /// Fill result with literals.
339 {
340 UInt8 * begin = dst_data.data();
341 UInt8 * end = begin + dst_data.size();
342 UInt8 * pos = begin;
343
344 if (pos < end)
345 {
346 memcpy(pos, pattern_to_fill.data(), result_size + 1); /// With zero terminator.
347 pos += result_size + 1;
348 }
349
350 /// Fill by copying exponential growing ranges.
351 while (pos < end)
352 {
353 size_t bytes_to_copy = std::min(pos - begin, end - pos);
354 memcpy(pos, begin, bytes_to_copy);
355 pos += bytes_to_copy;
356 }
357 }
358
359 auto begin = reinterpret_cast<char *>(dst_data.data());
360 auto pos = begin;
361
362 for (size_t i = 0; i < vec.size(); ++i)
363 {
364 if constexpr (std::is_same_v<DataType, DataTypeDateTime64>)
365 {
366 for (auto & instruction : instructions)
367 {
368 // since right now LUT does not support Int64-values and not format instructions for subsecond parts,
369 // treat DatTime64 values just as DateTime values by ignoring fractional and casting to UInt32.
370 const auto c = DecimalUtils::split(vec[i], scale);
371 instruction.perform(pos, static_cast<UInt32>(c.whole), time_zone);
372 }
373 }
374 else
375 {
376 for (auto & instruction : instructions)
377 instruction.perform(pos, vec[i], time_zone);
378 }
379
380 dst_offsets[i] = pos - begin;
381 }
382
383 dst_data.resize(pos - begin);
384 block.getByPosition(result).column = std::move(col_res);
385 return true;
386 }
387
388 template <typename T>
389 String parsePattern(const String & pattern, std::vector<Action<T>> & instructions) const
390 {
391 String result;
392
393 const char * pos = pattern.data();
394 const char * end = pos + pattern.size();
395
396 /// Add shift to previous action; or if there were none, add noop action with shift.
397 auto addShift = [&](size_t amount)
398 {
399 if (instructions.empty())
400 instructions.emplace_back(&Action<T>::noop);
401 instructions.back().shift += amount;
402 };
403
404 /// If the argument was DateTime, add instruction for printing. If it was date, just shift (the buffer is pre-filled with default values).
405 auto addInstructionOrShift = [&](typename Action<T>::Func func [[maybe_unused]], size_t shift)
406 {
407 if constexpr (std::is_same_v<T, UInt32>)
408 instructions.emplace_back(func, shift);
409 else
410 addShift(shift);
411 };
412
413 while (true)
414 {
415 const char * percent_pos = find_first_symbols<'%'>(pos, end);
416
417 if (percent_pos < end)
418 {
419 if (pos < percent_pos)
420 {
421 result.append(pos, percent_pos);
422 addShift(percent_pos - pos);
423 }
424
425 pos = percent_pos + 1;
426
427 if (pos >= end)
428 throw Exception("Sign '%' is the last in pattern, if you need it, use '%%'", ErrorCodes::BAD_ARGUMENTS);
429
430 switch (*pos)
431 {
432 // Year, divided by 100, zero-padded
433 case 'C':
434 instructions.emplace_back(&Action<T>::century, 2);
435 result.append("00");
436 break;
437
438 // Day of month, zero-padded (01-31)
439 case 'd':
440 instructions.emplace_back(&Action<T>::dayOfMonth, 2);
441 result.append("00");
442 break;
443
444 // Short MM/DD/YY date, equivalent to %m/%d/%y
445 case 'D':
446 instructions.emplace_back(&Action<T>::americanDate, 8);
447 result.append("00/00/00");
448 break;
449
450 // Day of month, space-padded ( 1-31) 23
451 case 'e':
452 instructions.emplace_back(&Action<T>::dayOfMonthSpacePadded, 2);
453 result.append(" 0");
454 break;
455
456 // Short YYYY-MM-DD date, equivalent to %Y-%m-%d 2001-08-23
457 case 'F':
458 instructions.emplace_back(&Action<T>::ISO8601Date, 10);
459 result.append("0000-00-00");
460 break;
461
462 // Day of the year (001-366) 235
463 case 'j':
464 instructions.emplace_back(&Action<T>::dayOfYear, 3);
465 result.append("000");
466 break;
467
468 // Month as a decimal number (01-12)
469 case 'm':
470 instructions.emplace_back(&Action<T>::month, 2);
471 result.append("00");
472 break;
473
474 // ISO 8601 weekday as number with Monday as 1 (1-7)
475 case 'u':
476 instructions.emplace_back(&Action<T>::dayOfWeek, 1);
477 result.append("0");
478 break;
479
480 // ISO 8601 week number (01-53)
481 case 'V':
482 instructions.emplace_back(&Action<T>::ISO8601Week, 2);
483 result.append("00");
484 break;
485
486 // Weekday as a decimal number with Sunday as 0 (0-6) 4
487 case 'w':
488 instructions.emplace_back(&Action<T>::dayOfWeek0To6, 1);
489 result.append("0");
490 break;
491
492 // Two digits year
493 case 'y':
494 instructions.emplace_back(&Action<T>::year2, 2);
495 result.append("00");
496 break;
497
498 // Four digits year
499 case 'Y':
500 instructions.emplace_back(&Action<T>::year4, 4);
501 result.append("0000");
502 break;
503
504 /// Time components. If the argument is Date, not a DateTime, then this components will have default value.
505
506 // Minute (00-59)
507 case 'M':
508 addInstructionOrShift(&Action<T>::minute, 2);
509 result.append("00");
510 break;
511
512 // AM or PM
513 case 'p':
514 addInstructionOrShift(&Action<T>::AMPM, 2);
515 result.append("AM");
516 break;
517
518 // 24-hour HH:MM time, equivalent to %H:%M 14:55
519 case 'R':
520 addInstructionOrShift(&Action<T>::hhmm24, 5);
521 result.append("00:00");
522 break;
523
524 // Seconds
525 case 'S':
526 addInstructionOrShift(&Action<T>::second, 2);
527 result.append("00");
528 break;
529
530 // ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S 14:55:02
531 case 'T':
532 addInstructionOrShift(&Action<T>::ISO8601Time, 8);
533 result.append("00:00:00");
534 break;
535
536 // Hour in 24h format (00-23)
537 case 'H':
538 addInstructionOrShift(&Action<T>::hour24, 2);
539 result.append("00");
540 break;
541
542 // Hour in 12h format (01-12)
543 case 'I':
544 addInstructionOrShift(&Action<T>::hour12, 2);
545 result.append("12");
546 break;
547
548 /// Escaped literal characters.
549 case '%':
550 result += '%';
551 addShift(1);
552 break;
553 case 't':
554 result += '\t';
555 addShift(1);
556 break;
557 case 'n':
558 result += '\n';
559 addShift(1);
560 break;
561
562 // Unimplemented
563 case 'U': [[fallthrough]];
564 case 'W':
565 throw Exception("Wrong pattern '" + pattern + "', symbol '" + *pos + " is not implemented ' for function " + getName(),
566 ErrorCodes::NOT_IMPLEMENTED);
567
568 default:
569 throw Exception(
570 "Wrong pattern '" + pattern + "', unexpected symbol '" + *pos + "' for function " + getName(), ErrorCodes::ILLEGAL_COLUMN);
571 }
572
573 ++pos;
574 }
575 else
576 {
577 result.append(pos, end);
578 addShift(end + 1 - pos); /// including zero terminator
579 break;
580 }
581 }
582
583 return result;
584 }
585};
586
587void registerFunctionFormatDateTime(FunctionFactory & factory)
588{
589 factory.registerFunction<FunctionFormatDateTime>();
590}
591
592}
593