1#pragma once
2
3#include <ext/enumerate.h>
4#include <ext/collection_cast.h>
5#include <ext/range.h>
6#include <type_traits>
7
8#include <IO/WriteBufferFromVector.h>
9#include <IO/ReadBufferFromMemory.h>
10#include <IO/Operators.h>
11#include <IO/parseDateTimeBestEffort.h>
12#include <DataTypes/DataTypeFactory.h>
13#include <DataTypes/DataTypesNumber.h>
14#include <DataTypes/DataTypesDecimal.h>
15#include <DataTypes/DataTypeString.h>
16#include <DataTypes/DataTypeFixedString.h>
17#include <DataTypes/DataTypeDate.h>
18#include <DataTypes/DataTypeDateTime.h>
19#include <DataTypes/DataTypeDateTime64.h>
20#include <DataTypes/DataTypeEnum.h>
21#include <DataTypes/DataTypeArray.h>
22#include <DataTypes/DataTypeTuple.h>
23#include <DataTypes/DataTypeNullable.h>
24#include <DataTypes/DataTypeNothing.h>
25#include <DataTypes/DataTypeUUID.h>
26#include <DataTypes/DataTypeInterval.h>
27#include <DataTypes/DataTypeAggregateFunction.h>
28#include <Formats/FormatSettings.h>
29#include <Columns/ColumnString.h>
30#include <Columns/ColumnFixedString.h>
31#include <Columns/ColumnConst.h>
32#include <Columns/ColumnArray.h>
33#include <Columns/ColumnNullable.h>
34#include <Columns/ColumnTuple.h>
35#include <Columns/ColumnsCommon.h>
36#include <Common/FieldVisitors.h>
37#include <Common/assert_cast.h>
38#include <Interpreters/ExpressionActions.h>
39#include <Functions/IFunctionAdaptors.h>
40#include <Functions/FunctionsMiscellaneous.h>
41#include <Functions/FunctionHelpers.h>
42#include <Functions/DateTimeTransforms.h>
43#include <DataTypes/DataTypeLowCardinality.h>
44#include <Columns/ColumnLowCardinality.h>
45
46
47namespace DB
48{
49
/// Error codes referenced by the conversion functions in this file.
/// They are only declared here (extern); the definitions are not part of this file.
namespace ErrorCodes
{
    extern const int ATTEMPT_TO_READ_AFTER_EOF;
    extern const int CANNOT_PARSE_NUMBER;
    extern const int CANNOT_READ_ARRAY_FROM_TEXT;
    extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
    extern const int CANNOT_PARSE_QUOTED_STRING;
    extern const int CANNOT_PARSE_ESCAPE_SEQUENCE;
    extern const int CANNOT_PARSE_DATE;
    extern const int CANNOT_PARSE_DATETIME;
    extern const int CANNOT_PARSE_TEXT;
    extern const int CANNOT_PARSE_UUID;
    extern const int TOO_LARGE_STRING_SIZE;
    extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
    extern const int LOGICAL_ERROR;
    extern const int TYPE_MISMATCH;
    extern const int CANNOT_CONVERT_TYPE;
    extern const int ILLEGAL_COLUMN;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NOT_IMPLEMENTED;
    extern const int CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN;
}
73
74
75/** Type conversion functions.
76 * toType - conversion in "natural way";
77 */
78
79inline UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column)
80{
81 const auto * arg_type = named_column.type.get();
82 bool ok = checkAndGetDataType<DataTypeUInt64>(arg_type)
83 || checkAndGetDataType<DataTypeUInt32>(arg_type)
84 || checkAndGetDataType<DataTypeUInt16>(arg_type)
85 || checkAndGetDataType<DataTypeUInt8>(arg_type);
86 if (!ok)
87 throw Exception("Illegal type of toDecimal() scale " + named_column.type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
88
89 Field field;
90 named_column.column->get(0, field);
91 return field.get<UInt32>();
92}
93
94
95/** Conversion of number types to each other, enums to numbers, dates and datetimes to numbers and back: done by straight assignment.
96 * (Date is represented internally as number of days from some day; DateTime - as unix timestamp)
97 */
98template <typename FromDataType, typename ToDataType, typename Name>
99struct ConvertImpl
100{
101 using FromFieldType = typename FromDataType::FieldType;
102 using ToFieldType = typename ToDataType::FieldType;
103
104 template <typename Additions = void *>
105 static void NO_SANITIZE_UNDEFINED execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/,
106 Additions additions [[maybe_unused]] = Additions())
107 {
108 const ColumnWithTypeAndName & named_from = block.getByPosition(arguments[0]);
109
110 using ColVecFrom = typename FromDataType::ColumnType;
111 using ColVecTo = typename ToDataType::ColumnType;
112
113 if constexpr ((IsDataTypeDecimal<FromDataType> || IsDataTypeDecimal<ToDataType>)
114 && !(std::is_same_v<DataTypeDateTime64, FromDataType> || std::is_same_v<DataTypeDateTime64, ToDataType>))
115 {
116 if constexpr (!IsDataTypeDecimalOrNumber<FromDataType> || !IsDataTypeDecimalOrNumber<ToDataType>)
117 {
118 throw Exception("Illegal column " + named_from.column->getName() + " of first argument of function " + Name::name,
119 ErrorCodes::ILLEGAL_COLUMN);
120 }
121 }
122
123 if (const ColVecFrom * col_from = checkAndGetColumn<ColVecFrom>(named_from.column.get()))
124 {
125 typename ColVecTo::MutablePtr col_to = nullptr;
126 if constexpr (IsDataTypeDecimal<ToDataType>)
127 {
128 UInt32 scale = additions;
129 col_to = ColVecTo::create(0, scale);
130 }
131 else
132 col_to = ColVecTo::create();
133
134 const auto & vec_from = col_from->getData();
135 auto & vec_to = col_to->getData();
136 size_t size = vec_from.size();
137 vec_to.resize(size);
138
139 for (size_t i = 0; i < size; ++i)
140 {
141 if constexpr (IsDataTypeDecimal<FromDataType> || IsDataTypeDecimal<ToDataType>)
142 {
143 if constexpr (IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>)
144 vec_to[i] = convertDecimals<FromDataType, ToDataType>(vec_from[i], vec_from.getScale(), vec_to.getScale());
145 else if constexpr (IsDataTypeDecimal<FromDataType> && IsDataTypeNumber<ToDataType>)
146 vec_to[i] = convertFromDecimal<FromDataType, ToDataType>(vec_from[i], vec_from.getScale());
147 else if constexpr (IsDataTypeNumber<FromDataType> && IsDataTypeDecimal<ToDataType>)
148 vec_to[i] = convertToDecimal<FromDataType, ToDataType>(vec_from[i], vec_to.getScale());
149 }
150 else
151 vec_to[i] = static_cast<ToFieldType>(vec_from[i]);
152 }
153
154 block.getByPosition(result).column = std::move(col_to);
155 }
156 else
157 throw Exception("Illegal column " + named_from.column->getName() + " of first argument of function " + Name::name,
158 ErrorCodes::ILLEGAL_COLUMN);
159 }
160};
161
/** Conversion of DateTime to Date: throw off time component.
  * Implemented by DateTimeTransformImpl with the ToDateImpl transform.
  */
template <typename Name> struct ConvertImpl<DataTypeDateTime, DataTypeDate, Name>
    : DateTimeTransformImpl<DataTypeDateTime, DataTypeDate, ToDateImpl> {};
166
167
168/** Conversion of Date to DateTime: adding 00:00:00 time component.
169 */
170struct ToDateTimeImpl
171{
172 static constexpr auto name = "toDateTime";
173
174 static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
175 {
176 return time_zone.fromDayNum(DayNum(d));
177 }
178
179 // no-op conversion from DateTime to DateTime, used in DateTime64 to DateTime conversion.
180 static inline UInt32 execute(UInt32 d, const DateLUTImpl & /*time_zone*/)
181 {
182 return d;
183 }
184};
185
/// Conversion of Date to DateTime: implemented by DateTimeTransformImpl with the ToDateTimeImpl transform.
template <typename Name> struct ConvertImpl<DataTypeDate, DataTypeDateTime, Name>
    : DateTimeTransformImpl<DataTypeDate, DataTypeDateTime, ToDateTimeImpl> {};
188
/// Implementation of toDate function.
/// Transform for numeric arguments: values below 0xFFFF are returned as is (implicitly converted to ToType),
/// all other values are passed to DateLUTImpl::toDayNum.
template <typename FromType, typename ToType>
struct ToDateTransform32Or64
{
    static constexpr auto name = "toDate";

    static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
    {
        /// The comparison is strict: the value 0xFFFF (65535) itself goes through toDayNum.
        return (from < 0xFFFF) ? from : time_zone.toDayNum(from);
    }
};
201
/** Special case of converting (U)Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to Date.
  * If the number is less than 65535 (0xFFFF), it is treated as DayNum, otherwise as a unix timestamp
  * (see ToDateTransform32Or64).
  * It's a bit illogical, as we actually have two functions in one.
  * But it allows supporting the frequent case
  *  when a user writes toDate(UInt32), expecting conversion of a unix timestamp to Date
  *  (otherwise such usage would be a frequent mistake).
  */
template <typename Name> struct ConvertImpl<DataTypeUInt32, DataTypeDate, Name>
    : DateTimeTransformImpl<DataTypeUInt32, DataTypeDate, ToDateTransform32Or64<UInt32, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeUInt64, DataTypeDate, Name>
    : DateTimeTransformImpl<DataTypeUInt64, DataTypeDate, ToDateTransform32Or64<UInt64, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeInt32, DataTypeDate, Name>
    : DateTimeTransformImpl<DataTypeInt32, DataTypeDate, ToDateTransform32Or64<Int32, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeInt64, DataTypeDate, Name>
    : DateTimeTransformImpl<DataTypeInt64, DataTypeDate, ToDateTransform32Or64<Int64, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeFloat32, DataTypeDate, Name>
    : DateTimeTransformImpl<DataTypeFloat32, DataTypeDate, ToDateTransform32Or64<Float32, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeFloat64, DataTypeDate, Name>
    : DateTimeTransformImpl<DataTypeFloat64, DataTypeDate, ToDateTransform32Or64<Float64, UInt16>> {};
221
222
223/** Conversion of Date or DateTime to DateTime64: add zero sub-second part.
224 */
225struct ToDateTime64Transform
226{
227 static constexpr auto name = "toDateTime64";
228
229 const DateTime64::NativeType scale_multiplier = 1;
230
231 ToDateTime64Transform(UInt32 scale = 0)
232 : scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale))
233 {}
234
235 inline DateTime64::NativeType execute(UInt16 d, const DateLUTImpl & time_zone) const
236 {
237 const auto dt = ToDateTimeImpl::execute(d, time_zone);
238 return execute(dt, time_zone);
239 }
240
241 inline DateTime64::NativeType execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) const
242 {
243 return DecimalUtils::decimalFromComponentsWithMultiplier<DateTime64>(dt, 0, scale_multiplier);
244 }
245};
246
/// Conversion of Date and DateTime to DateTime64: implemented by DateTimeTransformImpl with the ToDateTime64Transform transform.
template <typename Name> struct ConvertImpl<DataTypeDate, DataTypeDateTime64, Name>
    : DateTimeTransformImpl<DataTypeDate, DataTypeDateTime64, ToDateTime64Transform> {};
template <typename Name> struct ConvertImpl<DataTypeDateTime, DataTypeDateTime64, Name>
    : DateTimeTransformImpl<DataTypeDateTime, DataTypeDateTime64, ToDateTime64Transform> {};
251
252/** Conversion of DateTime64 to Date or DateTime: discards fractional part.
253 */
254template <typename Transform>
255struct FromDateTime64Transform
256{
257 static constexpr auto name = Transform::name;
258
259 const DateTime64::NativeType scale_multiplier = 1;
260
261 FromDateTime64Transform(UInt32 scale)
262 : scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale))
263 {}
264
265 inline auto execute(DateTime64::NativeType dt, const DateLUTImpl & time_zone) const
266 {
267 const auto c = DecimalUtils::splitWithScaleMultiplier(DateTime64(dt), scale_multiplier);
268 return Transform::execute(static_cast<UInt32>(c.whole), time_zone);
269 }
270};
271
/// Conversion of DateTime64 to Date and DateTime: implemented by DateTimeTransformImpl
/// with FromDateTime64Transform wrapping ToDateImpl / ToDateTimeImpl.
template <typename Name> struct ConvertImpl<DataTypeDateTime64, DataTypeDate, Name>
    : DateTimeTransformImpl<DataTypeDateTime64, DataTypeDate, FromDateTime64Transform<ToDateImpl>> {};
template <typename Name> struct ConvertImpl<DataTypeDateTime64, DataTypeDateTime, Name>
    : DateTimeTransformImpl<DataTypeDateTime64, DataTypeDateTime, FromDateTime64Transform<ToDateTimeImpl>> {};
276
277
/** Transformation of numbers, dates, datetimes to strings: through formatting.
  * FormatImpl<DataType>::execute writes the text representation of a single value to the WriteBuffer.
  * All specializations take the same arguments: the value, the output buffer, a pointer to the data type
  * and a pointer to the time zone; each specialization uses only the arguments it needs.
  */
template <typename DataType>
struct FormatImpl
{
    /// Generic case: plain writeText of the value.
    static void execute(const typename DataType::FieldType x, WriteBuffer & wb, const DataType *, const DateLUTImpl *)
    {
        writeText(x, wb);
    }
};

/// Date: the value is written as a DayNum by writeDateText.
template <>
struct FormatImpl<DataTypeDate>
{
    static void execute(const DataTypeDate::FieldType x, WriteBuffer & wb, const DataTypeDate *, const DateLUTImpl *)
    {
        writeDateText(DayNum(x), wb);
    }
};

/// DateTime: written in the given time zone. The time_zone pointer is dereferenced, so it must not be null.
template <>
struct FormatImpl<DataTypeDateTime>
{
    static void execute(const DataTypeDateTime::FieldType x, WriteBuffer & wb, const DataTypeDateTime *, const DateLUTImpl * time_zone)
    {
        writeDateTimeText(x, wb, *time_zone);
    }
};

/// DateTime64: written with the scale of the data type, in the given time zone.
/// Both the type and the time_zone pointers are dereferenced, so they must not be null.
template <>
struct FormatImpl<DataTypeDateTime64>
{
    static void execute(const DataTypeDateTime64::FieldType x, WriteBuffer & wb, const DataTypeDateTime64 * type, const DateLUTImpl * time_zone)
    {
        writeDateTimeText(DateTime64(x), type->getScale(), wb, *time_zone);
    }
};


/// Enum: the name that corresponds to the value is written.
template <typename FieldType>
struct FormatImpl<DataTypeEnum<FieldType>>
{
    static void execute(const FieldType x, WriteBuffer & wb, const DataTypeEnum<FieldType> * type, const DateLUTImpl *)
    {
        writeString(type->getNameForValue(x), wb);
    }
};

/// Decimal: written with the scale of the data type.
template <typename FieldType>
struct FormatImpl<DataTypeDecimal<FieldType>>
{
    static void execute(const FieldType x, WriteBuffer & wb, const DataTypeDecimal<FieldType> * type, const DateLUTImpl *)
    {
        writeText(x, type->getScale(), wb);
    }
};
334
335
336/// DataTypeEnum<T> to DataType<T> free conversion
337template <typename FieldType, typename Name>
338struct ConvertImpl<DataTypeEnum<FieldType>, DataTypeNumber<FieldType>, Name>
339{
340 static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
341 {
342 block.getByPosition(result).column = block.getByPosition(arguments[0]).column;
343 }
344};
345
346
347template <typename FromDataType, typename Name>
348struct ConvertImpl<FromDataType, std::enable_if_t<!std::is_same_v<FromDataType, DataTypeString>, DataTypeString>, Name>
349{
350 using FromFieldType = typename FromDataType::FieldType;
351 using ColVecType = std::conditional_t<IsDecimalNumber<FromFieldType>, ColumnDecimal<FromFieldType>, ColumnVector<FromFieldType>>;
352
353 static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
354 {
355 const auto & col_with_type_and_name = block.getByPosition(arguments[0]);
356 const auto & type = static_cast<const FromDataType &>(*col_with_type_and_name.type);
357
358 const DateLUTImpl * time_zone = nullptr;
359
360 /// For argument of DateTime type, second argument with time zone could be specified.
361 if constexpr (std::is_same_v<FromDataType, DataTypeDateTime> || std::is_same_v<FromDataType, DataTypeDateTime64>)
362 time_zone = &extractTimeZoneFromFunctionArguments(block, arguments, 1, 0);
363
364 if (const auto col_from = checkAndGetColumn<ColVecType>(col_with_type_and_name.column.get()))
365 {
366 auto col_to = ColumnString::create();
367
368 const typename ColVecType::Container & vec_from = col_from->getData();
369 ColumnString::Chars & data_to = col_to->getChars();
370 ColumnString::Offsets & offsets_to = col_to->getOffsets();
371 size_t size = vec_from.size();
372
373 if constexpr (std::is_same_v<FromDataType, DataTypeDate>)
374 data_to.resize(size * (strlen("YYYY-MM-DD") + 1));
375 else if constexpr (std::is_same_v<FromDataType, DataTypeDateTime>)
376 data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss") + 1));
377 else if constexpr (std::is_same_v<FromDataType, DataTypeDateTime64>)
378 data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss.") + vec_from.getScale() + 1));
379 else
380 data_to.resize(size * 3); /// Arbitary
381
382 offsets_to.resize(size);
383
384 WriteBufferFromVector<ColumnString::Chars> write_buffer(data_to);
385
386 for (size_t i = 0; i < size; ++i)
387 {
388 FormatImpl<FromDataType>::execute(vec_from[i], write_buffer, &type, time_zone);
389 writeChar(0, write_buffer);
390 offsets_to[i] = write_buffer.count();
391 }
392
393 write_buffer.finish();
394 block.getByPosition(result).column = std::move(col_to);
395 }
396 else
397 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
398 + " of first argument of function " + Name::name,
399 ErrorCodes::ILLEGAL_COLUMN);
400 }
401};
402
403
404/// Generic conversion of any type to String.
405struct ConvertImplGenericToString
406{
407 static void execute(Block & block, const ColumnNumbers & arguments, size_t result)
408 {
409 const auto & col_with_type_and_name = block.getByPosition(arguments[0]);
410 const IDataType & type = *col_with_type_and_name.type;
411 const IColumn & col_from = *col_with_type_and_name.column;
412
413 size_t size = col_from.size();
414
415 auto col_to = ColumnString::create();
416
417 ColumnString::Chars & data_to = col_to->getChars();
418 ColumnString::Offsets & offsets_to = col_to->getOffsets();
419
420 data_to.resize(size * 2); /// Using coefficient 2 for initial size is arbitrary.
421 offsets_to.resize(size);
422
423 WriteBufferFromVector<ColumnString::Chars> write_buffer(data_to);
424
425 FormatSettings format_settings;
426 for (size_t i = 0; i < size; ++i)
427 {
428 type.serializeAsText(col_from, i, write_buffer, format_settings);
429 writeChar(0, write_buffer);
430 offsets_to[i] = write_buffer.count();
431 }
432
433 write_buffer.finish();
434 block.getByPosition(result).column = std::move(col_to);
435 }
436};
437
438
439/** Conversion of strings to numbers, dates, datetimes: through parsing.
440 */
441template <typename DataType>
442void parseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *)
443{
444 readText(x, rb);
445}
446
447template <>
448inline void parseImpl<DataTypeDate>(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl *)
449{
450 DayNum tmp(0);
451 readDateText(tmp, rb);
452 x = tmp;
453}
454
455template <>
456inline void parseImpl<DataTypeDateTime>(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone)
457{
458 time_t tmp = 0;
459 readDateTimeText(tmp, rb, *time_zone);
460 x = tmp;
461}
462
463template <>
464inline void parseImpl<DataTypeUUID>(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *)
465{
466 UUID tmp;
467 readText(tmp, rb);
468 x = tmp;
469}
470
471
472template <typename DataType>
473bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *)
474{
475 if constexpr (std::is_floating_point_v<typename DataType::FieldType>)
476 return tryReadFloatText(x, rb);
477 else /*if constexpr (is_integral_v<typename DataType::FieldType>)*/
478 return tryReadIntText(x, rb);
479}
480
481template <>
482inline bool tryParseImpl<DataTypeDate>(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl *)
483{
484 DayNum tmp(0);
485 if (!tryReadDateText(tmp, rb))
486 return false;
487 x = tmp;
488 return true;
489}
490
491template <>
492inline bool tryParseImpl<DataTypeDateTime>(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone)
493{
494 time_t tmp = 0;
495 if (!tryReadDateTimeText(tmp, rb, *time_zone))
496 return false;
497 x = tmp;
498 return true;
499}
500
501
502/** Throw exception with verbose message when string value is not parsed completely.
503 */
504[[noreturn]] inline void throwExceptionForIncompletelyParsedValue(ReadBuffer & read_buffer, Block & block, size_t result)
505{
506 const IDataType & to_type = *block.getByPosition(result).type;
507
508 WriteBufferFromOwnString message_buf;
509 message_buf << "Cannot parse string " << quote << String(read_buffer.buffer().begin(), read_buffer.buffer().size())
510 << " as " << to_type.getName()
511 << ": syntax error";
512
513 if (read_buffer.offset())
514 message_buf << " at position " << read_buffer.offset()
515 << " (parsed just " << quote << String(read_buffer.buffer().begin(), read_buffer.offset()) << ")";
516 else
517 message_buf << " at begin of string";
518
519 if (isNativeNumber(to_type))
520 message_buf << ". Note: there are to" << to_type.getName() << "OrZero and to" << to_type.getName() << "OrNull functions, which returns zero/NULL instead of throwing exception.";
521
522 throw Exception(message_buf.str(), ErrorCodes::CANNOT_PARSE_TEXT);
523}
524
525
/// What a conversion from string does with a value that cannot be parsed.
enum class ConvertFromStringExceptionMode
{
    Throw,  /// Throw exception if value cannot be parsed.
    Zero,   /// Fill with zero or default if value cannot be parsed.
    Null    /// Return ColumnNullable with NULLs when value cannot be parsed.
};
532
/// Which parsing method a conversion from string uses.
enum class ConvertFromStringParsingMode
{
    Normal,
    BestEffort  /// Only applicable for DateTime. Will use sophisticated method, that is slower.
};
538
539template <typename FromDataType, typename ToDataType, typename Name,
540 ConvertFromStringExceptionMode exception_mode, ConvertFromStringParsingMode parsing_mode>
541struct ConvertThroughParsing
542{
543 static_assert(std::is_same_v<FromDataType, DataTypeString> || std::is_same_v<FromDataType, DataTypeFixedString>,
544 "ConvertThroughParsing is only applicable for String or FixedString data types");
545
546 static constexpr bool to_datetime64 = std::is_same_v<ToDataType, DataTypeDateTime64>;
547
548 using ToFieldType = typename ToDataType::FieldType;
549
550 static bool isAllRead(ReadBuffer & in)
551 {
552 /// In case of FixedString, skip zero bytes at end.
553 if constexpr (std::is_same_v<FromDataType, DataTypeFixedString>)
554 while (!in.eof() && *in.position() == 0)
555 ++in.position();
556
557 if (in.eof())
558 return true;
559
560 /// Special case, that allows to parse string with DateTime as Date.
561 if (std::is_same_v<ToDataType, DataTypeDate> && (in.buffer().size()) == strlen("YYYY-MM-DD hh:mm:ss"))
562 return true;
563
564 return false;
565 }
566
567 template <typename Additions = void *>
568 static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count,
569 Additions additions [[maybe_unused]] = Additions())
570 {
571 using ColVecTo = typename ToDataType::ColumnType;
572
573 const DateLUTImpl * local_time_zone [[maybe_unused]] = nullptr;
574 const DateLUTImpl * utc_time_zone [[maybe_unused]] = nullptr;
575
576 /// For conversion to DateTime type, second argument with time zone could be specified.
577 if constexpr (std::is_same_v<ToDataType, DataTypeDateTime> || to_datetime64)
578 {
579 const auto result_type = removeNullable(block.getByPosition(result).type);
580 // Time zone is already figured out during result type resultion, no need to do it here.
581 if (const auto dt_col = checkAndGetDataType<ToDataType>(result_type.get()))
582 local_time_zone = &dt_col->getTimeZone();
583 else
584 {
585 local_time_zone = &extractTimeZoneFromFunctionArguments(block, arguments, 1, 0);
586 }
587
588 if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort)
589 utc_time_zone = &DateLUT::instance("UTC");
590 }
591
592 const IColumn * col_from = block.getByPosition(arguments[0]).column.get();
593 const ColumnString * col_from_string = checkAndGetColumn<ColumnString>(col_from);
594 const ColumnFixedString * col_from_fixed_string = checkAndGetColumn<ColumnFixedString>(col_from);
595
596 if (std::is_same_v<FromDataType, DataTypeString> && !col_from_string)
597 throw Exception("Illegal column " + col_from->getName()
598 + " of first argument of function " + Name::name,
599 ErrorCodes::ILLEGAL_COLUMN);
600
601 if (std::is_same_v<FromDataType, DataTypeFixedString> && !col_from_fixed_string)
602 throw Exception("Illegal column " + col_from->getName()
603 + " of first argument of function " + Name::name,
604 ErrorCodes::ILLEGAL_COLUMN);
605
606 size_t size = input_rows_count;
607 typename ColVecTo::MutablePtr col_to = nullptr;
608
609 if constexpr (IsDataTypeDecimal<ToDataType>)
610 {
611 UInt32 scale = additions;
612 if constexpr (to_datetime64)
613 {
614 ToDataType check_bounds_in_ctor(scale, local_time_zone ? local_time_zone->getTimeZone() : String{});
615 }
616 else
617 {
618 ToDataType check_bounds_in_ctor(ToDataType::maxPrecision(), scale);
619 }
620 col_to = ColVecTo::create(size, scale);
621 }
622 else
623 col_to = ColVecTo::create(size);
624
625 typename ColVecTo::Container & vec_to = col_to->getData();
626
627 ColumnUInt8::MutablePtr col_null_map_to;
628 ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr;
629 if constexpr (exception_mode == ConvertFromStringExceptionMode::Null)
630 {
631 col_null_map_to = ColumnUInt8::create(size);
632 vec_null_map_to = &col_null_map_to->getData();
633 }
634
635 const ColumnString::Chars * chars = nullptr;
636 const IColumn::Offsets * offsets = nullptr;
637 size_t fixed_string_size = 0;
638
639 if constexpr (std::is_same_v<FromDataType, DataTypeString>)
640 {
641 chars = &col_from_string->getChars();
642 offsets = &col_from_string->getOffsets();
643 }
644 else
645 {
646 chars = &col_from_fixed_string->getChars();
647 fixed_string_size = col_from_fixed_string->getN();
648 }
649
650 size_t current_offset = 0;
651
652 for (size_t i = 0; i < size; ++i)
653 {
654 size_t next_offset = std::is_same_v<FromDataType, DataTypeString> ? (*offsets)[i] : (current_offset + fixed_string_size);
655 size_t string_size = std::is_same_v<FromDataType, DataTypeString> ? next_offset - current_offset - 1 : fixed_string_size;
656
657 ReadBufferFromMemory read_buffer(&(*chars)[current_offset], string_size);
658
659 if constexpr (exception_mode == ConvertFromStringExceptionMode::Throw)
660 {
661 if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort)
662 {
663 if constexpr (to_datetime64)
664 {
665 DateTime64 res = 0;
666 parseDateTime64BestEffort(res, vec_to.getScale(), read_buffer, *local_time_zone, *utc_time_zone);
667 vec_to[i] = res;
668 }
669 else
670 {
671 time_t res;
672 parseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone);
673 vec_to[i] = res;
674 }
675 }
676 else
677 {
678 if constexpr (to_datetime64)
679 {
680 DateTime64 value = 0;
681 readDateTime64Text(value, vec_to.getScale(), read_buffer, *local_time_zone);
682 vec_to[i] = value;
683 }
684 else if constexpr (IsDataTypeDecimal<ToDataType>)
685 ToDataType::readText(vec_to[i], read_buffer, ToDataType::maxPrecision(), vec_to.getScale());
686 else
687 parseImpl<ToDataType>(vec_to[i], read_buffer, local_time_zone);
688 }
689
690 if (!isAllRead(read_buffer))
691 throwExceptionForIncompletelyParsedValue(read_buffer, block, result);
692 }
693 else
694 {
695 bool parsed;
696
697 if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort)
698 {
699 if constexpr (to_datetime64)
700 {
701 DateTime64 res = 0;
702 parsed = tryParseDateTime64BestEffort(res, vec_to.getScale(), read_buffer, *local_time_zone, *utc_time_zone);
703 vec_to[i] = res;
704 }
705 else
706 {
707 time_t res;
708 parsed = tryParseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone);
709 vec_to[i] = res;
710 }
711 }
712 else
713 {
714 if constexpr (to_datetime64)
715 {
716 DateTime64 value = 0;
717 parsed = tryReadDateTime64Text(value, vec_to.getScale(), read_buffer, *local_time_zone);
718 vec_to[i] = value;
719 }
720 else if constexpr (IsDataTypeDecimal<ToDataType>)
721 parsed = ToDataType::tryReadText(vec_to[i], read_buffer, ToDataType::maxPrecision(), vec_to.getScale());
722 else
723 parsed = tryParseImpl<ToDataType>(vec_to[i], read_buffer, local_time_zone);
724
725 parsed = parsed && isAllRead(read_buffer);
726 }
727
728 if (!parsed)
729 vec_to[i] = 0;
730
731 if constexpr (exception_mode == ConvertFromStringExceptionMode::Null)
732 (*vec_null_map_to)[i] = !parsed;
733 }
734
735 current_offset = next_offset;
736 }
737
738 if constexpr (exception_mode == ConvertFromStringExceptionMode::Null)
739 block.getByPosition(result).column = ColumnNullable::create(std::move(col_to), std::move(col_null_map_to));
740 else
741 block.getByPosition(result).column = std::move(col_to);
742 }
743};
744
745
/// Conversion of String to any other type: parsing in Normal mode, an exception is thrown on failure.
template <typename ToDataType, typename Name>
struct ConvertImpl<std::enable_if_t<!std::is_same_v<ToDataType, DataTypeString>, DataTypeString>, ToDataType, Name>
    : ConvertThroughParsing<DataTypeString, ToDataType, Name, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::Normal> {};

/// Conversion of FixedString to any other type: parsing in Normal mode, an exception is thrown on failure.
template <typename ToDataType, typename Name>
struct ConvertImpl<std::enable_if_t<!std::is_same_v<ToDataType, DataTypeFixedString>, DataTypeFixedString>, ToDataType, Name>
    : ConvertThroughParsing<DataTypeFixedString, ToDataType, Name, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::Normal> {};
753
754/// Generic conversion of any type from String. Used for complex types: Array and Tuple.
755struct ConvertImplGenericFromString
756{
757 static void execute(Block & block, const ColumnNumbers & arguments, size_t result)
758 {
759 const IColumn & col_from = *block.getByPosition(arguments[0]).column;
760 size_t size = col_from.size();
761
762 const IDataType & data_type_to = *block.getByPosition(result).type;
763
764 if (const ColumnString * col_from_string = checkAndGetColumn<ColumnString>(&col_from))
765 {
766 auto res = data_type_to.createColumn();
767
768 IColumn & column_to = *res;
769 column_to.reserve(size);
770
771 const ColumnString::Chars & chars = col_from_string->getChars();
772 const IColumn::Offsets & offsets = col_from_string->getOffsets();
773
774 size_t current_offset = 0;
775
776 FormatSettings format_settings;
777 for (size_t i = 0; i < size; ++i)
778 {
779 ReadBufferFromMemory read_buffer(&chars[current_offset], offsets[i] - current_offset - 1);
780
781 data_type_to.deserializeAsWholeText(column_to, read_buffer, format_settings);
782
783 if (!read_buffer.eof())
784 throwExceptionForIncompletelyParsedValue(read_buffer, block, result);
785
786 current_offset = offsets[i];
787 }
788
789 block.getByPosition(result).column = std::move(res);
790 }
791 else
792 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
793 + " of first argument of conversion function from string",
794 ErrorCodes::ILLEGAL_COLUMN);
795 }
796};
797
798
/// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type:
/// the String -> UInt32 conversion under this name reuses the String -> DateTime conversion.
struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; };

template <>
struct ConvertImpl<DataTypeString, DataTypeUInt32, NameToUnixTimestamp>
    : ConvertImpl<DataTypeString, DataTypeDateTime, NameToUnixTimestamp> {};
805
806
807/** If types are identical, just take reference to column.
808 */
809template <typename T, typename Name>
810struct ConvertImpl<std::enable_if_t<!T::is_parametric, T>, T, Name>
811{
812 static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
813 {
814 block.getByPosition(result).column = block.getByPosition(arguments[0]).column;
815 }
816};
817
818
819/** Conversion from FixedString to String.
820 * Cutting sequences of zero bytes from end of strings.
821 */
822template <typename Name>
823struct ConvertImpl<DataTypeFixedString, DataTypeString, Name>
824{
825 static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
826 {
827 if (const ColumnFixedString * col_from = checkAndGetColumn<ColumnFixedString>(block.getByPosition(arguments[0]).column.get()))
828 {
829 auto col_to = ColumnString::create();
830
831 const ColumnFixedString::Chars & data_from = col_from->getChars();
832 ColumnString::Chars & data_to = col_to->getChars();
833 ColumnString::Offsets & offsets_to = col_to->getOffsets();
834 size_t size = col_from->size();
835 size_t n = col_from->getN();
836 data_to.resize(size * (n + 1)); /// + 1 - zero terminator
837 offsets_to.resize(size);
838
839 size_t offset_from = 0;
840 size_t offset_to = 0;
841 for (size_t i = 0; i < size; ++i)
842 {
843 size_t bytes_to_copy = n;
844 while (bytes_to_copy > 0 && data_from[offset_from + bytes_to_copy - 1] == 0)
845 --bytes_to_copy;
846
847 memcpy(&data_to[offset_to], &data_from[offset_from], bytes_to_copy);
848 offset_from += n;
849 offset_to += bytes_to_copy;
850 data_to[offset_to] = 0;
851 ++offset_to;
852 offsets_to[i] = offset_to;
853 }
854
855 data_to.resize(offset_to);
856 block.getByPosition(result).column = std::move(col_to);
857 }
858 else
859 throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
860 + " of first argument of function " + Name::name,
861 ErrorCodes::ILLEGAL_COLUMN);
862 }
863};
864
865
/// Declared early because used below.
/// Each struct only carries the name of the corresponding conversion function.
struct NameToDate { static constexpr auto name = "toDate"; };
struct NameToDateTime { static constexpr auto name = "toDateTime"; };
struct NameToDateTime64 { static constexpr auto name = "toDateTime64"; };
struct NameToString { static constexpr auto name = "toString"; };
struct NameToDecimal32 { static constexpr auto name = "toDecimal32"; };
struct NameToDecimal64 { static constexpr auto name = "toDecimal64"; };
struct NameToDecimal128 { static constexpr auto name = "toDecimal128"; };
874
875
/// Declares the name tag `NameToInterval<KIND>` for one interval kind.
/// The tag carries two constants:
///  - `name`: the function name, built as "toInterval" followed by the stringified kind;
///  - `kind`: the matching `IntervalKind::<KIND>` value.
#define DEFINE_NAME_TO_INTERVAL(INTERVAL_KIND) \
    struct NameToInterval ## INTERVAL_KIND \
    { \
        static constexpr auto name = "toInterval" #INTERVAL_KIND; \
        static constexpr auto kind = IntervalKind::INTERVAL_KIND; \
    };

/// One tag per supported interval kind.
DEFINE_NAME_TO_INTERVAL(Second)
DEFINE_NAME_TO_INTERVAL(Minute)
DEFINE_NAME_TO_INTERVAL(Hour)
DEFINE_NAME_TO_INTERVAL(Day)
DEFINE_NAME_TO_INTERVAL(Week)
DEFINE_NAME_TO_INTERVAL(Month)
DEFINE_NAME_TO_INTERVAL(Quarter)
DEFINE_NAME_TO_INTERVAL(Year)

/// The helper macro is not needed past this point.
#undef DEFINE_NAME_TO_INTERVAL
893
894
/** Conversion function `toT(value[, ...])`.
  *
  * Template parameters:
  *  - ToDataType: the data type of the result;
  *  - Name: a name tag providing the function name (`Name::name`), and for intervals also `Name::kind`;
  *  - MonotonicityImpl: a policy with static `has()` and `get(type, left, right)` describing monotonicity.
  *
  * The actual work is delegated to `ConvertImpl<From, To, Name>::execute`, selected by the type of the first argument.
  */
template <typename ToDataType, typename Name, typename MonotonicityImpl>
class FunctionConvert : public IFunction
{
public:
    using Monotonic = MonotonicityImpl;

    static constexpr auto name = Name::name;

    /// True for toDecimal32 / toDecimal64 / toDecimal128: these take a mandatory scale argument.
    static constexpr bool to_decimal =
        std::is_same_v<Name, NameToDecimal32> || std::is_same_v<Name, NameToDecimal64> || std::is_same_v<Name, NameToDecimal128>;

    /// True for toDateTime64: it takes a mandatory scale and an optional time zone.
    static constexpr bool to_datetime64 = std::is_same_v<ToDataType, DataTypeDateTime64>;

    static FunctionPtr create(const Context &) { return std::make_shared<FunctionConvert>(); }
    static FunctionPtr create() { return std::make_shared<FunctionConvert>(); }

    String getName() const override
    {
        return name;
    }

    /// The number of arguments depends on the target type, so it is validated in getReturnTypeImpl.
    bool isVariadic() const override { return true; }
    size_t getNumberOfArguments() const override { return 0; }

    /// Only toString is declared injective.
    bool isInjective(const Block &) override { return std::is_same_v<Name, NameToString>; }

    /** Validates the arguments and builds the result type.
      * - Interval: DataTypeInterval of kind `Name::kind`.
      * - Decimal: precision 9 / 18 / 38 (for Decimal32 / 64 / 128) with the scale taken from the second argument.
      * - DateTime / DateTime64: the time zone name is extracted from the arguments; DateTime64 also takes the scale from the second argument.
      * - Anything else: a default-constructed ToDataType.
      */
    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}};
        FunctionArgumentDescriptors optional_args;

        if constexpr (to_decimal || to_datetime64)
        {
            mandatory_args.push_back({"scale", &isNativeInteger, &isColumnConst, "const Integer"});
        }
        // toString(DateTime or DateTime64, [timezone: String])
        if ((std::is_same_v<Name, NameToString> && arguments.size() > 0 && (isDateTime64(arguments[0].type) || isDateTime(arguments[0].type)))
            // toUnixTimestamp(value[, timezone : String])
            || std::is_same_v<Name, NameToUnixTimestamp>
            // toDate(value[, timezone : String])
            || std::is_same_v<ToDataType, DataTypeDate> // TODO: shall we allow timestamp argument for toDate? DateTime knows nothing about timezones and this argument is ignored below.
            // toDateTime(value[, timezone: String])
            || std::is_same_v<ToDataType, DataTypeDateTime>
            // toDateTime64(value, scale : Integer[, timezone: String])
            || std::is_same_v<ToDataType, DataTypeDateTime64>)
        {
            optional_args.push_back({"timezone", &isString, &isColumnConst, "const String"});
        }

        validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);

        if constexpr (std::is_same_v<ToDataType, DataTypeInterval>)
        {
            return std::make_shared<DataTypeInterval>(Name::kind);
        }
        else if constexpr (to_decimal)
        {
            /// NOTE(review): an explicit "second argument must be constant" check used to live here (commented out);
            /// presumably the `isColumnConst` validator of the "scale" descriptor above covers it - confirm.

            UInt64 scale = extractToDecimalScale(arguments[1]);

            if constexpr (std::is_same_v<Name, NameToDecimal32>)
                return createDecimal<DataTypeDecimal>(9, scale);
            else if constexpr (std::is_same_v<Name, NameToDecimal64>)
                return createDecimal<DataTypeDecimal>(18, scale);
            else if constexpr (std::is_same_v<Name, NameToDecimal128>)
                return createDecimal<DataTypeDecimal>(38, scale);

            /// Not reachable while `to_decimal` is defined through the same three name tags.
            throw Exception("Someting wrong with toDecimalNN()", ErrorCodes::LOGICAL_ERROR);
        }
        else
        {
            // Optional second argument with time zone for DateTime.
            UInt8 timezone_arg_position = 1;
            UInt32 scale [[maybe_unused]] = DataTypeDateTime64::default_scale;

            // DateTime64 requires more arguments: scale and timezone. Since timezone is optional, scale should be first.
            if constexpr (to_datetime64)
            {
                timezone_arg_position += 1;
                scale = static_cast<UInt32>(arguments[1].column->get64(0));
            }

            if constexpr (std::is_same_v<ToDataType, DataTypeDateTime>)
                return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0));
            else if constexpr (to_datetime64)
                return std::make_shared<DataTypeDateTime64>(scale, extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0));
            else
                return std::make_shared<ToDataType>();
        }
    }

    bool useDefaultImplementationForConstants() const override { return true; }
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
    bool canBeExecutedOnDefaultArguments() const override { return false; }

    /// Runs the conversion; for parsing-related error codes the exception message is extended
    /// with the names of the source and result types, then the exception is rethrown.
    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
    {
        try
        {
            executeInternal(block, arguments, result, input_rows_count);
        }
        catch (Exception & e)
        {
            /// More convenient error message.
            if (e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF)
            {
                e.addMessage("Cannot parse "
                    + block.getByPosition(result).type->getName() + " from "
                    + block.getByPosition(arguments[0]).type->getName()
                    + ", because value is too short");
            }
            else if (e.code() == ErrorCodes::CANNOT_PARSE_NUMBER
                || e.code() == ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT
                || e.code() == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED
                || e.code() == ErrorCodes::CANNOT_PARSE_QUOTED_STRING
                || e.code() == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE
                || e.code() == ErrorCodes::CANNOT_PARSE_DATE
                || e.code() == ErrorCodes::CANNOT_PARSE_DATETIME
                || e.code() == ErrorCodes::CANNOT_PARSE_UUID)
            {
                e.addMessage("Cannot parse "
                    + block.getByPosition(result).type->getName() + " from "
                    + block.getByPosition(arguments[0]).type->getName());
            }

            /// Rethrow the same exception object (with the extended message, if any).
            throw;
        }
    }

    /// Monotonicity queries are forwarded to the policy class.
    bool hasInformationAboutMonotonicity() const override
    {
        return Monotonic::has();
    }

    Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override
    {
        return Monotonic::get(type, left, right);
    }

private:
    /// Selects `ConvertImpl<From, To, Name>` by the type id of the first argument and executes it.
    /// If no implementation was selected, any type can still be converted to String through the generic path;
    /// for other result types an exception is thrown.
    void executeInternal(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count)
    {
        if (!arguments.size())
            throw Exception{"Function " + getName() + " expects at least 1 arguments",
               ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION};

        const IDataType * from_type = block.getByPosition(arguments[0]).type.get();

        /// Invoked by callOnIndexAndDataType with a pair of types (LeftType = source, RightType = result).
        auto call = [&](const auto & types) -> bool
        {
            using Types = std::decay_t<decltype(types)>;
            using LeftDataType = typename Types::LeftType;
            using RightDataType = typename Types::RightType;

            if constexpr (IsDataTypeDecimal<RightDataType>)
            {
                /// This branch is also taken for DateTime64 (see the nested check); the scale is passed to the implementation.
                if constexpr (std::is_same_v<RightDataType, DataTypeDateTime64>)
                {
                    // account for optional timezone argument
                    if (arguments.size() != 2 && arguments.size() != 3)
                        throw Exception{"Function " + getName() + " expects 2 or 3 arguments for DataTypeDateTime64.",
                            ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION};
                }
                else if (arguments.size() != 2)
                {
                    throw Exception{"Function " + getName() + " expects 2 arguments for Decimal.",
                        ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION};
                }

                const ColumnWithTypeAndName & scale_column = block.getByPosition(arguments[1]);
                UInt32 scale = extractToDecimalScale(scale_column);

                ConvertImpl<LeftDataType, RightDataType, Name>::execute(block, arguments, result, input_rows_count, scale);
            }
            else if constexpr (IsDataTypeDateOrDateTime<RightDataType> && std::is_same_v<LeftDataType, DataTypeDateTime64>)
            {
                /// Converting from DateTime64: the implementation receives the scale of the source type.
                const auto * dt64 = assert_cast<const DataTypeDateTime64 *>(block.getByPosition(arguments[0]).type.get());
                ConvertImpl<LeftDataType, RightDataType, Name>::execute(block, arguments, result, input_rows_count, dt64->getScale());
            }
            else
                ConvertImpl<LeftDataType, RightDataType, Name>::execute(block, arguments, result, input_rows_count);

            return true;
        };

        bool done = callOnIndexAndDataType<ToDataType>(from_type->getTypeId(), call);
        if (!done)
        {
            /// Generic conversion of any type to String.
            if (std::is_same_v<ToDataType, DataTypeString>)
            {
                ConvertImplGenericToString::execute(block, arguments, result);
            }
            else
                throw Exception("Illegal type " + block.getByPosition(arguments[0]).type->getName() + " of argument of function " + getName(),
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
        }
    }
};
1094
1095
1096/** Function toTOrZero (where T is number of date or datetime type):
1097 * try to convert from String to type T through parsing,
1098 * if cannot parse, return default value instead of throwing exception.
1099 * Function toTOrNull will return Nullable type with NULL when cannot parse.
1100 * NOTE Also need to implement tryToUnixTimestamp with timezone.
1101 */
1102template <typename ToDataType, typename Name,
1103 ConvertFromStringExceptionMode exception_mode,
1104 ConvertFromStringParsingMode parsing_mode = ConvertFromStringParsingMode::Normal>
1105class FunctionConvertFromString : public IFunction
1106{
1107public:
1108 static constexpr auto name = Name::name;
1109 static constexpr bool to_decimal =
1110 std::is_same_v<ToDataType, DataTypeDecimal<Decimal32>> ||
1111 std::is_same_v<ToDataType, DataTypeDecimal<Decimal64>> ||
1112 std::is_same_v<ToDataType, DataTypeDecimal<Decimal128>>;
1113
1114 static FunctionPtr create(const Context &) { return std::make_shared<FunctionConvertFromString>(); }
1115 static FunctionPtr create() { return std::make_shared<FunctionConvertFromString>(); }
1116
1117 String getName() const override
1118 {
1119 return name;
1120 }
1121
1122 bool isVariadic() const override { return true; }
1123 size_t getNumberOfArguments() const override { return 0; }
1124
1125 bool useDefaultImplementationForConstants() const override { return true; }
1126 ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
1127
1128 DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
1129 {
1130 if ((arguments.size() != 1 && arguments.size() != 2) || (to_decimal && arguments.size() != 2))
1131 throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) +
1132 ", should be 1 or 2. Second argument only make sense for DateTime (time zone, optional) and Decimal (scale).",
1133 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
1134
1135 if (!isStringOrFixedString(arguments[0].type))
1136 {
1137 if (this->getName().find("OrZero") != std::string::npos ||
1138 this->getName().find("OrNull") != std::string::npos)
1139 throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() +
1140 ". Conversion functions with postfix 'OrZero' or 'OrNull' should take String argument",
1141 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
1142 else
1143 throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName(),
1144 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
1145 }
1146
1147 if (arguments.size() == 2)
1148 {
1149 if constexpr (std::is_same_v<ToDataType, DataTypeDateTime>)
1150 {
1151 if (!isString(arguments[1].type))
1152 throw Exception("Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName(),
1153 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
1154 }
1155 else if constexpr (to_decimal)
1156 {
1157 if (!isInteger(arguments[1].type))
1158 throw Exception("Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName(),
1159 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
1160 if (!arguments[1].column)
1161 throw Exception("Second argument for function " + getName() + " must be constant", ErrorCodes::ILLEGAL_COLUMN);
1162 }
1163 else
1164 {
1165 throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
1166 + toString(arguments.size()) + ", should be 1. Second argument makes sense only for DateTime and Decimal.",
1167 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
1168 }
1169 }
1170
1171 DataTypePtr res;
1172
1173 if constexpr (std::is_same_v<ToDataType, DataTypeDateTime>)
1174 res = std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0));
1175 else if constexpr (to_decimal)
1176 {
1177 UInt64 scale = extractToDecimalScale(arguments[1]);
1178
1179 if constexpr (std::is_same_v<ToDataType, DataTypeDecimal<Decimal32>>)
1180 res = createDecimal<DataTypeDecimal>(9, scale);
1181 else if constexpr (std::is_same_v<ToDataType, DataTypeDecimal<Decimal64>>)
1182 res = createDecimal<DataTypeDecimal>(18, scale);
1183 else if constexpr (std::is_same_v<ToDataType, DataTypeDecimal<Decimal128>>)
1184 res = createDecimal<DataTypeDecimal>(38, scale);
1185
1186 if (!res)
1187 throw Exception("Someting wrong with toDecimalNNOrZero() or toDecimalNNOrNull()", ErrorCodes::LOGICAL_ERROR);
1188 }
1189 else if constexpr (std::is_same_v<ToDataType, DataTypeDateTime64>)
1190 {
1191 UInt64 scale = DataTypeDateTime64::default_scale;
1192 if (arguments.size() > 1)
1193 scale = extractToDecimalScale(arguments[1]);
1194 const auto timezone = extractTimeZoneNameFromFunctionArguments(arguments, 2, 0);
1195 res = std::make_shared<DataTypeDateTime64>(scale, timezone);
1196 }
1197 else
1198 res = std::make_shared<ToDataType>();
1199
1200 if constexpr (exception_mode == ConvertFromStringExceptionMode::Null)
1201 res = std::make_shared<DataTypeNullable>(res);
1202
1203 return res;
1204 }
1205
1206 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
1207 {
1208 const IDataType * from_type = block.getByPosition(arguments[0]).type.get();
1209
1210 bool ok = true;
1211 if constexpr (to_decimal || std::is_same_v<ToDataType, DataTypeDateTime64>)
1212 {
1213 if (arguments.size() != 2)
1214 throw Exception{"Function " + getName() + " expects 2 arguments for Decimal.", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION};
1215
1216 UInt32 scale = extractToDecimalScale(block.getByPosition(arguments[1]));
1217
1218 if (checkAndGetDataType<DataTypeString>(from_type))
1219 {
1220 ConvertThroughParsing<DataTypeString, ToDataType, Name, exception_mode, parsing_mode>::execute(
1221 block, arguments, result, input_rows_count, scale);
1222 }
1223 else if (checkAndGetDataType<DataTypeFixedString>(from_type))
1224 {
1225 ConvertThroughParsing<DataTypeFixedString, ToDataType, Name, exception_mode, parsing_mode>::execute(
1226 block, arguments, result, input_rows_count, scale);
1227 }
1228 else
1229 ok = false;
1230 }
1231 else
1232 {
1233 if (checkAndGetDataType<DataTypeString>(from_type))
1234 {
1235 ConvertThroughParsing<DataTypeString, ToDataType, Name, exception_mode, parsing_mode>::execute(
1236 block, arguments, result, input_rows_count);
1237 }
1238 else if (checkAndGetDataType<DataTypeFixedString>(from_type))
1239 {
1240 ConvertThroughParsing<DataTypeFixedString, ToDataType, Name, exception_mode, parsing_mode>::execute(
1241 block, arguments, result, input_rows_count);
1242 }
1243 else
1244 ok = false;
1245
1246 }
1247
1248 if (!ok)
1249 throw Exception("Illegal type " + block.getByPosition(arguments[0]).type->getName() + " of argument of function " + getName()
1250 + ". Only String or FixedString argument is accepted for try-conversion function."
1251 + " For other arguments, use function without 'orZero' or 'orNull'.",
1252 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
1253 }
1254};
1255
1256
1257/** Conversion to fixed string is implemented only for strings.
1258 */
1259class FunctionToFixedString : public IFunction
1260{
1261public:
1262 static constexpr auto name = "toFixedString";
1263 static FunctionPtr create(const Context &) { return std::make_shared<FunctionToFixedString>(); }
1264 static FunctionPtr create() { return std::make_shared<FunctionToFixedString>(); }
1265
1266 String getName() const override
1267 {
1268 return name;
1269 }
1270
1271 size_t getNumberOfArguments() const override { return 2; }
1272 bool isInjective(const Block &) override { return true; }
1273
1274 DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
1275 {
1276 if (!isUnsignedInteger(arguments[1].type))
1277 throw Exception("Second argument for function " + getName() + " must be unsigned integer", ErrorCodes::ILLEGAL_COLUMN);
1278 if (!arguments[1].column)
1279 throw Exception("Second argument for function " + getName() + " must be constant", ErrorCodes::ILLEGAL_COLUMN);
1280 if (!isStringOrFixedString(arguments[0].type))
1281 throw Exception(getName() + " is only implemented for types String and FixedString", ErrorCodes::NOT_IMPLEMENTED);
1282
1283 const size_t n = arguments[1].column->getUInt(0);
1284 return std::make_shared<DataTypeFixedString>(n);
1285 }
1286
1287 bool useDefaultImplementationForConstants() const override { return true; }
1288 ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
1289
1290 void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
1291 {
1292 const auto n = block.getByPosition(arguments[1]).column->getUInt(0);
1293 return executeForN(block, arguments, result, n);
1294 }
1295
1296 static void executeForN(Block & block, const ColumnNumbers & arguments, const size_t result, const size_t n)
1297 {
1298 const auto & column = block.getByPosition(arguments[0]).column;
1299
1300 if (const auto column_string = checkAndGetColumn<ColumnString>(column.get()))
1301 {
1302 auto column_fixed = ColumnFixedString::create(n);
1303
1304 auto & out_chars = column_fixed->getChars();
1305 const auto & in_chars = column_string->getChars();
1306 const auto & in_offsets = column_string->getOffsets();
1307
1308 out_chars.resize_fill(in_offsets.size() * n);
1309
1310 for (size_t i = 0; i < in_offsets.size(); ++i)
1311 {
1312 const size_t off = i ? in_offsets[i - 1] : 0;
1313 const size_t len = in_offsets[i] - off - 1;
1314 if (len > n)
1315 throw Exception("String too long for type FixedString(" + toString(n) + ")",
1316 ErrorCodes::TOO_LARGE_STRING_SIZE);
1317 memcpy(&out_chars[i * n], &in_chars[off], len);
1318 }
1319
1320 block.getByPosition(result).column = std::move(column_fixed);
1321 }
1322 else if (const auto column_fixed_string = checkAndGetColumn<ColumnFixedString>(column.get()))
1323 {
1324 const auto src_n = column_fixed_string->getN();
1325 if (src_n > n)
1326 throw Exception{"String too long for type FixedString(" + toString(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE};
1327
1328 auto column_fixed = ColumnFixedString::create(n);
1329
1330 auto & out_chars = column_fixed->getChars();
1331 const auto & in_chars = column_fixed_string->getChars();
1332 const auto size = column_fixed_string->size();
1333 out_chars.resize_fill(size * n);
1334
1335 for (const auto i : ext::range(0, size))
1336 memcpy(&out_chars[i * n], &in_chars[i * src_n], src_n);
1337
1338 block.getByPosition(result).column = std::move(column_fixed);
1339 }
1340 else
1341 throw Exception("Unexpected column: " + column->getName(), ErrorCodes::ILLEGAL_COLUMN);
1342 }
1343};
1344
1345
1346/// Monotonicity.
1347
1348struct PositiveMonotonicity
1349{
1350 static bool has() { return true; }
1351 static IFunction::Monotonicity get(const IDataType &, const Field &, const Field &)
1352 {
1353 return { true };
1354 }
1355};
1356
1357struct UnknownMonotonicity
1358{
1359 static bool has() { return false; }
1360 static IFunction::Monotonicity get(const IDataType &, const Field &, const Field &)
1361 {
1362 return { false };
1363 }
1364};
1365
1366template <typename T>
1367struct ToNumberMonotonicity
1368{
1369 static bool has() { return true; }
1370
1371 static UInt64 divideByRangeOfType(UInt64 x)
1372 {
1373 if constexpr (sizeof(T) < sizeof(UInt64))
1374 return x >> (sizeof(T) * 8);
1375 else
1376 return 0;
1377 }
1378
1379 static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right)
1380 {
1381 if (!type.isValueRepresentedByNumber())
1382 return {};
1383
1384 /// If type is same, the conversion is always monotonic.
1385 /// (Enum has separate case, because it is different data type)
1386 if (checkAndGetDataType<DataTypeNumber<T>>(&type) ||
1387 checkAndGetDataType<DataTypeEnum<T>>(&type))
1388 return { true, true, true };
1389
1390 /// Float cases.
1391
1392 /// When converting to Float, the conversion is always monotonic.
1393 if (std::is_floating_point_v<T>)
1394 return {true, true, true};
1395
1396 /// If converting from Float, for monotonicity, arguments must fit in range of result type.
1397 if (WhichDataType(type).isFloat())
1398 {
1399 if (left.isNull() || right.isNull())
1400 return {};
1401
1402 Float64 left_float = left.get<Float64>();
1403 Float64 right_float = right.get<Float64>();
1404
1405 if (left_float >= std::numeric_limits<T>::min() && left_float <= std::numeric_limits<T>::max()
1406 && right_float >= std::numeric_limits<T>::min() && right_float <= std::numeric_limits<T>::max())
1407 return { true };
1408
1409 return {};
1410 }
1411
1412 /// Integer cases.
1413
1414 const bool from_is_unsigned = type.isValueRepresentedByUnsignedInteger();
1415 const bool to_is_unsigned = is_unsigned_v<T>;
1416
1417 const size_t size_of_from = type.getSizeOfValueInMemory();
1418 const size_t size_of_to = sizeof(T);
1419
1420 const bool left_in_first_half = left.isNull()
1421 ? from_is_unsigned
1422 : (left.get<Int64>() >= 0);
1423
1424 const bool right_in_first_half = right.isNull()
1425 ? !from_is_unsigned
1426 : (right.get<Int64>() >= 0);
1427
1428 /// Size of type is the same.
1429 if (size_of_from == size_of_to)
1430 {
1431 if (from_is_unsigned == to_is_unsigned)
1432 return {true, true, true};
1433
1434 if (left_in_first_half == right_in_first_half)
1435 return {true};
1436
1437 return {};
1438 }
1439
1440 /// Size of type is expanded.
1441 if (size_of_from < size_of_to)
1442 {
1443 if (from_is_unsigned == to_is_unsigned)
1444 return {true, true, true};
1445
1446 if (!to_is_unsigned)
1447 return {true, true, true};
1448
1449 /// signed -> unsigned. If arguments from the same half, then function is monotonic.
1450 if (left_in_first_half == right_in_first_half)
1451 return {true};
1452
1453 return {};
1454 }
1455
1456 /// Size of type is shrinked.
1457 if (size_of_from > size_of_to)
1458 {
1459 /// Function cannot be monotonic on unbounded ranges.
1460 if (left.isNull() || right.isNull())
1461 return {};
1462
1463 if (from_is_unsigned == to_is_unsigned)
1464 {
1465 /// all bits other than that fits, must be same.
1466 if (divideByRangeOfType(left.get<UInt64>()) == divideByRangeOfType(right.get<UInt64>()))
1467 return {true};
1468
1469 return {};
1470 }
1471 else
1472 {
1473 /// When signedness is changed, it's also required for arguments to be from the same half.
1474 /// And they must be in the same half after converting to the result type.
1475 if (left_in_first_half == right_in_first_half
1476 && (T(left.get<Int64>()) >= 0) == (T(right.get<Int64>()) >= 0)
1477 && divideByRangeOfType(left.get<UInt64>()) == divideByRangeOfType(right.get<UInt64>()))
1478 return {true};
1479
1480 return {};
1481 }
1482 }
1483
1484 __builtin_unreachable();
1485 }
1486};
1487
1488/** The monotonicity for the `toString` function is mainly determined for test purposes.
1489 * It is doubtful that anyone is looking to optimize queries with conditions `toString(CounterID) = 34`.
1490 */
1491struct ToStringMonotonicity
1492{
1493 static bool has() { return true; }
1494
1495 static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right)
1496 {
1497 IFunction::Monotonicity positive(true, true);
1498 IFunction::Monotonicity not_monotonic;
1499
1500 /// `toString` function is monotonous if the argument is Date or DateTime, or non-negative numbers with the same number of symbols.
1501
1502 if (checkAndGetDataType<DataTypeDate>(&type)
1503 || typeid_cast<const DataTypeDateTime *>(&type))
1504 return positive;
1505
1506 if (left.isNull() || right.isNull())
1507 return {};
1508
1509 if (left.getType() == Field::Types::UInt64
1510 && right.getType() == Field::Types::UInt64)
1511 {
1512 return (left.get<Int64>() == 0 && right.get<Int64>() == 0)
1513 || (floor(log10(left.get<UInt64>())) == floor(log10(right.get<UInt64>())))
1514 ? positive : not_monotonic;
1515 }
1516
1517 if (left.getType() == Field::Types::Int64
1518 && right.getType() == Field::Types::Int64)
1519 {
1520 return (left.get<Int64>() == 0 && right.get<Int64>() == 0)
1521 || (left.get<Int64>() > 0 && right.get<Int64>() > 0 && floor(log10(left.get<Int64>())) == floor(log10(right.get<Int64>())))
1522 ? positive : not_monotonic;
1523 }
1524
1525 return not_monotonic;
1526 }
1527};
1528
1529
/// Name tags for conversions to numeric types and UUID.
struct NameToUInt8
{
    static constexpr auto name = "toUInt8";
};

struct NameToUInt16
{
    static constexpr auto name = "toUInt16";
};

struct NameToUInt32
{
    static constexpr auto name = "toUInt32";
};

struct NameToUInt64
{
    static constexpr auto name = "toUInt64";
};

struct NameToInt8
{
    static constexpr auto name = "toInt8";
};

struct NameToInt16
{
    static constexpr auto name = "toInt16";
};

struct NameToInt32
{
    static constexpr auto name = "toInt32";
};

struct NameToInt64
{
    static constexpr auto name = "toInt64";
};

struct NameToFloat32
{
    static constexpr auto name = "toFloat32";
};

struct NameToFloat64
{
    static constexpr auto name = "toFloat64";
};

struct NameToUUID
{
    static constexpr auto name = "toUUID";
};
1541
/// Concrete conversion functions: FunctionConvert<result data type, name tag, monotonicity policy>.

/// Conversions to numbers use ToNumberMonotonicity of the corresponding native type.
using FunctionToUInt8 = FunctionConvert<DataTypeUInt8, NameToUInt8, ToNumberMonotonicity<UInt8>>;
using FunctionToUInt16 = FunctionConvert<DataTypeUInt16, NameToUInt16, ToNumberMonotonicity<UInt16>>;
using FunctionToUInt32 = FunctionConvert<DataTypeUInt32, NameToUInt32, ToNumberMonotonicity<UInt32>>;
using FunctionToUInt64 = FunctionConvert<DataTypeUInt64, NameToUInt64, ToNumberMonotonicity<UInt64>>;
using FunctionToInt8 = FunctionConvert<DataTypeInt8, NameToInt8, ToNumberMonotonicity<Int8>>;
using FunctionToInt16 = FunctionConvert<DataTypeInt16, NameToInt16, ToNumberMonotonicity<Int16>>;
using FunctionToInt32 = FunctionConvert<DataTypeInt32, NameToInt32, ToNumberMonotonicity<Int32>>;
using FunctionToInt64 = FunctionConvert<DataTypeInt64, NameToInt64, ToNumberMonotonicity<Int64>>;
using FunctionToFloat32 = FunctionConvert<DataTypeFloat32, NameToFloat32, ToNumberMonotonicity<Float32>>;
using FunctionToFloat64 = FunctionConvert<DataTypeFloat64, NameToFloat64, ToNumberMonotonicity<Float64>>;
/// Date and DateTime reuse the number policy with UInt16 and UInt32 respectively; DateTime64 has no monotonicity information.
using FunctionToDate = FunctionConvert<DataTypeDate, NameToDate, ToNumberMonotonicity<UInt16>>;
using FunctionToDateTime = FunctionConvert<DataTypeDateTime, NameToDateTime, ToNumberMonotonicity<UInt32>>;
using FunctionToDateTime64 = FunctionConvert<DataTypeDateTime64, NameToDateTime64, UnknownMonotonicity>;
using FunctionToUUID = FunctionConvert<DataTypeUUID, NameToUUID, ToNumberMonotonicity<UInt128>>;
using FunctionToString = FunctionConvert<DataTypeString, NameToString, ToStringMonotonicity>;
/// toUnixTimestamp produces UInt32.
using FunctionToUnixTimestamp = FunctionConvert<DataTypeUInt32, NameToUnixTimestamp, ToNumberMonotonicity<UInt32>>;
/// Conversions to Decimal have no monotonicity information.
using FunctionToDecimal32 = FunctionConvert<DataTypeDecimal<Decimal32>, NameToDecimal32, UnknownMonotonicity>;
using FunctionToDecimal64 = FunctionConvert<DataTypeDecimal<Decimal64>, NameToDecimal64, UnknownMonotonicity>;
using FunctionToDecimal128 = FunctionConvert<DataTypeDecimal<Decimal128>, NameToDecimal128, UnknownMonotonicity>;
1561
1562
/// Compile-time mapping from a data type to the function that converts to it: `FunctionTo<DataType>::Type`.
/// Only the specializations below are defined; the primary template is intentionally left incomplete.
template <typename DataType> struct FunctionTo;

template <> struct FunctionTo<DataTypeUInt8> { using Type = FunctionToUInt8; };
template <> struct FunctionTo<DataTypeUInt16> { using Type = FunctionToUInt16; };
template <> struct FunctionTo<DataTypeUInt32> { using Type = FunctionToUInt32; };
template <> struct FunctionTo<DataTypeUInt64> { using Type = FunctionToUInt64; };
template <> struct FunctionTo<DataTypeInt8> { using Type = FunctionToInt8; };
template <> struct FunctionTo<DataTypeInt16> { using Type = FunctionToInt16; };
template <> struct FunctionTo<DataTypeInt32> { using Type = FunctionToInt32; };
template <> struct FunctionTo<DataTypeInt64> { using Type = FunctionToInt64; };
template <> struct FunctionTo<DataTypeFloat32> { using Type = FunctionToFloat32; };
template <> struct FunctionTo<DataTypeFloat64> { using Type = FunctionToFloat64; };
template <> struct FunctionTo<DataTypeDate> { using Type = FunctionToDate; };
template <> struct FunctionTo<DataTypeDateTime> { using Type = FunctionToDateTime; };
template <> struct FunctionTo<DataTypeDateTime64> { using Type = FunctionToDateTime64; };
template <> struct FunctionTo<DataTypeUUID> { using Type = FunctionToUUID; };
template <> struct FunctionTo<DataTypeString> { using Type = FunctionToString; };
template <> struct FunctionTo<DataTypeFixedString> { using Type = FunctionToFixedString; };
template <> struct FunctionTo<DataTypeDecimal<Decimal32>> { using Type = FunctionToDecimal32; };
template <> struct FunctionTo<DataTypeDecimal<Decimal64>> { using Type = FunctionToDecimal64; };
template <> struct FunctionTo<DataTypeDecimal<Decimal128>> { using Type = FunctionToDecimal128; };

/// An Enum maps to the conversion function of the number type with the same field type.
template <typename FieldType> struct FunctionTo<DataTypeEnum<FieldType>>
    : FunctionTo<DataTypeNumber<FieldType>>
{
};
1589
/// Name tags for the `...OrZero` family of conversion functions.
struct NameToUInt8OrZero
{
    static constexpr auto name = "toUInt8OrZero";
};

struct NameToUInt16OrZero
{
    static constexpr auto name = "toUInt16OrZero";
};

struct NameToUInt32OrZero
{
    static constexpr auto name = "toUInt32OrZero";
};

struct NameToUInt64OrZero
{
    static constexpr auto name = "toUInt64OrZero";
};

struct NameToInt8OrZero
{
    static constexpr auto name = "toInt8OrZero";
};

struct NameToInt16OrZero
{
    static constexpr auto name = "toInt16OrZero";
};

struct NameToInt32OrZero
{
    static constexpr auto name = "toInt32OrZero";
};

struct NameToInt64OrZero
{
    static constexpr auto name = "toInt64OrZero";
};

struct NameToFloat32OrZero
{
    static constexpr auto name = "toFloat32OrZero";
};

struct NameToFloat64OrZero
{
    static constexpr auto name = "toFloat64OrZero";
};

struct NameToDateOrZero
{
    static constexpr auto name = "toDateOrZero";
};

struct NameToDateTimeOrZero
{
    static constexpr auto name = "toDateTimeOrZero";
};

struct NameToDateTime64OrZero
{
    static constexpr auto name = "toDateTime64OrZero";
};

struct NameToDecimal32OrZero
{
    static constexpr auto name = "toDecimal32OrZero";
};

struct NameToDecimal64OrZero
{
    static constexpr auto name = "toDecimal64OrZero";
};

struct NameToDecimal128OrZero
{
    static constexpr auto name = "toDecimal128OrZero";
};
1606
/// The `...OrZero` functions: FunctionConvertFromString<result data type, name tag, ConvertFromStringExceptionMode::Zero>.
/// The parsing mode template argument is left at its default.
using FunctionToUInt8OrZero = FunctionConvertFromString<DataTypeUInt8, NameToUInt8OrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToUInt16OrZero = FunctionConvertFromString<DataTypeUInt16, NameToUInt16OrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToUInt32OrZero = FunctionConvertFromString<DataTypeUInt32, NameToUInt32OrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToUInt64OrZero = FunctionConvertFromString<DataTypeUInt64, NameToUInt64OrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToInt8OrZero = FunctionConvertFromString<DataTypeInt8, NameToInt8OrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToInt16OrZero = FunctionConvertFromString<DataTypeInt16, NameToInt16OrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToInt32OrZero = FunctionConvertFromString<DataTypeInt32, NameToInt32OrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToInt64OrZero = FunctionConvertFromString<DataTypeInt64, NameToInt64OrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToFloat32OrZero = FunctionConvertFromString<DataTypeFloat32, NameToFloat32OrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToFloat64OrZero = FunctionConvertFromString<DataTypeFloat64, NameToFloat64OrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToDateOrZero = FunctionConvertFromString<DataTypeDate, NameToDateOrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToDateTimeOrZero = FunctionConvertFromString<DataTypeDateTime, NameToDateTimeOrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToDateTime64OrZero = FunctionConvertFromString<DataTypeDateTime64, NameToDateTime64OrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToDecimal32OrZero = FunctionConvertFromString<DataTypeDecimal<Decimal32>, NameToDecimal32OrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToDecimal64OrZero = FunctionConvertFromString<DataTypeDecimal<Decimal64>, NameToDecimal64OrZero, ConvertFromStringExceptionMode::Zero>;
using FunctionToDecimal128OrZero = FunctionConvertFromString<DataTypeDecimal<Decimal128>, NameToDecimal128OrZero, ConvertFromStringExceptionMode::Zero>;
1623
/// Name tags for the "...OrNull" conversions. Each struct only carries the SQL-visible function
/// name and is passed as the Name argument of the matching FunctionTo...OrNull alias.
struct NameToUInt8OrNull { static constexpr const char * name = "toUInt8OrNull"; };
struct NameToUInt16OrNull { static constexpr const char * name = "toUInt16OrNull"; };
struct NameToUInt32OrNull { static constexpr const char * name = "toUInt32OrNull"; };
struct NameToUInt64OrNull { static constexpr const char * name = "toUInt64OrNull"; };
struct NameToInt8OrNull { static constexpr const char * name = "toInt8OrNull"; };
struct NameToInt16OrNull { static constexpr const char * name = "toInt16OrNull"; };
struct NameToInt32OrNull { static constexpr const char * name = "toInt32OrNull"; };
struct NameToInt64OrNull { static constexpr const char * name = "toInt64OrNull"; };
struct NameToFloat32OrNull { static constexpr const char * name = "toFloat32OrNull"; };
struct NameToFloat64OrNull { static constexpr const char * name = "toFloat64OrNull"; };
struct NameToDateOrNull { static constexpr const char * name = "toDateOrNull"; };
struct NameToDateTimeOrNull { static constexpr const char * name = "toDateTimeOrNull"; };
struct NameToDateTime64OrNull { static constexpr const char * name = "toDateTime64OrNull"; };
struct NameToDecimal32OrNull { static constexpr const char * name = "toDecimal32OrNull"; };
struct NameToDecimal64OrNull { static constexpr const char * name = "toDecimal64OrNull"; };
struct NameToDecimal128OrNull { static constexpr const char * name = "toDecimal128OrNull"; };
1640
1641using FunctionToUInt8OrNull = FunctionConvertFromString<DataTypeUInt8, NameToUInt8OrNull, ConvertFromStringExceptionMode::Null>;
1642using FunctionToUInt16OrNull = FunctionConvertFromString<DataTypeUInt16, NameToUInt16OrNull, ConvertFromStringExceptionMode::Null>;
1643using FunctionToUInt32OrNull = FunctionConvertFromString<DataTypeUInt32, NameToUInt32OrNull, ConvertFromStringExceptionMode::Null>;
1644using FunctionToUInt64OrNull = FunctionConvertFromString<DataTypeUInt64, NameToUInt64OrNull, ConvertFromStringExceptionMode::Null>;
1645using FunctionToInt8OrNull = FunctionConvertFromString<DataTypeInt8, NameToInt8OrNull, ConvertFromStringExceptionMode::Null>;
1646using FunctionToInt16OrNull = FunctionConvertFromString<DataTypeInt16, NameToInt16OrNull, ConvertFromStringExceptionMode::Null>;
1647using FunctionToInt32OrNull = FunctionConvertFromString<DataTypeInt32, NameToInt32OrNull, ConvertFromStringExceptionMode::Null>;
1648using FunctionToInt64OrNull = FunctionConvertFromString<DataTypeInt64, NameToInt64OrNull, ConvertFromStringExceptionMode::Null>;
1649using FunctionToFloat32OrNull = FunctionConvertFromString<DataTypeFloat32, NameToFloat32OrNull, ConvertFromStringExceptionMode::Null>;
1650using FunctionToFloat64OrNull = FunctionConvertFromString<DataTypeFloat64, NameToFloat64OrNull, ConvertFromStringExceptionMode::Null>;
1651using FunctionToDateOrNull = FunctionConvertFromString<DataTypeDate, NameToDateOrNull, ConvertFromStringExceptionMode::Null>;
1652using FunctionToDateTimeOrNull = FunctionConvertFromString<DataTypeDateTime, NameToDateTimeOrNull, ConvertFromStringExceptionMode::Null>;
1653using FunctionToDateTime64OrNull = FunctionConvertFromString<DataTypeDateTime64, NameToDateTime64OrNull, ConvertFromStringExceptionMode::Null>;
1654using FunctionToDecimal32OrNull = FunctionConvertFromString<DataTypeDecimal<Decimal32>, NameToDecimal32OrNull, ConvertFromStringExceptionMode::Null>;
1655using FunctionToDecimal64OrNull = FunctionConvertFromString<DataTypeDecimal<Decimal64>, NameToDecimal64OrNull, ConvertFromStringExceptionMode::Null>;
1656using FunctionToDecimal128OrNull = FunctionConvertFromString<DataTypeDecimal<Decimal128>, NameToDecimal128OrNull, ConvertFromStringExceptionMode::Null>;
1657
/// Name tags for the best-effort date/time parsing functions (DateTime and DateTime64 flavours,
/// each in a plain, an "OrZero" and an "OrNull" variant).
struct NameParseDateTimeBestEffort { static constexpr const char * name = "parseDateTimeBestEffort"; };
struct NameParseDateTimeBestEffortOrZero { static constexpr const char * name = "parseDateTimeBestEffortOrZero"; };
struct NameParseDateTimeBestEffortOrNull { static constexpr const char * name = "parseDateTimeBestEffortOrNull"; };
struct NameParseDateTime64BestEffort { static constexpr const char * name = "parseDateTime64BestEffort"; };
struct NameParseDateTime64BestEffortOrZero { static constexpr const char * name = "parseDateTime64BestEffortOrZero"; };
struct NameParseDateTime64BestEffortOrNull { static constexpr const char * name = "parseDateTime64BestEffortOrNull"; };
1664
1665
1666using FunctionParseDateTimeBestEffort = FunctionConvertFromString<
1667 DataTypeDateTime, NameParseDateTimeBestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>;
1668using FunctionParseDateTimeBestEffortOrZero = FunctionConvertFromString<
1669 DataTypeDateTime, NameParseDateTimeBestEffortOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffort>;
1670using FunctionParseDateTimeBestEffortOrNull = FunctionConvertFromString<
1671 DataTypeDateTime, NameParseDateTimeBestEffortOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffort>;
1672
1673using FunctionParseDateTime64BestEffort = FunctionConvertFromString<
1674 DataTypeDateTime64, NameParseDateTime64BestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>;
1675using FunctionParseDateTime64BestEffortOrZero = FunctionConvertFromString<
1676 DataTypeDateTime64, NameParseDateTime64BestEffortOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffort>;
1677using FunctionParseDateTime64BestEffortOrNull = FunctionConvertFromString<
1678 DataTypeDateTime64, NameParseDateTime64BestEffortOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffort>;
1679
1680class ExecutableFunctionCast : public IExecutableFunctionImpl
1681{
1682public:
1683 using WrapperType = std::function<void(Block &, const ColumnNumbers &, size_t, size_t)>;
1684
1685 explicit ExecutableFunctionCast(WrapperType && wrapper_function_, const char * name_)
1686 : wrapper_function(std::move(wrapper_function_)), name(name_) {}
1687
1688 String getName() const override { return name; }
1689
1690protected:
1691 void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
1692 {
1693 /// drop second argument, pass others
1694 ColumnNumbers new_arguments{arguments.front()};
1695 if (arguments.size() > 2)
1696 new_arguments.insert(std::end(new_arguments), std::next(std::begin(arguments), 2), std::end(arguments));
1697
1698 wrapper_function(block, new_arguments, result, input_rows_count);
1699 }
1700
1701 bool useDefaultImplementationForNulls() const override { return false; }
1702 bool useDefaultImplementationForConstants() const override { return true; }
1703 bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
1704 ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
1705
1706private:
1707 WrapperType wrapper_function;
1708 const char * name;
1709};
1710
1711
/// Name tag carrying the function name "CAST".
struct NameCast
{
    static constexpr const char * name = "CAST";
};
1713
1714class FunctionCast final : public IFunctionBaseImpl
1715{
1716public:
    /// A prepared conversion. The wrappers built in this class name the parameters
    /// (block, arguments, result, input_rows_count).
    using WrapperType = std::function<void(Block &, const ColumnNumbers &, size_t, size_t)>;
    /// Callback answering getMonotonicityForRange(type, left, right); may be empty
    /// (see hasInformationAboutMonotonicity).
    using MonotonicityForRange = std::function<Monotonicity(const IDataType &, const Field &, const Field &)>;
1719
1720 FunctionCast(const char * name_, MonotonicityForRange && monotonicity_for_range_
1721 , const DataTypes & argument_types_, const DataTypePtr & return_type_)
1722 : name(name_), monotonicity_for_range(monotonicity_for_range_)
1723 , argument_types(argument_types_), return_type(return_type_)
1724 {
1725 }
1726
1727 const DataTypes & getArgumentTypes() const override { return argument_types; }
1728 const DataTypePtr & getReturnType() const override { return return_type; }
1729
1730 ExecutableFunctionImplPtr prepare(const Block & /*sample_block*/, const ColumnNumbers & /*arguments*/, size_t /*result*/) const override
1731 {
1732 return std::make_unique<ExecutableFunctionCast>(
1733 prepareUnpackDictionaries(getArgumentTypes()[0], getReturnType()), name);
1734 }
1735
1736 String getName() const override { return name; }
1737
1738 bool isDeterministic() const override { return true; }
1739 bool isDeterministicInScopeOfQuery() const override { return true; }
1740
1741 bool hasInformationAboutMonotonicity() const override
1742 {
1743 return static_cast<bool>(monotonicity_for_range);
1744 }
1745
1746 Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override
1747 {
1748 return monotonicity_for_range(type, left, right);
1749 }
1750
1751private:
1752
    /// Function name; a non-owning pointer, returned by getName() and passed on to the executable function.
    const char * name;
    /// Optional monotonicity callback; emptiness is reported by hasInformationAboutMonotonicity().
    MonotonicityForRange monotonicity_for_range;

    /// Argument types; prepare() converts from the first of them.
    DataTypes argument_types;
    /// Requested result type of the cast.
    DataTypePtr return_type;
1758
1759 template <typename DataType>
1760 WrapperType createWrapper(const DataTypePtr & from_type, const DataType * const, bool requested_result_is_nullable) const
1761 {
1762 FunctionPtr function;
1763
1764 if (requested_result_is_nullable && checkAndGetDataType<DataTypeString>(from_type.get()))
1765 {
1766 /// In case when converting to Nullable type, we apply different parsing rule,
1767 /// that will not throw an exception but return NULL in case of malformed input.
1768 function = FunctionConvertFromString<DataType, NameCast, ConvertFromStringExceptionMode::Null>::create();
1769 }
1770 else
1771 function = FunctionTo<DataType>::Type::create();
1772
1773 auto function_adaptor =
1774 FunctionOverloadResolverAdaptor(std::make_unique<DefaultOverloadResolver>(function))
1775 .build({ColumnWithTypeAndName{nullptr, from_type, ""}});
1776
1777 return [function_adaptor] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count)
1778 {
1779 function_adaptor->execute(block, arguments, result, input_rows_count);
1780 };
1781 }
1782
1783 WrapperType createStringWrapper(const DataTypePtr & from_type) const
1784 {
1785 FunctionPtr function = FunctionToString::create();
1786
1787 auto function_adaptor =
1788 FunctionOverloadResolverAdaptor(std::make_unique<DefaultOverloadResolver>(function))
1789 .build({ColumnWithTypeAndName{nullptr, from_type, ""}});
1790
1791 return [function_adaptor] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count)
1792 {
1793 function_adaptor->execute(block, arguments, result, input_rows_count);
1794 };
1795 }
1796
1797 static WrapperType createFixedStringWrapper(const DataTypePtr & from_type, const size_t N)
1798 {
1799 if (!isStringOrFixedString(from_type))
1800 throw Exception{"CAST AS FixedString is only implemented for types String and FixedString", ErrorCodes::NOT_IMPLEMENTED};
1801
1802 return [N] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t /*input_rows_count*/)
1803 {
1804 FunctionToFixedString::executeForN(block, arguments, result, N);
1805 };
1806 }
1807
1808 WrapperType createUUIDWrapper(const DataTypePtr & from_type, const DataTypeUUID * const, bool requested_result_is_nullable) const
1809 {
1810 if (requested_result_is_nullable)
1811 throw Exception{"CAST AS Nullable(UUID) is not implemented", ErrorCodes::NOT_IMPLEMENTED};
1812
1813 FunctionPtr function = FunctionTo<DataTypeUUID>::Type::create();
1814
1815 auto function_adaptor =
1816 FunctionOverloadResolverAdaptor(std::make_unique<DefaultOverloadResolver>(function))
1817 .build({ColumnWithTypeAndName{nullptr, from_type, ""}});
1818
1819 return [function_adaptor] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count)
1820 {
1821 function_adaptor->execute(block, arguments, result, input_rows_count);
1822 };
1823 }
1824
1825 template <typename ToDataType>
1826 std::enable_if_t<IsDataTypeDecimal<ToDataType>, WrapperType>
1827 createDecimalWrapper(const DataTypePtr & from_type, const ToDataType * to_type) const
1828 {
1829 TypeIndex type_index = from_type->getTypeId();
1830 UInt32 scale = to_type->getScale();
1831
1832 WhichDataType which(type_index);
1833 bool ok = which.isNativeInt() ||
1834 which.isNativeUInt() ||
1835 which.isDecimal() ||
1836 which.isFloat() ||
1837 which.isDateOrDateTime() ||
1838 which.isStringOrFixedString();
1839 if (!ok)
1840 throw Exception{"Conversion from " + from_type->getName() + " to " + to_type->getName() + " is not supported",
1841 ErrorCodes::CANNOT_CONVERT_TYPE};
1842
1843 return [type_index, scale, to_type] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count)
1844 {
1845 auto res = callOnIndexAndDataType<ToDataType>(type_index, [&](const auto & types) -> bool
1846 {
1847 using Types = std::decay_t<decltype(types)>;
1848 using LeftDataType = typename Types::LeftType;
1849 using RightDataType = typename Types::RightType;
1850
1851 ConvertImpl<LeftDataType, RightDataType, NameCast>::execute(block, arguments, result, input_rows_count, scale);
1852 return true;
1853 });
1854
1855 /// Additionally check if callOnIndexAndDataType wasn't called at all.
1856 if (!res)
1857 {
1858 throw Exception{"Conversion from " + std::string(getTypeName(type_index)) + " to " + to_type->getName() +
1859 " is not supported", ErrorCodes::CANNOT_CONVERT_TYPE};
1860 }
1861 };
1862 }
1863
1864 WrapperType createAggregateFunctionWrapper(const DataTypePtr & from_type_untyped, const DataTypeAggregateFunction * to_type) const
1865 {
1866 /// Conversion from String through parsing.
1867 if (checkAndGetDataType<DataTypeString>(from_type_untyped.get()))
1868 {
1869 return [] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t /*input_rows_count*/)
1870 {
1871 ConvertImplGenericFromString::execute(block, arguments, result);
1872 };
1873 }
1874 else
1875 throw Exception{"Conversion from " + from_type_untyped->getName() + " to " + to_type->getName() +
1876 " is not supported", ErrorCodes::CANNOT_CONVERT_TYPE};
1877 }
1878
1879 WrapperType createArrayWrapper(const DataTypePtr & from_type_untyped, const DataTypeArray * to_type) const
1880 {
1881 /// Conversion from String through parsing.
1882 if (checkAndGetDataType<DataTypeString>(from_type_untyped.get()))
1883 {
1884 return [] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t /*input_rows_count*/)
1885 {
1886 ConvertImplGenericFromString::execute(block, arguments, result);
1887 };
1888 }
1889
1890 DataTypePtr from_nested_type;
1891 DataTypePtr to_nested_type;
1892 auto from_type = checkAndGetDataType<DataTypeArray>(from_type_untyped.get());
1893
1894 /// get the most nested type
1895 if (from_type && to_type)
1896 {
1897 from_nested_type = from_type->getNestedType();
1898 to_nested_type = to_type->getNestedType();
1899
1900 from_type = checkAndGetDataType<DataTypeArray>(from_nested_type.get());
1901 to_type = checkAndGetDataType<DataTypeArray>(to_nested_type.get());
1902 }
1903
1904 /// both from_type and to_type should be nullptr now is array types had same dimensions
1905 if ((from_type == nullptr) != (to_type == nullptr))
1906 throw Exception{"CAST AS Array can only be performed between same-dimensional array types or from String",
1907 ErrorCodes::TYPE_MISMATCH};
1908
1909 /// Prepare nested type conversion
1910 const auto nested_function = prepareUnpackDictionaries(from_nested_type, to_nested_type);
1911
1912 return [nested_function, from_nested_type, to_nested_type](
1913 Block & block, const ColumnNumbers & arguments, const size_t result, size_t /*input_rows_count*/)
1914 {
1915 const auto & array_arg = block.getByPosition(arguments.front());
1916
1917 if (const ColumnArray * col_array = checkAndGetColumn<ColumnArray>(array_arg.column.get()))
1918 {
1919 /// create block for converting nested column containing original and result columns
1920 Block nested_block
1921 {
1922 { col_array->getDataPtr(), from_nested_type, "" },
1923 { nullptr, to_nested_type, "" }
1924 };
1925
1926 /// convert nested column
1927 nested_function(nested_block, {0}, 1, nested_block.rows());
1928
1929 /// set converted nested column to result
1930 block.getByPosition(result).column = ColumnArray::create(nested_block.getByPosition(1).column, col_array->getOffsetsPtr());
1931 }
1932 else
1933 throw Exception{"Illegal column " + array_arg.column->getName() + " for function CAST AS Array", ErrorCodes::LOGICAL_ERROR};
1934 };
1935 }
1936
1937 WrapperType createTupleWrapper(const DataTypePtr & from_type_untyped, const DataTypeTuple * to_type) const
1938 {
1939 /// Conversion from String through parsing.
1940 if (checkAndGetDataType<DataTypeString>(from_type_untyped.get()))
1941 {
1942 return [] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t /*input_rows_count*/)
1943 {
1944 ConvertImplGenericFromString::execute(block, arguments, result);
1945 };
1946 }
1947
1948 const auto from_type = checkAndGetDataType<DataTypeTuple>(from_type_untyped.get());
1949 if (!from_type)
1950 throw Exception{"CAST AS Tuple can only be performed between tuple types or from String.\nLeft type: "
1951 + from_type_untyped->getName() + ", right type: " + to_type->getName(), ErrorCodes::TYPE_MISMATCH};
1952
1953 if (from_type->getElements().size() != to_type->getElements().size())
1954 throw Exception{"CAST AS Tuple can only be performed between tuple types with the same number of elements or from String.\n"
1955 "Left type: " + from_type->getName() + ", right type: " + to_type->getName(), ErrorCodes::TYPE_MISMATCH};
1956
1957 const auto & from_element_types = from_type->getElements();
1958 const auto & to_element_types = to_type->getElements();
1959 std::vector<WrapperType> element_wrappers;
1960 element_wrappers.reserve(from_element_types.size());
1961
1962 /// Create conversion wrapper for each element in tuple
1963 for (const auto idx_type : ext::enumerate(from_type->getElements()))
1964 element_wrappers.push_back(prepareUnpackDictionaries(idx_type.second, to_element_types[idx_type.first]));
1965
1966 return [element_wrappers, from_element_types, to_element_types]
1967 (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count)
1968 {
1969 const auto col = block.getByPosition(arguments.front()).column.get();
1970
1971 /// copy tuple elements to a separate block
1972 Block element_block;
1973
1974 size_t tuple_size = from_element_types.size();
1975 const ColumnTuple & column_tuple = typeid_cast<const ColumnTuple &>(*col);
1976
1977 /// create columns for source elements
1978 for (size_t i = 0; i < tuple_size; ++i)
1979 element_block.insert({ column_tuple.getColumns()[i], from_element_types[i], "" });
1980
1981 /// create columns for converted elements
1982 for (const auto & to_element_type : to_element_types)
1983 element_block.insert({ nullptr, to_element_type, "" });
1984
1985 /// insert column for converted tuple
1986 element_block.insert({ nullptr, std::make_shared<DataTypeTuple>(to_element_types), "" });
1987
1988 /// invoke conversion for each element
1989 for (const auto idx_element_wrapper : ext::enumerate(element_wrappers))
1990 idx_element_wrapper.second(element_block, { idx_element_wrapper.first },
1991 tuple_size + idx_element_wrapper.first, input_rows_count);
1992
1993 Columns converted_columns(tuple_size);
1994 for (size_t i = 0; i < tuple_size; ++i)
1995 converted_columns[i] = element_block.getByPosition(tuple_size + i).column;
1996
1997 block.getByPosition(result).column = ColumnTuple::create(converted_columns);
1998 };
1999 }
2000
2001 template <typename FieldType>
2002 WrapperType createEnumWrapper(const DataTypePtr & from_type, const DataTypeEnum<FieldType> * to_type) const
2003 {
2004 using EnumType = DataTypeEnum<FieldType>;
2005 using Function = typename FunctionTo<EnumType>::Type;
2006
2007 if (const auto from_enum8 = checkAndGetDataType<DataTypeEnum8>(from_type.get()))
2008 checkEnumToEnumConversion(from_enum8, to_type);
2009 else if (const auto from_enum16 = checkAndGetDataType<DataTypeEnum16>(from_type.get()))
2010 checkEnumToEnumConversion(from_enum16, to_type);
2011
2012 if (checkAndGetDataType<DataTypeString>(from_type.get()))
2013 return createStringToEnumWrapper<ColumnString, EnumType>();
2014 else if (checkAndGetDataType<DataTypeFixedString>(from_type.get()))
2015 return createStringToEnumWrapper<ColumnFixedString, EnumType>();
2016 else if (isNativeNumber(from_type) || isEnum(from_type))
2017 {
2018 auto function = Function::create();
2019 auto func_or_adaptor = FunctionOverloadResolverAdaptor(std::make_unique<DefaultOverloadResolver>(function))
2020 .build(ColumnsWithTypeAndName{{nullptr, from_type, "" }});
2021
2022 return [func_or_adaptor] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count)
2023 {
2024 func_or_adaptor->execute(block, arguments, result, input_rows_count);
2025 };
2026 }
2027 else
2028 throw Exception{"Conversion from " + from_type->getName() + " to " + to_type->getName() +
2029 " is not supported", ErrorCodes::CANNOT_CONVERT_TYPE};
2030 }
2031
2032 template <typename EnumTypeFrom, typename EnumTypeTo>
2033 void checkEnumToEnumConversion(const EnumTypeFrom * from_type, const EnumTypeTo * to_type) const
2034 {
2035 const auto & from_values = from_type->getValues();
2036 const auto & to_values = to_type->getValues();
2037
2038 using ValueType = std::common_type_t<typename EnumTypeFrom::FieldType, typename EnumTypeTo::FieldType>;
2039 using NameValuePair = std::pair<std::string, ValueType>;
2040 using EnumValues = std::vector<NameValuePair>;
2041
2042 EnumValues name_intersection;
2043 std::set_intersection(std::begin(from_values), std::end(from_values),
2044 std::begin(to_values), std::end(to_values), std::back_inserter(name_intersection),
2045 [] (auto && from, auto && to) { return from.first < to.first; });
2046
2047 for (const auto & name_value : name_intersection)
2048 {
2049 const auto & old_value = name_value.second;
2050 const auto & new_value = to_type->getValue(name_value.first);
2051 if (old_value != new_value)
2052 throw Exception{"Enum conversion changes value for element '" + name_value.first +
2053 "' from " + toString(old_value) + " to " + toString(new_value), ErrorCodes::CANNOT_CONVERT_TYPE};
2054 }
2055 }
2056
2057 template <typename ColumnStringType, typename EnumType>
2058 WrapperType createStringToEnumWrapper() const
2059 {
2060 const char * function_name = name;
2061 return [function_name] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t /*input_rows_count*/)
2062 {
2063 const auto first_col = block.getByPosition(arguments.front()).column.get();
2064
2065 auto & col_with_type_and_name = block.getByPosition(result);
2066 const auto & result_type = typeid_cast<const EnumType &>(*col_with_type_and_name.type);
2067
2068 if (const auto col = typeid_cast<const ColumnStringType *>(first_col))
2069 {
2070 const auto size = col->size();
2071
2072 auto res = result_type.createColumn();
2073 auto & out_data = static_cast<typename EnumType::ColumnType &>(*res).getData();
2074 out_data.resize(size);
2075
2076 for (const auto i : ext::range(0, size))
2077 out_data[i] = result_type.getValue(col->getDataAt(i));
2078
2079 col_with_type_and_name.column = std::move(res);
2080 }
2081 else
2082 throw Exception{"Unexpected column " + first_col->getName() + " as first argument of function " + function_name,
2083 ErrorCodes::LOGICAL_ERROR};
2084 };
2085 }
2086
2087 WrapperType createIdentityWrapper(const DataTypePtr &) const
2088 {
2089 return [] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t /*input_rows_count*/)
2090 {
2091 block.getByPosition(result).column = block.getByPosition(arguments.front()).column;
2092 };
2093 }
2094
2095 WrapperType createNothingWrapper(const IDataType * to_type) const
2096 {
2097 ColumnPtr res = to_type->createColumnConstWithDefaultValue(1);
2098 return [res] (Block & block, const ColumnNumbers &, const size_t result, size_t input_rows_count)
2099 {
2100 /// Column of Nothing type is trivially convertible to any other column
2101 block.getByPosition(result).column = res->cloneResized(input_rows_count)->convertToFullColumnIfConst();
2102 };
2103 }
2104
    /// Outermost layer of wrapper construction: handles LowCardinality on either side.
    ///
    /// The dictionary types are extracted from LowCardinality source/target types and the
    /// conversion between them is delegated to prepareRemoveNullable. If neither side is
    /// LowCardinality, that wrapper is returned as is. Otherwise the returned wrapper temporarily
    /// swaps dictionary columns/types into the block, runs the nested conversion, restores the
    /// block and rebuilds the result (as LowCardinality or as a full column).
    ///
    /// A source type for which onlyNull() is true yields a wrapper producing default values of
    /// the result type; it throws CANNOT_CONVERT_TYPE if the (unpacked) target is not Nullable.
    ///
    /// NOTE(review): the returned closure captures the raw LowCardinality type pointers obtained
    /// from `from_type` / `to_type`, so it presumably relies on those types outliving it — confirm.
    WrapperType prepareUnpackDictionaries(const DataTypePtr & from_type, const DataTypePtr & to_type) const
    {
        const auto * from_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(from_type.get());
        const auto * to_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(to_type.get());
        /// For LowCardinality types work with the dictionary type, otherwise with the type itself.
        const auto & from_nested = from_low_cardinality ? from_low_cardinality->getDictionaryType() : from_type;
        const auto & to_nested = to_low_cardinality ? to_low_cardinality->getDictionaryType() : to_type;

        if (from_type->onlyNull())
        {
            if (!to_nested->isNullable())
                throw Exception{"Cannot convert NULL to a non-nullable type", ErrorCodes::CANNOT_CONVERT_TYPE};

            /// The argument is ignored: the result is filled with the result type's default value.
            return [](Block & block, const ColumnNumbers &, const size_t result, size_t input_rows_count)
            {
                auto & res = block.getByPosition(result);
                res.column = res.type->createColumnConstWithDefaultValue(input_rows_count)->convertToFullColumnIfConst();
            };
        }

        bool skip_not_null_check = false;

        if (from_low_cardinality && from_nested->isNullable() && !to_nested->isNullable())
            /// Disable check for dictionary. Will check that column doesn't contain NULL in wrapper below.
            skip_not_null_check = true;

        auto wrapper = prepareRemoveNullable(from_nested, to_nested, skip_not_null_check);
        if (!from_low_cardinality && !to_low_cardinality)
            return wrapper;

        return [wrapper, from_low_cardinality, to_low_cardinality, skip_not_null_check]
                (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count)
        {
            auto & arg = block.getByPosition(arguments[0]);
            auto & res = block.getByPosition(result);

            /// Indexes of the source LowCardinality column; set only when the dictionary itself is converted.
            ColumnPtr res_indexes;
            /// For some types default can't be casted (for example, String to Int). In that case convert column to full.
            bool src_converted_to_full_column = false;

            {
                /// Replace argument and result columns (and types) to dictionary key columns (and types).
                /// Call nested wrapper in order to cast dictionary keys. Then restore block.
                auto prev_arg_col = arg.column;
                auto prev_arg_type = arg.type;
                auto prev_res_type = res.type;

                auto tmp_rows_count = input_rows_count;

                if (to_low_cardinality)
                    res.type = to_low_cardinality->getDictionaryType();

                if (from_low_cardinality)
                {
                    /// NOTE(review): the cast result is dereferenced without a null check, so the argument
                    /// is assumed to always be a ColumnLowCardinality here — confirm.
                    auto * col_low_cardinality = typeid_cast<const ColumnLowCardinality *>(prev_arg_col.get());

                    if (skip_not_null_check && col_low_cardinality->containsNull())
                        throw Exception{"Cannot convert NULL value to non-Nullable type",
                                        ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN};

                    arg.column = col_low_cardinality->getDictionary().getNestedColumn();
                    arg.type = from_low_cardinality->getDictionaryType();

                    /// TODO: Make map with defaults conversion.
                    /// If the types differ (ignoring Nullable), materialize the source rows through the
                    /// indexes; otherwise convert only the dictionary and keep the indexes for later.
                    src_converted_to_full_column = !removeNullable(arg.type)->equals(*removeNullable(res.type));
                    if (src_converted_to_full_column)
                        arg.column = arg.column->index(col_low_cardinality->getIndexes(), 0);
                    else
                        res_indexes = col_low_cardinality->getIndexesPtr();

                    /// The nested conversion runs over the substituted column, whose size may differ
                    /// from input_rows_count.
                    tmp_rows_count = arg.column->size();
                }

                /// Perform the requested conversion.
                wrapper(block, arguments, result, tmp_rows_count);

                /// Restore the original argument column/type and result type.
                arg.column = prev_arg_col;
                arg.type = prev_arg_type;
                res.type = prev_res_type;
            }

            if (to_low_cardinality)
            {
                auto res_column = to_low_cardinality->createColumn();
                auto * col_low_cardinality = typeid_cast<ColumnLowCardinality *>(res_column.get());

                if (from_low_cardinality && !src_converted_to_full_column)
                {
                    /// Converted dictionary keys plus the source indexes.
                    auto res_keys = std::move(res.column);
                    col_low_cardinality->insertRangeFromDictionaryEncodedColumn(*res_keys, *res_indexes);
                }
                else
                    col_low_cardinality->insertRangeFromFullColumn(*res.column, 0, res.column->size());

                res.column = std::move(res_column);
            }
            /// Target is not LowCardinality, hence the source is: expand the converted keys through the source indexes.
            else if (!src_converted_to_full_column)
                res.column = res.column->index(*res_indexes, 0);
        };
    }
2204
    /// Second layer of wrapper construction: handles Nullable on either side.
    ///
    /// The conversion between the types with Nullable removed is obtained from prepareImpl and,
    /// depending on the nullability of source and target, is
    ///  - wrapped so that the result is put back into a Nullable column (Nullable target);
    ///  - wrapped with a "no NULLs present" check (Nullable source, non-Nullable target), unless
    ///    `skip_not_null_check` is set;
    ///  - returned unchanged (neither side Nullable).
    WrapperType prepareRemoveNullable(const DataTypePtr & from_type, const DataTypePtr & to_type, bool skip_not_null_check) const
    {
        /// Determine whether pre-processing and/or post-processing must take place during conversion.

        bool source_is_nullable = from_type->isNullable();
        bool result_is_nullable = to_type->isNullable();

        auto wrapper = prepareImpl(removeNullable(from_type), removeNullable(to_type), result_is_nullable);

        if (result_is_nullable)
        {
            return [wrapper, source_is_nullable]
                (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count)
            {
                /// Create a temporary block on which to perform the operation.
                auto & res = block.getByPosition(result);
                const auto & ret_type = res.type;
                const auto & nullable_type = static_cast<const DataTypeNullable &>(*ret_type);
                const auto & nested_type = nullable_type.getNestedType();

                /// NOTE(review): createBlockWithNestedColumns presumably replaces Nullable argument
                /// columns with their nested columns — confirm against its definition.
                Block tmp_block;
                if (source_is_nullable)
                    tmp_block = createBlockWithNestedColumns(block, arguments);
                else
                    tmp_block = block;

                /// The nested result is written into an extra column inserted into the temporary block;
                /// its position is taken as the column count of the original block.
                size_t tmp_res_index = block.columns();
                tmp_block.insert({nullptr, nested_type, ""});

                /// Perform the requested conversion.
                wrapper(tmp_block, arguments, tmp_res_index, input_rows_count);

                const auto & tmp_res = tmp_block.getByPosition(tmp_res_index);

                /// May happen in fuzzy tests. For debug purpose.
                if (!tmp_res.column)
                    throw Exception("Couldn't convert " + block.getByPosition(arguments[0]).type->getName() + " to "
                                    + nested_type->getName() + " in " + " prepareRemoveNullable wrapper.", ErrorCodes::LOGICAL_ERROR);

                /// Wrap the nested result into a Nullable column, passing the original (possibly Nullable)
                /// argument alongside it.
                res.column = wrapInNullable(tmp_res.column, Block({block.getByPosition(arguments[0]), tmp_res}), {0}, 1, input_rows_count);
            };
        }
        else if (source_is_nullable)
        {
            /// Conversion from Nullable to non-Nullable.

            return [wrapper, skip_not_null_check] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count)
            {
                Block tmp_block = createBlockWithNestedColumns(block, arguments, result);

                /// Check that all values are not-NULL.
                /// Check can be skipped in case if LowCardinality dictionary is transformed.
                /// In that case, correctness will be checked beforehand.
                if (!skip_not_null_check)
                {
                    const auto & col = block.getByPosition(arguments[0]).column;
                    const auto & nullable_col = assert_cast<const ColumnNullable &>(*col);
                    const auto & null_map = nullable_col.getNullMapData();

                    /// Any non-zero byte in the null map means that a NULL is present.
                    if (!memoryIsZero(null_map.data(), null_map.size()))
                        throw Exception{"Cannot convert NULL value to non-Nullable type",
                                        ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN};
                }

                /// Convert inside the temporary block and copy the result column back.
                wrapper(tmp_block, arguments, result, input_rows_count);
                block.getByPosition(result).column = tmp_block.getByPosition(result).column;
            };
        }
        else
            return wrapper;
    }
2276
2277 /// 'from_type' and 'to_type' are nested types in case of Nullable.
2278 /// 'requested_result_is_nullable' is true if CAST to Nullable type is requested.
2279 WrapperType prepareImpl(const DataTypePtr & from_type, const DataTypePtr & to_type, bool requested_result_is_nullable) const
2280 {
2281 if (from_type->equals(*to_type))
2282 return createIdentityWrapper(from_type);
2283 else if (WhichDataType(from_type).isNothing())
2284 return createNothingWrapper(to_type.get());
2285
2286 WrapperType ret;
2287
2288 auto make_default_wrapper = [&](const auto & types) -> bool
2289 {
2290 using Types = std::decay_t<decltype(types)>;
2291 using ToDataType = typename Types::LeftType;
2292
2293 if constexpr (
2294 std::is_same_v<ToDataType, DataTypeUInt8> ||
2295 std::is_same_v<ToDataType, DataTypeUInt16> ||
2296 std::is_same_v<ToDataType, DataTypeUInt32> ||
2297 std::is_same_v<ToDataType, DataTypeUInt64> ||
2298 std::is_same_v<ToDataType, DataTypeInt8> ||
2299 std::is_same_v<ToDataType, DataTypeInt16> ||
2300 std::is_same_v<ToDataType, DataTypeInt32> ||
2301 std::is_same_v<ToDataType, DataTypeInt64> ||
2302 std::is_same_v<ToDataType, DataTypeFloat32> ||
2303 std::is_same_v<ToDataType, DataTypeFloat64> ||
2304 std::is_same_v<ToDataType, DataTypeDate> ||
2305 std::is_same_v<ToDataType, DataTypeDateTime>)
2306 {
2307 ret = createWrapper(from_type, checkAndGetDataType<ToDataType>(to_type.get()), requested_result_is_nullable);
2308 return true;
2309 }
2310 if constexpr (
2311 std::is_same_v<ToDataType, DataTypeEnum8> ||
2312 std::is_same_v<ToDataType, DataTypeEnum16>)
2313 {
2314 ret = createEnumWrapper(from_type, checkAndGetDataType<ToDataType>(to_type.get()));
2315 return true;
2316 }
2317 if constexpr (
2318 std::is_same_v<ToDataType, DataTypeDecimal<Decimal32>> ||
2319 std::is_same_v<ToDataType, DataTypeDecimal<Decimal64>> ||
2320 std::is_same_v<ToDataType, DataTypeDecimal<Decimal128>> ||
2321 std::is_same_v<ToDataType, DataTypeDateTime64>)
2322 {
2323 ret = createDecimalWrapper(from_type, checkAndGetDataType<ToDataType>(to_type.get()));
2324 return true;
2325 }
2326 if constexpr (std::is_same_v<ToDataType, DataTypeUUID>)
2327 {
2328 if (isStringOrFixedString(from_type))
2329 {
2330 ret = createUUIDWrapper(from_type, checkAndGetDataType<ToDataType>(to_type.get()), requested_result_is_nullable);
2331 return true;
2332 }
2333 }
2334
2335 return false;
2336 };
2337
2338 if (callOnIndexAndDataType<void>(to_type->getTypeId(), make_default_wrapper))
2339 return ret;
2340
2341 switch (to_type->getTypeId())
2342 {
2343 case TypeIndex::String:
2344 return createStringWrapper(from_type);
2345 case TypeIndex::FixedString:
2346 return createFixedStringWrapper(from_type, checkAndGetDataType<DataTypeFixedString>(to_type.get())->getN());
2347
2348 case TypeIndex::Array:
2349 return createArrayWrapper(from_type, checkAndGetDataType<DataTypeArray>(to_type.get()));
2350 case TypeIndex::Tuple:
2351 return createTupleWrapper(from_type, checkAndGetDataType<DataTypeTuple>(to_type.get()));
2352
2353 case TypeIndex::AggregateFunction:
2354 return createAggregateFunctionWrapper(from_type, checkAndGetDataType<DataTypeAggregateFunction>(to_type.get()));
2355 default:
2356 break;
2357 }
2358
2359 throw Exception{"Conversion from " + from_type->getName() + " to " + to_type->getName() + " is not supported",
2360 ErrorCodes::CANNOT_CONVERT_TYPE};
2361 }
2362};
2363
2364class CastOverloadResolver : public IFunctionOverloadResolverImpl
2365{
2366public:
2367 using MonotonicityForRange = FunctionCast::MonotonicityForRange;
2368
2369 static constexpr auto name = "CAST";
2370 static FunctionOverloadResolverImplPtr create(const Context &) { return createImpl(); }
2371 static FunctionOverloadResolverImplPtr createImpl() { return std::make_unique<CastOverloadResolver>(); }
2372
2373 CastOverloadResolver() {}
2374
2375 String getName() const override { return name; }
2376
2377 size_t getNumberOfArguments() const override { return 2; }
2378
2379 ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
2380
2381protected:
2382
2383 FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
2384 {
2385 DataTypes data_types(arguments.size());
2386
2387 for (size_t i = 0; i < arguments.size(); ++i)
2388 data_types[i] = arguments[i].type;
2389
2390 auto monotonicity = getMonotonicityInformation(arguments.front().type, return_type.get());
2391 return std::make_unique<FunctionCast>(name, std::move(monotonicity), data_types, return_type);
2392 }
2393
2394 DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const override
2395 {
2396 const auto type_col = checkAndGetColumnConst<ColumnString>(arguments.back().column.get());
2397 if (!type_col)
2398 throw Exception("Second argument to " + getName() + " must be a constant string describing type",
2399 ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
2400
2401 return DataTypeFactory::instance().get(type_col->getValue<String>());
2402 }
2403
2404 bool useDefaultImplementationForNulls() const override { return false; }
2405 bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
2406
2407private:
2408 template <typename DataType>
2409 static auto monotonicityForType(const DataType * const)
2410 {
2411 return FunctionTo<DataType>::Type::Monotonic::get;
2412 }
2413
2414 MonotonicityForRange getMonotonicityInformation(const DataTypePtr & from_type, const IDataType * to_type) const
2415 {
2416 if (const auto type = checkAndGetDataType<DataTypeUInt8>(to_type))
2417 return monotonicityForType(type);
2418 if (const auto type = checkAndGetDataType<DataTypeUInt16>(to_type))
2419 return monotonicityForType(type);
2420 if (const auto type = checkAndGetDataType<DataTypeUInt32>(to_type))
2421 return monotonicityForType(type);
2422 if (const auto type = checkAndGetDataType<DataTypeUInt64>(to_type))
2423 return monotonicityForType(type);
2424 if (const auto type = checkAndGetDataType<DataTypeInt8>(to_type))
2425 return monotonicityForType(type);
2426 if (const auto type = checkAndGetDataType<DataTypeInt16>(to_type))
2427 return monotonicityForType(type);
2428 if (const auto type = checkAndGetDataType<DataTypeInt32>(to_type))
2429 return monotonicityForType(type);
2430 if (const auto type = checkAndGetDataType<DataTypeInt64>(to_type))
2431 return monotonicityForType(type);
2432 if (const auto type = checkAndGetDataType<DataTypeFloat32>(to_type))
2433 return monotonicityForType(type);
2434 if (const auto type = checkAndGetDataType<DataTypeFloat64>(to_type))
2435 return monotonicityForType(type);
2436 if (const auto type = checkAndGetDataType<DataTypeDate>(to_type))
2437 return monotonicityForType(type);
2438 if (const auto type = checkAndGetDataType<DataTypeDateTime>(to_type))
2439 return monotonicityForType(type);
2440 if (const auto type = checkAndGetDataType<DataTypeString>(to_type))
2441 return monotonicityForType(type);
2442 if (isEnum(from_type))
2443 {
2444 if (const auto type = checkAndGetDataType<DataTypeEnum8>(to_type))
2445 return monotonicityForType(type);
2446 if (const auto type = checkAndGetDataType<DataTypeEnum16>(to_type))
2447 return monotonicityForType(type);
2448 }
2449 /// other types like Null, FixedString, Array and Tuple have no monotonicity defined
2450 return {};
2451 }
2452};
2453
2454}
2455