| 1 | #include <Interpreters/convertFieldToType.h> |
| 2 | |
| 3 | #include <IO/ReadBufferFromString.h> |
| 4 | #include <IO/ReadHelpers.h> |
| 5 | |
| 6 | #include <DataTypes/DataTypeArray.h> |
| 7 | #include <DataTypes/DataTypeTuple.h> |
| 8 | #include <DataTypes/DataTypesNumber.h> |
| 9 | #include <DataTypes/DataTypesDecimal.h> |
| 10 | #include <DataTypes/DataTypeString.h> |
| 11 | #include <DataTypes/DataTypeFixedString.h> |
| 12 | #include <DataTypes/DataTypeDate.h> |
| 13 | #include <DataTypes/DataTypeDateTime.h> |
| 14 | #include <DataTypes/DataTypeDateTime64.h> |
| 15 | #include <DataTypes/DataTypeEnum.h> |
| 16 | #include <DataTypes/DataTypeNullable.h> |
| 17 | |
| 18 | #include <Core/AccurateComparison.h> |
| 19 | #include <Common/FieldVisitors.h> |
| 20 | #include <Common/typeid_cast.h> |
| 21 | #include <Common/NaNUtils.h> |
| 22 | #include <DataTypes/DataTypeUUID.h> |
| 23 | #include <DataTypes/DataTypeLowCardinality.h> |
| 24 | |
| 25 | #include <common/DateLUT.h> |
| 26 | #include <DataTypes/DataTypeAggregateFunction.h> |
| 27 | |
| 28 | |
| 29 | namespace DB |
| 30 | { |
| 31 | |
/// Error codes thrown from this file. These are declarations only; the
/// constants themselves are defined elsewhere in the project.
/// ARGUMENT_OUT_OF_BOUND is used by convertIntToDecimalType and is declared
/// here explicitly so the file does not depend on another header declaring it.
namespace ErrorCodes
{
    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int LOGICAL_ERROR;
    extern const int TYPE_MISMATCH;
    extern const int TOO_LARGE_STRING_SIZE;
}
| 38 | |
| 39 | |
/** Checks whether the value of `Field from`, which has type `From`, falls into the range of values of type `To`.
  * `From` and `To` are numeric types. They can be floating-point types.
  * `From` is one of UInt64, Int64, Float64,
  *  whereas `To` can also be 8, 16, 32 bit.
  *
  * If it falls into the range, `from` is converted to the `Field` closest to the `To` type.
  * If not, Field(Null) is returned.
  */
| 48 | |
| 49 | namespace |
| 50 | { |
| 51 | |
| 52 | template <typename From, typename To> |
| 53 | static Field convertNumericTypeImpl(const Field & from) |
| 54 | { |
| 55 | To result; |
| 56 | if (!accurate::convertNumeric(from.get<From>(), result)) |
| 57 | return {}; |
| 58 | return result; |
| 59 | } |
| 60 | |
| 61 | template <typename To> |
| 62 | static Field convertNumericType(const Field & from, const IDataType & type) |
| 63 | { |
| 64 | if (from.getType() == Field::Types::UInt64) |
| 65 | return convertNumericTypeImpl<UInt64, To>(from); |
| 66 | if (from.getType() == Field::Types::Int64) |
| 67 | return convertNumericTypeImpl<Int64, To>(from); |
| 68 | if (from.getType() == Field::Types::Float64) |
| 69 | return convertNumericTypeImpl<Float64, To>(from); |
| 70 | |
| 71 | throw Exception("Type mismatch in IN or VALUES section. Expected: " + type.getName() + ". Got: " |
| 72 | + Field::Types::toString(from.getType()), ErrorCodes::TYPE_MISMATCH); |
| 73 | } |
| 74 | |
| 75 | |
| 76 | template <typename From, typename T> |
| 77 | static Field convertIntToDecimalType(const Field & from, const DataTypeDecimal<T> & type) |
| 78 | { |
| 79 | From value = from.get<From>(); |
| 80 | if (!type.canStoreWhole(value)) |
| 81 | throw Exception("Number is too much to place in " + type.getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); |
| 82 | |
| 83 | T scaled_value = type.getScaleMultiplier() * value; |
| 84 | return DecimalField<T>(scaled_value, type.getScale()); |
| 85 | } |
| 86 | |
| 87 | |
| 88 | template <typename T> |
| 89 | static Field convertStringToDecimalType(const Field & from, const DataTypeDecimal<T> & type) |
| 90 | { |
| 91 | const String & str_value = from.get<String>(); |
| 92 | T value = type.parseFromString(str_value); |
| 93 | return DecimalField<T>(value, type.getScale()); |
| 94 | } |
| 95 | |
| 96 | template <typename From, typename T> |
| 97 | static Field convertDecimalToDecimalType(const Field & from, const DataTypeDecimal<T> & type) |
| 98 | { |
| 99 | auto field = from.get<DecimalField<From>>(); |
| 100 | T value = convertDecimals<DataTypeDecimal<From>, DataTypeDecimal<T>>(field.getValue(), field.getScale(), type.getScale()); |
| 101 | return DecimalField<T>(value, type.getScale()); |
| 102 | } |
| 103 | |
| 104 | template <typename To> |
| 105 | static Field convertDecimalType(const Field & from, const To & type) |
| 106 | { |
| 107 | if (from.getType() == Field::Types::UInt64) |
| 108 | return convertIntToDecimalType<UInt64>(from, type); |
| 109 | if (from.getType() == Field::Types::Int64) |
| 110 | return convertIntToDecimalType<Int64>(from, type); |
| 111 | if (from.getType() == Field::Types::String) |
| 112 | return convertStringToDecimalType(from, type); |
| 113 | |
| 114 | if (from.getType() == Field::Types::Decimal32) |
| 115 | return convertDecimalToDecimalType<Decimal32>(from, type); |
| 116 | if (from.getType() == Field::Types::Decimal64) |
| 117 | return convertDecimalToDecimalType<Decimal64>(from, type); |
| 118 | if (from.getType() == Field::Types::Decimal128) |
| 119 | return convertDecimalToDecimalType<Decimal128>(from, type); |
| 120 | |
| 121 | throw Exception("Type mismatch in IN or VALUES section. Expected: " + type.getName() + ". Got: " |
| 122 | + Field::Types::toString(from.getType()), ErrorCodes::TYPE_MISMATCH); |
| 123 | } |
| 124 | |
| 125 | |
| 126 | DayNum stringToDate(const String & s) |
| 127 | { |
| 128 | ReadBufferFromString in(s); |
| 129 | DayNum date{}; |
| 130 | |
| 131 | readDateText(date, in); |
| 132 | if (!in.eof()) |
| 133 | throw Exception("String is too long for Date: " + s, ErrorCodes::TOO_LARGE_STRING_SIZE); |
| 134 | |
| 135 | return date; |
| 136 | } |
| 137 | |
| 138 | UInt64 stringToDateTime(const String & s) |
| 139 | { |
| 140 | ReadBufferFromString in(s); |
| 141 | time_t date_time{}; |
| 142 | |
| 143 | readDateTimeText(date_time, in); |
| 144 | if (!in.eof()) |
| 145 | throw Exception("String is too long for DateTime: " + s, ErrorCodes::TOO_LARGE_STRING_SIZE); |
| 146 | |
| 147 | return UInt64(date_time); |
| 148 | } |
| 149 | |
| 150 | DateTime64::NativeType stringToDateTime64(const String & s, UInt32 scale) |
| 151 | { |
| 152 | ReadBufferFromString in(s); |
| 153 | DateTime64 datetime64 {0}; |
| 154 | |
| 155 | readDateTime64Text(datetime64, scale, in); |
| 156 | if (!in.eof()) |
| 157 | throw Exception("String is too long for DateTime64: " + s, ErrorCodes::TOO_LARGE_STRING_SIZE); |
| 158 | |
| 159 | return datetime64.value; |
| 160 | } |
| 161 | |
/** Converts `src` into a Field appropriate for the data type `type`.
  *
  * `from_type_hint` is optional (may be nullptr). When present, it is used to return `src`
  * untouched if it equals `type`, and to recognise Date <-> DateTime conversions.
  *
  * The function is a chain of branches keyed on `type`. A branch that does not return
  * falls through to the two final steps at the bottom: a generic attempt to parse a String
  * source as text, and finally a TYPE_MISMATCH exception.
  *
  * Returns the converted Field. Numeric conversions may return a default-constructed Field
  * when the value does not fit, and Array/Tuple conversions return Field(Null()) when an element
  * of a non-nullable element type converted to Null.
  *
  * Throws TYPE_MISMATCH on incompatible source/target combinations (including tuple size mismatch
  * and aggregate function name mismatch) and LOGICAL_ERROR for an unrecognised numeric type.
  * The helpers called from here may throw as well.
  */
Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const IDataType * from_type_hint)
{
    WhichDataType which_type(type);
    /// Stays default-constructed when no hint is given.
    /// NOTE(review): presumably none of its is*() checks match in that case - confirm.
    WhichDataType which_from_type;
    if (from_type_hint)
    {
        which_from_type = WhichDataType(*from_type_hint);

        // This was added to mitigate converting DateTime64-Field (a typedef to a Decimal64) to DataTypeDate64-compatible type.
        // NOTE(review): `from_type_hint` is already known to be non-null here, so the first half of the condition is redundant.
        if (from_type_hint && from_type_hint->equals(type))
        {
            return src;
        }
    }

    /// Conversion between Date and DateTime and vice versa.
    if (which_type.isDate() && which_from_type.isDateTime())
    {
        /// The time zone of the source DateTime type is used to compute the day number.
        return static_cast<const DataTypeDateTime &>(*from_type_hint).getTimeZone().toDayNum(src.get<UInt64>());
    }
    else if (which_type.isDateTime() && which_from_type.isDate())
    {
        /// The time zone of the target DateTime type is used to turn the day number into a timestamp.
        return static_cast<const DataTypeDateTime &>(type).getTimeZone().fromDayNum(DayNum(src.get<UInt64>()));
    }
    else if (type.isValueRepresentedByNumber())
    {
        /// Plain integer and floating-point targets.
        if (which_type.isUInt8()) return convertNumericType<UInt8>(src, type);
        if (which_type.isUInt16()) return convertNumericType<UInt16>(src, type);
        if (which_type.isUInt32()) return convertNumericType<UInt32>(src, type);
        if (which_type.isUInt64()) return convertNumericType<UInt64>(src, type);
        if (which_type.isInt8()) return convertNumericType<Int8>(src, type);
        if (which_type.isInt16()) return convertNumericType<Int16>(src, type);
        if (which_type.isInt32()) return convertNumericType<Int32>(src, type);
        if (which_type.isInt64()) return convertNumericType<Int64>(src, type);
        if (which_type.isFloat32()) return convertNumericType<Float32>(src, type);
        if (which_type.isFloat64()) return convertNumericType<Float64>(src, type);
        /// Decimal targets of each width.
        if (auto * ptype = typeid_cast<const DataTypeDecimal<Decimal32> *>(&type)) return convertDecimalType(src, *ptype);
        if (auto * ptype = typeid_cast<const DataTypeDecimal<Decimal64> *>(&type)) return convertDecimalType(src, *ptype);
        if (auto * ptype = typeid_cast<const DataTypeDecimal<Decimal128> *>(&type)) return convertDecimalType(src, *ptype);

        /// The only number-represented types left to handle below are Date/DateTime, UUID and Enum.
        if (!which_type.isDateOrDateTime() && !which_type.isUUID() && !which_type.isEnum())
            throw Exception{"Logical error: unknown numeric type " + type.getName(), ErrorCodes::LOGICAL_ERROR};

        if (which_type.isEnum() && (src.getType() == Field::Types::UInt64 || src.getType() == Field::Types::Int64))
        {
            /// Convert UInt64 or Int64 to Enum's value
            return dynamic_cast<const IDataTypeEnum &>(type).castToValue(src);
        }

        if (which_type.isDateOrDateTime() && !which_type.isDateTime64() && src.getType() == Field::Types::UInt64)
        {
            /// We don't need any conversion UInt64 is under type of Date and DateTime
            return src;
        }
        // TODO (vnemkov): extra cases for DateTime64: converting from integer, converting from Decimal

        if (src.getType() == Field::Types::String)
        {
            if (which_type.isDate())
            {
                /// Convert 'YYYY-MM-DD' Strings to Date
                return stringToDate(src.get<const String &>());
            }
            else if (which_type.isDateTime())
            {
                /// Convert 'YYYY-MM-DD hh:mm:ss' Strings to DateTime
                return stringToDateTime(src.get<const String &>());
            }
            else if (which_type.isDateTime64())
            {
                /// The result of the cast is not checked for nullptr; the isDateTime64() check above is relied upon.
                const auto date_time64 = typeid_cast<const DataTypeDateTime64 *>(&type);
                /// Convert 'YYYY-MM-DD hh:mm:ss.NNNNNNNNN' Strings to DateTime
                return stringToDateTime64(src.get<const String &>(), date_time64->getScale());
            }
            else if (which_type.isUUID())
            {
                return stringToUUID(src.get<const String &>());
            }
            else if (which_type.isEnum())
            {
                /// Convert String to Enum's value
                return dynamic_cast<const IDataTypeEnum &>(type).castToValue(src);
            }
        }
        /// Anything not returned above falls through to the generic handling at the end of the function.
    }
    else if (which_type.isStringOrFixedString())
    {
        /// A String source is returned unchanged for both String and FixedString targets.
        if (src.getType() == Field::Types::String)
            return src;
    }
    else if (const DataTypeArray * type_array = typeid_cast<const DataTypeArray *>(&type))
    {
        if (src.getType() == Field::Types::Array)
        {
            const Array & src_arr = src.get<Array>();
            size_t src_arr_size = src_arr.size();

            auto & element_type = *(type_array->getNestedType());
            bool have_unconvertible_element = false;
            Array res(src_arr_size);
            /// Every element is converted even after a failure, so that later elements still get checked.
            for (size_t i = 0; i < src_arr_size; ++i)
            {
                res[i] = convertFieldToType(src_arr[i], element_type);
                if (res[i].isNull() && !element_type.isNullable())
                {
                    // See the comment for Tuples below.
                    have_unconvertible_element = true;
                }
            }

            return have_unconvertible_element ? Field(Null()) : Field(res);
        }
    }
    else if (const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(&type))
    {
        if (src.getType() == Field::Types::Tuple)
        {
            const auto & src_tuple = src.get<Tuple>();
            size_t src_tuple_size = src_tuple.size();
            size_t dst_tuple_size = type_tuple->getElements().size();

            if (dst_tuple_size != src_tuple_size)
                throw Exception("Bad size of tuple in IN or VALUES section. Expected size: "
                    + toString(dst_tuple_size) + ", actual size: " + toString(src_tuple_size), ErrorCodes::TYPE_MISMATCH);

            Tuple res(dst_tuple_size);
            bool have_unconvertible_element = false;
            for (size_t i = 0; i < dst_tuple_size; ++i)
            {
                auto & element_type = *(type_tuple->getElements()[i]);
                res[i] = convertFieldToType(src_tuple[i], element_type);
                /// A non-Null result, or a Null for a nullable element type, is fine.
                if (!res[i].isNull() || element_type.isNullable())
                    continue;

                /*
                 * Either the source element was Null, or the conversion did not
                 * succeed, because the source and the requested types of the
                 * element are compatible, but the value is not convertible
                 * (e.g. trying to convert -1 from Int8 to UInt8). In these
                 * cases, consider the whole tuple also compatible but not
                 * convertible. According to the specification of this function,
                 * we must return Null in this case.
                 *
                 * The following elements might be not even compatible, so it
                 * makes sense to check them to detect user errors. Remember
                 * that there is an unconvertible element, and try to process
                 * the remaining ones. The convertFieldToType for each element
                 * will throw if it detects incompatibility.
                 */
                have_unconvertible_element = true;
            }

            return have_unconvertible_element ? Field(Null()) : Field(res);
        }
    }
    else if (const DataTypeAggregateFunction * agg_func_type = typeid_cast<const DataTypeAggregateFunction *>(&type))
    {
        /// Only an aggregate function state can be converted to an aggregate function type.
        if (src.getType() != Field::Types::AggregateFunctionState)
            throw Exception(String("Cannot convert " ) + src.getTypeName() + " to " + agg_func_type->getName(),
                ErrorCodes::TYPE_MISMATCH);

        /// The name stored in the state must match the name of the target type exactly.
        auto & name = src.get<AggregateFunctionStateData>().name;
        if (agg_func_type->getName() != name)
            throw Exception("Cannot convert " + name + " to " + agg_func_type->getName(), ErrorCodes::TYPE_MISMATCH);

        return src;
    }

    /// Generic fallback for a String source that no branch above returned for:
    /// deserialize the text into a freshly created column of the target type and return its first element.
    if (src.getType() == Field::Types::String)
    {
        const auto col = type.createColumn();
        ReadBufferFromString buffer(src.get<String>());
        type.deserializeAsTextEscaped(*col, buffer, FormatSettings{});

        return (*col)[0];
    }


    // TODO (nemkov): should we attempt to parse value using or `type.deserializeAsTextEscaped()` type.deserializeAsTextEscaped() ?
    throw Exception("Type mismatch in IN or VALUES section. Expected: " + type.getName() + ". Got: "
        + Field::Types::toString(src.getType()), ErrorCodes::TYPE_MISMATCH);
}
| 344 | |
| 345 | } |
| 346 | |
| 347 | Field convertFieldToType(const Field & from_value, const IDataType & to_type, const IDataType * from_type_hint) |
| 348 | { |
| 349 | if (from_value.isNull()) |
| 350 | return from_value; |
| 351 | |
| 352 | if (from_type_hint && from_type_hint->equals(to_type)) |
| 353 | return from_value; |
| 354 | |
| 355 | if (auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(&to_type)) |
| 356 | return convertFieldToType(from_value, *low_cardinality_type->getDictionaryType(), from_type_hint); |
| 357 | else if (auto * nullable_type = typeid_cast<const DataTypeNullable *>(&to_type)) |
| 358 | { |
| 359 | const IDataType & nested_type = *nullable_type->getNestedType(); |
| 360 | if (from_type_hint && from_type_hint->equals(nested_type)) |
| 361 | return from_value; |
| 362 | return convertFieldToTypeImpl(from_value, nested_type, from_type_hint); |
| 363 | } |
| 364 | else |
| 365 | return convertFieldToTypeImpl(from_value, to_type, from_type_hint); |
| 366 | } |
| 367 | |
| 368 | |
| 369 | } |
| 370 | |