1 | #include <Interpreters/convertFieldToType.h> |
2 | |
3 | #include <IO/ReadBufferFromString.h> |
4 | #include <IO/ReadHelpers.h> |
5 | |
6 | #include <DataTypes/DataTypeArray.h> |
7 | #include <DataTypes/DataTypeTuple.h> |
8 | #include <DataTypes/DataTypesNumber.h> |
9 | #include <DataTypes/DataTypesDecimal.h> |
10 | #include <DataTypes/DataTypeString.h> |
11 | #include <DataTypes/DataTypeFixedString.h> |
12 | #include <DataTypes/DataTypeDate.h> |
13 | #include <DataTypes/DataTypeDateTime.h> |
14 | #include <DataTypes/DataTypeDateTime64.h> |
15 | #include <DataTypes/DataTypeEnum.h> |
16 | #include <DataTypes/DataTypeNullable.h> |
17 | |
18 | #include <Core/AccurateComparison.h> |
19 | #include <Common/FieldVisitors.h> |
20 | #include <Common/typeid_cast.h> |
21 | #include <Common/NaNUtils.h> |
22 | #include <DataTypes/DataTypeUUID.h> |
23 | #include <DataTypes/DataTypeLowCardinality.h> |
24 | |
25 | #include <common/DateLUT.h> |
26 | #include <DataTypes/DataTypeAggregateFunction.h> |
27 | |
28 | |
29 | namespace DB |
30 | { |
31 | |
/// Error codes thrown from this translation unit. They are only declared here (`extern`);
/// every code used below is listed explicitly so the file does not depend on another
/// header happening to declare it.
namespace ErrorCodes
{
    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int LOGICAL_ERROR;
    extern const int TYPE_MISMATCH;
    extern const int TOO_LARGE_STRING_SIZE;
}
38 | |
39 | |
/** Checks whether the value of type `From` held in `Field from` falls within the range of values of type `To`.
  * `From` and `To` are numeric types; they may be floating-point types.
  * `From` is one of UInt64, Int64, Float64,
  *  whereas `To` can also be 8, 16 or 32 bit.
  *
  * If the value falls within the range, `from` is converted to a `Field` holding the closest value of type `To`.
  * If not, Field(Null) is returned.
  */
48 | |
49 | namespace |
50 | { |
51 | |
52 | template <typename From, typename To> |
53 | static Field convertNumericTypeImpl(const Field & from) |
54 | { |
55 | To result; |
56 | if (!accurate::convertNumeric(from.get<From>(), result)) |
57 | return {}; |
58 | return result; |
59 | } |
60 | |
61 | template <typename To> |
62 | static Field convertNumericType(const Field & from, const IDataType & type) |
63 | { |
64 | if (from.getType() == Field::Types::UInt64) |
65 | return convertNumericTypeImpl<UInt64, To>(from); |
66 | if (from.getType() == Field::Types::Int64) |
67 | return convertNumericTypeImpl<Int64, To>(from); |
68 | if (from.getType() == Field::Types::Float64) |
69 | return convertNumericTypeImpl<Float64, To>(from); |
70 | |
71 | throw Exception("Type mismatch in IN or VALUES section. Expected: " + type.getName() + ". Got: " |
72 | + Field::Types::toString(from.getType()), ErrorCodes::TYPE_MISMATCH); |
73 | } |
74 | |
75 | |
76 | template <typename From, typename T> |
77 | static Field convertIntToDecimalType(const Field & from, const DataTypeDecimal<T> & type) |
78 | { |
79 | From value = from.get<From>(); |
80 | if (!type.canStoreWhole(value)) |
81 | throw Exception("Number is too much to place in " + type.getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); |
82 | |
83 | T scaled_value = type.getScaleMultiplier() * value; |
84 | return DecimalField<T>(scaled_value, type.getScale()); |
85 | } |
86 | |
87 | |
88 | template <typename T> |
89 | static Field convertStringToDecimalType(const Field & from, const DataTypeDecimal<T> & type) |
90 | { |
91 | const String & str_value = from.get<String>(); |
92 | T value = type.parseFromString(str_value); |
93 | return DecimalField<T>(value, type.getScale()); |
94 | } |
95 | |
96 | template <typename From, typename T> |
97 | static Field convertDecimalToDecimalType(const Field & from, const DataTypeDecimal<T> & type) |
98 | { |
99 | auto field = from.get<DecimalField<From>>(); |
100 | T value = convertDecimals<DataTypeDecimal<From>, DataTypeDecimal<T>>(field.getValue(), field.getScale(), type.getScale()); |
101 | return DecimalField<T>(value, type.getScale()); |
102 | } |
103 | |
104 | template <typename To> |
105 | static Field convertDecimalType(const Field & from, const To & type) |
106 | { |
107 | if (from.getType() == Field::Types::UInt64) |
108 | return convertIntToDecimalType<UInt64>(from, type); |
109 | if (from.getType() == Field::Types::Int64) |
110 | return convertIntToDecimalType<Int64>(from, type); |
111 | if (from.getType() == Field::Types::String) |
112 | return convertStringToDecimalType(from, type); |
113 | |
114 | if (from.getType() == Field::Types::Decimal32) |
115 | return convertDecimalToDecimalType<Decimal32>(from, type); |
116 | if (from.getType() == Field::Types::Decimal64) |
117 | return convertDecimalToDecimalType<Decimal64>(from, type); |
118 | if (from.getType() == Field::Types::Decimal128) |
119 | return convertDecimalToDecimalType<Decimal128>(from, type); |
120 | |
121 | throw Exception("Type mismatch in IN or VALUES section. Expected: " + type.getName() + ". Got: " |
122 | + Field::Types::toString(from.getType()), ErrorCodes::TYPE_MISMATCH); |
123 | } |
124 | |
125 | |
126 | DayNum stringToDate(const String & s) |
127 | { |
128 | ReadBufferFromString in(s); |
129 | DayNum date{}; |
130 | |
131 | readDateText(date, in); |
132 | if (!in.eof()) |
133 | throw Exception("String is too long for Date: " + s, ErrorCodes::TOO_LARGE_STRING_SIZE); |
134 | |
135 | return date; |
136 | } |
137 | |
138 | UInt64 stringToDateTime(const String & s) |
139 | { |
140 | ReadBufferFromString in(s); |
141 | time_t date_time{}; |
142 | |
143 | readDateTimeText(date_time, in); |
144 | if (!in.eof()) |
145 | throw Exception("String is too long for DateTime: " + s, ErrorCodes::TOO_LARGE_STRING_SIZE); |
146 | |
147 | return UInt64(date_time); |
148 | } |
149 | |
150 | DateTime64::NativeType stringToDateTime64(const String & s, UInt32 scale) |
151 | { |
152 | ReadBufferFromString in(s); |
153 | DateTime64 datetime64 {0}; |
154 | |
155 | readDateTime64Text(datetime64, scale, in); |
156 | if (!in.eof()) |
157 | throw Exception("String is too long for DateTime64: " + s, ErrorCodes::TOO_LARGE_STRING_SIZE); |
158 | |
159 | return datetime64.value; |
160 | } |
161 | |
162 | Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const IDataType * from_type_hint) |
163 | { |
164 | WhichDataType which_type(type); |
165 | WhichDataType which_from_type; |
166 | if (from_type_hint) |
167 | { |
168 | which_from_type = WhichDataType(*from_type_hint); |
169 | |
170 | // This was added to mitigate converting DateTime64-Field (a typedef to a Decimal64) to DataTypeDate64-compatitable type. |
171 | if (from_type_hint && from_type_hint->equals(type)) |
172 | { |
173 | return src; |
174 | } |
175 | } |
176 | |
177 | /// Conversion between Date and DateTime and vice versa. |
178 | if (which_type.isDate() && which_from_type.isDateTime()) |
179 | { |
180 | return static_cast<const DataTypeDateTime &>(*from_type_hint).getTimeZone().toDayNum(src.get<UInt64>()); |
181 | } |
182 | else if (which_type.isDateTime() && which_from_type.isDate()) |
183 | { |
184 | return static_cast<const DataTypeDateTime &>(type).getTimeZone().fromDayNum(DayNum(src.get<UInt64>())); |
185 | } |
186 | else if (type.isValueRepresentedByNumber()) |
187 | { |
188 | if (which_type.isUInt8()) return convertNumericType<UInt8>(src, type); |
189 | if (which_type.isUInt16()) return convertNumericType<UInt16>(src, type); |
190 | if (which_type.isUInt32()) return convertNumericType<UInt32>(src, type); |
191 | if (which_type.isUInt64()) return convertNumericType<UInt64>(src, type); |
192 | if (which_type.isInt8()) return convertNumericType<Int8>(src, type); |
193 | if (which_type.isInt16()) return convertNumericType<Int16>(src, type); |
194 | if (which_type.isInt32()) return convertNumericType<Int32>(src, type); |
195 | if (which_type.isInt64()) return convertNumericType<Int64>(src, type); |
196 | if (which_type.isFloat32()) return convertNumericType<Float32>(src, type); |
197 | if (which_type.isFloat64()) return convertNumericType<Float64>(src, type); |
198 | if (auto * ptype = typeid_cast<const DataTypeDecimal<Decimal32> *>(&type)) return convertDecimalType(src, *ptype); |
199 | if (auto * ptype = typeid_cast<const DataTypeDecimal<Decimal64> *>(&type)) return convertDecimalType(src, *ptype); |
200 | if (auto * ptype = typeid_cast<const DataTypeDecimal<Decimal128> *>(&type)) return convertDecimalType(src, *ptype); |
201 | |
202 | if (!which_type.isDateOrDateTime() && !which_type.isUUID() && !which_type.isEnum()) |
203 | throw Exception{"Logical error: unknown numeric type " + type.getName(), ErrorCodes::LOGICAL_ERROR}; |
204 | |
205 | if (which_type.isEnum() && (src.getType() == Field::Types::UInt64 || src.getType() == Field::Types::Int64)) |
206 | { |
207 | /// Convert UInt64 or Int64 to Enum's value |
208 | return dynamic_cast<const IDataTypeEnum &>(type).castToValue(src); |
209 | } |
210 | |
211 | if (which_type.isDateOrDateTime() && !which_type.isDateTime64() && src.getType() == Field::Types::UInt64) |
212 | { |
213 | /// We don't need any conversion UInt64 is under type of Date and DateTime |
214 | return src; |
215 | } |
216 | // TODO (vnemkov): extra cases for DateTime64: converting from integer, converting from Decimal |
217 | |
218 | if (src.getType() == Field::Types::String) |
219 | { |
220 | if (which_type.isDate()) |
221 | { |
222 | /// Convert 'YYYY-MM-DD' Strings to Date |
223 | return stringToDate(src.get<const String &>()); |
224 | } |
225 | else if (which_type.isDateTime()) |
226 | { |
227 | /// Convert 'YYYY-MM-DD hh:mm:ss' Strings to DateTime |
228 | return stringToDateTime(src.get<const String &>()); |
229 | } |
230 | else if (which_type.isDateTime64()) |
231 | { |
232 | const auto date_time64 = typeid_cast<const DataTypeDateTime64 *>(&type); |
233 | /// Convert 'YYYY-MM-DD hh:mm:ss.NNNNNNNNN' Strings to DateTime |
234 | return stringToDateTime64(src.get<const String &>(), date_time64->getScale()); |
235 | } |
236 | else if (which_type.isUUID()) |
237 | { |
238 | return stringToUUID(src.get<const String &>()); |
239 | } |
240 | else if (which_type.isEnum()) |
241 | { |
242 | /// Convert String to Enum's value |
243 | return dynamic_cast<const IDataTypeEnum &>(type).castToValue(src); |
244 | } |
245 | } |
246 | } |
247 | else if (which_type.isStringOrFixedString()) |
248 | { |
249 | if (src.getType() == Field::Types::String) |
250 | return src; |
251 | } |
252 | else if (const DataTypeArray * type_array = typeid_cast<const DataTypeArray *>(&type)) |
253 | { |
254 | if (src.getType() == Field::Types::Array) |
255 | { |
256 | const Array & src_arr = src.get<Array>(); |
257 | size_t src_arr_size = src_arr.size(); |
258 | |
259 | auto & element_type = *(type_array->getNestedType()); |
260 | bool have_unconvertible_element = false; |
261 | Array res(src_arr_size); |
262 | for (size_t i = 0; i < src_arr_size; ++i) |
263 | { |
264 | res[i] = convertFieldToType(src_arr[i], element_type); |
265 | if (res[i].isNull() && !element_type.isNullable()) |
266 | { |
267 | // See the comment for Tuples below. |
268 | have_unconvertible_element = true; |
269 | } |
270 | } |
271 | |
272 | return have_unconvertible_element ? Field(Null()) : Field(res); |
273 | } |
274 | } |
275 | else if (const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(&type)) |
276 | { |
277 | if (src.getType() == Field::Types::Tuple) |
278 | { |
279 | const auto & src_tuple = src.get<Tuple>(); |
280 | size_t src_tuple_size = src_tuple.size(); |
281 | size_t dst_tuple_size = type_tuple->getElements().size(); |
282 | |
283 | if (dst_tuple_size != src_tuple_size) |
284 | throw Exception("Bad size of tuple in IN or VALUES section. Expected size: " |
285 | + toString(dst_tuple_size) + ", actual size: " + toString(src_tuple_size), ErrorCodes::TYPE_MISMATCH); |
286 | |
287 | Tuple res(dst_tuple_size); |
288 | bool have_unconvertible_element = false; |
289 | for (size_t i = 0; i < dst_tuple_size; ++i) |
290 | { |
291 | auto & element_type = *(type_tuple->getElements()[i]); |
292 | res[i] = convertFieldToType(src_tuple[i], element_type); |
293 | if (!res[i].isNull() || element_type.isNullable()) |
294 | continue; |
295 | |
296 | /* |
297 | * Either the source element was Null, or the conversion did not |
298 | * succeed, because the source and the requested types of the |
299 | * element are compatible, but the value is not convertible |
300 | * (e.g. trying to convert -1 from Int8 to UInt8). In these |
301 | * cases, consider the whole tuple also compatible but not |
302 | * convertible. According to the specification of this function, |
303 | * we must return Null in this case. |
304 | * |
305 | * The following elements might be not even compatible, so it |
306 | * makes sense to check them to detect user errors. Remember |
307 | * that there is an unconvertible element, and try to process |
308 | * the remaining ones. The convertFieldToType for each element |
309 | * will throw if it detects incompatibility. |
310 | */ |
311 | have_unconvertible_element = true; |
312 | } |
313 | |
314 | return have_unconvertible_element ? Field(Null()) : Field(res); |
315 | } |
316 | } |
317 | else if (const DataTypeAggregateFunction * agg_func_type = typeid_cast<const DataTypeAggregateFunction *>(&type)) |
318 | { |
319 | if (src.getType() != Field::Types::AggregateFunctionState) |
320 | throw Exception(String("Cannot convert " ) + src.getTypeName() + " to " + agg_func_type->getName(), |
321 | ErrorCodes::TYPE_MISMATCH); |
322 | |
323 | auto & name = src.get<AggregateFunctionStateData>().name; |
324 | if (agg_func_type->getName() != name) |
325 | throw Exception("Cannot convert " + name + " to " + agg_func_type->getName(), ErrorCodes::TYPE_MISMATCH); |
326 | |
327 | return src; |
328 | } |
329 | |
330 | if (src.getType() == Field::Types::String) |
331 | { |
332 | const auto col = type.createColumn(); |
333 | ReadBufferFromString buffer(src.get<String>()); |
334 | type.deserializeAsTextEscaped(*col, buffer, FormatSettings{}); |
335 | |
336 | return (*col)[0]; |
337 | } |
338 | |
339 | |
340 | // TODO (nemkov): should we attempt to parse value using or `type.deserializeAsTextEscaped()` type.deserializeAsTextEscaped() ? |
341 | throw Exception("Type mismatch in IN or VALUES section. Expected: " + type.getName() + ". Got: " |
342 | + Field::Types::toString(src.getType()), ErrorCodes::TYPE_MISMATCH); |
343 | } |
344 | |
345 | } |
346 | |
347 | Field convertFieldToType(const Field & from_value, const IDataType & to_type, const IDataType * from_type_hint) |
348 | { |
349 | if (from_value.isNull()) |
350 | return from_value; |
351 | |
352 | if (from_type_hint && from_type_hint->equals(to_type)) |
353 | return from_value; |
354 | |
355 | if (auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(&to_type)) |
356 | return convertFieldToType(from_value, *low_cardinality_type->getDictionaryType(), from_type_hint); |
357 | else if (auto * nullable_type = typeid_cast<const DataTypeNullable *>(&to_type)) |
358 | { |
359 | const IDataType & nested_type = *nullable_type->getNestedType(); |
360 | if (from_type_hint && from_type_hint->equals(nested_type)) |
361 | return from_value; |
362 | return convertFieldToTypeImpl(from_value, nested_type, from_type_hint); |
363 | } |
364 | else |
365 | return convertFieldToTypeImpl(from_value, to_type, from_type_hint); |
366 | } |
367 | |
368 | |
369 | } |
370 | |