1 | #include "duckdb/common/operator/cast_operators.hpp" |
2 | |
3 | #include "duckdb/common/exception.hpp" |
4 | #include "duckdb/common/limits.hpp" |
5 | #include "duckdb/common/types/date.hpp" |
6 | #include "duckdb/common/types/time.hpp" |
7 | #include "duckdb/common/types/timestamp.hpp" |
8 | #include "duckdb/common/types/vector.hpp" |
9 | #include "fmt/format.h" |
10 | |
11 | #include <cctype> |
12 | #include <cmath> |
13 | #include <cstdlib> |
14 | |
15 | using namespace std; |
16 | |
17 | namespace duckdb { |
18 | |
19 | template <class SRC, class DST> static bool try_cast_with_overflow_check(SRC value, DST &result) { |
20 | if (value < MinimumValue<DST>() || value > MaximumValue<DST>()) { |
21 | return false; |
22 | } |
23 | result = (DST)value; |
24 | return true; |
25 | } |
26 | |
27 | template <class SRC, class DST> static DST cast_with_overflow_check(SRC value) { |
28 | DST result; |
29 | if (!try_cast_with_overflow_check<SRC, DST>(value, result)) { |
30 | throw ValueOutOfRangeException((int64_t)value, GetTypeId<SRC>(), GetTypeId<DST>()); |
31 | } |
32 | return result; |
33 | } |
34 | |
35 | //===--------------------------------------------------------------------===// |
36 | // Numeric -> int8_t casts |
37 | //===--------------------------------------------------------------------===// |
38 | template <> bool TryCast::Operation(int16_t input, int8_t &result, bool strict) { |
39 | return try_cast_with_overflow_check(input, result); |
40 | } |
41 | template <> bool TryCast::Operation(int32_t input, int8_t &result, bool strict) { |
42 | return try_cast_with_overflow_check(input, result); |
43 | } |
44 | template <> bool TryCast::Operation(int64_t input, int8_t &result, bool strict) { |
45 | return try_cast_with_overflow_check(input, result); |
46 | } |
47 | template <> bool TryCast::Operation(float input, int8_t &result, bool strict) { |
48 | return try_cast_with_overflow_check(input, result); |
49 | } |
50 | template <> bool TryCast::Operation(double input, int8_t &result, bool strict) { |
51 | return try_cast_with_overflow_check(input, result); |
52 | } |
53 | |
54 | template <> int8_t Cast::Operation(int16_t input) { |
55 | return cast_with_overflow_check<int16_t, int8_t>(input); |
56 | } |
57 | template <> int8_t Cast::Operation(int32_t input) { |
58 | return cast_with_overflow_check<int32_t, int8_t>(input); |
59 | } |
60 | template <> int8_t Cast::Operation(int64_t input) { |
61 | return cast_with_overflow_check<int64_t, int8_t>(input); |
62 | } |
63 | template <> int8_t Cast::Operation(float input) { |
64 | return cast_with_overflow_check<float, int8_t>(input); |
65 | } |
66 | template <> int8_t Cast::Operation(double input) { |
67 | return cast_with_overflow_check<double, int8_t>(input); |
68 | } |
69 | //===--------------------------------------------------------------------===// |
70 | // Numeric -> int16_t casts |
71 | //===--------------------------------------------------------------------===// |
72 | template <> bool TryCast::Operation(int32_t input, int16_t &result, bool strict) { |
73 | return try_cast_with_overflow_check(input, result); |
74 | } |
75 | template <> bool TryCast::Operation(int64_t input, int16_t &result, bool strict) { |
76 | return try_cast_with_overflow_check(input, result); |
77 | } |
78 | template <> bool TryCast::Operation(float input, int16_t &result, bool strict) { |
79 | return try_cast_with_overflow_check(input, result); |
80 | } |
81 | template <> bool TryCast::Operation(double input, int16_t &result, bool strict) { |
82 | return try_cast_with_overflow_check(input, result); |
83 | } |
84 | |
85 | template <> int16_t Cast::Operation(int32_t input) { |
86 | return cast_with_overflow_check<int32_t, int16_t>(input); |
87 | } |
88 | template <> int16_t Cast::Operation(int64_t input) { |
89 | return cast_with_overflow_check<int64_t, int16_t>(input); |
90 | } |
91 | template <> int16_t Cast::Operation(float input) { |
92 | return cast_with_overflow_check<float, int16_t>(input); |
93 | } |
94 | template <> int16_t Cast::Operation(double input) { |
95 | return cast_with_overflow_check<double, int16_t>(input); |
96 | } |
97 | //===--------------------------------------------------------------------===// |
98 | // Numeric -> int32_t casts |
99 | //===--------------------------------------------------------------------===// |
100 | template <> bool TryCast::Operation(int64_t input, int32_t &result, bool strict) { |
101 | return try_cast_with_overflow_check(input, result); |
102 | } |
103 | template <> bool TryCast::Operation(float input, int32_t &result, bool strict) { |
104 | return try_cast_with_overflow_check(input, result); |
105 | } |
106 | template <> bool TryCast::Operation(double input, int32_t &result, bool strict) { |
107 | return try_cast_with_overflow_check(input, result); |
108 | } |
109 | |
110 | template <> int32_t Cast::Operation(int64_t input) { |
111 | return cast_with_overflow_check<int64_t, int32_t>(input); |
112 | } |
113 | template <> int32_t Cast::Operation(float input) { |
114 | return cast_with_overflow_check<float, int32_t>(input); |
115 | } |
116 | template <> int32_t Cast::Operation(double input) { |
117 | return cast_with_overflow_check<double, int32_t>(input); |
118 | } |
119 | //===--------------------------------------------------------------------===// |
120 | // Numeric -> int64_t casts |
121 | //===--------------------------------------------------------------------===// |
122 | template <> bool TryCast::Operation(float input, int64_t &result, bool strict) { |
123 | return try_cast_with_overflow_check(input, result); |
124 | } |
125 | template <> bool TryCast::Operation(double input, int64_t &result, bool strict) { |
126 | return try_cast_with_overflow_check(input, result); |
127 | } |
128 | |
129 | template <> int64_t Cast::Operation(float input) { |
130 | return cast_with_overflow_check<float, int64_t>(input); |
131 | } |
132 | template <> int64_t Cast::Operation(double input) { |
133 | return cast_with_overflow_check<double, int64_t>(input); |
134 | } |
135 | |
136 | //===--------------------------------------------------------------------===// |
137 | // Double -> float casts |
138 | //===--------------------------------------------------------------------===// |
139 | template <> bool TryCast::Operation(double input, float &result, bool strict) { |
140 | auto res = (float)input; |
141 | if (std::isnan(res) || std::isinf(res)) { |
142 | return false; |
143 | } |
144 | result = res; |
145 | return true; |
146 | } |
147 | |
148 | template <> float Cast::Operation(double input) { |
149 | float result; |
150 | bool strict = false; |
151 | if (!TryCast::Operation(input, result, strict)) { |
152 | throw ValueOutOfRangeException(input, GetTypeId<double>(), GetTypeId<float>()); |
153 | } |
154 | return result; |
155 | } |
156 | |
157 | //===--------------------------------------------------------------------===// |
158 | // Cast String -> Numeric |
159 | //===--------------------------------------------------------------------===// |
160 | template <class T> static T try_cast_string(string_t input) { |
161 | T result; |
162 | if (!TryCast::Operation<string_t, T>(input, result)) { |
163 | throw ConversionException("Could not convert string '%s' to numeric" , input.GetData()); |
164 | } |
165 | return result; |
166 | } |
167 | |
168 | template <class T> static T try_strict_cast_string(string_t input) { |
169 | T result; |
170 | if (!TryCast::Operation<string_t, T>(input, result, true)) { |
171 | throw ConversionException("Could not convert string '%s' to numeric" , input.GetData()); |
172 | } |
173 | return result; |
174 | } |
175 | |
176 | template <class T, bool NEGATIVE, bool ALLOW_EXPONENT> |
177 | static bool IntegerCastLoop(const char *buf, T &result, bool strict) { |
178 | idx_t pos = NEGATIVE ? 1 : 0; |
179 | while (buf[pos]) { |
180 | if (!std::isdigit((unsigned char)buf[pos])) { |
181 | // not a digit! |
182 | if (buf[pos] == '.') { |
183 | if (strict) { |
184 | return false; |
185 | } |
186 | // decimal point: we accept decimal values for integers as well |
187 | // we just truncate them |
188 | // make sure everything after the period is a number |
189 | pos++; |
190 | while (buf[pos]) { |
191 | if (!std::isdigit((unsigned char)buf[pos++])) { |
192 | return false; |
193 | } |
194 | } |
195 | return true; |
196 | } |
197 | if (std::isspace((unsigned char)buf[pos])) { |
198 | // skip any trailing spaces |
199 | while (buf[++pos]) { |
200 | if (!std::isspace((unsigned char)buf[pos])) { |
201 | return false; |
202 | } |
203 | } |
204 | return true; |
205 | } |
206 | if (ALLOW_EXPONENT) { |
207 | if (buf[pos] == 'e' || buf[pos] == 'E') { |
208 | pos++; |
209 | int64_t exponent = 0; |
210 | int negative = buf[pos] == '-'; |
211 | if (negative) { |
212 | if (!IntegerCastLoop<int64_t, true, false>(buf + pos, exponent, strict)) { |
213 | return false; |
214 | } |
215 | } else { |
216 | if (!IntegerCastLoop<int64_t, false, false>(buf + pos, exponent, strict)) { |
217 | return false; |
218 | } |
219 | } |
220 | double dbl_res = result * pow(10, exponent); |
221 | if (dbl_res < MinimumValue<T>() || dbl_res > MaximumValue<T>()) { |
222 | return false; |
223 | } |
224 | result = (T)dbl_res; |
225 | return true; |
226 | } |
227 | } |
228 | return false; |
229 | } |
230 | T digit = buf[pos++] - '0'; |
231 | if (NEGATIVE) { |
232 | if (result < (MinimumValue<T>() + digit) / 10) { |
233 | return false; |
234 | } |
235 | result = result * 10 - digit; |
236 | } else { |
237 | if (result > (MaximumValue<T>() - digit) / 10) { |
238 | return false; |
239 | } |
240 | result = result * 10 + digit; |
241 | } |
242 | } |
243 | return pos > (NEGATIVE ? 1 : 0); |
244 | } |
245 | |
246 | template <class T, bool ALLOW_EXPONENT = true> static bool TryIntegerCast(const char *buf, T &result, bool strict) { |
247 | if (!*buf) { |
248 | return false; |
249 | } |
250 | // skip any spaces at the start |
251 | while (std::isspace((unsigned char)*buf)) { |
252 | buf++; |
253 | } |
254 | int negative = *buf == '-'; |
255 | |
256 | result = 0; |
257 | if (!negative) { |
258 | return IntegerCastLoop<T, false, ALLOW_EXPONENT>(buf, result, strict); |
259 | } else { |
260 | return IntegerCastLoop<T, true, ALLOW_EXPONENT>(buf, result, strict); |
261 | } |
262 | } |
263 | |
264 | template <> bool TryCast::Operation(string_t input, bool &result, bool strict) { |
265 | auto input_data = input.GetData(); |
266 | // TODO: add support for '0' and '1' as boolean |
267 | if (strict) { |
268 | if (strcmp(input_data, "true" ) == 0 || strcmp(input_data, "True" ) == 0 || strcmp(input_data, "TRUE" ) == 0) { |
269 | result = true; |
270 | } else if (strcmp(input_data, "false" ) == 0 || strcmp(input_data, "False" ) == 0 || |
271 | strcmp(input_data, "FALSE" ) == 0) { |
272 | result = false; |
273 | } else { |
274 | return false; |
275 | } |
276 | } else { |
277 | if (input_data[0] == 't' || input_data[0] == 'T') { |
278 | result = true; |
279 | } else if (input_data[0] == 'f' || input_data[0] == 'F') { |
280 | result = false; |
281 | } else { |
282 | return false; |
283 | } |
284 | } |
285 | |
286 | return true; |
287 | } |
288 | template <> bool TryCast::Operation(string_t input, int8_t &result, bool strict) { |
289 | return TryIntegerCast<int8_t>(input.GetData(), result, strict); |
290 | } |
291 | template <> bool TryCast::Operation(string_t input, int16_t &result, bool strict) { |
292 | return TryIntegerCast<int16_t>(input.GetData(), result, strict); |
293 | } |
294 | template <> bool TryCast::Operation(string_t input, int32_t &result, bool strict) { |
295 | return TryIntegerCast<int32_t>(input.GetData(), result, strict); |
296 | } |
297 | template <> bool TryCast::Operation(string_t input, int64_t &result, bool strict) { |
298 | return TryIntegerCast<int64_t>(input.GetData(), result, strict); |
299 | } |
300 | |
301 | template <class T, bool NEGATIVE> static void ComputeDoubleResult(T &result, idx_t decimal, idx_t decimal_factor) { |
302 | if (decimal_factor > 1) { |
303 | if (NEGATIVE) { |
304 | result -= (T)decimal / (T)decimal_factor; |
305 | } else { |
306 | result += (T)decimal / (T)decimal_factor; |
307 | } |
308 | } |
309 | } |
310 | |
311 | template <class T, bool NEGATIVE> static bool DoubleCastLoop(const char *buf, T &result, bool strict) { |
312 | idx_t pos = NEGATIVE ? 1 : 0; |
313 | idx_t decimal = 0; |
314 | idx_t decimal_factor = 0; |
315 | while (buf[pos]) { |
316 | if (!std::isdigit((unsigned char)buf[pos])) { |
317 | // not a digit! |
318 | if (buf[pos] == '.') { |
319 | // decimal point |
320 | if (decimal_factor != 0) { |
321 | // nested periods |
322 | return false; |
323 | } |
324 | decimal_factor = 1; |
325 | pos++; |
326 | continue; |
327 | } else if (std::isspace((unsigned char)buf[pos])) { |
328 | // skip any trailing spaces |
329 | while (buf[++pos]) { |
330 | if (!std::isspace((unsigned char)buf[pos])) { |
331 | return false; |
332 | } |
333 | } |
334 | ComputeDoubleResult<T, NEGATIVE>(result, decimal, decimal_factor); |
335 | return true; |
336 | } else if (buf[pos] == 'e' || buf[pos] == 'E') { |
337 | // E power |
338 | // parse an integer, this time not allowing another exponent |
339 | pos++; |
340 | int64_t exponent; |
341 | if (!TryIntegerCast<int64_t, false>(buf + pos, exponent, strict)) { |
342 | return false; |
343 | } |
344 | ComputeDoubleResult<T, NEGATIVE>(result, decimal, decimal_factor); |
345 | result = result * pow(10, exponent); |
346 | return true; |
347 | } else { |
348 | return false; |
349 | } |
350 | } |
351 | T digit = buf[pos++] - '0'; |
352 | if (decimal_factor == 0) { |
353 | result = result * 10 + (NEGATIVE ? -digit : digit); |
354 | } else { |
355 | if (decimal_factor >= 1000000000000000000) { |
356 | // decimal value will overflow if we parse more, ignore any subsequent numbers |
357 | continue; |
358 | } |
359 | decimal = decimal * 10 + digit; |
360 | decimal_factor *= 10; |
361 | } |
362 | } |
363 | ComputeDoubleResult<T, NEGATIVE>(result, decimal, decimal_factor); |
364 | return pos > (NEGATIVE ? 1 : 0); |
365 | } |
366 | |
367 | template <class T> bool CheckDoubleValidity(T value); |
368 | |
369 | template <> bool CheckDoubleValidity(float value) { |
370 | return Value::FloatIsValid(value); |
371 | } |
372 | |
373 | template <> bool CheckDoubleValidity(double value) { |
374 | return Value::DoubleIsValid(value); |
375 | } |
376 | |
377 | template <class T> static bool TryDoubleCast(const char *buf, T &result, bool strict) { |
378 | if (!*buf) { |
379 | return false; |
380 | } |
381 | // skip any spaces at the start |
382 | while (std::isspace((unsigned char)*buf)) { |
383 | buf++; |
384 | } |
385 | int negative = *buf == '-'; |
386 | |
387 | result = 0; |
388 | if (!negative) { |
389 | if (!DoubleCastLoop<T, false>(buf, result, strict)) { |
390 | return false; |
391 | } |
392 | } else { |
393 | if (!DoubleCastLoop<T, true>(buf, result, strict)) { |
394 | return false; |
395 | } |
396 | } |
397 | if (!CheckDoubleValidity<T>(result)) { |
398 | return false; |
399 | } |
400 | return true; |
401 | } |
402 | |
403 | template <> bool TryCast::Operation(string_t input, float &result, bool strict) { |
404 | return TryDoubleCast<float>(input.GetData(), result, strict); |
405 | } |
406 | template <> bool TryCast::Operation(string_t input, double &result, bool strict) { |
407 | return TryDoubleCast<double>(input.GetData(), result, strict); |
408 | } |
409 | |
410 | template <> bool Cast::Operation(string_t input) { |
411 | return try_cast_string<bool>(input); |
412 | } |
413 | template <> int8_t Cast::Operation(string_t input) { |
414 | return try_cast_string<int8_t>(input); |
415 | } |
416 | template <> int16_t Cast::Operation(string_t input) { |
417 | return try_cast_string<int16_t>(input); |
418 | } |
419 | template <> int32_t Cast::Operation(string_t input) { |
420 | return try_cast_string<int32_t>(input); |
421 | } |
422 | template <> int64_t Cast::Operation(string_t input) { |
423 | return try_cast_string<int64_t>(input); |
424 | } |
425 | template <> float Cast::Operation(string_t input) { |
426 | return try_cast_string<float>(input); |
427 | } |
428 | template <> double Cast::Operation(string_t input) { |
429 | return try_cast_string<double>(input); |
430 | } |
431 | |
432 | template <> bool StrictCast::Operation(string_t input) { |
433 | return try_strict_cast_string<bool>(input); |
434 | } |
435 | template <> int8_t StrictCast::Operation(string_t input) { |
436 | return try_strict_cast_string<int8_t>(input); |
437 | } |
438 | template <> int16_t StrictCast::Operation(string_t input) { |
439 | return try_strict_cast_string<int16_t>(input); |
440 | } |
441 | template <> int32_t StrictCast::Operation(string_t input) { |
442 | return try_strict_cast_string<int32_t>(input); |
443 | } |
444 | template <> int64_t StrictCast::Operation(string_t input) { |
445 | return try_strict_cast_string<int64_t>(input); |
446 | } |
447 | template <> float StrictCast::Operation(string_t input) { |
448 | return try_strict_cast_string<float>(input); |
449 | } |
450 | template <> double StrictCast::Operation(string_t input) { |
451 | return try_strict_cast_string<double>(input); |
452 | } |
453 | |
454 | //===--------------------------------------------------------------------===// |
455 | // Cast Numeric -> String |
456 | //===--------------------------------------------------------------------===// |
457 | template <class T> string CastToStandardString(T input) { |
458 | Vector v(TypeId::VARCHAR); |
459 | return StringCast::Operation(input, v).GetString(); |
460 | } |
461 | |
462 | template <> string Cast::Operation(bool input) { |
463 | return CastToStandardString(input); |
464 | } |
465 | template <> string Cast::Operation(int8_t input) { |
466 | return CastToStandardString(input); |
467 | } |
468 | template <> string Cast::Operation(int16_t input) { |
469 | return CastToStandardString(input); |
470 | } |
471 | template <> string Cast::Operation(int32_t input) { |
472 | return CastToStandardString(input); |
473 | } |
474 | template <> string Cast::Operation(int64_t input) { |
475 | return CastToStandardString(input); |
476 | } |
477 | template <> string Cast::Operation(float input) { |
478 | return CastToStandardString(input); |
479 | } |
480 | template <> string Cast::Operation(double input) { |
481 | return CastToStandardString(input); |
482 | } |
483 | template <> string Cast::Operation(string_t input) { |
484 | return input.GetString(); |
485 | } |
486 | |
487 | template <> string_t StringCast::Operation(bool input, Vector &vector) { |
488 | if (input) { |
489 | return StringVector::AddString(vector, "true" , 4); |
490 | } else { |
491 | return StringVector::AddString(vector, "false" , 5); |
492 | } |
493 | } |
494 | |
495 | struct StringToIntegerCast { |
496 | template <class T> static int UnsignedLength(T value); |
497 | |
498 | // Formats value in reverse and returns a pointer to the beginning. |
499 | template <class T> static char *FormatUnsigned(T value, char *ptr) { |
500 | while (value >= 100) { |
501 | // Integer division is slow so do it for a group of two digits instead |
502 | // of for every digit. The idea comes from the talk by Alexandrescu |
503 | // "Three Optimization Tips for C++". See speed-test for a comparison. |
504 | auto index = static_cast<unsigned>((value % 100) * 2); |
505 | value /= 100; |
506 | *--ptr = fmt::internal::data::digits[index + 1]; |
507 | *--ptr = fmt::internal::data::digits[index]; |
508 | } |
509 | if (value < 10) { |
510 | *--ptr = static_cast<char>('0' + value); |
511 | return ptr; |
512 | } |
513 | auto index = static_cast<unsigned>(value * 2); |
514 | *--ptr = fmt::internal::data::digits[index + 1]; |
515 | *--ptr = fmt::internal::data::digits[index]; |
516 | return ptr; |
517 | } |
518 | |
519 | template <class SIGNED, class UNSIGNED> static string_t FormatSigned(SIGNED value, Vector &vector) { |
520 | int sign = -(value < 0); |
521 | UNSIGNED unsigned_value = (value ^ sign) - sign; |
522 | int length = UnsignedLength<UNSIGNED>(unsigned_value) - sign; |
523 | string_t result = StringVector::EmptyString(vector, length); |
524 | auto dataptr = result.GetData(); |
525 | auto endptr = dataptr + length; |
526 | endptr = FormatUnsigned(unsigned_value, endptr); |
527 | if (sign) { |
528 | *--endptr = '-'; |
529 | } |
530 | result.Finalize(); |
531 | return result; |
532 | } |
533 | }; |
534 | |
535 | template <> int StringToIntegerCast::UnsignedLength(uint8_t value) { |
536 | int length = 1; |
537 | length += value >= 10; |
538 | length += value >= 100; |
539 | return length; |
540 | } |
541 | |
542 | template <> int StringToIntegerCast::UnsignedLength(uint16_t value) { |
543 | int length = 1; |
544 | length += value >= 10; |
545 | length += value >= 100; |
546 | length += value >= 1000; |
547 | length += value >= 10000; |
548 | return length; |
549 | } |
550 | |
551 | template <> int StringToIntegerCast::UnsignedLength(uint32_t value) { |
552 | if (value >= 10000) { |
553 | int length = 5; |
554 | length += value >= 100000; |
555 | length += value >= 1000000; |
556 | length += value >= 10000000; |
557 | length += value >= 100000000; |
558 | length += value >= 1000000000; |
559 | return length; |
560 | } else { |
561 | int length = 1; |
562 | length += value >= 10; |
563 | length += value >= 100; |
564 | length += value >= 1000; |
565 | return length; |
566 | } |
567 | } |
568 | |
569 | template <> int StringToIntegerCast::UnsignedLength(uint64_t value) { |
570 | if (value >= 10000000000ULL) { |
571 | if (value >= 1000000000000000ULL) { |
572 | int length = 16; |
573 | length += value >= 10000000000000000ULL; |
574 | length += value >= 100000000000000000ULL; |
575 | length += value >= 1000000000000000000ULL; |
576 | length += value >= 10000000000000000000ULL; |
577 | return length; |
578 | } else { |
579 | int length = 11; |
580 | length += value >= 100000000000ULL; |
581 | length += value >= 1000000000000ULL; |
582 | length += value >= 10000000000000ULL; |
583 | length += value >= 100000000000000ULL; |
584 | return length; |
585 | } |
586 | } else { |
587 | if (value >= 100000ULL) { |
588 | int length = 6; |
589 | length += value >= 1000000ULL; |
590 | length += value >= 10000000ULL; |
591 | length += value >= 100000000ULL; |
592 | length += value >= 1000000000ULL; |
593 | return length; |
594 | } else { |
595 | int length = 1; |
596 | length += value >= 10ULL; |
597 | length += value >= 100ULL; |
598 | length += value >= 1000ULL; |
599 | length += value >= 10000ULL; |
600 | return length; |
601 | } |
602 | } |
603 | } |
604 | |
605 | template <> string_t StringCast::Operation(int8_t input, Vector &vector) { |
606 | return StringToIntegerCast::FormatSigned<int8_t, uint8_t>(input, vector); |
607 | } |
608 | |
609 | template <> string_t StringCast::Operation(int16_t input, Vector &vector) { |
610 | return StringToIntegerCast::FormatSigned<int16_t, uint16_t>(input, vector); |
611 | } |
612 | template <> string_t StringCast::Operation(int32_t input, Vector &vector) { |
613 | return StringToIntegerCast::FormatSigned<int32_t, uint32_t>(input, vector); |
614 | } |
615 | |
616 | template <> string_t StringCast::Operation(int64_t input, Vector &vector) { |
617 | return StringToIntegerCast::FormatSigned<int64_t, uint64_t>(input, vector); |
618 | } |
619 | |
620 | template <> string_t StringCast::Operation(float input, Vector &vector) { |
621 | std::string s = fmt::format("{}" , input); |
622 | return StringVector::AddString(vector, s); |
623 | } |
624 | |
625 | template <> string_t StringCast::Operation(double input, Vector &vector) { |
626 | std::string s = fmt::format("{}" , input); |
627 | return StringVector::AddString(vector, s); |
628 | } |
629 | |
630 | //===--------------------------------------------------------------------===// |
631 | // Cast From Date |
632 | //===--------------------------------------------------------------------===// |
633 | struct DateToStringCast { |
634 | static idx_t Length(int32_t date[], idx_t &year_length, bool &add_bc) { |
635 | // format is YYYY-MM-DD with optional (BC) at the end |
636 | // regular length is 10 |
637 | idx_t length = 6; |
638 | year_length = 4; |
639 | add_bc = false; |
640 | if (date[0] <= 0) { |
641 | // add (BC) suffix |
642 | length += 5; |
643 | date[0] = -date[0]; |
644 | add_bc = true; |
645 | } |
646 | |
647 | // potentially add extra characters depending on length of year |
648 | year_length += date[0] >= 10000; |
649 | year_length += date[0] >= 100000; |
650 | year_length += date[0] >= 1000000; |
651 | year_length += date[0] >= 10000000; |
652 | length += year_length; |
653 | return length; |
654 | } |
655 | |
656 | static void Format(char *data, int32_t date[], idx_t year_length, bool add_bc) { |
657 | // now we write the string, first write the year |
658 | auto endptr = data + year_length; |
659 | endptr = StringToIntegerCast::FormatUnsigned(date[0], endptr); |
660 | // add optional leading zeros |
661 | while (endptr > data) { |
662 | *--endptr = '0'; |
663 | } |
664 | // now write the month and day |
665 | auto ptr = data + year_length; |
666 | for (int i = 1; i <= 2; i++) { |
667 | ptr[0] = '-'; |
668 | if (date[i] < 10) { |
669 | ptr[1] = '0'; |
670 | ptr[2] = '0' + date[i]; |
671 | } else { |
672 | auto index = static_cast<unsigned>(date[i] * 2); |
673 | ptr[1] = fmt::internal::data::digits[index]; |
674 | ptr[2] = fmt::internal::data::digits[index + 1]; |
675 | } |
676 | ptr += 3; |
677 | } |
678 | // optionally add BC to the end of the date |
679 | if (add_bc) { |
680 | memcpy(ptr, " (BC)" , 5); |
681 | } |
682 | } |
683 | }; |
684 | |
685 | template <> string_t CastFromDate::Operation(date_t input, Vector &vector) { |
686 | int32_t date[3]; |
687 | Date::Convert(input, date[0], date[1], date[2]); |
688 | |
689 | idx_t year_length; |
690 | bool add_bc; |
691 | idx_t length = DateToStringCast::Length(date, year_length, add_bc); |
692 | |
693 | string_t result = StringVector::EmptyString(vector, length); |
694 | auto data = result.GetData(); |
695 | |
696 | DateToStringCast::Format(data, date, year_length, add_bc); |
697 | |
698 | result.Finalize(); |
699 | return result; |
700 | } |
701 | |
702 | //===--------------------------------------------------------------------===// |
703 | // Cast To Date |
704 | //===--------------------------------------------------------------------===// |
705 | template <> date_t CastToDate::Operation(string_t input) { |
706 | return Date::FromCString(input.GetData()); |
707 | } |
708 | |
709 | template <> date_t StrictCastToDate::Operation(string_t input) { |
710 | return Date::FromCString(input.GetData(), true); |
711 | } |
712 | |
713 | //===--------------------------------------------------------------------===// |
714 | // Cast From Time |
715 | //===--------------------------------------------------------------------===// |
716 | struct TimeToStringCast { |
717 | static idx_t Length(int32_t time[]) { |
718 | // format is HH:MM:DD |
719 | // regular length is 8 |
720 | idx_t length = 8; |
721 | if (time[3] > 0) { |
722 | // if there are msecs, we add the miliseconds after the time with a period separator |
723 | // i.e. the format becomes HH:MM:DD.msec |
724 | length += 4; |
725 | } |
726 | return length; |
727 | } |
728 | |
729 | static void Format(char *data, idx_t length, int32_t time[]) { |
730 | // first write hour, month and day |
731 | auto ptr = data; |
732 | for (int i = 0; i <= 2; i++) { |
733 | if (time[i] < 10) { |
734 | ptr[0] = '0'; |
735 | ptr[1] = '0' + time[i]; |
736 | } else { |
737 | auto index = static_cast<unsigned>(time[i] * 2); |
738 | ptr[0] = fmt::internal::data::digits[index]; |
739 | ptr[1] = fmt::internal::data::digits[index + 1]; |
740 | } |
741 | ptr[2] = ':'; |
742 | ptr += 3; |
743 | } |
744 | // now optionally write ms at the end |
745 | if (time[3] > 0) { |
746 | auto start = ptr; |
747 | ptr = StringToIntegerCast::FormatUnsigned(time[3], data + length); |
748 | while (ptr > start) { |
749 | *--ptr = '0'; |
750 | } |
751 | *--ptr = '.'; |
752 | } |
753 | } |
754 | }; |
755 | |
756 | template <> string_t CastFromTime::Operation(dtime_t input, Vector &vector) { |
757 | int32_t time[4]; |
758 | Time::Convert(input, time[0], time[1], time[2], time[3]); |
759 | |
760 | idx_t length = TimeToStringCast::Length(time); |
761 | |
762 | string_t result = StringVector::EmptyString(vector, length); |
763 | auto data = result.GetData(); |
764 | |
765 | TimeToStringCast::Format(data, length, time); |
766 | |
767 | result.Finalize(); |
768 | return result; |
769 | } |
770 | |
771 | //===--------------------------------------------------------------------===// |
772 | // Cast To Time |
773 | //===--------------------------------------------------------------------===// |
774 | template <> dtime_t CastToTime::Operation(string_t input) { |
775 | return Time::FromCString(input.GetData()); |
776 | } |
777 | |
778 | template <> dtime_t StrictCastToTime::Operation(string_t input) { |
779 | return Time::FromCString(input.GetData(), true); |
780 | } |
781 | |
782 | template <> timestamp_t CastDateToTimestamp::Operation(date_t input) { |
783 | return Timestamp::FromDatetime(input, Time::FromTime(0, 0, 0, 0)); |
784 | } |
785 | |
786 | //===--------------------------------------------------------------------===// |
787 | // Cast From Timestamps |
788 | //===--------------------------------------------------------------------===// |
789 | template <> string_t CastFromTimestamp::Operation(timestamp_t input, Vector &vector) { |
790 | date_t date_entry; |
791 | dtime_t time_entry; |
792 | Timestamp::Convert(input, date_entry, time_entry); |
793 | |
794 | int32_t date[3], time[4]; |
795 | Date::Convert(date_entry, date[0], date[1], date[2]); |
796 | Time::Convert(time_entry, time[0], time[1], time[2], time[3]); |
797 | |
798 | // format for timestamp is DATE TIME (separated by space) |
799 | idx_t year_length; |
800 | bool add_bc; |
801 | idx_t date_length = DateToStringCast::Length(date, year_length, add_bc); |
802 | idx_t time_length = TimeToStringCast::Length(time); |
803 | idx_t length = date_length + time_length + 1; |
804 | |
805 | string_t result = StringVector::EmptyString(vector, length); |
806 | auto data = result.GetData(); |
807 | |
808 | DateToStringCast::Format(data, date, year_length, add_bc); |
809 | data[date_length] = ' '; |
810 | TimeToStringCast::Format(data + date_length + 1, time_length, time); |
811 | |
812 | result.Finalize(); |
813 | return result; |
814 | } |
815 | |
816 | template <> date_t CastTimestampToDate::Operation(timestamp_t input) { |
817 | return Timestamp::GetDate(input); |
818 | } |
819 | |
820 | template <> dtime_t CastTimestampToTime::Operation(timestamp_t input) { |
821 | return Timestamp::GetTime(input); |
822 | } |
823 | |
824 | //===--------------------------------------------------------------------===// |
825 | // Cast To Timestamp |
826 | //===--------------------------------------------------------------------===// |
827 | template <> timestamp_t CastToTimestamp::Operation(string_t input) { |
828 | return Timestamp::FromString(input.GetData()); |
829 | } |
830 | |
831 | //===--------------------------------------------------------------------===// |
832 | // Cast From Blob |
833 | //===--------------------------------------------------------------------===// |
834 | template <> string_t CastFromBlob::Operation(string_t input, Vector &vector) { |
835 | idx_t input_size = input.GetSize(); |
836 | // double chars for hex string plus two because of hex identifier ('\x') |
837 | string_t result = StringVector::EmptyString(vector, input_size * 2 + 2); |
838 | CastFromBlob::ToHexString(input, result); |
839 | return result; |
840 | } |
841 | |
842 | void CastFromBlob::ToHexString(string_t input, string_t &output) { |
843 | const char hexa_table[] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'}; |
844 | idx_t input_size = input.GetSize(); |
845 | assert(output.GetSize() == (input_size * 2 + 2)); |
846 | auto input_data = input.GetData(); |
847 | auto hexa_data = output.GetData(); |
848 | // hex identifier |
849 | hexa_data[0] = '\\'; hexa_data[1] = 'x'; |
850 | hexa_data += 2; |
851 | for(idx_t idx = 0; idx < input_size; ++idx) { |
852 | hexa_data[idx * 2] = hexa_table[(input_data[idx] >> 4) & 0x0F]; |
853 | hexa_data[idx * 2 + 1] = hexa_table[input_data[idx] & 0x0F]; |
854 | } |
855 | output.Finalize(); |
856 | } |
857 | |
858 | void CastFromBlob::FromHexToBytes(string_t input, string_t &output) { |
859 | idx_t in_size = input.GetSize(); |
860 | // amount of hex chars must be even |
861 | if((in_size % 2) != 0) { |
862 | throw OutOfRangeException("Hex string must have an even number of bytes." ); |
863 | } |
864 | |
865 | auto in_data = input.GetData(); |
866 | // removing '\x' |
867 | in_data += 2; |
868 | in_size -= 2; |
869 | |
870 | auto out_data = output.GetData(); |
871 | idx_t out_size = output.GetSize(); |
872 | assert(out_size == (in_size / 2)); |
873 | idx_t out_idx=0; |
874 | |
875 | idx_t num_hex_per_byte = 2; |
876 | uint8_t hex[2]; |
877 | |
878 | for(idx_t in_idx = 0; in_idx < in_size; in_idx+=2, ++out_idx) { |
879 | for(idx_t hex_idx = 0; hex_idx < num_hex_per_byte; ++hex_idx) { |
880 | uint8_t int_ch = in_data[in_idx + hex_idx]; |
881 | if(int_ch >= (uint8_t)'0' && int_ch <= (uint8_t)'9') { |
882 | // numeric ascii chars: '0' to '9' |
883 | hex[hex_idx] = int_ch & 0X0F; |
884 | } |
885 | else if((int_ch >= (uint8_t)'A' && int_ch <= (uint8_t)'F') || |
886 | (int_ch >= (uint8_t)'a' && int_ch <= (uint8_t)'f')) { |
887 | // hex chars: ['A':'F'] or ['a':'f'] |
888 | // transforming char into an integer in the range of 10 to 15 |
889 | hex[hex_idx] = ((int_ch & 0X0F) - 1) + 10; |
890 | } else { |
891 | throw OutOfRangeException("\"%c\" is not a valid hexadecimal char." , in_data[in_idx + hex_idx]); |
892 | } |
893 | } |
894 | // adding two hex into the same byte |
895 | out_data[out_idx] = hex[0]; |
896 | out_data[out_idx] = (out_data[out_idx] << 4) | hex[1]; |
897 | } |
898 | out_data[out_idx] = '\0'; |
899 | } |
900 | |
901 | //===--------------------------------------------------------------------===// |
902 | // Cast To Blob |
903 | //===--------------------------------------------------------------------===// |
904 | template <> string_t CastToBlob::Operation(string_t input, Vector &vector) { |
905 | idx_t input_size = input.GetSize(); |
906 | auto input_data = input.GetData(); |
907 | string_t result; |
908 | // Check by a hex string |
909 | if(input_size >= 2 && input_data[0] == '\\' && input_data[1] == 'x') { |
910 | auto output = StringVector::EmptyString(vector, (input_size - 2) / 2); |
911 | CastFromBlob::FromHexToBytes(input, output); |
912 | result = output; |
913 | } else { |
914 | // raw string |
915 | result = StringVector::AddBlob(vector, input); |
916 | } |
917 | return result; |
918 | } |
919 | |
920 | } // namespace duckdb |
921 | |