1#include <type_traits>
2#include <IO/ReadHelpers.h>
3#include <Core/Defines.h>
4#include <common/shift10.h>
5#include <common/likely.h>
6#include <Common/StringUtils/StringUtils.h>
7#include <double-conversion/double-conversion.h>
8
9
10/** Methods for reading floating point numbers from text with decimal representation.
11 * There are "precise", "fast" and "simple" implementations.
12 *
 * None of the methods supports hexadecimal numbers (0xABC), a binary exponent (1p100) or a leading plus sign.
14 *
15 * Precise method always returns a number that is the closest machine representable number to the input.
16 *
 * Fast method is faster (up to 3 times) and usually returns the same value,
 * but in rare cases the result may differ by the least significant bit (for Float32)
 * and by up to two least significant bits (for Float64) from the precise method.
20 * Also fast method may parse some garbage as some other unspecified garbage.
21 *
 * Simple method is a little faster when parsing short (few-digit) integers, but it is less precise and slower in other cases.
23 * It's not recommended to use simple method and it is left only for reference.
24 *
25 * For performance test, look at 'read_float_perf' test.
26 *
 * For a precision test, use the queries below.
28 * Parse all existing Float32 numbers:
29
30CREATE TABLE test.floats ENGINE = Log AS SELECT reinterpretAsFloat32(reinterpretAsString(toUInt32(number))) AS x FROM numbers(0x100000000);
31
32WITH
33 toFloat32(toString(x)) AS y,
34 reinterpretAsUInt32(reinterpretAsString(x)) AS bin_x,
35 reinterpretAsUInt32(reinterpretAsString(y)) AS bin_y,
36 abs(bin_x - bin_y) AS diff
37SELECT
38 diff,
39 count()
40FROM test.floats
41WHERE NOT isNaN(x)
42GROUP BY diff
43ORDER BY diff ASC
44LIMIT 100
45
46 * Here are the results:
47 *
48 Precise:
49 ┌─diff─┬────count()─┐
50 │ 0 │ 4278190082 │
51 └──────┴────────────┘
52 (100% roundtrip property)
53
54 Fast:
55 ┌─diff─┬────count()─┐
56 │ 0 │ 3685260580 │
57 │ 1 │ 592929502 │
58 └──────┴────────────┘
59 (The difference is 1 in least significant bit in 13.8% of numbers.)
60
61 Simple:
62 ┌─diff─┬────count()─┐
63 │ 0 │ 2169879994 │
64 │ 1 │ 1807178292 │
65 │ 2 │ 269505944 │
66 │ 3 │ 28826966 │
67 │ 4 │ 2566488 │
68 │ 5 │ 212878 │
69 │ 6 │ 18276 │
70 │ 7 │ 1214 │
71 │ 8 │ 30 │
72 └──────┴────────────┘
73
74 * Parse random Float64 numbers:
75
76WITH
77 rand64() AS bin_x,
78 reinterpretAsFloat64(reinterpretAsString(bin_x)) AS x,
79 toFloat64(toString(x)) AS y,
80 reinterpretAsUInt64(reinterpretAsString(y)) AS bin_y,
81 abs(bin_x - bin_y) AS diff
82SELECT
83 diff,
84 count()
85FROM numbers(100000000)
86WHERE NOT isNaN(x)
87GROUP BY diff
88ORDER BY diff ASC
89LIMIT 100
90
91 */
92
93
94namespace DB
95{
96
/// Error codes used by the readers in this file.
/// The constants are only declared here (extern); their definitions live elsewhere.
namespace ErrorCodes
{
    /// Used for the "Cannot read floating point value" exceptions thrown below.
    extern const int CANNOT_PARSE_NUMBER;
    /// NOTE(review): not referenced in the visible part of this file.
    extern const int ARGUMENT_OUT_OF_BOUND;
}
102
103
/// Non-throwing readers for the special values. Return true iff the value was parsed.
/// (Defined elsewhere; presumably they accept spellings such as "inf" and "Infinity" - TODO confirm.)
bool parseInfinity(ReadBuffer & buf);
bool parseNaN(ReadBuffer & buf);

/// Counterparts without a result value. Callers in this file treat a normal return as success,
/// so a mismatch is presumably reported by throwing an exception (defined elsewhere - TODO confirm).
void assertInfinity(ReadBuffer & buf);
void assertNaN(ReadBuffer & buf);
110
111
112template <bool throw_exception>
113bool assertOrParseInfinity(ReadBuffer & buf)
114{
115 if constexpr (throw_exception)
116 {
117 assertInfinity(buf);
118 return true;
119 }
120 else
121 return parseInfinity(buf);
122}
123
124template <bool throw_exception>
125bool assertOrParseNaN(ReadBuffer & buf)
126{
127 if constexpr (throw_exception)
128 {
129 assertNaN(buf);
130 return true;
131 }
132 else
133 return parseNaN(buf);
134}
135
136
137/// Some garbage may be successfully parsed, examples: '--1' parsed as '1'.
138template <typename T, typename ReturnType>
139ReturnType readFloatTextPreciseImpl(T & x, ReadBuffer & buf)
140{
141 static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>, "Argument for readFloatTextImpl must be float or double");
142 static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
143
144 if (buf.eof())
145 {
146 if constexpr (throw_exception)
147 throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER);
148 else
149 return ReturnType(false);
150 }
151
152 /// We use special code to read denormals (inf, nan), because we support slightly more variants that double-conversion library does:
153 /// Example: inf and Infinity.
154
155 bool negative = false;
156
157 while (true)
158 {
159 switch (*buf.position())
160 {
161 case '-':
162 {
163 negative = true;
164 ++buf.position();
165 continue;
166 }
167
168 case 'i': [[fallthrough]];
169 case 'I':
170 {
171 if (assertOrParseInfinity<throw_exception>(buf))
172 {
173 x = std::numeric_limits<T>::infinity();
174 if (negative)
175 x = -x;
176 return ReturnType(true);
177 }
178 return ReturnType(false);
179 }
180
181 case 'n': [[fallthrough]];
182 case 'N':
183 {
184 if (assertOrParseNaN<throw_exception>(buf))
185 {
186 x = std::numeric_limits<T>::quiet_NaN();
187 if (negative)
188 x = -x;
189 return ReturnType(true);
190 }
191 return ReturnType(false);
192 }
193
194 default:
195 break;
196 }
197 break;
198 }
199
200 static const double_conversion::StringToDoubleConverter converter(
201 double_conversion::StringToDoubleConverter::ALLOW_TRAILING_JUNK,
202 0, 0, nullptr, nullptr);
203
204 /// Fast path (avoid copying) if the buffer have at least MAX_LENGTH bytes.
205 static constexpr int MAX_LENGTH = 316;
206
207 if (buf.position() + MAX_LENGTH <= buf.buffer().end())
208 {
209 int num_processed_characters = 0;
210
211 if constexpr (std::is_same_v<T, double>)
212 x = converter.StringToDouble(buf.position(), buf.buffer().end() - buf.position(), &num_processed_characters);
213 else
214 x = converter.StringToFloat(buf.position(), buf.buffer().end() - buf.position(), &num_processed_characters);
215
216 if (num_processed_characters < 0)
217 {
218 if constexpr (throw_exception)
219 throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER);
220 else
221 return ReturnType(false);
222 }
223
224 buf.position() += num_processed_characters;
225
226 if (negative)
227 x = -x;
228 return ReturnType(true);
229 }
230 else
231 {
232 /// Slow path. Copy characters that may be present in floating point number to temporary buffer.
233
234 char tmp_buf[MAX_LENGTH];
235 int num_copied_chars = 0;
236
237 while (!buf.eof() && num_copied_chars < MAX_LENGTH)
238 {
239 char c = *buf.position();
240 if (!(isNumericASCII(c) || c == '-' || c == '+' || c == '.' || c == 'e' || c == 'E'))
241 break;
242
243 tmp_buf[num_copied_chars] = c;
244 ++buf.position();
245 ++num_copied_chars;
246 }
247
248 int num_processed_characters = 0;
249
250 if constexpr (std::is_same_v<T, double>)
251 x = converter.StringToDouble(tmp_buf, num_copied_chars, &num_processed_characters);
252 else
253 x = converter.StringToFloat(tmp_buf, num_copied_chars, &num_processed_characters);
254
255 if (num_processed_characters < num_copied_chars)
256 {
257 if constexpr (throw_exception)
258 throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER);
259 else
260 return ReturnType(false);
261 }
262
263 if (negative)
264 x = -x;
265 return ReturnType(true);
266 }
267}
268
269
270template <size_t N, typename T>
271static inline void readUIntTextUpToNSignificantDigits(T & x, ReadBuffer & buf)
272{
273 /// In optimistic case we can skip bound checking for first loop.
274 if (buf.position() + N <= buf.buffer().end())
275 {
276 for (size_t i = 0; i < N; ++i)
277 {
278 if (isNumericASCII(*buf.position()))
279 {
280 x *= 10;
281 x += *buf.position() & 0x0F;
282 ++buf.position();
283 }
284 else
285 return;
286 }
287
288 while (!buf.eof() && isNumericASCII(*buf.position()))
289 ++buf.position();
290 }
291 else
292 {
293 for (size_t i = 0; i < N; ++i)
294 {
295 if (!buf.eof() && isNumericASCII(*buf.position()))
296 {
297 x *= 10;
298 x += *buf.position() & 0x0F;
299 ++buf.position();
300 }
301 else
302 return;
303 }
304
305 while (!buf.eof() && isNumericASCII(*buf.position()))
306 ++buf.position();
307 }
308}
309
310
311template <typename T, typename ReturnType>
312ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
313{
314 static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>, "Argument for readFloatTextImpl must be float or double");
315 static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII"); //-V590
316
317 static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
318
319 bool negative = false;
320 x = 0;
321 UInt64 before_point = 0;
322 UInt64 after_point = 0;
323 int after_point_exponent = 0;
324 int exponent = 0;
325
326 if (in.eof())
327 {
328 if constexpr (throw_exception)
329 throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER);
330 else
331 return false;
332 }
333
334 if (*in.position() == '-')
335 {
336 negative = true;
337 ++in.position();
338 }
339
340 auto count_after_sign = in.count();
341
342 constexpr int significant_digits = std::numeric_limits<UInt64>::digits10;
343 readUIntTextUpToNSignificantDigits<significant_digits>(before_point, in);
344
345 int read_digits = in.count() - count_after_sign;
346
347 if (unlikely(read_digits > significant_digits))
348 {
349 int before_point_additional_exponent = read_digits - significant_digits;
350 x = shift10(before_point, before_point_additional_exponent);
351 }
352 else
353 {
354 x = before_point;
355
356 /// Shortcut for the common case when there is an integer that fit in Int64.
357 if (read_digits && (in.eof() || *in.position() < '.'))
358 {
359 if (negative)
360 x = -x;
361 return ReturnType(true);
362 }
363 }
364
365 if (checkChar('.', in))
366 {
367 auto after_point_count = in.count();
368
369 while (!in.eof() && *in.position() == '0')
370 ++in.position();
371
372 auto after_leading_zeros_count = in.count();
373 auto after_point_num_leading_zeros = after_leading_zeros_count - after_point_count;
374
375 readUIntTextUpToNSignificantDigits<significant_digits>(after_point, in);
376 read_digits = in.count() - after_leading_zeros_count;
377 after_point_exponent = (read_digits > significant_digits ? -significant_digits : -read_digits) - after_point_num_leading_zeros;
378 }
379
380 if (checkChar('e', in) || checkChar('E', in))
381 {
382 if (in.eof())
383 {
384 if constexpr (throw_exception)
385 throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER);
386 else
387 return false;
388 }
389
390 bool exponent_negative = false;
391 if (*in.position() == '-')
392 {
393 exponent_negative = true;
394 ++in.position();
395 }
396 else if (*in.position() == '+')
397 {
398 ++in.position();
399 }
400
401 readUIntTextUpToNSignificantDigits<4>(exponent, in);
402 if (exponent_negative)
403 exponent = -exponent;
404 }
405
406 if (after_point)
407 x += shift10(after_point, after_point_exponent);
408
409 if (exponent)
410 x = shift10(x, exponent);
411
412 if (negative)
413 x = -x;
414
415 auto num_characters_without_sign = in.count() - count_after_sign;
416
417 /// Denormals. At most one character is read before denormal and it is '-'.
418 if (num_characters_without_sign == 0)
419 {
420 if (in.eof())
421 {
422 if constexpr (throw_exception)
423 throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER);
424 else
425 return false;
426 }
427
428 if (*in.position() == 'i' || *in.position() == 'I')
429 {
430 if (assertOrParseInfinity<throw_exception>(in))
431 {
432 x = std::numeric_limits<T>::infinity();
433 if (negative)
434 x = -x;
435 return ReturnType(true);
436 }
437 return ReturnType(false);
438 }
439 else if (*in.position() == 'n' || *in.position() == 'N')
440 {
441 if (assertOrParseNaN<throw_exception>(in))
442 {
443 x = std::numeric_limits<T>::quiet_NaN();
444 if (negative)
445 x = -x;
446 return ReturnType(true);
447 }
448 return ReturnType(false);
449 }
450 }
451
452 return ReturnType(true);
453}
454
455
456template <typename T, typename ReturnType>
457ReturnType readFloatTextSimpleImpl(T & x, ReadBuffer & buf)
458{
459 static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
460
461 bool negative = false;
462 x = 0;
463 bool after_point = false;
464 double power_of_ten = 1;
465
466 if (buf.eof())
467 throwReadAfterEOF();
468
469 while (!buf.eof())
470 {
471 switch (*buf.position())
472 {
473 case '+':
474 break;
475 case '-':
476 negative = true;
477 break;
478 case '.':
479 after_point = true;
480 break;
481 case '0': [[fallthrough]];
482 case '1': [[fallthrough]];
483 case '2': [[fallthrough]];
484 case '3': [[fallthrough]];
485 case '4': [[fallthrough]];
486 case '5': [[fallthrough]];
487 case '6': [[fallthrough]];
488 case '7': [[fallthrough]];
489 case '8': [[fallthrough]];
490 case '9':
491 if (after_point)
492 {
493 power_of_ten /= 10;
494 x += (*buf.position() - '0') * power_of_ten;
495 }
496 else
497 {
498 x *= 10;
499 x += *buf.position() - '0';
500 }
501 break;
502 case 'e': [[fallthrough]];
503 case 'E':
504 {
505 ++buf.position();
506 Int32 exponent = 0;
507 readIntText(exponent, buf);
508 x = shift10(x, exponent);
509 if (negative)
510 x = -x;
511 return ReturnType(true);
512 }
513
514 case 'i': [[fallthrough]];
515 case 'I':
516 {
517 if (assertOrParseInfinity<throw_exception>(buf))
518 {
519 x = std::numeric_limits<T>::infinity();
520 if (negative)
521 x = -x;
522 return ReturnType(true);
523 }
524 return ReturnType(false);
525 }
526
527 case 'n': [[fallthrough]];
528 case 'N':
529 {
530 if (assertOrParseNaN<throw_exception>(buf))
531 {
532 x = std::numeric_limits<T>::quiet_NaN();
533 if (negative)
534 x = -x;
535 return ReturnType(true);
536 }
537 return ReturnType(false);
538 }
539
540 default:
541 {
542 if (negative)
543 x = -x;
544 return ReturnType(true);
545 }
546 }
547 ++buf.position();
548 }
549
550 if (negative)
551 x = -x;
552
553 return ReturnType(true);
554}
555
556
557template <typename T> void readFloatTextPrecise(T & x, ReadBuffer & in) { readFloatTextPreciseImpl<T, void>(x, in); }
558template <typename T> bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { return readFloatTextPreciseImpl<T, bool>(x, in); }
559
560template <typename T> void readFloatTextFast(T & x, ReadBuffer & in) { readFloatTextFastImpl<T, void>(x, in); }
561template <typename T> bool tryReadFloatTextFast(T & x, ReadBuffer & in) { return readFloatTextFastImpl<T, bool>(x, in); }
562
563template <typename T> void readFloatTextSimple(T & x, ReadBuffer & in) { readFloatTextSimpleImpl<T, void>(x, in); }
564template <typename T> bool tryReadFloatTextSimple(T & x, ReadBuffer & in) { return readFloatTextSimpleImpl<T, bool>(x, in); }
565
566
567/// Implementation that is selected as default.
568
569template <typename T> void readFloatText(T & x, ReadBuffer & in) { readFloatTextFast(x, in); }
570template <typename T> bool tryReadFloatText(T & x, ReadBuffer & in) { return tryReadFloatTextFast(x, in); }
571
572
573}
574