1#include "simdjson/internal/numberparsing_tables.h"
2#include <limits>
3
4namespace simdjson {
5namespace SIMDJSON_IMPLEMENTATION {
6
namespace ondemand {
/**
 * Classification of a JSON number according to the C++ type that can hold it.
 */
enum class number_type {
  /// A binary64 (double precision) number.
  floating_point_number = 1,
  /// A signed integer that fits in a 64-bit word using two's complement.
  signed_integer = 2,
  /// A positive integer larger than or equal to 1<<63.
  unsigned_integer = 3
};
} // namespace ondemand
17
18namespace {
19/// @private
20namespace numberparsing {
21
22
23
// Hooks used by the number parser to report its result.
//
// INVALID_NUMBER(SRC)                 evaluates to NUMBER_ERROR.
// WRITE_INTEGER(VALUE, SRC, WRITER)   calls WRITER.append_s64(VALUE).
// WRITE_UNSIGNED(VALUE, SRC, WRITER)  calls WRITER.append_u64(VALUE).
// WRITE_DOUBLE(VALUE, SRC, WRITER)    calls WRITER.append_double(VALUE).
//
// When JSON_TEST_NUMBERS is defined, each macro first invokes the matching
// found_* callback (via the comma operator) and then does the above.
#ifdef JSON_TEST_NUMBERS
#define INVALID_NUMBER(SRC) \
  (found_invalid_number((SRC)), NUMBER_ERROR)
#define WRITE_INTEGER(VALUE, SRC, WRITER) \
  (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE)))
#define WRITE_UNSIGNED(VALUE, SRC, WRITER) \
  (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE)))
#define WRITE_DOUBLE(VALUE, SRC, WRITER) \
  (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE)))
#else
#define INVALID_NUMBER(SRC) \
  (NUMBER_ERROR)
#define WRITE_INTEGER(VALUE, SRC, WRITER) \
  (WRITER).append_s64((VALUE))
#define WRITE_UNSIGNED(VALUE, SRC, WRITER) \
  (WRITER).append_u64((VALUE))
#define WRITE_DOUBLE(VALUE, SRC, WRITER) \
  (WRITER).append_double((VALUE))
#endif
35
36namespace {
37// Convert a mantissa, an exponent and a sign bit into an ieee64 double.
38// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable).
39// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed.
40simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) {
41 double d;
42 mantissa &= ~(1ULL << 52);
43 mantissa |= real_exponent << 52;
44 mantissa |= ((static_cast<uint64_t>(negative)) << 63);
45 std::memcpy(dest: &d, src: &mantissa, n: sizeof(d));
46 return d;
47}
48}
49// Attempts to compute i * 10^(power) exactly; and if "negative" is
50// true, negate the result.
51// This function will only work in some cases, when it does not work, success is
52// set to false. This should work *most of the time* (like 99% of the time).
53// We assume that power is in the [smallest_power,
54// largest_power] interval: the caller is responsible for this check.
55simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) {
56 // we start with a fast path
57 // It was described in
58 // Clinger WD. How to read floating point numbers accurately.
59 // ACM SIGPLAN Notices. 1990
60#ifndef FLT_EVAL_METHOD
61#error "FLT_EVAL_METHOD should be defined, please include cfloat."
62#endif
63#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
64 // We cannot be certain that x/y is rounded to nearest.
65 if (0 <= power && power <= 22 && i <= 9007199254740991) {
66#else
67 if (-22 <= power && power <= 22 && i <= 9007199254740991) {
68#endif
69 // convert the integer into a double. This is lossless since
70 // 0 <= i <= 2^53 - 1.
71 d = double(i);
72 //
73 // The general idea is as follows.
74 // If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then
75 // 1) Both s and p can be represented exactly as 64-bit floating-point
76 // values
77 // (binary64).
78 // 2) Because s and p can be represented exactly as floating-point values,
79 // then s * p
80 // and s / p will produce correctly rounded values.
81 //
82 if (power < 0) {
83 d = d / simdjson::internal::power_of_ten[-power];
84 } else {
85 d = d * simdjson::internal::power_of_ten[power];
86 }
87 if (negative) {
88 d = -d;
89 }
90 return true;
91 }
92 // When 22 < power && power < 22 + 16, we could
93 // hope for another, secondary fast path. It was
94 // described by David M. Gay in "Correctly rounded
95 // binary-decimal and decimal-binary conversions." (1990)
96 // If you need to compute i * 10^(22 + x) for x < 16,
97 // first compute i * 10^x, if you know that result is exact
98 // (e.g., when i * 10^x < 2^53),
99 // then you can still proceed and do (i * 10^x) * 10^22.
100 // Is this worth your time?
101 // You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53)
102 // for this second fast path to work.
103 // If you you have 22 < power *and* power < 22 + 16, and then you
104 // optimistically compute "i * 10^(x-22)", there is still a chance that you
105 // have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of
106 // this optimization maybe less common than we would like. Source:
107 // http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
108 // also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html
109
110 // The fast path has now failed, so we are failing back on the slower path.
111
112 // In the slow path, we need to adjust i so that it is > 1<<63 which is always
113 // possible, except if i == 0, so we handle i == 0 separately.
114 if(i == 0) {
115 d = negative ? -0.0 : 0.0;
116 return true;
117 }
118
119
120 // The exponent is 1024 + 63 + power
121 // + floor(log(5**power)/log(2)).
122 // The 1024 comes from the ieee64 standard.
123 // The 63 comes from the fact that we use a 64-bit word.
124 //
125 // Computing floor(log(5**power)/log(2)) could be
126 // slow. Instead we use a fast function.
127 //
128 // For power in (-400,350), we have that
129 // (((152170 + 65536) * power ) >> 16);
130 // is equal to
131 // floor(log(5**power)/log(2)) + power when power >= 0
132 // and it is equal to
133 // ceil(log(5**-power)/log(2)) + power when power < 0
134 //
135 // The 65536 is (1<<16) and corresponds to
136 // (65536 * power) >> 16 ---> power
137 //
138 // ((152170 * power ) >> 16) is equal to
139 // floor(log(5**power)/log(2))
140 //
141 // Note that this is not magic: 152170/(1<<16) is
142 // approximatively equal to log(5)/log(2).
143 // The 1<<16 value is a power of two; we could use a
144 // larger power of 2 if we wanted to.
145 //
146 int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63;
147
148
149 // We want the most significant bit of i to be 1. Shift if needed.
150 int lz = leading_zeroes(input_num: i);
151 i <<= lz;
152
153
154 // We are going to need to do some 64-bit arithmetic to get a precise product.
155 // We use a table lookup approach.
156 // It is safe because
157 // power >= smallest_power
158 // and power <= largest_power
159 // We recover the mantissa of the power, it has a leading 1. It is always
160 // rounded down.
161 //
162 // We want the most significant 64 bits of the product. We know
163 // this will be non-zero because the most significant bit of i is
164 // 1.
165 const uint32_t index = 2 * uint32_t(power - simdjson::internal::smallest_power);
166 // Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.)
167 //
168 // The full_multiplication function computes the 128-bit product of two 64-bit words
169 // with a returned value of type value128 with a "low component" corresponding to the
170 // 64-bit least significant bits of the product and with a "high component" corresponding
171 // to the 64-bit most significant bits of the product.
172 simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(value1: i, value2: simdjson::internal::power_of_five_128[index]);
173 // Both i and power_of_five_128[index] have their most significant bit set to 1 which
174 // implies that the either the most or the second most significant bit of the product
175 // is 1. We pack values in this manner for efficiency reasons: it maximizes the use
176 // we make of the product. It also makes it easy to reason about the product: there
177 // is 0 or 1 leading zero in the product.
178
179 // Unless the least significant 9 bits of the high (64-bit) part of the full
180 // product are all 1s, then we know that the most significant 55 bits are
181 // exact and no further work is needed. Having 55 bits is necessary because
182 // we need 53 bits for the mantissa but we have to have one rounding bit and
183 // we can waste a bit if the most significant bit of the product is zero.
184 if((firstproduct.high & 0x1FF) == 0x1FF) {
185 // We want to compute i * 5^q, but only care about the top 55 bits at most.
186 // Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing
187 // the full computation is wasteful. So we do what is called a "truncated
188 // multiplication".
189 // We take the most significant 64-bits, and we put them in
190 // power_of_five_128[index]. Usually, that's good enough to approximate i * 5^q
191 // to the desired approximation using one multiplication. Sometimes it does not suffice.
192 // Then we store the next most significant 64 bits in power_of_five_128[index + 1], and
193 // then we get a better approximation to i * 5^q. In very rare cases, even that
194 // will not suffice, though it is seemingly very hard to find such a scenario.
195 //
196 // That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat
197 // more complicated.
198 //
199 // There is an extra layer of complexity in that we need more than 55 bits of
200 // accuracy in the round-to-even scenario.
201 //
202 // The full_multiplication function computes the 128-bit product of two 64-bit words
203 // with a returned value of type value128 with a "low component" corresponding to the
204 // 64-bit least significant bits of the product and with a "high component" corresponding
205 // to the 64-bit most significant bits of the product.
206 simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(value1: i, value2: simdjson::internal::power_of_five_128[index + 1]);
207 firstproduct.low += secondproduct.high;
208 if(secondproduct.high > firstproduct.low) { firstproduct.high++; }
209 // At this point, we might need to add at most one to firstproduct, but this
210 // can only change the value of firstproduct.high if firstproduct.low is maximal.
211 if(simdjson_unlikely(firstproduct.low == 0xFFFFFFFFFFFFFFFF)) {
212 // This is very unlikely, but if so, we need to do much more work!
213 return false;
214 }
215 }
216 uint64_t lower = firstproduct.low;
217 uint64_t upper = firstproduct.high;
218 // The final mantissa should be 53 bits with a leading 1.
219 // We shift it so that it occupies 54 bits with a leading 1.
220 ///////
221 uint64_t upperbit = upper >> 63;
222 uint64_t mantissa = upper >> (upperbit + 9);
223 lz += int(1 ^ upperbit);
224
225 // Here we have mantissa < (1<<54).
226 int64_t real_exponent = exponent - lz;
227 if (simdjson_unlikely(real_exponent <= 0)) { // we have a subnormal?
228 // Here have that real_exponent <= 0 so -real_exponent >= 0
229 if(-real_exponent + 1 >= 64) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure.
230 d = negative ? -0.0 : 0.0;
231 return true;
232 }
233 // next line is safe because -real_exponent + 1 < 0
234 mantissa >>= -real_exponent + 1;
235 // Thankfully, we can't have both "round-to-even" and subnormals because
236 // "round-to-even" only occurs for powers close to 0.
237 mantissa += (mantissa & 1); // round up
238 mantissa >>= 1;
239 // There is a weird scenario where we don't have a subnormal but just.
240 // Suppose we start with 2.2250738585072013e-308, we end up
241 // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal
242 // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round
243 // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer
244 // subnormal, but we can only know this after rounding.
245 // So we only declare a subnormal if we are smaller than the threshold.
246 real_exponent = (mantissa < (uint64_t(1) << 52)) ? 0 : 1;
247 d = to_double(mantissa, real_exponent, negative);
248 return true;
249 }
250 // We have to round to even. The "to even" part
251 // is only a problem when we are right in between two floats
252 // which we guard against.
253 // If we have lots of trailing zeros, we may fall right between two
254 // floating-point values.
255 //
256 // The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54]
257 // times a power of two. That is, it is right between a number with binary significand
258 // m and another number with binary significand m+1; and it must be the case
259 // that it cannot be represented by a float itself.
260 //
261 // We must have that w * 10 ^q == (2m+1) * 2^p for some power of two 2^p.
262 // Recall that 10^q = 5^q * 2^q.
263 // When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that
264 // 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23.
265 // When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so
266 // (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have
267 // 2^{53} x 5^{-q} < 2^{64}.
268 // Hence we have 5^{-q} < 2^{11}$ or q>= -4.
269 //
270 // We require lower <= 1 and not lower == 0 because we could not prove that
271 // that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test.
272 if (simdjson_unlikely((lower <= 1) && (power >= -4) && (power <= 23) && ((mantissa & 3) == 1))) {
273 if((mantissa << (upperbit + 64 - 53 - 2)) == upper) {
274 mantissa &= ~1; // flip it so that we do not round up
275 }
276 }
277
278 mantissa += mantissa & 1;
279 mantissa >>= 1;
280
281 // Here we have mantissa < (1<<53), unless there was an overflow
282 if (mantissa >= (1ULL << 53)) {
283 //////////
284 // This will happen when parsing values such as 7.2057594037927933e+16
285 ////////
286 mantissa = (1ULL << 52);
287 real_exponent++;
288 }
289 mantissa &= ~(1ULL << 52);
290 // we have to check that real_exponent is in range, otherwise we bail out
291 if (simdjson_unlikely(real_exponent > 2046)) {
292 // We have an infinite value!!! We could actually throw an error here if we could.
293 return false;
294 }
295 d = to_double(mantissa, real_exponent, negative);
296 return true;
297}
298
299// We call a fallback floating-point parser that might be slow. Note
300// it will accept JSON numbers, but the JSON spec. is more restrictive so
301// before you call parse_float_fallback, you need to have validated the input
302// string with the JSON grammar.
303// It will return an error (false) if the parsed number is infinite.
304// The string parsing itself always succeeds. We know that there is at least
305// one digit.
306static bool parse_float_fallback(const uint8_t *ptr, double *outDouble) {
307 *outDouble = simdjson::internal::from_chars(first: reinterpret_cast<const char *>(ptr));
308 // We do not accept infinite values.
309
310 // Detecting finite values in a portable manner is ridiculously hard, ideally
311 // we would want to do:
312 // return !std::isfinite(*outDouble);
313 // but that mysteriously fails under legacy/old libc++ libraries, see
314 // https://github.com/simdjson/simdjson/issues/1286
315 //
316 // Therefore, fall back to this solution (the extra parens are there
317 // to handle that max may be a macro on windows).
318 return !(*outDouble > (std::numeric_limits<double>::max)() || *outDouble < std::numeric_limits<double>::lowest());
319}
320static bool parse_float_fallback(const uint8_t *ptr, const uint8_t *end_ptr, double *outDouble) {
321 *outDouble = simdjson::internal::from_chars(first: reinterpret_cast<const char *>(ptr), end: reinterpret_cast<const char *>(end_ptr));
322 // We do not accept infinite values.
323
324 // Detecting finite values in a portable manner is ridiculously hard, ideally
325 // we would want to do:
326 // return !std::isfinite(*outDouble);
327 // but that mysteriously fails under legacy/old libc++ libraries, see
328 // https://github.com/simdjson/simdjson/issues/1286
329 //
330 // Therefore, fall back to this solution (the extra parens are there
331 // to handle that max may be a macro on windows).
332 return !(*outDouble > (std::numeric_limits<double>::max)() || *outDouble < std::numeric_limits<double>::lowest());
333}
334
335// check quickly whether the next 8 chars are made of digits
336// at a glance, it looks better than Mula's
337// http://0x80.pl/articles/swar-digits-validate.html
338simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) {
339 uint64_t val;
340 // this can read up to 7 bytes beyond the buffer size, but we require
341 // SIMDJSON_PADDING of padding
342 static_assert(7 <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7");
343 std::memcpy(dest: &val, src: chars, n: 8);
344 // a branchy method might be faster:
345 // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030)
346 // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) ==
347 // 0x3030303030303030);
348 return (((val & 0xF0F0F0F0F0F0F0F0) |
349 (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) ==
350 0x3333333333333333);
351}
352
353template<typename W>
354error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) {
355 double d;
356 if (parse_float_fallback(ptr: src, outDouble: &d)) {
357 writer.append_double(d);
358 return SUCCESS;
359 }
360 return INVALID_NUMBER(src);
361}
362
363template<typename I>
364SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later
365simdjson_inline bool parse_digit(const uint8_t c, I &i) {
366 const uint8_t digit = static_cast<uint8_t>(c - '0');
367 if (digit > 9) {
368 return false;
369 }
370 // PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication
371 i = 10 * i + digit; // might overflow, we will handle the overflow later
372 return true;
373}
374
375simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) {
376 // we continue with the fiction that we have an integer. If the
377 // floating point number is representable as x * 10^z for some integer
378 // z that fits in 53 bits, then we will be able to convert back the
379 // the integer into a float in a lossless manner.
380 const uint8_t *const first_after_period = p;
381
382#ifdef SIMDJSON_SWAR_NUMBER_PARSING
383#if SIMDJSON_SWAR_NUMBER_PARSING
384 // this helps if we have lots of decimals!
385 // this turns out to be frequent enough.
386 if (is_made_of_eight_digits_fast(chars: p)) {
387 i = i * 100000000 + parse_eight_digits_unrolled(chars: p);
388 p += 8;
389 }
390#endif // SIMDJSON_SWAR_NUMBER_PARSING
391#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING
392 // Unrolling the first digit makes a small difference on some implementations (e.g. westmere)
393 if (parse_digit(c: *p, i)) { ++p; }
394 while (parse_digit(c: *p, i)) { p++; }
395 exponent = first_after_period - p;
396 // Decimal without digits (123.) is illegal
397 if (exponent == 0) {
398 return INVALID_NUMBER(src);
399 }
400 return SUCCESS;
401}
402
403simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) {
404 // Exp Sign: -123.456e[-]78
405 bool neg_exp = ('-' == *p);
406 if (neg_exp || '+' == *p) { p++; } // Skip + as well
407
408 // Exponent: -123.456e-[78]
409 auto start_exp = p;
410 int64_t exp_number = 0;
411 while (parse_digit(c: *p, i&: exp_number)) { ++p; }
412 // It is possible for parse_digit to overflow.
413 // In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN.
414 // Thus we *must* check for possible overflow before we negate exp_number.
415
416 // Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into
417 // a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may
418 // not oblige and may, in fact, generate two distinct paths in any case. It might be
419 // possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off
420 // instructions for a simdjson_likely branch, an unconclusive gain.
421
422 // If there were no digits, it's an error.
423 if (simdjson_unlikely(p == start_exp)) {
424 return INVALID_NUMBER(src);
425 }
426 // We have a valid positive exponent in exp_number at this point, except that
427 // it may have overflowed.
428
429 // If there were more than 18 digits, we may have overflowed the integer. We have to do
430 // something!!!!
431 if (simdjson_unlikely(p > start_exp+18)) {
432 // Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow
433 while (*start_exp == '0') { start_exp++; }
434 // 19 digits could overflow int64_t and is kind of absurd anyway. We don't
435 // support exponents smaller than -999,999,999,999,999,999 and bigger
436 // than 999,999,999,999,999,999.
437 // We can truncate.
438 // Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before
439 // infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could
440 // truncate at 324.
441 // Note that there is no reason to fail per se at this point in time.
442 // E.g., 0e999999999999999999999 is a fine number.
443 if (p > start_exp+18) { exp_number = 999999999999999999; }
444 }
445 // At this point, we know that exp_number is a sane, positive, signed integer.
446 // It is <= 999,999,999,999,999,999. As long as 'exponent' is in
447 // [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent'
448 // is bounded in magnitude by the size of the JSON input, we are fine in this universe.
449 // To sum it up: the next line should never overflow.
450 exponent += (neg_exp ? -exp_number : exp_number);
451 return SUCCESS;
452}
453
454simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) {
455 // It is possible that the integer had an overflow.
456 // We have to handle the case where we have 0.0000somenumber.
457 const uint8_t *start = start_digits;
458 while ((*start == '0') || (*start == '.')) { ++start; }
459 // we over-decrement by one when there is a '.'
460 return digit_count - size_t(start - start_digits);
461}
462
463template<typename W>
464simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) {
465 // If we frequently had to deal with long strings of digits,
466 // we could extend our code by using a 128-bit integer instead
467 // of a 64-bit integer. However, this is uncommon in practice.
468 //
469 // 9999999999999999999 < 2**64 so we can accommodate 19 digits.
470 // If we have a decimal separator, then digit_count - 1 is the number of digits, but we
471 // may not have a decimal separator!
472 if (simdjson_unlikely(digit_count > 19 && significant_digits(start_digits, digit_count) > 19)) {
473 // Ok, chances are good that we had an overflow!
474 // this is almost never going to get called!!!
475 // we start anew, going slowly!!!
476 // This will happen in the following examples:
477 // 10000000000000000000000000000000000000000000e+308
478 // 3.1415926535897932384626433832795028841971693993751
479 //
480 // NOTE: This makes a *copy* of the writer and passes it to slow_float_parsing. This happens
481 // because slow_float_parsing is a non-inlined function. If we passed our writer reference to
482 // it, it would force it to be stored in memory, preventing the compiler from picking it apart
483 // and putting into registers. i.e. if we pass it as reference, it gets slow.
484 // This is what forces the skip_double, as well.
485 error_code error = slow_float_parsing(src, writer);
486 writer.skip_double();
487 return error;
488 }
489 // NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other
490 // way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331
491 // To future reader: we'd love if someone found a better way, or at least could explain this result!
492 if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) || (exponent > simdjson::internal::largest_power)) {
493 //
494 // Important: smallest_power is such that it leads to a zero value.
495 // Observe that 18446744073709551615e-343 == 0, i.e. (2**64 - 1) e -343 is zero
496 // so something x 10^-343 goes to zero, but not so with something x 10^-342.
497 static_assert(simdjson::internal::smallest_power <= -342, "smallest_power is not small enough");
498 //
499 if((exponent < simdjson::internal::smallest_power) || (i == 0)) {
500 // E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero
501 WRITE_DOUBLE(negative ? -0.0 : 0.0, src, writer);
502 return SUCCESS;
503 } else { // (exponent > largest_power) and (i != 0)
504 // We have, for sure, an infinite value and simdjson refuses to parse infinite values.
505 return INVALID_NUMBER(src);
506 }
507 }
508 double d;
509 if (!compute_float_64(power: exponent, i, negative, d)) {
510 // we are almost never going to get here.
511 if (!parse_float_fallback(ptr: src, outDouble: &d)) { return INVALID_NUMBER(src); }
512 }
513 WRITE_DOUBLE(d, src, writer);
514 return SUCCESS;
515}
516
517// for performance analysis, it is sometimes useful to skip parsing
518#ifdef SIMDJSON_SKIPNUMBERPARSING
519
520template<typename W>
521simdjson_inline error_code parse_number(const uint8_t *const, W &writer) {
522 writer.append_s64(0); // always write zero
523 return SUCCESS; // always succeeds
524}
525
526simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src) noexcept { return 0; }
527simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t * const src) noexcept { return 0; }
528simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * const src) noexcept { return 0; }
529simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned_in_string(const uint8_t * const src) noexcept { return 0; }
530simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer_in_string(const uint8_t * const src) noexcept { return 0; }
531simdjson_unused simdjson_inline simdjson_result<double> parse_double_in_string(const uint8_t * const src) noexcept { return 0; }
532simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; }
533simdjson_unused simdjson_inline simdjson_result<bool> is_integer(const uint8_t * src) noexcept { return false; }
534simdjson_unused simdjson_inline simdjson_result<ondemand::number_type> get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; }
535#else
536
537// parse the number at src
538// define JSON_TEST_NUMBERS for unit testing
539//
540// It is assumed that the number is followed by a structural ({,},],[) character
541// or a white space character. If that is not the case (e.g., when the JSON
542// document is made of a single number), then it is necessary to copy the
543// content and append a space before calling this function.
544//
545// Our objective is accurate parsing (ULP of 0) at high speed.
546template<typename W>
547simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) {
548
549 //
550 // Check for minus sign
551 //
552 bool negative = (*src == '-');
553 const uint8_t *p = src + uint8_t(negative);
554
555 //
556 // Parse the integer part.
557 //
558 // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
559 const uint8_t *const start_digits = p;
560 uint64_t i = 0;
561 while (parse_digit(c: *p, i)) { p++; }
562
563 // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
564 // Optimization note: size_t is expected to be unsigned.
565 size_t digit_count = size_t(p - start_digits);
566 if (digit_count == 0 || ('0' == *start_digits && digit_count > 1)) { return INVALID_NUMBER(src); }
567
568 //
569 // Handle floats if there is a . or e (or both)
570 //
571 int64_t exponent = 0;
572 bool is_float = false;
573 if ('.' == *p) {
574 is_float = true;
575 ++p;
576 SIMDJSON_TRY( parse_decimal(src, p, i, exponent) );
577 digit_count = int(p - start_digits); // used later to guard against overflows
578 }
579 if (('e' == *p) || ('E' == *p)) {
580 is_float = true;
581 ++p;
582 SIMDJSON_TRY( parse_exponent(src, p, exponent) );
583 }
584 if (is_float) {
585 const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(c: *p);
586 SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) );
587 if (dirty_end) { return INVALID_NUMBER(src); }
588 return SUCCESS;
589 }
590
591 // The longest negative 64-bit number is 19 digits.
592 // The longest positive 64-bit number is 20 digits.
593 // We do it this way so we don't trigger this branch unless we must.
594 size_t longest_digit_count = negative ? 19 : 20;
595 if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); }
596 if (digit_count == longest_digit_count) {
597 if (negative) {
598 // Anything negative above INT64_MAX+1 is invalid
599 if (i > uint64_t(INT64_MAX)+1) { return INVALID_NUMBER(src); }
600 WRITE_INTEGER(~i+1, src, writer);
601 if (jsoncharutils::is_not_structural_or_whitespace(c: *p)) { return INVALID_NUMBER(src); }
602 return SUCCESS;
603 // Positive overflow check:
604 // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
605 // biggest uint64_t.
606 // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
607 // If we got here, it's a 20 digit number starting with the digit "1".
608 // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
609 // than 1,553,255,926,290,448,384.
610 // - That is smaller than the smallest possible 20-digit number the user could write:
611 // 10,000,000,000,000,000,000.
612 // - Therefore, if the number is positive and lower than that, it's overflow.
613 // - The value we are looking at is less than or equal to INT64_MAX.
614 //
615 } else if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); }
616 }
617
618 // Write unsigned if it doesn't fit in a signed integer.
619 if (i > uint64_t(INT64_MAX)) {
620 WRITE_UNSIGNED(i, src, writer);
621 } else {
622 WRITE_INTEGER(negative ? (~i+1) : i, src, writer);
623 }
624 if (jsoncharutils::is_not_structural_or_whitespace(c: *p)) { return INVALID_NUMBER(src); }
625 return SUCCESS;
626}
627
628// Inlineable functions
629namespace {
630
631// This table can be used to characterize the final character of an integer
632// string. For JSON structural character and allowable white space characters,
633// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise
634// we return NUMBER_ERROR.
635// Optimization note: we could easily reduce the size of the table by half (to 128)
636// at the cost of an extra branch.
637// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits):
638static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast");
639static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast");
640static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast");
641
642const uint8_t integer_string_finisher[256] = {
643 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
644 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS,
645 SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR,
646 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
647 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
648 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
649 NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR,
650 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
651 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS,
652 NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
653 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
654 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR,
655 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
656 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE,
657 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
658 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
659 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
660 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
661 NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR,
662 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
663 NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
664 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
665 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
666 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
667 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR,
668 SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
669 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
670 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
671 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
672 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
673 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
674 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
675 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
676 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
677 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
678 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
679 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
680 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
681 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
682 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
683 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
684 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
685 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
686 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
687 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
688 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
689 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
690 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
691 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
692 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
693 NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
694 NUMBER_ERROR};
695
696// Parse any number from 0 to 18,446,744,073,709,551,615
697simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src) noexcept {
698 const uint8_t *p = src;
699 //
700 // Parse the integer part.
701 //
702 // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
703 const uint8_t *const start_digits = p;
704 uint64_t i = 0;
705 while (parse_digit(c: *p, i)) { p++; }
706
707 // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
708 // Optimization note: size_t is expected to be unsigned.
709 size_t digit_count = size_t(p - start_digits);
710 // The longest positive 64-bit number is 20 digits.
711 // We do it this way so we don't trigger this branch unless we must.
712 // Optimization note: the compiler can probably merge
713 // ((digit_count == 0) || (digit_count > 20))
714 // into a single branch since digit_count is unsigned.
715 if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; }
716 // Here digit_count > 0.
717 if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
718 // We can do the following...
719 // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
720 // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
721 // }
722 // as a single table lookup:
723 if (integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }
724
725 if (digit_count == 20) {
726 // Positive overflow check:
727 // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
728 // biggest uint64_t.
729 // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
730 // If we got here, it's a 20 digit number starting with the digit "1".
731 // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
732 // than 1,553,255,926,290,448,384.
733 // - That is smaller than the smallest possible 20-digit number the user could write:
734 // 10,000,000,000,000,000,000.
735 // - Therefore, if the number is positive and lower than that, it's overflow.
736 // - The value we are looking at is less than or equal to INT64_MAX.
737 //
738 if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
739 }
740
741 return i;
742}
743
744
745// Parse any number from 0 to 18,446,744,073,709,551,615
746// Never read at src_end or beyond
747simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept {
748 const uint8_t *p = src;
749 //
750 // Parse the integer part.
751 //
752 // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
753 const uint8_t *const start_digits = p;
754 uint64_t i = 0;
755 while ((p != src_end) && parse_digit(c: *p, i)) { p++; }
756
757 // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
758 // Optimization note: size_t is expected to be unsigned.
759 size_t digit_count = size_t(p - start_digits);
760 // The longest positive 64-bit number is 20 digits.
761 // We do it this way so we don't trigger this branch unless we must.
762 // Optimization note: the compiler can probably merge
763 // ((digit_count == 0) || (digit_count > 20))
764 // into a single branch since digit_count is unsigned.
765 if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; }
766 // Here digit_count > 0.
767 if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
768 // We can do the following...
769 // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
770 // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
771 // }
772 // as a single table lookup:
773 if ((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }
774
775 if (digit_count == 20) {
776 // Positive overflow check:
777 // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
778 // biggest uint64_t.
779 // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
780 // If we got here, it's a 20 digit number starting with the digit "1".
781 // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
782 // than 1,553,255,926,290,448,384.
783 // - That is smaller than the smallest possible 20-digit number the user could write:
784 // 10,000,000,000,000,000,000.
785 // - Therefore, if the number is positive and lower than that, it's overflow.
786 // - The value we are looking at is less than or equal to INT64_MAX.
787 //
788 if (src[0] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
789 }
790
791 return i;
792}
793
794// Parse any number from 0 to 18,446,744,073,709,551,615
795simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned_in_string(const uint8_t * const src) noexcept {
796 const uint8_t *p = src + 1;
797 //
798 // Parse the integer part.
799 //
800 // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
801 const uint8_t *const start_digits = p;
802 uint64_t i = 0;
803 while (parse_digit(c: *p, i)) { p++; }
804
805 // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
806 // Optimization note: size_t is expected to be unsigned.
807 size_t digit_count = size_t(p - start_digits);
808 // The longest positive 64-bit number is 20 digits.
809 // We do it this way so we don't trigger this branch unless we must.
810 // Optimization note: the compiler can probably merge
811 // ((digit_count == 0) || (digit_count > 20))
812 // into a single branch since digit_count is unsigned.
813 if ((digit_count == 0) || (digit_count > 20)) { return INCORRECT_TYPE; }
814 // Here digit_count > 0.
815 if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
816 // We can do the following...
817 // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
818 // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
819 // }
820 // as a single table lookup:
821 if (*p != '"') { return NUMBER_ERROR; }
822
823 if (digit_count == 20) {
824 // Positive overflow check:
825 // - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
826 // biggest uint64_t.
827 // - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
828 // If we got here, it's a 20 digit number starting with the digit "1".
829 // - If a 20 digit number starting with 1 overflowed (i*10+digit), the result will be smaller
830 // than 1,553,255,926,290,448,384.
831 // - That is smaller than the smallest possible 20-digit number the user could write:
832 // 10,000,000,000,000,000,000.
833 // - Therefore, if the number is positive and lower than that, it's overflow.
834 // - The value we are looking at is less than or equal to INT64_MAX.
835 //
836 // Note: we use src[1] and not src[0] because src[0] is the quote character in this
837 // instance.
838 if (src[1] != uint8_t('1') || i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
839 }
840
841 return i;
842}
843
844// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
845simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t *src) noexcept {
846 //
847 // Check for minus sign
848 //
849 bool negative = (*src == '-');
850 const uint8_t *p = src + uint8_t(negative);
851
852 //
853 // Parse the integer part.
854 //
855 // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
856 const uint8_t *const start_digits = p;
857 uint64_t i = 0;
858 while (parse_digit(c: *p, i)) { p++; }
859
860 // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
861 // Optimization note: size_t is expected to be unsigned.
862 size_t digit_count = size_t(p - start_digits);
863 // We go from
864 // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
865 // so we can never represent numbers that have more than 19 digits.
866 size_t longest_digit_count = 19;
867 // Optimization note: the compiler can probably merge
868 // ((digit_count == 0) || (digit_count > longest_digit_count))
869 // into a single branch since digit_count is unsigned.
870 if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; }
871 // Here digit_count > 0.
872 if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
873 // We can do the following...
874 // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
875 // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
876 // }
877 // as a single table lookup:
878 if(integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }
879 // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX.
880 // Performance note: This check is only needed when digit_count == longest_digit_count but it is
881 // so cheap that we might as well always make it.
882 if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; }
883 return negative ? (~i+1) : i;
884}
885
886// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
887// Never read at src_end or beyond
888simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept {
889 //
890 // Check for minus sign
891 //
892 if(src == src_end) { return NUMBER_ERROR; }
893 bool negative = (*src == '-');
894 const uint8_t *p = src + uint8_t(negative);
895
896 //
897 // Parse the integer part.
898 //
899 // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
900 const uint8_t *const start_digits = p;
901 uint64_t i = 0;
902 while ((p != src_end) && parse_digit(c: *p, i)) { p++; }
903
904 // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
905 // Optimization note: size_t is expected to be unsigned.
906 size_t digit_count = size_t(p - start_digits);
907 // We go from
908 // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
909 // so we can never represent numbers that have more than 19 digits.
910 size_t longest_digit_count = 19;
911 // Optimization note: the compiler can probably merge
912 // ((digit_count == 0) || (digit_count > longest_digit_count))
913 // into a single branch since digit_count is unsigned.
914 if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; }
915 // Here digit_count > 0.
916 if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
917 // We can do the following...
918 // if (!jsoncharutils::is_structural_or_whitespace(*p)) {
919 // return (*p == '.' || *p == 'e' || *p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
920 // }
921 // as a single table lookup:
922 if((p != src_end) && integer_string_finisher[*p] != SUCCESS) { return error_code(integer_string_finisher[*p]); }
923 // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX.
924 // Performance note: This check is only needed when digit_count == longest_digit_count but it is
925 // so cheap that we might as well always make it.
926 if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; }
927 return negative ? (~i+1) : i;
928}
929
930// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
931simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer_in_string(const uint8_t *src) noexcept {
932 //
933 // Check for minus sign
934 //
935 bool negative = (*(src + 1) == '-');
936 src += uint8_t(negative) + 1;
937
938 //
939 // Parse the integer part.
940 //
941 // PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
942 const uint8_t *const start_digits = src;
943 uint64_t i = 0;
944 while (parse_digit(c: *src, i)) { src++; }
945
946 // If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
947 // Optimization note: size_t is expected to be unsigned.
948 size_t digit_count = size_t(src - start_digits);
949 // We go from
950 // -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
951 // so we can never represent numbers that have more than 19 digits.
952 size_t longest_digit_count = 19;
953 // Optimization note: the compiler can probably merge
954 // ((digit_count == 0) || (digit_count > longest_digit_count))
955 // into a single branch since digit_count is unsigned.
956 if ((digit_count == 0) || (digit_count > longest_digit_count)) { return INCORRECT_TYPE; }
957 // Here digit_count > 0.
958 if (('0' == *start_digits) && (digit_count > 1)) { return NUMBER_ERROR; }
959 // We can do the following...
960 // if (!jsoncharutils::is_structural_or_whitespace(*src)) {
961 // return (*src == '.' || *src == 'e' || *src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;
962 // }
963 // as a single table lookup:
964 if(*src != '"') { return NUMBER_ERROR; }
965 // Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX.
966 // Performance note: This check is only needed when digit_count == longest_digit_count but it is
967 // so cheap that we might as well always make it.
968 if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; }
969 return negative ? (~i+1) : i;
970}
971
972simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * src) noexcept {
973 //
974 // Check for minus sign
975 //
976 bool negative = (*src == '-');
977 src += uint8_t(negative);
978
979 //
980 // Parse the integer part.
981 //
982 uint64_t i = 0;
983 const uint8_t *p = src;
984 p += parse_digit(c: *p, i);
985 bool leading_zero = (i == 0);
986 while (parse_digit(c: *p, i)) { p++; }
987 // no integer digits, or 0123 (zero must be solo)
988 if ( p == src ) { return INCORRECT_TYPE; }
989 if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; }
990
991 //
992 // Parse the decimal part.
993 //
994 int64_t exponent = 0;
995 bool overflow;
996 if (simdjson_likely(*p == '.')) {
997 p++;
998 const uint8_t *start_decimal_digits = p;
999 if (!parse_digit(c: *p, i)) { return NUMBER_ERROR; } // no decimal digits
1000 p++;
1001 while (parse_digit(c: *p, i)) { p++; }
1002 exponent = -(p - start_decimal_digits);
1003
1004 // Overflow check. More than 19 digits (minus the decimal) may be overflow.
1005 overflow = p-src-1 > 19;
1006 if (simdjson_unlikely(overflow && leading_zero)) {
1007 // Skip leading 0.00000 and see if it still overflows
1008 const uint8_t *start_digits = src + 2;
1009 while (*start_digits == '0') { start_digits++; }
1010 overflow = start_digits-src > 19;
1011 }
1012 } else {
1013 overflow = p-src > 19;
1014 }
1015
1016 //
1017 // Parse the exponent
1018 //
1019 if (*p == 'e' || *p == 'E') {
1020 p++;
1021 bool exp_neg = *p == '-';
1022 p += exp_neg || *p == '+';
1023
1024 uint64_t exp = 0;
1025 const uint8_t *start_exp_digits = p;
1026 while (parse_digit(c: *p, i&: exp)) { p++; }
1027 // no exp digits, or 20+ exp digits
1028 if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; }
1029
1030 exponent += exp_neg ? 0-exp : exp;
1031 }
1032
1033 if (jsoncharutils::is_not_structural_or_whitespace(c: *p)) { return NUMBER_ERROR; }
1034
1035 overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power;
1036
1037 //
1038 // Assemble (or slow-parse) the float
1039 //
1040 double d;
1041 if (simdjson_likely(!overflow)) {
1042 if (compute_float_64(power: exponent, i, negative, d)) { return d; }
1043 }
1044 if (!parse_float_fallback(ptr: src - uint8_t(negative), outDouble: &d)) {
1045 return NUMBER_ERROR;
1046 }
1047 return d;
1048}
1049
1050simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept {
1051 return (*src == '-');
1052}
1053
1054simdjson_unused simdjson_inline simdjson_result<bool> is_integer(const uint8_t * src) noexcept {
1055 bool negative = (*src == '-');
1056 src += uint8_t(negative);
1057 const uint8_t *p = src;
1058 while(static_cast<uint8_t>(*p - '0') <= 9) { p++; }
1059 if ( p == src ) { return NUMBER_ERROR; }
1060 if (jsoncharutils::is_structural_or_whitespace(c: *p)) { return true; }
1061 return false;
1062}
1063
1064simdjson_unused simdjson_inline simdjson_result<ondemand::number_type> get_number_type(const uint8_t * src) noexcept {
1065 bool negative = (*src == '-');
1066 src += uint8_t(negative);
1067 const uint8_t *p = src;
1068 while(static_cast<uint8_t>(*p - '0') <= 9) { p++; }
1069 if ( p == src ) { return NUMBER_ERROR; }
1070 if (jsoncharutils::is_structural_or_whitespace(c: *p)) {
1071 // We have an integer.
1072 // If the number is negative and valid, it must be a signed integer.
1073 if(negative) { return ondemand::number_type::signed_integer; }
1074 // We want values larger or equal to 9223372036854775808 to be unsigned
1075 // integers, and the other values to be signed integers.
1076 int digit_count = int(p - src);
1077 if(digit_count >= 19) {
1078 const uint8_t * smaller_big_integer = reinterpret_cast<const uint8_t *>("9223372036854775808");
1079 if((digit_count >= 20) || (memcmp(s1: src, s2: smaller_big_integer, n: 19) >= 0)) {
1080 return ondemand::number_type::unsigned_integer;
1081 }
1082 }
1083 return ondemand::number_type::signed_integer;
1084 }
1085 // Hopefully, we have 'e' or 'E' or '.'.
1086 return ondemand::number_type::floating_point_number;
1087}
1088
1089// Never read at src_end or beyond
1090simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept {
1091 if(src == src_end) { return NUMBER_ERROR; }
1092 //
1093 // Check for minus sign
1094 //
1095 bool negative = (*src == '-');
1096 src += uint8_t(negative);
1097
1098 //
1099 // Parse the integer part.
1100 //
1101 uint64_t i = 0;
1102 const uint8_t *p = src;
1103 if(p == src_end) { return NUMBER_ERROR; }
1104 p += parse_digit(c: *p, i);
1105 bool leading_zero = (i == 0);
1106 while ((p != src_end) && parse_digit(c: *p, i)) { p++; }
1107 // no integer digits, or 0123 (zero must be solo)
1108 if ( p == src ) { return INCORRECT_TYPE; }
1109 if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; }
1110
1111 //
1112 // Parse the decimal part.
1113 //
1114 int64_t exponent = 0;
1115 bool overflow;
1116 if (simdjson_likely((p != src_end) && (*p == '.'))) {
1117 p++;
1118 const uint8_t *start_decimal_digits = p;
1119 if ((p == src_end) || !parse_digit(c: *p, i)) { return NUMBER_ERROR; } // no decimal digits
1120 p++;
1121 while ((p != src_end) && parse_digit(c: *p, i)) { p++; }
1122 exponent = -(p - start_decimal_digits);
1123
1124 // Overflow check. More than 19 digits (minus the decimal) may be overflow.
1125 overflow = p-src-1 > 19;
1126 if (simdjson_unlikely(overflow && leading_zero)) {
1127 // Skip leading 0.00000 and see if it still overflows
1128 const uint8_t *start_digits = src + 2;
1129 while (*start_digits == '0') { start_digits++; }
1130 overflow = start_digits-src > 19;
1131 }
1132 } else {
1133 overflow = p-src > 19;
1134 }
1135
1136 //
1137 // Parse the exponent
1138 //
1139 if ((p != src_end) && (*p == 'e' || *p == 'E')) {
1140 p++;
1141 if(p == src_end) { return NUMBER_ERROR; }
1142 bool exp_neg = *p == '-';
1143 p += exp_neg || *p == '+';
1144
1145 uint64_t exp = 0;
1146 const uint8_t *start_exp_digits = p;
1147 while ((p != src_end) && parse_digit(c: *p, i&: exp)) { p++; }
1148 // no exp digits, or 20+ exp digits
1149 if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; }
1150
1151 exponent += exp_neg ? 0-exp : exp;
1152 }
1153
1154 if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(c: *p)) { return NUMBER_ERROR; }
1155
1156 overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power;
1157
1158 //
1159 // Assemble (or slow-parse) the float
1160 //
1161 double d;
1162 if (simdjson_likely(!overflow)) {
1163 if (compute_float_64(power: exponent, i, negative, d)) { return d; }
1164 }
1165 if (!parse_float_fallback(ptr: src - uint8_t(negative), end_ptr: src_end, outDouble: &d)) {
1166 return NUMBER_ERROR;
1167 }
1168 return d;
1169}
1170
1171simdjson_unused simdjson_inline simdjson_result<double> parse_double_in_string(const uint8_t * src) noexcept {
1172 //
1173 // Check for minus sign
1174 //
1175 bool negative = (*(src + 1) == '-');
1176 src += uint8_t(negative) + 1;
1177
1178 //
1179 // Parse the integer part.
1180 //
1181 uint64_t i = 0;
1182 const uint8_t *p = src;
1183 p += parse_digit(c: *p, i);
1184 bool leading_zero = (i == 0);
1185 while (parse_digit(c: *p, i)) { p++; }
1186 // no integer digits, or 0123 (zero must be solo)
1187 if ( p == src ) { return INCORRECT_TYPE; }
1188 if ( (leading_zero && p != src+1)) { return NUMBER_ERROR; }
1189
1190 //
1191 // Parse the decimal part.
1192 //
1193 int64_t exponent = 0;
1194 bool overflow;
1195 if (simdjson_likely(*p == '.')) {
1196 p++;
1197 const uint8_t *start_decimal_digits = p;
1198 if (!parse_digit(c: *p, i)) { return NUMBER_ERROR; } // no decimal digits
1199 p++;
1200 while (parse_digit(c: *p, i)) { p++; }
1201 exponent = -(p - start_decimal_digits);
1202
1203 // Overflow check. More than 19 digits (minus the decimal) may be overflow.
1204 overflow = p-src-1 > 19;
1205 if (simdjson_unlikely(overflow && leading_zero)) {
1206 // Skip leading 0.00000 and see if it still overflows
1207 const uint8_t *start_digits = src + 2;
1208 while (*start_digits == '0') { start_digits++; }
1209 overflow = start_digits-src > 19;
1210 }
1211 } else {
1212 overflow = p-src > 19;
1213 }
1214
1215 //
1216 // Parse the exponent
1217 //
1218 if (*p == 'e' || *p == 'E') {
1219 p++;
1220 bool exp_neg = *p == '-';
1221 p += exp_neg || *p == '+';
1222
1223 uint64_t exp = 0;
1224 const uint8_t *start_exp_digits = p;
1225 while (parse_digit(c: *p, i&: exp)) { p++; }
1226 // no exp digits, or 20+ exp digits
1227 if (p-start_exp_digits == 0 || p-start_exp_digits > 19) { return NUMBER_ERROR; }
1228
1229 exponent += exp_neg ? 0-exp : exp;
1230 }
1231
1232 if (*p != '"') { return NUMBER_ERROR; }
1233
1234 overflow = overflow || exponent < simdjson::internal::smallest_power || exponent > simdjson::internal::largest_power;
1235
1236 //
1237 // Assemble (or slow-parse) the float
1238 //
1239 double d;
1240 if (simdjson_likely(!overflow)) {
1241 if (compute_float_64(power: exponent, i, negative, d)) { return d; }
1242 }
1243 if (!parse_float_fallback(ptr: src - uint8_t(negative), outDouble: &d)) {
1244 return NUMBER_ERROR;
1245 }
1246 return d;
1247}
1248} //namespace {}
1249#endif // SIMDJSON_SKIPNUMBERPARSING
1250
1251} // namespace numberparsing
1252} // unnamed namespace
1253} // namespace SIMDJSON_IMPLEMENTATION
1254} // namespace simdjson
1255