numberparsing.h source code [Velox/build/_deps/simdjson-src/include/simdjson/generic/numberparsing.h]

1	#include "simdjson/internal/numberparsing_tables.h"
2	#include <limits>
3
4	namespace simdjson {
5	namespace SIMDJSON_IMPLEMENTATION {
6
7	namespace ondemand {
8	/**
9	* The type of a JSON number
10	*/
11	enum class number_type {
12	floating_point_number=`1`, /// a binary64 number
13	signed_integer, /// a signed integer that fits in a 64-bit word using two's complement
14	unsigned_integer /// a positive integer larger or equal to 1<<63
15	};
16	}
17
18	namespace {
19	/// @private
20	namespace numberparsing {
21
22
23
24	#ifdef JSON_TEST_NUMBERS
25	#define INVALID_NUMBER(SRC) (found_invalid_number((SRC)), NUMBER_ERROR)
26	#define WRITE_INTEGER(VALUE, SRC, WRITER) (found_integer((VALUE), (SRC)), (WRITER).append_s64((VALUE)))
27	#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (found_unsigned_integer((VALUE), (SRC)), (WRITER).append_u64((VALUE)))
28	#define WRITE_DOUBLE(VALUE, SRC, WRITER) (found_float((VALUE), (SRC)), (WRITER).append_double((VALUE)))
29	#else
30	#define INVALID_NUMBER(SRC) (NUMBER_ERROR)
31	#define WRITE_INTEGER(VALUE, SRC, WRITER) (WRITER).append_s64((VALUE))
32	#define WRITE_UNSIGNED(VALUE, SRC, WRITER) (WRITER).append_u64((VALUE))
33	#define WRITE_DOUBLE(VALUE, SRC, WRITER) (WRITER).append_double((VALUE))
34	#endif
35
36	namespace {
37	// Convert a mantissa, an exponent and a sign bit into an ieee64 double.
38	// The real_exponent needs to be in [0, 2046] (technically real_exponent = 2047 would be acceptable).
39	// The mantissa should be in [0,1<<53). The bit at index (1ULL << 52) while be zeroed.
40	simdjson_inline double to_double(uint64_t mantissa, uint64_t real_exponent, bool negative) {
41	double d;
42	mantissa &= ~(`1ULL` << `52`);
43	mantissa \|= real_exponent << `52`;
44	mantissa \|= ((static_cast<uint64_t>(negative)) << `63`);
45	std::memcpy(dest: &d, src: &mantissa, n: sizeof(d));
46	return d;
47	}
48	}
49	// Attempts to compute i 10^(power) exactly; and if "negative" is*
50	// true, negate the result.
51	// This function will only work in some cases, when it does not work, success is
52	// set to false. This should work most of the time* (like 99% of the time).*
53	// We assume that power is in the [smallest_power,
54	// largest_power] interval: the caller is responsible for this check.
55	simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative, double &d) {
56	// we start with a fast path
57	// It was described in
58	// Clinger WD. How to read floating point numbers accurately.
59	// ACM SIGPLAN Notices. 1990
60	#ifndef FLT_EVAL_METHOD
61	#error "FLT_EVAL_METHOD should be defined, please include cfloat."
62	#endif
63	#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
64	// We cannot be certain that x/y is rounded to nearest.
65	if (`0` <= power && power <= `22` && i <= `9007199254740991`) {
66	#else
67	if (-`22` <= power && power <= `22` && i <= `9007199254740991`) {
68	#endif
69	// convert the integer into a double. This is lossless since
70	// 0 <= i <= 2^53 - 1.
71	d = double(i);
72	//
73	// The general idea is as follows.
74	// If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then
75	// 1) Both s and p can be represented exactly as 64-bit floating-point
76	// values
77	// (binary64).
78	// 2) Because s and p can be represented exactly as floating-point values,
79	// then s p*
80	// and s / p will produce correctly rounded values.
81	//
82	if (power < `0`) {
83	d = d / simdjson::internal::power_of_ten[-power];
84	} else {
85	d = d * simdjson::internal::power_of_ten[power];
86	}
87	if (negative) {
88	d = -d;
89	}
90	return true;
91	}
92	// When 22 < power && power < 22 + 16, we could
93	// hope for another, secondary fast path. It was
94	// described by David M. Gay in "Correctly rounded
95	// binary-decimal and decimal-binary conversions." (1990)
96	// If you need to compute i 10^(22 + x) for x < 16,*
97	// first compute i 10^x, if you know that result is exact*
98	// (e.g., when i 10^x < 2^53),*
99	// then you can still proceed and do (i 10^x) * 10^22.*
100	// Is this worth your time?
101	// You need 22 < power and* power < 22 + 16 and (i * 10^(x-22) < 2^53)*
102	// for this second fast path to work.
103	// If you you have 22 < power and* power < 22 + 16, and then you*
104	// optimistically compute "i 10^(x-22)", there is still a chance that you*
105	// have wasted your time if i 10^(x-22) >= 2^53. It makes the use cases of*
106	// this optimization maybe less common than we would like. Source:
107	// http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
108	// also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html
109
110	// The fast path has now failed, so we are failing back on the slower path.
111
112	// In the slow path, we need to adjust i so that it is > 1<<63 which is always
113	// possible, except if i == 0, so we handle i == 0 separately.
114	if(i == `0`) {
115	d = negative ? -`0.0` : `0.0`;
116	return true;
117	}
118
119
120	// The exponent is 1024 + 63 + power
121	// + floor(log(5power)/log(2)).
122	// The 1024 comes from the ieee64 standard.
123	// The 63 comes from the fact that we use a 64-bit word.
124	//
125	// Computing floor(log(5power)/log(2)) could be
126	// slow. Instead we use a fast function.
127	//
128	// For power in (-400,350), we have that
129	// (((152170 + 65536) power ) >> 16);*
130	// is equal to
131	// floor(log(5power)/log(2)) + power when power >= 0
132	// and it is equal to
133	// ceil(log(5-power)/log(2)) + power when power < 0
134	//
135	// The 65536 is (1<<16) and corresponds to
136	// (65536 power) >> 16 ---> power*
137	//
138	// ((152170 power ) >> 16) is equal to*
139	// floor(log(5power)/log(2))
140	//
141	// Note that this is not magic: 152170/(1<<16) is
142	// approximatively equal to log(5)/log(2).
143	// The 1<<16 value is a power of two; we could use a
144	// larger power of 2 if we wanted to.
145	//
146	int64_t exponent = (((`152170` + `65536`) * power) >> `16`) + `1024` + `63`;
147
148
149	// We want the most significant bit of i to be 1. Shift if needed.
150	int lz = leading_zeroes(input_num: i);
151	i <<= lz;
152
153
154	// We are going to need to do some 64-bit arithmetic to get a precise product.
155	// We use a table lookup approach.
156	// It is safe because
157	// power >= smallest_power
158	// and power <= largest_power
159	// We recover the mantissa of the power, it has a leading 1. It is always
160	// rounded down.
161	//
162	// We want the most significant 64 bits of the product. We know
163	// this will be non-zero because the most significant bit of i is
164	// 1.
165	const uint32_t index = `2` * uint32_t(power - simdjson::internal::smallest_power);
166	// Optimization: It may be that materializing the index as a variable might confuse some compilers and prevent effective complex-addressing loads. (Done for code clarity.)
167	//
168	// The full_multiplication function computes the 128-bit product of two 64-bit words
169	// with a returned value of type value128 with a "low component" corresponding to the
170	// 64-bit least significant bits of the product and with a "high component" corresponding
171	// to the 64-bit most significant bits of the product.
172	simdjson::internal::value128 firstproduct = jsoncharutils::full_multiplication(value1: i, value2: simdjson::internal::power_of_five_128[index]);
173	// Both i and power_of_five_128[index] have their most significant bit set to 1 which
174	// implies that the either the most or the second most significant bit of the product
175	// is 1. We pack values in this manner for efficiency reasons: it maximizes the use
176	// we make of the product. It also makes it easy to reason about the product: there
177	// is 0 or 1 leading zero in the product.
178
179	// Unless the least significant 9 bits of the high (64-bit) part of the full
180	// product are all 1s, then we know that the most significant 55 bits are
181	// exact and no further work is needed. Having 55 bits is necessary because
182	// we need 53 bits for the mantissa but we have to have one rounding bit and
183	// we can waste a bit if the most significant bit of the product is zero.
184	if((firstproduct.high & `0x1FF`) == `0x1FF`) {
185	// We want to compute i 5^q, but only care about the top 55 bits at most.*
186	// Consider the scenario where q>=0. Then 5^q may not fit in 64-bits. Doing
187	// the full computation is wasteful. So we do what is called a "truncated
188	// multiplication".
189	// We take the most significant 64-bits, and we put them in
190	// power_of_five_128[index]. Usually, that's good enough to approximate i 5^q*
191	// to the desired approximation using one multiplication. Sometimes it does not suffice.
192	// Then we store the next most significant 64 bits in power_of_five_128[index + 1], and
193	// then we get a better approximation to i 5^q. In very rare cases, even that*
194	// will not suffice, though it is seemingly very hard to find such a scenario.
195	//
196	// That's for when q>=0. The logic for q<0 is somewhat similar but it is somewhat
197	// more complicated.
198	//
199	// There is an extra layer of complexity in that we need more than 55 bits of
200	// accuracy in the round-to-even scenario.
201	//
202	// The full_multiplication function computes the 128-bit product of two 64-bit words
203	// with a returned value of type value128 with a "low component" corresponding to the
204	// 64-bit least significant bits of the product and with a "high component" corresponding
205	// to the 64-bit most significant bits of the product.
206	simdjson::internal::value128 secondproduct = jsoncharutils::full_multiplication(value1: i, value2: simdjson::internal::power_of_five_128[index + `1`]);
207	firstproduct.low += secondproduct.high;
208	if(secondproduct.high > firstproduct.low) { firstproduct.high++; }
209	// At this point, we might need to add at most one to firstproduct, but this
210	// can only change the value of firstproduct.high if firstproduct.low is maximal.
211	if(simdjson_unlikely(firstproduct.low == `0xFFFFFFFFFFFFFFFF`)) {
212	// This is very unlikely, but if so, we need to do much more work!
213	return false;
214	}
215	}
216	uint64_t lower = firstproduct.low;
217	uint64_t upper = firstproduct.high;
218	// The final mantissa should be 53 bits with a leading 1.
219	// We shift it so that it occupies 54 bits with a leading 1.
220	///////
221	uint64_t upperbit = upper >> `63`;
222	uint64_t mantissa = upper >> (upperbit + `9`);
223	lz += int(`1` ^ upperbit);
224
225	// Here we have mantissa < (1<<54).
226	int64_t real_exponent = exponent - lz;
227	if (simdjson_unlikely(real_exponent <= `0`)) { // we have a subnormal?
228	// Here have that real_exponent <= 0 so -real_exponent >= 0
229	if(-real_exponent + `1` >= `64`) { // if we have more than 64 bits below the minimum exponent, you have a zero for sure.
230	d = negative ? -`0.0` : `0.0`;
231	return true;
232	}
233	// next line is safe because -real_exponent + 1 < 0
234	mantissa >>= -real_exponent + `1`;
235	// Thankfully, we can't have both "round-to-even" and subnormals because
236	// "round-to-even" only occurs for powers close to 0.
237	mantissa += (mantissa & `1`); // round up
238	mantissa >>= `1`;
239	// There is a weird scenario where we don't have a subnormal but just.
240	// Suppose we start with 2.2250738585072013e-308, we end up
241	// with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal
242	// whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round
243	// up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer
244	// subnormal, but we can only know this after rounding.
245	// So we only declare a subnormal if we are smaller than the threshold.
246	real_exponent = (mantissa < (uint64_t(`1`) << `52`)) ? `0` : `1`;
247	d = to_double(mantissa, real_exponent, negative);
248	return true;
249	}
250	// We have to round to even. The "to even" part
251	// is only a problem when we are right in between two floats
252	// which we guard against.
253	// If we have lots of trailing zeros, we may fall right between two
254	// floating-point values.
255	//
256	// The round-to-even cases take the form of a number 2m+1 which is in (2^53,2^54]
257	// times a power of two. That is, it is right between a number with binary significand
258	// m and another number with binary significand m+1; and it must be the case
259	// that it cannot be represented by a float itself.
260	//
261	// We must have that w 10 ^q == (2m+1) * 2^p for some power of two 2^p.*
262	// Recall that 10^q = 5^q 2^q.*
263	// When q >= 0, we must have that (2m+1) is divible by 5^q, so 5^q <= 2^54. We have that
264	// 5^23 <= 2^54 and it is the last power of five to qualify, so q <= 23.
265	// When q<0, we have w >= (2m+1) x 5^{-q}. We must have that w<2^{64} so
266	// (2m+1) x 5^{-q} < 2^{64}. We have that 2m+1>2^{53}. Hence, we must have
267	// 2^{53} x 5^{-q} < 2^{64}.
268	// Hence we have 5^{-q} < 2^{11}$ or q>= -4.
269	//
270	// We require lower <= 1 and not lower == 0 because we could not prove that
271	// that lower == 0 is implied; but we could prove that lower <= 1 is a necessary and sufficient test.
272	if (simdjson_unlikely((lower <= `1`) && (power >= -`4`) && (power <= `23`) && ((mantissa & `3`) == `1`))) {
273	if((mantissa << (upperbit + `64` - `53` - `2`)) == upper) {
274	mantissa &= ~`1`; // flip it so that we do not round up
275	}
276	}
277
278	mantissa += mantissa & `1`;
279	mantissa >>= `1`;
280
281	// Here we have mantissa < (1<<53), unless there was an overflow
282	if (mantissa >= (`1ULL` << `53`)) {
283	//////////
284	// This will happen when parsing values such as 7.2057594037927933e+16
285	////////
286	mantissa = (`1ULL` << `52`);
287	real_exponent++;
288	}
289	mantissa &= ~(`1ULL` << `52`);
290	// we have to check that real_exponent is in range, otherwise we bail out
291	if (simdjson_unlikely(real_exponent > `2046`)) {
292	// We have an infinite value!!! We could actually throw an error here if we could.
293	return false;
294	}
295	d = to_double(mantissa, real_exponent, negative);
296	return true;
297	}
298
299	// We call a fallback floating-point parser that might be slow. Note
300	// it will accept JSON numbers, but the JSON spec. is more restrictive so
301	// before you call parse_float_fallback, you need to have validated the input
302	// string with the JSON grammar.
303	// It will return an error (false) if the parsed number is infinite.
304	// The string parsing itself always succeeds. We know that there is at least
305	// one digit.
306	static bool parse_float_fallback(const uint8_t ptr, double* *outDouble) {
307	outDouble = simdjson::internal::from_chars(first: reinterpret_cast<const* char *>(ptr));
308	// We do not accept infinite values.
309
310	// Detecting finite values in a portable manner is ridiculously hard, ideally
311	// we would want to do:
312	// return !std::isfinite(outDouble);*
313	// but that mysteriously fails under legacy/old libc++ libraries, see
314	// https://github.com/simdjson/simdjson/issues/1286
315	//
316	// Therefore, fall back to this solution (the extra parens are there
317	// to handle that max may be a macro on windows).
318	return !(outDouble > (std::numeric_limits<double>::max)() \|\| outDouble < std::numeric_limits<double>::lowest());
319	}
320	static bool parse_float_fallback(const uint8_t ptr, const* uint8_t end_ptr, double* *outDouble) {
321	outDouble = simdjson::internal::from_chars(first: reinterpret_cast<const* char >(ptr), end: reinterpret_cast<const* char *>(end_ptr));
322	// We do not accept infinite values.
323
324	// Detecting finite values in a portable manner is ridiculously hard, ideally
325	// we would want to do:
326	// return !std::isfinite(outDouble);*
327	// but that mysteriously fails under legacy/old libc++ libraries, see
328	// https://github.com/simdjson/simdjson/issues/1286
329	//
330	// Therefore, fall back to this solution (the extra parens are there
331	// to handle that max may be a macro on windows).
332	return !(outDouble > (std::numeric_limits<double>::max)() \|\| outDouble < std::numeric_limits<double>::lowest());
333	}
334
335	// check quickly whether the next 8 chars are made of digits
336	// at a glance, it looks better than Mula's
337	// http://0x80.pl/articles/swar-digits-validate.html
338	simdjson_inline bool is_made_of_eight_digits_fast(const uint8_t *chars) {
339	uint64_t val;
340	// this can read up to 7 bytes beyond the buffer size, but we require
341	// SIMDJSON_PADDING of padding
342	static_assert(`7` <= SIMDJSON_PADDING, "SIMDJSON_PADDING must be bigger than 7");
343	std::memcpy(dest: &val, src: chars, n: `8`);
344	// a branchy method might be faster:
345	// return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030)
346	// && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) ==
347	// 0x3030303030303030);
348	return (((val & `0xF0F0F0F0F0F0F0F0`) \|
349	(((val + `0x0606060606060606`) & `0xF0F0F0F0F0F0F0F0`) >> `4`)) ==
350	`0x3333333333333333`);
351	}
352
353	template<typename W>
354	error_code slow_float_parsing(simdjson_unused const uint8_t * src, W writer) {
355	double d;
356	if (parse_float_fallback(ptr: src, outDouble: &d)) {
357	writer.append_double(d);
358	return SUCCESS;
359	}
360	return INVALID_NUMBER(src);
361	}
362
363	template<typename I>
364	SIMDJSON_NO_SANITIZE_UNDEFINED // We deliberately allow overflow here and check later
365	simdjson_inline bool parse_digit(const uint8_t c, I &i) {
366	const uint8_t digit = static_cast<uint8_t>(c - `'0'`);
367	if (digit > `9`) {
368	return false;
369	}
370	// PERF NOTE: multiplication by 10 is cheaper than arbitrary integer multiplication
371	i = `10` * i + digit; // might overflow, we will handle the overflow later
372	return true;
373	}
374
375	simdjson_inline error_code parse_decimal(simdjson_unused const uint8_t *const src, const uint8_t *&p, uint64_t &i, int64_t &exponent) {
376	// we continue with the fiction that we have an integer. If the
377	// floating point number is representable as x 10^z for some integer*
378	// z that fits in 53 bits, then we will be able to convert back the
379	// the integer into a float in a lossless manner.
380	const uint8_t *const first_after_period = p;
381
382	#ifdef SIMDJSON_SWAR_NUMBER_PARSING
383	#if SIMDJSON_SWAR_NUMBER_PARSING
384	// this helps if we have lots of decimals!
385	// this turns out to be frequent enough.
386	if (is_made_of_eight_digits_fast(chars: p)) {
387	i = i * `100000000` + parse_eight_digits_unrolled(chars: p);
388	p += `8`;
389	}
390	#endif // SIMDJSON_SWAR_NUMBER_PARSING
391	#endif // #ifdef SIMDJSON_SWAR_NUMBER_PARSING
392	// Unrolling the first digit makes a small difference on some implementations (e.g. westmere)
393	if (parse_digit(c: *p, i)) { ++p; }
394	while (parse_digit(c: *p, i)) { p++; }
395	exponent = first_after_period - p;
396	// Decimal without digits (123.) is illegal
397	if (exponent == `0`) {
398	return INVALID_NUMBER(src);
399	}
400	return SUCCESS;
401	}
402
403	simdjson_inline error_code parse_exponent(simdjson_unused const uint8_t *const src, const uint8_t *&p, int64_t &exponent) {
404	// Exp Sign: -123.456e[-]78
405	bool neg_exp = (`'-'` == *p);
406	if (neg_exp \|\| `'+'` == p) { p++; } // Skip + as well*
407
408	// Exponent: -123.456e-[78]
409	auto start_exp = p;
410	int64_t exp_number = `0`;
411	while (parse_digit(c: *p, i&: exp_number)) { ++p; }
412	// It is possible for parse_digit to overflow.
413	// In particular, it could overflow to INT64_MIN, and we cannot do - INT64_MIN.
414	// Thus we must* check for possible overflow before we negate exp_number.*
415
416	// Performance notes: it may seem like combining the two "simdjson_unlikely checks" below into
417	// a single simdjson_unlikely path would be faster. The reasoning is sound, but the compiler may
418	// not oblige and may, in fact, generate two distinct paths in any case. It might be
419	// possible to do uint64_t(p - start_exp - 1) >= 18 but it could end up trading off
420	// instructions for a simdjson_likely branch, an unconclusive gain.
421
422	// If there were no digits, it's an error.
423	if (simdjson_unlikely(p == start_exp)) {
424	return INVALID_NUMBER(src);
425	}
426	// We have a valid positive exponent in exp_number at this point, except that
427	// it may have overflowed.
428
429	// If there were more than 18 digits, we may have overflowed the integer. We have to do
430	// something!!!!
431	if (simdjson_unlikely(p > start_exp+`18`)) {
432	// Skip leading zeroes: 1e000000000000000000001 is technically valid and doesn't overflow
433	while (*start_exp == `'0'`) { start_exp++; }
434	// 19 digits could overflow int64_t and is kind of absurd anyway. We don't
435	// support exponents smaller than -999,999,999,999,999,999 and bigger
436	// than 999,999,999,999,999,999.
437	// We can truncate.
438	// Note that 999999999999999999 is assuredly too large. The maximal ieee64 value before
439	// infinity is ~1.8e308. The smallest subnormal is ~5e-324. So, actually, we could
440	// truncate at 324.
441	// Note that there is no reason to fail per se at this point in time.
442	// E.g., 0e999999999999999999999 is a fine number.
443	if (p > start_exp+`18`) { exp_number = `999999999999999999`; }
444	}
445	// At this point, we know that exp_number is a sane, positive, signed integer.
446	// It is <= 999,999,999,999,999,999. As long as 'exponent' is in
447	// [-8223372036854775808, 8223372036854775808], we won't overflow. Because 'exponent'
448	// is bounded in magnitude by the size of the JSON input, we are fine in this universe.
449	// To sum it up: the next line should never overflow.
450	exponent += (neg_exp ? -exp_number : exp_number);
451	return SUCCESS;
452	}
453
454	simdjson_inline size_t significant_digits(const uint8_t * start_digits, size_t digit_count) {
455	// It is possible that the integer had an overflow.
456	// We have to handle the case where we have 0.0000somenumber.
457	const uint8_t *start = start_digits;
458	while ((start == `'0'`) \|\| (start == `'.'`)) { ++start; }
459	// we over-decrement by one when there is a '.'
460	return digit_count - size_t(start - start_digits);
461	}
462
463	template<typename W>
464	simdjson_inline error_code write_float(const uint8_t *const src, bool negative, uint64_t i, const uint8_t * start_digits, size_t digit_count, int64_t exponent, W &writer) {
465	// If we frequently had to deal with long strings of digits,
466	// we could extend our code by using a 128-bit integer instead
467	// of a 64-bit integer. However, this is uncommon in practice.
468	//
469	// 9999999999999999999 < 264 so we can accommodate 19 digits.
470	// If we have a decimal separator, then digit_count - 1 is the number of digits, but we
471	// may not have a decimal separator!
472	if (simdjson_unlikely(digit_count > `19` && significant_digits(start_digits, digit_count) > `19`)) {
473	// Ok, chances are good that we had an overflow!
474	// this is almost never going to get called!!!
475	// we start anew, going slowly!!!
476	// This will happen in the following examples:
477	// 10000000000000000000000000000000000000000000e+308
478	// 3.1415926535897932384626433832795028841971693993751
479	//
480	// NOTE: This makes a copy* of the writer and passes it to slow_float_parsing. This happens*
481	// because slow_float_parsing is a non-inlined function. If we passed our writer reference to
482	// it, it would force it to be stored in memory, preventing the compiler from picking it apart
483	// and putting into registers. i.e. if we pass it as reference, it gets slow.
484	// This is what forces the skip_double, as well.
485	error_code error = slow_float_parsing(src, writer);
486	writer.skip_double();
487	return error;
488	}
489	// NOTE: it's weird that the simdjson_unlikely() only wraps half the if, but it seems to get slower any other
490	// way we've tried: https://github.com/simdjson/simdjson/pull/990#discussion_r448497331
491	// To future reader: we'd love if someone found a better way, or at least could explain this result!
492	if (simdjson_unlikely(exponent < simdjson::internal::smallest_power) \|\| (exponent > simdjson::internal::largest_power)) {
493	//
494	// Important: smallest_power is such that it leads to a zero value.
495	// Observe that 18446744073709551615e-343 == 0, i.e. (264 - 1) e -343 is zero
496	// so something x 10^-343 goes to zero, but not so with something x 10^-342.
497	static_assert(simdjson::internal::smallest_power <= -`342`, "smallest_power is not small enough");
498	//
499	if((exponent < simdjson::internal::smallest_power) \|\| (i == `0`)) {
500	// E.g. Parse "-0.0e-999" into the same value as "-0.0". See https://en.wikipedia.org/wiki/Signed_zero
501	WRITE_DOUBLE(negative ? -`0.0` : `0.0`, src, writer);
502	return SUCCESS;
503	} else { // (exponent > largest_power) and (i != 0)
504	// We have, for sure, an infinite value and simdjson refuses to parse infinite values.
505	return INVALID_NUMBER(src);
506	}
507	}
508	double d;
509	if (!compute_float_64(power: exponent, i, negative, d)) {
510	// we are almost never going to get here.
511	if (!parse_float_fallback(ptr: src, outDouble: &d)) { return INVALID_NUMBER(src); }
512	}
513	WRITE_DOUBLE(d, src, writer);
514	return SUCCESS;
515	}
516
517	// for performance analysis, it is sometimes useful to skip parsing
518	#ifdef SIMDJSON_SKIPNUMBERPARSING
519
520	template<typename W>
521	simdjson_inline error_code parse_number(const uint8_t *const, W &writer) {
522	writer.append_s64(`0`); // always write zero
523	return SUCCESS; // always succeeds
524	}
525
526	simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src) noexcept { return `0`; }
527	simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t * const src) noexcept { return `0`; }
528	simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * const src) noexcept { return `0`; }
529	simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned_in_string(const uint8_t * const src) noexcept { return `0`; }
530	simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer_in_string(const uint8_t * const src) noexcept { return `0`; }
531	simdjson_unused simdjson_inline simdjson_result<double> parse_double_in_string(const uint8_t * const src) noexcept { return `0`; }
532	simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept { return false; }
533	simdjson_unused simdjson_inline simdjson_result<bool> is_integer(const uint8_t * src) noexcept { return false; }
534	simdjson_unused simdjson_inline simdjson_result<ondemand::number_type> get_number_type(const uint8_t * src) noexcept { return ondemand::number_type::signed_integer; }
535	#else
536
537	// parse the number at src
538	// define JSON_TEST_NUMBERS for unit testing
539	//
540	// It is assumed that the number is followed by a structural ({,},],[) character
541	// or a white space character. If that is not the case (e.g., when the JSON
542	// document is made of a single number), then it is necessary to copy the
543	// content and append a space before calling this function.
544	//
545	// Our objective is accurate parsing (ULP of 0) at high speed.
546	template<typename W>
547	simdjson_inline error_code parse_number(const uint8_t *const src, W &writer) {
548
549	//
550	// Check for minus sign
551	//
552	bool negative = (*src == `'-'`);
553	const uint8_t *p = src + uint8_t(negative);
554
555	//
556	// Parse the integer part.
557	//
558	// PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
559	const uint8_t *const start_digits = p;
560	uint64_t i = `0`;
561	while (parse_digit(c: *p, i)) { p++; }
562
563	// If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
564	// Optimization note: size_t is expected to be unsigned.
565	size_t digit_count = size_t(p - start_digits);
566	if (digit_count == `0` \|\| (`'0'` == start_digits && digit_count > `1`)) { return* INVALID_NUMBER(src); }
567
568	//
569	// Handle floats if there is a . or e (or both)
570	//
571	int64_t exponent = `0`;
572	bool is_float = false;
573	if (`'.'` == *p) {
574	is_float = true;
575	++p;
576	SIMDJSON_TRY( parse_decimal(src, p, i, exponent) );
577	digit_count = int(p - start_digits); // used later to guard against overflows
578	}
579	if ((`'e'` == p) \|\| (`'E'` == p)) {
580	is_float = true;
581	++p;
582	SIMDJSON_TRY( parse_exponent(src, p, exponent) );
583	}
584	if (is_float) {
585	const bool dirty_end = jsoncharutils::is_not_structural_or_whitespace(c: *p);
586	SIMDJSON_TRY( write_float(src, negative, i, start_digits, digit_count, exponent, writer) );
587	if (dirty_end) { return INVALID_NUMBER(src); }
588	return SUCCESS;
589	}
590
591	// The longest negative 64-bit number is 19 digits.
592	// The longest positive 64-bit number is 20 digits.
593	// We do it this way so we don't trigger this branch unless we must.
594	size_t longest_digit_count = negative ? `19` : `20`;
595	if (digit_count > longest_digit_count) { return INVALID_NUMBER(src); }
596	if (digit_count == longest_digit_count) {
597	if (negative) {
598	// Anything negative above INT64_MAX+1 is invalid
599	if (i > uint64_t(INT64_MAX)+`1`) { return INVALID_NUMBER(src); }
600	WRITE_INTEGER(~i+`1`, src, writer);
601	if (jsoncharutils::is_not_structural_or_whitespace(c: p)) { return* INVALID_NUMBER(src); }
602	return SUCCESS;
603	// Positive overflow check:
604	// - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
605	// biggest uint64_t.
606	// - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
607	// If we got here, it's a 20 digit number starting with the digit "1".
608	// - If a 20 digit number starting with 1 overflowed (i10+digit), the result will be smaller*
609	// than 1,553,255,926,290,448,384.
610	// - That is smaller than the smallest possible 20-digit number the user could write:
611	// 10,000,000,000,000,000,000.
612	// - Therefore, if the number is positive and lower than that, it's overflow.
613	// - The value we are looking at is less than or equal to INT64_MAX.
614	//
615	} else if (src[`0`] != uint8_t(`'1'`) \|\| i <= uint64_t(INT64_MAX)) { return INVALID_NUMBER(src); }
616	}
617
618	// Write unsigned if it doesn't fit in a signed integer.
619	if (i > uint64_t(INT64_MAX)) {
620	WRITE_UNSIGNED(i, src, writer);
621	} else {
622	WRITE_INTEGER(negative ? (~i+`1`) : i, src, writer);
623	}
624	if (jsoncharutils::is_not_structural_or_whitespace(c: p)) { return* INVALID_NUMBER(src); }
625	return SUCCESS;
626	}
627
628	// Inlineable functions
629	namespace {
630
631	// This table can be used to characterize the final character of an integer
632	// string. For JSON structural character and allowable white space characters,
633	// we return SUCCESS. For 'e', '.' and 'E', we return INCORRECT_TYPE. Otherwise
634	// we return NUMBER_ERROR.
635	// Optimization note: we could easily reduce the size of the table by half (to 128)
636	// at the cost of an extra branch.
637	// Optimization note: we want the values to use at most 8 bits (not, e.g., 32 bits):
638	static_assert(error_code(uint8_t(NUMBER_ERROR))== NUMBER_ERROR, "bad NUMBER_ERROR cast");
639	static_assert(error_code(uint8_t(SUCCESS))== SUCCESS, "bad NUMBER_ERROR cast");
640	static_assert(error_code(uint8_t(INCORRECT_TYPE))== INCORRECT_TYPE, "bad NUMBER_ERROR cast");
641
642	const uint8_t integer_string_finisher[`256`] = {
643	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
644	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS,
645	SUCCESS, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR,
646	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
647	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
648	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
649	NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR, NUMBER_ERROR,
650	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
651	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS,
652	NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
653	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
654	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR,
655	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
656	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, INCORRECT_TYPE,
657	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
658	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
659	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
660	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
661	NUMBER_ERROR, SUCCESS, NUMBER_ERROR, SUCCESS, NUMBER_ERROR,
662	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
663	NUMBER_ERROR, INCORRECT_TYPE, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
664	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
665	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
666	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
667	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, SUCCESS, NUMBER_ERROR,
668	SUCCESS, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
669	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
670	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
671	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
672	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
673	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
674	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
675	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
676	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
677	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
678	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
679	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
680	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
681	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
682	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
683	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
684	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
685	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
686	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
687	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
688	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
689	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
690	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
691	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
692	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
693	NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR, NUMBER_ERROR,
694	NUMBER_ERROR};
695
696	// Parse any number from 0 to 18,446,744,073,709,551,615
697	simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src) noexcept {
698	const uint8_t *p = src;
699	//
700	// Parse the integer part.
701	//
702	// PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
703	const uint8_t *const start_digits = p;
704	uint64_t i = `0`;
705	while (parse_digit(c: *p, i)) { p++; }
706
707	// If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
708	// Optimization note: size_t is expected to be unsigned.
709	size_t digit_count = size_t(p - start_digits);
710	// The longest positive 64-bit number is 20 digits.
711	// We do it this way so we don't trigger this branch unless we must.
712	// Optimization note: the compiler can probably merge
713	// ((digit_count == 0) \|\| (digit_count > 20))
714	// into a single branch since digit_count is unsigned.
715	if ((digit_count == `0`) \|\| (digit_count > `20`)) { return INCORRECT_TYPE; }
716	// Here digit_count > 0.
717	if ((`'0'` == start_digits) && (digit_count > `1`)) { return* NUMBER_ERROR; }
718	// We can do the following...
719	// if (!jsoncharutils::is_structural_or_whitespace(p)) {*
720	// return (p == '.' \|\| p == 'e' \|\| p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;*
721	// }
722	// as a single table lookup:
723	if (integer_string_finisher[p] != SUCCESS) { return* error_code(integer_string_finisher[*p]); }
724
725	if (digit_count == `20`) {
726	// Positive overflow check:
727	// - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
728	// biggest uint64_t.
729	// - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
730	// If we got here, it's a 20 digit number starting with the digit "1".
731	// - If a 20 digit number starting with 1 overflowed (i10+digit), the result will be smaller*
732	// than 1,553,255,926,290,448,384.
733	// - That is smaller than the smallest possible 20-digit number the user could write:
734	// 10,000,000,000,000,000,000.
735	// - Therefore, if the number is positive and lower than that, it's overflow.
736	// - The value we are looking at is less than or equal to INT64_MAX.
737	//
738	if (src[`0`] != uint8_t(`'1'`) \|\| i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
739	}
740
741	return i;
742	}
743
744
745	// Parse any number from 0 to 18,446,744,073,709,551,615
746	// Never read at src_end or beyond
747	simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned(const uint8_t * const src, const uint8_t * const src_end) noexcept {
748	const uint8_t *p = src;
749	//
750	// Parse the integer part.
751	//
752	// PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
753	const uint8_t *const start_digits = p;
754	uint64_t i = `0`;
755	while ((p != src_end) && parse_digit(c: *p, i)) { p++; }
756
757	// If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
758	// Optimization note: size_t is expected to be unsigned.
759	size_t digit_count = size_t(p - start_digits);
760	// The longest positive 64-bit number is 20 digits.
761	// We do it this way so we don't trigger this branch unless we must.
762	// Optimization note: the compiler can probably merge
763	// ((digit_count == 0) \|\| (digit_count > 20))
764	// into a single branch since digit_count is unsigned.
765	if ((digit_count == `0`) \|\| (digit_count > `20`)) { return INCORRECT_TYPE; }
766	// Here digit_count > 0.
767	if ((`'0'` == start_digits) && (digit_count > `1`)) { return* NUMBER_ERROR; }
768	// We can do the following...
769	// if (!jsoncharutils::is_structural_or_whitespace(p)) {*
770	// return (p == '.' \|\| p == 'e' \|\| p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;*
771	// }
772	// as a single table lookup:
773	if ((p != src_end) && integer_string_finisher[p] != SUCCESS) { return* error_code(integer_string_finisher[*p]); }
774
775	if (digit_count == `20`) {
776	// Positive overflow check:
777	// - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
778	// biggest uint64_t.
779	// - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
780	// If we got here, it's a 20 digit number starting with the digit "1".
781	// - If a 20 digit number starting with 1 overflowed (i10+digit), the result will be smaller*
782	// than 1,553,255,926,290,448,384.
783	// - That is smaller than the smallest possible 20-digit number the user could write:
784	// 10,000,000,000,000,000,000.
785	// - Therefore, if the number is positive and lower than that, it's overflow.
786	// - The value we are looking at is less than or equal to INT64_MAX.
787	//
788	if (src[`0`] != uint8_t(`'1'`) \|\| i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
789	}
790
791	return i;
792	}
793
794	// Parse any number from 0 to 18,446,744,073,709,551,615
795	simdjson_unused simdjson_inline simdjson_result<uint64_t> parse_unsigned_in_string(const uint8_t * const src) noexcept {
796	const uint8_t *p = src + `1`;
797	//
798	// Parse the integer part.
799	//
800	// PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
801	const uint8_t *const start_digits = p;
802	uint64_t i = `0`;
803	while (parse_digit(c: *p, i)) { p++; }
804
805	// If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
806	// Optimization note: size_t is expected to be unsigned.
807	size_t digit_count = size_t(p - start_digits);
808	// The longest positive 64-bit number is 20 digits.
809	// We do it this way so we don't trigger this branch unless we must.
810	// Optimization note: the compiler can probably merge
811	// ((digit_count == 0) \|\| (digit_count > 20))
812	// into a single branch since digit_count is unsigned.
813	if ((digit_count == `0`) \|\| (digit_count > `20`)) { return INCORRECT_TYPE; }
814	// Here digit_count > 0.
815	if ((`'0'` == start_digits) && (digit_count > `1`)) { return* NUMBER_ERROR; }
816	// We can do the following...
817	// if (!jsoncharutils::is_structural_or_whitespace(p)) {*
818	// return (p == '.' \|\| p == 'e' \|\| p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;*
819	// }
820	// as a single table lookup:
821	if (p != `'"'`) { return* NUMBER_ERROR; }
822
823	if (digit_count == `20`) {
824	// Positive overflow check:
825	// - A 20 digit number starting with 2-9 is overflow, because 18,446,744,073,709,551,615 is the
826	// biggest uint64_t.
827	// - A 20 digit number starting with 1 is overflow if it is less than INT64_MAX.
828	// If we got here, it's a 20 digit number starting with the digit "1".
829	// - If a 20 digit number starting with 1 overflowed (i10+digit), the result will be smaller*
830	// than 1,553,255,926,290,448,384.
831	// - That is smaller than the smallest possible 20-digit number the user could write:
832	// 10,000,000,000,000,000,000.
833	// - Therefore, if the number is positive and lower than that, it's overflow.
834	// - The value we are looking at is less than or equal to INT64_MAX.
835	//
836	// Note: we use src[1] and not src[0] because src[0] is the quote character in this
837	// instance.
838	if (src[`1`] != uint8_t(`'1'`) \|\| i <= uint64_t(INT64_MAX)) { return INCORRECT_TYPE; }
839	}
840
841	return i;
842	}
843
844	// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
845	simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t src) noexcept* {
846	//
847	// Check for minus sign
848	//
849	bool negative = (*src == `'-'`);
850	const uint8_t *p = src + uint8_t(negative);
851
852	//
853	// Parse the integer part.
854	//
855	// PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
856	const uint8_t *const start_digits = p;
857	uint64_t i = `0`;
858	while (parse_digit(c: *p, i)) { p++; }
859
860	// If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
861	// Optimization note: size_t is expected to be unsigned.
862	size_t digit_count = size_t(p - start_digits);
863	// We go from
864	// -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
865	// so we can never represent numbers that have more than 19 digits.
866	size_t longest_digit_count = `19`;
867	// Optimization note: the compiler can probably merge
868	// ((digit_count == 0) \|\| (digit_count > longest_digit_count))
869	// into a single branch since digit_count is unsigned.
870	if ((digit_count == `0`) \|\| (digit_count > longest_digit_count)) { return INCORRECT_TYPE; }
871	// Here digit_count > 0.
872	if ((`'0'` == start_digits) && (digit_count > `1`)) { return* NUMBER_ERROR; }
873	// We can do the following...
874	// if (!jsoncharutils::is_structural_or_whitespace(p)) {*
875	// return (p == '.' \|\| p == 'e' \|\| p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;*
876	// }
877	// as a single table lookup:
878	if(integer_string_finisher[p] != SUCCESS) { return* error_code(integer_string_finisher[*p]); }
879	// Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX.
880	// Performance note: This check is only needed when digit_count == longest_digit_count but it is
881	// so cheap that we might as well always make it.
882	if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; }
883	return negative ? (~i+`1`) : i;
884	}
885
886	// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
887	// Never read at src_end or beyond
888	simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer(const uint8_t * const src, const uint8_t * const src_end) noexcept {
889	//
890	// Check for minus sign
891	//
892	if(src == src_end) { return NUMBER_ERROR; }
893	bool negative = (*src == `'-'`);
894	const uint8_t *p = src + uint8_t(negative);
895
896	//
897	// Parse the integer part.
898	//
899	// PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
900	const uint8_t *const start_digits = p;
901	uint64_t i = `0`;
902	while ((p != src_end) && parse_digit(c: *p, i)) { p++; }
903
904	// If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
905	// Optimization note: size_t is expected to be unsigned.
906	size_t digit_count = size_t(p - start_digits);
907	// We go from
908	// -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
909	// so we can never represent numbers that have more than 19 digits.
910	size_t longest_digit_count = `19`;
911	// Optimization note: the compiler can probably merge
912	// ((digit_count == 0) \|\| (digit_count > longest_digit_count))
913	// into a single branch since digit_count is unsigned.
914	if ((digit_count == `0`) \|\| (digit_count > longest_digit_count)) { return INCORRECT_TYPE; }
915	// Here digit_count > 0.
916	if ((`'0'` == start_digits) && (digit_count > `1`)) { return* NUMBER_ERROR; }
917	// We can do the following...
918	// if (!jsoncharutils::is_structural_or_whitespace(p)) {*
919	// return (p == '.' \|\| p == 'e' \|\| p == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;*
920	// }
921	// as a single table lookup:
922	if((p != src_end) && integer_string_finisher[p] != SUCCESS) { return* error_code(integer_string_finisher[*p]); }
923	// Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX.
924	// Performance note: This check is only needed when digit_count == longest_digit_count but it is
925	// so cheap that we might as well always make it.
926	if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; }
927	return negative ? (~i+`1`) : i;
928	}
929
930	// Parse any number from -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
931	simdjson_unused simdjson_inline simdjson_result<int64_t> parse_integer_in_string(const uint8_t src) noexcept* {
932	//
933	// Check for minus sign
934	//
935	bool negative = (*(src + `1`) == `'-'`);
936	src += uint8_t(negative) + `1`;
937
938	//
939	// Parse the integer part.
940	//
941	// PERF NOTE: we don't use is_made_of_eight_digits_fast because large integers like 123456789 are rare
942	const uint8_t *const start_digits = src;
943	uint64_t i = `0`;
944	while (parse_digit(c: *src, i)) { src++; }
945
946	// If there were no digits, or if the integer starts with 0 and has more than one digit, it's an error.
947	// Optimization note: size_t is expected to be unsigned.
948	size_t digit_count = size_t(src - start_digits);
949	// We go from
950	// -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807
951	// so we can never represent numbers that have more than 19 digits.
952	size_t longest_digit_count = `19`;
953	// Optimization note: the compiler can probably merge
954	// ((digit_count == 0) \|\| (digit_count > longest_digit_count))
955	// into a single branch since digit_count is unsigned.
956	if ((digit_count == `0`) \|\| (digit_count > longest_digit_count)) { return INCORRECT_TYPE; }
957	// Here digit_count > 0.
958	if ((`'0'` == start_digits) && (digit_count > `1`)) { return* NUMBER_ERROR; }
959	// We can do the following...
960	// if (!jsoncharutils::is_structural_or_whitespace(src)) {*
961	// return (src == '.' \|\| src == 'e' \|\| src == 'E') ? INCORRECT_TYPE : NUMBER_ERROR;*
962	// }
963	// as a single table lookup:
964	if(src != `'"'`) { return* NUMBER_ERROR; }
965	// Negative numbers have can go down to - INT64_MAX - 1 whereas positive numbers are limited to INT64_MAX.
966	// Performance note: This check is only needed when digit_count == longest_digit_count but it is
967	// so cheap that we might as well always make it.
968	if(i > uint64_t(INT64_MAX) + uint64_t(negative)) { return INCORRECT_TYPE; }
969	return negative ? (~i+`1`) : i;
970	}
971
972	simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * src) noexcept {
973	//
974	// Check for minus sign
975	//
976	bool negative = (*src == `'-'`);
977	src += uint8_t(negative);
978
979	//
980	// Parse the integer part.
981	//
982	uint64_t i = `0`;
983	const uint8_t *p = src;
984	p += parse_digit(c: *p, i);
985	bool leading_zero = (i == `0`);
986	while (parse_digit(c: *p, i)) { p++; }
987	// no integer digits, or 0123 (zero must be solo)
988	if ( p == src ) { return INCORRECT_TYPE; }
989	if ( (leading_zero && p != src+`1`)) { return NUMBER_ERROR; }
990
991	//
992	// Parse the decimal part.
993	//
994	int64_t exponent = `0`;
995	bool overflow;
996	if (simdjson_likely(*p == `'.'`)) {
997	p++;
998	const uint8_t *start_decimal_digits = p;
999	if (!parse_digit(c: p, i)) { return* NUMBER_ERROR; } // no decimal digits
1000	p++;
1001	while (parse_digit(c: *p, i)) { p++; }
1002	exponent = -(p - start_decimal_digits);
1003
1004	// Overflow check. More than 19 digits (minus the decimal) may be overflow.
1005	overflow = p-src-`1` > `19`;
1006	if (simdjson_unlikely(overflow && leading_zero)) {
1007	// Skip leading 0.00000 and see if it still overflows
1008	const uint8_t *start_digits = src + `2`;
1009	while (*start_digits == `'0'`) { start_digits++; }
1010	overflow = start_digits-src > `19`;
1011	}
1012	} else {
1013	overflow = p-src > `19`;
1014	}
1015
1016	//
1017	// Parse the exponent
1018	//
1019	if (p == `'e'` \|\| p == `'E'`) {
1020	p++;
1021	bool exp_neg = *p == `'-'`;
1022	p += exp_neg \|\| *p == `'+'`;
1023
1024	uint64_t exp = `0`;
1025	const uint8_t *start_exp_digits = p;
1026	while (parse_digit(c: *p, i&: exp)) { p++; }
1027	// no exp digits, or 20+ exp digits
1028	if (p-start_exp_digits == `0` \|\| p-start_exp_digits > `19`) { return NUMBER_ERROR; }
1029
1030	exponent += exp_neg ? `0`-exp : exp;
1031	}
1032
1033	if (jsoncharutils::is_not_structural_or_whitespace(c: p)) { return* NUMBER_ERROR; }
1034
1035	overflow = overflow \|\| exponent < simdjson::internal::smallest_power \|\| exponent > simdjson::internal::largest_power;
1036
1037	//
1038	// Assemble (or slow-parse) the float
1039	//
1040	double d;
1041	if (simdjson_likely(!overflow)) {
1042	if (compute_float_64(power: exponent, i, negative, d)) { return d; }
1043	}
1044	if (!parse_float_fallback(ptr: src - uint8_t(negative), outDouble: &d)) {
1045	return NUMBER_ERROR;
1046	}
1047	return d;
1048	}
1049
1050	simdjson_unused simdjson_inline bool is_negative(const uint8_t * src) noexcept {
1051	return (*src == `'-'`);
1052	}
1053
1054	simdjson_unused simdjson_inline simdjson_result<bool> is_integer(const uint8_t * src) noexcept {
1055	bool negative = (*src == `'-'`);
1056	src += uint8_t(negative);
1057	const uint8_t *p = src;
1058	while(static_cast<uint8_t>(*p - `'0'`) <= `9`) { p++; }
1059	if ( p == src ) { return NUMBER_ERROR; }
1060	if (jsoncharutils::is_structural_or_whitespace(c: p)) { return* true; }
1061	return false;
1062	}
1063
1064	simdjson_unused simdjson_inline simdjson_result<ondemand::number_type> get_number_type(const uint8_t * src) noexcept {
1065	bool negative = (*src == `'-'`);
1066	src += uint8_t(negative);
1067	const uint8_t *p = src;
1068	while(static_cast<uint8_t>(*p - `'0'`) <= `9`) { p++; }
1069	if ( p == src ) { return NUMBER_ERROR; }
1070	if (jsoncharutils::is_structural_or_whitespace(c: *p)) {
1071	// We have an integer.
1072	// If the number is negative and valid, it must be a signed integer.
1073	if(negative) { return ondemand::number_type::signed_integer; }
1074	// We want values larger or equal to 9223372036854775808 to be unsigned
1075	// integers, and the other values to be signed integers.
1076	int digit_count = int(p - src);
1077	if(digit_count >= `19`) {
1078	const uint8_t * smaller_big_integer = reinterpret_cast<const uint8_t *>("9223372036854775808");
1079	if((digit_count >= `20`) \|\| (memcmp(s1: src, s2: smaller_big_integer, n: `19`) >= `0`)) {
1080	return ondemand::number_type::unsigned_integer;
1081	}
1082	}
1083	return ondemand::number_type::signed_integer;
1084	}
1085	// Hopefully, we have 'e' or 'E' or '.'.
1086	return ondemand::number_type::floating_point_number;
1087	}
1088
1089	// Never read at src_end or beyond
1090	simdjson_unused simdjson_inline simdjson_result<double> parse_double(const uint8_t * src, const uint8_t * const src_end) noexcept {
1091	if(src == src_end) { return NUMBER_ERROR; }
1092	//
1093	// Check for minus sign
1094	//
1095	bool negative = (*src == `'-'`);
1096	src += uint8_t(negative);
1097
1098	//
1099	// Parse the integer part.
1100	//
1101	uint64_t i = `0`;
1102	const uint8_t *p = src;
1103	if(p == src_end) { return NUMBER_ERROR; }
1104	p += parse_digit(c: *p, i);
1105	bool leading_zero = (i == `0`);
1106	while ((p != src_end) && parse_digit(c: *p, i)) { p++; }
1107	// no integer digits, or 0123 (zero must be solo)
1108	if ( p == src ) { return INCORRECT_TYPE; }
1109	if ( (leading_zero && p != src+`1`)) { return NUMBER_ERROR; }
1110
1111	//
1112	// Parse the decimal part.
1113	//
1114	int64_t exponent = `0`;
1115	bool overflow;
1116	if (simdjson_likely((p != src_end) && (*p == `'.'`))) {
1117	p++;
1118	const uint8_t *start_decimal_digits = p;
1119	if ((p == src_end) \|\| !parse_digit(c: p, i)) { return* NUMBER_ERROR; } // no decimal digits
1120	p++;
1121	while ((p != src_end) && parse_digit(c: *p, i)) { p++; }
1122	exponent = -(p - start_decimal_digits);
1123
1124	// Overflow check. More than 19 digits (minus the decimal) may be overflow.
1125	overflow = p-src-`1` > `19`;
1126	if (simdjson_unlikely(overflow && leading_zero)) {
1127	// Skip leading 0.00000 and see if it still overflows
1128	const uint8_t *start_digits = src + `2`;
1129	while (*start_digits == `'0'`) { start_digits++; }
1130	overflow = start_digits-src > `19`;
1131	}
1132	} else {
1133	overflow = p-src > `19`;
1134	}
1135
1136	//
1137	// Parse the exponent
1138	//
1139	if ((p != src_end) && (p == `'e'` \|\| p == `'E'`)) {
1140	p++;
1141	if(p == src_end) { return NUMBER_ERROR; }
1142	bool exp_neg = *p == `'-'`;
1143	p += exp_neg \|\| *p == `'+'`;
1144
1145	uint64_t exp = `0`;
1146	const uint8_t *start_exp_digits = p;
1147	while ((p != src_end) && parse_digit(c: *p, i&: exp)) { p++; }
1148	// no exp digits, or 20+ exp digits
1149	if (p-start_exp_digits == `0` \|\| p-start_exp_digits > `19`) { return NUMBER_ERROR; }
1150
1151	exponent += exp_neg ? `0`-exp : exp;
1152	}
1153
1154	if ((p != src_end) && jsoncharutils::is_not_structural_or_whitespace(c: p)) { return* NUMBER_ERROR; }
1155
1156	overflow = overflow \|\| exponent < simdjson::internal::smallest_power \|\| exponent > simdjson::internal::largest_power;
1157
1158	//
1159	// Assemble (or slow-parse) the float
1160	//
1161	double d;
1162	if (simdjson_likely(!overflow)) {
1163	if (compute_float_64(power: exponent, i, negative, d)) { return d; }
1164	}
1165	if (!parse_float_fallback(ptr: src - uint8_t(negative), end_ptr: src_end, outDouble: &d)) {
1166	return NUMBER_ERROR;
1167	}
1168	return d;
1169	}
1170
1171	simdjson_unused simdjson_inline simdjson_result<double> parse_double_in_string(const uint8_t * src) noexcept {
1172	//
1173	// Check for minus sign
1174	//
1175	bool negative = (*(src + `1`) == `'-'`);
1176	src += uint8_t(negative) + `1`;
1177
1178	//
1179	// Parse the integer part.
1180	//
1181	uint64_t i = `0`;
1182	const uint8_t *p = src;
1183	p += parse_digit(c: *p, i);
1184	bool leading_zero = (i == `0`);
1185	while (parse_digit(c: *p, i)) { p++; }
1186	// no integer digits, or 0123 (zero must be solo)
1187	if ( p == src ) { return INCORRECT_TYPE; }
1188	if ( (leading_zero && p != src+`1`)) { return NUMBER_ERROR; }
1189
1190	//
1191	// Parse the decimal part.
1192	//
1193	int64_t exponent = `0`;
1194	bool overflow;
1195	if (simdjson_likely(*p == `'.'`)) {
1196	p++;
1197	const uint8_t *start_decimal_digits = p;
1198	if (!parse_digit(c: p, i)) { return* NUMBER_ERROR; } // no decimal digits
1199	p++;
1200	while (parse_digit(c: *p, i)) { p++; }
1201	exponent = -(p - start_decimal_digits);
1202
1203	// Overflow check. More than 19 digits (minus the decimal) may be overflow.
1204	overflow = p-src-`1` > `19`;
1205	if (simdjson_unlikely(overflow && leading_zero)) {
1206	// Skip leading 0.00000 and see if it still overflows
1207	const uint8_t *start_digits = src + `2`;
1208	while (*start_digits == `'0'`) { start_digits++; }
1209	overflow = start_digits-src > `19`;
1210	}
1211	} else {
1212	overflow = p-src > `19`;
1213	}
1214
1215	//
1216	// Parse the exponent
1217	//
1218	if (p == `'e'` \|\| p == `'E'`) {
1219	p++;
1220	bool exp_neg = *p == `'-'`;
1221	p += exp_neg \|\| *p == `'+'`;
1222
1223	uint64_t exp = `0`;
1224	const uint8_t *start_exp_digits = p;
1225	while (parse_digit(c: *p, i&: exp)) { p++; }
1226	// no exp digits, or 20+ exp digits
1227	if (p-start_exp_digits == `0` \|\| p-start_exp_digits > `19`) { return NUMBER_ERROR; }
1228
1229	exponent += exp_neg ? `0`-exp : exp;
1230	}
1231
1232	if (p != `'"'`) { return* NUMBER_ERROR; }
1233
1234	overflow = overflow \|\| exponent < simdjson::internal::smallest_power \|\| exponent > simdjson::internal::largest_power;
1235
1236	//
1237	// Assemble (or slow-parse) the float
1238	//
1239	double d;
1240	if (simdjson_likely(!overflow)) {
1241	if (compute_float_64(power: exponent, i, negative, d)) { return d; }
1242	}
1243	if (!parse_float_fallback(ptr: src - uint8_t(negative), outDouble: &d)) {
1244	return NUMBER_ERROR;
1245	}
1246	return d;
1247	}
1248	} //namespace {}
1249	#endif // SIMDJSON_SKIPNUMBERPARSING
1250
1251	} // namespace numberparsing
1252	} // unnamed namespace
1253	} // namespace SIMDJSON_IMPLEMENTATION
1254	} // namespace simdjson
1255

Browse the source code of Velox/build/_deps/simdjson-src/include/simdjson/generic/numberparsing.h