1 | |
// Powers of ten covering the finite binary64 (double) range.
//
// Allowable floating-point values range from
// std::numeric_limits<double>::lowest() to std::numeric_limits<double>::max(),
// so from about -1.7976e308 all the way to about 1.7976e308 in binary64. The
// lowest non-zero normal value is std::numeric_limits<double>::min(), or
// about 2.225074e-308.
//
// Layout: power_of_ten[308 + e] holds 10^e for every integer e in
// [-308, 308], so the table has 2 * 308 + 1 = 617 entries and the valid
// indices run from 0 to 2 * 308. The parsers in this file add 308 to the
// decimal exponent before indexing. Note that the first entry, 1e-308, lies
// below the smallest normal value quoted above.
static const double power_of_ten[] = {
    1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300,
    1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291,
    1e-290, 1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282,
    1e-281, 1e-280, 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273,
    1e-272, 1e-271, 1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264,
    1e-263, 1e-262, 1e-261, 1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255,
    1e-254, 1e-253, 1e-252, 1e-251, 1e-250, 1e-249, 1e-248, 1e-247, 1e-246,
    1e-245, 1e-244, 1e-243, 1e-242, 1e-241, 1e-240, 1e-239, 1e-238, 1e-237,
    1e-236, 1e-235, 1e-234, 1e-233, 1e-232, 1e-231, 1e-230, 1e-229, 1e-228,
    1e-227, 1e-226, 1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220, 1e-219,
    1e-218, 1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210,
    1e-209, 1e-208, 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201,
    1e-200, 1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192,
    1e-191, 1e-190, 1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183,
    1e-182, 1e-181, 1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174,
    1e-173, 1e-172, 1e-171, 1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165,
    1e-164, 1e-163, 1e-162, 1e-161, 1e-160, 1e-159, 1e-158, 1e-157, 1e-156,
    1e-155, 1e-154, 1e-153, 1e-152, 1e-151, 1e-150, 1e-149, 1e-148, 1e-147,
    1e-146, 1e-145, 1e-144, 1e-143, 1e-142, 1e-141, 1e-140, 1e-139, 1e-138,
    1e-137, 1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130, 1e-129,
    1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120,
    1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111,
    1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102,
    1e-101, 1e-100, 1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93,
    1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84,
    1e-83, 1e-82, 1e-81, 1e-80, 1e-79, 1e-78, 1e-77, 1e-76, 1e-75,
    1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66,
    1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57,
    1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48,
    1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 1e-39,
    1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30,
    1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21,
    1e-20, 1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12,
    1e-11, 1e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3,
    1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6,
    1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
    1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24,
    1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31, 1e32, 1e33,
    1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40, 1e41, 1e42,
    1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, 1e50, 1e51,
    1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, 1e60,
    1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69,
    1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78,
    1e79, 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87,
    1e88, 1e89, 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96,
    1e97, 1e98, 1e99, 1e100, 1e101, 1e102, 1e103, 1e104, 1e105,
    1e106, 1e107, 1e108, 1e109, 1e110, 1e111, 1e112, 1e113, 1e114,
    1e115, 1e116, 1e117, 1e118, 1e119, 1e120, 1e121, 1e122, 1e123,
    1e124, 1e125, 1e126, 1e127, 1e128, 1e129, 1e130, 1e131, 1e132,
    1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, 1e140, 1e141,
    1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, 1e150,
    1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159,
    1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168,
    1e169, 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177,
    1e178, 1e179, 1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186,
    1e187, 1e188, 1e189, 1e190, 1e191, 1e192, 1e193, 1e194, 1e195,
    1e196, 1e197, 1e198, 1e199, 1e200, 1e201, 1e202, 1e203, 1e204,
    1e205, 1e206, 1e207, 1e208, 1e209, 1e210, 1e211, 1e212, 1e213,
    1e214, 1e215, 1e216, 1e217, 1e218, 1e219, 1e220, 1e221, 1e222,
    1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229, 1e230, 1e231,
    1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239, 1e240,
    1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249,
    1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258,
    1e259, 1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267,
    1e268, 1e269, 1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276,
    1e277, 1e278, 1e279, 1e280, 1e281, 1e282, 1e283, 1e284, 1e285,
    1e286, 1e287, 1e288, 1e289, 1e290, 1e291, 1e292, 1e293, 1e294,
    1e295, 1e296, 1e297, 1e298, 1e299, 1e300, 1e301, 1e302, 1e303,
    1e304, 1e305, 1e306, 1e307, 1e308};
77 | |
// Tells whether `c` is one of the ASCII decimal digit characters '0'..'9'.
static inline bool is_integer(char c) {
  // The digits are contiguous in ASCII: after subtracting '0' and narrowing
  // to an unsigned byte, only the ten digit characters land in [0, 9]; every
  // other input wraps around to a larger value.
  const unsigned char distance_from_zero = c - '0';
  return distance_from_zero <= 9;
}
82 | |
// We need to check that the character following a zero is valid. This is
// probably frequent and it is harder than it looks. We are building all of
// this just to differentiate between 0x1 (invalid), 0,1 (valid), 0e1 (valid)...
//
// Lookup table indexed by byte value (0..255). An entry is 0 for the bytes
//   '\t' (9), '\n' (10), '\r' (13), ' ' (32), ',' (44), '.' (46), ':' (58),
//   'E' (69), '[' (91), ']' (93), 'e' (101), '{' (123), '}' (125)
// and 1 for every other byte. In other words, a 1 means "this byte is NOT a
// structural character, a whitespace, an exponent marker or a decimal point".
const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = {
    1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
    1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
98 | |
99 | really_inline bool |
100 | is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) { |
101 | return structural_or_whitespace_or_exponent_or_decimal_negated[c]; |
102 | } |
103 | |
104 | // check quickly whether the next 8 chars are made of digits |
105 | // at a glance, it looks better than Mula's |
106 | // http://0x80.pl/articles/swar-digits-validate.html |
107 | static inline bool is_made_of_eight_digits_fast(const char *chars) { |
108 | uint64_t val; |
109 | // this can read up to 7 bytes beyond the buffer size, but we require |
110 | // SIMDJSON_PADDING of padding |
111 | static_assert(7 <= SIMDJSON_PADDING); |
112 | memcpy(&val, chars, 8); |
113 | // a branchy method might be faster: |
114 | // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) |
115 | // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == |
116 | // 0x3030303030303030); |
117 | return (((val & 0xF0F0F0F0F0F0F0F0) | |
118 | (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == |
119 | 0x3333333333333333); |
120 | } |
121 | |
122 | |
//
// Slow helper that scales `base` by a very small power of ten.
//
// `negative_exponent` is the (negative) decimal exponent itself: the result
// is base * 10^negative_exponent, evaluated as
// base * 1e-308 * 10^(negative_exponent + 308). It is only ever going to be
// used when the exponent is tiny (below -308).
//
// Returns 0 straight away when negative_exponent is below -1000: this avoids
// integer overflows in the pow expression, and those values would become
// zero anyway.
static double subnormal_power10(double base, int64_t negative_exponent) {
  const int64_t smallest_handled_exponent = -1000;
  if (negative_exponent >= smallest_handled_exponent) {
    // this is probably not going to be fast
    const double remaining_scale = pow(10, negative_exponent + 308);
    return base * 1e-308 * remaining_scale;
  }
  return 0;
}
136 | |
137 | // called by parse_number when we know that the output is a float, |
138 | // but where there might be some integer overflow. The trick here is to |
139 | // parse using floats from the start. |
140 | // Do not call this function directly as it skips some of the checks from |
141 | // parse_number |
142 | // |
143 | // This function will almost never be called!!! |
144 | // |
145 | // Note: a redesign could avoid this function entirely. |
146 | // |
147 | static never_inline bool parse_float(const uint8_t *const buf, ParsedJson &pj, |
148 | const uint32_t offset, bool found_minus) { |
149 | const char *p = reinterpret_cast<const char *>(buf + offset); |
150 | bool negative = false; |
151 | if (found_minus) { |
152 | ++p; |
153 | negative = true; |
154 | } |
155 | long double i; |
156 | if (*p == '0') { // 0 cannot be followed by an integer |
157 | ++p; |
158 | i = 0; |
159 | } else { |
160 | unsigned char digit = *p - '0'; |
161 | i = digit; |
162 | p++; |
163 | while (is_integer(*p)) { |
164 | digit = *p - '0'; |
165 | i = 10 * i + digit; |
166 | ++p; |
167 | } |
168 | } |
169 | if ('.' == *p) { |
170 | ++p; |
171 | int fractional_weight = 308; |
172 | if (is_integer(*p)) { |
173 | unsigned char digit = *p - '0'; |
174 | ++p; |
175 | |
176 | fractional_weight--; |
177 | i = i + digit * (fractional_weight >= 0 ? power_of_ten[fractional_weight] |
178 | : 0); |
179 | } else { |
180 | #ifdef JSON_TEST_NUMBERS // for unit testing |
181 | found_invalid_number(buf + offset); |
182 | #endif |
183 | return false; |
184 | } |
185 | while (is_integer(*p)) { |
186 | unsigned char digit = *p - '0'; |
187 | ++p; |
188 | fractional_weight--; |
189 | i = i + digit * (fractional_weight >= 0 ? power_of_ten[fractional_weight] |
190 | : 0); |
191 | } |
192 | } |
193 | if (('e' == *p) || ('E' == *p)) { |
194 | ++p; |
195 | bool neg_exp = false; |
196 | if ('-' == *p) { |
197 | neg_exp = true; |
198 | ++p; |
199 | } else if ('+' == *p) { |
200 | ++p; |
201 | } |
202 | if (!is_integer(*p)) { |
203 | #ifdef JSON_TEST_NUMBERS // for unit testing |
204 | found_invalid_number(buf + offset); |
205 | #endif |
206 | return false; |
207 | } |
208 | unsigned char digit = *p - '0'; |
209 | int64_t exp_number = digit; // exponential part |
210 | p++; |
211 | if (is_integer(*p)) { |
212 | digit = *p - '0'; |
213 | exp_number = 10 * exp_number + digit; |
214 | ++p; |
215 | } |
216 | if (is_integer(*p)) { |
217 | digit = *p - '0'; |
218 | exp_number = 10 * exp_number + digit; |
219 | ++p; |
220 | } |
221 | if (is_integer(*p)) { |
222 | digit = *p - '0'; |
223 | exp_number = 10 * exp_number + digit; |
224 | ++p; |
225 | } |
226 | while (is_integer(*p)) { |
227 | if (exp_number > 0x100000000) { // we need to check for overflows |
228 | // we refuse to parse this |
229 | #ifdef JSON_TEST_NUMBERS // for unit testing |
230 | found_invalid_number(buf + offset); |
231 | #endif |
232 | return false; |
233 | } |
234 | digit = *p - '0'; |
235 | exp_number = 10 * exp_number + digit; |
236 | ++p; |
237 | } |
238 | if (unlikely(exp_number > 308)) { |
239 | // this path is unlikely |
240 | if (neg_exp) { |
241 | // We either have zero or a subnormal. |
242 | // We expect this to be uncommon so we go through a slow path. |
243 | i = subnormal_power10(i, -exp_number); |
244 | } else { |
245 | // We know for sure that we have a number that is too large, |
246 | // we refuse to parse this |
247 | #ifdef JSON_TEST_NUMBERS // for unit testing |
248 | found_invalid_number(buf + offset); |
249 | #endif |
250 | return false; |
251 | } |
252 | } else { |
253 | int exponent = (neg_exp ? -exp_number : exp_number); |
254 | // we have that exp_number is [0,308] so that |
255 | // exponent is [-308,308] so that |
256 | // 308 + exponent is in [0, 2 * 308] |
257 | i *= power_of_ten[308 + exponent]; |
258 | } |
259 | } |
260 | if (is_not_structural_or_whitespace(*p)) { |
261 | return false; |
262 | } |
263 | // check that we can go from long double to double safely. |
264 | if(i > std::numeric_limits<double>::max()) { |
265 | #ifdef JSON_TEST_NUMBERS // for unit testing |
266 | found_invalid_number(buf + offset); |
267 | #endif |
268 | return false; |
269 | } |
270 | double d = negative ? -i : i; |
271 | pj.write_tape_double(d); |
272 | #ifdef JSON_TEST_NUMBERS // for unit testing |
273 | found_float(d, buf + offset); |
274 | #endif |
275 | return is_structural_or_whitespace(*p); |
276 | } |
277 | |
278 | // called by parse_number when we know that the output is an integer, |
279 | // but where there might be some integer overflow. |
280 | // we want to catch overflows! |
281 | // Do not call this function directly as it skips some of the checks from |
282 | // parse_number |
283 | // |
284 | // This function will almost never be called!!! |
285 | // |
286 | static never_inline bool parse_large_integer(const uint8_t *const buf, |
287 | ParsedJson &pj, |
288 | const uint32_t offset, |
289 | bool found_minus) { |
290 | const char *p = reinterpret_cast<const char *>(buf + offset); |
291 | |
292 | bool negative = false; |
293 | if (found_minus) { |
294 | ++p; |
295 | negative = true; |
296 | } |
297 | uint64_t i; |
298 | if (*p == '0') { // 0 cannot be followed by an integer |
299 | ++p; |
300 | i = 0; |
301 | } else { |
302 | unsigned char digit = *p - '0'; |
303 | i = digit; |
304 | p++; |
305 | // the is_made_of_eight_digits_fast routine is unlikely to help here because |
306 | // we rarely see large integer parts like 123456789 |
307 | while (is_integer(*p)) { |
308 | digit = *p - '0'; |
309 | if (mul_overflow(i, 10, &i)) { |
310 | #ifdef JSON_TEST_NUMBERS // for unit testing |
311 | found_invalid_number(buf + offset); |
312 | #endif |
313 | return false; // overflow |
314 | } |
315 | if (add_overflow(i, digit, &i)) { |
316 | #ifdef JSON_TEST_NUMBERS // for unit testing |
317 | found_invalid_number(buf + offset); |
318 | #endif |
319 | return false; // overflow |
320 | } |
321 | ++p; |
322 | } |
323 | } |
324 | if (negative) { |
325 | if (i > 0x8000000000000000) { |
326 | // overflows! |
327 | #ifdef JSON_TEST_NUMBERS // for unit testing |
328 | found_invalid_number(buf + offset); |
329 | #endif |
330 | return false; // overflow |
331 | } else if (i == 0x8000000000000000) { |
332 | // In two's complement, we cannot represent 0x8000000000000000 |
333 | // as a positive signed integer, but the negative version is |
334 | // possible. |
335 | constexpr int64_t signed_answer = INT64_MIN; |
336 | pj.write_tape_s64(signed_answer); |
337 | #ifdef JSON_TEST_NUMBERS // for unit testing |
338 | found_integer(signed_answer, buf + offset); |
339 | #endif |
340 | } else { |
341 | // we can negate safely |
342 | int64_t signed_answer = -static_cast<int64_t>(i); |
343 | pj.write_tape_s64(signed_answer); |
344 | #ifdef JSON_TEST_NUMBERS // for unit testing |
345 | found_integer(signed_answer, buf + offset); |
346 | #endif |
347 | } |
348 | } else { |
349 | // we have a positive integer, the contract is that |
350 | // we try to represent it as a signed integer and only |
351 | // fallback on unsigned integers if absolutely necessary. |
352 | if(i < 0x8000000000000000) { |
353 | #ifdef JSON_TEST_NUMBERS // for unit testing |
354 | found_integer(i, buf + offset); |
355 | #endif |
356 | pj.write_tape_s64(i); |
357 | } else { |
358 | #ifdef JSON_TEST_NUMBERS // for unit testing |
359 | found_unsigned_integer(i, buf + offset); |
360 | #endif |
361 | pj.write_tape_u64(i); |
362 | } |
363 | } |
364 | return is_structural_or_whitespace(*p); |
365 | } |
366 | |
// Parse the number at buf + offset and write it to pj as a signed integer or
// as a double (the slow-path helpers it delegates to may also write an
// unsigned integer).
// Define JSON_TEST_NUMBERS for unit testing.
//
// It is assumed that the number is followed by a structural ({,},],[) character
// or a white space character. If that is not the case (e.g., when the JSON
// document is made of a single number), then it is necessary to copy the
// content and append a space before calling this function.
//
// Our objective is accurate parsing (ULP of 0 or 1) at high speed.
//
// Parameters:
//   buf         - input bytes
//   pj          - receives the parsed value
//   offset      - position in buf where the number starts (at the '-' when
//                 found_minus is true)
//   found_minus - true when the number starts with a minus sign
//
// Returns false when the number is malformed or rejected by a slow path;
// otherwise returns whether the character following the number is structural
// or whitespace.
//
// Strategy: digits are accumulated optimistically into a 64-bit unsigned
// integer without overflow checks. Once the whole number has been scanned,
// the count of consumed characters tells whether an overflow was possible, in
// which case the number is parsed again by parse_float or
// parse_large_integer.
static really_inline bool parse_number(const uint8_t *const buf, ParsedJson &pj,
                                       const uint32_t offset,
                                       bool found_minus) {
#ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes
                                  // useful to skip parsing
  pj.write_tape_s64(0);           // always write zero
  return true;                    // always succeeds
#else
  const char *p = reinterpret_cast<const char *>(buf + offset);
  bool negative = false;
  if (found_minus) {
    ++p;
    negative = true;
    if (!is_integer(*p)) { // a negative sign must be followed by an integer
#ifdef JSON_TEST_NUMBERS // for unit testing
      found_invalid_number(buf + offset);
#endif
      return false;
    }
  }
  // First character after the optional sign; used later to count how many
  // characters the integer and fractional parts span.
  const char *const start_digits = p;

  uint64_t i;      // an unsigned int avoids signed overflows (which are bad)
  if (*p == '0') { // 0 cannot be followed by an integer
    ++p;
    // After a leading zero only a structural character, a whitespace, an
    // exponent marker or a decimal point is acceptable.
    if (is_not_structural_or_whitespace_or_exponent_or_decimal(*p)) {
#ifdef JSON_TEST_NUMBERS // for unit testing
      found_invalid_number(buf + offset);
#endif
      return false;
    }
    i = 0;
  } else {
    if (!(is_integer(*p))) { // must start with an integer
#ifdef JSON_TEST_NUMBERS // for unit testing
      found_invalid_number(buf + offset);
#endif
      return false;
    }
    unsigned char digit = *p - '0';
    i = digit;
    p++;
    // the is_made_of_eight_digits_fast routine is unlikely to help here because
    // we rarely see large integer parts like 123456789
    while (is_integer(*p)) {
      digit = *p - '0';
      // a multiplication by 10 is cheaper than an arbitrary integer
      // multiplication
      i = 10 * i + digit; // might overflow, we will handle the overflow later
      ++p;
    }
  }
  // Decimal exponent to apply to i: minus the number of fractional digits,
  // plus the explicit exponent (if any).
  int64_t exponent = 0;
  bool is_float = false;
  if ('.' == *p) {
    is_float = true; // At this point we know that we have a float
    // we continue with the fiction that we have an integer. If the
    // floating point number is representable as x * 10^z for some integer
    // x that fits in 53 bits, then we will be able to convert back the
    // the integer into a float in a lossless manner.
    ++p;
    const char *const first_after_period = p;
    if (is_integer(*p)) {
      unsigned char digit = *p - '0';
      ++p;
      i = i * 10 + digit; // might overflow + multiplication by 10 is likely
                          // cheaper than arbitrary mult.
      // we will handle the overflow later
    } else {
      // a decimal point must be followed by at least one digit
#ifdef JSON_TEST_NUMBERS // for unit testing
      found_invalid_number(buf + offset);
#endif
      return false;
    }
#ifdef SWAR_NUMBER_PARSING
    // this helps if we have lots of decimals!
    // this turns out to be frequent enough.
    if (is_made_of_eight_digits_fast(p)) {
      i = i * 100000000 + parse_eight_digits_unrolled(p);
      p += 8;
    }
#endif
    while (is_integer(*p)) {
      unsigned char digit = *p - '0';
      ++p;
      i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
                          // because the number is then parsed again by
                          // parse_float (see the digit_count check below).
    }
    // Negative count of fractional digits: every digit after the period
    // shifts the value by one decimal place.
    exponent = first_after_period - p;
  }
  // Number of characters consumed so far (digits plus the period, if any),
  // minus one.
  int digit_count =
      p - start_digits - 1; // used later to guard against overflows
  int64_t exp_number = 0;   // exponential part
  if (('e' == *p) || ('E' == *p)) {
    is_float = true;
    ++p;
    bool neg_exp = false;
    if ('-' == *p) {
      neg_exp = true;
      ++p;
    } else if ('+' == *p) {
      ++p;
    }
    if (!is_integer(*p)) {
      // an exponent marker must be followed by at least one digit
#ifdef JSON_TEST_NUMBERS // for unit testing
      found_invalid_number(buf + offset);
#endif
      return false;
    }
    unsigned char digit = *p - '0';
    exp_number = digit;
    p++;
    // Two more exponent digits are consumed without any overflow check:
    // three decimal digits cannot overflow an int64_t.
    if (is_integer(*p)) {
      digit = *p - '0';
      exp_number = 10 * exp_number + digit;
      ++p;
    }
    if (is_integer(*p)) {
      digit = *p - '0';
      exp_number = 10 * exp_number + digit;
      ++p;
    }
    while (is_integer(*p)) {
      if (exp_number > 0x100000000) { // we need to check for overflows
        // we refuse to parse this
#ifdef JSON_TEST_NUMBERS // for unit testing
        found_invalid_number(buf + offset);
#endif
        return false;
      }
      digit = *p - '0';
      exp_number = 10 * exp_number + digit;
      ++p;
    }
    exponent += (neg_exp ? -exp_number : exp_number);
  }
  if (is_float) {
    // Index of 10^exponent in power_of_ten. The arithmetic is unsigned, so an
    // exponent below -308 wraps around to a huge value and is caught by the
    // same "> 2 * 308" range check as an exponent above 308.
    uint64_t power_index = 308 + exponent;
    if (unlikely((digit_count >= 19))) { // this is uncommon
      // It is possible that the integer had an overflow.
      // We have to handle the case where we have 0.0000somenumber.
      const char *start = start_digits;
      while ((*start == '0') || (*start == '.')) {
        start++;
      }
      // we over-decrement by one when there is a '.'
      digit_count -= (start - start_digits);
      if (digit_count >= 19) {
        // Ok, chances are good that we had an overflow!
        // this is almost never going to get called!!!
        // we start anew, going slowly!!!
        return parse_float(buf, pj, offset, found_minus);
      }
    }
    if (unlikely((power_index > 2 * 308))) { // this is uncommon!!!
      // this is almost never going to get called!!!
      // we start anew, going slowly!!!
      return parse_float(buf, pj, offset, found_minus);
    }
    double factor = power_of_ten[power_index];
    // The sign is carried by the factor so that it reaches the result through
    // the floating-point multiplication.
    factor = negative ? -factor : factor;
    double d = i * factor;
    pj.write_tape_double(d);
#ifdef JSON_TEST_NUMBERS // for unit testing
    found_float(d, buf + offset);
#endif
  } else {
    if (unlikely(digit_count >= 18)) { // this is uncommon!!!
      // there is a good chance that we had an overflow, so we need
      // need to recover: we parse the whole thing again.
      return parse_large_integer(buf, pj, offset, found_minus);
    }
    // Unsigned negation (0 - i) is well defined; the value is handed to
    // write_tape_s64 afterwards.
    i = negative ? 0 - i : i;
    pj.write_tape_s64(i);
#ifdef JSON_TEST_NUMBERS // for unit testing
    found_integer(i, buf + offset);
#endif
  }
  return is_structural_or_whitespace(*p);
#endif // SIMDJSON_SKIPNUMBERPARSING
}
557 | |
558 | |