| 1 | |
// Powers of ten indexed by decimal exponent: power_of_ten[308 + e] holds 1e<e>
// for every e in [-308, 308], one decade per row below.
//
// Finite binary64 values range from std::numeric_limits<double>::lowest() to
// std::numeric_limits<double>::max(), that is from about -1.7976e308 all the
// way to 1.7976e308. The smallest positive normal value is
// std::numeric_limits<double>::min(), about 2.225074e-308, so the first entry
// of the table (1e-308) lies below the normal range.
static const double power_of_ten[] = {
    1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301, 1e-300,
    1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291, 1e-290,
    1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, 1e-281, 1e-280,
    1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, 1e-272, 1e-271, 1e-270,
    1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, 1e-263, 1e-262, 1e-261, 1e-260,
    1e-259, 1e-258, 1e-257, 1e-256, 1e-255, 1e-254, 1e-253, 1e-252, 1e-251, 1e-250,
    1e-249, 1e-248, 1e-247, 1e-246, 1e-245, 1e-244, 1e-243, 1e-242, 1e-241, 1e-240,
    1e-239, 1e-238, 1e-237, 1e-236, 1e-235, 1e-234, 1e-233, 1e-232, 1e-231, 1e-230,
    1e-229, 1e-228, 1e-227, 1e-226, 1e-225, 1e-224, 1e-223, 1e-222, 1e-221, 1e-220,
    1e-219, 1e-218, 1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211, 1e-210,
    1e-209, 1e-208, 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201, 1e-200,
    1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, 1e-191, 1e-190,
    1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, 1e-182, 1e-181, 1e-180,
    1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, 1e-173, 1e-172, 1e-171, 1e-170,
    1e-169, 1e-168, 1e-167, 1e-166, 1e-165, 1e-164, 1e-163, 1e-162, 1e-161, 1e-160,
    1e-159, 1e-158, 1e-157, 1e-156, 1e-155, 1e-154, 1e-153, 1e-152, 1e-151, 1e-150,
    1e-149, 1e-148, 1e-147, 1e-146, 1e-145, 1e-144, 1e-143, 1e-142, 1e-141, 1e-140,
    1e-139, 1e-138, 1e-137, 1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131, 1e-130,
    1e-129, 1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121, 1e-120,
    1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111, 1e-110,
    1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, 1e-101, 1e-100,
    1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, 1e-92, 1e-91, 1e-90,
    1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, 1e-83, 1e-82, 1e-81, 1e-80,
    1e-79, 1e-78, 1e-77, 1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70,
    1e-69, 1e-68, 1e-67, 1e-66, 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60,
    1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50,
    1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40,
    1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30,
    1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20,
    1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10,
    1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1,
    1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
    1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
    1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29,
    1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
    1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49,
    1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59,
    1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69,
    1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79,
    1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, 1e88, 1e89,
    1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, 1e97, 1e98, 1e99,
    1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109,
    1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119,
    1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129,
    1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139,
    1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149,
    1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159,
    1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, 1e169,
    1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178, 1e179,
    1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, 1e187, 1e188, 1e189,
    1e190, 1e191, 1e192, 1e193, 1e194, 1e195, 1e196, 1e197, 1e198, 1e199,
    1e200, 1e201, 1e202, 1e203, 1e204, 1e205, 1e206, 1e207, 1e208, 1e209,
    1e210, 1e211, 1e212, 1e213, 1e214, 1e215, 1e216, 1e217, 1e218, 1e219,
    1e220, 1e221, 1e222, 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229,
    1e230, 1e231, 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239,
    1e240, 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249,
    1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259,
    1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268, 1e269,
    1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, 1e277, 1e278, 1e279,
    1e280, 1e281, 1e282, 1e283, 1e284, 1e285, 1e286, 1e287, 1e288, 1e289,
    1e290, 1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298, 1e299,
    1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308};
| 77 | |
// Returns true when c is one of the ASCII decimal digits '0' through '9'.
// Despite its name, this tests a single character, not a whole number.
static inline bool is_integer(char c) {
  // Expressed as "not outside the digit range"; per the original note, decent
  // compilers reduce this range test to (uint8_t)(c - '0') <= 9.
  return !(c < '0' || c > '9');
}
| 82 | |
// We need to check that the character following a leading zero is valid. This
// is probably frequent and it is harder than it looks. We are building all of
// this just to differentiate between 0x1 (invalid), 0,1 (valid), 0e1 (valid)...
//
// Indexed by byte value. An entry is 0 for the bytes accepted after the zero:
//   whitespace   0x09 (tab), 0x0A (LF), 0x0D (CR), 0x20 (space)
//   structural   ','  ':'  '['  ']'  '{'  '}'
//   decimal      '.'
//   exponent     'E'  'e'
// and 1 for every other byte, including NUL and all bytes >= 0x80.
const bool structural_or_whitespace_or_exponent_or_decimal_negated[256] = {
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
    /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xA0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xB0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xC0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xD0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xE0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xF0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
| 98 | |
| 99 | really_inline bool |
| 100 | is_not_structural_or_whitespace_or_exponent_or_decimal(unsigned char c) { |
| 101 | return structural_or_whitespace_or_exponent_or_decimal_negated[c]; |
| 102 | } |
| 103 | |
| 104 | // check quickly whether the next 8 chars are made of digits |
| 105 | // at a glance, it looks better than Mula's |
| 106 | // http://0x80.pl/articles/swar-digits-validate.html |
| 107 | static inline bool is_made_of_eight_digits_fast(const char *chars) { |
| 108 | uint64_t val; |
| 109 | // this can read up to 7 bytes beyond the buffer size, but we require |
| 110 | // SIMDJSON_PADDING of padding |
| 111 | static_assert(7 <= SIMDJSON_PADDING); |
| 112 | memcpy(&val, chars, 8); |
| 113 | // a branchy method might be faster: |
| 114 | // return (( val & 0xF0F0F0F0F0F0F0F0 ) == 0x3030303030303030) |
| 115 | // && (( (val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0 ) == |
| 116 | // 0x3030303030303030); |
| 117 | return (((val & 0xF0F0F0F0F0F0F0F0) | |
| 118 | (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == |
| 119 | 0x3333333333333333); |
| 120 | } |
| 121 | |
| 122 | |
//
// Computes base * 10^negative_exponent, where negative_exponent is itself the
// (negative) decimal exponent. It is only ever going to be used when that
// exponent is below -308, i.e. when the result is tiny: subnormal or zero.
static double subnormal_power10(double base, int64_t negative_exponent) {
  if (negative_exponent >= -1000) {
    // Scale in two steps: by 1e-308 first, then by the remaining power of ten
    // obtained from pow(). This is probably not going to be fast.
    const double remaining_scale = pow(10, negative_exponent + 308);
    return base * 1e-308 * remaining_scale;
  }
  // Exponents this small would give zero anyway; bailing out here also avoids
  // integer overflows in the pow expression above.
  return 0;
}
| 136 | |
| 137 | // called by parse_number when we know that the output is a float, |
| 138 | // but where there might be some integer overflow. The trick here is to |
| 139 | // parse using floats from the start. |
| 140 | // Do not call this function directly as it skips some of the checks from |
| 141 | // parse_number |
| 142 | // |
| 143 | // This function will almost never be called!!! |
| 144 | // |
| 145 | // Note: a redesign could avoid this function entirely. |
| 146 | // |
| 147 | static never_inline bool parse_float(const uint8_t *const buf, ParsedJson &pj, |
| 148 | const uint32_t offset, bool found_minus) { |
| 149 | const char *p = reinterpret_cast<const char *>(buf + offset); |
| 150 | bool negative = false; |
| 151 | if (found_minus) { |
| 152 | ++p; |
| 153 | negative = true; |
| 154 | } |
| 155 | long double i; |
| 156 | if (*p == '0') { // 0 cannot be followed by an integer |
| 157 | ++p; |
| 158 | i = 0; |
| 159 | } else { |
| 160 | unsigned char digit = *p - '0'; |
| 161 | i = digit; |
| 162 | p++; |
| 163 | while (is_integer(*p)) { |
| 164 | digit = *p - '0'; |
| 165 | i = 10 * i + digit; |
| 166 | ++p; |
| 167 | } |
| 168 | } |
| 169 | if ('.' == *p) { |
| 170 | ++p; |
| 171 | int fractional_weight = 308; |
| 172 | if (is_integer(*p)) { |
| 173 | unsigned char digit = *p - '0'; |
| 174 | ++p; |
| 175 | |
| 176 | fractional_weight--; |
| 177 | i = i + digit * (fractional_weight >= 0 ? power_of_ten[fractional_weight] |
| 178 | : 0); |
| 179 | } else { |
| 180 | #ifdef JSON_TEST_NUMBERS // for unit testing |
| 181 | found_invalid_number(buf + offset); |
| 182 | #endif |
| 183 | return false; |
| 184 | } |
| 185 | while (is_integer(*p)) { |
| 186 | unsigned char digit = *p - '0'; |
| 187 | ++p; |
| 188 | fractional_weight--; |
| 189 | i = i + digit * (fractional_weight >= 0 ? power_of_ten[fractional_weight] |
| 190 | : 0); |
| 191 | } |
| 192 | } |
| 193 | if (('e' == *p) || ('E' == *p)) { |
| 194 | ++p; |
| 195 | bool neg_exp = false; |
| 196 | if ('-' == *p) { |
| 197 | neg_exp = true; |
| 198 | ++p; |
| 199 | } else if ('+' == *p) { |
| 200 | ++p; |
| 201 | } |
| 202 | if (!is_integer(*p)) { |
| 203 | #ifdef JSON_TEST_NUMBERS // for unit testing |
| 204 | found_invalid_number(buf + offset); |
| 205 | #endif |
| 206 | return false; |
| 207 | } |
| 208 | unsigned char digit = *p - '0'; |
| 209 | int64_t exp_number = digit; // exponential part |
| 210 | p++; |
| 211 | if (is_integer(*p)) { |
| 212 | digit = *p - '0'; |
| 213 | exp_number = 10 * exp_number + digit; |
| 214 | ++p; |
| 215 | } |
| 216 | if (is_integer(*p)) { |
| 217 | digit = *p - '0'; |
| 218 | exp_number = 10 * exp_number + digit; |
| 219 | ++p; |
| 220 | } |
| 221 | if (is_integer(*p)) { |
| 222 | digit = *p - '0'; |
| 223 | exp_number = 10 * exp_number + digit; |
| 224 | ++p; |
| 225 | } |
| 226 | while (is_integer(*p)) { |
| 227 | if (exp_number > 0x100000000) { // we need to check for overflows |
| 228 | // we refuse to parse this |
| 229 | #ifdef JSON_TEST_NUMBERS // for unit testing |
| 230 | found_invalid_number(buf + offset); |
| 231 | #endif |
| 232 | return false; |
| 233 | } |
| 234 | digit = *p - '0'; |
| 235 | exp_number = 10 * exp_number + digit; |
| 236 | ++p; |
| 237 | } |
| 238 | if (unlikely(exp_number > 308)) { |
| 239 | // this path is unlikely |
| 240 | if (neg_exp) { |
| 241 | // We either have zero or a subnormal. |
| 242 | // We expect this to be uncommon so we go through a slow path. |
| 243 | i = subnormal_power10(i, -exp_number); |
| 244 | } else { |
| 245 | // We know for sure that we have a number that is too large, |
| 246 | // we refuse to parse this |
| 247 | #ifdef JSON_TEST_NUMBERS // for unit testing |
| 248 | found_invalid_number(buf + offset); |
| 249 | #endif |
| 250 | return false; |
| 251 | } |
| 252 | } else { |
| 253 | int exponent = (neg_exp ? -exp_number : exp_number); |
| 254 | // we have that exp_number is [0,308] so that |
| 255 | // exponent is [-308,308] so that |
| 256 | // 308 + exponent is in [0, 2 * 308] |
| 257 | i *= power_of_ten[308 + exponent]; |
| 258 | } |
| 259 | } |
| 260 | if (is_not_structural_or_whitespace(*p)) { |
| 261 | return false; |
| 262 | } |
| 263 | // check that we can go from long double to double safely. |
| 264 | if(i > std::numeric_limits<double>::max()) { |
| 265 | #ifdef JSON_TEST_NUMBERS // for unit testing |
| 266 | found_invalid_number(buf + offset); |
| 267 | #endif |
| 268 | return false; |
| 269 | } |
| 270 | double d = negative ? -i : i; |
| 271 | pj.write_tape_double(d); |
| 272 | #ifdef JSON_TEST_NUMBERS // for unit testing |
| 273 | found_float(d, buf + offset); |
| 274 | #endif |
| 275 | return is_structural_or_whitespace(*p); |
| 276 | } |
| 277 | |
| 278 | // called by parse_number when we know that the output is an integer, |
| 279 | // but where there might be some integer overflow. |
| 280 | // we want to catch overflows! |
| 281 | // Do not call this function directly as it skips some of the checks from |
| 282 | // parse_number |
| 283 | // |
| 284 | // This function will almost never be called!!! |
| 285 | // |
| 286 | static never_inline bool parse_large_integer(const uint8_t *const buf, |
| 287 | ParsedJson &pj, |
| 288 | const uint32_t offset, |
| 289 | bool found_minus) { |
| 290 | const char *p = reinterpret_cast<const char *>(buf + offset); |
| 291 | |
| 292 | bool negative = false; |
| 293 | if (found_minus) { |
| 294 | ++p; |
| 295 | negative = true; |
| 296 | } |
| 297 | uint64_t i; |
| 298 | if (*p == '0') { // 0 cannot be followed by an integer |
| 299 | ++p; |
| 300 | i = 0; |
| 301 | } else { |
| 302 | unsigned char digit = *p - '0'; |
| 303 | i = digit; |
| 304 | p++; |
| 305 | // the is_made_of_eight_digits_fast routine is unlikely to help here because |
| 306 | // we rarely see large integer parts like 123456789 |
| 307 | while (is_integer(*p)) { |
| 308 | digit = *p - '0'; |
| 309 | if (mul_overflow(i, 10, &i)) { |
| 310 | #ifdef JSON_TEST_NUMBERS // for unit testing |
| 311 | found_invalid_number(buf + offset); |
| 312 | #endif |
| 313 | return false; // overflow |
| 314 | } |
| 315 | if (add_overflow(i, digit, &i)) { |
| 316 | #ifdef JSON_TEST_NUMBERS // for unit testing |
| 317 | found_invalid_number(buf + offset); |
| 318 | #endif |
| 319 | return false; // overflow |
| 320 | } |
| 321 | ++p; |
| 322 | } |
| 323 | } |
| 324 | if (negative) { |
| 325 | if (i > 0x8000000000000000) { |
| 326 | // overflows! |
| 327 | #ifdef JSON_TEST_NUMBERS // for unit testing |
| 328 | found_invalid_number(buf + offset); |
| 329 | #endif |
| 330 | return false; // overflow |
| 331 | } else if (i == 0x8000000000000000) { |
| 332 | // In two's complement, we cannot represent 0x8000000000000000 |
| 333 | // as a positive signed integer, but the negative version is |
| 334 | // possible. |
| 335 | constexpr int64_t signed_answer = INT64_MIN; |
| 336 | pj.write_tape_s64(signed_answer); |
| 337 | #ifdef JSON_TEST_NUMBERS // for unit testing |
| 338 | found_integer(signed_answer, buf + offset); |
| 339 | #endif |
| 340 | } else { |
| 341 | // we can negate safely |
| 342 | int64_t signed_answer = -static_cast<int64_t>(i); |
| 343 | pj.write_tape_s64(signed_answer); |
| 344 | #ifdef JSON_TEST_NUMBERS // for unit testing |
| 345 | found_integer(signed_answer, buf + offset); |
| 346 | #endif |
| 347 | } |
| 348 | } else { |
| 349 | // we have a positive integer, the contract is that |
| 350 | // we try to represent it as a signed integer and only |
| 351 | // fallback on unsigned integers if absolutely necessary. |
| 352 | if(i < 0x8000000000000000) { |
| 353 | #ifdef JSON_TEST_NUMBERS // for unit testing |
| 354 | found_integer(i, buf + offset); |
| 355 | #endif |
| 356 | pj.write_tape_s64(i); |
| 357 | } else { |
| 358 | #ifdef JSON_TEST_NUMBERS // for unit testing |
| 359 | found_unsigned_integer(i, buf + offset); |
| 360 | #endif |
| 361 | pj.write_tape_u64(i); |
| 362 | } |
| 363 | } |
| 364 | return is_structural_or_whitespace(*p); |
| 365 | } |
| 366 | |
// Parses the number at buf + offset and writes it to pj.
// Define JSON_TEST_NUMBERS for unit testing: the found_* hooks are then
// called with the parsed value (or with the offending input on rejection).
//
// buf         : input bytes
// pj          : receives the value through write_tape_s64 or write_tape_double
//               on the fast path
// offset      : position in buf of the first character of the number
//               (the '-' itself when found_minus is true)
// found_minus : whether the number starts with a minus sign
//
// Returns false when the number is rejected. Otherwise the return value is
// is_structural_or_whitespace applied to the character following the number,
// or whatever parse_float / parse_large_integer return when the fast path
// hands over to them.
//
// It is assumed that the number is followed by a structural ({,},],[) character
// or a white space character. If that is not the case (e.g., when the JSON
// document is made of a single number), then it is necessary to copy the
// content and append a space before calling this function.
//
// Our objective is accurate parsing (ULP of 0 or 1) at high speed.
static really_inline bool parse_number(const uint8_t *const buf, ParsedJson &pj,
                                       const uint32_t offset,
                                       bool found_minus) {
#ifdef SIMDJSON_SKIPNUMBERPARSING // for performance analysis, it is sometimes
                                  // useful to skip parsing
  pj.write_tape_s64(0);           // always write zero
  return true;                    // always succeeds
#else
  const char *p = reinterpret_cast<const char *>(buf + offset);
  bool negative = false;
  if (found_minus) {
    ++p;
    negative = true;
    if (!is_integer(*p)) { // a negative sign must be followed by a digit
#ifdef JSON_TEST_NUMBERS // for unit testing
      found_invalid_number(buf + offset);
#endif
      return false;
    }
  }
  // first character after the optional sign; digit_count is measured from here
  const char *const start_digits = p;

  // Integer accumulator for all digits seen (integer and fractional part).
  uint64_t i;      // an unsigned int avoids signed overflows (which are bad)
  if (*p == '0') { // 0 cannot be followed by a digit
    ++p;
    // only whitespace, a structural character, '.', 'e' or 'E' may follow
    if (is_not_structural_or_whitespace_or_exponent_or_decimal(*p)) {
#ifdef JSON_TEST_NUMBERS // for unit testing
      found_invalid_number(buf + offset);
#endif
      return false;
    }
    i = 0;
  } else {
    if (!(is_integer(*p))) { // must start with a digit
#ifdef JSON_TEST_NUMBERS // for unit testing
      found_invalid_number(buf + offset);
#endif
      return false;
    }
    unsigned char digit = *p - '0';
    i = digit;
    p++;
    // the is_made_of_eight_digits_fast routine is unlikely to help here because
    // we rarely see large integer parts like 123456789
    while (is_integer(*p)) {
      digit = *p - '0';
      // a multiplication by 10 is cheaper than an arbitrary integer
      // multiplication
      i = 10 * i + digit; // might overflow, we will handle the overflow later
      ++p;
    }
  }
  // Decimal exponent to apply to i: minus the number of fractional digits,
  // plus the explicit exponent if there is one.
  int64_t exponent = 0;
  bool is_float = false;
  if ('.' == *p) {
    is_float = true; // At this point we know that we have a float
    // We continue with the fiction that we have an integer: the digits after
    // the period keep accumulating into i and the period is accounted for in
    // exponent. As long as i did not overflow, the value is i * 10^exponent.
    ++p;
    const char *const first_after_period = p;
    if (is_integer(*p)) {
      unsigned char digit = *p - '0';
      ++p;
      i = i * 10 + digit; // might overflow + multiplication by 10 is likely
                          // cheaper than arbitrary mult.
                          // we will handle the overflow later
    } else {
      // a period must be followed by at least one digit
#ifdef JSON_TEST_NUMBERS // for unit testing
      found_invalid_number(buf + offset);
#endif
      return false;
    }
#ifdef SWAR_NUMBER_PARSING
    // this helps if we have lots of decimals!
    // this turns out to be frequent enough.
    if (is_made_of_eight_digits_fast(p)) {
      i = i * 100000000 + parse_eight_digits_unrolled(p);
      p += 8;
    }
#endif
    while (is_integer(*p)) {
      unsigned char digit = *p - '0';
      ++p;
      i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
                          // because we fall back on parse_float later.
    }
    // minus the number of fractional digits consumed
    exponent = first_after_period - p;
  }
  // Number of characters consumed since start_digits, minus one: with a
  // period this equals the number of digits, without one it is the number of
  // digits minus one.
  int digit_count =
      p - start_digits - 1; // used later to guard against overflows
  int64_t exp_number = 0;   // exponential part
  if (('e' == *p) || ('E' == *p)) {
    is_float = true;
    ++p;
    bool neg_exp = false;
    if ('-' == *p) {
      neg_exp = true;
      ++p;
    } else if ('+' == *p) {
      ++p;
    }
    if (!is_integer(*p)) { // the exponent needs at least one digit
#ifdef JSON_TEST_NUMBERS // for unit testing
      found_invalid_number(buf + offset);
#endif
      return false;
    }
    unsigned char digit = *p - '0';
    exp_number = digit;
    p++;
    // the second and third exponent digits are read without an overflow check
    if (is_integer(*p)) {
      digit = *p - '0';
      exp_number = 10 * exp_number + digit;
      ++p;
    }
    if (is_integer(*p)) {
      digit = *p - '0';
      exp_number = 10 * exp_number + digit;
      ++p;
    }
    while (is_integer(*p)) {
      if (exp_number > 0x100000000) { // we need to check for overflows
        // we refuse to parse this
#ifdef JSON_TEST_NUMBERS // for unit testing
        found_invalid_number(buf + offset);
#endif
        return false;
      }
      digit = *p - '0';
      exp_number = 10 * exp_number + digit;
      ++p;
    }
    exponent += (neg_exp ? -exp_number : exp_number);
  }
  if (is_float) {
    // Index into power_of_ten. The sum is stored as unsigned, so an exponent
    // below -308 wraps around to a huge value and is caught by the single
    // range check further down.
    uint64_t power_index = 308 + exponent;
    if (unlikely((digit_count >= 19))) { // this is uncommon
      // It is possible that the integer had an overflow.
      // We have to handle the case where we have 0.0000somenumber:
      // leading zeros (and the period among them) do not contribute to i.
      const char *start = start_digits;
      while ((*start == '0') || (*start == '.')) {
        start++;
      }
      // we over-decrement by one when there is a '.'
      digit_count -= (start - start_digits);
      if (digit_count >= 19) {
        // Ok, chances are good that we had an overflow!
        // this is almost never going to get called!!!
        // we start anew, going slowly!!!
        return parse_float(buf, pj, offset, found_minus);
      }
    }
    if (unlikely((power_index > 2 * 308))) { // this is uncommon!!!
      // the exponent is outside [-308, 308], the range of the table
      // this is almost never going to get called!!!
      // we start anew, going slowly!!!
      return parse_float(buf, pj, offset, found_minus);
    }
    double factor = power_of_ten[power_index];
    factor = negative ? -factor : factor; // the sign is carried by the factor
    double d = i * factor;
    pj.write_tape_double(d);
#ifdef JSON_TEST_NUMBERS // for unit testing
    found_float(d, buf + offset);
#endif
  } else {
    // Without a period digit_count is the number of digits minus one, so
    // this triggers for integers of 19 digits or more.
    if (unlikely(digit_count >= 18)) { // this is uncommon!!!
      // there is a good chance that we had an overflow, so we
      // need to recover: we parse the whole thing again.
      return parse_large_integer(buf, pj, offset, found_minus);
    }
    i = negative ? 0 - i : i; // unsigned negation, then written as signed
    pj.write_tape_s64(i);
#ifdef JSON_TEST_NUMBERS // for unit testing
    found_integer(i, buf + offset);
#endif
  }
  // the value has already been written at this point; the caller still
  // learns whether the character following the number is acceptable
  return is_structural_or_whitespace(*p);
#endif // SIMDJSON_SKIPNUMBERPARSING
}
| 557 | |
| 558 | |