| 1 | #include <limits> |
| 2 | namespace simdjson { |
| 3 | namespace internal { |
| 4 | |
| 5 | /** |
| 6 | * The code in the internal::from_chars function is meant to handle the floating-point number parsing |
| 7 | * when we have more than 19 digits in the decimal mantissa. This should only be seen |
| 8 | * in adversarial scenarios: we do not expect production systems to even produce |
| 9 | * such floating-point numbers. |
| 10 | * |
| 11 | * The parser is based on work by Nigel Tao (at https://github.com/google/wuffs/) |
| 12 | * who credits Ken Thompson for the design (via a reference to the Go source |
| 13 | * code). See |
| 14 | * https://github.com/google/wuffs/blob/aa46859ea40c72516deffa1b146121952d6dfd3b/internal/cgen/base/floatconv-submodule-data.c |
| 15 | * https://github.com/google/wuffs/blob/46cd8105f47ca07ae2ba8e6a7818ef9c0df6c152/internal/cgen/base/floatconv-submodule-code.c |
| 16 | * It is probably not very fast but it is a fallback that should almost never be |
| 17 | * called in real life. Google Wuffs is published under APL 2.0. |
| 18 | **/ |
| 19 | |
| 20 | namespace { |
| 21 | constexpr uint32_t max_digits = 768; |
| 22 | constexpr int32_t decimal_point_range = 2047; |
| 23 | } // namespace |
| 24 | |
| 25 | struct adjusted_mantissa { |
| 26 | uint64_t mantissa; |
| 27 | int power2; |
| 28 | adjusted_mantissa() : mantissa(0), power2(0) {} |
| 29 | }; |
| 30 | |
| 31 | struct decimal { |
| 32 | uint32_t num_digits; |
| 33 | int32_t decimal_point; |
| 34 | bool negative; |
| 35 | bool truncated; |
| 36 | uint8_t digits[max_digits]; |
| 37 | }; |
| 38 | |
| 39 | template <typename T> struct binary_format { |
| 40 | static constexpr int mantissa_explicit_bits(); |
| 41 | static constexpr int minimum_exponent(); |
| 42 | static constexpr int infinite_power(); |
| 43 | static constexpr int sign_index(); |
| 44 | }; |
| 45 | |
| 46 | template <> constexpr int binary_format<double>::mantissa_explicit_bits() { |
| 47 | return 52; |
| 48 | } |
| 49 | |
| 50 | template <> constexpr int binary_format<double>::minimum_exponent() { |
| 51 | return -1023; |
| 52 | } |
| 53 | template <> constexpr int binary_format<double>::infinite_power() { |
| 54 | return 0x7FF; |
| 55 | } |
| 56 | |
| 57 | template <> constexpr int binary_format<double>::sign_index() { return 63; } |
| 58 | |
| 59 | bool is_integer(char c) noexcept { return (c >= '0' && c <= '9'); } |
| 60 | |
| 61 | // This should always succeed since it follows a call to parse_number. |
| 62 | decimal parse_decimal(const char *&p) noexcept { |
| 63 | decimal answer; |
| 64 | answer.num_digits = 0; |
| 65 | answer.decimal_point = 0; |
| 66 | answer.truncated = false; |
| 67 | answer.negative = (*p == '-'); |
| 68 | if ((*p == '-') || (*p == '+')) { |
| 69 | ++p; |
| 70 | } |
| 71 | |
| 72 | while (*p == '0') { |
| 73 | ++p; |
| 74 | } |
| 75 | while (is_integer(c: *p)) { |
| 76 | if (answer.num_digits < max_digits) { |
| 77 | answer.digits[answer.num_digits] = uint8_t(*p - '0'); |
| 78 | } |
| 79 | answer.num_digits++; |
| 80 | ++p; |
| 81 | } |
| 82 | if (*p == '.') { |
| 83 | ++p; |
| 84 | const char *first_after_period = p; |
| 85 | // if we have not yet encountered a zero, we have to skip it as well |
| 86 | if (answer.num_digits == 0) { |
| 87 | // skip zeros |
| 88 | while (*p == '0') { |
| 89 | ++p; |
| 90 | } |
| 91 | } |
| 92 | while (is_integer(c: *p)) { |
| 93 | if (answer.num_digits < max_digits) { |
| 94 | answer.digits[answer.num_digits] = uint8_t(*p - '0'); |
| 95 | } |
| 96 | answer.num_digits++; |
| 97 | ++p; |
| 98 | } |
| 99 | answer.decimal_point = int32_t(first_after_period - p); |
| 100 | } |
| 101 | if(answer.num_digits > 0) { |
| 102 | const char *preverse = p - 1; |
| 103 | int32_t trailing_zeros = 0; |
| 104 | while ((*preverse == '0') || (*preverse == '.')) { |
| 105 | if(*preverse == '0') { trailing_zeros++; }; |
| 106 | --preverse; |
| 107 | } |
| 108 | answer.decimal_point += int32_t(answer.num_digits); |
| 109 | answer.num_digits -= uint32_t(trailing_zeros); |
| 110 | } |
| 111 | if(answer.num_digits > max_digits ) { |
| 112 | answer.num_digits = max_digits; |
| 113 | answer.truncated = true; |
| 114 | } |
| 115 | if (('e' == *p) || ('E' == *p)) { |
| 116 | ++p; |
| 117 | bool neg_exp = false; |
| 118 | if ('-' == *p) { |
| 119 | neg_exp = true; |
| 120 | ++p; |
| 121 | } else if ('+' == *p) { |
| 122 | ++p; |
| 123 | } |
| 124 | int32_t exp_number = 0; // exponential part |
| 125 | while (is_integer(c: *p)) { |
| 126 | uint8_t digit = uint8_t(*p - '0'); |
| 127 | if (exp_number < 0x10000) { |
| 128 | exp_number = 10 * exp_number + digit; |
| 129 | } |
| 130 | ++p; |
| 131 | } |
| 132 | answer.decimal_point += (neg_exp ? -exp_number : exp_number); |
| 133 | } |
| 134 | return answer; |
| 135 | } |
| 136 | |
| 137 | // This should always succeed since it follows a call to parse_number. |
| 138 | // Will not read at or beyond the "end" pointer. |
| 139 | decimal parse_decimal(const char *&p, const char * end) noexcept { |
| 140 | decimal answer; |
| 141 | answer.num_digits = 0; |
| 142 | answer.decimal_point = 0; |
| 143 | answer.truncated = false; |
| 144 | if(p == end) { return answer; } // should never happen |
| 145 | answer.negative = (*p == '-'); |
| 146 | if ((*p == '-') || (*p == '+')) { |
| 147 | ++p; |
| 148 | } |
| 149 | |
| 150 | while ((p != end) && (*p == '0')) { |
| 151 | ++p; |
| 152 | } |
| 153 | while ((p != end) && is_integer(c: *p)) { |
| 154 | if (answer.num_digits < max_digits) { |
| 155 | answer.digits[answer.num_digits] = uint8_t(*p - '0'); |
| 156 | } |
| 157 | answer.num_digits++; |
| 158 | ++p; |
| 159 | } |
| 160 | if ((p != end) && (*p == '.')) { |
| 161 | ++p; |
| 162 | if(p == end) { return answer; } // should never happen |
| 163 | const char *first_after_period = p; |
| 164 | // if we have not yet encountered a zero, we have to skip it as well |
| 165 | if (answer.num_digits == 0) { |
| 166 | // skip zeros |
| 167 | while (*p == '0') { |
| 168 | ++p; |
| 169 | } |
| 170 | } |
| 171 | while ((p != end) && is_integer(c: *p)) { |
| 172 | if (answer.num_digits < max_digits) { |
| 173 | answer.digits[answer.num_digits] = uint8_t(*p - '0'); |
| 174 | } |
| 175 | answer.num_digits++; |
| 176 | ++p; |
| 177 | } |
| 178 | answer.decimal_point = int32_t(first_after_period - p); |
| 179 | } |
| 180 | if(answer.num_digits > 0) { |
| 181 | const char *preverse = p - 1; |
| 182 | int32_t trailing_zeros = 0; |
| 183 | while ((*preverse == '0') || (*preverse == '.')) { |
| 184 | if(*preverse == '0') { trailing_zeros++; }; |
| 185 | --preverse; |
| 186 | } |
| 187 | answer.decimal_point += int32_t(answer.num_digits); |
| 188 | answer.num_digits -= uint32_t(trailing_zeros); |
| 189 | } |
| 190 | if(answer.num_digits > max_digits ) { |
| 191 | answer.num_digits = max_digits; |
| 192 | answer.truncated = true; |
| 193 | } |
| 194 | if ((p != end) && (('e' == *p) || ('E' == *p))) { |
| 195 | ++p; |
| 196 | if(p == end) { return answer; } // should never happen |
| 197 | bool neg_exp = false; |
| 198 | if ('-' == *p) { |
| 199 | neg_exp = true; |
| 200 | ++p; |
| 201 | } else if ('+' == *p) { |
| 202 | ++p; |
| 203 | } |
| 204 | int32_t exp_number = 0; // exponential part |
| 205 | while ((p != end) && is_integer(c: *p)) { |
| 206 | uint8_t digit = uint8_t(*p - '0'); |
| 207 | if (exp_number < 0x10000) { |
| 208 | exp_number = 10 * exp_number + digit; |
| 209 | } |
| 210 | ++p; |
| 211 | } |
| 212 | answer.decimal_point += (neg_exp ? -exp_number : exp_number); |
| 213 | } |
| 214 | return answer; |
| 215 | } |
| 216 | |
| 217 | namespace { |
| 218 | |
| 219 | // remove all final zeroes |
| 220 | inline void trim(decimal &h) { |
| 221 | while ((h.num_digits > 0) && (h.digits[h.num_digits - 1] == 0)) { |
| 222 | h.num_digits--; |
| 223 | } |
| 224 | } |
| 225 | |
| 226 | uint32_t number_of_digits_decimal_left_shift(decimal &h, uint32_t shift) { |
| 227 | shift &= 63; |
| 228 | const static uint16_t number_of_digits_decimal_left_shift_table[65] = { |
| 229 | 0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817, |
| 230 | 0x181D, 0x2024, 0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067, |
| 231 | 0x3073, 0x3080, 0x388E, 0x389C, 0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF, |
| 232 | 0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169, 0x5180, 0x5998, 0x59B0, |
| 233 | 0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B, 0x72AA, |
| 234 | 0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC, |
| 235 | 0x8C02, 0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C, |
| 236 | 0x051C, 0x051C, |
| 237 | }; |
| 238 | uint32_t x_a = number_of_digits_decimal_left_shift_table[shift]; |
| 239 | uint32_t x_b = number_of_digits_decimal_left_shift_table[shift + 1]; |
| 240 | uint32_t num_new_digits = x_a >> 11; |
| 241 | uint32_t pow5_a = 0x7FF & x_a; |
| 242 | uint32_t pow5_b = 0x7FF & x_b; |
| 243 | const static uint8_t |
| 244 | number_of_digits_decimal_left_shift_table_powers_of_5[0x051C] = { |
| 245 | 5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5, |
| 246 | 3, 9, 0, 6, 2, 5, 1, 9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8, |
| 247 | 2, 8, 1, 2, 5, 2, 4, 4, 1, 4, 0, 6, 2, 5, 1, 2, 2, 0, 7, 0, 3, 1, 2, |
| 248 | 5, 6, 1, 0, 3, 5, 1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2, 5, 1, |
| 249 | 5, 2, 5, 8, 7, 8, 9, 0, 6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5, |
| 250 | 3, 8, 1, 4, 6, 9, 7, 2, 6, 5, 6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2, |
| 251 | 8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1, 6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3, |
| 252 | 7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4, 1, 8, 5, 7, 9, 1, 0, 1, 5, |
| 253 | 6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7, 8, 1, 2, 5, 5, 9, 6, |
| 254 | 0, 4, 6, 4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0, 2, 3, 2, 2, 3, |
| 255 | 8, 7, 6, 9, 5, 3, 1, 2, 5, 1, 4, 9, 0, 1, 1, 6, 1, 1, 9, 3, 8, 4, 7, |
| 256 | 6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2, 3, 8, 2, 8, 1, 2, |
| 257 | 5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8, |
| 258 | 6, 2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3, |
| 259 | 2, 2, 5, 7, 4, 6, 1, 5, 4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1, |
| 260 | 2, 8, 7, 3, 0, 7, 7, 3, 9, 2, 5, 7, 8, 1, 2, 5, 2, 3, 2, 8, 3, 0, 6, |
| 261 | 4, 3, 6, 5, 3, 8, 6, 9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5, 3, |
| 262 | 2, 1, 8, 2, 6, 9, 3, 4, 8, 1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6, |
| 263 | 6, 0, 9, 1, 3, 4, 6, 7, 4, 0, 7, 2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3, |
| 264 | 8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6, 1, 3, 2, 8, 1, 2, 5, 1, 4, 5, |
| 265 | 5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8, 0, 6, 6, 4, 0, 6, 2, 5, |
| 266 | 7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9, 0, 3, 3, 2, 0, 3, |
| 267 | 1, 2, 5, 3, 6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2, 9, 5, 1, 6, |
| 268 | 6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8, 9, 8, 9, 4, 0, 3, 5, 4, 5, 8, 5, 6, |
| 269 | 4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4, 7, 0, 1, 7, 7, |
| 270 | 2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7, |
| 271 | 3, 5, 0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5, |
| 272 | 2, 2, 7, 3, 7, 3, 6, 7, 5, 4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5, |
| 273 | 9, 7, 6, 5, 6, 2, 5, 1, 1, 3, 6, 8, 6, 8, 3, 7, 7, 2, 1, 6, 1, 6, 0, |
| 274 | 2, 9, 7, 3, 9, 3, 7, 9, 8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8, |
| 275 | 8, 6, 0, 8, 0, 8, 0, 1, 4, 8, 6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5, |
| 276 | 2, 8, 4, 2, 1, 7, 0, 9, 4, 3, 0, 4, 0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4, |
| 277 | 9, 7, 0, 7, 0, 3, 1, 2, 5, 1, 4, 2, 1, 0, 8, 5, 4, 7, 1, 5, 2, 0, 2, |
| 278 | 0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5, 6, 2, 5, 7, 1, 0, 5, |
| 279 | 4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1, 2, 4, 2, 6, 7, |
| 280 | 5, 7, 8, 1, 2, 5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5, 0, 0, 9, |
| 281 | 2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8, 9, 0, 6, 2, 5, 1, 7, 7, 6, 3, 5, |
| 282 | 6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0, 6, 6, 8, 9, |
| 283 | 4, 5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3, |
| 284 | 2, 3, 3, 8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8, |
| 285 | 9, 2, 0, 9, 8, 5, 0, 0, 6, 2, 6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2, |
| 286 | 3, 6, 3, 2, 8, 1, 2, 5, 2, 2, 2, 0, 4, 4, 6, 0, 4, 9, 2, 5, 0, 3, 1, |
| 287 | 3, 0, 8, 0, 8, 4, 7, 2, 6, 3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1, |
| 288 | 1, 1, 0, 2, 2, 3, 0, 2, 4, 6, 2, 5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3, |
| 289 | 1, 6, 6, 8, 0, 9, 0, 8, 2, 0, 3, 1, 2, 5, 5, 5, 5, 1, 1, 1, 5, 1, 2, |
| 290 | 3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5, 8, 3, 4, 0, 4, 5, 4, 1, |
| 291 | 0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5, 6, 2, 8, 9, 1, 3, |
| 292 | 5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8, 1, 2, 5, 1, |
| 293 | 3, 8, 7, 7, 7, 8, 7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9, 5, 3, |
| 294 | 9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0, 6, 2, 5, 6, 9, 3, 8, 8, 9, 3, |
| 295 | 9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2, 5, 5, 6, |
| 296 | 7, 6, 2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3, |
| 297 | 6, 1, 4, 1, 8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7, |
| 298 | 6, 5, 6, 2, 5, 1, 7, 3, 4, 7, 2, 3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9, |
| 299 | 4, 4, 1, 1, 9, 2, 4, 4, 8, 1, 3, 9, 1, 9, 0, 6, 7, 3, 8, 2, 8, 1, 2, |
| 300 | 5, 8, 6, 7, 3, 6, 1, 7, 3, 7, 9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9, |
| 301 | 6, 2, 2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5, |
| 302 | }; |
| 303 | const uint8_t *pow5 = |
| 304 | &number_of_digits_decimal_left_shift_table_powers_of_5[pow5_a]; |
| 305 | uint32_t i = 0; |
| 306 | uint32_t n = pow5_b - pow5_a; |
| 307 | for (; i < n; i++) { |
| 308 | if (i >= h.num_digits) { |
| 309 | return num_new_digits - 1; |
| 310 | } else if (h.digits[i] == pow5[i]) { |
| 311 | continue; |
| 312 | } else if (h.digits[i] < pow5[i]) { |
| 313 | return num_new_digits - 1; |
| 314 | } else { |
| 315 | return num_new_digits; |
| 316 | } |
| 317 | } |
| 318 | return num_new_digits; |
| 319 | } |
| 320 | |
| 321 | } // end of anonymous namespace |
| 322 | |
| 323 | uint64_t round(decimal &h) { |
| 324 | if ((h.num_digits == 0) || (h.decimal_point < 0)) { |
| 325 | return 0; |
| 326 | } else if (h.decimal_point > 18) { |
| 327 | return UINT64_MAX; |
| 328 | } |
| 329 | // at this point, we know that h.decimal_point >= 0 |
| 330 | uint32_t dp = uint32_t(h.decimal_point); |
| 331 | uint64_t n = 0; |
| 332 | for (uint32_t i = 0; i < dp; i++) { |
| 333 | n = (10 * n) + ((i < h.num_digits) ? h.digits[i] : 0); |
| 334 | } |
| 335 | bool round_up = false; |
| 336 | if (dp < h.num_digits) { |
| 337 | round_up = h.digits[dp] >= 5; // normally, we round up |
| 338 | // but we may need to round to even! |
| 339 | if ((h.digits[dp] == 5) && (dp + 1 == h.num_digits)) { |
| 340 | round_up = h.truncated || ((dp > 0) && (1 & h.digits[dp - 1])); |
| 341 | } |
| 342 | } |
| 343 | if (round_up) { |
| 344 | n++; |
| 345 | } |
| 346 | return n; |
| 347 | } |
| 348 | |
| 349 | // computes h * 2^-shift |
| 350 | void decimal_left_shift(decimal &h, uint32_t shift) { |
| 351 | if (h.num_digits == 0) { |
| 352 | return; |
| 353 | } |
| 354 | uint32_t num_new_digits = number_of_digits_decimal_left_shift(h, shift); |
| 355 | int32_t read_index = int32_t(h.num_digits - 1); |
| 356 | uint32_t write_index = h.num_digits - 1 + num_new_digits; |
| 357 | uint64_t n = 0; |
| 358 | |
| 359 | while (read_index >= 0) { |
| 360 | n += uint64_t(h.digits[read_index]) << shift; |
| 361 | uint64_t quotient = n / 10; |
| 362 | uint64_t remainder = n - (10 * quotient); |
| 363 | if (write_index < max_digits) { |
| 364 | h.digits[write_index] = uint8_t(remainder); |
| 365 | } else if (remainder > 0) { |
| 366 | h.truncated = true; |
| 367 | } |
| 368 | n = quotient; |
| 369 | write_index--; |
| 370 | read_index--; |
| 371 | } |
| 372 | while (n > 0) { |
| 373 | uint64_t quotient = n / 10; |
| 374 | uint64_t remainder = n - (10 * quotient); |
| 375 | if (write_index < max_digits) { |
| 376 | h.digits[write_index] = uint8_t(remainder); |
| 377 | } else if (remainder > 0) { |
| 378 | h.truncated = true; |
| 379 | } |
| 380 | n = quotient; |
| 381 | write_index--; |
| 382 | } |
| 383 | h.num_digits += num_new_digits; |
| 384 | if (h.num_digits > max_digits) { |
| 385 | h.num_digits = max_digits; |
| 386 | } |
| 387 | h.decimal_point += int32_t(num_new_digits); |
| 388 | trim(h); |
| 389 | } |
| 390 | |
| 391 | // computes h * 2^shift |
| 392 | void decimal_right_shift(decimal &h, uint32_t shift) { |
| 393 | uint32_t read_index = 0; |
| 394 | uint32_t write_index = 0; |
| 395 | |
| 396 | uint64_t n = 0; |
| 397 | |
| 398 | while ((n >> shift) == 0) { |
| 399 | if (read_index < h.num_digits) { |
| 400 | n = (10 * n) + h.digits[read_index++]; |
| 401 | } else if (n == 0) { |
| 402 | return; |
| 403 | } else { |
| 404 | while ((n >> shift) == 0) { |
| 405 | n = 10 * n; |
| 406 | read_index++; |
| 407 | } |
| 408 | break; |
| 409 | } |
| 410 | } |
| 411 | h.decimal_point -= int32_t(read_index - 1); |
| 412 | if (h.decimal_point < -decimal_point_range) { // it is zero |
| 413 | h.num_digits = 0; |
| 414 | h.decimal_point = 0; |
| 415 | h.negative = false; |
| 416 | h.truncated = false; |
| 417 | return; |
| 418 | } |
| 419 | uint64_t mask = (uint64_t(1) << shift) - 1; |
| 420 | while (read_index < h.num_digits) { |
| 421 | uint8_t new_digit = uint8_t(n >> shift); |
| 422 | n = (10 * (n & mask)) + h.digits[read_index++]; |
| 423 | h.digits[write_index++] = new_digit; |
| 424 | } |
| 425 | while (n > 0) { |
| 426 | uint8_t new_digit = uint8_t(n >> shift); |
| 427 | n = 10 * (n & mask); |
| 428 | if (write_index < max_digits) { |
| 429 | h.digits[write_index++] = new_digit; |
| 430 | } else if (new_digit > 0) { |
| 431 | h.truncated = true; |
| 432 | } |
| 433 | } |
| 434 | h.num_digits = write_index; |
| 435 | trim(h); |
| 436 | } |
| 437 | |
| 438 | template <typename binary> adjusted_mantissa compute_float(decimal &d) { |
| 439 | adjusted_mantissa answer; |
| 440 | if (d.num_digits == 0) { |
| 441 | // should be zero |
| 442 | answer.power2 = 0; |
| 443 | answer.mantissa = 0; |
| 444 | return answer; |
| 445 | } |
| 446 | // At this point, going further, we can assume that d.num_digits > 0. |
| 447 | // We want to guard against excessive decimal point values because |
| 448 | // they can result in long running times. Indeed, we do |
| 449 | // shifts by at most 60 bits. We have that log(10**400)/log(2**60) ~= 22 |
| 450 | // which is fine, but log(10**299995)/log(2**60) ~= 16609 which is not |
| 451 | // fine (runs for a long time). |
| 452 | // |
| 453 | if(d.decimal_point < -324) { |
| 454 | // We have something smaller than 1e-324 which is always zero |
| 455 | // in binary64 and binary32. |
| 456 | // It should be zero. |
| 457 | answer.power2 = 0; |
| 458 | answer.mantissa = 0; |
| 459 | return answer; |
| 460 | } else if(d.decimal_point >= 310) { |
| 461 | // We have something at least as large as 0.1e310 which is |
| 462 | // always infinite. |
| 463 | answer.power2 = binary::infinite_power(); |
| 464 | answer.mantissa = 0; |
| 465 | return answer; |
| 466 | } |
| 467 | |
| 468 | static const uint32_t max_shift = 60; |
| 469 | static const uint32_t num_powers = 19; |
| 470 | static const uint8_t powers[19] = { |
| 471 | 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, // |
| 472 | 33, 36, 39, 43, 46, 49, 53, 56, 59, // |
| 473 | }; |
| 474 | int32_t exp2 = 0; |
| 475 | while (d.decimal_point > 0) { |
| 476 | uint32_t n = uint32_t(d.decimal_point); |
| 477 | uint32_t shift = (n < num_powers) ? powers[n] : max_shift; |
| 478 | decimal_right_shift(h&: d, shift); |
| 479 | if (d.decimal_point < -decimal_point_range) { |
| 480 | // should be zero |
| 481 | answer.power2 = 0; |
| 482 | answer.mantissa = 0; |
| 483 | return answer; |
| 484 | } |
| 485 | exp2 += int32_t(shift); |
| 486 | } |
| 487 | // We shift left toward [1/2 ... 1]. |
| 488 | while (d.decimal_point <= 0) { |
| 489 | uint32_t shift; |
| 490 | if (d.decimal_point == 0) { |
| 491 | if (d.digits[0] >= 5) { |
| 492 | break; |
| 493 | } |
| 494 | shift = (d.digits[0] < 2) ? 2 : 1; |
| 495 | } else { |
| 496 | uint32_t n = uint32_t(-d.decimal_point); |
| 497 | shift = (n < num_powers) ? powers[n] : max_shift; |
| 498 | } |
| 499 | decimal_left_shift(h&: d, shift); |
| 500 | if (d.decimal_point > decimal_point_range) { |
| 501 | // we want to get infinity: |
| 502 | answer.power2 = 0xFF; |
| 503 | answer.mantissa = 0; |
| 504 | return answer; |
| 505 | } |
| 506 | exp2 -= int32_t(shift); |
| 507 | } |
| 508 | // We are now in the range [1/2 ... 1] but the binary format uses [1 ... 2]. |
| 509 | exp2--; |
| 510 | constexpr int32_t minimum_exponent = binary::minimum_exponent(); |
| 511 | while ((minimum_exponent + 1) > exp2) { |
| 512 | uint32_t n = uint32_t((minimum_exponent + 1) - exp2); |
| 513 | if (n > max_shift) { |
| 514 | n = max_shift; |
| 515 | } |
| 516 | decimal_right_shift(h&: d, shift: n); |
| 517 | exp2 += int32_t(n); |
| 518 | } |
| 519 | if ((exp2 - minimum_exponent) >= binary::infinite_power()) { |
| 520 | answer.power2 = binary::infinite_power(); |
| 521 | answer.mantissa = 0; |
| 522 | return answer; |
| 523 | } |
| 524 | |
| 525 | const int mantissa_size_in_bits = binary::mantissa_explicit_bits() + 1; |
| 526 | decimal_left_shift(h&: d, shift: mantissa_size_in_bits); |
| 527 | |
| 528 | uint64_t mantissa = round(h&: d); |
| 529 | // It is possible that we have an overflow, in which case we need |
| 530 | // to shift back. |
| 531 | if (mantissa >= (uint64_t(1) << mantissa_size_in_bits)) { |
| 532 | decimal_right_shift(h&: d, shift: 1); |
| 533 | exp2 += 1; |
| 534 | mantissa = round(h&: d); |
| 535 | if ((exp2 - minimum_exponent) >= binary::infinite_power()) { |
| 536 | answer.power2 = binary::infinite_power(); |
| 537 | answer.mantissa = 0; |
| 538 | return answer; |
| 539 | } |
| 540 | } |
| 541 | answer.power2 = exp2 - binary::minimum_exponent(); |
| 542 | if (mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) { |
| 543 | answer.power2--; |
| 544 | } |
| 545 | answer.mantissa = |
| 546 | mantissa & ((uint64_t(1) << binary::mantissa_explicit_bits()) - 1); |
| 547 | return answer; |
| 548 | } |
| 549 | |
| 550 | template <typename binary> |
| 551 | adjusted_mantissa parse_long_mantissa(const char *first) { |
| 552 | decimal d = parse_decimal(p&: first); |
| 553 | return compute_float<binary>(d); |
| 554 | } |
| 555 | |
| 556 | template <typename binary> |
| 557 | adjusted_mantissa parse_long_mantissa(const char *first, const char *end) { |
| 558 | decimal d = parse_decimal(p&: first, end); |
| 559 | return compute_float<binary>(d); |
| 560 | } |
| 561 | |
| 562 | double from_chars(const char *first) noexcept { |
| 563 | bool negative = first[0] == '-'; |
| 564 | if (negative) { |
| 565 | first++; |
| 566 | } |
| 567 | adjusted_mantissa am = parse_long_mantissa<binary_format<double>>(first); |
| 568 | uint64_t word = am.mantissa; |
| 569 | word |= uint64_t(am.power2) |
| 570 | << binary_format<double>::mantissa_explicit_bits(); |
| 571 | word = negative ? word | (uint64_t(1) << binary_format<double>::sign_index()) |
| 572 | : word; |
| 573 | double value; |
| 574 | std::memcpy(dest: &value, src: &word, n: sizeof(double)); |
| 575 | return value; |
| 576 | } |
| 577 | |
| 578 | |
| 579 | double from_chars(const char *first, const char *end) noexcept { |
| 580 | bool negative = first[0] == '-'; |
| 581 | if (negative) { |
| 582 | first++; |
| 583 | } |
| 584 | adjusted_mantissa am = parse_long_mantissa<binary_format<double>>(first, end); |
| 585 | uint64_t word = am.mantissa; |
| 586 | word |= uint64_t(am.power2) |
| 587 | << binary_format<double>::mantissa_explicit_bits(); |
| 588 | word = negative ? word | (uint64_t(1) << binary_format<double>::sign_index()) |
| 589 | : word; |
| 590 | double value; |
| 591 | std::memcpy(dest: &value, src: &word, n: sizeof(double)); |
| 592 | return value; |
| 593 | } |
| 594 | |
| 595 | } // internal |
| 596 | } // simdjson |