1 | #include <limits> |
2 | namespace simdjson { |
3 | namespace internal { |
4 | |
5 | /** |
6 | * The code in the internal::from_chars function is meant to handle the floating-point number parsing |
7 | * when we have more than 19 digits in the decimal mantissa. This should only be seen |
8 | * in adversarial scenarios: we do not expect production systems to even produce |
9 | * such floating-point numbers. |
10 | * |
11 | * The parser is based on work by Nigel Tao (at https://github.com/google/wuffs/) |
12 | * who credits Ken Thompson for the design (via a reference to the Go source |
13 | * code). See |
14 | * https://github.com/google/wuffs/blob/aa46859ea40c72516deffa1b146121952d6dfd3b/internal/cgen/base/floatconv-submodule-data.c |
15 | * https://github.com/google/wuffs/blob/46cd8105f47ca07ae2ba8e6a7818ef9c0df6c152/internal/cgen/base/floatconv-submodule-code.c |
16 | * It is probably not very fast but it is a fallback that should almost never be |
17 | * called in real life. Google Wuffs is published under APL 2.0. |
18 | **/ |
19 | |
20 | namespace { |
21 | constexpr uint32_t max_digits = 768; |
22 | constexpr int32_t decimal_point_range = 2047; |
23 | } // namespace |
24 | |
25 | struct adjusted_mantissa { |
26 | uint64_t mantissa; |
27 | int power2; |
28 | adjusted_mantissa() : mantissa(0), power2(0) {} |
29 | }; |
30 | |
31 | struct decimal { |
32 | uint32_t num_digits; |
33 | int32_t decimal_point; |
34 | bool negative; |
35 | bool truncated; |
36 | uint8_t digits[max_digits]; |
37 | }; |
38 | |
39 | template <typename T> struct binary_format { |
40 | static constexpr int mantissa_explicit_bits(); |
41 | static constexpr int minimum_exponent(); |
42 | static constexpr int infinite_power(); |
43 | static constexpr int sign_index(); |
44 | }; |
45 | |
46 | template <> constexpr int binary_format<double>::mantissa_explicit_bits() { |
47 | return 52; |
48 | } |
49 | |
50 | template <> constexpr int binary_format<double>::minimum_exponent() { |
51 | return -1023; |
52 | } |
53 | template <> constexpr int binary_format<double>::infinite_power() { |
54 | return 0x7FF; |
55 | } |
56 | |
57 | template <> constexpr int binary_format<double>::sign_index() { return 63; } |
58 | |
59 | bool is_integer(char c) noexcept { return (c >= '0' && c <= '9'); } |
60 | |
61 | // This should always succeed since it follows a call to parse_number. |
62 | decimal parse_decimal(const char *&p) noexcept { |
63 | decimal answer; |
64 | answer.num_digits = 0; |
65 | answer.decimal_point = 0; |
66 | answer.truncated = false; |
67 | answer.negative = (*p == '-'); |
68 | if ((*p == '-') || (*p == '+')) { |
69 | ++p; |
70 | } |
71 | |
72 | while (*p == '0') { |
73 | ++p; |
74 | } |
75 | while (is_integer(c: *p)) { |
76 | if (answer.num_digits < max_digits) { |
77 | answer.digits[answer.num_digits] = uint8_t(*p - '0'); |
78 | } |
79 | answer.num_digits++; |
80 | ++p; |
81 | } |
82 | if (*p == '.') { |
83 | ++p; |
84 | const char *first_after_period = p; |
85 | // if we have not yet encountered a zero, we have to skip it as well |
86 | if (answer.num_digits == 0) { |
87 | // skip zeros |
88 | while (*p == '0') { |
89 | ++p; |
90 | } |
91 | } |
92 | while (is_integer(c: *p)) { |
93 | if (answer.num_digits < max_digits) { |
94 | answer.digits[answer.num_digits] = uint8_t(*p - '0'); |
95 | } |
96 | answer.num_digits++; |
97 | ++p; |
98 | } |
99 | answer.decimal_point = int32_t(first_after_period - p); |
100 | } |
101 | if(answer.num_digits > 0) { |
102 | const char *preverse = p - 1; |
103 | int32_t trailing_zeros = 0; |
104 | while ((*preverse == '0') || (*preverse == '.')) { |
105 | if(*preverse == '0') { trailing_zeros++; }; |
106 | --preverse; |
107 | } |
108 | answer.decimal_point += int32_t(answer.num_digits); |
109 | answer.num_digits -= uint32_t(trailing_zeros); |
110 | } |
111 | if(answer.num_digits > max_digits ) { |
112 | answer.num_digits = max_digits; |
113 | answer.truncated = true; |
114 | } |
115 | if (('e' == *p) || ('E' == *p)) { |
116 | ++p; |
117 | bool neg_exp = false; |
118 | if ('-' == *p) { |
119 | neg_exp = true; |
120 | ++p; |
121 | } else if ('+' == *p) { |
122 | ++p; |
123 | } |
124 | int32_t exp_number = 0; // exponential part |
125 | while (is_integer(c: *p)) { |
126 | uint8_t digit = uint8_t(*p - '0'); |
127 | if (exp_number < 0x10000) { |
128 | exp_number = 10 * exp_number + digit; |
129 | } |
130 | ++p; |
131 | } |
132 | answer.decimal_point += (neg_exp ? -exp_number : exp_number); |
133 | } |
134 | return answer; |
135 | } |
136 | |
137 | // This should always succeed since it follows a call to parse_number. |
138 | // Will not read at or beyond the "end" pointer. |
139 | decimal parse_decimal(const char *&p, const char * end) noexcept { |
140 | decimal answer; |
141 | answer.num_digits = 0; |
142 | answer.decimal_point = 0; |
143 | answer.truncated = false; |
144 | if(p == end) { return answer; } // should never happen |
145 | answer.negative = (*p == '-'); |
146 | if ((*p == '-') || (*p == '+')) { |
147 | ++p; |
148 | } |
149 | |
150 | while ((p != end) && (*p == '0')) { |
151 | ++p; |
152 | } |
153 | while ((p != end) && is_integer(c: *p)) { |
154 | if (answer.num_digits < max_digits) { |
155 | answer.digits[answer.num_digits] = uint8_t(*p - '0'); |
156 | } |
157 | answer.num_digits++; |
158 | ++p; |
159 | } |
160 | if ((p != end) && (*p == '.')) { |
161 | ++p; |
162 | if(p == end) { return answer; } // should never happen |
163 | const char *first_after_period = p; |
164 | // if we have not yet encountered a zero, we have to skip it as well |
165 | if (answer.num_digits == 0) { |
166 | // skip zeros |
167 | while (*p == '0') { |
168 | ++p; |
169 | } |
170 | } |
171 | while ((p != end) && is_integer(c: *p)) { |
172 | if (answer.num_digits < max_digits) { |
173 | answer.digits[answer.num_digits] = uint8_t(*p - '0'); |
174 | } |
175 | answer.num_digits++; |
176 | ++p; |
177 | } |
178 | answer.decimal_point = int32_t(first_after_period - p); |
179 | } |
180 | if(answer.num_digits > 0) { |
181 | const char *preverse = p - 1; |
182 | int32_t trailing_zeros = 0; |
183 | while ((*preverse == '0') || (*preverse == '.')) { |
184 | if(*preverse == '0') { trailing_zeros++; }; |
185 | --preverse; |
186 | } |
187 | answer.decimal_point += int32_t(answer.num_digits); |
188 | answer.num_digits -= uint32_t(trailing_zeros); |
189 | } |
190 | if(answer.num_digits > max_digits ) { |
191 | answer.num_digits = max_digits; |
192 | answer.truncated = true; |
193 | } |
194 | if ((p != end) && (('e' == *p) || ('E' == *p))) { |
195 | ++p; |
196 | if(p == end) { return answer; } // should never happen |
197 | bool neg_exp = false; |
198 | if ('-' == *p) { |
199 | neg_exp = true; |
200 | ++p; |
201 | } else if ('+' == *p) { |
202 | ++p; |
203 | } |
204 | int32_t exp_number = 0; // exponential part |
205 | while ((p != end) && is_integer(c: *p)) { |
206 | uint8_t digit = uint8_t(*p - '0'); |
207 | if (exp_number < 0x10000) { |
208 | exp_number = 10 * exp_number + digit; |
209 | } |
210 | ++p; |
211 | } |
212 | answer.decimal_point += (neg_exp ? -exp_number : exp_number); |
213 | } |
214 | return answer; |
215 | } |
216 | |
217 | namespace { |
218 | |
219 | // remove all final zeroes |
220 | inline void trim(decimal &h) { |
221 | while ((h.num_digits > 0) && (h.digits[h.num_digits - 1] == 0)) { |
222 | h.num_digits--; |
223 | } |
224 | } |
225 | |
226 | uint32_t number_of_digits_decimal_left_shift(decimal &h, uint32_t shift) { |
227 | shift &= 63; |
228 | const static uint16_t number_of_digits_decimal_left_shift_table[65] = { |
229 | 0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817, |
230 | 0x181D, 0x2024, 0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067, |
231 | 0x3073, 0x3080, 0x388E, 0x389C, 0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF, |
232 | 0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169, 0x5180, 0x5998, 0x59B0, |
233 | 0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B, 0x72AA, |
234 | 0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC, |
235 | 0x8C02, 0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C, |
236 | 0x051C, 0x051C, |
237 | }; |
238 | uint32_t x_a = number_of_digits_decimal_left_shift_table[shift]; |
239 | uint32_t x_b = number_of_digits_decimal_left_shift_table[shift + 1]; |
240 | uint32_t num_new_digits = x_a >> 11; |
241 | uint32_t pow5_a = 0x7FF & x_a; |
242 | uint32_t pow5_b = 0x7FF & x_b; |
243 | const static uint8_t |
244 | number_of_digits_decimal_left_shift_table_powers_of_5[0x051C] = { |
245 | 5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5, |
246 | 3, 9, 0, 6, 2, 5, 1, 9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8, |
247 | 2, 8, 1, 2, 5, 2, 4, 4, 1, 4, 0, 6, 2, 5, 1, 2, 2, 0, 7, 0, 3, 1, 2, |
248 | 5, 6, 1, 0, 3, 5, 1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2, 5, 1, |
249 | 5, 2, 5, 8, 7, 8, 9, 0, 6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5, |
250 | 3, 8, 1, 4, 6, 9, 7, 2, 6, 5, 6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2, |
251 | 8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1, 6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3, |
252 | 7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4, 1, 8, 5, 7, 9, 1, 0, 1, 5, |
253 | 6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7, 8, 1, 2, 5, 5, 9, 6, |
254 | 0, 4, 6, 4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0, 2, 3, 2, 2, 3, |
255 | 8, 7, 6, 9, 5, 3, 1, 2, 5, 1, 4, 9, 0, 1, 1, 6, 1, 1, 9, 3, 8, 4, 7, |
256 | 6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2, 3, 8, 2, 8, 1, 2, |
257 | 5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8, |
258 | 6, 2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3, |
259 | 2, 2, 5, 7, 4, 6, 1, 5, 4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1, |
260 | 2, 8, 7, 3, 0, 7, 7, 3, 9, 2, 5, 7, 8, 1, 2, 5, 2, 3, 2, 8, 3, 0, 6, |
261 | 4, 3, 6, 5, 3, 8, 6, 9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5, 3, |
262 | 2, 1, 8, 2, 6, 9, 3, 4, 8, 1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6, |
263 | 6, 0, 9, 1, 3, 4, 6, 7, 4, 0, 7, 2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3, |
264 | 8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6, 1, 3, 2, 8, 1, 2, 5, 1, 4, 5, |
265 | 5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8, 0, 6, 6, 4, 0, 6, 2, 5, |
266 | 7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9, 0, 3, 3, 2, 0, 3, |
267 | 1, 2, 5, 3, 6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2, 9, 5, 1, 6, |
268 | 6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8, 9, 8, 9, 4, 0, 3, 5, 4, 5, 8, 5, 6, |
269 | 4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4, 7, 0, 1, 7, 7, |
270 | 2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7, |
271 | 3, 5, 0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5, |
272 | 2, 2, 7, 3, 7, 3, 6, 7, 5, 4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5, |
273 | 9, 7, 6, 5, 6, 2, 5, 1, 1, 3, 6, 8, 6, 8, 3, 7, 7, 2, 1, 6, 1, 6, 0, |
274 | 2, 9, 7, 3, 9, 3, 7, 9, 8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8, |
275 | 8, 6, 0, 8, 0, 8, 0, 1, 4, 8, 6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5, |
276 | 2, 8, 4, 2, 1, 7, 0, 9, 4, 3, 0, 4, 0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4, |
277 | 9, 7, 0, 7, 0, 3, 1, 2, 5, 1, 4, 2, 1, 0, 8, 5, 4, 7, 1, 5, 2, 0, 2, |
278 | 0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5, 6, 2, 5, 7, 1, 0, 5, |
279 | 4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1, 2, 4, 2, 6, 7, |
280 | 5, 7, 8, 1, 2, 5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5, 0, 0, 9, |
281 | 2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8, 9, 0, 6, 2, 5, 1, 7, 7, 6, 3, 5, |
282 | 6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0, 6, 6, 8, 9, |
283 | 4, 5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3, |
284 | 2, 3, 3, 8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8, |
285 | 9, 2, 0, 9, 8, 5, 0, 0, 6, 2, 6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2, |
286 | 3, 6, 3, 2, 8, 1, 2, 5, 2, 2, 2, 0, 4, 4, 6, 0, 4, 9, 2, 5, 0, 3, 1, |
287 | 3, 0, 8, 0, 8, 4, 7, 2, 6, 3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1, |
288 | 1, 1, 0, 2, 2, 3, 0, 2, 4, 6, 2, 5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3, |
289 | 1, 6, 6, 8, 0, 9, 0, 8, 2, 0, 3, 1, 2, 5, 5, 5, 5, 1, 1, 1, 5, 1, 2, |
290 | 3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5, 8, 3, 4, 0, 4, 5, 4, 1, |
291 | 0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5, 6, 2, 8, 9, 1, 3, |
292 | 5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8, 1, 2, 5, 1, |
293 | 3, 8, 7, 7, 7, 8, 7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9, 5, 3, |
294 | 9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0, 6, 2, 5, 6, 9, 3, 8, 8, 9, 3, |
295 | 9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2, 5, 5, 6, |
296 | 7, 6, 2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3, |
297 | 6, 1, 4, 1, 8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7, |
298 | 6, 5, 6, 2, 5, 1, 7, 3, 4, 7, 2, 3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9, |
299 | 4, 4, 1, 1, 9, 2, 4, 4, 8, 1, 3, 9, 1, 9, 0, 6, 7, 3, 8, 2, 8, 1, 2, |
300 | 5, 8, 6, 7, 3, 6, 1, 7, 3, 7, 9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9, |
301 | 6, 2, 2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5, |
302 | }; |
303 | const uint8_t *pow5 = |
304 | &number_of_digits_decimal_left_shift_table_powers_of_5[pow5_a]; |
305 | uint32_t i = 0; |
306 | uint32_t n = pow5_b - pow5_a; |
307 | for (; i < n; i++) { |
308 | if (i >= h.num_digits) { |
309 | return num_new_digits - 1; |
310 | } else if (h.digits[i] == pow5[i]) { |
311 | continue; |
312 | } else if (h.digits[i] < pow5[i]) { |
313 | return num_new_digits - 1; |
314 | } else { |
315 | return num_new_digits; |
316 | } |
317 | } |
318 | return num_new_digits; |
319 | } |
320 | |
321 | } // end of anonymous namespace |
322 | |
323 | uint64_t round(decimal &h) { |
324 | if ((h.num_digits == 0) || (h.decimal_point < 0)) { |
325 | return 0; |
326 | } else if (h.decimal_point > 18) { |
327 | return UINT64_MAX; |
328 | } |
329 | // at this point, we know that h.decimal_point >= 0 |
330 | uint32_t dp = uint32_t(h.decimal_point); |
331 | uint64_t n = 0; |
332 | for (uint32_t i = 0; i < dp; i++) { |
333 | n = (10 * n) + ((i < h.num_digits) ? h.digits[i] : 0); |
334 | } |
335 | bool round_up = false; |
336 | if (dp < h.num_digits) { |
337 | round_up = h.digits[dp] >= 5; // normally, we round up |
338 | // but we may need to round to even! |
339 | if ((h.digits[dp] == 5) && (dp + 1 == h.num_digits)) { |
340 | round_up = h.truncated || ((dp > 0) && (1 & h.digits[dp - 1])); |
341 | } |
342 | } |
343 | if (round_up) { |
344 | n++; |
345 | } |
346 | return n; |
347 | } |
348 | |
349 | // computes h * 2^-shift |
350 | void decimal_left_shift(decimal &h, uint32_t shift) { |
351 | if (h.num_digits == 0) { |
352 | return; |
353 | } |
354 | uint32_t num_new_digits = number_of_digits_decimal_left_shift(h, shift); |
355 | int32_t read_index = int32_t(h.num_digits - 1); |
356 | uint32_t write_index = h.num_digits - 1 + num_new_digits; |
357 | uint64_t n = 0; |
358 | |
359 | while (read_index >= 0) { |
360 | n += uint64_t(h.digits[read_index]) << shift; |
361 | uint64_t quotient = n / 10; |
362 | uint64_t remainder = n - (10 * quotient); |
363 | if (write_index < max_digits) { |
364 | h.digits[write_index] = uint8_t(remainder); |
365 | } else if (remainder > 0) { |
366 | h.truncated = true; |
367 | } |
368 | n = quotient; |
369 | write_index--; |
370 | read_index--; |
371 | } |
372 | while (n > 0) { |
373 | uint64_t quotient = n / 10; |
374 | uint64_t remainder = n - (10 * quotient); |
375 | if (write_index < max_digits) { |
376 | h.digits[write_index] = uint8_t(remainder); |
377 | } else if (remainder > 0) { |
378 | h.truncated = true; |
379 | } |
380 | n = quotient; |
381 | write_index--; |
382 | } |
383 | h.num_digits += num_new_digits; |
384 | if (h.num_digits > max_digits) { |
385 | h.num_digits = max_digits; |
386 | } |
387 | h.decimal_point += int32_t(num_new_digits); |
388 | trim(h); |
389 | } |
390 | |
391 | // computes h * 2^shift |
392 | void decimal_right_shift(decimal &h, uint32_t shift) { |
393 | uint32_t read_index = 0; |
394 | uint32_t write_index = 0; |
395 | |
396 | uint64_t n = 0; |
397 | |
398 | while ((n >> shift) == 0) { |
399 | if (read_index < h.num_digits) { |
400 | n = (10 * n) + h.digits[read_index++]; |
401 | } else if (n == 0) { |
402 | return; |
403 | } else { |
404 | while ((n >> shift) == 0) { |
405 | n = 10 * n; |
406 | read_index++; |
407 | } |
408 | break; |
409 | } |
410 | } |
411 | h.decimal_point -= int32_t(read_index - 1); |
412 | if (h.decimal_point < -decimal_point_range) { // it is zero |
413 | h.num_digits = 0; |
414 | h.decimal_point = 0; |
415 | h.negative = false; |
416 | h.truncated = false; |
417 | return; |
418 | } |
419 | uint64_t mask = (uint64_t(1) << shift) - 1; |
420 | while (read_index < h.num_digits) { |
421 | uint8_t new_digit = uint8_t(n >> shift); |
422 | n = (10 * (n & mask)) + h.digits[read_index++]; |
423 | h.digits[write_index++] = new_digit; |
424 | } |
425 | while (n > 0) { |
426 | uint8_t new_digit = uint8_t(n >> shift); |
427 | n = 10 * (n & mask); |
428 | if (write_index < max_digits) { |
429 | h.digits[write_index++] = new_digit; |
430 | } else if (new_digit > 0) { |
431 | h.truncated = true; |
432 | } |
433 | } |
434 | h.num_digits = write_index; |
435 | trim(h); |
436 | } |
437 | |
438 | template <typename binary> adjusted_mantissa compute_float(decimal &d) { |
439 | adjusted_mantissa answer; |
440 | if (d.num_digits == 0) { |
441 | // should be zero |
442 | answer.power2 = 0; |
443 | answer.mantissa = 0; |
444 | return answer; |
445 | } |
446 | // At this point, going further, we can assume that d.num_digits > 0. |
447 | // We want to guard against excessive decimal point values because |
448 | // they can result in long running times. Indeed, we do |
449 | // shifts by at most 60 bits. We have that log(10**400)/log(2**60) ~= 22 |
450 | // which is fine, but log(10**299995)/log(2**60) ~= 16609 which is not |
451 | // fine (runs for a long time). |
452 | // |
453 | if(d.decimal_point < -324) { |
454 | // We have something smaller than 1e-324 which is always zero |
455 | // in binary64 and binary32. |
456 | // It should be zero. |
457 | answer.power2 = 0; |
458 | answer.mantissa = 0; |
459 | return answer; |
460 | } else if(d.decimal_point >= 310) { |
461 | // We have something at least as large as 0.1e310 which is |
462 | // always infinite. |
463 | answer.power2 = binary::infinite_power(); |
464 | answer.mantissa = 0; |
465 | return answer; |
466 | } |
467 | |
468 | static const uint32_t max_shift = 60; |
469 | static const uint32_t num_powers = 19; |
470 | static const uint8_t powers[19] = { |
471 | 0, 3, 6, 9, 13, 16, 19, 23, 26, 29, // |
472 | 33, 36, 39, 43, 46, 49, 53, 56, 59, // |
473 | }; |
474 | int32_t exp2 = 0; |
475 | while (d.decimal_point > 0) { |
476 | uint32_t n = uint32_t(d.decimal_point); |
477 | uint32_t shift = (n < num_powers) ? powers[n] : max_shift; |
478 | decimal_right_shift(h&: d, shift); |
479 | if (d.decimal_point < -decimal_point_range) { |
480 | // should be zero |
481 | answer.power2 = 0; |
482 | answer.mantissa = 0; |
483 | return answer; |
484 | } |
485 | exp2 += int32_t(shift); |
486 | } |
487 | // We shift left toward [1/2 ... 1]. |
488 | while (d.decimal_point <= 0) { |
489 | uint32_t shift; |
490 | if (d.decimal_point == 0) { |
491 | if (d.digits[0] >= 5) { |
492 | break; |
493 | } |
494 | shift = (d.digits[0] < 2) ? 2 : 1; |
495 | } else { |
496 | uint32_t n = uint32_t(-d.decimal_point); |
497 | shift = (n < num_powers) ? powers[n] : max_shift; |
498 | } |
499 | decimal_left_shift(h&: d, shift); |
500 | if (d.decimal_point > decimal_point_range) { |
501 | // we want to get infinity: |
502 | answer.power2 = 0xFF; |
503 | answer.mantissa = 0; |
504 | return answer; |
505 | } |
506 | exp2 -= int32_t(shift); |
507 | } |
508 | // We are now in the range [1/2 ... 1] but the binary format uses [1 ... 2]. |
509 | exp2--; |
510 | constexpr int32_t minimum_exponent = binary::minimum_exponent(); |
511 | while ((minimum_exponent + 1) > exp2) { |
512 | uint32_t n = uint32_t((minimum_exponent + 1) - exp2); |
513 | if (n > max_shift) { |
514 | n = max_shift; |
515 | } |
516 | decimal_right_shift(h&: d, shift: n); |
517 | exp2 += int32_t(n); |
518 | } |
519 | if ((exp2 - minimum_exponent) >= binary::infinite_power()) { |
520 | answer.power2 = binary::infinite_power(); |
521 | answer.mantissa = 0; |
522 | return answer; |
523 | } |
524 | |
525 | const int mantissa_size_in_bits = binary::mantissa_explicit_bits() + 1; |
526 | decimal_left_shift(h&: d, shift: mantissa_size_in_bits); |
527 | |
528 | uint64_t mantissa = round(h&: d); |
529 | // It is possible that we have an overflow, in which case we need |
530 | // to shift back. |
531 | if (mantissa >= (uint64_t(1) << mantissa_size_in_bits)) { |
532 | decimal_right_shift(h&: d, shift: 1); |
533 | exp2 += 1; |
534 | mantissa = round(h&: d); |
535 | if ((exp2 - minimum_exponent) >= binary::infinite_power()) { |
536 | answer.power2 = binary::infinite_power(); |
537 | answer.mantissa = 0; |
538 | return answer; |
539 | } |
540 | } |
541 | answer.power2 = exp2 - binary::minimum_exponent(); |
542 | if (mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) { |
543 | answer.power2--; |
544 | } |
545 | answer.mantissa = |
546 | mantissa & ((uint64_t(1) << binary::mantissa_explicit_bits()) - 1); |
547 | return answer; |
548 | } |
549 | |
550 | template <typename binary> |
551 | adjusted_mantissa parse_long_mantissa(const char *first) { |
552 | decimal d = parse_decimal(p&: first); |
553 | return compute_float<binary>(d); |
554 | } |
555 | |
556 | template <typename binary> |
557 | adjusted_mantissa parse_long_mantissa(const char *first, const char *end) { |
558 | decimal d = parse_decimal(p&: first, end); |
559 | return compute_float<binary>(d); |
560 | } |
561 | |
562 | double from_chars(const char *first) noexcept { |
563 | bool negative = first[0] == '-'; |
564 | if (negative) { |
565 | first++; |
566 | } |
567 | adjusted_mantissa am = parse_long_mantissa<binary_format<double>>(first); |
568 | uint64_t word = am.mantissa; |
569 | word |= uint64_t(am.power2) |
570 | << binary_format<double>::mantissa_explicit_bits(); |
571 | word = negative ? word | (uint64_t(1) << binary_format<double>::sign_index()) |
572 | : word; |
573 | double value; |
574 | std::memcpy(dest: &value, src: &word, n: sizeof(double)); |
575 | return value; |
576 | } |
577 | |
578 | |
579 | double from_chars(const char *first, const char *end) noexcept { |
580 | bool negative = first[0] == '-'; |
581 | if (negative) { |
582 | first++; |
583 | } |
584 | adjusted_mantissa am = parse_long_mantissa<binary_format<double>>(first, end); |
585 | uint64_t word = am.mantissa; |
586 | word |= uint64_t(am.power2) |
587 | << binary_format<double>::mantissa_explicit_bits(); |
588 | word = negative ? word | (uint64_t(1) << binary_format<double>::sign_index()) |
589 | : word; |
590 | double value; |
591 | std::memcpy(dest: &value, src: &word, n: sizeof(double)); |
592 | return value; |
593 | } |
594 | |
595 | } // internal |
596 | } // simdjson |