1 | // Copyright 2017 The Abseil Authors. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | // This file contains string processing functions related to |
16 | // numeric values. |
17 | |
18 | #include "absl/strings/numbers.h" |
19 | |
20 | #include <algorithm> |
21 | #include <cassert> |
22 | #include <cfloat> // for DBL_DIG and FLT_DIG |
23 | #include <cmath> // for HUGE_VAL |
24 | #include <cstdint> |
25 | #include <cstdio> |
26 | #include <cstdlib> |
27 | #include <cstring> |
28 | #include <iterator> |
29 | #include <limits> |
30 | #include <memory> |
31 | #include <utility> |
32 | |
33 | #include "absl/base/internal/bits.h" |
34 | #include "absl/base/internal/raw_logging.h" |
35 | #include "absl/strings/ascii.h" |
36 | #include "absl/strings/charconv.h" |
37 | #include "absl/strings/internal/memutil.h" |
38 | #include "absl/strings/match.h" |
39 | #include "absl/strings/str_cat.h" |
40 | |
41 | namespace absl { |
42 | |
43 | bool SimpleAtof(absl::string_view str, float* out) { |
44 | *out = 0.0; |
45 | str = StripAsciiWhitespace(str); |
46 | if (!str.empty() && str[0] == '+') { |
47 | str.remove_prefix(1); |
48 | } |
49 | auto result = absl::from_chars(str.data(), str.data() + str.size(), *out); |
50 | if (result.ec == std::errc::invalid_argument) { |
51 | return false; |
52 | } |
53 | if (result.ptr != str.data() + str.size()) { |
54 | // not all non-whitespace characters consumed |
55 | return false; |
56 | } |
57 | // from_chars() with DR 3081's current wording will return max() on |
58 | // overflow. SimpleAtof returns infinity instead. |
59 | if (result.ec == std::errc::result_out_of_range) { |
60 | if (*out > 1.0) { |
61 | *out = std::numeric_limits<float>::infinity(); |
62 | } else if (*out < -1.0) { |
63 | *out = -std::numeric_limits<float>::infinity(); |
64 | } |
65 | } |
66 | return true; |
67 | } |
68 | |
69 | bool SimpleAtod(absl::string_view str, double* out) { |
70 | *out = 0.0; |
71 | str = StripAsciiWhitespace(str); |
72 | if (!str.empty() && str[0] == '+') { |
73 | str.remove_prefix(1); |
74 | } |
75 | auto result = absl::from_chars(str.data(), str.data() + str.size(), *out); |
76 | if (result.ec == std::errc::invalid_argument) { |
77 | return false; |
78 | } |
79 | if (result.ptr != str.data() + str.size()) { |
80 | // not all non-whitespace characters consumed |
81 | return false; |
82 | } |
83 | // from_chars() with DR 3081's current wording will return max() on |
84 | // overflow. SimpleAtod returns infinity instead. |
85 | if (result.ec == std::errc::result_out_of_range) { |
86 | if (*out > 1.0) { |
87 | *out = std::numeric_limits<double>::infinity(); |
88 | } else if (*out < -1.0) { |
89 | *out = -std::numeric_limits<double>::infinity(); |
90 | } |
91 | } |
92 | return true; |
93 | } |
94 | |
95 | namespace { |
96 | |
97 | // Writes a two-character representation of 'i' to 'buf'. 'i' must be in the |
98 | // range 0 <= i < 100, and buf must have space for two characters. Example: |
99 | // char buf[2]; |
100 | // PutTwoDigits(42, buf); |
101 | // // buf[0] == '4' |
102 | // // buf[1] == '2' |
103 | inline void PutTwoDigits(size_t i, char* buf) { |
104 | static const char two_ASCII_digits[100][2] = { |
105 | {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, |
106 | {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, |
107 | {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'}, |
108 | {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'}, |
109 | {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'}, |
110 | {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'}, |
111 | {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, |
112 | {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, |
113 | {'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'}, |
114 | {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'}, |
115 | {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'}, |
116 | {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, |
117 | {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, |
118 | {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, |
119 | {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'}, |
120 | {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'}, |
121 | {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'}, |
122 | {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, |
123 | {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, |
124 | {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'} |
125 | }; |
126 | assert(i < 100); |
127 | memcpy(buf, two_ASCII_digits[i], 2); |
128 | } |
129 | |
130 | } // namespace |
131 | |
132 | bool SimpleAtob(absl::string_view str, bool* out) { |
133 | ABSL_RAW_CHECK(out != nullptr, "Output pointer must not be nullptr." ); |
134 | if (EqualsIgnoreCase(str, "true" ) || EqualsIgnoreCase(str, "t" ) || |
135 | EqualsIgnoreCase(str, "yes" ) || EqualsIgnoreCase(str, "y" ) || |
136 | EqualsIgnoreCase(str, "1" )) { |
137 | *out = true; |
138 | return true; |
139 | } |
140 | if (EqualsIgnoreCase(str, "false" ) || EqualsIgnoreCase(str, "f" ) || |
141 | EqualsIgnoreCase(str, "no" ) || EqualsIgnoreCase(str, "n" ) || |
142 | EqualsIgnoreCase(str, "0" )) { |
143 | *out = false; |
144 | return true; |
145 | } |
146 | return false; |
147 | } |
148 | |
149 | // ---------------------------------------------------------------------- |
150 | // FastIntToBuffer() overloads |
151 | // |
152 | // Like the Fast*ToBuffer() functions above, these are intended for speed. |
153 | // Unlike the Fast*ToBuffer() functions, however, these functions write |
154 | // their output to the beginning of the buffer. The caller is responsible |
155 | // for ensuring that the buffer has enough space to hold the output. |
156 | // |
157 | // Returns a pointer to the end of the string (i.e. the null character |
158 | // terminating the string). |
159 | // ---------------------------------------------------------------------- |
160 | |
161 | namespace { |
162 | |
163 | // Used to optimize printing a decimal number's final digit. |
164 | const char one_ASCII_final_digits[10][2] { |
165 | {'0', 0}, {'1', 0}, {'2', 0}, {'3', 0}, {'4', 0}, |
166 | {'5', 0}, {'6', 0}, {'7', 0}, {'8', 0}, {'9', 0}, |
167 | }; |
168 | |
169 | } // namespace |
170 | |
171 | char* numbers_internal::FastIntToBuffer(uint32_t i, char* buffer) { |
172 | uint32_t digits; |
173 | // The idea of this implementation is to trim the number of divides to as few |
174 | // as possible, and also reducing memory stores and branches, by going in |
175 | // steps of two digits at a time rather than one whenever possible. |
176 | // The huge-number case is first, in the hopes that the compiler will output |
177 | // that case in one branch-free block of code, and only output conditional |
178 | // branches into it from below. |
179 | if (i >= 1000000000) { // >= 1,000,000,000 |
180 | digits = i / 100000000; // 100,000,000 |
181 | i -= digits * 100000000; |
182 | PutTwoDigits(digits, buffer); |
183 | buffer += 2; |
184 | lt100_000_000: |
185 | digits = i / 1000000; // 1,000,000 |
186 | i -= digits * 1000000; |
187 | PutTwoDigits(digits, buffer); |
188 | buffer += 2; |
189 | lt1_000_000: |
190 | digits = i / 10000; // 10,000 |
191 | i -= digits * 10000; |
192 | PutTwoDigits(digits, buffer); |
193 | buffer += 2; |
194 | lt10_000: |
195 | digits = i / 100; |
196 | i -= digits * 100; |
197 | PutTwoDigits(digits, buffer); |
198 | buffer += 2; |
199 | lt100: |
200 | digits = i; |
201 | PutTwoDigits(digits, buffer); |
202 | buffer += 2; |
203 | *buffer = 0; |
204 | return buffer; |
205 | } |
206 | |
207 | if (i < 100) { |
208 | digits = i; |
209 | if (i >= 10) goto lt100; |
210 | memcpy(buffer, one_ASCII_final_digits[i], 2); |
211 | return buffer + 1; |
212 | } |
213 | if (i < 10000) { // 10,000 |
214 | if (i >= 1000) goto lt10_000; |
215 | digits = i / 100; |
216 | i -= digits * 100; |
217 | *buffer++ = '0' + digits; |
218 | goto lt100; |
219 | } |
220 | if (i < 1000000) { // 1,000,000 |
221 | if (i >= 100000) goto lt1_000_000; |
222 | digits = i / 10000; // 10,000 |
223 | i -= digits * 10000; |
224 | *buffer++ = '0' + digits; |
225 | goto lt10_000; |
226 | } |
227 | if (i < 100000000) { // 100,000,000 |
228 | if (i >= 10000000) goto lt100_000_000; |
229 | digits = i / 1000000; // 1,000,000 |
230 | i -= digits * 1000000; |
231 | *buffer++ = '0' + digits; |
232 | goto lt1_000_000; |
233 | } |
234 | // we already know that i < 1,000,000,000 |
235 | digits = i / 100000000; // 100,000,000 |
236 | i -= digits * 100000000; |
237 | *buffer++ = '0' + digits; |
238 | goto lt100_000_000; |
239 | } |
240 | |
241 | char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) { |
242 | uint32_t u = i; |
243 | if (i < 0) { |
244 | *buffer++ = '-'; |
245 | // We need to do the negation in modular (i.e., "unsigned") |
246 | // arithmetic; MSVC++ apprently warns for plain "-u", so |
247 | // we write the equivalent expression "0 - u" instead. |
248 | u = 0 - u; |
249 | } |
250 | return numbers_internal::FastIntToBuffer(u, buffer); |
251 | } |
252 | |
253 | char* numbers_internal::FastIntToBuffer(uint64_t i, char* buffer) { |
254 | uint32_t u32 = static_cast<uint32_t>(i); |
255 | if (u32 == i) return numbers_internal::FastIntToBuffer(u32, buffer); |
256 | |
257 | // Here we know i has at least 10 decimal digits. |
258 | uint64_t top_1to11 = i / 1000000000; |
259 | u32 = static_cast<uint32_t>(i - top_1to11 * 1000000000); |
260 | uint32_t top_1to11_32 = static_cast<uint32_t>(top_1to11); |
261 | |
262 | if (top_1to11_32 == top_1to11) { |
263 | buffer = numbers_internal::FastIntToBuffer(top_1to11_32, buffer); |
264 | } else { |
265 | // top_1to11 has more than 32 bits too; print it in two steps. |
266 | uint32_t top_8to9 = static_cast<uint32_t>(top_1to11 / 100); |
267 | uint32_t mid_2 = static_cast<uint32_t>(top_1to11 - top_8to9 * 100); |
268 | buffer = numbers_internal::FastIntToBuffer(top_8to9, buffer); |
269 | PutTwoDigits(mid_2, buffer); |
270 | buffer += 2; |
271 | } |
272 | |
273 | // We have only 9 digits now, again the maximum uint32_t can handle fully. |
274 | uint32_t digits = u32 / 10000000; // 10,000,000 |
275 | u32 -= digits * 10000000; |
276 | PutTwoDigits(digits, buffer); |
277 | buffer += 2; |
278 | digits = u32 / 100000; // 100,000 |
279 | u32 -= digits * 100000; |
280 | PutTwoDigits(digits, buffer); |
281 | buffer += 2; |
282 | digits = u32 / 1000; // 1,000 |
283 | u32 -= digits * 1000; |
284 | PutTwoDigits(digits, buffer); |
285 | buffer += 2; |
286 | digits = u32 / 10; |
287 | u32 -= digits * 10; |
288 | PutTwoDigits(digits, buffer); |
289 | buffer += 2; |
290 | memcpy(buffer, one_ASCII_final_digits[u32], 2); |
291 | return buffer + 1; |
292 | } |
293 | |
294 | char* numbers_internal::FastIntToBuffer(int64_t i, char* buffer) { |
295 | uint64_t u = i; |
296 | if (i < 0) { |
297 | *buffer++ = '-'; |
298 | u = 0 - u; |
299 | } |
300 | return numbers_internal::FastIntToBuffer(u, buffer); |
301 | } |
302 | |
303 | // Given a 128-bit number expressed as a pair of uint64_t, high half first, |
304 | // return that number multiplied by the given 32-bit value. If the result is |
305 | // too large to fit in a 128-bit number, divide it by 2 until it fits. |
306 | static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num, |
307 | uint32_t mul) { |
308 | uint64_t bits0_31 = num.second & 0xFFFFFFFF; |
309 | uint64_t bits32_63 = num.second >> 32; |
310 | uint64_t bits64_95 = num.first & 0xFFFFFFFF; |
311 | uint64_t bits96_127 = num.first >> 32; |
312 | |
313 | // The picture so far: each of these 64-bit values has only the lower 32 bits |
314 | // filled in. |
315 | // bits96_127: [ 00000000 xxxxxxxx ] |
316 | // bits64_95: [ 00000000 xxxxxxxx ] |
317 | // bits32_63: [ 00000000 xxxxxxxx ] |
318 | // bits0_31: [ 00000000 xxxxxxxx ] |
319 | |
320 | bits0_31 *= mul; |
321 | bits32_63 *= mul; |
322 | bits64_95 *= mul; |
323 | bits96_127 *= mul; |
324 | |
325 | // Now the top halves may also have value, though all 64 of their bits will |
326 | // never be set at the same time, since they are a result of a 32x32 bit |
327 | // multiply. This makes the carry calculation slightly easier. |
328 | // bits96_127: [ mmmmmmmm | mmmmmmmm ] |
329 | // bits64_95: [ | mmmmmmmm mmmmmmmm | ] |
330 | // bits32_63: | [ mmmmmmmm | mmmmmmmm ] |
331 | // bits0_31: | [ | mmmmmmmm mmmmmmmm ] |
332 | // eventually: [ bits128_up | ...bits64_127.... | ..bits0_63... ] |
333 | |
334 | uint64_t bits0_63 = bits0_31 + (bits32_63 << 32); |
335 | uint64_t bits64_127 = bits64_95 + (bits96_127 << 32) + (bits32_63 >> 32) + |
336 | (bits0_63 < bits0_31); |
337 | uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95); |
338 | if (bits128_up == 0) return {bits64_127, bits0_63}; |
339 | |
340 | int shift = 64 - base_internal::CountLeadingZeros64(bits128_up); |
341 | uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift)); |
342 | uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift)); |
343 | return {hi, lo}; |
344 | } |
345 | |
346 | // Compute num * 5 ^ expfive, and return the first 128 bits of the result, |
347 | // where the first bit is always a one. So PowFive(1, 0) starts 0b100000, |
348 | // PowFive(1, 1) starts 0b101000, PowFive(1, 2) starts 0b110010, etc. |
349 | static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) { |
350 | std::pair<uint64_t, uint64_t> result = {num, 0}; |
351 | while (expfive >= 13) { |
352 | // 5^13 is the highest power of five that will fit in a 32-bit integer. |
353 | result = Mul32(result, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5); |
354 | expfive -= 13; |
355 | } |
356 | constexpr int powers_of_five[13] = { |
357 | 1, |
358 | 5, |
359 | 5 * 5, |
360 | 5 * 5 * 5, |
361 | 5 * 5 * 5 * 5, |
362 | 5 * 5 * 5 * 5 * 5, |
363 | 5 * 5 * 5 * 5 * 5 * 5, |
364 | 5 * 5 * 5 * 5 * 5 * 5 * 5, |
365 | 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, |
366 | 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, |
367 | 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, |
368 | 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5, |
369 | 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5}; |
370 | result = Mul32(result, powers_of_five[expfive & 15]); |
371 | int shift = base_internal::CountLeadingZeros64(result.first); |
372 | if (shift != 0) { |
373 | result.first = (result.first << shift) + (result.second >> (64 - shift)); |
374 | result.second = (result.second << shift); |
375 | } |
376 | return result; |
377 | } |
378 | |
379 | struct ExpDigits { |
380 | int32_t exponent; |
381 | char digits[6]; |
382 | }; |
383 | |
384 | // SplitToSix converts value, a positive double-precision floating-point number, |
385 | // into a base-10 exponent and 6 ASCII digits, where the first digit is never |
386 | // zero. For example, SplitToSix(1) returns an exponent of zero and a digits |
387 | // array of {'1', '0', '0', '0', '0', '0'}. If value is exactly halfway between |
388 | // two possible representations, e.g. value = 100000.5, then "round to even" is |
389 | // performed. |
390 | static ExpDigits SplitToSix(const double value) { |
391 | ExpDigits exp_dig; |
392 | int exp = 5; |
393 | double d = value; |
394 | // First step: calculate a close approximation of the output, where the |
395 | // value d will be between 100,000 and 999,999, representing the digits |
396 | // in the output ASCII array, and exp is the base-10 exponent. It would be |
397 | // faster to use a table here, and to look up the base-2 exponent of value, |
398 | // however value is an IEEE-754 64-bit number, so the table would have 2,000 |
399 | // entries, which is not cache-friendly. |
400 | if (d >= 999999.5) { |
401 | if (d >= 1e+261) exp += 256, d *= 1e-256; |
402 | if (d >= 1e+133) exp += 128, d *= 1e-128; |
403 | if (d >= 1e+69) exp += 64, d *= 1e-64; |
404 | if (d >= 1e+37) exp += 32, d *= 1e-32; |
405 | if (d >= 1e+21) exp += 16, d *= 1e-16; |
406 | if (d >= 1e+13) exp += 8, d *= 1e-8; |
407 | if (d >= 1e+9) exp += 4, d *= 1e-4; |
408 | if (d >= 1e+7) exp += 2, d *= 1e-2; |
409 | if (d >= 1e+6) exp += 1, d *= 1e-1; |
410 | } else { |
411 | if (d < 1e-250) exp -= 256, d *= 1e256; |
412 | if (d < 1e-122) exp -= 128, d *= 1e128; |
413 | if (d < 1e-58) exp -= 64, d *= 1e64; |
414 | if (d < 1e-26) exp -= 32, d *= 1e32; |
415 | if (d < 1e-10) exp -= 16, d *= 1e16; |
416 | if (d < 1e-2) exp -= 8, d *= 1e8; |
417 | if (d < 1e+2) exp -= 4, d *= 1e4; |
418 | if (d < 1e+4) exp -= 2, d *= 1e2; |
419 | if (d < 1e+5) exp -= 1, d *= 1e1; |
420 | } |
421 | // At this point, d is in the range [99999.5..999999.5) and exp is in the |
422 | // range [-324..308]. Since we need to round d up, we want to add a half |
423 | // and truncate. |
424 | // However, the technique above may have lost some precision, due to its |
425 | // repeated multiplication by constants that each may be off by half a bit |
426 | // of precision. This only matters if we're close to the edge though. |
427 | // Since we'd like to know if the fractional part of d is close to a half, |
428 | // we multiply it by 65536 and see if the fractional part is close to 32768. |
429 | // (The number doesn't have to be a power of two,but powers of two are faster) |
430 | uint64_t d64k = d * 65536; |
431 | int dddddd; // A 6-digit decimal integer. |
432 | if ((d64k % 65536) == 32767 || (d64k % 65536) == 32768) { |
433 | // OK, it's fairly likely that precision was lost above, which is |
434 | // not a surprise given only 52 mantissa bits are available. Therefore |
435 | // redo the calculation using 128-bit numbers. (64 bits are not enough). |
436 | |
437 | // Start out with digits rounded down; maybe add one below. |
438 | dddddd = static_cast<int>(d64k / 65536); |
439 | |
440 | // mantissa is a 64-bit integer representing M.mmm... * 2^63. The actual |
441 | // value we're representing, of course, is M.mmm... * 2^exp2. |
442 | int exp2; |
443 | double m = std::frexp(value, &exp2); |
444 | uint64_t mantissa = m * (32768.0 * 65536.0 * 65536.0 * 65536.0); |
445 | // std::frexp returns an m value in the range [0.5, 1.0), however we |
446 | // can't multiply it by 2^64 and convert to an integer because some FPUs |
447 | // throw an exception when converting an number higher than 2^63 into an |
448 | // integer - even an unsigned 64-bit integer! Fortunately it doesn't matter |
449 | // since m only has 52 significant bits anyway. |
450 | mantissa <<= 1; |
451 | exp2 -= 64; // not needed, but nice for debugging |
452 | |
453 | // OK, we are here to compare: |
454 | // (dddddd + 0.5) * 10^(exp-5) vs. mantissa * 2^exp2 |
455 | // so we can round up dddddd if appropriate. Those values span the full |
456 | // range of 600 orders of magnitude of IEE 64-bit floating-point. |
457 | // Fortunately, we already know they are very close, so we don't need to |
458 | // track the base-2 exponent of both sides. This greatly simplifies the |
459 | // the math since the 2^exp2 calculation is unnecessary and the power-of-10 |
460 | // calculation can become a power-of-5 instead. |
461 | |
462 | std::pair<uint64_t, uint64_t> edge, val; |
463 | if (exp >= 6) { |
464 | // Compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa |
465 | // Since we're tossing powers of two, 2 * dddddd + 1 is the |
466 | // same as dddddd + 0.5 |
467 | edge = PowFive(2 * dddddd + 1, exp - 5); |
468 | |
469 | val.first = mantissa; |
470 | val.second = 0; |
471 | } else { |
472 | // We can't compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa as we did |
473 | // above because (exp - 5) is negative. So we compare (dddddd + 0.5) to |
474 | // mantissa * 5 ^ (5 - exp) |
475 | edge = PowFive(2 * dddddd + 1, 0); |
476 | |
477 | val = PowFive(mantissa, 5 - exp); |
478 | } |
479 | // printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first, |
480 | // val.second, edge.first, edge.second); |
481 | if (val > edge) { |
482 | dddddd++; |
483 | } else if (val == edge) { |
484 | dddddd += (dddddd & 1); |
485 | } |
486 | } else { |
487 | // Here, we are not close to the edge. |
488 | dddddd = static_cast<int>((d64k + 32768) / 65536); |
489 | } |
490 | if (dddddd == 1000000) { |
491 | dddddd = 100000; |
492 | exp += 1; |
493 | } |
494 | exp_dig.exponent = exp; |
495 | |
496 | int two_digits = dddddd / 10000; |
497 | dddddd -= two_digits * 10000; |
498 | PutTwoDigits(two_digits, &exp_dig.digits[0]); |
499 | |
500 | two_digits = dddddd / 100; |
501 | dddddd -= two_digits * 100; |
502 | PutTwoDigits(two_digits, &exp_dig.digits[2]); |
503 | |
504 | PutTwoDigits(dddddd, &exp_dig.digits[4]); |
505 | return exp_dig; |
506 | } |
507 | |
508 | // Helper function for fast formatting of floating-point. |
509 | // The result is the same as "%g", a.k.a. "%.6g". |
510 | size_t numbers_internal::SixDigitsToBuffer(double d, char* const buffer) { |
511 | static_assert(std::numeric_limits<float>::is_iec559, |
512 | "IEEE-754/IEC-559 support only" ); |
513 | |
514 | char* out = buffer; // we write data to out, incrementing as we go, but |
515 | // FloatToBuffer always returns the address of the buffer |
516 | // passed in. |
517 | |
518 | if (std::isnan(d)) { |
519 | strcpy(out, "nan" ); // NOLINT(runtime/printf) |
520 | return 3; |
521 | } |
522 | if (d == 0) { // +0 and -0 are handled here |
523 | if (std::signbit(d)) *out++ = '-'; |
524 | *out++ = '0'; |
525 | *out = 0; |
526 | return out - buffer; |
527 | } |
528 | if (d < 0) { |
529 | *out++ = '-'; |
530 | d = -d; |
531 | } |
532 | if (std::isinf(d)) { |
533 | strcpy(out, "inf" ); // NOLINT(runtime/printf) |
534 | return out + 3 - buffer; |
535 | } |
536 | |
537 | auto exp_dig = SplitToSix(d); |
538 | int exp = exp_dig.exponent; |
539 | const char* digits = exp_dig.digits; |
540 | out[0] = '0'; |
541 | out[1] = '.'; |
542 | switch (exp) { |
543 | case 5: |
544 | memcpy(out, &digits[0], 6), out += 6; |
545 | *out = 0; |
546 | return out - buffer; |
547 | case 4: |
548 | memcpy(out, &digits[0], 5), out += 5; |
549 | if (digits[5] != '0') { |
550 | *out++ = '.'; |
551 | *out++ = digits[5]; |
552 | } |
553 | *out = 0; |
554 | return out - buffer; |
555 | case 3: |
556 | memcpy(out, &digits[0], 4), out += 4; |
557 | if ((digits[5] | digits[4]) != '0') { |
558 | *out++ = '.'; |
559 | *out++ = digits[4]; |
560 | if (digits[5] != '0') *out++ = digits[5]; |
561 | } |
562 | *out = 0; |
563 | return out - buffer; |
564 | case 2: |
565 | memcpy(out, &digits[0], 3), out += 3; |
566 | *out++ = '.'; |
567 | memcpy(out, &digits[3], 3); |
568 | out += 3; |
569 | while (out[-1] == '0') --out; |
570 | if (out[-1] == '.') --out; |
571 | *out = 0; |
572 | return out - buffer; |
573 | case 1: |
574 | memcpy(out, &digits[0], 2), out += 2; |
575 | *out++ = '.'; |
576 | memcpy(out, &digits[2], 4); |
577 | out += 4; |
578 | while (out[-1] == '0') --out; |
579 | if (out[-1] == '.') --out; |
580 | *out = 0; |
581 | return out - buffer; |
582 | case 0: |
583 | memcpy(out, &digits[0], 1), out += 1; |
584 | *out++ = '.'; |
585 | memcpy(out, &digits[1], 5); |
586 | out += 5; |
587 | while (out[-1] == '0') --out; |
588 | if (out[-1] == '.') --out; |
589 | *out = 0; |
590 | return out - buffer; |
591 | case -4: |
592 | out[2] = '0'; |
593 | ++out; |
594 | ABSL_FALLTHROUGH_INTENDED; |
595 | case -3: |
596 | out[2] = '0'; |
597 | ++out; |
598 | ABSL_FALLTHROUGH_INTENDED; |
599 | case -2: |
600 | out[2] = '0'; |
601 | ++out; |
602 | ABSL_FALLTHROUGH_INTENDED; |
603 | case -1: |
604 | out += 2; |
605 | memcpy(out, &digits[0], 6); |
606 | out += 6; |
607 | while (out[-1] == '0') --out; |
608 | *out = 0; |
609 | return out - buffer; |
610 | } |
611 | assert(exp < -4 || exp >= 6); |
612 | out[0] = digits[0]; |
613 | assert(out[1] == '.'); |
614 | out += 2; |
615 | memcpy(out, &digits[1], 5), out += 5; |
616 | while (out[-1] == '0') --out; |
617 | if (out[-1] == '.') --out; |
618 | *out++ = 'e'; |
619 | if (exp > 0) { |
620 | *out++ = '+'; |
621 | } else { |
622 | *out++ = '-'; |
623 | exp = -exp; |
624 | } |
625 | if (exp > 99) { |
626 | int dig1 = exp / 100; |
627 | exp -= dig1 * 100; |
628 | *out++ = '0' + dig1; |
629 | } |
630 | PutTwoDigits(exp, out); |
631 | out += 2; |
632 | *out = 0; |
633 | return out - buffer; |
634 | } |
635 | |
636 | namespace { |
637 | // Represents integer values of digits. |
638 | // Uses 36 to indicate an invalid character since we support |
639 | // bases up to 36. |
640 | static const int8_t kAsciiToInt[256] = { |
641 | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s. |
642 | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
643 | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 1, 2, 3, 4, 5, |
644 | 6, 7, 8, 9, 36, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, |
645 | 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, |
646 | 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, |
647 | 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36, |
648 | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
649 | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
650 | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
651 | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
652 | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
653 | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, |
654 | 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36}; |
655 | |
656 | // Parse the sign and optional hex or oct prefix in text. |
657 | inline bool safe_parse_sign_and_base(absl::string_view* text /*inout*/, |
658 | int* base_ptr /*inout*/, |
659 | bool* negative_ptr /*output*/) { |
660 | if (text->data() == nullptr) { |
661 | return false; |
662 | } |
663 | |
664 | const char* start = text->data(); |
665 | const char* end = start + text->size(); |
666 | int base = *base_ptr; |
667 | |
668 | // Consume whitespace. |
669 | while (start < end && absl::ascii_isspace(start[0])) { |
670 | ++start; |
671 | } |
672 | while (start < end && absl::ascii_isspace(end[-1])) { |
673 | --end; |
674 | } |
675 | if (start >= end) { |
676 | return false; |
677 | } |
678 | |
679 | // Consume sign. |
680 | *negative_ptr = (start[0] == '-'); |
681 | if (*negative_ptr || start[0] == '+') { |
682 | ++start; |
683 | if (start >= end) { |
684 | return false; |
685 | } |
686 | } |
687 | |
688 | // Consume base-dependent prefix. |
689 | // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10 |
690 | // base 16: "0x" -> base 16 |
691 | // Also validate the base. |
692 | if (base == 0) { |
693 | if (end - start >= 2 && start[0] == '0' && |
694 | (start[1] == 'x' || start[1] == 'X')) { |
695 | base = 16; |
696 | start += 2; |
697 | if (start >= end) { |
698 | // "0x" with no digits after is invalid. |
699 | return false; |
700 | } |
701 | } else if (end - start >= 1 && start[0] == '0') { |
702 | base = 8; |
703 | start += 1; |
704 | } else { |
705 | base = 10; |
706 | } |
707 | } else if (base == 16) { |
708 | if (end - start >= 2 && start[0] == '0' && |
709 | (start[1] == 'x' || start[1] == 'X')) { |
710 | start += 2; |
711 | if (start >= end) { |
712 | // "0x" with no digits after is invalid. |
713 | return false; |
714 | } |
715 | } |
716 | } else if (base >= 2 && base <= 36) { |
717 | // okay |
718 | } else { |
719 | return false; |
720 | } |
721 | *text = absl::string_view(start, end - start); |
722 | *base_ptr = base; |
723 | return true; |
724 | } |
725 | |
726 | // Consume digits. |
727 | // |
728 | // The classic loop: |
729 | // |
730 | // for each digit |
731 | // value = value * base + digit |
732 | // value *= sign |
733 | // |
734 | // The classic loop needs overflow checking. It also fails on the most |
735 | // negative integer, -2147483648 in 32-bit two's complement representation. |
736 | // |
737 | // My improved loop: |
738 | // |
739 | // if (!negative) |
740 | // for each digit |
741 | // value = value * base |
742 | // value = value + digit |
743 | // else |
744 | // for each digit |
745 | // value = value * base |
746 | // value = value - digit |
747 | // |
748 | // Overflow checking becomes simple. |
749 | |
750 | // Lookup tables per IntType: |
751 | // vmax/base and vmin/base are precomputed because division costs at least 8ns. |
752 | // TODO(junyer): Doing this per base instead (i.e. an array of structs, not a |
753 | // struct of arrays) would probably be better in terms of d-cache for the most |
754 | // commonly used bases. |
755 | template <typename IntType> |
756 | struct LookupTables { |
757 | static const IntType kVmaxOverBase[]; |
758 | static const IntType kVminOverBase[]; |
759 | }; |
760 | |
761 | // An array initializer macro for X/base where base in [0, 36]. |
762 | // However, note that lookups for base in [0, 1] should never happen because |
763 | // base has been validated to be in [2, 36] by safe_parse_sign_and_base(). |
764 | #define X_OVER_BASE_INITIALIZER(X) \ |
765 | { \ |
766 | 0, 0, X / 2, X / 3, X / 4, X / 5, X / 6, X / 7, X / 8, X / 9, X / 10, \ |
767 | X / 11, X / 12, X / 13, X / 14, X / 15, X / 16, X / 17, X / 18, \ |
768 | X / 19, X / 20, X / 21, X / 22, X / 23, X / 24, X / 25, X / 26, \ |
769 | X / 27, X / 28, X / 29, X / 30, X / 31, X / 32, X / 33, X / 34, \ |
770 | X / 35, X / 36, \ |
771 | } |
772 | |
773 | template <typename IntType> |
774 | const IntType LookupTables<IntType>::kVmaxOverBase[] = |
775 | X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max()); |
776 | |
777 | template <typename IntType> |
778 | const IntType LookupTables<IntType>::kVminOverBase[] = |
779 | X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min()); |
780 | |
781 | #undef X_OVER_BASE_INITIALIZER |
782 | |
783 | template <typename IntType> |
784 | inline bool safe_parse_positive_int(absl::string_view text, int base, |
785 | IntType* value_p) { |
786 | IntType value = 0; |
787 | const IntType vmax = std::numeric_limits<IntType>::max(); |
788 | assert(vmax > 0); |
789 | assert(base >= 0); |
790 | assert(vmax >= static_cast<IntType>(base)); |
791 | const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base]; |
792 | const char* start = text.data(); |
793 | const char* end = start + text.size(); |
794 | // loop over digits |
795 | for (; start < end; ++start) { |
796 | unsigned char c = static_cast<unsigned char>(start[0]); |
797 | int digit = kAsciiToInt[c]; |
798 | if (digit >= base) { |
799 | *value_p = value; |
800 | return false; |
801 | } |
802 | if (value > vmax_over_base) { |
803 | *value_p = vmax; |
804 | return false; |
805 | } |
806 | value *= base; |
807 | if (value > vmax - digit) { |
808 | *value_p = vmax; |
809 | return false; |
810 | } |
811 | value += digit; |
812 | } |
813 | *value_p = value; |
814 | return true; |
815 | } |
816 | |
817 | template <typename IntType> |
818 | inline bool safe_parse_negative_int(absl::string_view text, int base, |
819 | IntType* value_p) { |
820 | IntType value = 0; |
821 | const IntType vmin = std::numeric_limits<IntType>::min(); |
822 | assert(vmin < 0); |
823 | assert(vmin <= 0 - base); |
824 | IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base]; |
825 | // 2003 c++ standard [expr.mul] |
826 | // "... the sign of the remainder is implementation-defined." |
827 | // Although (vmin/base)*base + vmin%base is always vmin. |
828 | // 2011 c++ standard tightens the spec but we cannot rely on it. |
829 | // TODO(junyer): Handle this in the lookup table generation. |
830 | if (vmin % base > 0) { |
831 | vmin_over_base += 1; |
832 | } |
833 | const char* start = text.data(); |
834 | const char* end = start + text.size(); |
835 | // loop over digits |
836 | for (; start < end; ++start) { |
837 | unsigned char c = static_cast<unsigned char>(start[0]); |
838 | int digit = kAsciiToInt[c]; |
839 | if (digit >= base) { |
840 | *value_p = value; |
841 | return false; |
842 | } |
843 | if (value < vmin_over_base) { |
844 | *value_p = vmin; |
845 | return false; |
846 | } |
847 | value *= base; |
848 | if (value < vmin + digit) { |
849 | *value_p = vmin; |
850 | return false; |
851 | } |
852 | value -= digit; |
853 | } |
854 | *value_p = value; |
855 | return true; |
856 | } |
857 | |
858 | // Input format based on POSIX.1-2008 strtol |
859 | // http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html |
860 | template <typename IntType> |
861 | inline bool safe_int_internal(absl::string_view text, IntType* value_p, |
862 | int base) { |
863 | *value_p = 0; |
864 | bool negative; |
865 | if (!safe_parse_sign_and_base(&text, &base, &negative)) { |
866 | return false; |
867 | } |
868 | if (!negative) { |
869 | return safe_parse_positive_int(text, base, value_p); |
870 | } else { |
871 | return safe_parse_negative_int(text, base, value_p); |
872 | } |
873 | } |
874 | |
875 | template <typename IntType> |
876 | inline bool safe_uint_internal(absl::string_view text, IntType* value_p, |
877 | int base) { |
878 | *value_p = 0; |
879 | bool negative; |
880 | if (!safe_parse_sign_and_base(&text, &base, &negative) || negative) { |
881 | return false; |
882 | } |
883 | return safe_parse_positive_int(text, base, value_p); |
884 | } |
885 | } // anonymous namespace |
886 | |
887 | namespace numbers_internal { |
888 | bool safe_strto32_base(absl::string_view text, int32_t* value, int base) { |
889 | return safe_int_internal<int32_t>(text, value, base); |
890 | } |
891 | |
892 | bool safe_strto64_base(absl::string_view text, int64_t* value, int base) { |
893 | return safe_int_internal<int64_t>(text, value, base); |
894 | } |
895 | |
896 | bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) { |
897 | return safe_uint_internal<uint32_t>(text, value, base); |
898 | } |
899 | |
900 | bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base) { |
901 | return safe_uint_internal<uint64_t>(text, value, base); |
902 | } |
903 | } // namespace numbers_internal |
904 | |
905 | } // namespace absl |
906 | |