1// Copyright 2017 The Abseil Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// This file contains string processing functions related to
16// numeric values.
17
18#include "absl/strings/numbers.h"
19
20#include <algorithm>
21#include <cassert>
22#include <cfloat> // for DBL_DIG and FLT_DIG
23#include <cmath> // for HUGE_VAL
24#include <cstdint>
25#include <cstdio>
26#include <cstdlib>
27#include <cstring>
28#include <iterator>
29#include <limits>
30#include <memory>
31#include <utility>
32
33#include "absl/base/internal/bits.h"
34#include "absl/base/internal/raw_logging.h"
35#include "absl/strings/ascii.h"
36#include "absl/strings/charconv.h"
37#include "absl/strings/internal/memutil.h"
38#include "absl/strings/match.h"
39#include "absl/strings/str_cat.h"
40
41namespace absl {
42
43bool SimpleAtof(absl::string_view str, float* out) {
44 *out = 0.0;
45 str = StripAsciiWhitespace(str);
46 if (!str.empty() && str[0] == '+') {
47 str.remove_prefix(1);
48 }
49 auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
50 if (result.ec == std::errc::invalid_argument) {
51 return false;
52 }
53 if (result.ptr != str.data() + str.size()) {
54 // not all non-whitespace characters consumed
55 return false;
56 }
57 // from_chars() with DR 3081's current wording will return max() on
58 // overflow. SimpleAtof returns infinity instead.
59 if (result.ec == std::errc::result_out_of_range) {
60 if (*out > 1.0) {
61 *out = std::numeric_limits<float>::infinity();
62 } else if (*out < -1.0) {
63 *out = -std::numeric_limits<float>::infinity();
64 }
65 }
66 return true;
67}
68
69bool SimpleAtod(absl::string_view str, double* out) {
70 *out = 0.0;
71 str = StripAsciiWhitespace(str);
72 if (!str.empty() && str[0] == '+') {
73 str.remove_prefix(1);
74 }
75 auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
76 if (result.ec == std::errc::invalid_argument) {
77 return false;
78 }
79 if (result.ptr != str.data() + str.size()) {
80 // not all non-whitespace characters consumed
81 return false;
82 }
83 // from_chars() with DR 3081's current wording will return max() on
84 // overflow. SimpleAtod returns infinity instead.
85 if (result.ec == std::errc::result_out_of_range) {
86 if (*out > 1.0) {
87 *out = std::numeric_limits<double>::infinity();
88 } else if (*out < -1.0) {
89 *out = -std::numeric_limits<double>::infinity();
90 }
91 }
92 return true;
93}
94
95namespace {
96
97// Writes a two-character representation of 'i' to 'buf'. 'i' must be in the
98// range 0 <= i < 100, and buf must have space for two characters. Example:
99// char buf[2];
100// PutTwoDigits(42, buf);
101// // buf[0] == '4'
102// // buf[1] == '2'
103inline void PutTwoDigits(size_t i, char* buf) {
104 static const char two_ASCII_digits[100][2] = {
105 {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'},
106 {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'},
107 {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'},
108 {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'},
109 {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'},
110 {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
111 {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'},
112 {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'},
113 {'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'},
114 {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'},
115 {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'},
116 {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
117 {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'},
118 {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'},
119 {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'},
120 {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'},
121 {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'},
122 {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
123 {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'},
124 {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}
125 };
126 assert(i < 100);
127 memcpy(buf, two_ASCII_digits[i], 2);
128}
129
130} // namespace
131
132bool SimpleAtob(absl::string_view str, bool* out) {
133 ABSL_RAW_CHECK(out != nullptr, "Output pointer must not be nullptr.");
134 if (EqualsIgnoreCase(str, "true") || EqualsIgnoreCase(str, "t") ||
135 EqualsIgnoreCase(str, "yes") || EqualsIgnoreCase(str, "y") ||
136 EqualsIgnoreCase(str, "1")) {
137 *out = true;
138 return true;
139 }
140 if (EqualsIgnoreCase(str, "false") || EqualsIgnoreCase(str, "f") ||
141 EqualsIgnoreCase(str, "no") || EqualsIgnoreCase(str, "n") ||
142 EqualsIgnoreCase(str, "0")) {
143 *out = false;
144 return true;
145 }
146 return false;
147}
148
149// ----------------------------------------------------------------------
150// FastIntToBuffer() overloads
151//
152// Like the Fast*ToBuffer() functions above, these are intended for speed.
153// Unlike the Fast*ToBuffer() functions, however, these functions write
154// their output to the beginning of the buffer. The caller is responsible
155// for ensuring that the buffer has enough space to hold the output.
156//
157// Returns a pointer to the end of the string (i.e. the null character
158// terminating the string).
159// ----------------------------------------------------------------------
160
161namespace {
162
163// Used to optimize printing a decimal number's final digit.
164const char one_ASCII_final_digits[10][2] {
165 {'0', 0}, {'1', 0}, {'2', 0}, {'3', 0}, {'4', 0},
166 {'5', 0}, {'6', 0}, {'7', 0}, {'8', 0}, {'9', 0},
167};
168
169} // namespace
170
171char* numbers_internal::FastIntToBuffer(uint32_t i, char* buffer) {
172 uint32_t digits;
173 // The idea of this implementation is to trim the number of divides to as few
174 // as possible, and also reducing memory stores and branches, by going in
175 // steps of two digits at a time rather than one whenever possible.
176 // The huge-number case is first, in the hopes that the compiler will output
177 // that case in one branch-free block of code, and only output conditional
178 // branches into it from below.
179 if (i >= 1000000000) { // >= 1,000,000,000
180 digits = i / 100000000; // 100,000,000
181 i -= digits * 100000000;
182 PutTwoDigits(digits, buffer);
183 buffer += 2;
184 lt100_000_000:
185 digits = i / 1000000; // 1,000,000
186 i -= digits * 1000000;
187 PutTwoDigits(digits, buffer);
188 buffer += 2;
189 lt1_000_000:
190 digits = i / 10000; // 10,000
191 i -= digits * 10000;
192 PutTwoDigits(digits, buffer);
193 buffer += 2;
194 lt10_000:
195 digits = i / 100;
196 i -= digits * 100;
197 PutTwoDigits(digits, buffer);
198 buffer += 2;
199 lt100:
200 digits = i;
201 PutTwoDigits(digits, buffer);
202 buffer += 2;
203 *buffer = 0;
204 return buffer;
205 }
206
207 if (i < 100) {
208 digits = i;
209 if (i >= 10) goto lt100;
210 memcpy(buffer, one_ASCII_final_digits[i], 2);
211 return buffer + 1;
212 }
213 if (i < 10000) { // 10,000
214 if (i >= 1000) goto lt10_000;
215 digits = i / 100;
216 i -= digits * 100;
217 *buffer++ = '0' + digits;
218 goto lt100;
219 }
220 if (i < 1000000) { // 1,000,000
221 if (i >= 100000) goto lt1_000_000;
222 digits = i / 10000; // 10,000
223 i -= digits * 10000;
224 *buffer++ = '0' + digits;
225 goto lt10_000;
226 }
227 if (i < 100000000) { // 100,000,000
228 if (i >= 10000000) goto lt100_000_000;
229 digits = i / 1000000; // 1,000,000
230 i -= digits * 1000000;
231 *buffer++ = '0' + digits;
232 goto lt1_000_000;
233 }
234 // we already know that i < 1,000,000,000
235 digits = i / 100000000; // 100,000,000
236 i -= digits * 100000000;
237 *buffer++ = '0' + digits;
238 goto lt100_000_000;
239}
240
241char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) {
242 uint32_t u = i;
243 if (i < 0) {
244 *buffer++ = '-';
245 // We need to do the negation in modular (i.e., "unsigned")
246 // arithmetic; MSVC++ apprently warns for plain "-u", so
247 // we write the equivalent expression "0 - u" instead.
248 u = 0 - u;
249 }
250 return numbers_internal::FastIntToBuffer(u, buffer);
251}
252
253char* numbers_internal::FastIntToBuffer(uint64_t i, char* buffer) {
254 uint32_t u32 = static_cast<uint32_t>(i);
255 if (u32 == i) return numbers_internal::FastIntToBuffer(u32, buffer);
256
257 // Here we know i has at least 10 decimal digits.
258 uint64_t top_1to11 = i / 1000000000;
259 u32 = static_cast<uint32_t>(i - top_1to11 * 1000000000);
260 uint32_t top_1to11_32 = static_cast<uint32_t>(top_1to11);
261
262 if (top_1to11_32 == top_1to11) {
263 buffer = numbers_internal::FastIntToBuffer(top_1to11_32, buffer);
264 } else {
265 // top_1to11 has more than 32 bits too; print it in two steps.
266 uint32_t top_8to9 = static_cast<uint32_t>(top_1to11 / 100);
267 uint32_t mid_2 = static_cast<uint32_t>(top_1to11 - top_8to9 * 100);
268 buffer = numbers_internal::FastIntToBuffer(top_8to9, buffer);
269 PutTwoDigits(mid_2, buffer);
270 buffer += 2;
271 }
272
273 // We have only 9 digits now, again the maximum uint32_t can handle fully.
274 uint32_t digits = u32 / 10000000; // 10,000,000
275 u32 -= digits * 10000000;
276 PutTwoDigits(digits, buffer);
277 buffer += 2;
278 digits = u32 / 100000; // 100,000
279 u32 -= digits * 100000;
280 PutTwoDigits(digits, buffer);
281 buffer += 2;
282 digits = u32 / 1000; // 1,000
283 u32 -= digits * 1000;
284 PutTwoDigits(digits, buffer);
285 buffer += 2;
286 digits = u32 / 10;
287 u32 -= digits * 10;
288 PutTwoDigits(digits, buffer);
289 buffer += 2;
290 memcpy(buffer, one_ASCII_final_digits[u32], 2);
291 return buffer + 1;
292}
293
294char* numbers_internal::FastIntToBuffer(int64_t i, char* buffer) {
295 uint64_t u = i;
296 if (i < 0) {
297 *buffer++ = '-';
298 u = 0 - u;
299 }
300 return numbers_internal::FastIntToBuffer(u, buffer);
301}
302
303// Given a 128-bit number expressed as a pair of uint64_t, high half first,
304// return that number multiplied by the given 32-bit value. If the result is
305// too large to fit in a 128-bit number, divide it by 2 until it fits.
306static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num,
307 uint32_t mul) {
308 uint64_t bits0_31 = num.second & 0xFFFFFFFF;
309 uint64_t bits32_63 = num.second >> 32;
310 uint64_t bits64_95 = num.first & 0xFFFFFFFF;
311 uint64_t bits96_127 = num.first >> 32;
312
313 // The picture so far: each of these 64-bit values has only the lower 32 bits
314 // filled in.
315 // bits96_127: [ 00000000 xxxxxxxx ]
316 // bits64_95: [ 00000000 xxxxxxxx ]
317 // bits32_63: [ 00000000 xxxxxxxx ]
318 // bits0_31: [ 00000000 xxxxxxxx ]
319
320 bits0_31 *= mul;
321 bits32_63 *= mul;
322 bits64_95 *= mul;
323 bits96_127 *= mul;
324
325 // Now the top halves may also have value, though all 64 of their bits will
326 // never be set at the same time, since they are a result of a 32x32 bit
327 // multiply. This makes the carry calculation slightly easier.
328 // bits96_127: [ mmmmmmmm | mmmmmmmm ]
329 // bits64_95: [ | mmmmmmmm mmmmmmmm | ]
330 // bits32_63: | [ mmmmmmmm | mmmmmmmm ]
331 // bits0_31: | [ | mmmmmmmm mmmmmmmm ]
332 // eventually: [ bits128_up | ...bits64_127.... | ..bits0_63... ]
333
334 uint64_t bits0_63 = bits0_31 + (bits32_63 << 32);
335 uint64_t bits64_127 = bits64_95 + (bits96_127 << 32) + (bits32_63 >> 32) +
336 (bits0_63 < bits0_31);
337 uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95);
338 if (bits128_up == 0) return {bits64_127, bits0_63};
339
340 int shift = 64 - base_internal::CountLeadingZeros64(bits128_up);
341 uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift));
342 uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift));
343 return {hi, lo};
344}
345
346// Compute num * 5 ^ expfive, and return the first 128 bits of the result,
347// where the first bit is always a one. So PowFive(1, 0) starts 0b100000,
348// PowFive(1, 1) starts 0b101000, PowFive(1, 2) starts 0b110010, etc.
349static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) {
350 std::pair<uint64_t, uint64_t> result = {num, 0};
351 while (expfive >= 13) {
352 // 5^13 is the highest power of five that will fit in a 32-bit integer.
353 result = Mul32(result, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5);
354 expfive -= 13;
355 }
356 constexpr int powers_of_five[13] = {
357 1,
358 5,
359 5 * 5,
360 5 * 5 * 5,
361 5 * 5 * 5 * 5,
362 5 * 5 * 5 * 5 * 5,
363 5 * 5 * 5 * 5 * 5 * 5,
364 5 * 5 * 5 * 5 * 5 * 5 * 5,
365 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
366 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
367 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
368 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
369 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5};
370 result = Mul32(result, powers_of_five[expfive & 15]);
371 int shift = base_internal::CountLeadingZeros64(result.first);
372 if (shift != 0) {
373 result.first = (result.first << shift) + (result.second >> (64 - shift));
374 result.second = (result.second << shift);
375 }
376 return result;
377}
378
379struct ExpDigits {
380 int32_t exponent;
381 char digits[6];
382};
383
384// SplitToSix converts value, a positive double-precision floating-point number,
385// into a base-10 exponent and 6 ASCII digits, where the first digit is never
386// zero. For example, SplitToSix(1) returns an exponent of zero and a digits
387// array of {'1', '0', '0', '0', '0', '0'}. If value is exactly halfway between
388// two possible representations, e.g. value = 100000.5, then "round to even" is
389// performed.
390static ExpDigits SplitToSix(const double value) {
391 ExpDigits exp_dig;
392 int exp = 5;
393 double d = value;
394 // First step: calculate a close approximation of the output, where the
395 // value d will be between 100,000 and 999,999, representing the digits
396 // in the output ASCII array, and exp is the base-10 exponent. It would be
397 // faster to use a table here, and to look up the base-2 exponent of value,
398 // however value is an IEEE-754 64-bit number, so the table would have 2,000
399 // entries, which is not cache-friendly.
400 if (d >= 999999.5) {
401 if (d >= 1e+261) exp += 256, d *= 1e-256;
402 if (d >= 1e+133) exp += 128, d *= 1e-128;
403 if (d >= 1e+69) exp += 64, d *= 1e-64;
404 if (d >= 1e+37) exp += 32, d *= 1e-32;
405 if (d >= 1e+21) exp += 16, d *= 1e-16;
406 if (d >= 1e+13) exp += 8, d *= 1e-8;
407 if (d >= 1e+9) exp += 4, d *= 1e-4;
408 if (d >= 1e+7) exp += 2, d *= 1e-2;
409 if (d >= 1e+6) exp += 1, d *= 1e-1;
410 } else {
411 if (d < 1e-250) exp -= 256, d *= 1e256;
412 if (d < 1e-122) exp -= 128, d *= 1e128;
413 if (d < 1e-58) exp -= 64, d *= 1e64;
414 if (d < 1e-26) exp -= 32, d *= 1e32;
415 if (d < 1e-10) exp -= 16, d *= 1e16;
416 if (d < 1e-2) exp -= 8, d *= 1e8;
417 if (d < 1e+2) exp -= 4, d *= 1e4;
418 if (d < 1e+4) exp -= 2, d *= 1e2;
419 if (d < 1e+5) exp -= 1, d *= 1e1;
420 }
421 // At this point, d is in the range [99999.5..999999.5) and exp is in the
422 // range [-324..308]. Since we need to round d up, we want to add a half
423 // and truncate.
424 // However, the technique above may have lost some precision, due to its
425 // repeated multiplication by constants that each may be off by half a bit
426 // of precision. This only matters if we're close to the edge though.
427 // Since we'd like to know if the fractional part of d is close to a half,
428 // we multiply it by 65536 and see if the fractional part is close to 32768.
429 // (The number doesn't have to be a power of two,but powers of two are faster)
430 uint64_t d64k = d * 65536;
431 int dddddd; // A 6-digit decimal integer.
432 if ((d64k % 65536) == 32767 || (d64k % 65536) == 32768) {
433 // OK, it's fairly likely that precision was lost above, which is
434 // not a surprise given only 52 mantissa bits are available. Therefore
435 // redo the calculation using 128-bit numbers. (64 bits are not enough).
436
437 // Start out with digits rounded down; maybe add one below.
438 dddddd = static_cast<int>(d64k / 65536);
439
440 // mantissa is a 64-bit integer representing M.mmm... * 2^63. The actual
441 // value we're representing, of course, is M.mmm... * 2^exp2.
442 int exp2;
443 double m = std::frexp(value, &exp2);
444 uint64_t mantissa = m * (32768.0 * 65536.0 * 65536.0 * 65536.0);
445 // std::frexp returns an m value in the range [0.5, 1.0), however we
446 // can't multiply it by 2^64 and convert to an integer because some FPUs
447 // throw an exception when converting an number higher than 2^63 into an
448 // integer - even an unsigned 64-bit integer! Fortunately it doesn't matter
449 // since m only has 52 significant bits anyway.
450 mantissa <<= 1;
451 exp2 -= 64; // not needed, but nice for debugging
452
453 // OK, we are here to compare:
454 // (dddddd + 0.5) * 10^(exp-5) vs. mantissa * 2^exp2
455 // so we can round up dddddd if appropriate. Those values span the full
456 // range of 600 orders of magnitude of IEE 64-bit floating-point.
457 // Fortunately, we already know they are very close, so we don't need to
458 // track the base-2 exponent of both sides. This greatly simplifies the
459 // the math since the 2^exp2 calculation is unnecessary and the power-of-10
460 // calculation can become a power-of-5 instead.
461
462 std::pair<uint64_t, uint64_t> edge, val;
463 if (exp >= 6) {
464 // Compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa
465 // Since we're tossing powers of two, 2 * dddddd + 1 is the
466 // same as dddddd + 0.5
467 edge = PowFive(2 * dddddd + 1, exp - 5);
468
469 val.first = mantissa;
470 val.second = 0;
471 } else {
472 // We can't compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa as we did
473 // above because (exp - 5) is negative. So we compare (dddddd + 0.5) to
474 // mantissa * 5 ^ (5 - exp)
475 edge = PowFive(2 * dddddd + 1, 0);
476
477 val = PowFive(mantissa, 5 - exp);
478 }
479 // printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first,
480 // val.second, edge.first, edge.second);
481 if (val > edge) {
482 dddddd++;
483 } else if (val == edge) {
484 dddddd += (dddddd & 1);
485 }
486 } else {
487 // Here, we are not close to the edge.
488 dddddd = static_cast<int>((d64k + 32768) / 65536);
489 }
490 if (dddddd == 1000000) {
491 dddddd = 100000;
492 exp += 1;
493 }
494 exp_dig.exponent = exp;
495
496 int two_digits = dddddd / 10000;
497 dddddd -= two_digits * 10000;
498 PutTwoDigits(two_digits, &exp_dig.digits[0]);
499
500 two_digits = dddddd / 100;
501 dddddd -= two_digits * 100;
502 PutTwoDigits(two_digits, &exp_dig.digits[2]);
503
504 PutTwoDigits(dddddd, &exp_dig.digits[4]);
505 return exp_dig;
506}
507
508// Helper function for fast formatting of floating-point.
509// The result is the same as "%g", a.k.a. "%.6g".
510size_t numbers_internal::SixDigitsToBuffer(double d, char* const buffer) {
511 static_assert(std::numeric_limits<float>::is_iec559,
512 "IEEE-754/IEC-559 support only");
513
514 char* out = buffer; // we write data to out, incrementing as we go, but
515 // FloatToBuffer always returns the address of the buffer
516 // passed in.
517
518 if (std::isnan(d)) {
519 strcpy(out, "nan"); // NOLINT(runtime/printf)
520 return 3;
521 }
522 if (d == 0) { // +0 and -0 are handled here
523 if (std::signbit(d)) *out++ = '-';
524 *out++ = '0';
525 *out = 0;
526 return out - buffer;
527 }
528 if (d < 0) {
529 *out++ = '-';
530 d = -d;
531 }
532 if (std::isinf(d)) {
533 strcpy(out, "inf"); // NOLINT(runtime/printf)
534 return out + 3 - buffer;
535 }
536
537 auto exp_dig = SplitToSix(d);
538 int exp = exp_dig.exponent;
539 const char* digits = exp_dig.digits;
540 out[0] = '0';
541 out[1] = '.';
542 switch (exp) {
543 case 5:
544 memcpy(out, &digits[0], 6), out += 6;
545 *out = 0;
546 return out - buffer;
547 case 4:
548 memcpy(out, &digits[0], 5), out += 5;
549 if (digits[5] != '0') {
550 *out++ = '.';
551 *out++ = digits[5];
552 }
553 *out = 0;
554 return out - buffer;
555 case 3:
556 memcpy(out, &digits[0], 4), out += 4;
557 if ((digits[5] | digits[4]) != '0') {
558 *out++ = '.';
559 *out++ = digits[4];
560 if (digits[5] != '0') *out++ = digits[5];
561 }
562 *out = 0;
563 return out - buffer;
564 case 2:
565 memcpy(out, &digits[0], 3), out += 3;
566 *out++ = '.';
567 memcpy(out, &digits[3], 3);
568 out += 3;
569 while (out[-1] == '0') --out;
570 if (out[-1] == '.') --out;
571 *out = 0;
572 return out - buffer;
573 case 1:
574 memcpy(out, &digits[0], 2), out += 2;
575 *out++ = '.';
576 memcpy(out, &digits[2], 4);
577 out += 4;
578 while (out[-1] == '0') --out;
579 if (out[-1] == '.') --out;
580 *out = 0;
581 return out - buffer;
582 case 0:
583 memcpy(out, &digits[0], 1), out += 1;
584 *out++ = '.';
585 memcpy(out, &digits[1], 5);
586 out += 5;
587 while (out[-1] == '0') --out;
588 if (out[-1] == '.') --out;
589 *out = 0;
590 return out - buffer;
591 case -4:
592 out[2] = '0';
593 ++out;
594 ABSL_FALLTHROUGH_INTENDED;
595 case -3:
596 out[2] = '0';
597 ++out;
598 ABSL_FALLTHROUGH_INTENDED;
599 case -2:
600 out[2] = '0';
601 ++out;
602 ABSL_FALLTHROUGH_INTENDED;
603 case -1:
604 out += 2;
605 memcpy(out, &digits[0], 6);
606 out += 6;
607 while (out[-1] == '0') --out;
608 *out = 0;
609 return out - buffer;
610 }
611 assert(exp < -4 || exp >= 6);
612 out[0] = digits[0];
613 assert(out[1] == '.');
614 out += 2;
615 memcpy(out, &digits[1], 5), out += 5;
616 while (out[-1] == '0') --out;
617 if (out[-1] == '.') --out;
618 *out++ = 'e';
619 if (exp > 0) {
620 *out++ = '+';
621 } else {
622 *out++ = '-';
623 exp = -exp;
624 }
625 if (exp > 99) {
626 int dig1 = exp / 100;
627 exp -= dig1 * 100;
628 *out++ = '0' + dig1;
629 }
630 PutTwoDigits(exp, out);
631 out += 2;
632 *out = 0;
633 return out - buffer;
634}
635
636namespace {
637// Represents integer values of digits.
638// Uses 36 to indicate an invalid character since we support
639// bases up to 36.
640static const int8_t kAsciiToInt[256] = {
641 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s.
642 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
643 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 1, 2, 3, 4, 5,
644 6, 7, 8, 9, 36, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17,
645 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
646 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
647 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36,
648 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
649 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
650 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
651 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
652 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
653 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
654 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
655
656// Parse the sign and optional hex or oct prefix in text.
657inline bool safe_parse_sign_and_base(absl::string_view* text /*inout*/,
658 int* base_ptr /*inout*/,
659 bool* negative_ptr /*output*/) {
660 if (text->data() == nullptr) {
661 return false;
662 }
663
664 const char* start = text->data();
665 const char* end = start + text->size();
666 int base = *base_ptr;
667
668 // Consume whitespace.
669 while (start < end && absl::ascii_isspace(start[0])) {
670 ++start;
671 }
672 while (start < end && absl::ascii_isspace(end[-1])) {
673 --end;
674 }
675 if (start >= end) {
676 return false;
677 }
678
679 // Consume sign.
680 *negative_ptr = (start[0] == '-');
681 if (*negative_ptr || start[0] == '+') {
682 ++start;
683 if (start >= end) {
684 return false;
685 }
686 }
687
688 // Consume base-dependent prefix.
689 // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
690 // base 16: "0x" -> base 16
691 // Also validate the base.
692 if (base == 0) {
693 if (end - start >= 2 && start[0] == '0' &&
694 (start[1] == 'x' || start[1] == 'X')) {
695 base = 16;
696 start += 2;
697 if (start >= end) {
698 // "0x" with no digits after is invalid.
699 return false;
700 }
701 } else if (end - start >= 1 && start[0] == '0') {
702 base = 8;
703 start += 1;
704 } else {
705 base = 10;
706 }
707 } else if (base == 16) {
708 if (end - start >= 2 && start[0] == '0' &&
709 (start[1] == 'x' || start[1] == 'X')) {
710 start += 2;
711 if (start >= end) {
712 // "0x" with no digits after is invalid.
713 return false;
714 }
715 }
716 } else if (base >= 2 && base <= 36) {
717 // okay
718 } else {
719 return false;
720 }
721 *text = absl::string_view(start, end - start);
722 *base_ptr = base;
723 return true;
724}
725
726// Consume digits.
727//
728// The classic loop:
729//
730// for each digit
731// value = value * base + digit
732// value *= sign
733//
734// The classic loop needs overflow checking. It also fails on the most
735// negative integer, -2147483648 in 32-bit two's complement representation.
736//
737// My improved loop:
738//
739// if (!negative)
740// for each digit
741// value = value * base
742// value = value + digit
743// else
744// for each digit
745// value = value * base
746// value = value - digit
747//
748// Overflow checking becomes simple.
749
750// Lookup tables per IntType:
751// vmax/base and vmin/base are precomputed because division costs at least 8ns.
752// TODO(junyer): Doing this per base instead (i.e. an array of structs, not a
753// struct of arrays) would probably be better in terms of d-cache for the most
754// commonly used bases.
755template <typename IntType>
756struct LookupTables {
757 static const IntType kVmaxOverBase[];
758 static const IntType kVminOverBase[];
759};
760
761// An array initializer macro for X/base where base in [0, 36].
762// However, note that lookups for base in [0, 1] should never happen because
763// base has been validated to be in [2, 36] by safe_parse_sign_and_base().
764#define X_OVER_BASE_INITIALIZER(X) \
765 { \
766 0, 0, X / 2, X / 3, X / 4, X / 5, X / 6, X / 7, X / 8, X / 9, X / 10, \
767 X / 11, X / 12, X / 13, X / 14, X / 15, X / 16, X / 17, X / 18, \
768 X / 19, X / 20, X / 21, X / 22, X / 23, X / 24, X / 25, X / 26, \
769 X / 27, X / 28, X / 29, X / 30, X / 31, X / 32, X / 33, X / 34, \
770 X / 35, X / 36, \
771 }
772
773template <typename IntType>
774const IntType LookupTables<IntType>::kVmaxOverBase[] =
775 X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max());
776
777template <typename IntType>
778const IntType LookupTables<IntType>::kVminOverBase[] =
779 X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min());
780
781#undef X_OVER_BASE_INITIALIZER
782
783template <typename IntType>
784inline bool safe_parse_positive_int(absl::string_view text, int base,
785 IntType* value_p) {
786 IntType value = 0;
787 const IntType vmax = std::numeric_limits<IntType>::max();
788 assert(vmax > 0);
789 assert(base >= 0);
790 assert(vmax >= static_cast<IntType>(base));
791 const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base];
792 const char* start = text.data();
793 const char* end = start + text.size();
794 // loop over digits
795 for (; start < end; ++start) {
796 unsigned char c = static_cast<unsigned char>(start[0]);
797 int digit = kAsciiToInt[c];
798 if (digit >= base) {
799 *value_p = value;
800 return false;
801 }
802 if (value > vmax_over_base) {
803 *value_p = vmax;
804 return false;
805 }
806 value *= base;
807 if (value > vmax - digit) {
808 *value_p = vmax;
809 return false;
810 }
811 value += digit;
812 }
813 *value_p = value;
814 return true;
815}
816
817template <typename IntType>
818inline bool safe_parse_negative_int(absl::string_view text, int base,
819 IntType* value_p) {
820 IntType value = 0;
821 const IntType vmin = std::numeric_limits<IntType>::min();
822 assert(vmin < 0);
823 assert(vmin <= 0 - base);
824 IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base];
825 // 2003 c++ standard [expr.mul]
826 // "... the sign of the remainder is implementation-defined."
827 // Although (vmin/base)*base + vmin%base is always vmin.
828 // 2011 c++ standard tightens the spec but we cannot rely on it.
829 // TODO(junyer): Handle this in the lookup table generation.
830 if (vmin % base > 0) {
831 vmin_over_base += 1;
832 }
833 const char* start = text.data();
834 const char* end = start + text.size();
835 // loop over digits
836 for (; start < end; ++start) {
837 unsigned char c = static_cast<unsigned char>(start[0]);
838 int digit = kAsciiToInt[c];
839 if (digit >= base) {
840 *value_p = value;
841 return false;
842 }
843 if (value < vmin_over_base) {
844 *value_p = vmin;
845 return false;
846 }
847 value *= base;
848 if (value < vmin + digit) {
849 *value_p = vmin;
850 return false;
851 }
852 value -= digit;
853 }
854 *value_p = value;
855 return true;
856}
857
858// Input format based on POSIX.1-2008 strtol
859// http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
860template <typename IntType>
861inline bool safe_int_internal(absl::string_view text, IntType* value_p,
862 int base) {
863 *value_p = 0;
864 bool negative;
865 if (!safe_parse_sign_and_base(&text, &base, &negative)) {
866 return false;
867 }
868 if (!negative) {
869 return safe_parse_positive_int(text, base, value_p);
870 } else {
871 return safe_parse_negative_int(text, base, value_p);
872 }
873}
874
875template <typename IntType>
876inline bool safe_uint_internal(absl::string_view text, IntType* value_p,
877 int base) {
878 *value_p = 0;
879 bool negative;
880 if (!safe_parse_sign_and_base(&text, &base, &negative) || negative) {
881 return false;
882 }
883 return safe_parse_positive_int(text, base, value_p);
884}
885} // anonymous namespace
886
887namespace numbers_internal {
888bool safe_strto32_base(absl::string_view text, int32_t* value, int base) {
889 return safe_int_internal<int32_t>(text, value, base);
890}
891
892bool safe_strto64_base(absl::string_view text, int64_t* value, int base) {
893 return safe_int_internal<int64_t>(text, value, base);
894}
895
896bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) {
897 return safe_uint_internal<uint32_t>(text, value, base);
898}
899
900bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base) {
901 return safe_uint_internal<uint64_t>(text, value, base);
902}
903} // namespace numbers_internal
904
905} // namespace absl
906