| 1 | // Copyright 2018 The Abseil Authors. | 
|---|
| 2 | // | 
|---|
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|---|
| 4 | // you may not use this file except in compliance with the License. | 
|---|
| 5 | // You may obtain a copy of the License at | 
|---|
| 6 | // | 
|---|
| 7 | //      https://www.apache.org/licenses/LICENSE-2.0 | 
|---|
| 8 | // | 
|---|
| 9 | // Unless required by applicable law or agreed to in writing, software | 
|---|
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, | 
|---|
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|---|
| 12 | // See the License for the specific language governing permissions and | 
|---|
| 13 | // limitations under the License. | 
|---|
| 14 |  | 
|---|
| 15 | #include "absl/strings/internal/charconv_parse.h" | 
|---|
| 16 | #include "absl/strings/charconv.h" | 
|---|
| 17 |  | 
|---|
| 18 | #include <cassert> | 
|---|
| 19 | #include <cstdint> | 
|---|
| 20 | #include <limits> | 
|---|
| 21 |  | 
|---|
| 22 | #include "absl/strings/internal/memutil.h" | 
|---|
| 23 |  | 
|---|
| 24 | namespace absl { | 
|---|
| 25 | namespace { | 
|---|
| 26 |  | 
|---|
| 27 | // ParseFloat<10> will read the first 19 significant digits of the mantissa. | 
|---|
| 28 | // This number was chosen for multiple reasons. | 
|---|
| 29 | // | 
|---|
| 30 | // (a) First, for whatever integer type we choose to represent the mantissa, we | 
|---|
| 31 | // want to choose the largest possible number of decimal digits for that integer | 
|---|
| 32 | // type.  We are using uint64_t, which can express any 19-digit unsigned | 
|---|
| 33 | // integer. | 
|---|
| 34 | // | 
|---|
| 35 | // (b) Second, we need to parse enough digits that the binary value of any | 
|---|
| 36 | // mantissa we capture has more bits of resolution than the mantissa | 
|---|
| 37 | // representation in the target float.  Our algorithm requires at least 3 bits | 
|---|
| 38 | // of headway, but 19 decimal digits give a little more than that. | 
|---|
| 39 | // | 
|---|
| 40 | // The following static assertions verify the above comments: | 
|---|
| 41 | constexpr int kDecimalMantissaDigitsMax = 19; | 
|---|
| 42 |  | 
|---|
| 43 | static_assert(std::numeric_limits<uint64_t>::digits10 == | 
|---|
| 44 | kDecimalMantissaDigitsMax, | 
|---|
| 45 | "(a) above"); | 
|---|
| 46 |  | 
|---|
| 47 | // IEEE doubles, which we assume in Abseil, have 53 binary bits of mantissa. | 
|---|
| 48 | static_assert(std::numeric_limits<double>::is_iec559, "IEEE double assumed"); | 
|---|
| 49 | static_assert(std::numeric_limits<double>::radix == 2, "IEEE double fact"); | 
|---|
| 50 | static_assert(std::numeric_limits<double>::digits == 53, "IEEE double fact"); | 
|---|
| 51 |  | 
|---|
| 52 | // The lowest valued 19-digit decimal mantissa we can read still contains | 
|---|
| 53 | // sufficient information to reconstruct a binary mantissa. | 
|---|
| 54 | static_assert(1000000000000000000u > (uint64_t(1) << (53 + 3)), "(b) above"); | 
|---|
| 55 |  | 
|---|
| 56 | // ParseFloat<16> will read the first 15 significant digits of the mantissa. | 
|---|
| 57 | // | 
|---|
| 58 | // Because a base-16-to-base-2 conversion can be done exactly, we do not need | 
|---|
| 59 | // to maximize the number of scanned hex digits to improve our conversion.  What | 
|---|
| 60 | // is required is to scan two more bits than the mantissa can represent, so that | 
|---|
| 61 | // we always round correctly. | 
|---|
| 62 | // | 
|---|
| 63 | // (One extra bit does not suffice to perform correct rounding, since a number | 
|---|
| 64 | // exactly halfway between two representable floats has unique rounding rules, | 
|---|
| 65 | // so we need to differentiate between a "halfway between" number and a "closer | 
|---|
| 66 | // to the larger value" number.) | 
|---|
| 67 | constexpr int kHexadecimalMantissaDigitsMax = 15; | 
|---|
| 68 |  | 
|---|
| 69 | // The minimum number of significant bits that will be read from | 
|---|
| 70 | // kHexadecimalMantissaDigitsMax hex digits.  We must subtract by three, since | 
|---|
| 71 | // the most significant digit can be a "1", which only contributes a single | 
|---|
| 72 | // significant bit. | 
|---|
| 73 | constexpr int kGuaranteedHexadecimalMantissaBitPrecision = | 
|---|
| 74 | 4 * kHexadecimalMantissaDigitsMax - 3; | 
|---|
| 75 |  | 
|---|
| 76 | static_assert(kGuaranteedHexadecimalMantissaBitPrecision > | 
|---|
| 77 | std::numeric_limits<double>::digits + 2, | 
|---|
| 78 | "kHexadecimalMantissaDigitsMax too small"); | 
|---|
| 79 |  | 
|---|
| 80 | // We also impose a limit on the number of significant digits we will read from | 
|---|
| 81 | // an exponent, to avoid having to deal with integer overflow.  We use 9 for | 
|---|
| 82 | // this purpose. | 
|---|
| 83 | // | 
|---|
| 84 | // If we read a 9 digit exponent, the end result of the conversion will | 
|---|
| 85 | // necessarily be infinity or zero, depending on the sign of the exponent. | 
|---|
| 86 | // Therefore we can just drop extra digits on the floor without any extra | 
|---|
| 87 | // logic. | 
|---|
| 88 | constexpr int kDecimalExponentDigitsMax = 9; | 
|---|
| 89 | static_assert(std::numeric_limits<int>::digits10 >= kDecimalExponentDigitsMax, | 
|---|
| 90 | "int type too small"); | 
|---|
| 91 |  | 
|---|
| 92 | // To avoid incredibly large inputs causing integer overflow for our exponent, | 
|---|
| 93 | // we impose an arbitrary but very large limit on the number of significant | 
|---|
| 94 | // digits we will accept.  The implementation refuses to match a string with | 
|---|
| 95 | // more consecutive significant mantissa digits than this. | 
|---|
| 96 | constexpr int kDecimalDigitLimit = 50000000; | 
|---|
| 97 |  | 
|---|
| 98 | // Corresponding limit for hexadecimal digit inputs.  This is one fourth the | 
|---|
| 99 | // amount of kDecimalDigitLimit, since each dropped hexadecimal digit requires | 
|---|
| 100 | // a binary exponent adjustment of 4. | 
|---|
| 101 | constexpr int kHexadecimalDigitLimit = kDecimalDigitLimit / 4; | 
|---|
| 102 |  | 
|---|
| 103 | // The largest exponent we can read is 999999999 (per | 
|---|
| 104 | // kDecimalExponentDigitsMax), and the largest exponent adjustment we can get | 
|---|
| 105 | // from dropped mantissa digits is 2 * kDecimalDigitLimit, and the sum of these | 
|---|
| 106 | // comfortably fits in an integer. | 
|---|
| 107 | // | 
|---|
| 108 | // We count kDecimalDigitLimit twice because there are independent limits for | 
|---|
| 109 | // numbers before and after the decimal point.  (In the case where there are no | 
|---|
| 110 | // significant digits before the decimal point, there are independent limits for | 
|---|
| 111 | // post-decimal-point leading zeroes and for significant digits.) | 
|---|
| 112 | static_assert(999999999 + 2 * kDecimalDigitLimit < | 
|---|
| 113 | std::numeric_limits<int>::max(), | 
|---|
| 114 | "int type too small"); | 
|---|
| 115 | static_assert(999999999 + 2 * (4 * kHexadecimalDigitLimit) < | 
|---|
| 116 | std::numeric_limits<int>::max(), | 
|---|
| 117 | "int type too small"); | 
|---|
| 118 |  | 
|---|
| 119 | // Returns true if the provided bitfield allows parsing an exponent value | 
|---|
| 120 | // (e.g., "1.5e100"). | 
|---|
| 121 | bool AllowExponent(chars_format flags) { | 
|---|
| 122 | bool fixed = (flags & chars_format::fixed) == chars_format::fixed; | 
|---|
| 123 | bool scientific = | 
|---|
| 124 | (flags & chars_format::scientific) == chars_format::scientific; | 
|---|
| 125 | return scientific || !fixed; | 
|---|
| 126 | } | 
|---|
| 127 |  | 
|---|
| 128 | // Returns true if the provided bitfield requires an exponent value be present. | 
|---|
| 129 | bool RequireExponent(chars_format flags) { | 
|---|
| 130 | bool fixed = (flags & chars_format::fixed) == chars_format::fixed; | 
|---|
| 131 | bool scientific = | 
|---|
| 132 | (flags & chars_format::scientific) == chars_format::scientific; | 
|---|
| 133 | return scientific && !fixed; | 
|---|
| 134 | } | 
|---|
| 135 |  | 
|---|
| 136 | const int8_t kAsciiToInt[256] = { | 
|---|
| 137 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
|---|
| 138 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
|---|
| 139 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0,  1,  2,  3,  4,  5,  6,  7,  8, | 
|---|
| 140 | 9,  -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, | 
|---|
| 141 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
|---|
| 142 | -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
|---|
| 143 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
|---|
| 144 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
|---|
| 145 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
|---|
| 146 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
|---|
| 147 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
|---|
| 148 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
|---|
| 149 | -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | 
|---|
| 150 | -1, -1, -1, -1, -1, -1, -1, -1, -1}; | 
|---|
| 151 |  | 
|---|
| 152 | // Returns true if `ch` is a digit in the given base | 
|---|
| 153 | template <int base> | 
|---|
| 154 | bool IsDigit(char ch); | 
|---|
| 155 |  | 
|---|
| 156 | // Converts a valid `ch` to its digit value in the given base. | 
|---|
| 157 | template <int base> | 
|---|
| 158 | unsigned ToDigit(char ch); | 
|---|
| 159 |  | 
|---|
| 160 | // Returns true if `ch` is the exponent delimiter for the given base. | 
|---|
| 161 | template <int base> | 
|---|
| 162 | bool IsExponentCharacter(char ch); | 
|---|
| 163 |  | 
|---|
| 164 | // Returns the maximum number of significant digits we will read for a float | 
|---|
| 165 | // in the given base. | 
|---|
| 166 | template <int base> | 
|---|
| 167 | constexpr int MantissaDigitsMax(); | 
|---|
| 168 |  | 
|---|
| 169 | // Returns the largest consecutive run of digits we will accept when parsing a | 
|---|
| 170 | // number in the given base. | 
|---|
| 171 | template <int base> | 
|---|
| 172 | constexpr int DigitLimit(); | 
|---|
| 173 |  | 
|---|
| 174 | // Returns the amount the exponent must be adjusted by for each dropped digit. | 
|---|
| 175 | // (For decimal this is 1, since the digits are in base 10 and the exponent base | 
|---|
| 176 | // is also 10, but for hexadecimal this is 4, since the digits are base 16 but | 
|---|
| 177 | // the exponent base is 2.) | 
|---|
| 178 | template <int base> | 
|---|
| 179 | constexpr int DigitMagnitude(); | 
|---|
| 180 |  | 
|---|
| 181 | template <> | 
|---|
| 182 | bool IsDigit<10>(char ch) { | 
|---|
| 183 | return ch >= '0' && ch <= '9'; | 
|---|
| 184 | } | 
|---|
| 185 | template <> | 
|---|
| 186 | bool IsDigit<16>(char ch) { | 
|---|
| 187 | return kAsciiToInt[static_cast<unsigned char>(ch)] >= 0; | 
|---|
| 188 | } | 
|---|
| 189 |  | 
|---|
| 190 | template <> | 
|---|
| 191 | unsigned ToDigit<10>(char ch) { | 
|---|
| 192 | return ch - '0'; | 
|---|
| 193 | } | 
|---|
| 194 | template <> | 
|---|
| 195 | unsigned ToDigit<16>(char ch) { | 
|---|
| 196 | return kAsciiToInt[static_cast<unsigned char>(ch)]; | 
|---|
| 197 | } | 
|---|
| 198 |  | 
|---|
| 199 | template <> | 
|---|
| 200 | bool IsExponentCharacter<10>(char ch) { | 
|---|
| 201 | return ch == 'e' || ch == 'E'; | 
|---|
| 202 | } | 
|---|
| 203 |  | 
|---|
| 204 | template <> | 
|---|
| 205 | bool IsExponentCharacter<16>(char ch) { | 
|---|
| 206 | return ch == 'p' || ch == 'P'; | 
|---|
| 207 | } | 
|---|
| 208 |  | 
|---|
| 209 | template <> | 
|---|
| 210 | constexpr int MantissaDigitsMax<10>() { | 
|---|
| 211 | return kDecimalMantissaDigitsMax; | 
|---|
| 212 | } | 
|---|
| 213 | template <> | 
|---|
| 214 | constexpr int MantissaDigitsMax<16>() { | 
|---|
| 215 | return kHexadecimalMantissaDigitsMax; | 
|---|
| 216 | } | 
|---|
| 217 |  | 
|---|
| 218 | template <> | 
|---|
| 219 | constexpr int DigitLimit<10>() { | 
|---|
| 220 | return kDecimalDigitLimit; | 
|---|
| 221 | } | 
|---|
| 222 | template <> | 
|---|
| 223 | constexpr int DigitLimit<16>() { | 
|---|
| 224 | return kHexadecimalDigitLimit; | 
|---|
| 225 | } | 
|---|
| 226 |  | 
|---|
| 227 | template <> | 
|---|
| 228 | constexpr int DigitMagnitude<10>() { | 
|---|
| 229 | return 1; | 
|---|
| 230 | } | 
|---|
| 231 | template <> | 
|---|
| 232 | constexpr int DigitMagnitude<16>() { | 
|---|
| 233 | return 4; | 
|---|
| 234 | } | 
|---|
| 235 |  | 
|---|
| 236 | // Reads decimal digits from [begin, end) into *out.  Returns the number of | 
|---|
| 237 | // digits consumed. | 
|---|
| 238 | // | 
|---|
| 239 | // After max_digits has been read, keeps consuming characters, but no longer | 
|---|
| 240 | // adjusts *out.  If a nonzero digit is dropped this way, *dropped_nonzero_digit | 
|---|
| 241 | // is set; otherwise, it is left unmodified. | 
|---|
| 242 | // | 
|---|
| 243 | // If no digits are matched, returns 0 and leaves *out unchanged. | 
|---|
| 244 | // | 
|---|
| 245 | // ConsumeDigits does not protect against overflow on *out; max_digits must | 
|---|
| 246 | // be chosen with respect to type T to avoid the possibility of overflow. | 
|---|
| 247 | template <int base, typename T> | 
|---|
| 248 | std::size_t ConsumeDigits(const char* begin, const char* end, int max_digits, | 
|---|
| 249 | T* out, bool* dropped_nonzero_digit) { | 
|---|
| 250 | if (base == 10) { | 
|---|
| 251 | assert(max_digits <= std::numeric_limits<T>::digits10); | 
|---|
| 252 | } else if (base == 16) { | 
|---|
| 253 | assert(max_digits * 4 <= std::numeric_limits<T>::digits); | 
|---|
| 254 | } | 
|---|
| 255 | const char* const original_begin = begin; | 
|---|
| 256 | T accumulator = *out; | 
|---|
| 257 | const char* significant_digits_end = | 
|---|
| 258 | (end - begin > max_digits) ? begin + max_digits : end; | 
|---|
| 259 | while (begin < significant_digits_end && IsDigit<base>(*begin)) { | 
|---|
| 260 | // Do not guard against *out overflow; max_digits was chosen to avoid this. | 
|---|
| 261 | // Do assert against it, to detect problems in debug builds. | 
|---|
| 262 | auto digit = static_cast<T>(ToDigit<base>(*begin)); | 
|---|
| 263 | assert(accumulator * base >= accumulator); | 
|---|
| 264 | accumulator *= base; | 
|---|
| 265 | assert(accumulator + digit >= accumulator); | 
|---|
| 266 | accumulator += digit; | 
|---|
| 267 | ++begin; | 
|---|
| 268 | } | 
|---|
| 269 | bool dropped_nonzero = false; | 
|---|
| 270 | while (begin < end && IsDigit<base>(*begin)) { | 
|---|
| 271 | dropped_nonzero = dropped_nonzero || (*begin != '0'); | 
|---|
| 272 | ++begin; | 
|---|
| 273 | } | 
|---|
| 274 | if (dropped_nonzero && dropped_nonzero_digit != nullptr) { | 
|---|
| 275 | *dropped_nonzero_digit = true; | 
|---|
| 276 | } | 
|---|
| 277 | *out = accumulator; | 
|---|
| 278 | return begin - original_begin; | 
|---|
| 279 | } | 
|---|
| 280 |  | 
|---|
| 281 | // Returns true if `v` is one of the chars allowed inside parentheses following | 
|---|
| 282 | // a NaN. | 
|---|
| 283 | bool IsNanChar(char v) { | 
|---|
| 284 | return (v == '_') || (v >= '0' && v <= '9') || (v >= 'a' && v <= 'z') || | 
|---|
| 285 | (v >= 'A' && v <= 'Z'); | 
|---|
| 286 | } | 
|---|
| 287 |  | 
|---|
| 288 | // Checks the range [begin, end) for a strtod()-formatted infinity or NaN.  If | 
|---|
| 289 | // one is found, sets `out` appropriately and returns true. | 
|---|
| 290 | bool ParseInfinityOrNan(const char* begin, const char* end, | 
|---|
| 291 | strings_internal::ParsedFloat* out) { | 
|---|
| 292 | if (end - begin < 3) { | 
|---|
| 293 | return false; | 
|---|
| 294 | } | 
|---|
| 295 | switch (*begin) { | 
|---|
| 296 | case 'i': | 
|---|
| 297 | case 'I': { | 
|---|
| 298 | // An infinity std::string consists of the characters "inf" or "infinity", | 
|---|
| 299 | // case insensitive. | 
|---|
| 300 | if (strings_internal::memcasecmp(begin + 1, "nf", 2) != 0) { | 
|---|
| 301 | return false; | 
|---|
| 302 | } | 
|---|
| 303 | out->type = strings_internal::FloatType::kInfinity; | 
|---|
| 304 | if (end - begin >= 8 && | 
|---|
| 305 | strings_internal::memcasecmp(begin + 3, "inity", 5) == 0) { | 
|---|
| 306 | out->end = begin + 8; | 
|---|
| 307 | } else { | 
|---|
| 308 | out->end = begin + 3; | 
|---|
| 309 | } | 
|---|
| 310 | return true; | 
|---|
| 311 | } | 
|---|
| 312 | case 'n': | 
|---|
| 313 | case 'N': { | 
|---|
| 314 | // A NaN consists of the characters "nan", case insensitive, optionally | 
|---|
| 315 | // followed by a parenthesized sequence of zero or more alphanumeric | 
|---|
| 316 | // characters and/or underscores. | 
|---|
| 317 | if (strings_internal::memcasecmp(begin + 1, "an", 2) != 0) { | 
|---|
| 318 | return false; | 
|---|
| 319 | } | 
|---|
| 320 | out->type = strings_internal::FloatType::kNan; | 
|---|
| 321 | out->end = begin + 3; | 
|---|
| 322 | // NaN is allowed to be followed by a parenthesized std::string, consisting of | 
|---|
| 323 | // only the characters [a-zA-Z0-9_].  Match that if it's present. | 
|---|
| 324 | begin += 3; | 
|---|
| 325 | if (begin < end && *begin == '(') { | 
|---|
| 326 | const char* nan_begin = begin + 1; | 
|---|
| 327 | while (nan_begin < end && IsNanChar(*nan_begin)) { | 
|---|
| 328 | ++nan_begin; | 
|---|
| 329 | } | 
|---|
| 330 | if (nan_begin < end && *nan_begin == ')') { | 
|---|
| 331 | // We found an extra NaN specifier range | 
|---|
| 332 | out->subrange_begin = begin + 1; | 
|---|
| 333 | out->subrange_end = nan_begin; | 
|---|
| 334 | out->end = nan_begin + 1; | 
|---|
| 335 | } | 
|---|
| 336 | } | 
|---|
| 337 | return true; | 
|---|
| 338 | } | 
|---|
| 339 | default: | 
|---|
| 340 | return false; | 
|---|
| 341 | } | 
|---|
| 342 | } | 
|---|
| 343 | }  // namespace | 
|---|
| 344 |  | 
|---|
| 345 | namespace strings_internal { | 
|---|
| 346 |  | 
|---|
| 347 | template <int base> | 
|---|
| 348 | strings_internal::ParsedFloat ParseFloat(const char* begin, const char* end, | 
|---|
| 349 | chars_format format_flags) { | 
|---|
| 350 | strings_internal::ParsedFloat result; | 
|---|
| 351 |  | 
|---|
| 352 | // Exit early if we're given an empty range. | 
|---|
| 353 | if (begin == end) return result; | 
|---|
| 354 |  | 
|---|
| 355 | // Handle the infinity and NaN cases. | 
|---|
| 356 | if (ParseInfinityOrNan(begin, end, &result)) { | 
|---|
| 357 | return result; | 
|---|
| 358 | } | 
|---|
| 359 |  | 
|---|
| 360 | const char* const mantissa_begin = begin; | 
|---|
| 361 | while (begin < end && *begin == '0') { | 
|---|
| 362 | ++begin;  // skip leading zeros | 
|---|
| 363 | } | 
|---|
| 364 | uint64_t mantissa = 0; | 
|---|
| 365 |  | 
|---|
| 366 | int exponent_adjustment = 0; | 
|---|
| 367 | bool mantissa_is_inexact = false; | 
|---|
| 368 | std::size_t pre_decimal_digits = ConsumeDigits<base>( | 
|---|
| 369 | begin, end, MantissaDigitsMax<base>(), &mantissa, &mantissa_is_inexact); | 
|---|
| 370 | begin += pre_decimal_digits; | 
|---|
| 371 | int digits_left; | 
|---|
| 372 | if (pre_decimal_digits >= DigitLimit<base>()) { | 
|---|
| 373 | // refuse to parse pathological inputs | 
|---|
| 374 | return result; | 
|---|
| 375 | } else if (pre_decimal_digits > MantissaDigitsMax<base>()) { | 
|---|
| 376 | // We dropped some non-fraction digits on the floor.  Adjust our exponent | 
|---|
| 377 | // to compensate. | 
|---|
| 378 | exponent_adjustment = | 
|---|
| 379 | static_cast<int>(pre_decimal_digits - MantissaDigitsMax<base>()); | 
|---|
| 380 | digits_left = 0; | 
|---|
| 381 | } else { | 
|---|
| 382 | digits_left = | 
|---|
| 383 | static_cast<int>(MantissaDigitsMax<base>() - pre_decimal_digits); | 
|---|
| 384 | } | 
|---|
| 385 | if (begin < end && *begin == '.') { | 
|---|
| 386 | ++begin; | 
|---|
| 387 | if (mantissa == 0) { | 
|---|
| 388 | // If we haven't seen any nonzero digits yet, keep skipping zeros.  We | 
|---|
| 389 | // have to adjust the exponent to reflect the changed place value. | 
|---|
| 390 | const char* begin_zeros = begin; | 
|---|
| 391 | while (begin < end && *begin == '0') { | 
|---|
| 392 | ++begin; | 
|---|
| 393 | } | 
|---|
| 394 | std::size_t zeros_skipped = begin - begin_zeros; | 
|---|
| 395 | if (zeros_skipped >= DigitLimit<base>()) { | 
|---|
| 396 | // refuse to parse pathological inputs | 
|---|
| 397 | return result; | 
|---|
| 398 | } | 
|---|
| 399 | exponent_adjustment -= static_cast<int>(zeros_skipped); | 
|---|
| 400 | } | 
|---|
| 401 | std::size_t post_decimal_digits = ConsumeDigits<base>( | 
|---|
| 402 | begin, end, digits_left, &mantissa, &mantissa_is_inexact); | 
|---|
| 403 | begin += post_decimal_digits; | 
|---|
| 404 |  | 
|---|
| 405 | // Since `mantissa` is an integer, each significant digit we read after | 
|---|
| 406 | // the decimal point requires an adjustment to the exponent. "1.23e0" will | 
|---|
| 407 | // be stored as `mantissa` == 123 and `exponent` == -2 (that is, | 
|---|
| 408 | // "123e-2"). | 
|---|
| 409 | if (post_decimal_digits >= DigitLimit<base>()) { | 
|---|
| 410 | // refuse to parse pathological inputs | 
|---|
| 411 | return result; | 
|---|
| 412 | } else if (post_decimal_digits > digits_left) { | 
|---|
| 413 | exponent_adjustment -= digits_left; | 
|---|
| 414 | } else { | 
|---|
| 415 | exponent_adjustment -= post_decimal_digits; | 
|---|
| 416 | } | 
|---|
| 417 | } | 
|---|
| 418 | // If we've found no mantissa whatsoever, this isn't a number. | 
|---|
| 419 | if (mantissa_begin == begin) { | 
|---|
| 420 | return result; | 
|---|
| 421 | } | 
|---|
| 422 | // A bare "." doesn't count as a mantissa either. | 
|---|
| 423 | if (begin - mantissa_begin == 1 && *mantissa_begin == '.') { | 
|---|
| 424 | return result; | 
|---|
| 425 | } | 
|---|
| 426 |  | 
|---|
| 427 | if (mantissa_is_inexact) { | 
|---|
| 428 | // We dropped significant digits on the floor.  Handle this appropriately. | 
|---|
| 429 | if (base == 10) { | 
|---|
| 430 | // If we truncated significant decimal digits, store the full range of the | 
|---|
| 431 | // mantissa for future big integer math for exact rounding. | 
|---|
| 432 | result.subrange_begin = mantissa_begin; | 
|---|
| 433 | result.subrange_end = begin; | 
|---|
| 434 | } else if (base == 16) { | 
|---|
| 435 | // If we truncated hex digits, reflect this fact by setting the low | 
|---|
| 436 | // ("sticky") bit.  This allows for correct rounding in all cases. | 
|---|
| 437 | mantissa |= 1; | 
|---|
| 438 | } | 
|---|
| 439 | } | 
|---|
| 440 | result.mantissa = mantissa; | 
|---|
| 441 |  | 
|---|
| 442 | const char* const exponent_begin = begin; | 
|---|
| 443 | result.literal_exponent = 0; | 
|---|
| 444 | bool found_exponent = false; | 
|---|
| 445 | if (AllowExponent(format_flags) && begin < end && | 
|---|
| 446 | IsExponentCharacter<base>(*begin)) { | 
|---|
| 447 | bool negative_exponent = false; | 
|---|
| 448 | ++begin; | 
|---|
| 449 | if (begin < end && *begin == '-') { | 
|---|
| 450 | negative_exponent = true; | 
|---|
| 451 | ++begin; | 
|---|
| 452 | } else if (begin < end && *begin == '+') { | 
|---|
| 453 | ++begin; | 
|---|
| 454 | } | 
|---|
| 455 | const char* const exponent_digits_begin = begin; | 
|---|
| 456 | // Exponent is always expressed in decimal, even for hexadecimal floats. | 
|---|
| 457 | begin += ConsumeDigits<10>(begin, end, kDecimalExponentDigitsMax, | 
|---|
| 458 | &result.literal_exponent, nullptr); | 
|---|
| 459 | if (begin == exponent_digits_begin) { | 
|---|
| 460 | // there were no digits where we expected an exponent.  We failed to read | 
|---|
| 461 | // an exponent and should not consume the 'e' after all.  Rewind 'begin'. | 
|---|
| 462 | found_exponent = false; | 
|---|
| 463 | begin = exponent_begin; | 
|---|
| 464 | } else { | 
|---|
| 465 | found_exponent = true; | 
|---|
| 466 | if (negative_exponent) { | 
|---|
| 467 | result.literal_exponent = -result.literal_exponent; | 
|---|
| 468 | } | 
|---|
| 469 | } | 
|---|
| 470 | } | 
|---|
| 471 |  | 
|---|
| 472 | if (!found_exponent && RequireExponent(format_flags)) { | 
|---|
| 473 | // Provided flags required an exponent, but none was found.  This results | 
|---|
| 474 | // in a failure to scan. | 
|---|
| 475 | return result; | 
|---|
| 476 | } | 
|---|
| 477 |  | 
|---|
| 478 | // Success! | 
|---|
| 479 | result.type = strings_internal::FloatType::kNumber; | 
|---|
| 480 | if (result.mantissa > 0) { | 
|---|
| 481 | result.exponent = result.literal_exponent + | 
|---|
| 482 | (DigitMagnitude<base>() * exponent_adjustment); | 
|---|
| 483 | } else { | 
|---|
| 484 | result.exponent = 0; | 
|---|
| 485 | } | 
|---|
| 486 | result.end = begin; | 
|---|
| 487 | return result; | 
|---|
| 488 | } | 
|---|
| 489 |  | 
|---|
| 490 | template ParsedFloat ParseFloat<10>(const char* begin, const char* end, | 
|---|
| 491 | chars_format format_flags); | 
|---|
| 492 | template ParsedFloat ParseFloat<16>(const char* begin, const char* end, | 
|---|
| 493 | chars_format format_flags); | 
|---|
| 494 |  | 
|---|
| 495 | }  // namespace strings_internal | 
|---|
| 496 | }  // namespace absl | 
|---|
| 497 |  | 
|---|