| 1 | /* |
| 2 | * Copyright 2011-present Facebook, Inc. |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | #include <folly/Conv.h> |
| 17 | #include <array> |
| 18 | |
| 19 | namespace folly { |
| 20 | namespace detail { |
| 21 | |
| 22 | namespace { |
| 23 | |
/**
 * Scans forward from b and returns a pointer to the first character that is
 * not an ASCII decimal digit. If every character before e is a digit (or the
 * range is empty), the position where scanning stopped is returned.
 *
 * The caller is expected to have already consumed any leading whitespace
 * and sign.
 *
 * Equivalent to:
 *   for (;; ++b) {
 *     if (b >= e || !isdigit(*b)) return b;
 *   }
 */
inline const char* findFirstNonDigit(const char* b, const char* e) {
  while (b < e) {
    const char ch = *b;
    // Anything outside '0'..'9' ends the run of digits.
    if (ch < '0' || ch > '9') {
      return b;
    }
    ++b;
  }
  return b;
}
| 46 | |
// Decimal string representation of the largest value of each unsigned
// integral type. digits_to() compares a digit string of the same length
// against this value to detect overflow before converting.
template <class T>
struct MaxString {
  static const char* const value;
};

template <>
const char* const MaxString<uint8_t>::value = "255";
template <>
const char* const MaxString<uint16_t>::value = "65535";
template <>
const char* const MaxString<uint32_t>::value = "4294967295";
// The width of unsigned long differs between platforms. Select on sizeof
// instead of the compiler-specific __SIZEOF_LONG__ macro: where that macro is
// not predefined, an #if on it silently evaluates to 0 and would pick the
// 64-bit string even when unsigned long is 32 bits wide.
template <>
const char* const MaxString<unsigned long>::value =
    sizeof(unsigned long) == 4 ? "4294967295" : "18446744073709551615";
static_assert(
    sizeof(unsigned long) == 4 || sizeof(unsigned long) == 8,
    "Wrong value for MaxString<unsigned long>::value,"
    " please update.");
template <>
const char* const MaxString<unsigned long long>::value = "18446744073709551615";
static_assert(
    sizeof(unsigned long long) == 8,
    "Wrong value for MaxString<unsigned long long>::value"
    ", please update.");

#if FOLLY_HAVE_INT128_T
template <>
const char* const MaxString<__uint128_t>::value =
    "340282366920938463463374607431768211455";
#endif
| 82 | |
/*
 * Lookup tables that convert a decimal character value to an integral
 * binary value, scaled by a decimal "shift" multiplier.
 * For all character values in the range '0'..'9', the table entry at that
 * index holds the digit's decimal value multiplied by the table's multiplier.
 * For all other index values, the table holds the invalid OOR marker.
 */
// Out-of-range flag value. It is larger than the largest value that four
// decimal digits can represent (9999), yet four of these added together
// (40000) still do not overflow uint16_t. digits_to() sums the looked-up
// values and treats any sum >= OOR as "a non-digit character was seen".
constexpr int32_t OOR = 10000;
| 94 | |
// Units table: 256 entries, one per possible byte value. Indices '0'..'9'
// (48..57) hold the digit value 0..9; every other index holds OOR.
// The trailing comment on each row is the index of the row's first entry.
alignas(16) constexpr uint16_t shift1[] = {
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0-9
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 10
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 20
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 30
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, 0, 1, // 40
    2, 3, 4, 5, 6, 7, 8, 9, OOR, OOR, // 50
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 60
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 70
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 80
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 90
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 100
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 110
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 120
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 130
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 140
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 150
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 160
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 170
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 180
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 190
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 200
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 210
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 220
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 230
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 240
    OOR, OOR, OOR, OOR, OOR, OOR // 250
};
| 123 | |
// Tens table: 256 entries, one per possible byte value. Indices '0'..'9'
// (48..57) hold the digit value times 10; every other index holds OOR.
// The trailing comment on each row is the index of the row's first entry.
alignas(16) constexpr uint16_t shift10[] = {
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0-9
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 10
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 20
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 30
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, 0, 10, // 40
    20, 30, 40, 50, 60, 70, 80, 90, OOR, OOR, // 50
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 60
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 70
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 80
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 90
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 100
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 110
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 120
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 130
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 140
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 150
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 160
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 170
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 180
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 190
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 200
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 210
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 220
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 230
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 240
    OOR, OOR, OOR, OOR, OOR, OOR // 250
};
| 152 | |
// Hundreds table: 256 entries, one per possible byte value. Indices '0'..'9'
// (48..57) hold the digit value times 100; every other index holds OOR.
// The trailing comment on each row is the index of the row's first entry.
alignas(16) constexpr uint16_t shift100[] = {
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0-9
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 10
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 20
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 30
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, 0, 100, // 40
    200, 300, 400, 500, 600, 700, 800, 900, OOR, OOR, // 50
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 60
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 70
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 80
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 90
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 100
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 110
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 120
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 130
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 140
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 150
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 160
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 170
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 180
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 190
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 200
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 210
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 220
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 230
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 240
    OOR, OOR, OOR, OOR, OOR, OOR // 250
};
| 181 | |
// Thousands table: 256 entries, one per possible byte value. Indices '0'..'9'
// (48..57) hold the digit value times 1000; every other index holds OOR.
// The trailing comment on each row is the index of the row's first entry.
alignas(16) constexpr uint16_t shift1000[] = {
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0-9
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 10
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 20
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 30
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, 0, 1000, // 40
    2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, OOR, OOR, // 50
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 60
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 70
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 80
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 90
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 100
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 110
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 120
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 130
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 140
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 150
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 160
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 170
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 180
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 190
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 200
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 210
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 220
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 230
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 240
    OOR, OOR, OOR, OOR, OOR, OOR // 250
};
| 210 | |
// One entry of the error-message table used by makeConversionError().
struct ErrorString {
  // Human-readable description of the error.
  const char* string;
  // Whether makeConversionError() wraps the offending input in double quotes
  // when appending it to the message.
  bool quote;
};
| 215 | |
// Keep this in sync with ConversionCode in Conv.h
//
// makeConversionError() indexes this table with the numeric value of the
// ConversionCode, so the order of the entries must follow the order of the
// enumerators, and there must be one entry per code (the array size is
// ConversionCode::NUM_ERROR_CODES).
// NOTE(review): aggregate initialization does not diagnose too few
// initializers; a missing entry would presumably be left with a null string.
// Verify the entry count whenever a code is added.
constexpr const std::array<
    ErrorString,
    static_cast<std::size_t>(ConversionCode::NUM_ERROR_CODES)>
    kErrorStrings{{
        {"Success", true},
        {"Empty input string", true},
        {"No digits found in input string", true},
        {"Integer overflow when parsing bool (must be 0 or 1)", true},
        {"Invalid value for bool", true},
        {"Non-digit character found", true},
        {"Invalid leading character", true},
        {"Overflow during conversion", true},
        {"Negative overflow during conversion", true},
        {"Unable to convert string to floating point value", true},
        {"Non-whitespace character found after end of conversion", true},
        // The arithmetic-conversion messages below append the input unquoted.
        {"Overflow during arithmetic conversion", false},
        {"Negative overflow during arithmetic conversion", false},
        {"Loss of precision during arithmetic conversion", false},
    }};
| 236 | |
// Compile-time check that the character set in use encodes letters with their
// ASCII code points (spot-checked via 'A', 'Z', 'a' and 'z'). tolower_ascii()
// uses this to decide whether its bit-twiddling shortcut is valid.
using IsAscii =
    bool_constant<'A' == 65 && 'Z' == 90 && 'a' == 97 && 'z' == 122>;
| 240 | |
| 241 | // The code in this file that uses tolower() really only cares about |
| 242 | // 7-bit ASCII characters, so we can take a nice shortcut here. |
| 243 | inline char tolower_ascii(char in) { |
| 244 | return IsAscii::value ? in | 0x20 : char(std::tolower(in)); |
| 245 | } |
| 246 | |
| 247 | inline bool bool_str_cmp(const char** b, size_t len, const char* value) { |
| 248 | // Can't use strncasecmp, since we want to ensure that the full value matches |
| 249 | const char* p = *b; |
| 250 | const char* e = *b + len; |
| 251 | const char* v = value; |
| 252 | while (*v != '\0') { |
| 253 | if (p == e || tolower_ascii(*p) != *v) { // value is already lowercase |
| 254 | return false; |
| 255 | } |
| 256 | ++p; |
| 257 | ++v; |
| 258 | } |
| 259 | |
| 260 | *b = p; |
| 261 | return true; |
| 262 | } |
| 263 | |
| 264 | } // namespace |
| 265 | |
| 266 | Expected<bool, ConversionCode> str_to_bool(StringPiece* src) noexcept { |
| 267 | auto b = src->begin(), e = src->end(); |
| 268 | for (;; ++b) { |
| 269 | if (b >= e) { |
| 270 | return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING); |
| 271 | } |
| 272 | if (!std::isspace(*b)) { |
| 273 | break; |
| 274 | } |
| 275 | } |
| 276 | |
| 277 | bool result; |
| 278 | size_t len = size_t(e - b); |
| 279 | switch (*b) { |
| 280 | case '0': |
| 281 | case '1': { |
| 282 | result = false; |
| 283 | for (; b < e && isdigit(*b); ++b) { |
| 284 | if (result || (*b != '0' && *b != '1')) { |
| 285 | return makeUnexpected(ConversionCode::BOOL_OVERFLOW); |
| 286 | } |
| 287 | result = (*b == '1'); |
| 288 | } |
| 289 | break; |
| 290 | } |
| 291 | case 'y': |
| 292 | case 'Y': |
| 293 | result = true; |
| 294 | if (!bool_str_cmp(&b, len, "yes" )) { |
| 295 | ++b; // accept the single 'y' character |
| 296 | } |
| 297 | break; |
| 298 | case 'n': |
| 299 | case 'N': |
| 300 | result = false; |
| 301 | if (!bool_str_cmp(&b, len, "no" )) { |
| 302 | ++b; |
| 303 | } |
| 304 | break; |
| 305 | case 't': |
| 306 | case 'T': |
| 307 | result = true; |
| 308 | if (!bool_str_cmp(&b, len, "true" )) { |
| 309 | ++b; |
| 310 | } |
| 311 | break; |
| 312 | case 'f': |
| 313 | case 'F': |
| 314 | result = false; |
| 315 | if (!bool_str_cmp(&b, len, "false" )) { |
| 316 | ++b; |
| 317 | } |
| 318 | break; |
| 319 | case 'o': |
| 320 | case 'O': |
| 321 | if (bool_str_cmp(&b, len, "on" )) { |
| 322 | result = true; |
| 323 | } else if (bool_str_cmp(&b, len, "off" )) { |
| 324 | result = false; |
| 325 | } else { |
| 326 | return makeUnexpected(ConversionCode::BOOL_INVALID_VALUE); |
| 327 | } |
| 328 | break; |
| 329 | default: |
| 330 | return makeUnexpected(ConversionCode::BOOL_INVALID_VALUE); |
| 331 | } |
| 332 | |
| 333 | src->assign(b, e); |
| 334 | |
| 335 | return result; |
| 336 | } |
| 337 | |
| 338 | /** |
| 339 | * StringPiece to double, with progress information. Alters the |
| 340 | * StringPiece parameter to munch the already-parsed characters. |
| 341 | */ |
| 342 | template <class Tgt> |
| 343 | Expected<Tgt, ConversionCode> str_to_floating(StringPiece* src) noexcept { |
| 344 | using namespace double_conversion; |
| 345 | static StringToDoubleConverter conv( |
| 346 | StringToDoubleConverter::ALLOW_TRAILING_JUNK | |
| 347 | StringToDoubleConverter::ALLOW_LEADING_SPACES, |
| 348 | 0.0, |
| 349 | // return this for junk input string |
| 350 | std::numeric_limits<double>::quiet_NaN(), |
| 351 | nullptr, |
| 352 | nullptr); |
| 353 | |
| 354 | if (src->empty()) { |
| 355 | return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING); |
| 356 | } |
| 357 | |
| 358 | int length; |
| 359 | auto result = conv.StringToDouble( |
| 360 | src->data(), |
| 361 | static_cast<int>(src->size()), |
| 362 | &length); // processed char count |
| 363 | |
| 364 | if (!std::isnan(result)) { |
| 365 | // If we get here with length = 0, the input string is empty. |
| 366 | // If we get here with result = 0.0, it's either because the string |
| 367 | // contained only whitespace, or because we had an actual zero value |
| 368 | // (with potential trailing junk). If it was only whitespace, we |
| 369 | // want to raise an error; length will point past the last character |
| 370 | // that was processed, so we need to check if that character was |
| 371 | // whitespace or not. |
| 372 | if (length == 0 || |
| 373 | (result == 0.0 && std::isspace((*src)[size_t(length) - 1]))) { |
| 374 | return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING); |
| 375 | } |
| 376 | if (length >= 2) { |
| 377 | const char* suffix = src->data() + length - 1; |
| 378 | // double_conversion doesn't update length correctly when there is an |
| 379 | // incomplete exponent specifier. Converting "12e-f-g" shouldn't consume |
| 380 | // any more than "12", but it will consume "12e-". |
| 381 | |
| 382 | // "123-" should only parse "123" |
| 383 | if (*suffix == '-' || *suffix == '+') { |
| 384 | --suffix; |
| 385 | --length; |
| 386 | } |
| 387 | // "12e-f-g" or "12euro" should only parse "12" |
| 388 | if (*suffix == 'e' || *suffix == 'E') { |
| 389 | --length; |
| 390 | } |
| 391 | } |
| 392 | src->advance(size_t(length)); |
| 393 | return Tgt(result); |
| 394 | } |
| 395 | |
| 396 | auto* e = src->end(); |
| 397 | auto* b = |
| 398 | std::find_if_not(src->begin(), e, [](char c) { return std::isspace(c); }); |
| 399 | |
| 400 | // There must be non-whitespace, otherwise we would have caught this above |
| 401 | assert(b < e); |
| 402 | size_t size = size_t(e - b); |
| 403 | |
| 404 | bool negative = false; |
| 405 | if (*b == '-') { |
| 406 | negative = true; |
| 407 | ++b; |
| 408 | --size; |
| 409 | } |
| 410 | |
| 411 | result = 0.0; |
| 412 | |
| 413 | switch (tolower_ascii(*b)) { |
| 414 | case 'i': |
| 415 | if (size >= 3 && tolower_ascii(b[1]) == 'n' && |
| 416 | tolower_ascii(b[2]) == 'f') { |
| 417 | if (size >= 8 && tolower_ascii(b[3]) == 'i' && |
| 418 | tolower_ascii(b[4]) == 'n' && tolower_ascii(b[5]) == 'i' && |
| 419 | tolower_ascii(b[6]) == 't' && tolower_ascii(b[7]) == 'y') { |
| 420 | b += 8; |
| 421 | } else { |
| 422 | b += 3; |
| 423 | } |
| 424 | result = std::numeric_limits<Tgt>::infinity(); |
| 425 | } |
| 426 | break; |
| 427 | |
| 428 | case 'n': |
| 429 | if (size >= 3 && tolower_ascii(b[1]) == 'a' && |
| 430 | tolower_ascii(b[2]) == 'n') { |
| 431 | b += 3; |
| 432 | result = std::numeric_limits<Tgt>::quiet_NaN(); |
| 433 | } |
| 434 | break; |
| 435 | |
| 436 | default: |
| 437 | break; |
| 438 | } |
| 439 | |
| 440 | if (result == 0.0) { |
| 441 | // All bets are off |
| 442 | return makeUnexpected(ConversionCode::STRING_TO_FLOAT_ERROR); |
| 443 | } |
| 444 | |
| 445 | if (negative) { |
| 446 | result = -result; |
| 447 | } |
| 448 | |
| 449 | src->assign(b, e); |
| 450 | |
| 451 | return Tgt(result); |
| 452 | } |
| 453 | |
// Explicit instantiations of str_to_floating for float and double. The
// template is defined in this file, so these emit the specializations here
// (presumably for use by other translation units; verify against Conv.h).
template Expected<float, ConversionCode> str_to_floating<float>(
    StringPiece* src) noexcept;
template Expected<double, ConversionCode> str_to_floating<double>(
    StringPiece* src) noexcept;
| 458 | |
| 459 | /** |
| 460 | * This class takes care of additional processing needed for signed values, |
| 461 | * like leading sign character and overflow checks. |
| 462 | */ |
| 463 | template <typename T, bool IsSigned = std::is_signed<T>::value> |
| 464 | class SignedValueHandler; |
| 465 | |
| 466 | template <typename T> |
| 467 | class SignedValueHandler<T, true> { |
| 468 | public: |
| 469 | ConversionCode init(const char*& b) { |
| 470 | negative_ = false; |
| 471 | if (!std::isdigit(*b)) { |
| 472 | if (*b == '-') { |
| 473 | negative_ = true; |
| 474 | } else if (UNLIKELY(*b != '+')) { |
| 475 | return ConversionCode::INVALID_LEADING_CHAR; |
| 476 | } |
| 477 | ++b; |
| 478 | } |
| 479 | return ConversionCode::SUCCESS; |
| 480 | } |
| 481 | |
| 482 | ConversionCode overflow() { |
| 483 | return negative_ ? ConversionCode::NEGATIVE_OVERFLOW |
| 484 | : ConversionCode::POSITIVE_OVERFLOW; |
| 485 | } |
| 486 | |
| 487 | template <typename U> |
| 488 | Expected<T, ConversionCode> finalize(U value) { |
| 489 | T rv; |
| 490 | if (negative_) { |
| 491 | rv = T(-value); |
| 492 | if (UNLIKELY(rv > 0)) { |
| 493 | return makeUnexpected(ConversionCode::NEGATIVE_OVERFLOW); |
| 494 | } |
| 495 | } else { |
| 496 | rv = T(value); |
| 497 | if (UNLIKELY(rv < 0)) { |
| 498 | return makeUnexpected(ConversionCode::POSITIVE_OVERFLOW); |
| 499 | } |
| 500 | } |
| 501 | return rv; |
| 502 | } |
| 503 | |
| 504 | private: |
| 505 | bool negative_; |
| 506 | }; |
| 507 | |
// For unsigned types, we don't need any extra processing: no sign character
// is consumed, overflow is always positive, and the value is passed through.
template <typename T>
class SignedValueHandler<T, false> {
 public:
  // Nothing to consume; the pointer is left untouched.
  ConversionCode init(const char*&) {
    return ConversionCode::SUCCESS;
  }

  // An unsigned value can only overflow in the positive direction.
  ConversionCode overflow() {
    return ConversionCode::POSITIVE_OVERFLOW;
  }

  // The value already has the target type; return it as-is.
  Expected<T, ConversionCode> finalize(T value) {
    return value;
  }
};
| 524 | |
| 525 | /** |
| 526 | * String represented as a pair of pointers to char to signed/unsigned |
| 527 | * integrals. Assumes NO whitespace before or after, and also that the |
| 528 | * string is composed entirely of digits (and an optional sign only for |
| 529 | * signed types). String may be empty, in which case digits_to returns |
| 530 | * an appropriate error. |
| 531 | */ |
| 532 | template <class Tgt> |
| 533 | inline Expected<Tgt, ConversionCode> digits_to( |
| 534 | const char* b, |
| 535 | const char* const e) noexcept { |
| 536 | using UT = typename std::make_unsigned<Tgt>::type; |
| 537 | assert(b <= e); |
| 538 | |
| 539 | SignedValueHandler<Tgt> sgn; |
| 540 | |
| 541 | auto err = sgn.init(b); |
| 542 | if (UNLIKELY(err != ConversionCode::SUCCESS)) { |
| 543 | return makeUnexpected(err); |
| 544 | } |
| 545 | |
| 546 | size_t size = size_t(e - b); |
| 547 | |
| 548 | /* Although the string is entirely made of digits, we still need to |
| 549 | * check for overflow. |
| 550 | */ |
| 551 | if (size > std::numeric_limits<UT>::digits10) { |
| 552 | // Leading zeros? |
| 553 | if (b < e && *b == '0') { |
| 554 | for (++b;; ++b) { |
| 555 | if (b == e) { |
| 556 | return Tgt(0); // just zeros, e.g. "0000" |
| 557 | } |
| 558 | if (*b != '0') { |
| 559 | size = size_t(e - b); |
| 560 | break; |
| 561 | } |
| 562 | } |
| 563 | } |
| 564 | if (size > std::numeric_limits<UT>::digits10 && |
| 565 | (size != std::numeric_limits<UT>::digits10 + 1 || |
| 566 | strncmp(b, MaxString<UT>::value, size) > 0)) { |
| 567 | return makeUnexpected(sgn.overflow()); |
| 568 | } |
| 569 | } |
| 570 | |
| 571 | // Here we know that the number won't overflow when |
| 572 | // converted. Proceed without checks. |
| 573 | |
| 574 | UT result = 0; |
| 575 | |
| 576 | for (; e - b >= 4; b += 4) { |
| 577 | result *= static_cast<UT>(10000); |
| 578 | const int32_t r0 = shift1000[static_cast<size_t>(b[0])]; |
| 579 | const int32_t r1 = shift100[static_cast<size_t>(b[1])]; |
| 580 | const int32_t r2 = shift10[static_cast<size_t>(b[2])]; |
| 581 | const int32_t r3 = shift1[static_cast<size_t>(b[3])]; |
| 582 | const auto sum = r0 + r1 + r2 + r3; |
| 583 | if (sum >= OOR) { |
| 584 | goto outOfRange; |
| 585 | } |
| 586 | result += UT(sum); |
| 587 | } |
| 588 | |
| 589 | switch (e - b) { |
| 590 | case 3: { |
| 591 | const int32_t r0 = shift100[static_cast<size_t>(b[0])]; |
| 592 | const int32_t r1 = shift10[static_cast<size_t>(b[1])]; |
| 593 | const int32_t r2 = shift1[static_cast<size_t>(b[2])]; |
| 594 | const auto sum = r0 + r1 + r2; |
| 595 | if (sum >= OOR) { |
| 596 | goto outOfRange; |
| 597 | } |
| 598 | result = UT(1000 * result + sum); |
| 599 | break; |
| 600 | } |
| 601 | case 2: { |
| 602 | const int32_t r0 = shift10[static_cast<size_t>(b[0])]; |
| 603 | const int32_t r1 = shift1[static_cast<size_t>(b[1])]; |
| 604 | const auto sum = r0 + r1; |
| 605 | if (sum >= OOR) { |
| 606 | goto outOfRange; |
| 607 | } |
| 608 | result = UT(100 * result + sum); |
| 609 | break; |
| 610 | } |
| 611 | case 1: { |
| 612 | const int32_t sum = shift1[static_cast<size_t>(b[0])]; |
| 613 | if (sum >= OOR) { |
| 614 | goto outOfRange; |
| 615 | } |
| 616 | result = UT(10 * result + sum); |
| 617 | break; |
| 618 | } |
| 619 | default: |
| 620 | assert(b == e); |
| 621 | if (size == 0) { |
| 622 | return makeUnexpected(ConversionCode::NO_DIGITS); |
| 623 | } |
| 624 | break; |
| 625 | } |
| 626 | |
| 627 | return sgn.finalize(result); |
| 628 | |
| 629 | outOfRange: |
| 630 | return makeUnexpected(ConversionCode::NON_DIGIT_CHAR); |
| 631 | } |
| 632 | |
// Explicit instantiations of digits_to for the built-in integral types. The
// template is defined in this file, so these emit the specializations here
// (presumably for use by other translation units; verify against Conv.h).
template Expected<char, ConversionCode> digits_to<char>(
    const char*,
    const char*) noexcept;
template Expected<signed char, ConversionCode> digits_to<signed char>(
    const char*,
    const char*) noexcept;
template Expected<unsigned char, ConversionCode> digits_to<unsigned char>(
    const char*,
    const char*) noexcept;

template Expected<short, ConversionCode> digits_to<short>(
    const char*,
    const char*) noexcept;
template Expected<unsigned short, ConversionCode> digits_to<unsigned short>(
    const char*,
    const char*) noexcept;

template Expected<int, ConversionCode> digits_to<int>(
    const char*,
    const char*) noexcept;
template Expected<unsigned int, ConversionCode> digits_to<unsigned int>(
    const char*,
    const char*) noexcept;

template Expected<long, ConversionCode> digits_to<long>(
    const char*,
    const char*) noexcept;
template Expected<unsigned long, ConversionCode> digits_to<unsigned long>(
    const char*,
    const char*) noexcept;

template Expected<long long, ConversionCode> digits_to<long long>(
    const char*,
    const char*) noexcept;
template Expected<unsigned long long, ConversionCode>
digits_to<unsigned long long>(const char*, const char*) noexcept;

// 128-bit variants, only where the platform provides __int128.
#if FOLLY_HAVE_INT128_T
template Expected<__int128, ConversionCode> digits_to<__int128>(
    const char*,
    const char*) noexcept;
template Expected<unsigned __int128, ConversionCode>
digits_to<unsigned __int128>(const char*, const char*) noexcept;
#endif
| 677 | |
| 678 | /** |
| 679 | * StringPiece to integrals, with progress information. Alters the |
| 680 | * StringPiece parameter to munch the already-parsed characters. |
| 681 | */ |
| 682 | template <class Tgt> |
| 683 | Expected<Tgt, ConversionCode> str_to_integral(StringPiece* src) noexcept { |
| 684 | using UT = typename std::make_unsigned<Tgt>::type; |
| 685 | |
| 686 | auto b = src->data(), past = src->data() + src->size(); |
| 687 | |
| 688 | for (;; ++b) { |
| 689 | if (UNLIKELY(b >= past)) { |
| 690 | return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING); |
| 691 | } |
| 692 | if (!std::isspace(*b)) { |
| 693 | break; |
| 694 | } |
| 695 | } |
| 696 | |
| 697 | SignedValueHandler<Tgt> sgn; |
| 698 | auto err = sgn.init(b); |
| 699 | |
| 700 | if (UNLIKELY(err != ConversionCode::SUCCESS)) { |
| 701 | return makeUnexpected(err); |
| 702 | } |
| 703 | if (std::is_signed<Tgt>::value && UNLIKELY(b >= past)) { |
| 704 | return makeUnexpected(ConversionCode::NO_DIGITS); |
| 705 | } |
| 706 | if (UNLIKELY(!isdigit(*b))) { |
| 707 | return makeUnexpected(ConversionCode::NON_DIGIT_CHAR); |
| 708 | } |
| 709 | |
| 710 | auto m = findFirstNonDigit(b + 1, past); |
| 711 | |
| 712 | auto tmp = digits_to<UT>(b, m); |
| 713 | |
| 714 | if (UNLIKELY(!tmp.hasValue())) { |
| 715 | return makeUnexpected( |
| 716 | tmp.error() == ConversionCode::POSITIVE_OVERFLOW ? sgn.overflow() |
| 717 | : tmp.error()); |
| 718 | } |
| 719 | |
| 720 | auto res = sgn.finalize(tmp.value()); |
| 721 | |
| 722 | if (res.hasValue()) { |
| 723 | src->advance(size_t(m - src->data())); |
| 724 | } |
| 725 | |
| 726 | return res; |
| 727 | } |
| 728 | |
// Explicit instantiations of str_to_integral for the built-in integral
// types. The template is defined in this file, so these emit the
// specializations here (presumably for use by other translation units;
// verify against Conv.h).
template Expected<char, ConversionCode> str_to_integral<char>(
    StringPiece* src) noexcept;
template Expected<signed char, ConversionCode> str_to_integral<signed char>(
    StringPiece* src) noexcept;
template Expected<unsigned char, ConversionCode> str_to_integral<unsigned char>(
    StringPiece* src) noexcept;

template Expected<short, ConversionCode> str_to_integral<short>(
    StringPiece* src) noexcept;
template Expected<unsigned short, ConversionCode>
str_to_integral<unsigned short>(StringPiece* src) noexcept;

template Expected<int, ConversionCode> str_to_integral<int>(
    StringPiece* src) noexcept;
template Expected<unsigned int, ConversionCode> str_to_integral<unsigned int>(
    StringPiece* src) noexcept;

template Expected<long, ConversionCode> str_to_integral<long>(
    StringPiece* src) noexcept;
template Expected<unsigned long, ConversionCode> str_to_integral<unsigned long>(
    StringPiece* src) noexcept;

template Expected<long long, ConversionCode> str_to_integral<long long>(
    StringPiece* src) noexcept;
template Expected<unsigned long long, ConversionCode>
str_to_integral<unsigned long long>(StringPiece* src) noexcept;

// 128-bit variants, only where the platform provides __int128.
#if FOLLY_HAVE_INT128_T
template Expected<__int128, ConversionCode> str_to_integral<__int128>(
    StringPiece* src) noexcept;
template Expected<unsigned __int128, ConversionCode>
str_to_integral<unsigned __int128>(StringPiece* src) noexcept;
#endif
| 762 | |
| 763 | } // namespace detail |
| 764 | |
| 765 | ConversionError makeConversionError(ConversionCode code, StringPiece input) { |
| 766 | using namespace detail; |
| 767 | static_assert( |
| 768 | std::is_unsigned<std::underlying_type<ConversionCode>::type>::value, |
| 769 | "ConversionCode should be unsigned" ); |
| 770 | assert((std::size_t)code < kErrorStrings.size()); |
| 771 | const ErrorString& err = kErrorStrings[(std::size_t)code]; |
| 772 | if (code == ConversionCode::EMPTY_INPUT_STRING && input.empty()) { |
| 773 | return {err.string, code}; |
| 774 | } |
| 775 | std::string tmp(err.string); |
| 776 | tmp.append(": " ); |
| 777 | if (err.quote) { |
| 778 | tmp.append(1, '"'); |
| 779 | } |
| 780 | if (input.size() > 0) { |
| 781 | tmp.append(input.data(), input.size()); |
| 782 | } |
| 783 | if (err.quote) { |
| 784 | tmp.append(1, '"'); |
| 785 | } |
| 786 | return {tmp, code}; |
| 787 | } |
| 788 | |
| 789 | } // namespace folly |
| 790 | |