1 | /* |
2 | * Copyright 2011-present Facebook, Inc. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. |
6 | * You may obtain a copy of the License at |
7 | * |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * |
10 | * Unless required by applicable law or agreed to in writing, software |
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | * See the License for the specific language governing permissions and |
14 | * limitations under the License. |
15 | */ |
16 | #include <folly/Conv.h> |
17 | #include <array> |
18 | |
19 | namespace folly { |
20 | namespace detail { |
21 | |
22 | namespace { |
23 | |
/**
 * Scans the half-open range [b, e) and returns a pointer to the first
 * character that is not a decimal digit ('0'..'9'). If every character in
 * the range is a digit (or the range is empty), the end of the scan is
 * returned. The input is assumed to carry no leading whitespace or sign.
 *
 * Equivalent to:
 *   for (;; ++b) {
 *     if (b >= e || !isdigit(*b)) return b;
 *   }
 */
inline const char* findFirstNonDigit(const char* b, const char* e) {
  while (b < e) {
    // Unsigned subtraction folds both "below '0'" and "above '9'" into a
    // single comparison: anything below '0' wraps around to a huge value.
    const unsigned offset = static_cast<unsigned>(*b) - '0';
    if (offset > 9) {
      return b;
    }
    ++b;
  }
  return b;
}
46 | |
// Decimal string representation of the largest value of each unsigned
// integral type. digits_to() compares a candidate digit string that has
// exactly digits10 + 1 characters against this string (via strncmp) to
// decide whether it overflows.
template <class T>
struct MaxString {
  static const char* const value;
};

template <>
const char* const MaxString<uint8_t>::value = "255";
template <>
const char* const MaxString<uint16_t>::value = "65535";
template <>
const char* const MaxString<uint32_t>::value = "4294967295";
// The width of unsigned long differs between data models. Select on sizeof
// rather than on the __SIZEOF_LONG__ macro: an undefined macro evaluates to
// 0 inside #if, which would silently pick the 64-bit string on compilers
// that do not predefine it.
template <>
const char* const MaxString<unsigned long>::value =
    sizeof(unsigned long) == 4 ? "4294967295" : "18446744073709551615";
static_assert(
    sizeof(unsigned long) == 4 || sizeof(unsigned long) == 8,
    "Wrong value for MaxString<unsigned long>::value,"
    " please update.");
template <>
const char* const MaxString<unsigned long long>::value = "18446744073709551615";
static_assert(
    sizeof(unsigned long long) == 8,
    "Wrong value for MaxString<unsigned long long>::value"
    ", please update.");

#if FOLLY_HAVE_INT128_T
template <>
const char* const MaxString<__uint128_t>::value =
    "340282366920938463463374607431768211455";
#endif
82 | |
/*
 * Lookup tables that convert a decimal character value to an integral
 * binary value, scaled by a decimal "shift" multiplier.
 * For all character values in the range '0'..'9', the table entry at that
 * index holds the digit's decimal value multiplied by the shift.
 * For all other values, the lookup table returns an invalid OOR value.
 */
// Out-of-range flag value, larger than the largest value that can fit in
// four decimal bytes (9999), but four of these added up together should
// still not overflow uint16_t (4 * 10000 = 40000).
// digits_to() sums up to four table lookups and treats a sum >= OOR as
// "at least one character was not a digit".
constexpr int32_t OOR = 10000;
94 | |
// Ones place: entries '0'..'9' (indices 48..57) hold 0..9; every other one
// of the 256 entries holds OOR.
alignas(16) constexpr uint16_t shift1[] = {
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0-9
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 10
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 20
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 30
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, 0, 1, // 40
    2, 3, 4, 5, 6, 7, 8, 9, OOR, OOR, // 50
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 60
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 70
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 80
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 90
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 100
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 110
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 120
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 130
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 140
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 150
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 160
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 170
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 180
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 190
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 200
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 210
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 220
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 230
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 240
    OOR, OOR, OOR, OOR, OOR, OOR // 250
};
123 | |
// Tens place: entries '0'..'9' (indices 48..57) hold 0, 10, ..., 90; every
// other one of the 256 entries holds OOR.
alignas(16) constexpr uint16_t shift10[] = {
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0-9
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 10
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 20
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 30
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, 0, 10, // 40
    20, 30, 40, 50, 60, 70, 80, 90, OOR, OOR, // 50
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 60
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 70
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 80
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 90
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 100
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 110
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 120
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 130
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 140
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 150
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 160
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 170
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 180
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 190
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 200
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 210
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 220
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 230
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 240
    OOR, OOR, OOR, OOR, OOR, OOR // 250
};
152 | |
// Hundreds place: entries '0'..'9' (indices 48..57) hold 0, 100, ..., 900;
// every other one of the 256 entries holds OOR.
alignas(16) constexpr uint16_t shift100[] = {
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0-9
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 10
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 20
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 30
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, 0, 100, // 40
    200, 300, 400, 500, 600, 700, 800, 900, OOR, OOR, // 50
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 60
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 70
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 80
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 90
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 100
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 110
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 120
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 130
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 140
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 150
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 160
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 170
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 180
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 190
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 200
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 210
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 220
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 230
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 240
    OOR, OOR, OOR, OOR, OOR, OOR // 250
};
181 | |
// Thousands place: entries '0'..'9' (indices 48..57) hold 0, 1000, ...,
// 9000; every other one of the 256 entries holds OOR.
alignas(16) constexpr uint16_t shift1000[] = {
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0-9
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 10
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 20
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 30
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, 0, 1000, // 40
    2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, OOR, OOR, // 50
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 60
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 70
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 80
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 90
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 100
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 110
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 120
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 130
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 140
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 150
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 160
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 170
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 180
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 190
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 200
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 210
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 220
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 230
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 240
    OOR, OOR, OOR, OOR, OOR, OOR // 250
};
210 | |
// One entry of the error-message table used by makeConversionError().
struct ErrorString {
  // Human-readable description of the error.
  const char* string;
  // When true, makeConversionError() wraps the offending input in double
  // quotes when appending it to the message.
  bool quote;
};
215 | |
// Keep this in sync with ConversionCode in Conv.h
// The table is indexed by the numeric value of a ConversionCode (see
// makeConversionError()), so the order of the entries matters. The array
// size is tied to ConversionCode::NUM_ERROR_CODES.
constexpr const std::array<
    ErrorString,
    static_cast<std::size_t>(ConversionCode::NUM_ERROR_CODES)>
    kErrorStrings{{
        {"Success", true},
        {"Empty input string", true},
        {"No digits found in input string", true},
        {"Integer overflow when parsing bool (must be 0 or 1)", true},
        {"Invalid value for bool", true},
        {"Non-digit character found", true},
        {"Invalid leading character", true},
        {"Overflow during conversion", true},
        {"Negative overflow during conversion", true},
        {"Unable to convert string to floating point value", true},
        {"Non-whitespace character found after end of conversion", true},
        // The arithmetic-conversion errors below are reported without
        // quoting the input.
        {"Overflow during arithmetic conversion", false},
        {"Negative overflow during arithmetic conversion", false},
        {"Loss of precision during arithmetic conversion", false},
    }};
236 | |
237 | // Check if ASCII is really ASCII |
238 | using IsAscii = |
239 | bool_constant<'A' == 65 && 'Z' == 90 && 'a' == 97 && 'z' == 122>; |
240 | |
241 | // The code in this file that uses tolower() really only cares about |
242 | // 7-bit ASCII characters, so we can take a nice shortcut here. |
243 | inline char tolower_ascii(char in) { |
244 | return IsAscii::value ? in | 0x20 : char(std::tolower(in)); |
245 | } |
246 | |
247 | inline bool bool_str_cmp(const char** b, size_t len, const char* value) { |
248 | // Can't use strncasecmp, since we want to ensure that the full value matches |
249 | const char* p = *b; |
250 | const char* e = *b + len; |
251 | const char* v = value; |
252 | while (*v != '\0') { |
253 | if (p == e || tolower_ascii(*p) != *v) { // value is already lowercase |
254 | return false; |
255 | } |
256 | ++p; |
257 | ++v; |
258 | } |
259 | |
260 | *b = p; |
261 | return true; |
262 | } |
263 | |
264 | } // namespace |
265 | |
266 | Expected<bool, ConversionCode> str_to_bool(StringPiece* src) noexcept { |
267 | auto b = src->begin(), e = src->end(); |
268 | for (;; ++b) { |
269 | if (b >= e) { |
270 | return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING); |
271 | } |
272 | if (!std::isspace(*b)) { |
273 | break; |
274 | } |
275 | } |
276 | |
277 | bool result; |
278 | size_t len = size_t(e - b); |
279 | switch (*b) { |
280 | case '0': |
281 | case '1': { |
282 | result = false; |
283 | for (; b < e && isdigit(*b); ++b) { |
284 | if (result || (*b != '0' && *b != '1')) { |
285 | return makeUnexpected(ConversionCode::BOOL_OVERFLOW); |
286 | } |
287 | result = (*b == '1'); |
288 | } |
289 | break; |
290 | } |
291 | case 'y': |
292 | case 'Y': |
293 | result = true; |
294 | if (!bool_str_cmp(&b, len, "yes" )) { |
295 | ++b; // accept the single 'y' character |
296 | } |
297 | break; |
298 | case 'n': |
299 | case 'N': |
300 | result = false; |
301 | if (!bool_str_cmp(&b, len, "no" )) { |
302 | ++b; |
303 | } |
304 | break; |
305 | case 't': |
306 | case 'T': |
307 | result = true; |
308 | if (!bool_str_cmp(&b, len, "true" )) { |
309 | ++b; |
310 | } |
311 | break; |
312 | case 'f': |
313 | case 'F': |
314 | result = false; |
315 | if (!bool_str_cmp(&b, len, "false" )) { |
316 | ++b; |
317 | } |
318 | break; |
319 | case 'o': |
320 | case 'O': |
321 | if (bool_str_cmp(&b, len, "on" )) { |
322 | result = true; |
323 | } else if (bool_str_cmp(&b, len, "off" )) { |
324 | result = false; |
325 | } else { |
326 | return makeUnexpected(ConversionCode::BOOL_INVALID_VALUE); |
327 | } |
328 | break; |
329 | default: |
330 | return makeUnexpected(ConversionCode::BOOL_INVALID_VALUE); |
331 | } |
332 | |
333 | src->assign(b, e); |
334 | |
335 | return result; |
336 | } |
337 | |
338 | /** |
339 | * StringPiece to double, with progress information. Alters the |
340 | * StringPiece parameter to munch the already-parsed characters. |
341 | */ |
342 | template <class Tgt> |
343 | Expected<Tgt, ConversionCode> str_to_floating(StringPiece* src) noexcept { |
344 | using namespace double_conversion; |
345 | static StringToDoubleConverter conv( |
346 | StringToDoubleConverter::ALLOW_TRAILING_JUNK | |
347 | StringToDoubleConverter::ALLOW_LEADING_SPACES, |
348 | 0.0, |
349 | // return this for junk input string |
350 | std::numeric_limits<double>::quiet_NaN(), |
351 | nullptr, |
352 | nullptr); |
353 | |
354 | if (src->empty()) { |
355 | return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING); |
356 | } |
357 | |
358 | int length; |
359 | auto result = conv.StringToDouble( |
360 | src->data(), |
361 | static_cast<int>(src->size()), |
362 | &length); // processed char count |
363 | |
364 | if (!std::isnan(result)) { |
365 | // If we get here with length = 0, the input string is empty. |
366 | // If we get here with result = 0.0, it's either because the string |
367 | // contained only whitespace, or because we had an actual zero value |
368 | // (with potential trailing junk). If it was only whitespace, we |
369 | // want to raise an error; length will point past the last character |
370 | // that was processed, so we need to check if that character was |
371 | // whitespace or not. |
372 | if (length == 0 || |
373 | (result == 0.0 && std::isspace((*src)[size_t(length) - 1]))) { |
374 | return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING); |
375 | } |
376 | if (length >= 2) { |
377 | const char* suffix = src->data() + length - 1; |
378 | // double_conversion doesn't update length correctly when there is an |
379 | // incomplete exponent specifier. Converting "12e-f-g" shouldn't consume |
380 | // any more than "12", but it will consume "12e-". |
381 | |
382 | // "123-" should only parse "123" |
383 | if (*suffix == '-' || *suffix == '+') { |
384 | --suffix; |
385 | --length; |
386 | } |
387 | // "12e-f-g" or "12euro" should only parse "12" |
388 | if (*suffix == 'e' || *suffix == 'E') { |
389 | --length; |
390 | } |
391 | } |
392 | src->advance(size_t(length)); |
393 | return Tgt(result); |
394 | } |
395 | |
396 | auto* e = src->end(); |
397 | auto* b = |
398 | std::find_if_not(src->begin(), e, [](char c) { return std::isspace(c); }); |
399 | |
400 | // There must be non-whitespace, otherwise we would have caught this above |
401 | assert(b < e); |
402 | size_t size = size_t(e - b); |
403 | |
404 | bool negative = false; |
405 | if (*b == '-') { |
406 | negative = true; |
407 | ++b; |
408 | --size; |
409 | } |
410 | |
411 | result = 0.0; |
412 | |
413 | switch (tolower_ascii(*b)) { |
414 | case 'i': |
415 | if (size >= 3 && tolower_ascii(b[1]) == 'n' && |
416 | tolower_ascii(b[2]) == 'f') { |
417 | if (size >= 8 && tolower_ascii(b[3]) == 'i' && |
418 | tolower_ascii(b[4]) == 'n' && tolower_ascii(b[5]) == 'i' && |
419 | tolower_ascii(b[6]) == 't' && tolower_ascii(b[7]) == 'y') { |
420 | b += 8; |
421 | } else { |
422 | b += 3; |
423 | } |
424 | result = std::numeric_limits<Tgt>::infinity(); |
425 | } |
426 | break; |
427 | |
428 | case 'n': |
429 | if (size >= 3 && tolower_ascii(b[1]) == 'a' && |
430 | tolower_ascii(b[2]) == 'n') { |
431 | b += 3; |
432 | result = std::numeric_limits<Tgt>::quiet_NaN(); |
433 | } |
434 | break; |
435 | |
436 | default: |
437 | break; |
438 | } |
439 | |
440 | if (result == 0.0) { |
441 | // All bets are off |
442 | return makeUnexpected(ConversionCode::STRING_TO_FLOAT_ERROR); |
443 | } |
444 | |
445 | if (negative) { |
446 | result = -result; |
447 | } |
448 | |
449 | src->assign(b, e); |
450 | |
451 | return Tgt(result); |
452 | } |
453 | |
// Explicit instantiations of str_to_floating for float and double.
template Expected<float, ConversionCode> str_to_floating<float>(
    StringPiece* src) noexcept;
template Expected<double, ConversionCode> str_to_floating<double>(
    StringPiece* src) noexcept;
458 | |
459 | /** |
460 | * This class takes care of additional processing needed for signed values, |
461 | * like leading sign character and overflow checks. |
462 | */ |
463 | template <typename T, bool IsSigned = std::is_signed<T>::value> |
464 | class SignedValueHandler; |
465 | |
466 | template <typename T> |
467 | class SignedValueHandler<T, true> { |
468 | public: |
469 | ConversionCode init(const char*& b) { |
470 | negative_ = false; |
471 | if (!std::isdigit(*b)) { |
472 | if (*b == '-') { |
473 | negative_ = true; |
474 | } else if (UNLIKELY(*b != '+')) { |
475 | return ConversionCode::INVALID_LEADING_CHAR; |
476 | } |
477 | ++b; |
478 | } |
479 | return ConversionCode::SUCCESS; |
480 | } |
481 | |
482 | ConversionCode overflow() { |
483 | return negative_ ? ConversionCode::NEGATIVE_OVERFLOW |
484 | : ConversionCode::POSITIVE_OVERFLOW; |
485 | } |
486 | |
487 | template <typename U> |
488 | Expected<T, ConversionCode> finalize(U value) { |
489 | T rv; |
490 | if (negative_) { |
491 | rv = T(-value); |
492 | if (UNLIKELY(rv > 0)) { |
493 | return makeUnexpected(ConversionCode::NEGATIVE_OVERFLOW); |
494 | } |
495 | } else { |
496 | rv = T(value); |
497 | if (UNLIKELY(rv < 0)) { |
498 | return makeUnexpected(ConversionCode::POSITIVE_OVERFLOW); |
499 | } |
500 | } |
501 | return rv; |
502 | } |
503 | |
504 | private: |
505 | bool negative_; |
506 | }; |
507 | |
// For unsigned types, we don't need any extra processing
template <typename T>
class SignedValueHandler<T, false> {
 public:
  // No sign character is consumed for unsigned targets; always succeeds and
  // leaves the pointer untouched.
  ConversionCode init(const char*&) {
    return ConversionCode::SUCCESS;
  }

  // Unsigned values can only overflow in the positive direction.
  ConversionCode overflow() {
    return ConversionCode::POSITIVE_OVERFLOW;
  }

  // The parsed magnitude is already the final value.
  Expected<T, ConversionCode> finalize(T value) {
    return value;
  }
};
524 | |
525 | /** |
526 | * String represented as a pair of pointers to char to signed/unsigned |
527 | * integrals. Assumes NO whitespace before or after, and also that the |
528 | * string is composed entirely of digits (and an optional sign only for |
529 | * signed types). String may be empty, in which case digits_to returns |
530 | * an appropriate error. |
531 | */ |
532 | template <class Tgt> |
533 | inline Expected<Tgt, ConversionCode> digits_to( |
534 | const char* b, |
535 | const char* const e) noexcept { |
536 | using UT = typename std::make_unsigned<Tgt>::type; |
537 | assert(b <= e); |
538 | |
539 | SignedValueHandler<Tgt> sgn; |
540 | |
541 | auto err = sgn.init(b); |
542 | if (UNLIKELY(err != ConversionCode::SUCCESS)) { |
543 | return makeUnexpected(err); |
544 | } |
545 | |
546 | size_t size = size_t(e - b); |
547 | |
548 | /* Although the string is entirely made of digits, we still need to |
549 | * check for overflow. |
550 | */ |
551 | if (size > std::numeric_limits<UT>::digits10) { |
552 | // Leading zeros? |
553 | if (b < e && *b == '0') { |
554 | for (++b;; ++b) { |
555 | if (b == e) { |
556 | return Tgt(0); // just zeros, e.g. "0000" |
557 | } |
558 | if (*b != '0') { |
559 | size = size_t(e - b); |
560 | break; |
561 | } |
562 | } |
563 | } |
564 | if (size > std::numeric_limits<UT>::digits10 && |
565 | (size != std::numeric_limits<UT>::digits10 + 1 || |
566 | strncmp(b, MaxString<UT>::value, size) > 0)) { |
567 | return makeUnexpected(sgn.overflow()); |
568 | } |
569 | } |
570 | |
571 | // Here we know that the number won't overflow when |
572 | // converted. Proceed without checks. |
573 | |
574 | UT result = 0; |
575 | |
576 | for (; e - b >= 4; b += 4) { |
577 | result *= static_cast<UT>(10000); |
578 | const int32_t r0 = shift1000[static_cast<size_t>(b[0])]; |
579 | const int32_t r1 = shift100[static_cast<size_t>(b[1])]; |
580 | const int32_t r2 = shift10[static_cast<size_t>(b[2])]; |
581 | const int32_t r3 = shift1[static_cast<size_t>(b[3])]; |
582 | const auto sum = r0 + r1 + r2 + r3; |
583 | if (sum >= OOR) { |
584 | goto outOfRange; |
585 | } |
586 | result += UT(sum); |
587 | } |
588 | |
589 | switch (e - b) { |
590 | case 3: { |
591 | const int32_t r0 = shift100[static_cast<size_t>(b[0])]; |
592 | const int32_t r1 = shift10[static_cast<size_t>(b[1])]; |
593 | const int32_t r2 = shift1[static_cast<size_t>(b[2])]; |
594 | const auto sum = r0 + r1 + r2; |
595 | if (sum >= OOR) { |
596 | goto outOfRange; |
597 | } |
598 | result = UT(1000 * result + sum); |
599 | break; |
600 | } |
601 | case 2: { |
602 | const int32_t r0 = shift10[static_cast<size_t>(b[0])]; |
603 | const int32_t r1 = shift1[static_cast<size_t>(b[1])]; |
604 | const auto sum = r0 + r1; |
605 | if (sum >= OOR) { |
606 | goto outOfRange; |
607 | } |
608 | result = UT(100 * result + sum); |
609 | break; |
610 | } |
611 | case 1: { |
612 | const int32_t sum = shift1[static_cast<size_t>(b[0])]; |
613 | if (sum >= OOR) { |
614 | goto outOfRange; |
615 | } |
616 | result = UT(10 * result + sum); |
617 | break; |
618 | } |
619 | default: |
620 | assert(b == e); |
621 | if (size == 0) { |
622 | return makeUnexpected(ConversionCode::NO_DIGITS); |
623 | } |
624 | break; |
625 | } |
626 | |
627 | return sgn.finalize(result); |
628 | |
629 | outOfRange: |
630 | return makeUnexpected(ConversionCode::NON_DIGIT_CHAR); |
631 | } |
632 | |
// Explicit instantiations of digits_to for the character and integral
// types listed below (plus the 128-bit types when FOLLY_HAVE_INT128_T is
// set).
template Expected<char, ConversionCode> digits_to<char>(
    const char*,
    const char*) noexcept;
template Expected<signed char, ConversionCode> digits_to<signed char>(
    const char*,
    const char*) noexcept;
template Expected<unsigned char, ConversionCode> digits_to<unsigned char>(
    const char*,
    const char*) noexcept;

template Expected<short, ConversionCode> digits_to<short>(
    const char*,
    const char*) noexcept;
template Expected<unsigned short, ConversionCode> digits_to<unsigned short>(
    const char*,
    const char*) noexcept;

template Expected<int, ConversionCode> digits_to<int>(
    const char*,
    const char*) noexcept;
template Expected<unsigned int, ConversionCode> digits_to<unsigned int>(
    const char*,
    const char*) noexcept;

template Expected<long, ConversionCode> digits_to<long>(
    const char*,
    const char*) noexcept;
template Expected<unsigned long, ConversionCode> digits_to<unsigned long>(
    const char*,
    const char*) noexcept;

template Expected<long long, ConversionCode> digits_to<long long>(
    const char*,
    const char*) noexcept;
template Expected<unsigned long long, ConversionCode>
digits_to<unsigned long long>(const char*, const char*) noexcept;

#if FOLLY_HAVE_INT128_T
template Expected<__int128, ConversionCode> digits_to<__int128>(
    const char*,
    const char*) noexcept;
template Expected<unsigned __int128, ConversionCode>
digits_to<unsigned __int128>(const char*, const char*) noexcept;
#endif
677 | |
678 | /** |
679 | * StringPiece to integrals, with progress information. Alters the |
680 | * StringPiece parameter to munch the already-parsed characters. |
681 | */ |
682 | template <class Tgt> |
683 | Expected<Tgt, ConversionCode> str_to_integral(StringPiece* src) noexcept { |
684 | using UT = typename std::make_unsigned<Tgt>::type; |
685 | |
686 | auto b = src->data(), past = src->data() + src->size(); |
687 | |
688 | for (;; ++b) { |
689 | if (UNLIKELY(b >= past)) { |
690 | return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING); |
691 | } |
692 | if (!std::isspace(*b)) { |
693 | break; |
694 | } |
695 | } |
696 | |
697 | SignedValueHandler<Tgt> sgn; |
698 | auto err = sgn.init(b); |
699 | |
700 | if (UNLIKELY(err != ConversionCode::SUCCESS)) { |
701 | return makeUnexpected(err); |
702 | } |
703 | if (std::is_signed<Tgt>::value && UNLIKELY(b >= past)) { |
704 | return makeUnexpected(ConversionCode::NO_DIGITS); |
705 | } |
706 | if (UNLIKELY(!isdigit(*b))) { |
707 | return makeUnexpected(ConversionCode::NON_DIGIT_CHAR); |
708 | } |
709 | |
710 | auto m = findFirstNonDigit(b + 1, past); |
711 | |
712 | auto tmp = digits_to<UT>(b, m); |
713 | |
714 | if (UNLIKELY(!tmp.hasValue())) { |
715 | return makeUnexpected( |
716 | tmp.error() == ConversionCode::POSITIVE_OVERFLOW ? sgn.overflow() |
717 | : tmp.error()); |
718 | } |
719 | |
720 | auto res = sgn.finalize(tmp.value()); |
721 | |
722 | if (res.hasValue()) { |
723 | src->advance(size_t(m - src->data())); |
724 | } |
725 | |
726 | return res; |
727 | } |
728 | |
// Explicit instantiations of str_to_integral for the character and
// integral types listed below (plus the 128-bit types when
// FOLLY_HAVE_INT128_T is set).
template Expected<char, ConversionCode> str_to_integral<char>(
    StringPiece* src) noexcept;
template Expected<signed char, ConversionCode> str_to_integral<signed char>(
    StringPiece* src) noexcept;
template Expected<unsigned char, ConversionCode> str_to_integral<unsigned char>(
    StringPiece* src) noexcept;

template Expected<short, ConversionCode> str_to_integral<short>(
    StringPiece* src) noexcept;
template Expected<unsigned short, ConversionCode>
str_to_integral<unsigned short>(StringPiece* src) noexcept;

template Expected<int, ConversionCode> str_to_integral<int>(
    StringPiece* src) noexcept;
template Expected<unsigned int, ConversionCode> str_to_integral<unsigned int>(
    StringPiece* src) noexcept;

template Expected<long, ConversionCode> str_to_integral<long>(
    StringPiece* src) noexcept;
template Expected<unsigned long, ConversionCode> str_to_integral<unsigned long>(
    StringPiece* src) noexcept;

template Expected<long long, ConversionCode> str_to_integral<long long>(
    StringPiece* src) noexcept;
template Expected<unsigned long long, ConversionCode>
str_to_integral<unsigned long long>(StringPiece* src) noexcept;

#if FOLLY_HAVE_INT128_T
template Expected<__int128, ConversionCode> str_to_integral<__int128>(
    StringPiece* src) noexcept;
template Expected<unsigned __int128, ConversionCode>
str_to_integral<unsigned __int128>(StringPiece* src) noexcept;
#endif
762 | |
763 | } // namespace detail |
764 | |
765 | ConversionError makeConversionError(ConversionCode code, StringPiece input) { |
766 | using namespace detail; |
767 | static_assert( |
768 | std::is_unsigned<std::underlying_type<ConversionCode>::type>::value, |
769 | "ConversionCode should be unsigned" ); |
770 | assert((std::size_t)code < kErrorStrings.size()); |
771 | const ErrorString& err = kErrorStrings[(std::size_t)code]; |
772 | if (code == ConversionCode::EMPTY_INPUT_STRING && input.empty()) { |
773 | return {err.string, code}; |
774 | } |
775 | std::string tmp(err.string); |
776 | tmp.append(": " ); |
777 | if (err.quote) { |
778 | tmp.append(1, '"'); |
779 | } |
780 | if (input.size() > 0) { |
781 | tmp.append(input.data(), input.size()); |
782 | } |
783 | if (err.quote) { |
784 | tmp.append(1, '"'); |
785 | } |
786 | return {tmp, code}; |
787 | } |
788 | |
789 | } // namespace folly |
790 | |