Conv.cpp source code [folly/Conv.cpp]

1	/*
2	* Copyright 2011-present Facebook, Inc.
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16	#include <folly/Conv.h>
17	#include <array>
18
19	namespace folly {
20	namespace detail {
21
22	namespace {
23
/**
 * Finds the first non-digit in a string. Assumes the string starts with
 * NO whitespace and NO sign.
 *
 * The semantics of the routine is:
 *   for (;; ++b) {
 *     if (b >= e || !isdigit(*b)) return b;
 *   }
 *
 * @param b  first character of the range to scan
 * @param e  one past the last character of the range
 * @return   pointer to the first character in [b, e) that is not one of
 *           '0'..'9', or e when every character is a digit (or b >= e)
 */
inline const char* findFirstNonDigit(const char* b, const char* e) {
  for (; b < e; ++b) {
    // Unsigned subtraction maps '0'..'9' onto 0..9 and wraps every other
    // character (including negative char values) to something >= 10, so a
    // single comparison tests both ends of the digit range.
    auto const c = static_cast<unsigned>(*b) - '0';
    if (c >= 10) {
      break;
    }
  }
  return b;
}
46
// Maximum value of an unsigned integral type, spelled as a decimal string.
// digits_to compares candidate digit strings against it to detect overflow
// without doing any arithmetic.
template <class T>
struct MaxString {
  static const char* const value;
};

template <>
const char* const MaxString<uint8_t>::value = "255";
template <>
const char* const MaxString<uint16_t>::value = "65535";
template <>
const char* const MaxString<uint32_t>::value = "4294967295";
// The width of unsigned long differs between data models. Select on sizeof
// rather than on the compiler-specific __SIZEOF_LONG__ macro, which evaluates
// to 0 inside #if on toolchains that do not define it and would then pick
// the 64-bit string even when long is 32 bits wide.
template <>
const char* const MaxString<unsigned long>::value =
    sizeof(unsigned long) == 4 ? "4294967295" : "18446744073709551615";
static_assert(
    sizeof(unsigned long) == 4 || sizeof(unsigned long) == 8,
    "Wrong value for MaxString<unsigned long>::value,"
    " please update.");
template <>
const char* const MaxString<unsigned long long>::value = "18446744073709551615";
static_assert(
    sizeof(unsigned long long) == 8,
    "Wrong value for MaxString<unsigned long long>::value"
    ", please update.");

#if FOLLY_HAVE_INT128_T
template <>
const char* const MaxString<__uint128_t>::value =
    "340282366920938463463374607431768211455";
#endif
82
/*
 * Lookup tables that convert from a decimal character value to an integral
 * binary value, shifted by a decimal "shift" multiplier.
 * For all character values in the range '0'..'9', the table at those
 * index locations returns the actual decimal value shifted by the multiplier.
 * For all other values, the lookup table returns an invalid OOR value.
 */
// Out-of-range flag value, larger than the largest value that can fit in
// four decimal bytes (9999), but four of these added up together should
// still not overflow uint16_t.
constexpr int32_t OOR = 10000;

// Digit value * 1 for '0'..'9' (indices 48..57), OOR everywhere else.
alignas(16) constexpr uint16_t shift1[] = {
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0-9
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 10
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 20
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 30
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, 0,   1,   // 40
    2,   3,   4,   5,   6,   7,   8,   9,   OOR, OOR, // 50
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 60
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 70
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 80
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 90
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 100
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 110
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 120
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 130
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 140
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 150
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 160
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 170
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 180
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 190
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 200
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 210
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 220
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 230
    OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 240
    OOR, OOR, OOR, OOR, OOR, OOR // 250
};
// The table is indexed by a character value, so it needs exactly one entry
// for each of the 256 possible byte values.
static_assert(
    sizeof(shift1) / sizeof(shift1[0]) == 256,
    "shift1 must have 256 entries");
123
124	alignas(`16`) constexpr uint16_t shift10[] = {
125	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0-9
126	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 10
127	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 20
128	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 30
129	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, `0`, `10`, // 40
130	`20`, `30`, `40`, `50`, `60`, `70`, `80`, `90`, OOR, OOR,
131	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 60
132	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 70
133	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 80
134	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 90
135	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 100
136	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 110
137	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 120
138	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 130
139	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 140
140	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 150
141	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 160
142	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 170
143	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 180
144	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 190
145	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 200
146	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 210
147	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 220
148	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 230
149	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 240
150	OOR, OOR, OOR, OOR, OOR, OOR // 250
151	};
152
153	alignas(`16`) constexpr uint16_t shift100[] = {
154	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0-9
155	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 10
156	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 20
157	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 30
158	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, `0`, `100`, // 40
159	`200`, `300`, `400`, `500`, `600`, `700`, `800`, `900`, OOR, OOR,
160	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 60
161	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 70
162	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 80
163	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 90
164	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 100
165	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 110
166	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 120
167	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 130
168	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 140
169	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 150
170	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 160
171	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 170
172	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 180
173	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 190
174	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 200
175	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 210
176	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 220
177	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 230
178	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 240
179	OOR, OOR, OOR, OOR, OOR, OOR // 250
180	};
181
182	alignas(`16`) constexpr uint16_t shift1000[] = {
183	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 0-9
184	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 10
185	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 20
186	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 30
187	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, `0`, `1000`, // 40
188	`2000`, `3000`, `4000`, `5000`, `6000`, `7000`, `8000`, `9000`, OOR, OOR,
189	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 60
190	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 70
191	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 80
192	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 90
193	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 100
194	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 110
195	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 120
196	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 130
197	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 140
198	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 150
199	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 160
200	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 170
201	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 180
202	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 190
203	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 200
204	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 210
205	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 220
206	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 230
207	OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, // 240
208	OOR, OOR, OOR, OOR, OOR, OOR // 250
209	};
210
// Message text for one conversion error, plus a flag telling
// makeConversionError whether the offending input should be wrapped in
// double quotes when it is appended to the message.
struct ErrorString {
  const char* string; // human-readable description of the error
  bool quote; // true => surround the appended input with '"'
};
215
216	// Keep this in sync with ConversionCode in Conv.h
217	constexpr const std::array<
218	ErrorString,
219	static_cast<std::size_t>(ConversionCode::NUM_ERROR_CODES)>
220	kErrorStrings{{
221	{"Success", true},
222	{"Empty input string", true},
223	{"No digits found in input string", true},
224	{"Integer overflow when parsing bool (must be 0 or 1)", true},
225	{"Invalid value for bool", true},
226	{"Non-digit character found", true},
227	{"Invalid leading character", true},
228	{"Overflow during conversion", true},
229	{"Negative overflow during conversion", true},
230	{"Unable to convert string to floating point value", true},
231	{"Non-whitespace character found after end of conversion", true},
232	{"Overflow during arithmetic conversion", false},
233	{"Negative overflow during arithmetic conversion", false},
234	{"Loss of precision during arithmetic conversion", false},
235	}};
236
// Check if ASCII is really ASCII
using IsAscii = std::integral_constant<
    bool,
    'A' == 65 && 'Z' == 90 && 'a' == 97 && 'z' == 122>;

// The code in this file that uses tolower() really only cares about
// 7-bit ASCII characters, so we can take a nice shortcut here.
//
// With an ASCII execution character set the result is simply `in` with bit
// 0x20 set: uppercase letters map to lowercase, while lowercase letters,
// digits and the sign characters come back unchanged. Other characters may
// be altered, which is fine for the comparisons made in this file.
inline char tolower_ascii(char in) {
  return IsAscii::value
      ? static_cast<char>(in | 0x20)
      // std::tolower requires a value representable as unsigned char.
      : static_cast<char>(std::tolower(static_cast<unsigned char>(in)));
}
246
247	inline bool bool_str_cmp(const char** b, size_t len, const char* value) {
248	// Can't use strncasecmp, since we want to ensure that the full value matches
249	const char* p = *b;
250	const char* e = *b + len;
251	const char* v = value;
252	while (*v != `'\0'`) {
253	if (p == e \|\| tolower_ascii(p) != v) { // value is already lowercase
254	return false;
255	}
256	++p;
257	++v;
258	}
259
260	*b = p;
261	return true;
262	}
263
264	} // namespace
265
266	Expected<bool, ConversionCode> str_to_bool(StringPiece* src) noexcept {
267	auto b = src->begin(), e = src->end();
268	for (;; ++b) {
269	if (b >= e) {
270	return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING);
271	}
272	if (!std::isspace(*b)) {
273	break;
274	}
275	}
276
277	bool result;
278	size_t len = size_t(e - b);
279	switch (*b) {
280	case `'0'`:
281	case `'1'`: {
282	result = false;
283	for (; b < e && isdigit(*b); ++b) {
284	if (result \|\| (b != `'0'` && b != `'1'`)) {
285	return makeUnexpected(ConversionCode::BOOL_OVERFLOW);
286	}
287	result = (*b == `'1'`);
288	}
289	break;
290	}
291	case `'y'`:
292	case `'Y'`:
293	result = true;
294	if (!bool_str_cmp(&b, len, "yes")) {
295	++b; // accept the single 'y' character
296	}
297	break;
298	case `'n'`:
299	case `'N'`:
300	result = false;
301	if (!bool_str_cmp(&b, len, "no")) {
302	++b;
303	}
304	break;
305	case `'t'`:
306	case `'T'`:
307	result = true;
308	if (!bool_str_cmp(&b, len, "true")) {
309	++b;
310	}
311	break;
312	case `'f'`:
313	case `'F'`:
314	result = false;
315	if (!bool_str_cmp(&b, len, "false")) {
316	++b;
317	}
318	break;
319	case `'o'`:
320	case `'O'`:
321	if (bool_str_cmp(&b, len, "on")) {
322	result = true;
323	} else if (bool_str_cmp(&b, len, "off")) {
324	result = false;
325	} else {
326	return makeUnexpected(ConversionCode::BOOL_INVALID_VALUE);
327	}
328	break;
329	default:
330	return makeUnexpected(ConversionCode::BOOL_INVALID_VALUE);
331	}
332
333	src->assign(b, e);
334
335	return result;
336	}
337
338	/**
339	* StringPiece to double, with progress information. Alters the
340	* StringPiece parameter to munch the already-parsed characters.
341	*/
342	template <class Tgt>
343	Expected<Tgt, ConversionCode> str_to_floating(StringPiece* src) noexcept {
344	using namespace double_conversion;
345	static StringToDoubleConverter conv(
346	StringToDoubleConverter::ALLOW_TRAILING_JUNK \|
347	StringToDoubleConverter::ALLOW_LEADING_SPACES,
348	`0.0`,
349	// return this for junk input string
350	std::numeric_limits<double>::quiet_NaN(),
351	nullptr,
352	nullptr);
353
354	if (src->empty()) {
355	return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING);
356	}
357
358	int length;
359	auto result = conv.StringToDouble(
360	src->data(),
361	static_cast<int>(src->size()),
362	&length); // processed char count
363
364	if (!std::isnan(result)) {
365	// If we get here with length = 0, the input string is empty.
366	// If we get here with result = 0.0, it's either because the string
367	// contained only whitespace, or because we had an actual zero value
368	// (with potential trailing junk). If it was only whitespace, we
369	// want to raise an error; length will point past the last character
370	// that was processed, so we need to check if that character was
371	// whitespace or not.
372	if (length == `0` \|\|
373	(result == `0.0` && std::isspace((*src)[size_t(length) - `1`]))) {
374	return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING);
375	}
376	if (length >= `2`) {
377	const char* suffix = src->data() + length - `1`;
378	// double_conversion doesn't update length correctly when there is an
379	// incomplete exponent specifier. Converting "12e-f-g" shouldn't consume
380	// any more than "12", but it will consume "12e-".
381
382	// "123-" should only parse "123"
383	if (suffix == `'-'` \|\| suffix == `'+'`) {
384	--suffix;
385	--length;
386	}
387	// "12e-f-g" or "12euro" should only parse "12"
388	if (suffix == `'e'` \|\| suffix == `'E'`) {
389	--length;
390	}
391	}
392	src->advance(size_t(length));
393	return Tgt(result);
394	}
395
396	auto* e = src->end();
397	auto* b =
398	std::find_if_not(src->begin(), e, [](char c) { return std::isspace(c); });
399
400	// There must be non-whitespace, otherwise we would have caught this above
401	assert(b < e);
402	size_t size = size_t(e - b);
403
404	bool negative = false;
405	if (*b == `'-'`) {
406	negative = true;
407	++b;
408	--size;
409	}
410
411	result = `0.0`;
412
413	switch (tolower_ascii(*b)) {
414	case `'i'`:
415	if (size >= `3` && tolower_ascii(b[`1`]) == `'n'` &&
416	tolower_ascii(b[`2`]) == `'f'`) {
417	if (size >= `8` && tolower_ascii(b[`3`]) == `'i'` &&
418	tolower_ascii(b[`4`]) == `'n'` && tolower_ascii(b[`5`]) == `'i'` &&
419	tolower_ascii(b[`6`]) == `'t'` && tolower_ascii(b[`7`]) == `'y'`) {
420	b += `8`;
421	} else {
422	b += `3`;
423	}
424	result = std::numeric_limits<Tgt>::infinity();
425	}
426	break;
427
428	case `'n'`:
429	if (size >= `3` && tolower_ascii(b[`1`]) == `'a'` &&
430	tolower_ascii(b[`2`]) == `'n'`) {
431	b += `3`;
432	result = std::numeric_limits<Tgt>::quiet_NaN();
433	}
434	break;
435
436	default:
437	break;
438	}
439
440	if (result == `0.0`) {
441	// All bets are off
442	return makeUnexpected(ConversionCode::STRING_TO_FLOAT_ERROR);
443	}
444
445	if (negative) {
446	result = -result;
447	}
448
449	src->assign(b, e);
450
451	return Tgt(result);
452	}
453
454	template Expected<float, ConversionCode> str_to_floating<float>(
455	StringPiece* src) noexcept;
456	template Expected<double, ConversionCode> str_to_floating<double>(
457	StringPiece* src) noexcept;
458
459	/**
460	* This class takes care of additional processing needed for signed values,
461	* like leading sign character and overflow checks.
462	*/
463	template <typename T, bool IsSigned = std::is_signed<T>::value>
464	class SignedValueHandler;
465
466	template <typename T>
467	class SignedValueHandler<T, true> {
468	public:
469	ConversionCode init(const char*& b) {
470	negative_ = false;
471	if (!std::isdigit(*b)) {
472	if (*b == `'-'`) {
473	negative_ = true;
474	} else if (UNLIKELY(*b != `'+'`)) {
475	return ConversionCode::INVALID_LEADING_CHAR;
476	}
477	++b;
478	}
479	return ConversionCode::SUCCESS;
480	}
481
482	ConversionCode overflow() {
483	return negative_ ? ConversionCode::NEGATIVE_OVERFLOW
484	: ConversionCode::POSITIVE_OVERFLOW;
485	}
486
487	template <typename U>
488	Expected<T, ConversionCode> finalize(U value) {
489	T rv;
490	if (negative_) {
491	rv = T(-value);
492	if (UNLIKELY(rv > `0`)) {
493	return makeUnexpected(ConversionCode::NEGATIVE_OVERFLOW);
494	}
495	} else {
496	rv = T(value);
497	if (UNLIKELY(rv < `0`)) {
498	return makeUnexpected(ConversionCode::POSITIVE_OVERFLOW);
499	}
500	}
501	return rv;
502	}
503
504	private:
505	bool negative_;
506	};
507
508	// For unsigned types, we don't need any extra processing
509	template <typename T>
510	class SignedValueHandler<T, false> {
511	public:
512	ConversionCode init(const char*&) {
513	return ConversionCode::SUCCESS;
514	}
515
516	ConversionCode overflow() {
517	return ConversionCode::POSITIVE_OVERFLOW;
518	}
519
520	Expected<T, ConversionCode> finalize(T value) {
521	return value;
522	}
523	};
524
525	/**
526	* String represented as a pair of pointers to char to signed/unsigned
527	* integrals. Assumes NO whitespace before or after, and also that the
528	* string is composed entirely of digits (and an optional sign only for
529	* signed types). String may be empty, in which case digits_to returns
530	* an appropriate error.
531	*/
532	template <class Tgt>
533	inline Expected<Tgt, ConversionCode> digits_to(
534	const char* b,
535	const char* const e) noexcept {
536	using UT = typename std::make_unsigned<Tgt>::type;
537	assert(b <= e);
538
539	SignedValueHandler<Tgt> sgn;
540
541	auto err = sgn.init(b);
542	if (UNLIKELY(err != ConversionCode::SUCCESS)) {
543	return makeUnexpected(err);
544	}
545
546	size_t size = size_t(e - b);
547
548	/ Although the string is entirely made of digits, we still need to*
549	* check for overflow.
550	*/
551	if (size > std::numeric_limits<UT>::digits10) {
552	// Leading zeros?
553	if (b < e && *b == `'0'`) {
554	for (++b;; ++b) {
555	if (b == e) {
556	return Tgt(`0`); // just zeros, e.g. "0000"
557	}
558	if (*b != `'0'`) {
559	size = size_t(e - b);
560	break;
561	}
562	}
563	}
564	if (size > std::numeric_limits<UT>::digits10 &&
565	(size != std::numeric_limits<UT>::digits10 + `1` \|\|
566	strncmp(b, MaxString<UT>::value, size) > `0`)) {
567	return makeUnexpected(sgn.overflow());
568	}
569	}
570
571	// Here we know that the number won't overflow when
572	// converted. Proceed without checks.
573
574	UT result = `0`;
575
576	for (; e - b >= `4`; b += `4`) {
577	result = static_cast*<UT>(`10000`);
578	const int32_t r0 = shift1000[static_cast<size_t>(b[`0`])];
579	const int32_t r1 = shift100[static_cast<size_t>(b[`1`])];
580	const int32_t r2 = shift10[static_cast<size_t>(b[`2`])];
581	const int32_t r3 = shift1[static_cast<size_t>(b[`3`])];
582	const auto sum = r0 + r1 + r2 + r3;
583	if (sum >= OOR) {
584	goto outOfRange;
585	}
586	result += UT(sum);
587	}
588
589	switch (e - b) {
590	case `3`: {
591	const int32_t r0 = shift100[static_cast<size_t>(b[`0`])];
592	const int32_t r1 = shift10[static_cast<size_t>(b[`1`])];
593	const int32_t r2 = shift1[static_cast<size_t>(b[`2`])];
594	const auto sum = r0 + r1 + r2;
595	if (sum >= OOR) {
596	goto outOfRange;
597	}
598	result = UT(`1000` * result + sum);
599	break;
600	}
601	case `2`: {
602	const int32_t r0 = shift10[static_cast<size_t>(b[`0`])];
603	const int32_t r1 = shift1[static_cast<size_t>(b[`1`])];
604	const auto sum = r0 + r1;
605	if (sum >= OOR) {
606	goto outOfRange;
607	}
608	result = UT(`100` * result + sum);
609	break;
610	}
611	case `1`: {
612	const int32_t sum = shift1[static_cast<size_t>(b[`0`])];
613	if (sum >= OOR) {
614	goto outOfRange;
615	}
616	result = UT(`10` * result + sum);
617	break;
618	}
619	default:
620	assert(b == e);
621	if (size == `0`) {
622	return makeUnexpected(ConversionCode::NO_DIGITS);
623	}
624	break;
625	}
626
627	return sgn.finalize(result);
628
629	outOfRange:
630	return makeUnexpected(ConversionCode::NON_DIGIT_CHAR);
631	}
632
633	template Expected<char, ConversionCode> digits_to<char>(
634	const char*,
635	const char) noexcept*;
636	template Expected<signed char, ConversionCode> digits_to<signed char>(
637	const char*,
638	const char) noexcept*;
639	template Expected<unsigned char, ConversionCode> digits_to<unsigned char>(
640	const char*,
641	const char) noexcept*;
642
643	template Expected<short, ConversionCode> digits_to<short>(
644	const char*,
645	const char) noexcept*;
646	template Expected<unsigned short, ConversionCode> digits_to<unsigned short>(
647	const char*,
648	const char) noexcept*;
649
650	template Expected<int, ConversionCode> digits_to<int>(
651	const char*,
652	const char) noexcept*;
653	template Expected<unsigned int, ConversionCode> digits_to<unsigned int>(
654	const char*,
655	const char) noexcept*;
656
657	template Expected<long, ConversionCode> digits_to<long>(
658	const char*,
659	const char) noexcept*;
660	template Expected<unsigned long, ConversionCode> digits_to<unsigned long>(
661	const char*,
662	const char) noexcept*;
663
664	template Expected<long long, ConversionCode> digits_to<long long>(
665	const char*,
666	const char) noexcept*;
667	template Expected<unsigned long long, ConversionCode>
668	digits_to<unsigned long long>(const char, const* char) noexcept*;
669
670	#if FOLLY_HAVE_INT128_T
671	template Expected<__int128, ConversionCode> digits_to<__int128>(
672	const char*,
673	const char) noexcept*;
674	template Expected<unsigned __int128, ConversionCode>
675	digits_to<unsigned __int128>(const char, const* char) noexcept*;
676	#endif
677
678	/**
679	* StringPiece to integrals, with progress information. Alters the
680	* StringPiece parameter to munch the already-parsed characters.
681	*/
682	template <class Tgt>
683	Expected<Tgt, ConversionCode> str_to_integral(StringPiece* src) noexcept {
684	using UT = typename std::make_unsigned<Tgt>::type;
685
686	auto b = src->data(), past = src->data() + src->size();
687
688	for (;; ++b) {
689	if (UNLIKELY(b >= past)) {
690	return makeUnexpected(ConversionCode::EMPTY_INPUT_STRING);
691	}
692	if (!std::isspace(*b)) {
693	break;
694	}
695	}
696
697	SignedValueHandler<Tgt> sgn;
698	auto err = sgn.init(b);
699
700	if (UNLIKELY(err != ConversionCode::SUCCESS)) {
701	return makeUnexpected(err);
702	}
703	if (std::is_signed<Tgt>::value && UNLIKELY(b >= past)) {
704	return makeUnexpected(ConversionCode::NO_DIGITS);
705	}
706	if (UNLIKELY(!isdigit(*b))) {
707	return makeUnexpected(ConversionCode::NON_DIGIT_CHAR);
708	}
709
710	auto m = findFirstNonDigit(b + `1`, past);
711
712	auto tmp = digits_to<UT>(b, m);
713
714	if (UNLIKELY(!tmp.hasValue())) {
715	return makeUnexpected(
716	tmp.error() == ConversionCode::POSITIVE_OVERFLOW ? sgn.overflow()
717	: tmp.error());
718	}
719
720	auto res = sgn.finalize(tmp.value());
721
722	if (res.hasValue()) {
723	src->advance(size_t(m - src->data()));
724	}
725
726	return res;
727	}
728
729	template Expected<char, ConversionCode> str_to_integral<char>(
730	StringPiece* src) noexcept;
731	template Expected<signed char, ConversionCode> str_to_integral<signed char>(
732	StringPiece* src) noexcept;
733	template Expected<unsigned char, ConversionCode> str_to_integral<unsigned char>(
734	StringPiece* src) noexcept;
735
736	template Expected<short, ConversionCode> str_to_integral<short>(
737	StringPiece* src) noexcept;
738	template Expected<unsigned short, ConversionCode>
739	str_to_integral<unsigned short>(StringPiece* src) noexcept;
740
741	template Expected<int, ConversionCode> str_to_integral<int>(
742	StringPiece* src) noexcept;
743	template Expected<unsigned int, ConversionCode> str_to_integral<unsigned int>(
744	StringPiece* src) noexcept;
745
746	template Expected<long, ConversionCode> str_to_integral<long>(
747	StringPiece* src) noexcept;
748	template Expected<unsigned long, ConversionCode> str_to_integral<unsigned long>(
749	StringPiece* src) noexcept;
750
751	template Expected<long long, ConversionCode> str_to_integral<long long>(
752	StringPiece* src) noexcept;
753	template Expected<unsigned long long, ConversionCode>
754	str_to_integral<unsigned long long>(StringPiece* src) noexcept;
755
756	#if FOLLY_HAVE_INT128_T
757	template Expected<__int128, ConversionCode> str_to_integral<__int128>(
758	StringPiece* src) noexcept;
759	template Expected<unsigned __int128, ConversionCode>
760	str_to_integral<unsigned __int128>(StringPiece* src) noexcept;
761	#endif
762
763	} // namespace detail
764
765	ConversionError makeConversionError(ConversionCode code, StringPiece input) {
766	using namespace detail;
767	static_assert(
768	std::is_unsigned<std::underlying_type<ConversionCode>::type>::value,
769	"ConversionCode should be unsigned");
770	assert((std::size_t)code < kErrorStrings.size());
771	const ErrorString& err = kErrorStrings [(std::size_t)code];
772	if (code == ConversionCode::EMPTY_INPUT_STRING && input.empty()) {
773	return {err.string, code};
774	}
775	std::string tmp(err.string);
776	tmp.append(": ");
777	if (err.quote) {
778	tmp.append(`1`, `'"'`);
779	}
780	if (input.size() > `0`) {
781	tmp.append(input.data(), input.size());
782	}
783	if (err.quote) {
784	tmp.append(`1`, `'"'`);
785	}
786	return {tmp, code};
787	}
788
789	} // namespace folly
790

Browse the source code of folly/Conv.cpp