str_to_integer.h source code [llvm/libc/src/__support/str_to_integer.h]

1	//===-- String to integer conversion utils ----------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	// -----------------------------------------------------------------------------
10	// ** WARNING **
11	// This file is shared with libc++. You should also be careful when adding
12	// dependencies to this file, since it needs to build for all libc++ targets.
13	// -----------------------------------------------------------------------------
14
15	#ifndef LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
16	#define LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
17
18	#include "src/__support/CPP/limits.h"
19	#include "src/__support/CPP/type_traits.h"
20	#include "src/__support/CPP/type_traits/make_unsigned.h"
21	#include "src/__support/big_int.h"
22	#include "src/__support/common.h"
23	#include "src/__support/ctype_utils.h"
24	#include "src/__support/macros/config.h"
25	#include "src/__support/str_to_num_result.h"
26	#include "src/__support/uint128.h"
27	#include "src/errno/libc_errno.h" // For ERANGE
28
29	namespace LIBC_NAMESPACE_DECL {
30	namespace internal {
31
32	// Returns a pointer to the first character in src that is not a whitespace
33	// character (as determined by isspace())
34	// TODO: Change from returning a pointer to returning a length.
35	LIBC_INLINE const char *
36	first_non_whitespace(const char *__restrict src,
37	size_t src_len = cpp::numeric_limits<size_t>::max()) {
38	size_t src_cur = `0`;
39	while (src_cur < src_len && internal::isspace(ch: src[src_cur])) {
40	++src_cur;
41	}
42	return src + src_cur;
43	}
44
45	// checks if the next 3 characters of the string pointer are the start of a
46	// hexadecimal number. Does not advance the string pointer.
47	LIBC_INLINE bool
48	is_hex_start(const char *__restrict src,
49	size_t src_len = cpp::numeric_limits<size_t>::max()) {
50	if (src_len < `3`)
51	return false;
52	return src == `'0'` && tolower(ch: (src + `1`)) == `'x'` && isalnum(ch: *(src + `2`)) &&
53	b36_char_to_int(ch: *(src + `2`)) < `16`;
54	}
55
56	// Takes the address of the string pointer and parses the base from the start of
57	// it.
58	LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
59	// A hexadecimal number is defined as "the prefix 0x or 0X followed by a
60	// sequence of the decimal digits and the letters a (or A) through f (or F)
61	// with values 10 through 15 respectively." (C standard 6.4.4.1)
62	if (is_hex_start(src, src_len))
63	return `16`;
64	// An octal number is defined as "the prefix 0 optionally followed by a
65	// sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
66	// number that starts with 0, including just 0, is an octal number.
67	if (src_len > `0` && src[`0`] == `'0'`)
68	return `8`;
69	// A decimal number is defined as beginning "with a nonzero digit and
70	// consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
71	return `10`;
72	}
73
74	// -----------------------------------------------------------------------------
75	// ** WARNING **
76	// This interface is shared with libc++, if you change this interface you need
77	// to update it in both libc and libc++.
78	// -----------------------------------------------------------------------------
79	// Takes a pointer to a string and the base to convert to. This function is used
80	// as the backend for all of the string to int functions.
81	template <class T>
82	LIBC_INLINE StrToNumResult<T>
83	strtointeger(const char *__restrict src, int base,
84	const size_t src_len = cpp::numeric_limits<size_t>::max()) {
85	using ResultType = make_integral_or_big_int_unsigned_t<T>;
86
87	ResultType result = `0`;
88
89	bool is_number = false;
90	size_t src_cur = `0`;
91	int error_val = `0`;
92
93	if (src_len == `0`)
94	return {`0`, `0`, `0`};
95
96	if (base < `0` \|\| base == `1` \|\| base > `36`)
97	return {`0`, `0`, EINVAL};
98
99	src_cur = first_non_whitespace(src, src_len) - src;
100
101	char result_sign = `'+'`;
102	if (src[src_cur] == `'+'` \|\| src[src_cur] == `'-'`) {
103	result_sign = src[src_cur];
104	++src_cur;
105	}
106
107	if (base == `0`)
108	base = infer_base(src: src + src_cur, src_len: src_len - src_cur);
109
110	if (base == `16` && is_hex_start(src: src + src_cur, src_len: src_len - src_cur))
111	src_cur = src_cur + `2`;
112
113	constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
114	const bool is_positive = (result_sign == `'+'`);
115
116	ResultType constexpr NEGATIVE_MAX =
117	!IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + `1`
118	: cpp::numeric_limits<T>::max();
119	ResultType const abs_max =
120	(is_positive ? cpp::numeric_limits<T>::max() : NEGATIVE_MAX);
121	ResultType const abs_max_div_by_base =
122	static_cast<ResultType>(abs_max / base);
123
124	while (src_cur < src_len && isalnum(ch: src[src_cur])) {
125	int cur_digit = b36_char_to_int(ch: src[src_cur]);
126	if (cur_digit >= base)
127	break;
128
129	is_number = true;
130	++src_cur;
131
132	// If the number has already hit the maximum value for the current type then
133	// the result cannot change, but we still need to advance src to the end of
134	// the number.
135	if (result == abs_max) {
136	error_val = ERANGE;
137	continue;
138	}
139
140	if (result > abs_max_div_by_base) {
141	result = abs_max;
142	error_val = ERANGE;
143	} else {
144	result = static_cast<ResultType>(result * base);
145	}
146	if (result > abs_max - cur_digit) {
147	result = abs_max;
148	error_val = ERANGE;
149	} else {
150	result = static_cast<ResultType>(result + cur_digit);
151	}
152	}
153
154	ptrdiff_t str_len = is_number ? (src_cur) : `0`;
155
156	if (error_val == ERANGE) {
157	if (is_positive \|\| IS_UNSIGNED)
158	return {cpp::numeric_limits<T>::max(), str_len, error_val};
159	else // T is signed and there is a negative overflow
160	return {cpp::numeric_limits<T>::min(), str_len, error_val};
161	}
162
163	return {static_cast<T>(is_positive ? result : -result), str_len, error_val};
164	}
165
166	} // namespace internal
167	} // namespace LIBC_NAMESPACE_DECL
168
169	#endif // LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
170

Browse the source code of llvm/libc/src/__support/str_to_integer.h