1//===-- String to integer conversion utils ----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9// -----------------------------------------------------------------------------
10// **** WARNING ****
11// This file is shared with libc++. You should also be careful when adding
12// dependencies to this file, since it needs to build for all libc++ targets.
13// -----------------------------------------------------------------------------
14
15#ifndef LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
16#define LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
17
18#include "src/__support/CPP/limits.h"
19#include "src/__support/CPP/type_traits.h"
20#include "src/__support/CPP/type_traits/make_unsigned.h"
21#include "src/__support/big_int.h"
22#include "src/__support/common.h"
23#include "src/__support/ctype_utils.h"
24#include "src/__support/macros/config.h"
25#include "src/__support/str_to_num_result.h"
26#include "src/__support/uint128.h"
27#include "src/errno/libc_errno.h" // For ERANGE
28
29namespace LIBC_NAMESPACE_DECL {
30namespace internal {
31
32// Returns a pointer to the first character in src that is not a whitespace
33// character (as determined by isspace())
34// TODO: Change from returning a pointer to returning a length.
35LIBC_INLINE const char *
36first_non_whitespace(const char *__restrict src,
37 size_t src_len = cpp::numeric_limits<size_t>::max()) {
38 size_t src_cur = 0;
39 while (src_cur < src_len && internal::isspace(ch: src[src_cur])) {
40 ++src_cur;
41 }
42 return src + src_cur;
43}
44
45// checks if the next 3 characters of the string pointer are the start of a
46// hexadecimal number. Does not advance the string pointer.
47LIBC_INLINE bool
48is_hex_start(const char *__restrict src,
49 size_t src_len = cpp::numeric_limits<size_t>::max()) {
50 if (src_len < 3)
51 return false;
52 return *src == '0' && tolower(ch: *(src + 1)) == 'x' && isalnum(ch: *(src + 2)) &&
53 b36_char_to_int(ch: *(src + 2)) < 16;
54}
55
56// Takes the address of the string pointer and parses the base from the start of
57// it.
58LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
59 // A hexadecimal number is defined as "the prefix 0x or 0X followed by a
60 // sequence of the decimal digits and the letters a (or A) through f (or F)
61 // with values 10 through 15 respectively." (C standard 6.4.4.1)
62 if (is_hex_start(src, src_len))
63 return 16;
64 // An octal number is defined as "the prefix 0 optionally followed by a
65 // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
66 // number that starts with 0, including just 0, is an octal number.
67 if (src_len > 0 && src[0] == '0')
68 return 8;
69 // A decimal number is defined as beginning "with a nonzero digit and
70 // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
71 return 10;
72}
73
74// -----------------------------------------------------------------------------
75// **** WARNING ****
76// This interface is shared with libc++, if you change this interface you need
77// to update it in both libc and libc++.
78// -----------------------------------------------------------------------------
79// Takes a pointer to a string and the base to convert to. This function is used
80// as the backend for all of the string to int functions.
81template <class T>
82LIBC_INLINE StrToNumResult<T>
83strtointeger(const char *__restrict src, int base,
84 const size_t src_len = cpp::numeric_limits<size_t>::max()) {
85 using ResultType = make_integral_or_big_int_unsigned_t<T>;
86
87 ResultType result = 0;
88
89 bool is_number = false;
90 size_t src_cur = 0;
91 int error_val = 0;
92
93 if (src_len == 0)
94 return {0, 0, 0};
95
96 if (base < 0 || base == 1 || base > 36)
97 return {0, 0, EINVAL};
98
99 src_cur = first_non_whitespace(src, src_len) - src;
100
101 char result_sign = '+';
102 if (src[src_cur] == '+' || src[src_cur] == '-') {
103 result_sign = src[src_cur];
104 ++src_cur;
105 }
106
107 if (base == 0)
108 base = infer_base(src: src + src_cur, src_len: src_len - src_cur);
109
110 if (base == 16 && is_hex_start(src: src + src_cur, src_len: src_len - src_cur))
111 src_cur = src_cur + 2;
112
113 constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
114 const bool is_positive = (result_sign == '+');
115
116 ResultType constexpr NEGATIVE_MAX =
117 !IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1
118 : cpp::numeric_limits<T>::max();
119 ResultType const abs_max =
120 (is_positive ? cpp::numeric_limits<T>::max() : NEGATIVE_MAX);
121 ResultType const abs_max_div_by_base =
122 static_cast<ResultType>(abs_max / base);
123
124 while (src_cur < src_len && isalnum(ch: src[src_cur])) {
125 int cur_digit = b36_char_to_int(ch: src[src_cur]);
126 if (cur_digit >= base)
127 break;
128
129 is_number = true;
130 ++src_cur;
131
132 // If the number has already hit the maximum value for the current type then
133 // the result cannot change, but we still need to advance src to the end of
134 // the number.
135 if (result == abs_max) {
136 error_val = ERANGE;
137 continue;
138 }
139
140 if (result > abs_max_div_by_base) {
141 result = abs_max;
142 error_val = ERANGE;
143 } else {
144 result = static_cast<ResultType>(result * base);
145 }
146 if (result > abs_max - cur_digit) {
147 result = abs_max;
148 error_val = ERANGE;
149 } else {
150 result = static_cast<ResultType>(result + cur_digit);
151 }
152 }
153
154 ptrdiff_t str_len = is_number ? (src_cur) : 0;
155
156 if (error_val == ERANGE) {
157 if (is_positive || IS_UNSIGNED)
158 return {cpp::numeric_limits<T>::max(), str_len, error_val};
159 else // T is signed and there is a negative overflow
160 return {cpp::numeric_limits<T>::min(), str_len, error_val};
161 }
162
163 return {static_cast<T>(is_positive ? result : -result), str_len, error_val};
164}
165
166} // namespace internal
167} // namespace LIBC_NAMESPACE_DECL
168
169#endif // LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
170