1 | // |
2 | // Copyright 2017 The Abseil Authors. |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
5 | // you may not use this file except in compliance with the License. |
6 | // You may obtain a copy of the License at |
7 | // |
8 | // https://www.apache.org/licenses/LICENSE-2.0 |
9 | // |
10 | // Unless required by applicable law or agreed to in writing, software |
11 | // distributed under the License is distributed on an "AS IS" BASIS, |
12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | // See the License for the specific language governing permissions and |
14 | // limitations under the License. |
15 | // |
16 | // ----------------------------------------------------------------------------- |
17 | // File: ascii.h |
18 | // ----------------------------------------------------------------------------- |
19 | // |
20 | // This package contains functions operating on characters and strings |
21 | // restricted to standard ASCII. These include character classification |
22 | // functions analogous to those found in the ANSI C Standard Library <ctype.h> |
23 | // header file. |
24 | // |
25 | // C++ implementations provide <ctype.h> functionality based on their |
26 | // C environment locale. In general, reliance on such a locale is not ideal, as |
27 | // the locale standard is problematic (and may not return invariant information |
28 | // for the same character set, for example). These `ascii_*()` functions are |
29 | // hard-wired for standard ASCII, much faster, and guaranteed to behave |
30 | // consistently. They will never be overloaded, nor will their function |
31 | // signature change. |
32 | // |
33 | // `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`, |
34 | // `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`, |
35 | // `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`, |
36 | // `ascii_isxdigit()` |
37 | // Analogous to the <ctype.h> functions with similar names, these |
38 | // functions take an unsigned char and return a bool, based on whether the |
39 | // character matches the condition specified. |
40 | // |
41 | // If the input character has a numerical value greater than 127, these |
42 | // functions return `false`. |
43 | // |
44 | // `ascii_tolower()`, `ascii_toupper()` |
45 | // Analogous to the <ctype.h> functions with similar names, these functions |
46 | // take an unsigned char and return a char. |
47 | // |
48 | // If the input character is not an ASCII {lower,upper}-case letter (including |
49 | // numerical values greater than 127) then the functions return the same value |
50 | // as the input character. |
51 | |
52 | #ifndef ABSL_STRINGS_ASCII_H_ |
53 | #define ABSL_STRINGS_ASCII_H_ |
54 | |
55 | #include <algorithm> |
56 | #include <string> |
57 | |
58 | #include "absl/base/attributes.h" |
59 | #include "absl/strings/string_view.h" |
60 | |
61 | namespace absl { |
namespace ascii_internal {

// Per-character property table, indexed by the `unsigned char` value of a
// character. Each entry is a set of bit flags that the `ascii_is*()`
// classification functions test with a mask. Declared here only.
extern const unsigned char kPropertyBits[256];

// Upper-casing table, indexed by the `unsigned char` value of a character;
// read by `ascii_toupper()`. Declared here only.
extern const char kToUpper[256];

// Lower-casing table, indexed by the `unsigned char` value of a character;
// read by `ascii_tolower()`. Declared here only.
extern const char kToLower[256];

}  // namespace ascii_internal
74 | |
75 | // ascii_isalpha() |
76 | // |
77 | // Determines whether the given character is an alphabetic character. |
78 | inline bool ascii_isalpha(unsigned char c) { |
79 | return (ascii_internal::kPropertyBits[c] & 0x01) != 0; |
80 | } |
81 | |
82 | // ascii_isalnum() |
83 | // |
84 | // Determines whether the given character is an alphanumeric character. |
85 | inline bool ascii_isalnum(unsigned char c) { |
86 | return (ascii_internal::kPropertyBits[c] & 0x04) != 0; |
87 | } |
88 | |
89 | // ascii_isspace() |
90 | // |
91 | // Determines whether the given character is a whitespace character (space, |
92 | // tab, vertical tab, formfeed, linefeed, or carriage return). |
93 | inline bool ascii_isspace(unsigned char c) { |
94 | return (ascii_internal::kPropertyBits[c] & 0x08) != 0; |
95 | } |
96 | |
97 | // ascii_ispunct() |
98 | // |
99 | // Determines whether the given character is a punctuation character. |
100 | inline bool ascii_ispunct(unsigned char c) { |
101 | return (ascii_internal::kPropertyBits[c] & 0x10) != 0; |
102 | } |
103 | |
104 | // ascii_isblank() |
105 | // |
106 | // Determines whether the given character is a blank character (tab or space). |
107 | inline bool ascii_isblank(unsigned char c) { |
108 | return (ascii_internal::kPropertyBits[c] & 0x20) != 0; |
109 | } |
110 | |
111 | // ascii_iscntrl() |
112 | // |
113 | // Determines whether the given character is a control character. |
114 | inline bool ascii_iscntrl(unsigned char c) { |
115 | return (ascii_internal::kPropertyBits[c] & 0x40) != 0; |
116 | } |
117 | |
118 | // ascii_isxdigit() |
119 | // |
120 | // Determines whether the given character can be represented as a hexadecimal |
121 | // digit character (i.e. {0-9} or {A-F}). |
122 | inline bool ascii_isxdigit(unsigned char c) { |
123 | return (ascii_internal::kPropertyBits[c] & 0x80) != 0; |
124 | } |
125 | |
// ascii_isdigit()
//
// Returns true if `c` is one of the decimal digit characters '0' through '9'.
// Computed with a direct range comparison rather than a table lookup.
inline bool ascii_isdigit(unsigned char c) {
  return '0' <= c && c <= '9';
}
131 | |
// ascii_isprint()
//
// Returns true if `c` is printable: any value from 32 (the space character)
// up to and including 126. Of the whitespace characters, only the space
// itself falls in this range; 127 and all values above it are rejected.
inline bool ascii_isprint(unsigned char c) {
  return 32 <= c && c <= 126;
}
136 | |
// ascii_isgraph()
//
// Returns true if `c` has a graphical representation: any value from 33 up to
// and including 126. The space character (32) is excluded.
inline bool ascii_isgraph(unsigned char c) {
  return 33 <= c && c <= 126;
}
141 | |
// ascii_isupper()
//
// Returns true if `c` is one of the uppercase letters 'A' through 'Z'.
inline bool ascii_isupper(unsigned char c) {
  return 'A' <= c && c <= 'Z';
}
146 | |
// ascii_islower()
//
// Returns true if `c` is one of the lowercase letters 'a' through 'z'.
inline bool ascii_islower(unsigned char c) {
  return 'a' <= c && c <= 'z';
}
151 | |
// ascii_isascii()
//
// Returns true if `c` is an ASCII character, i.e. its value is below 128
// (the high bit of the byte is clear).
inline bool ascii_isascii(unsigned char c) {
  return (c & 0x80) == 0;
}
156 | |
157 | // ascii_tolower() |
158 | // |
159 | // Returns an ASCII character, converting to lowercase if uppercase is |
160 | // passed. Note that character values > 127 are simply returned. |
161 | inline char ascii_tolower(unsigned char c) { |
162 | return ascii_internal::kToLower[c]; |
163 | } |
164 | |
165 | // Converts the characters in `s` to lowercase, changing the contents of `s`. |
166 | void AsciiStrToLower(std::string* s); |
167 | |
168 | // Creates a lowercase string from a given absl::string_view. |
169 | ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) { |
170 | std::string result(s); |
171 | absl::AsciiStrToLower(&result); |
172 | return result; |
173 | } |
174 | |
175 | // ascii_toupper() |
176 | // |
177 | // Returns the ASCII character, converting to upper-case if lower-case is |
178 | // passed. Note that characters values > 127 are simply returned. |
179 | inline char ascii_toupper(unsigned char c) { |
180 | return ascii_internal::kToUpper[c]; |
181 | } |
182 | |
183 | // Converts the characters in `s` to uppercase, changing the contents of `s`. |
184 | void AsciiStrToUpper(std::string* s); |
185 | |
186 | // Creates an uppercase string from a given absl::string_view. |
187 | ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) { |
188 | std::string result(s); |
189 | absl::AsciiStrToUpper(&result); |
190 | return result; |
191 | } |
192 | |
193 | // Returns absl::string_view with whitespace stripped from the beginning of the |
194 | // given string_view. |
195 | ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace( |
196 | absl::string_view str) { |
197 | auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace); |
198 | return str.substr(it - str.begin()); |
199 | } |
200 | |
201 | // Strips in place whitespace from the beginning of the given string. |
202 | inline void StripLeadingAsciiWhitespace(std::string* str) { |
203 | auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace); |
204 | str->erase(str->begin(), it); |
205 | } |
206 | |
207 | // Returns absl::string_view with whitespace stripped from the end of the given |
208 | // string_view. |
209 | ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace( |
210 | absl::string_view str) { |
211 | auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace); |
212 | return str.substr(0, str.rend() - it); |
213 | } |
214 | |
215 | // Strips in place whitespace from the end of the given string |
216 | inline void StripTrailingAsciiWhitespace(std::string* str) { |
217 | auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace); |
218 | str->erase(str->rend() - it); |
219 | } |
220 | |
221 | // Returns absl::string_view with whitespace stripped from both ends of the |
222 | // given string_view. |
223 | ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace( |
224 | absl::string_view str) { |
225 | return StripTrailingAsciiWhitespace(StripLeadingAsciiWhitespace(str)); |
226 | } |
227 | |
228 | // Strips in place whitespace from both ends of the given string |
229 | inline void StripAsciiWhitespace(std::string* str) { |
230 | StripTrailingAsciiWhitespace(str); |
231 | StripLeadingAsciiWhitespace(str); |
232 | } |
233 | |
// Removes leading, trailing, and consecutive internal whitespace from `*str`,
// modifying the string in place. Declared here only; the definition is not
// part of this header.
void RemoveExtraAsciiWhitespace(std::string* str);
236 | |
237 | } // namespace absl |
238 | |
239 | #endif // ABSL_STRINGS_ASCII_H_ |
240 | |