| 1 | // | 
| 2 | // Copyright 2017 The Abseil Authors. | 
| 3 | // | 
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
| 5 | // you may not use this file except in compliance with the License. | 
| 6 | // You may obtain a copy of the License at | 
| 7 | // | 
| 8 | //      https://www.apache.org/licenses/LICENSE-2.0 | 
| 9 | // | 
| 10 | // Unless required by applicable law or agreed to in writing, software | 
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, | 
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
| 13 | // See the License for the specific language governing permissions and | 
| 14 | // limitations under the License. | 
| 15 | // | 
| 16 | // ----------------------------------------------------------------------------- | 
| 17 | // File: ascii.h | 
| 18 | // ----------------------------------------------------------------------------- | 
| 19 | // | 
| 20 | // This package contains functions operating on characters and strings | 
| 21 | // restricted to standard ASCII. These include character classification | 
| 22 | // functions analogous to those found in the ANSI C Standard Library <ctype.h> | 
| 23 | // header file. | 
| 24 | // | 
| 25 | // C++ implementations provide <ctype.h> functionality based on their | 
| 26 | // C environment locale. In general, reliance on such a locale is not ideal, as | 
| 27 | // the locale standard is problematic (and may not return invariant information | 
| 28 | // for the same character set, for example). These `ascii_*()` functions are | 
| 29 | // hard-wired for standard ASCII, much faster, and guaranteed to behave | 
| 30 | // consistently.  They will never be overloaded, nor will their function | 
| 31 | // signature change. | 
| 32 | // | 
| 33 | // `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`, | 
| 34 | // `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`, | 
| 35 | // `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`, | 
| 36 | // `ascii_isxdigit()` | 
| 37 | //   Analogous to the <ctype.h> functions with similar names, these | 
| 38 | //   functions take an unsigned char and return a bool, based on whether the | 
| 39 | //   character matches the condition specified. | 
| 40 | // | 
| 41 | //   If the input character has a numerical value greater than 127, these | 
| 42 | //   functions return `false`. | 
| 43 | // | 
| 44 | // `ascii_tolower()`, `ascii_toupper()` | 
| 45 | //   Analogous to the <ctype.h> functions with similar names, these functions | 
| 46 | //   take an unsigned char and return a char. | 
| 47 | // | 
| 48 | //   If the input character is not an ASCII {lower,upper}-case letter (including | 
| 49 | //   numerical values greater than 127) then the functions return the same value | 
| 50 | //   as the input character. | 
| 51 |  | 
| 52 | #ifndef ABSL_STRINGS_ASCII_H_ | 
| 53 | #define ABSL_STRINGS_ASCII_H_ | 
| 54 |  | 
| 55 | #include <algorithm> | 
| 56 | #include <string> | 
| 57 |  | 
| 58 | #include "absl/base/attributes.h" | 
| 59 | #include "absl/strings/string_view.h" | 
| 60 |  | 
| 61 | namespace absl { | 
namespace ascii_internal {

// Per-character property table, indexed by the unsigned char value. Each
// entry is a bitmask; the table-backed `ascii_is*()` predicates below test a
// single bit of the entry. Only declared here; the definition lives elsewhere.
extern const unsigned char kPropertyBits[256];

// Lookup table mapping each character value to its lower-case counterpart.
// Only declared here; the definition lives elsewhere.
extern const char kToLower[256];

// Lookup table mapping each character value to its upper-case counterpart.
// Only declared here; the definition lives elsewhere.
extern const char kToUpper[256];

}  // namespace ascii_internal
| 74 |  | 
| 75 | // ascii_isalpha() | 
| 76 | // | 
| 77 | // Determines whether the given character is an alphabetic character. | 
| 78 | inline bool ascii_isalpha(unsigned char c) { | 
| 79 |   return (ascii_internal::kPropertyBits[c] & 0x01) != 0; | 
| 80 | } | 
| 81 |  | 
| 82 | // ascii_isalnum() | 
| 83 | // | 
| 84 | // Determines whether the given character is an alphanumeric character. | 
| 85 | inline bool ascii_isalnum(unsigned char c) { | 
| 86 |   return (ascii_internal::kPropertyBits[c] & 0x04) != 0; | 
| 87 | } | 
| 88 |  | 
| 89 | // ascii_isspace() | 
| 90 | // | 
| 91 | // Determines whether the given character is a whitespace character (space, | 
| 92 | // tab, vertical tab, formfeed, linefeed, or carriage return). | 
| 93 | inline bool ascii_isspace(unsigned char c) { | 
| 94 |   return (ascii_internal::kPropertyBits[c] & 0x08) != 0; | 
| 95 | } | 
| 96 |  | 
| 97 | // ascii_ispunct() | 
| 98 | // | 
| 99 | // Determines whether the given character is a punctuation character. | 
| 100 | inline bool ascii_ispunct(unsigned char c) { | 
| 101 |   return (ascii_internal::kPropertyBits[c] & 0x10) != 0; | 
| 102 | } | 
| 103 |  | 
| 104 | // ascii_isblank() | 
| 105 | // | 
| 106 | // Determines whether the given character is a blank character (tab or space). | 
| 107 | inline bool ascii_isblank(unsigned char c) { | 
| 108 |   return (ascii_internal::kPropertyBits[c] & 0x20) != 0; | 
| 109 | } | 
| 110 |  | 
| 111 | // ascii_iscntrl() | 
| 112 | // | 
| 113 | // Determines whether the given character is a control character. | 
| 114 | inline bool ascii_iscntrl(unsigned char c) { | 
| 115 |   return (ascii_internal::kPropertyBits[c] & 0x40) != 0; | 
| 116 | } | 
| 117 |  | 
| 118 | // ascii_isxdigit() | 
| 119 | // | 
| 120 | // Determines whether the given character can be represented as a hexadecimal | 
| 121 | // digit character (i.e. {0-9} or {A-F}). | 
| 122 | inline bool ascii_isxdigit(unsigned char c) { | 
| 123 |   return (ascii_internal::kPropertyBits[c] & 0x80) != 0; | 
| 124 | } | 
| 125 |  | 
// ascii_isdigit()
//
// Reports whether `c` is a decimal digit character (i.e. {0-9}). This is a
// plain range check; no lookup table is consulted.
inline bool ascii_isdigit(unsigned char c) {
  return '0' <= c && c <= '9';
}
| 131 |  | 
// ascii_isprint()
//
// Reports whether `c` is printable. The space character (32) counts as
// printable; other whitespace such as tab or newline does not, and neither
// does DEL (127) or any value above it.
inline bool ascii_isprint(unsigned char c) {
  // 0x20 is space; 0x7e ('~') is the last code below DEL (0x7f).
  return 0x20 <= c && c <= 0x7e;
}
| 136 |  | 
// ascii_isgraph()
//
// Reports whether `c` has a graphical representation: every value strictly
// between space (32) and DEL (127).
inline bool ascii_isgraph(unsigned char c) {
  // 0x21 ('!') is the first code after space; 0x7e ('~') is the last before
  // DEL.
  return 0x21 <= c && c <= 0x7e;
}
| 141 |  | 
// ascii_isupper()
//
// Reports whether `c` is an uppercase letter (i.e. {A-Z}). This is a plain
// range check; no lookup table is consulted.
inline bool ascii_isupper(unsigned char c) {
  return 'A' <= c && c <= 'Z';
}
| 146 |  | 
// ascii_islower()
//
// Reports whether `c` is a lowercase letter (i.e. {a-z}). This is a plain
// range check; no lookup table is consulted.
inline bool ascii_islower(unsigned char c) {
  return 'a' <= c && c <= 'z';
}
| 151 |  | 
// ascii_isascii()
//
// Reports whether `c` is an ASCII character, i.e. its value is at most 127.
inline bool ascii_isascii(unsigned char c) {
  return c <= 0x7f;
}
| 156 |  | 
| 157 | // ascii_tolower() | 
| 158 | // | 
| 159 | // Returns an ASCII character, converting to lowercase if uppercase is | 
| 160 | // passed. Note that character values > 127 are simply returned. | 
| 161 | inline char ascii_tolower(unsigned char c) { | 
| 162 |   return ascii_internal::kToLower[c]; | 
| 163 | } | 
| 164 |  | 
| 165 | // Converts the characters in `s` to lowercase, changing the contents of `s`. | 
| 166 | void AsciiStrToLower(std::string* s); | 
| 167 |  | 
| 168 | // Creates a lowercase string from a given absl::string_view. | 
| 169 | ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) { | 
| 170 |   std::string result(s); | 
| 171 |   absl::AsciiStrToLower(&result); | 
| 172 |   return result; | 
| 173 | } | 
| 174 |  | 
| 175 | // ascii_toupper() | 
| 176 | // | 
| 177 | // Returns the ASCII character, converting to upper-case if lower-case is | 
| 178 | // passed. Note that characters values > 127 are simply returned. | 
| 179 | inline char ascii_toupper(unsigned char c) { | 
| 180 |   return ascii_internal::kToUpper[c]; | 
| 181 | } | 
| 182 |  | 
| 183 | // Converts the characters in `s` to uppercase, changing the contents of `s`. | 
| 184 | void AsciiStrToUpper(std::string* s); | 
| 185 |  | 
| 186 | // Creates an uppercase string from a given absl::string_view. | 
| 187 | ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) { | 
| 188 |   std::string result(s); | 
| 189 |   absl::AsciiStrToUpper(&result); | 
| 190 |   return result; | 
| 191 | } | 
| 192 |  | 
| 193 | // Returns absl::string_view with whitespace stripped from the beginning of the | 
| 194 | // given string_view. | 
| 195 | ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace( | 
| 196 |     absl::string_view str) { | 
| 197 |   auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace); | 
| 198 |   return str.substr(it - str.begin()); | 
| 199 | } | 
| 200 |  | 
| 201 | // Strips in place whitespace from the beginning of the given string. | 
| 202 | inline void StripLeadingAsciiWhitespace(std::string* str) { | 
| 203 |   auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace); | 
| 204 |   str->erase(str->begin(), it); | 
| 205 | } | 
| 206 |  | 
| 207 | // Returns absl::string_view with whitespace stripped from the end of the given | 
| 208 | // string_view. | 
| 209 | ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace( | 
| 210 |     absl::string_view str) { | 
| 211 |   auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace); | 
| 212 |   return str.substr(0, str.rend() - it); | 
| 213 | } | 
| 214 |  | 
| 215 | // Strips in place whitespace from the end of the given string | 
| 216 | inline void StripTrailingAsciiWhitespace(std::string* str) { | 
| 217 |   auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace); | 
| 218 |   str->erase(str->rend() - it); | 
| 219 | } | 
| 220 |  | 
| 221 | // Returns absl::string_view with whitespace stripped from both ends of the | 
| 222 | // given string_view. | 
| 223 | ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace( | 
| 224 |     absl::string_view str) { | 
| 225 |   return StripTrailingAsciiWhitespace(StripLeadingAsciiWhitespace(str)); | 
| 226 | } | 
| 227 |  | 
| 228 | // Strips in place whitespace from both ends of the given string | 
| 229 | inline void StripAsciiWhitespace(std::string* str) { | 
| 230 |   StripTrailingAsciiWhitespace(str); | 
| 231 |   StripLeadingAsciiWhitespace(str); | 
| 232 | } | 
| 233 |  | 
// RemoveExtraAsciiWhitespace()
//
// Removes leading, trailing, and consecutive internal whitespace from the
// given string, changing its contents in place. Only declared here; the
// definition lives elsewhere.
void RemoveExtraAsciiWhitespace(std::string* str);
| 236 |  | 
| 237 | }  // namespace absl | 
| 238 |  | 
| 239 | #endif  // ABSL_STRINGS_ASCII_H_ | 
| 240 |  |