| 1 | // Scintilla source code edit control |
| 2 | /** @file CharacterSet.h |
| 3 | ** Encapsulates a set of characters. Used to test if a character is within a set. |
| 4 | **/ |
| 5 | // Copyright 2007 by Neil Hodgson <neilh@scintilla.org> |
| 6 | // The License.txt file describes the conditions under which this software may be distributed. |
| 7 | |
| 8 | #ifndef CHARACTERSET_H |
| 9 | #define CHARACTERSET_H |
| 10 | |
| 11 | namespace Lexilla { |
| 12 | |
/**
 * Fixed-size bit set recording which character values in [0, N) are members.
 * Values at or beyond N are not stored individually: Contains reports
 * valueAfter for every one of them.
 */
template<int N>
class CharacterSetArray {
	// One bit per value, eight values per byte.
	unsigned char bset[(N-1)/8 + 1] = {};
	// Result returned by Contains for any value >= N.
	bool valueAfter = false;
public:
	// Flags selecting predefined ASCII ranges to add on construction.
	enum setBase {
		setNone=0,
		setLower=1,
		setUpper=2,
		setDigits=4,
		setAlpha=setLower|setUpper,
		setAlphaNum=setAlpha|setDigits
	};
	/**
	 * @param base        predefined ranges (lower case, upper case, digits) to include.
	 * @param initialSet  NUL-terminated list of extra characters to include; may be null.
	 * @param valueAfter_ membership reported for values >= N.
	 */
	CharacterSetArray(setBase base=setNone, const char *initialSet="", bool valueAfter_=false) noexcept :
		valueAfter(valueAfter_) {
		AddString(initialSet);
		if (base & setLower)
			AddString("abcdefghijklmnopqrstuvwxyz");
		if (base & setUpper)
			AddString("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
		if (base & setDigits)
			AddString("0123456789");
	}
	// For compatibility with previous version but should not be used in new code.
	// size_ carries no information beyond N and is only checked by assertion.
	CharacterSetArray(setBase base, const char *initialSet, [[maybe_unused]]int size_, bool valueAfter_=false) noexcept :
		CharacterSetArray(base, initialSet, valueAfter_) {
		assert(size_ == N);
	}
	/**
	 * Add a single value to the set.
	 * A value outside [0, N) is a caller error: it asserts in debug builds and
	 * is ignored otherwise, so a build without assertions never writes outside bset.
	 */
	void Add(int val) noexcept {
		assert(val >= 0);
		assert(val < N);
		if ((val < 0) || (val >= N))
			return;
		bset[val >> 3] |= 1 << (val & 7);
	}
	/**
	 * Add every character of a NUL-terminated string to the set.
	 * A null pointer is treated as an empty string.
	 */
	void AddString(const char *setToAdd) noexcept {
		if (!setToAdd)
			return;
		for (const char *cp=setToAdd; *cp; cp++) {
			// Go through unsigned char so bytes >= 0x80 do not become negative values.
			const unsigned char uch = *cp;
			assert(uch < N);
			Add(uch);
		}
	}
	/**
	 * Test membership of a value.
	 * Negative values are never members; values >= N yield valueAfter.
	 */
	bool Contains(int val) const noexcept {
		assert(val >= 0);
		if (val < 0) return false;
		if (val >= N) return valueAfter;
		return (bset[val >> 3] & (1 << (val & 7))) != 0;
	}
	bool Contains(char ch) const noexcept {
		// Overload char as char may be signed
		const unsigned char uch = ch;
		return Contains(uch);
	}
};
| 65 | |
// Character set with N = 0x80: values 0x00 to 0x7F are stored individually,
// anything larger is answered by the set's valueAfter flag.
using CharacterSet = CharacterSetArray<0x80>;
| 67 | |
| 68 | // Functions for classifying characters |
| 69 | |
/**
 * Report whether t compares equal to at least one of args.
 * With no candidates supplied the result is false.
 */
template <typename T, typename... Args>
constexpr bool AnyOf(T t, Args... args) noexcept {
#if defined(__clang__)
	static_assert(__is_integral(T));
#endif
	// Fold seeded with false so the empty-pack result is spelled out.
	return (false || ... || (t == args));
}
| 77 | |
// Catch pointer arguments without needing <type_traits>: these overloads
// return void, so using the result as a condition fails to compile.
template <typename T, typename... Args>
constexpr void AnyOf(T *, Args...) noexcept {}
template <typename T, typename... Args>
constexpr void AnyOf(const T *, Args...) noexcept {}
| 83 | |
// True for a space or any of the control characters tab (0x09) through
// carriage return (0x0d).
constexpr bool IsASpace(int ch) noexcept {
	if (ch == ' ')
		return true;
	return ('\t' <= ch) && (ch <= '\r');
}
| 87 | |
// True only for the space and horizontal tab characters.
constexpr bool IsASpaceOrTab(int ch) noexcept {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
| 91 | |
// True for the ASCII decimal digits '0' to '9'.
constexpr bool IsADigit(int ch) noexcept {
	return !((ch < '0') || (ch > '9'));
}
| 95 | |
// True when ch is a digit in the given base.
// Bases up to 10 accept '0' up to (but excluding) '0' + base; larger bases
// accept all decimal digits plus the first (base - 10) letters in either case.
constexpr bool IsADigit(int ch, int base) noexcept {
	if (base > 10) {
		const int letterCount = base - 10;
		if ((ch >= '0') && (ch <= '9'))
			return true;
		if ((ch >= 'A') && (ch - 'A' < letterCount))
			return true;
		return (ch >= 'a') && (ch - 'a' < letterCount);
	}
	return (ch >= '0') && (ch - '0' < base);
}
| 105 | |
// True for values in the 7-bit ASCII range 0x00 to 0x7F.
constexpr bool IsASCII(int ch) noexcept {
	// Negative values convert to large unsigned values, so one comparison
	// rejects both ends of the range.
	return static_cast<unsigned int>(ch) < 0x80U;
}
| 109 | |
// True for the ASCII lower case letters 'a' to 'z'.
constexpr bool IsLowerCase(int ch) noexcept {
	if (ch < 'a')
		return false;
	return ch <= 'z';
}
| 113 | |
// True for the ASCII upper case letters 'A' to 'Z'.
constexpr bool IsUpperCase(int ch) noexcept {
	if (ch > 'Z')
		return false;
	return ch >= 'A';
}
| 117 | |
| 118 | constexpr bool IsUpperOrLowerCase(int ch) noexcept { |
| 119 | return IsUpperCase(ch) || IsLowerCase(ch); |
| 120 | } |
| 121 | |
// True for ASCII decimal digits and ASCII letters of either case.
constexpr bool IsAlphaNumeric(int ch) noexcept {
	if ((ch >= '0') && (ch <= '9'))
		return true;
	if ((ch >= 'a') && (ch <= 'z'))
		return true;
	return (ch >= 'A') && (ch <= 'Z');
}
| 128 | |
/**
 * Check if a character is a space: the space character itself or one of
 * the control characters 0x09 (tab) to 0x0d (carriage return).
 * This is ASCII specific but is safe with chars >= 0x80.
 */
constexpr bool isspacechar(int ch) noexcept {
	switch (ch) {
	case ' ':
	case '\t':
	case '\n':
	case '\v':
	case '\f':
	case '\r':
		return true;
	default:
		return false;
	}
}
| 136 | |
| 137 | constexpr bool iswordchar(int ch) noexcept { |
| 138 | return IsAlphaNumeric(ch) || ch == '.' || ch == '_'; |
| 139 | } |
| 140 | |
| 141 | constexpr bool iswordstart(int ch) noexcept { |
| 142 | return IsAlphaNumeric(ch) || ch == '_'; |
| 143 | } |
| 144 | |
| 145 | constexpr bool isoperator(int ch) noexcept { |
| 146 | if (IsAlphaNumeric(ch)) |
| 147 | return false; |
| 148 | if (ch == '%' || ch == '^' || ch == '&' || ch == '*' || |
| 149 | ch == '(' || ch == ')' || ch == '-' || ch == '+' || |
| 150 | ch == '=' || ch == '|' || ch == '{' || ch == '}' || |
| 151 | ch == '[' || ch == ']' || ch == ':' || ch == ';' || |
| 152 | ch == '<' || ch == '>' || ch == ',' || ch == '/' || |
| 153 | ch == '?' || ch == '!' || ch == '.' || ch == '~') |
| 154 | return true; |
| 155 | return false; |
| 156 | } |
| 157 | |
| 158 | // Simple case functions for ASCII supersets. |
| 159 | |
// Map ASCII 'a'..'z' to 'A'..'Z'; every other value is returned unchanged.
template <typename T>
constexpr T MakeUpperCase(T ch) noexcept {
	if (ch >= 'a' && ch <= 'z')
		return ch - 'a' + 'A';
	return ch;
}
| 167 | |
// Map ASCII 'A'..'Z' to 'a'..'z'; every other value is returned unchanged.
template <typename T>
constexpr T MakeLowerCase(T ch) noexcept {
	if (ch >= 'A' && ch <= 'Z')
		return ch - 'A' + 'a';
	return ch;
}
| 175 | |
// String comparison functions; only declared here, the definitions are not
// part of this header.
// NOTE(review): the names suggest a case-insensitive, strcmp-style result
// (negative / zero / positive), with the second form limited to len
// characters - confirm against the implementation.
int CompareCaseInsensitive(const char *a, const char *b) noexcept;
int CompareNCaseInsensitive(const char *a, const char *b, size_t len) noexcept;
| 178 | |
| 179 | } |
| 180 | |
| 181 | #endif |
| 182 | |