1 | // Scintilla source code edit control |
2 | /** @file CharacterSet.h |
3 | ** Encapsulates a set of characters. Used to test if a character is within a set. |
4 | **/ |
5 | // Copyright 2007 by Neil Hodgson <neilh@scintilla.org> |
6 | // The License.txt file describes the conditions under which this software may be distributed. |
7 | |
8 | #ifndef CHARACTERSET_H |
9 | #define CHARACTERSET_H |
10 | |
11 | namespace Lexilla { |
12 | |
/**
 * Set of character values in the range [0, N), stored as a bitmap.
 * Queries for values at or beyond N are answered with the valueAfter flag
 * supplied at construction.
 */
template<int N>
class CharacterSetArray {
	// One bit per value: value v is bit (v & 7) of byte (v >> 3).
	unsigned char bset[(N-1)/8 + 1] = {};
	// Result reported by Contains for values >= N.
	bool valueAfter = false;
public:
	// Bit flags selecting predefined ASCII ranges to add during construction.
	enum setBase {
		setNone=0,
		setLower=1,
		setUpper=2,
		setDigits=4,
		setAlpha=setLower|setUpper,
		setAlphaNum=setAlpha|setDigits
	};
	/**
	 * Build a set from the ranges selected by @a base plus every character in
	 * @a initialSet. A null @a initialSet is treated as an empty string.
	 * @a valueAfter_ is the answer Contains gives for values >= N.
	 */
	CharacterSetArray(setBase base=setNone, const char *initialSet="", bool valueAfter_=false) noexcept {
		valueAfter = valueAfter_;
		AddString(initialSet);
		if (base & setLower)
			AddString("abcdefghijklmnopqrstuvwxyz");
		if (base & setUpper)
			AddString("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
		if (base & setDigits)
			AddString("0123456789");
	}
	// For compatibility with previous version but should not be used in new code.
	// size_ is only checked (by assert) to equal N; it does not affect the set.
	CharacterSetArray(setBase base, const char *initialSet, [[maybe_unused]]int size_, bool valueAfter_=false) noexcept :
		CharacterSetArray(base, initialSet, valueAfter_) {
		assert(size_ == N);
	}
	/**
	 * Add a single value to the set.
	 * Values outside [0, N) trip an assert; when asserts are compiled out they
	 * are ignored instead of writing outside the bitmap.
	 */
	void Add(int val) noexcept {
		assert(val >= 0);
		assert(val < N);
		// Same pattern as Contains: the assert documents the contract, the
		// runtime check keeps builds without asserts memory-safe.
		if ((val < 0) || (val >= N))
			return;
		bset[val >> 3] |= static_cast<unsigned char>(1U << (val & 7));
	}
	/**
	 * Add every character of the NUL-terminated string @a setToAdd.
	 * Characters are treated as unsigned; a null pointer adds nothing.
	 */
	void AddString(const char *setToAdd) noexcept {
		if (!setToAdd)
			return;
		for (const char *cp=setToAdd; *cp; cp++) {
			const unsigned char uch = *cp;
			assert(uch < N);
			Add(uch);
		}
	}
	/**
	 * Test membership of @a val.
	 * Negative values are never members; values >= N yield valueAfter.
	 */
	bool Contains(int val) const noexcept {
		assert(val >= 0);
		if (val < 0) return false;
		if (val >= N) return valueAfter;
		return (bset[val >> 3] & (1 << (val & 7))) != 0;
	}
	bool Contains(char ch) const noexcept {
		// Overload char as char may be signed
		const unsigned char uch = ch;
		return Contains(uch);
	}
};
65 | |
// Default set type: a CharacterSetArray holding the values 0..0x7F.
using CharacterSet = CharacterSetArray<0x80>;
67 | |
68 | // Functions for classifying characters |
69 | |
/**
 * Return true when @a t compares equal to at least one of @a args.
 * With no further arguments the result is false.
 * Under clang, T is additionally required to be an integral type.
 */
template <typename T, typename... Args>
constexpr bool AnyOf(T t, Args... args) noexcept {
#if defined(__clang__)
	static_assert(__is_integral(T));
#endif
	// Left fold over ||: candidates are still tested left to right with short-circuiting.
	return (... || (t == args));
}
77 | |
// Overload chosen when the first argument is a pointer. It does nothing and
// returns void, so such a call cannot be used as a condition; this rejects
// pointers without needing <type_traits>.
template <typename T, typename... Args>
constexpr void AnyOf(T *, Args...) noexcept {
}
// Overload for a pointer-to-const first argument: does nothing and returns
// void, so the call cannot be used as a condition.
template <typename T, typename... Args>
constexpr void AnyOf(const T *, Args...) noexcept {
}
83 | |
// True for ' ' and for the control characters 0x09 through 0x0d.
constexpr bool IsASpace(int ch) noexcept {
	if (ch == ' ')
		return true;
	return !((ch < 0x09) || (ch > 0x0d));
}
87 | |
// True only for the space and horizontal tab characters.
constexpr bool IsASpaceOrTab(int ch) noexcept {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
91 | |
// True for the decimal digits '0' through '9'.
constexpr bool IsADigit(int ch) noexcept {
	if (ch < '0')
		return false;
	return ch <= '9';
}
95 | |
/**
 * True when @a ch is a digit in the given @a base.
 * Bases up to 10 accept '0' up to (but excluding) '0' + base; larger bases
 * accept all decimal digits plus the first (base - 10) letters in either case.
 */
constexpr bool IsADigit(int ch, int base) noexcept {
	if (base > 10) {
		// Number of letters that count as digits beyond '9'.
		const int letters = base - 10;
		return ((ch >= '0') && (ch <= '9'))
			|| ((ch >= 'A') && (ch < 'A' + letters))
			|| ((ch >= 'a') && (ch < 'a' + letters));
	}
	return (ch >= '0') && (ch < '0' + base);
}
105 | |
// True for values in the range 0..0x7F.
constexpr bool IsASCII(int ch) noexcept {
	// Negative values convert to large unsigned numbers, so a single
	// comparison covers both bounds.
	return static_cast<unsigned int>(ch) < 0x80U;
}
109 | |
// True for the ASCII letters 'a' through 'z'.
constexpr bool IsLowerCase(int ch) noexcept {
	return !((ch < 'a') || (ch > 'z'));
}
113 | |
// True for the ASCII letters 'A' through 'Z'.
constexpr bool IsUpperCase(int ch) noexcept {
	return !((ch < 'A') || (ch > 'Z'));
}
117 | |
118 | constexpr bool IsUpperOrLowerCase(int ch) noexcept { |
119 | return IsUpperCase(ch) || IsLowerCase(ch); |
120 | } |
121 | |
// True for ASCII decimal digits and ASCII letters of either case.
constexpr bool IsAlphaNumeric(int ch) noexcept {
	if ((ch >= '0') && (ch <= '9'))
		return true;
	if ((ch >= 'a') && (ch <= 'z'))
		return true;
	return (ch >= 'A') && (ch <= 'Z');
}
128 | |
/**
 * Test whether a character is white space: ' ' or one of 0x09..0x0d.
 * Only ASCII is considered, but values >= 0x80 are handled safely
 * (they are simply not spaces).
 */
constexpr bool isspacechar(int ch) noexcept {
	if (ch == ' ')
		return true;
	return !((ch < 0x09) || (ch > 0x0d));
}
136 | |
137 | constexpr bool iswordchar(int ch) noexcept { |
138 | return IsAlphaNumeric(ch) || ch == '.' || ch == '_'; |
139 | } |
140 | |
141 | constexpr bool iswordstart(int ch) noexcept { |
142 | return IsAlphaNumeric(ch) || ch == '_'; |
143 | } |
144 | |
/**
 * True when @a ch is one of the punctuation characters treated as operators:
 * % ^ & * ( ) - + = | { } [ ] : ; < > , / ? ! . ~
 * Every other value, including letters and digits, yields false.
 */
constexpr bool isoperator(int ch) noexcept {
	// None of the listed characters is alphanumeric, so a direct switch gives
	// the same answer as filtering out letters and digits first.
	switch (ch) {
	case '%': case '^': case '&': case '*':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '<': case '>': case ',': case '/':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}
157 | |
158 | // Simple case functions for ASCII supersets. |
159 | |
/**
 * Convert an ASCII lower case letter to upper case.
 * Any value outside 'a'..'z' is returned unchanged.
 */
template <typename T>
constexpr T MakeUpperCase(T ch) noexcept {
	const bool notLower = (ch < 'a') || (ch > 'z');
	return notLower ? ch : static_cast<T>(ch - 'a' + 'A');
}
167 | |
/**
 * Convert an ASCII upper case letter to lower case.
 * Any value outside 'A'..'Z' is returned unchanged.
 */
template <typename T>
constexpr T MakeLowerCase(T ch) noexcept {
	const bool notUpper = (ch < 'A') || (ch > 'Z');
	return notUpper ? ch : static_cast<T>(ch - 'A' + 'a');
}
175 | |
// Case-insensitive comparison of two C strings. Declaration only: the
// definition is not part of this header.
// NOTE(review): presumably returns <0 / 0 / >0 in strcmp style - confirm against the implementation.
int CompareCaseInsensitive(const char *a, const char *b) noexcept;
// Variant taking a length argument. Declaration only.
// NOTE(review): presumably compares at most len characters, like strncmp - confirm against the implementation.
int CompareNCaseInsensitive(const char *a, const char *b, size_t len) noexcept;
178 | |
179 | } |
180 | |
181 | #endif |
182 | |