1// Scintilla source code edit control
2/** @file CharacterSet.h
3 ** Encapsulates a set of characters. Used to test if a character is within a set.
4 **/
5// Copyright 2007 by Neil Hodgson <neilh@scintilla.org>
6// The License.txt file describes the conditions under which this software may be distributed.
7
8#ifndef CHARACTERSET_H
9#define CHARACTERSET_H
10
11namespace Lexilla {
12
/**
 * Fixed-size bit set recording which character values in [0, N) are members.
 * Values at or above N are not stored: Contains reports valueAfter for them.
 */
template<int N>
class CharacterSetArray {
	// One bit per value: value v is stored in byte v/8 at bit v%8.
	unsigned char bset[(N-1)/8 + 1] = {};
	// Answer given by Contains for any value >= N.
	bool valueAfter = false;
public:
	// Predefined ASCII groups that can be OR-ed together and added at construction.
	enum setBase {
		setNone=0,
		setLower=1,
		setUpper=2,
		setDigits=4,
		setAlpha=setLower|setUpper,
		setAlphaNum=setAlpha|setDigits
	};
	/**
	 * Build a set from the predefined groups in base plus every character of initialSet.
	 * @param base        Combination of setBase flags to include.
	 * @param initialSet  NUL-terminated list of extra members; a null pointer is treated as empty.
	 * @param valueAfter_ Result Contains gives for values >= N.
	 */
	CharacterSetArray(setBase base=setNone, const char *initialSet="", bool valueAfter_=false) noexcept :
		valueAfter(valueAfter_) {
		AddString(initialSet);
		if (base & setLower)
			AddString("abcdefghijklmnopqrstuvwxyz");
		if (base & setUpper)
			AddString("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
		if (base & setDigits)
			AddString("0123456789");
	}
	// For compatibility with previous version but should not be used in new code.
	// size_ is only checked (in debug builds) to equal the template size N.
	CharacterSetArray(setBase base, const char *initialSet, [[maybe_unused]]int size_, bool valueAfter_=false) noexcept :
		CharacterSetArray(base, initialSet, valueAfter_) {
		assert(size_ == N);
	}
	/**
	 * Add val to the set. val must be in [0, N); this is asserted in debug builds.
	 */
	void Add(int val) noexcept {
		assert(val >= 0);
		assert(val < N);
		// Same pattern as Contains: when asserts are compiled out, ignore an
		// out-of-range value instead of writing outside bset.
		if (val < 0 || val >= N)
			return;
		bset[val >> 3] |= 1 << (val & 7);
	}
	/**
	 * Add every character of the NUL-terminated string setToAdd.
	 * A null pointer adds nothing.
	 */
	void AddString(const char *setToAdd) noexcept {
		if (!setToAdd)
			return;
		for (const char *cp=setToAdd; *cp; cp++) {
			// Go through unsigned char so a signed char never yields a negative value.
			const unsigned char uch = *cp;
			assert(uch < N);
			Add(uch);
		}
	}
	/**
	 * Test membership of val.
	 * @return false for negative values, valueAfter for values >= N,
	 *         otherwise whether the bit for val is set.
	 */
	bool Contains(int val) const noexcept {
		assert(val >= 0);
		if (val < 0) return false;
		if (val >= N) return valueAfter;
		return (bset[val >> 3] & (1 << (val & 7))) != 0;
	}
	bool Contains(char ch) const noexcept {
		// Overload char as char may be signed
		const unsigned char uch = ch;
		return Contains(uch);
	}
};
65
// Default set sized for the 128 values 0..0x7f; Contains answers with
// valueAfter for anything at or above 0x80.
using CharacterSet = CharacterSetArray<0x80>;
67
68// Functions for classifying characters
69
// Report whether t compares equal to at least one of args.
// An empty args list yields false. Under clang, T is required to be an integral type.
template <typename T, typename... Args>
constexpr bool AnyOf(T t, Args... args) noexcept {
#if defined(__clang__)
	static_assert(__is_integral(T));
#endif
	// Short-circuiting fold: stops at the first candidate equal to t.
	return (... || (t == args));
}
77
// Catch pointer arguments without needing <type_traits>: these overloads accept
// a pointer as the first argument and return void, so they yield no bool result.
template <typename T, typename... Args>
constexpr void AnyOf(T *, Args...) noexcept {}
template <typename T, typename... Args>
constexpr void AnyOf(const T *, Args...) noexcept {}
83
// True for ' ' and for the control characters 0x09 through 0x0d.
constexpr bool IsASpace(int ch) noexcept {
	if (ch == ' ')
		return true;
	return (ch <= 0x0d) && (ch >= 0x09);
}
87
// True only for the space character and the horizontal tab.
constexpr bool IsASpaceOrTab(int ch) noexcept {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
91
// True for the ASCII decimal digits '0'..'9'.
constexpr bool IsADigit(int ch) noexcept {
	if (ch > '9')
		return false;
	return ch >= '0';
}
95
// True when ch is a digit in the given numeric base.
// Bases up to 10 accept '0' up to (but excluding) '0'+base; larger bases accept
// all decimal digits plus the first base-10 letters in either case.
constexpr bool IsADigit(int ch, int base) noexcept {
	if (base > 10) {
		if ((ch >= '0') && (ch <= '9'))
			return true;
		if ((ch >= 'A') && (ch < 'A' + base - 10))
			return true;
		return (ch >= 'a') && (ch < 'a' + base - 10);
	}
	return (ch >= '0') && (ch < '0' + base);
}
105
// True when ch lies in the range 0..0x7f.
constexpr bool IsASCII(int ch) noexcept {
	// Converting to unsigned maps negative values above 0x7f, so one comparison covers both bounds.
	return static_cast<unsigned int>(ch) < 0x80U;
}
109
// True for the ASCII lower case letters 'a'..'z'.
constexpr bool IsLowerCase(int ch) noexcept {
	return !((ch < 'a') || (ch > 'z'));
}
113
// True for the ASCII upper case letters 'A'..'Z'.
constexpr bool IsUpperCase(int ch) noexcept {
	if (ch < 'A')
		return false;
	return ch <= 'Z';
}
117
118constexpr bool IsUpperOrLowerCase(int ch) noexcept {
119 return IsUpperCase(ch) || IsLowerCase(ch);
120}
121
// True for ASCII digits and ASCII letters of either case.
constexpr bool IsAlphaNumeric(int ch) noexcept {
	if ((ch >= '0') && (ch <= '9'))
		return true;
	if ((ch >= 'a') && (ch <= 'z'))
		return true;
	return (ch >= 'A') && (ch <= 'Z');
}
128
/**
 * Test whether ch is a space: ' ' or one of the characters 0x09 to 0x0d.
 * Only ASCII is considered, and values >= 0x80 are handled safely (they yield false).
 */
constexpr bool isspacechar(int ch) noexcept {
	if (ch == ' ')
		return true;
	return (ch <= 0x0d) && (ch >= 0x09);
}
136
137constexpr bool iswordchar(int ch) noexcept {
138 return IsAlphaNumeric(ch) || ch == '.' || ch == '_';
139}
140
141constexpr bool iswordstart(int ch) noexcept {
142 return IsAlphaNumeric(ch) || ch == '_';
143}
144
145constexpr bool isoperator(int ch) noexcept {
146 if (IsAlphaNumeric(ch))
147 return false;
148 if (ch == '%' || ch == '^' || ch == '&' || ch == '*' ||
149 ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
150 ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
151 ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
152 ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
153 ch == '?' || ch == '!' || ch == '.' || ch == '~')
154 return true;
155 return false;
156}
157
158// Simple case functions for ASCII supersets.
159
// Convert an ASCII lower case letter to upper case; any other value is returned unchanged.
template <typename T>
constexpr T MakeUpperCase(T ch) noexcept {
	const bool notLowerLetter = (ch < 'a') || (ch > 'z');
	// The arithmetic is done in a promoted type, so convert the result back to T.
	return notLowerLetter ? ch : static_cast<T>(ch - 'a' + 'A');
}
167
// Convert an ASCII upper case letter to lower case; any other value is returned unchanged.
template <typename T>
constexpr T MakeLowerCase(T ch) noexcept {
	const bool notUpperLetter = (ch < 'A') || (ch > 'Z');
	// The arithmetic is done in a promoted type, so convert the result back to T.
	return notUpperLetter ? ch : static_cast<T>(ch - 'A' + 'a');
}
175
// Declarations only; the definitions are not in this header.
// NOTE(review): judging by the names, these presumably compare two strings
// ignoring case, with the second form limited to len characters - confirm
// the exact result convention and case rules against the implementation.
// This header includes nothing itself, so size_t must already be declared
// by the including file.
int CompareCaseInsensitive(const char *a, const char *b) noexcept;
int CompareNCaseInsensitive(const char *a, const char *b, size_t len) noexcept;
178
179}
180
181#endif
182