1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Class for representing character reachability. |
31 | * |
32 | * This is a simple (but hopefully fast) class for representing 8-bit character |
33 | * reachability, along with a bunch of useful operations. |
34 | */ |
35 | |
36 | #ifndef NG_CHARREACH_H |
37 | #define NG_CHARREACH_H |
38 | |
39 | #include "ue2common.h" |
40 | #include "util/bitfield.h" |
41 | |
42 | #include <string> |
43 | |
44 | namespace ue2 { |
45 | |
46 | class CharReach { |
47 | private: |
48 | /// Underlying storage. |
49 | ue2::bitfield<256> bits; |
50 | |
51 | public: |
52 | static constexpr size_t npos = decltype(bits)::npos; //!< One past the max value. |
53 | |
54 | /// Empty constructor. |
55 | CharReach() {} |
56 | |
57 | /// Constructor for a character class containing a single char. |
58 | explicit CharReach(unsigned char c) { set(c); } |
59 | |
60 | /// Constructor for a character class representing a contiguous range of |
61 | /// chars, inclusive. |
62 | CharReach(unsigned char from, unsigned char to) { setRange(from, to); } |
63 | |
64 | /// Constructor for a character class based on the set of chars in a |
65 | /// string. |
66 | explicit CharReach(const std::string &str) { set(str); } |
67 | |
68 | /// Returns total capacity. |
69 | static constexpr size_t size() { return npos; } |
70 | |
71 | /// Returns a CharReach with complete reachability (a "dot"). |
72 | static CharReach dot() { return CharReach(0, 255); } |
73 | |
74 | /// Complete bitset equality. |
75 | bool operator==(const CharReach &a) const { return bits == a.bits; } |
76 | |
77 | /// Inequality. |
78 | bool operator!=(const CharReach &a) const { return bits != a.bits; } |
79 | |
80 | /// Ordering. |
81 | bool operator<(const CharReach &a) const { return bits < a.bits; } |
82 | |
83 | /// Set all bits. |
84 | void setall() { bits.setall(); } |
85 | |
86 | /// Clear all bits. |
87 | void clear() { bits.clear(); } |
88 | |
89 | /// Clear bit N. |
90 | void clear(unsigned char n) { bits.clear(n); } |
91 | |
92 | /// Set bit N. |
93 | void set(unsigned char n) { bits.set(n); } |
94 | |
95 | /// Test bit N. |
96 | bool test(unsigned char n) const { return bits.test(n); } |
97 | |
98 | /// Flip bit N. |
99 | void flip(unsigned char n) { bits.flip(n); } |
100 | |
101 | /// Flip all bits. |
102 | void flip() { bits.flip(); } |
103 | |
104 | // Switch on the bit in the range (from, to), inclusive. |
105 | void setRange(unsigned char from, unsigned char to) { |
106 | bits.set_range(from, to); |
107 | } |
108 | |
109 | // Switch on the bits corresponding to the characters in \a s. |
110 | void set(const std::string &s); |
111 | |
112 | /// Returns number of bits set on. |
113 | size_t count() const { return bits.count(); } |
114 | |
115 | /// Are no bits set? |
116 | bool none() const { return bits.none(); } |
117 | |
118 | /// Is any bit set? |
119 | bool any() const { return bits.any(); } |
120 | |
121 | /// Are all bits set? |
122 | bool all() const { return bits.all(); } |
123 | |
124 | /// Returns first bit set, or CharReach::npos if none set. |
125 | size_t find_first() const { return bits.find_first(); } |
126 | |
127 | /// Returns last bit set, or CharReach::npos if none set. |
128 | size_t find_last() const { return bits.find_last(); } |
129 | |
130 | /// Returns next bit set, or CharReach::npos if none set after n. |
131 | size_t find_next(size_t last) const { return bits.find_next(last); } |
132 | |
133 | /// Returns (zero-based) N'th bit set, or CharReach::npos if fewer than |
134 | /// N + 1 bits are on. |
135 | size_t find_nth(size_t n) const { return bits.find_nth(n); } |
136 | |
137 | /// Bitwise OR. |
138 | CharReach operator|(const CharReach &a) const { |
139 | CharReach cr(*this); |
140 | cr.bits |= a.bits; |
141 | return cr; |
142 | } |
143 | |
144 | /// Bitwise OR-equals. |
145 | void operator|=(const CharReach &a) { bits |= a.bits; } |
146 | |
147 | /// Bitwise AND. |
148 | CharReach operator&(const CharReach &a) const { |
149 | CharReach cr(*this); |
150 | cr.bits &= a.bits; |
151 | return cr; |
152 | } |
153 | |
154 | /// Bitwise AND-equals. |
155 | void operator&=(const CharReach &a) { bits &= a.bits; } |
156 | |
157 | /// Bitwise XOR. |
158 | CharReach operator^(const CharReach &a) const { |
159 | CharReach cr(*this); |
160 | cr.bits ^= a.bits; |
161 | return cr; |
162 | } |
163 | |
164 | /// Bitwise complement. |
165 | CharReach operator~(void) const { |
166 | CharReach cr(*this); |
167 | cr.flip(); |
168 | return cr; |
169 | } |
170 | |
171 | /// Do we only contain bits representing alpha characters? |
172 | bool isAlpha() const; |
173 | |
174 | /// Do we represent an uppercase/lowercase pair? |
175 | bool isCaselessChar() const; |
176 | |
177 | /// Do we represent a cheapskate caseless set? |
178 | bool isBit5Insensitive() const; |
179 | |
180 | /// Return a string containing the characters that are switched on. |
181 | std::string to_string() const; |
182 | |
183 | /// Hash of enabled bits. |
184 | size_t hash() const { return bits.hash(); } |
185 | |
186 | /// True if this character class is a subset of \a other. |
187 | bool isSubsetOf(const CharReach &other) const; |
188 | }; |
189 | |
190 | /** \brief True iff there is a non-empty intersection between \a and \a b */ |
191 | bool overlaps(const CharReach &a, const CharReach &b); |
192 | |
193 | /** \brief True iff \a small is a subset of \a big. */ |
194 | bool isSubsetOf(const CharReach &small, const CharReach &big); |
195 | |
196 | bool isutf8ascii(const CharReach &cr); |
197 | bool isutf8start(const CharReach &cr); |
198 | |
199 | } // namespace ue2 |
200 | |
201 | namespace std { |
202 | |
203 | template<> |
204 | struct hash<ue2::CharReach> { |
205 | size_t operator()(const ue2::CharReach &cr) const { |
206 | return cr.hash(); |
207 | } |
208 | }; |
209 | |
210 | } // namespace std |
211 | |
212 | #endif // NG_CHARREACH_H |
213 | |