1/*
2 * Copyright (c) 2015-2016, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Class for representing character reachability.
31 *
32 * This is a simple (but hopefully fast) class for representing 8-bit character
33 * reachability, along with a bunch of useful operations.
34 */
35#include "ue2common.h"
36#include "charreach.h"
37#include "charreach_util.h"
38#include "compare.h"
39#include "unicode_def.h"
40
41#include <cassert>
42#include <string>
43
44namespace ue2 {
45
46/// Switch on the bits corresponding to the characters in \a s.
47void CharReach::set(const std::string &s) {
48 for (const auto &c : s) {
49 set(c);
50 }
51}
52
53/// Do we only contain bits representing alpha characters?
54bool CharReach::isAlpha() const {
55 if (none()) {
56 return false;
57 }
58 for (size_t i = find_first(); i != npos; i = find_next(i)) {
59 if (!ourisalpha((char)i)) {
60 return false;
61 }
62 }
63 return true;
64}
65
66/// Do we represent an uppercase/lowercase pair?
67bool CharReach::isCaselessChar() const {
68 if (count() != 2) {
69 return false;
70 }
71 size_t first = find_first();
72 size_t second = find_next(first);
73 assert(first != npos && second != npos);
74 return (char)first == mytoupper((char)second);
75}
76
77/// Do we represent a cheapskate caseless set?
78bool CharReach::isBit5Insensitive() const {
79 for (size_t i = find_first(); i != npos; i = find_next(i)) {
80 if (!test((char)i ^ 0x20)) {
81 return false;
82 }
83 }
84 return true;
85}
86
87/// Return a string containing the characters that are switched on.
88std::string CharReach::to_string() const {
89 std::string s;
90 for (size_t i = find_first(); i != npos; i = find_next(i)) {
91 s += (char)i;
92 }
93 return s;
94}
95
96/** \brief True iff there is a non-empty intersection between \a and \a b */
97bool overlaps(const CharReach &a, const CharReach &b) {
98 return (a & b).any();
99}
100
101/** \brief True iff \a small is a subset of \a big. */
102bool isSubsetOf(const CharReach &small, const CharReach &big) {
103 return small.isSubsetOf(big);
104}
105
106/// True if this character class is a subset of \a other.
107bool CharReach::isSubsetOf(const CharReach &other) const {
108 return (bits & other.bits) == bits;
109}
110
111void make_caseless(CharReach *cr) {
112 for (char c = 'A'; c <= 'Z'; c++) {
113 if (cr->test(c) || cr->test(mytolower(c))) {
114 cr->set(c);
115 cr->set(mytolower(c));
116 }
117 }
118}
119
120bool isutf8ascii(const CharReach &cr) {
121 return (cr & ~CharReach(0x0, 0x7f)).none();
122}
123
124bool isutf8start(const CharReach &cr) {
125 return (cr & CharReach(0x0, UTF_CONT_MAX)).none();
126}
127
128void fill_bitvector(const CharReach &cr, u8 *bits) {
129 assert(bits);
130 std::fill_n(bits, 32, 0);
131 for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
132 bits[i / 8U] |= (u8)1U << (i % 8U);
133 }
134}
135
136void make_and_cmp_mask(const CharReach &cr, u8 *and_mask, u8 *cmp_mask) {
137 u8 lo = 0xff;
138 u8 hi = 0;
139
140 for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) {
141 hi |= (u8)c;
142 lo &= (u8)c;
143 }
144
145 *and_mask = ~(lo ^ hi);
146 *cmp_mask = lo;
147}
148
149} // namespace ue2
150