1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Reverse acceleration analysis.
31 */
32#include "ng_revacc.h"
33
34#include "grey.h"
35#include "ng_holder.h"
36#include "ue2common.h"
37#include "nfa/accel.h"
38#include "nfa/nfa_internal.h"
39#include "util/bitutils.h"
40#include "util/charreach.h"
41#include "util/graph_range.h"
42
43#include <set>
44
45using namespace std;
46
47namespace ue2 {
48
49static
50bool isPseudoNoCaseChar(const CharReach &cr) {
51 return cr.count() == 2 && !(cr.find_first() & 32)
52 && cr.test(cr.find_first() | 32);
53}
54
55static
56bool lookForEodSchemes(const RevAccInfo &rev_info, const u32 minWidth,
57 NFA *nfa) {
58 DEBUG_PRINTF("pure eod triggered pattern\n");
59
60 /* 2 char */
61 for (u8 nocase = 0; nocase < 2; nocase++) {
62 for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) {
63 const CharReach &cr = rev_info.acceptEodReach[i];
64 const CharReach &cr2 = rev_info.acceptEodReach[i - 1];
65
66 if (!nocase && cr.count() == 1 && cr2.count() == 1) {
67 assert(i < minWidth);
68 if (i >= minWidth) {
69 goto single;
70 }
71 nfa->rAccelType = ACCEL_RDEOD;
72 nfa->rAccelData.array[0] = (u8)cr.find_first();
73 nfa->rAccelData.array[1] = (u8)cr2.find_first();
74 nfa->rAccelOffset = i + 1;
75 DEBUG_PRINTF("raccel eod x2 %u %04hx\n",
76 nfa->rAccelOffset, nfa->rAccelData.dc);
77 return true;
78 } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr))
79 && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) {
80 assert(i < minWidth);
81 if (i >= minWidth) {
82 goto single;
83 }
84 nfa->rAccelType = ACCEL_RDEOD_NOCASE;
85 nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR; /* uppercase */
86 nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR;
87 nfa->rAccelOffset = i + 1;
88 DEBUG_PRINTF("raccel nc eod x2 %u %04hx\n",
89 nfa->rAccelOffset, nfa->rAccelData.dc);
90 return true;
91 }
92 }
93 }
94
95 single:
96 /* 1 char */
97 for (u8 nocase = 0; nocase < 2; nocase++) {
98 for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
99 const CharReach &cr = rev_info.acceptEodReach[i];
100 if (!nocase && cr.count() == 1) {
101 assert(i < minWidth);
102 if (i >= minWidth) {
103 return false;
104 }
105 nfa->rAccelType = ACCEL_REOD;
106 nfa->rAccelData.c = (u8) cr.find_first();
107 nfa->rAccelOffset = i + 1;
108 DEBUG_PRINTF("raccel eod %u %02hhx\n",
109 nfa->rAccelOffset, nfa->rAccelData.c);
110 return true;
111 } else if (nocase && isPseudoNoCaseChar(cr)) {
112 assert(i < minWidth);
113 if (i >= minWidth) {
114 return false;
115 }
116 nfa->rAccelType = ACCEL_REOD_NOCASE;
117 nfa->rAccelData.c = (u8)cr.find_first(); /* uppercase */
118 nfa->rAccelOffset = i + 1;
119 DEBUG_PRINTF("raccel nc eod %u %02hhx\n",
120 nfa->rAccelOffset, nfa->rAccelData.c);
121 return true;
122 }
123 }
124 }
125
126 return false;
127}
128
129static
130bool lookForFloatingSchemes(const RevAccInfo &rev_info,
131 const u32 minWidth, NFA *nfa) {
132 /* 2 char */
133 for (u8 nocase = 0; nocase < 2; nocase++) {
134 for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) {
135 CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i];
136 CharReach cr2 = rev_info.acceptEodReach[i - 1]
137 | rev_info.acceptReach[i - 1];
138 if (!nocase && cr.count() == 1 && cr2.count() == 1) {
139 assert((u8)(i - 1) < minWidth);
140 if (i > minWidth) {
141 goto single;
142 }
143 nfa->rAccelType = ACCEL_RDVERM;
144 nfa->rAccelData.array[0] = (u8)cr.find_first();
145 nfa->rAccelData.array[1] = (u8)cr2.find_first();
146 nfa->rAccelOffset = i;
147 DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx\n",
148 nfa->rAccelOffset, nfa->rAccelData.array[0],
149 nfa->rAccelData.array[1]);
150 return true;
151 } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr))
152 && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) {
153 assert((u8)(i - 1) < minWidth);
154 if (i > minWidth) {
155 goto single;
156 }
157 nfa->rAccelType = ACCEL_RDVERM_NOCASE;
158 nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR;
159 nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR;
160 nfa->rAccelOffset = i;
161 DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx nc\n",
162 nfa->rAccelOffset, nfa->rAccelData.array[0],
163 nfa->rAccelData.array[1]);
164 return true;
165 }
166 }
167 }
168
169 single:
170 /* 1 char */
171 for (u8 nocase = 0; nocase < 2; nocase++) {
172 for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
173 CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i];
174 if (!nocase && cr.count() == 1) {
175 assert(i < minWidth);
176 if (i >= minWidth) {
177 return false;
178 }
179 nfa->rAccelType = ACCEL_RVERM;
180 nfa->rAccelData.c = (u8)cr.find_first();
181 nfa->rAccelOffset = i + 1;
182 DEBUG_PRINTF("raccel verm %u %02hhx\n", nfa->rAccelOffset,
183 nfa->rAccelData.c);
184 return true;
185 } else if (nocase && isPseudoNoCaseChar(cr)) {
186 assert(i < minWidth);
187 if (i >= minWidth) {
188 return false;
189 }
190 nfa->rAccelType = ACCEL_RVERM_NOCASE;
191 nfa->rAccelData.c = (u8)cr.find_first(); /* 'uppercase' char */
192 nfa->rAccelOffset = i + 1;
193 DEBUG_PRINTF("raccel nc verm %u %02hhx\n", nfa->rAccelOffset,
194 nfa->rAccelData.c);
195 return true;
196 }
197 }
198 }
199
200 return false;
201}
202
203void buildReverseAcceleration(NFA *nfa, const RevAccInfo &rev_info,
204 u32 min_width, bool eod_only) {
205 assert(nfa);
206
207 if (!rev_info.valid) {
208 return;
209 }
210
211 nfa->rAccelOffset = 1;
212
213 assert(rev_info.acceptReach[0].any() || rev_info.acceptEodReach[0].any());
214 if (rev_info.acceptReach[0].none() && rev_info.acceptEodReach[0].none()) {
215 DEBUG_PRINTF("expected path to accept\n");
216 return;
217 }
218
219 if (rev_info.acceptReach[0].none()) {
220 /* eod only */
221
222 if (lookForEodSchemes(rev_info, min_width, nfa)) {
223 assert(nfa->rAccelOffset <= min_width);
224 return;
225 }
226 }
227
228 if (eod_only) {
229 return;
230 }
231
232 if (!lookForFloatingSchemes(rev_info, min_width, nfa)) {
233 DEBUG_PRINTF("failed to accelerate\n");
234 }
235}
236
237static
238void populateRevAccelInfo(const NGHolder &g, NFAVertex terminal,
239 vector<CharReach> *reach) {
240 set<NFAVertex> vset;
241
242 for (auto v : inv_adjacent_vertices_range(terminal, g)) {
243 if (!is_special(v, g)) {
244 vset.insert(v);
245 }
246 }
247
248 for (u8 offset = 0; offset < MAX_RACCEL_OFFSET; offset++) {
249 set<NFAVertex> next;
250
251 for (auto v : vset) {
252 const CharReach &cr = g[v].char_reach;
253 (*reach)[offset] |= cr;
254
255 DEBUG_PRINTF("off %u adding %zu to %zu\n", offset, cr.count(),
256 (*reach)[offset].count());
257
258 for (auto u : inv_adjacent_vertices_range(v, g)) {
259 if (u == g.start || u == g.startDs) {
260 /* kill all subsequent offsets by setting to dot, setting
261 * to dot is in someways not accurate as there may be no
262 * data at all but neither case can be accelerated */
263 for (u8 i = offset + 1; i < MAX_RACCEL_OFFSET; i++) {
264 (*reach)[i].setall();
265 }
266 break;
267 } else if (!is_special(u, g)) {
268 next.insert(u);
269 }
270 }
271 }
272
273 swap(vset, next);
274 }
275}
276
277void populateReverseAccelerationInfo(RevAccInfo &rai, const NGHolder &g) {
278 DEBUG_PRINTF("pop rev info\n");
279 populateRevAccelInfo(g, g.accept, &rai.acceptReach);
280 populateRevAccelInfo(g, g.acceptEod, &rai.acceptEodReach);
281 rai.valid = true;
282}
283
284void mergeReverseAccelerationInfo(RevAccInfo &dest, const RevAccInfo &vic) {
285 DEBUG_PRINTF("merging ra\n");
286
287 dest.valid &= vic.valid;
288
289 for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) {
290 dest.acceptReach[i] |= vic.acceptReach[i];
291 dest.acceptEodReach[i] |= vic.acceptEodReach[i];
292 }
293}
294
295RevAccInfo::RevAccInfo(void)
296 : valid(false), acceptReach(MAX_RACCEL_OFFSET),
297 acceptEodReach(MAX_RACCEL_OFFSET) {}
298
299} // namespace ue2
300