1/*
2 * Copyright (c) 2015-2016, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Execute an NFA over a given input, returning the set of states that
31 * are active afterwards.
32 *
33 * Note: although our external interfaces for execute_graph() use std::set, we
34 * use a dynamic bitset containing the vertex indices internally for
35 * performance.
36 */
37#include "ng_execute.h"
38
39#include "ng_holder.h"
40#include "ng_util.h"
41#include "ue2common.h"
42#include "util/container.h"
43#include "util/dump_charclass.h"
44#include "util/graph_range.h"
45#include "util/ue2string.h"
46
47#include <sstream>
48#include <string>
49
50#include <boost/dynamic_bitset.hpp>
51#include <boost/graph/depth_first_search.hpp>
52#include <boost/graph/reverse_graph.hpp>
53
54using namespace std;
55using boost::dynamic_bitset;
56
57namespace ue2 {
58
59struct StateInfo {
60 StateInfo(NFAVertex v, const CharReach &cr) : vertex(v), reach(cr) {}
61 StateInfo() : vertex(NGHolder::null_vertex()) {}
62 NFAVertex vertex;
63 CharReach reach;
64};
65
66#ifdef DEBUG
67static
68std::string dumpStates(const dynamic_bitset<> &s) {
69 std::ostringstream oss;
70 for (size_t i = s.find_first(); i != s.npos; i = s.find_next(i)) {
71 oss << i << " ";
72 }
73 return oss.str();
74}
75#endif
76
77static
78void step(const NGHolder &g, const vector<StateInfo> &info,
79 const dynamic_bitset<> &in, dynamic_bitset<> *out) {
80 out->reset();
81 for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
82 NFAVertex u = info[i].vertex;
83 for (auto v : adjacent_vertices_range(u, g)) {
84 out->set(g[v].index);
85 }
86 }
87}
88
89static
90void filter_by_reach(const vector<StateInfo> &info, dynamic_bitset<> *states,
91 const CharReach &cr) {
92 for (size_t i = states->find_first(); i != states->npos;
93 i = states->find_next(i)) {
94 if ((info[i].reach & cr).none()) {
95 states->reset(i);
96 }
97 }
98}
99
100template<typename inputT>
101static
102void execute_graph_i(const NGHolder &g, const vector<StateInfo> &info,
103 const inputT &input, dynamic_bitset<> *states,
104 bool kill_sds) {
105 dynamic_bitset<> &curr = *states;
106 dynamic_bitset<> next(curr.size());
107 DEBUG_PRINTF("%zu states in\n", states->count());
108
109 for (const auto &e : input) {
110 DEBUG_PRINTF("processing %s\n", describeClass(e).c_str());
111 step(g, info, curr, &next);
112 if (kill_sds) {
113 next.reset(NODE_START_DOTSTAR);
114 }
115 filter_by_reach(info, &next, e);
116 next.swap(curr);
117
118 if (curr.empty()) {
119 DEBUG_PRINTF("went dead\n");
120 break;
121 }
122 }
123
124 DEBUG_PRINTF("%zu states out\n", states->size());
125}
126
127static
128dynamic_bitset<> makeStateBitset(const NGHolder &g,
129 const flat_set<NFAVertex> &in) {
130 dynamic_bitset<> work_states(num_vertices(g));
131 for (const auto &v : in) {
132 u32 idx = g[v].index;
133 work_states.set(idx);
134 }
135 return work_states;
136}
137
138static
139flat_set<NFAVertex> getVertices(const dynamic_bitset<> &in,
140 const vector<StateInfo> &info) {
141 flat_set<NFAVertex> out;
142 for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) {
143 out.insert(info[i].vertex);
144 }
145 return out;
146}
147
148static
149vector<StateInfo> makeInfoTable(const NGHolder &g) {
150 vector<StateInfo> info(num_vertices(g));
151 for (auto v : vertices_range(g)) {
152 u32 idx = g[v].index;
153 const CharReach &cr = g[v].char_reach;
154 assert(idx < info.size());
155 info[idx] = StateInfo(v, cr);
156 }
157 return info;
158}
159
160flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input,
161 const flat_set<NFAVertex> &initial_states,
162 bool kill_sds) {
163 assert(hasCorrectlyNumberedVertices(g));
164
165 auto info = makeInfoTable(g);
166 auto work_states = makeStateBitset(g, initial_states);
167
168 execute_graph_i(g, info, input, &work_states, kill_sds);
169
170 return getVertices(work_states, info);
171}
172
173flat_set<NFAVertex> execute_graph(const NGHolder &g,
174 const vector<CharReach> &input,
175 const flat_set<NFAVertex> &initial_states) {
176 assert(hasCorrectlyNumberedVertices(g));
177
178 auto info = makeInfoTable(g);
179 auto work_states = makeStateBitset(g, initial_states);
180
181 execute_graph_i(g, info, input, &work_states, false);
182
183 return getVertices(work_states, info);
184}
185
186namespace {
187class eg_visitor : public boost::default_dfs_visitor {
188public:
189 eg_visitor(const NGHolder &running_g_in, const vector<StateInfo> &info_in,
190 const NGHolder &input_g_in,
191 map<NFAVertex, dynamic_bitset<> > &states_in)
192 : vertex_count(num_vertices(running_g_in)), running_g(running_g_in),
193 info(info_in), input_g(input_g_in), states(states_in),
194 succs(vertex_count) {}
195
196 void finish_vertex(NFAVertex input_v,
197 const boost::reverse_graph<NGHolder, const NGHolder &> &) {
198 if (input_v == input_g.accept) {
199 return;
200 }
201 assert(input_v != input_g.acceptEod);
202
203 DEBUG_PRINTF("finished p%zu\n", input_g[input_v].index);
204
205 /* finish vertex is called on vertex --> implies that all its parents
206 * (in the forward graph) are also finished. Our parents will have
207 * pushed all of their successors for us into our stateset. */
208 states[input_v].resize(vertex_count);
209 dynamic_bitset<> our_states = states[input_v];
210 states[input_v].reset();
211
212 filter_by_reach(info, &our_states,
213 input_g[input_v].char_reach);
214
215 if (input_v != input_g.startDs &&
216 edge(input_v, input_v, input_g).second) {
217 bool changed;
218 do {
219 DEBUG_PRINTF("actually not finished -> have self loop\n");
220 succs.reset();
221 step(running_g, info, our_states, &succs);
222 filter_by_reach(info, &succs,
223 input_g[input_v].char_reach);
224 dynamic_bitset<> our_states2 = our_states | succs;
225 changed = our_states2 != our_states;
226 our_states.swap(our_states2);
227 } while (changed);
228 }
229
230 DEBUG_PRINTF(" active rstates: %s\n", dumpStates(our_states).c_str());
231
232 succs.reset();
233 step(running_g, info, our_states, &succs);
234
235 /* we need to push into all our (forward) children their successors
236 * from us. */
237 for (auto v : adjacent_vertices_range(input_v, input_g)) {
238 DEBUG_PRINTF("pushing our states to pstate %zu\n",
239 input_g[v].index);
240 if (v == input_g.startDs) {
241 /* no need for intra start edges */
242 continue;
243 }
244
245 states[v].resize(vertex_count); // May not yet exist
246
247 if (v != input_g.accept) {
248 states[v] |= succs;
249 } else {
250 /* accept is a magical pseudo state which does not consume
251 * characters and we are using to collect the output states. We
252 * must fill it with our states rather than our succs. */
253 DEBUG_PRINTF("prev outputted rstates: %s\n",
254 dumpStates(states[v]).c_str());
255 DEBUG_PRINTF("outputted rstates: %s\n",
256 dumpStates(our_states).c_str());
257
258 states[v] |= our_states;
259
260 DEBUG_PRINTF("new outputted rstates: %s\n",
261 dumpStates(states[v]).c_str());
262 }
263 }
264
265 /* note: the states at this vertex are no longer required */
266 }
267
268private:
269 const size_t vertex_count;
270 const NGHolder &running_g;
271 const vector<StateInfo> &info;
272 const NGHolder &input_g;
273 map<NFAVertex, dynamic_bitset<> > &states; /* vertex in input_g -> set of
274 states in running_g */
275 dynamic_bitset<> succs; // temp use internally
276};
277} // namespace
278
279flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
280 const NGHolder &input_dag,
281 const flat_set<NFAVertex> &input_start_states,
282 const flat_set<NFAVertex> &initial_states) {
283 DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n",
284 num_vertices(running_g), num_vertices(input_dag));
285 assert(hasCorrectlyNumberedVertices(running_g));
286 assert(in_degree(input_dag.acceptEod, input_dag) == 1);
287
288 map<NFAVertex, boost::default_color_type> colours;
289 /* could just a topo order, but really it is time to pull a slightly bigger
290 * gun: DFS */
291 boost::reverse_graph<NGHolder, const NGHolder &> revg(input_dag);
292 map<NFAVertex, dynamic_bitset<> > dfs_states;
293
294 auto info = makeInfoTable(running_g);
295 auto input_fs = makeStateBitset(running_g, initial_states);
296
297 for (auto v : input_start_states) {
298 dfs_states[v] = input_fs;
299 }
300
301 depth_first_visit(revg, input_dag.accept,
302 eg_visitor(running_g, info, input_dag, dfs_states),
303 make_assoc_property_map(colours));
304
305 auto states = getVertices(dfs_states[input_dag.accept], info);
306
307#ifdef DEBUG
308 DEBUG_PRINTF(" output rstates:");
309 for (const auto &v : states) {
310 printf(" %zu", running_g[v].index);
311 }
312 printf("\n");
313#endif
314
315 return states;
316}
317
318flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
319 const NGHolder &input_dag,
320 const flat_set<NFAVertex> &initial_states) {
321 auto input_start_states = {input_dag.start, input_dag.startDs};
322 return execute_graph(running_g, input_dag, input_start_states,
323 initial_states);
324}
325
326static
327bool can_die_early(const NGHolder &g, const vector<StateInfo> &info,
328 const dynamic_bitset<> &s,
329 map<dynamic_bitset<>, u32> &visited, u32 age_limit) {
330 if (contains(visited, s) && visited[s] >= age_limit) {
331 /* we have already (or are in the process) of visiting here with a
332 * looser limit. */
333 return false;
334 }
335 visited[s] = age_limit;
336
337 if (s.none()) {
338 DEBUG_PRINTF("dead\n");
339 return true;
340 }
341
342 if (age_limit == 0) {
343 return false;
344 }
345
346 dynamic_bitset<> all_succ(s.size());
347 step(g, info, s, &all_succ);
348 all_succ.reset(NODE_START_DOTSTAR);
349
350 for (u32 i = 0; i < N_CHARS; i++) {
351 dynamic_bitset<> next = all_succ;
352 filter_by_reach(info, &next, CharReach(i));
353 if (can_die_early(g, info, next, visited, age_limit - 1)) {
354 return true;
355 }
356 }
357
358 return false;
359}
360
361bool can_die_early(const NGHolder &g, u32 age_limit) {
362 if (proper_out_degree(g.startDs, g)) {
363 return false;
364 }
365 const vector<StateInfo> &info = makeInfoTable(g);
366 map<dynamic_bitset<>, u32> visited;
367 return can_die_early(g, info, makeStateBitset(g, {g.start}), visited,
368 age_limit);
369}
370
371} // namespace ue2
372