1 | /* |
2 | * Copyright (c) 2015-2016, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Execute an NFA over a given input, returning the set of states that |
31 | * are active afterwards. |
32 | * |
33 | * Note: although our external interfaces for execute_graph() use std::set, we |
34 | * use a dynamic bitset containing the vertex indices internally for |
35 | * performance. |
36 | */ |
37 | #include "ng_execute.h" |
38 | |
39 | #include "ng_holder.h" |
40 | #include "ng_util.h" |
41 | #include "ue2common.h" |
42 | #include "util/container.h" |
43 | #include "util/dump_charclass.h" |
44 | #include "util/graph_range.h" |
45 | #include "util/ue2string.h" |
46 | |
47 | #include <sstream> |
48 | #include <string> |
49 | |
50 | #include <boost/dynamic_bitset.hpp> |
51 | #include <boost/graph/depth_first_search.hpp> |
52 | #include <boost/graph/reverse_graph.hpp> |
53 | |
54 | using namespace std; |
55 | using boost::dynamic_bitset; |
56 | |
57 | namespace ue2 { |
58 | |
59 | struct StateInfo { |
60 | StateInfo(NFAVertex v, const CharReach &cr) : vertex(v), reach(cr) {} |
61 | StateInfo() : vertex(NGHolder::null_vertex()) {} |
62 | NFAVertex vertex; |
63 | CharReach reach; |
64 | }; |
65 | |
66 | #ifdef DEBUG |
67 | static |
68 | std::string dumpStates(const dynamic_bitset<> &s) { |
69 | std::ostringstream oss; |
70 | for (size_t i = s.find_first(); i != s.npos; i = s.find_next(i)) { |
71 | oss << i << " " ; |
72 | } |
73 | return oss.str(); |
74 | } |
75 | #endif |
76 | |
77 | static |
78 | void step(const NGHolder &g, const vector<StateInfo> &info, |
79 | const dynamic_bitset<> &in, dynamic_bitset<> *out) { |
80 | out->reset(); |
81 | for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) { |
82 | NFAVertex u = info[i].vertex; |
83 | for (auto v : adjacent_vertices_range(u, g)) { |
84 | out->set(g[v].index); |
85 | } |
86 | } |
87 | } |
88 | |
89 | static |
90 | void filter_by_reach(const vector<StateInfo> &info, dynamic_bitset<> *states, |
91 | const CharReach &cr) { |
92 | for (size_t i = states->find_first(); i != states->npos; |
93 | i = states->find_next(i)) { |
94 | if ((info[i].reach & cr).none()) { |
95 | states->reset(i); |
96 | } |
97 | } |
98 | } |
99 | |
100 | template<typename inputT> |
101 | static |
102 | void execute_graph_i(const NGHolder &g, const vector<StateInfo> &info, |
103 | const inputT &input, dynamic_bitset<> *states, |
104 | bool kill_sds) { |
105 | dynamic_bitset<> &curr = *states; |
106 | dynamic_bitset<> next(curr.size()); |
107 | DEBUG_PRINTF("%zu states in\n" , states->count()); |
108 | |
109 | for (const auto &e : input) { |
110 | DEBUG_PRINTF("processing %s\n" , describeClass(e).c_str()); |
111 | step(g, info, curr, &next); |
112 | if (kill_sds) { |
113 | next.reset(NODE_START_DOTSTAR); |
114 | } |
115 | filter_by_reach(info, &next, e); |
116 | next.swap(curr); |
117 | |
118 | if (curr.empty()) { |
119 | DEBUG_PRINTF("went dead\n" ); |
120 | break; |
121 | } |
122 | } |
123 | |
124 | DEBUG_PRINTF("%zu states out\n" , states->size()); |
125 | } |
126 | |
127 | static |
128 | dynamic_bitset<> makeStateBitset(const NGHolder &g, |
129 | const flat_set<NFAVertex> &in) { |
130 | dynamic_bitset<> work_states(num_vertices(g)); |
131 | for (const auto &v : in) { |
132 | u32 idx = g[v].index; |
133 | work_states.set(idx); |
134 | } |
135 | return work_states; |
136 | } |
137 | |
138 | static |
139 | flat_set<NFAVertex> getVertices(const dynamic_bitset<> &in, |
140 | const vector<StateInfo> &info) { |
141 | flat_set<NFAVertex> out; |
142 | for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) { |
143 | out.insert(info[i].vertex); |
144 | } |
145 | return out; |
146 | } |
147 | |
148 | static |
149 | vector<StateInfo> makeInfoTable(const NGHolder &g) { |
150 | vector<StateInfo> info(num_vertices(g)); |
151 | for (auto v : vertices_range(g)) { |
152 | u32 idx = g[v].index; |
153 | const CharReach &cr = g[v].char_reach; |
154 | assert(idx < info.size()); |
155 | info[idx] = StateInfo(v, cr); |
156 | } |
157 | return info; |
158 | } |
159 | |
160 | flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input, |
161 | const flat_set<NFAVertex> &initial_states, |
162 | bool kill_sds) { |
163 | assert(hasCorrectlyNumberedVertices(g)); |
164 | |
165 | auto info = makeInfoTable(g); |
166 | auto work_states = makeStateBitset(g, initial_states); |
167 | |
168 | execute_graph_i(g, info, input, &work_states, kill_sds); |
169 | |
170 | return getVertices(work_states, info); |
171 | } |
172 | |
173 | flat_set<NFAVertex> execute_graph(const NGHolder &g, |
174 | const vector<CharReach> &input, |
175 | const flat_set<NFAVertex> &initial_states) { |
176 | assert(hasCorrectlyNumberedVertices(g)); |
177 | |
178 | auto info = makeInfoTable(g); |
179 | auto work_states = makeStateBitset(g, initial_states); |
180 | |
181 | execute_graph_i(g, info, input, &work_states, false); |
182 | |
183 | return getVertices(work_states, info); |
184 | } |
185 | |
186 | namespace { |
187 | class eg_visitor : public boost::default_dfs_visitor { |
188 | public: |
189 | eg_visitor(const NGHolder &running_g_in, const vector<StateInfo> &info_in, |
190 | const NGHolder &input_g_in, |
191 | map<NFAVertex, dynamic_bitset<> > &states_in) |
192 | : vertex_count(num_vertices(running_g_in)), running_g(running_g_in), |
193 | info(info_in), input_g(input_g_in), states(states_in), |
194 | succs(vertex_count) {} |
195 | |
196 | void finish_vertex(NFAVertex input_v, |
197 | const boost::reverse_graph<NGHolder, const NGHolder &> &) { |
198 | if (input_v == input_g.accept) { |
199 | return; |
200 | } |
201 | assert(input_v != input_g.acceptEod); |
202 | |
203 | DEBUG_PRINTF("finished p%zu\n" , input_g[input_v].index); |
204 | |
205 | /* finish vertex is called on vertex --> implies that all its parents |
206 | * (in the forward graph) are also finished. Our parents will have |
207 | * pushed all of their successors for us into our stateset. */ |
208 | states[input_v].resize(vertex_count); |
209 | dynamic_bitset<> our_states = states[input_v]; |
210 | states[input_v].reset(); |
211 | |
212 | filter_by_reach(info, &our_states, |
213 | input_g[input_v].char_reach); |
214 | |
215 | if (input_v != input_g.startDs && |
216 | edge(input_v, input_v, input_g).second) { |
217 | bool changed; |
218 | do { |
219 | DEBUG_PRINTF("actually not finished -> have self loop\n" ); |
220 | succs.reset(); |
221 | step(running_g, info, our_states, &succs); |
222 | filter_by_reach(info, &succs, |
223 | input_g[input_v].char_reach); |
224 | dynamic_bitset<> our_states2 = our_states | succs; |
225 | changed = our_states2 != our_states; |
226 | our_states.swap(our_states2); |
227 | } while (changed); |
228 | } |
229 | |
230 | DEBUG_PRINTF(" active rstates: %s\n" , dumpStates(our_states).c_str()); |
231 | |
232 | succs.reset(); |
233 | step(running_g, info, our_states, &succs); |
234 | |
235 | /* we need to push into all our (forward) children their successors |
236 | * from us. */ |
237 | for (auto v : adjacent_vertices_range(input_v, input_g)) { |
238 | DEBUG_PRINTF("pushing our states to pstate %zu\n" , |
239 | input_g[v].index); |
240 | if (v == input_g.startDs) { |
241 | /* no need for intra start edges */ |
242 | continue; |
243 | } |
244 | |
245 | states[v].resize(vertex_count); // May not yet exist |
246 | |
247 | if (v != input_g.accept) { |
248 | states[v] |= succs; |
249 | } else { |
250 | /* accept is a magical pseudo state which does not consume |
251 | * characters and we are using to collect the output states. We |
252 | * must fill it with our states rather than our succs. */ |
253 | DEBUG_PRINTF("prev outputted rstates: %s\n" , |
254 | dumpStates(states[v]).c_str()); |
255 | DEBUG_PRINTF("outputted rstates: %s\n" , |
256 | dumpStates(our_states).c_str()); |
257 | |
258 | states[v] |= our_states; |
259 | |
260 | DEBUG_PRINTF("new outputted rstates: %s\n" , |
261 | dumpStates(states[v]).c_str()); |
262 | } |
263 | } |
264 | |
265 | /* note: the states at this vertex are no longer required */ |
266 | } |
267 | |
268 | private: |
269 | const size_t vertex_count; |
270 | const NGHolder &running_g; |
271 | const vector<StateInfo> &info; |
272 | const NGHolder &input_g; |
273 | map<NFAVertex, dynamic_bitset<> > &states; /* vertex in input_g -> set of |
274 | states in running_g */ |
275 | dynamic_bitset<> succs; // temp use internally |
276 | }; |
277 | } // namespace |
278 | |
279 | flat_set<NFAVertex> execute_graph(const NGHolder &running_g, |
280 | const NGHolder &input_dag, |
281 | const flat_set<NFAVertex> &input_start_states, |
282 | const flat_set<NFAVertex> &initial_states) { |
283 | DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n" , |
284 | num_vertices(running_g), num_vertices(input_dag)); |
285 | assert(hasCorrectlyNumberedVertices(running_g)); |
286 | assert(in_degree(input_dag.acceptEod, input_dag) == 1); |
287 | |
288 | map<NFAVertex, boost::default_color_type> colours; |
289 | /* could just a topo order, but really it is time to pull a slightly bigger |
290 | * gun: DFS */ |
291 | boost::reverse_graph<NGHolder, const NGHolder &> revg(input_dag); |
292 | map<NFAVertex, dynamic_bitset<> > dfs_states; |
293 | |
294 | auto info = makeInfoTable(running_g); |
295 | auto input_fs = makeStateBitset(running_g, initial_states); |
296 | |
297 | for (auto v : input_start_states) { |
298 | dfs_states[v] = input_fs; |
299 | } |
300 | |
301 | depth_first_visit(revg, input_dag.accept, |
302 | eg_visitor(running_g, info, input_dag, dfs_states), |
303 | make_assoc_property_map(colours)); |
304 | |
305 | auto states = getVertices(dfs_states[input_dag.accept], info); |
306 | |
307 | #ifdef DEBUG |
308 | DEBUG_PRINTF(" output rstates:" ); |
309 | for (const auto &v : states) { |
310 | printf(" %zu" , running_g[v].index); |
311 | } |
312 | printf("\n" ); |
313 | #endif |
314 | |
315 | return states; |
316 | } |
317 | |
318 | flat_set<NFAVertex> execute_graph(const NGHolder &running_g, |
319 | const NGHolder &input_dag, |
320 | const flat_set<NFAVertex> &initial_states) { |
321 | auto input_start_states = {input_dag.start, input_dag.startDs}; |
322 | return execute_graph(running_g, input_dag, input_start_states, |
323 | initial_states); |
324 | } |
325 | |
326 | static |
327 | bool can_die_early(const NGHolder &g, const vector<StateInfo> &info, |
328 | const dynamic_bitset<> &s, |
329 | map<dynamic_bitset<>, u32> &visited, u32 age_limit) { |
330 | if (contains(visited, s) && visited[s] >= age_limit) { |
331 | /* we have already (or are in the process) of visiting here with a |
332 | * looser limit. */ |
333 | return false; |
334 | } |
335 | visited[s] = age_limit; |
336 | |
337 | if (s.none()) { |
338 | DEBUG_PRINTF("dead\n" ); |
339 | return true; |
340 | } |
341 | |
342 | if (age_limit == 0) { |
343 | return false; |
344 | } |
345 | |
346 | dynamic_bitset<> all_succ(s.size()); |
347 | step(g, info, s, &all_succ); |
348 | all_succ.reset(NODE_START_DOTSTAR); |
349 | |
350 | for (u32 i = 0; i < N_CHARS; i++) { |
351 | dynamic_bitset<> next = all_succ; |
352 | filter_by_reach(info, &next, CharReach(i)); |
353 | if (can_die_early(g, info, next, visited, age_limit - 1)) { |
354 | return true; |
355 | } |
356 | } |
357 | |
358 | return false; |
359 | } |
360 | |
361 | bool can_die_early(const NGHolder &g, u32 age_limit) { |
362 | if (proper_out_degree(g.startDs, g)) { |
363 | return false; |
364 | } |
365 | const vector<StateInfo> &info = makeInfoTable(g); |
366 | map<dynamic_bitset<>, u32> visited; |
367 | return can_die_early(g, info, makeStateBitset(g, {g.start}), visited, |
368 | age_limit); |
369 | } |
370 | |
371 | } // namespace ue2 |
372 | |