| 1 | /* |
| 2 | * Copyright (c) 2015-2016, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | /** \file |
| 30 | * \brief Execute an NFA over a given input, returning the set of states that |
| 31 | * are active afterwards. |
| 32 | * |
| 33 | * Note: although our external interfaces for execute_graph() use std::set, we |
| 34 | * use a dynamic bitset containing the vertex indices internally for |
| 35 | * performance. |
| 36 | */ |
| 37 | #include "ng_execute.h" |
| 38 | |
| 39 | #include "ng_holder.h" |
| 40 | #include "ng_util.h" |
| 41 | #include "ue2common.h" |
| 42 | #include "util/container.h" |
| 43 | #include "util/dump_charclass.h" |
| 44 | #include "util/graph_range.h" |
| 45 | #include "util/ue2string.h" |
| 46 | |
| 47 | #include <sstream> |
| 48 | #include <string> |
| 49 | |
| 50 | #include <boost/dynamic_bitset.hpp> |
| 51 | #include <boost/graph/depth_first_search.hpp> |
| 52 | #include <boost/graph/reverse_graph.hpp> |
| 53 | |
| 54 | using namespace std; |
| 55 | using boost::dynamic_bitset; |
| 56 | |
| 57 | namespace ue2 { |
| 58 | |
| 59 | struct StateInfo { |
| 60 | StateInfo(NFAVertex v, const CharReach &cr) : vertex(v), reach(cr) {} |
| 61 | StateInfo() : vertex(NGHolder::null_vertex()) {} |
| 62 | NFAVertex vertex; |
| 63 | CharReach reach; |
| 64 | }; |
| 65 | |
| 66 | #ifdef DEBUG |
| 67 | static |
| 68 | std::string dumpStates(const dynamic_bitset<> &s) { |
| 69 | std::ostringstream oss; |
| 70 | for (size_t i = s.find_first(); i != s.npos; i = s.find_next(i)) { |
| 71 | oss << i << " " ; |
| 72 | } |
| 73 | return oss.str(); |
| 74 | } |
| 75 | #endif |
| 76 | |
| 77 | static |
| 78 | void step(const NGHolder &g, const vector<StateInfo> &info, |
| 79 | const dynamic_bitset<> &in, dynamic_bitset<> *out) { |
| 80 | out->reset(); |
| 81 | for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) { |
| 82 | NFAVertex u = info[i].vertex; |
| 83 | for (auto v : adjacent_vertices_range(u, g)) { |
| 84 | out->set(g[v].index); |
| 85 | } |
| 86 | } |
| 87 | } |
| 88 | |
| 89 | static |
| 90 | void filter_by_reach(const vector<StateInfo> &info, dynamic_bitset<> *states, |
| 91 | const CharReach &cr) { |
| 92 | for (size_t i = states->find_first(); i != states->npos; |
| 93 | i = states->find_next(i)) { |
| 94 | if ((info[i].reach & cr).none()) { |
| 95 | states->reset(i); |
| 96 | } |
| 97 | } |
| 98 | } |
| 99 | |
| 100 | template<typename inputT> |
| 101 | static |
| 102 | void execute_graph_i(const NGHolder &g, const vector<StateInfo> &info, |
| 103 | const inputT &input, dynamic_bitset<> *states, |
| 104 | bool kill_sds) { |
| 105 | dynamic_bitset<> &curr = *states; |
| 106 | dynamic_bitset<> next(curr.size()); |
| 107 | DEBUG_PRINTF("%zu states in\n" , states->count()); |
| 108 | |
| 109 | for (const auto &e : input) { |
| 110 | DEBUG_PRINTF("processing %s\n" , describeClass(e).c_str()); |
| 111 | step(g, info, curr, &next); |
| 112 | if (kill_sds) { |
| 113 | next.reset(NODE_START_DOTSTAR); |
| 114 | } |
| 115 | filter_by_reach(info, &next, e); |
| 116 | next.swap(curr); |
| 117 | |
| 118 | if (curr.empty()) { |
| 119 | DEBUG_PRINTF("went dead\n" ); |
| 120 | break; |
| 121 | } |
| 122 | } |
| 123 | |
| 124 | DEBUG_PRINTF("%zu states out\n" , states->size()); |
| 125 | } |
| 126 | |
| 127 | static |
| 128 | dynamic_bitset<> makeStateBitset(const NGHolder &g, |
| 129 | const flat_set<NFAVertex> &in) { |
| 130 | dynamic_bitset<> work_states(num_vertices(g)); |
| 131 | for (const auto &v : in) { |
| 132 | u32 idx = g[v].index; |
| 133 | work_states.set(idx); |
| 134 | } |
| 135 | return work_states; |
| 136 | } |
| 137 | |
| 138 | static |
| 139 | flat_set<NFAVertex> getVertices(const dynamic_bitset<> &in, |
| 140 | const vector<StateInfo> &info) { |
| 141 | flat_set<NFAVertex> out; |
| 142 | for (size_t i = in.find_first(); i != in.npos; i = in.find_next(i)) { |
| 143 | out.insert(info[i].vertex); |
| 144 | } |
| 145 | return out; |
| 146 | } |
| 147 | |
| 148 | static |
| 149 | vector<StateInfo> makeInfoTable(const NGHolder &g) { |
| 150 | vector<StateInfo> info(num_vertices(g)); |
| 151 | for (auto v : vertices_range(g)) { |
| 152 | u32 idx = g[v].index; |
| 153 | const CharReach &cr = g[v].char_reach; |
| 154 | assert(idx < info.size()); |
| 155 | info[idx] = StateInfo(v, cr); |
| 156 | } |
| 157 | return info; |
| 158 | } |
| 159 | |
| 160 | flat_set<NFAVertex> execute_graph(const NGHolder &g, const ue2_literal &input, |
| 161 | const flat_set<NFAVertex> &initial_states, |
| 162 | bool kill_sds) { |
| 163 | assert(hasCorrectlyNumberedVertices(g)); |
| 164 | |
| 165 | auto info = makeInfoTable(g); |
| 166 | auto work_states = makeStateBitset(g, initial_states); |
| 167 | |
| 168 | execute_graph_i(g, info, input, &work_states, kill_sds); |
| 169 | |
| 170 | return getVertices(work_states, info); |
| 171 | } |
| 172 | |
| 173 | flat_set<NFAVertex> execute_graph(const NGHolder &g, |
| 174 | const vector<CharReach> &input, |
| 175 | const flat_set<NFAVertex> &initial_states) { |
| 176 | assert(hasCorrectlyNumberedVertices(g)); |
| 177 | |
| 178 | auto info = makeInfoTable(g); |
| 179 | auto work_states = makeStateBitset(g, initial_states); |
| 180 | |
| 181 | execute_graph_i(g, info, input, &work_states, false); |
| 182 | |
| 183 | return getVertices(work_states, info); |
| 184 | } |
| 185 | |
| 186 | namespace { |
| 187 | class eg_visitor : public boost::default_dfs_visitor { |
| 188 | public: |
| 189 | eg_visitor(const NGHolder &running_g_in, const vector<StateInfo> &info_in, |
| 190 | const NGHolder &input_g_in, |
| 191 | map<NFAVertex, dynamic_bitset<> > &states_in) |
| 192 | : vertex_count(num_vertices(running_g_in)), running_g(running_g_in), |
| 193 | info(info_in), input_g(input_g_in), states(states_in), |
| 194 | succs(vertex_count) {} |
| 195 | |
| 196 | void finish_vertex(NFAVertex input_v, |
| 197 | const boost::reverse_graph<NGHolder, const NGHolder &> &) { |
| 198 | if (input_v == input_g.accept) { |
| 199 | return; |
| 200 | } |
| 201 | assert(input_v != input_g.acceptEod); |
| 202 | |
| 203 | DEBUG_PRINTF("finished p%zu\n" , input_g[input_v].index); |
| 204 | |
| 205 | /* finish vertex is called on vertex --> implies that all its parents |
| 206 | * (in the forward graph) are also finished. Our parents will have |
| 207 | * pushed all of their successors for us into our stateset. */ |
| 208 | states[input_v].resize(vertex_count); |
| 209 | dynamic_bitset<> our_states = states[input_v]; |
| 210 | states[input_v].reset(); |
| 211 | |
| 212 | filter_by_reach(info, &our_states, |
| 213 | input_g[input_v].char_reach); |
| 214 | |
| 215 | if (input_v != input_g.startDs && |
| 216 | edge(input_v, input_v, input_g).second) { |
| 217 | bool changed; |
| 218 | do { |
| 219 | DEBUG_PRINTF("actually not finished -> have self loop\n" ); |
| 220 | succs.reset(); |
| 221 | step(running_g, info, our_states, &succs); |
| 222 | filter_by_reach(info, &succs, |
| 223 | input_g[input_v].char_reach); |
| 224 | dynamic_bitset<> our_states2 = our_states | succs; |
| 225 | changed = our_states2 != our_states; |
| 226 | our_states.swap(our_states2); |
| 227 | } while (changed); |
| 228 | } |
| 229 | |
| 230 | DEBUG_PRINTF(" active rstates: %s\n" , dumpStates(our_states).c_str()); |
| 231 | |
| 232 | succs.reset(); |
| 233 | step(running_g, info, our_states, &succs); |
| 234 | |
| 235 | /* we need to push into all our (forward) children their successors |
| 236 | * from us. */ |
| 237 | for (auto v : adjacent_vertices_range(input_v, input_g)) { |
| 238 | DEBUG_PRINTF("pushing our states to pstate %zu\n" , |
| 239 | input_g[v].index); |
| 240 | if (v == input_g.startDs) { |
| 241 | /* no need for intra start edges */ |
| 242 | continue; |
| 243 | } |
| 244 | |
| 245 | states[v].resize(vertex_count); // May not yet exist |
| 246 | |
| 247 | if (v != input_g.accept) { |
| 248 | states[v] |= succs; |
| 249 | } else { |
| 250 | /* accept is a magical pseudo state which does not consume |
| 251 | * characters and we are using to collect the output states. We |
| 252 | * must fill it with our states rather than our succs. */ |
| 253 | DEBUG_PRINTF("prev outputted rstates: %s\n" , |
| 254 | dumpStates(states[v]).c_str()); |
| 255 | DEBUG_PRINTF("outputted rstates: %s\n" , |
| 256 | dumpStates(our_states).c_str()); |
| 257 | |
| 258 | states[v] |= our_states; |
| 259 | |
| 260 | DEBUG_PRINTF("new outputted rstates: %s\n" , |
| 261 | dumpStates(states[v]).c_str()); |
| 262 | } |
| 263 | } |
| 264 | |
| 265 | /* note: the states at this vertex are no longer required */ |
| 266 | } |
| 267 | |
| 268 | private: |
| 269 | const size_t vertex_count; |
| 270 | const NGHolder &running_g; |
| 271 | const vector<StateInfo> &info; |
| 272 | const NGHolder &input_g; |
| 273 | map<NFAVertex, dynamic_bitset<> > &states; /* vertex in input_g -> set of |
| 274 | states in running_g */ |
| 275 | dynamic_bitset<> succs; // temp use internally |
| 276 | }; |
| 277 | } // namespace |
| 278 | |
| 279 | flat_set<NFAVertex> execute_graph(const NGHolder &running_g, |
| 280 | const NGHolder &input_dag, |
| 281 | const flat_set<NFAVertex> &input_start_states, |
| 282 | const flat_set<NFAVertex> &initial_states) { |
| 283 | DEBUG_PRINTF("g has %zu vertices, input_dag has %zu vertices\n" , |
| 284 | num_vertices(running_g), num_vertices(input_dag)); |
| 285 | assert(hasCorrectlyNumberedVertices(running_g)); |
| 286 | assert(in_degree(input_dag.acceptEod, input_dag) == 1); |
| 287 | |
| 288 | map<NFAVertex, boost::default_color_type> colours; |
| 289 | /* could just a topo order, but really it is time to pull a slightly bigger |
| 290 | * gun: DFS */ |
| 291 | boost::reverse_graph<NGHolder, const NGHolder &> revg(input_dag); |
| 292 | map<NFAVertex, dynamic_bitset<> > dfs_states; |
| 293 | |
| 294 | auto info = makeInfoTable(running_g); |
| 295 | auto input_fs = makeStateBitset(running_g, initial_states); |
| 296 | |
| 297 | for (auto v : input_start_states) { |
| 298 | dfs_states[v] = input_fs; |
| 299 | } |
| 300 | |
| 301 | depth_first_visit(revg, input_dag.accept, |
| 302 | eg_visitor(running_g, info, input_dag, dfs_states), |
| 303 | make_assoc_property_map(colours)); |
| 304 | |
| 305 | auto states = getVertices(dfs_states[input_dag.accept], info); |
| 306 | |
| 307 | #ifdef DEBUG |
| 308 | DEBUG_PRINTF(" output rstates:" ); |
| 309 | for (const auto &v : states) { |
| 310 | printf(" %zu" , running_g[v].index); |
| 311 | } |
| 312 | printf("\n" ); |
| 313 | #endif |
| 314 | |
| 315 | return states; |
| 316 | } |
| 317 | |
| 318 | flat_set<NFAVertex> execute_graph(const NGHolder &running_g, |
| 319 | const NGHolder &input_dag, |
| 320 | const flat_set<NFAVertex> &initial_states) { |
| 321 | auto input_start_states = {input_dag.start, input_dag.startDs}; |
| 322 | return execute_graph(running_g, input_dag, input_start_states, |
| 323 | initial_states); |
| 324 | } |
| 325 | |
| 326 | static |
| 327 | bool can_die_early(const NGHolder &g, const vector<StateInfo> &info, |
| 328 | const dynamic_bitset<> &s, |
| 329 | map<dynamic_bitset<>, u32> &visited, u32 age_limit) { |
| 330 | if (contains(visited, s) && visited[s] >= age_limit) { |
| 331 | /* we have already (or are in the process) of visiting here with a |
| 332 | * looser limit. */ |
| 333 | return false; |
| 334 | } |
| 335 | visited[s] = age_limit; |
| 336 | |
| 337 | if (s.none()) { |
| 338 | DEBUG_PRINTF("dead\n" ); |
| 339 | return true; |
| 340 | } |
| 341 | |
| 342 | if (age_limit == 0) { |
| 343 | return false; |
| 344 | } |
| 345 | |
| 346 | dynamic_bitset<> all_succ(s.size()); |
| 347 | step(g, info, s, &all_succ); |
| 348 | all_succ.reset(NODE_START_DOTSTAR); |
| 349 | |
| 350 | for (u32 i = 0; i < N_CHARS; i++) { |
| 351 | dynamic_bitset<> next = all_succ; |
| 352 | filter_by_reach(info, &next, CharReach(i)); |
| 353 | if (can_die_early(g, info, next, visited, age_limit - 1)) { |
| 354 | return true; |
| 355 | } |
| 356 | } |
| 357 | |
| 358 | return false; |
| 359 | } |
| 360 | |
| 361 | bool can_die_early(const NGHolder &g, u32 age_limit) { |
| 362 | if (proper_out_degree(g.startDs, g)) { |
| 363 | return false; |
| 364 | } |
| 365 | const vector<StateInfo> &info = makeInfoTable(g); |
| 366 | map<dynamic_bitset<>, u32> visited; |
| 367 | return can_die_early(g, info, makeStateBitset(g, {g.start}), visited, |
| 368 | age_limit); |
| 369 | } |
| 370 | |
| 371 | } // namespace ue2 |
| 372 | |