1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Graph fuzzer for approximate matching |
31 | */ |
32 | |
33 | #include "ng_fuzzy.h" |
34 | |
35 | #include "ng.h" |
36 | #include "ng_depth.h" |
37 | #include "ng_util.h" |
38 | |
39 | #include <map> |
40 | #include <vector> |
41 | using namespace std; |
42 | |
43 | namespace ue2 { |
44 | |
45 | // returns all successors up to a given depth in a vector of sets, indexed by |
46 | // zero-based depth from source vertex |
47 | static |
48 | vector<flat_set<NFAVertex>> gatherSuccessorsByDepth(const NGHolder &g, |
49 | NFAVertex src, u32 depth) { |
50 | vector<flat_set<NFAVertex>> result(depth); |
51 | flat_set<NFAVertex> cur, next; |
52 | |
53 | assert(depth > 0); |
54 | |
55 | // populate current set of successors |
56 | for (auto v : adjacent_vertices_range(src, g)) { |
57 | // ignore self-loops |
58 | if (src == v) { |
59 | continue; |
60 | } |
61 | DEBUG_PRINTF("Node %zu depth 1\n" , g[v].index); |
62 | cur.insert(v); |
63 | } |
64 | result[0] = cur; |
65 | |
66 | for (unsigned d = 1; d < depth; d++) { |
67 | // collect all successors for all current level vertices |
68 | for (auto v : cur) { |
69 | // don't go past special nodes |
70 | if (is_special(v, g)) { |
71 | continue; |
72 | } |
73 | |
74 | for (auto succ : adjacent_vertices_range(v, g)) { |
75 | // ignore self-loops |
76 | if (v == succ) { |
77 | continue; |
78 | } |
79 | DEBUG_PRINTF("Node %zu depth %u\n" , g[succ].index, d + 1); |
80 | next.insert(succ); |
81 | } |
82 | } |
83 | result[d] = next; |
84 | next.swap(cur); |
85 | next.clear(); |
86 | } |
87 | |
88 | return result; |
89 | } |
90 | |
91 | // returns all predecessors up to a given depth in a vector of sets, indexed by |
92 | // zero-based depth from source vertex |
93 | static |
94 | vector<flat_set<NFAVertex>> gatherPredecessorsByDepth(const NGHolder &g, |
95 | NFAVertex src, |
96 | u32 depth) { |
97 | vector<flat_set<NFAVertex>> result(depth); |
98 | flat_set<NFAVertex> cur, next; |
99 | |
100 | assert(depth > 0); |
101 | |
102 | // populate current set of successors |
103 | for (auto v : inv_adjacent_vertices_range(src, g)) { |
104 | // ignore self-loops |
105 | if (src == v) { |
106 | continue; |
107 | } |
108 | DEBUG_PRINTF("Node %zu depth 1\n" , g[v].index); |
109 | cur.insert(v); |
110 | } |
111 | result[0] = cur; |
112 | |
113 | for (unsigned d = 1; d < depth; d++) { |
114 | // collect all successors for all current level vertices |
115 | for (auto v : cur) { |
116 | for (auto pred : inv_adjacent_vertices_range(v, g)) { |
117 | // ignore self-loops |
118 | if (v == pred) { |
119 | continue; |
120 | } |
121 | DEBUG_PRINTF("Node %zu depth %u\n" , g[pred].index, d + 1); |
122 | next.insert(pred); |
123 | } |
124 | } |
125 | result[d] = next; |
126 | next.swap(cur); |
127 | next.clear(); |
128 | } |
129 | |
130 | return result; |
131 | } |
132 | |
/*
 * This struct produces a fuzzed graph; that is, a graph that is able to match
 * the original pattern, as well as input data within a certain edit distance.
 * Construct the struct, then call fuzz_graph() to transform the graph.
 *
 * Terminology used:
 * - Shadow vertices: vertices mirroring the original graph at various edit
 *   distances
 * - Shadow graph level: edit distance of a particular shadow graph
 * - Helpers: dot vertices assigned to shadow vertices, used for insert/replace
 */
struct ShadowGraph {
    NGHolder &g; // graph being fuzzed, modified in place
    u32 edit_distance; // maximum number of edits to accommodate
    bool hamming; // substitutions only (no insert/remove) when true
    // (original vertex, level) -> shadow vertex at that level; level 0 maps
    // a vertex to itself
    map<pair<NFAVertex, u32>, NFAVertex> shadow_map;
    // (original vertex, level) -> helper (dot) vertex at that level
    map<pair<NFAVertex, u32>, NFAVertex> helper_map;
    // shadow/helper vertex -> its clone carrying an alternate report set
    map<NFAVertex, NFAVertex> clones;
    // edge creation is deferred
    vector<pair<NFAVertex, NFAVertex>> edges_to_be_added;
    // non-special vertices of the original graph that get shadows/helpers
    flat_set<NFAVertex> orig;

    ShadowGraph(NGHolder &g_in, u32 ed_in, bool hamm_in)
        : g(g_in), edit_distance(ed_in), hamming(hamm_in) {}

    // main entry point: transforms g in place; no-op at edit distance 0
    void fuzz_graph() {
        if (edit_distance == 0) {
            return;
        }

        DEBUG_PRINTF("edit distance = %u hamming = %s\n", edit_distance,
                     hamming ? "true" : "false");

        // step 1: prepare the vertices, helpers and shadows according to
        // the original graph
        prepare_graph();

        // step 2: add shadow and helper nodes
        build_shadow_graph();

        // step 3: set up reports for newly created vertices (and make clones
        // if necessary)
        if (!hamming) {
            create_reports();
        }

        // step 4: wire up shadow graph and helpers for insert/replace/remove
        connect_shadow_graph();

        // step 5: commit all the edge wirings
        DEBUG_PRINTF("Committing edge wirings\n");
        for (const auto &p : edges_to_be_added) {
            add_edge_if_not_present(p.first, p.second, g);
        }

        DEBUG_PRINTF("Done!\n");
    }

private:
    // return the clone of v if one was made during report setup, else v
    // itself; operator[] access is safe here as it is guarded by contains()
    const NFAVertex& get_clone(const NFAVertex &v) {
        return contains(clones, v) ?
               clones[v] : v;
    }

    // queue edge u->v, plus the corresponding edge between their clones (if
    // any); edges from clones into accepts are skipped, as those are wired
    // up during cloning itself
    void connect_to_clones(const NFAVertex &u, const NFAVertex &v) {
        const NFAVertex &clone_u = get_clone(u);
        const NFAVertex &clone_v = get_clone(v);

        edges_to_be_added.emplace_back(u, v);
        DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[u].index, g[v].index);

        // do not connect clones to accepts, we do it during cloning
        if (is_any_accept(clone_v, g)) {
            return;
        }
        edges_to_be_added.emplace_back(clone_u, clone_v);
        DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[clone_u].index,
                     g[clone_v].index);
    }

    // set up level 0 mappings for every vertex (each vertex is its own
    // shadow/helper at level 0), map accepts to themselves at all levels,
    // and collect the set of vertices that will be shadowed
    void prepare_graph() {
        DEBUG_PRINTF("Building shadow graphs\n");

        for (auto v : vertices_range(g)) {
            // all level 0 vertices are their own helpers and their own shadows
            helper_map[make_pair(v, 0)] = v;
            shadow_map[make_pair(v, 0)] = v;

            // find special nodes
            if (is_any_accept(v, g)) {
                DEBUG_PRINTF("Node %zu is a special node\n", g[v].index);
                for (unsigned edit = 1; edit <= edit_distance; edit++) {
                    // all accepts are their own shadows and helpers at all
                    // levels
                    shadow_map[make_pair(v, edit)] = v;
                    helper_map[make_pair(v, edit)] = v;
                }
                continue;
            }
            DEBUG_PRINTF("Node %zu is to be shadowed\n", g[v].index);
            orig.insert(v);
        }
    }

    // clone each original vertex into shadow (and, where needed, helper)
    // vertices for every edit distance level from 1 to edit_distance
    void build_shadow_graph() {
        for (auto v : orig) {
            DEBUG_PRINTF("Adding shadow/helper nodes for node %zu\n",
                         g[v].index);
            for (unsigned dist = 1; dist <= edit_distance; dist++) {
                auto shadow_v = v;

                // start and startDs cannot have shadows but do have helpers
                if (!is_any_start(v, g)) {
                    shadow_v = clone_vertex(g, v);
                    DEBUG_PRINTF("New shadow node ID: %zu (level %u)\n",
                                 g[shadow_v].index, dist);
                }
                shadow_map[make_pair(v, dist)] = shadow_v;

                // if there's nowhere to go from this vertex, no helper needed
                if (proper_out_degree(v, g) < 1) {
                    DEBUG_PRINTF("No helper for node ID: %zu (level %u)\n",
                                 g[shadow_v].index, dist);
                    helper_map[make_pair(v, dist)] = shadow_v;
                    continue;
                }

                // start and startDs only have helpers for insert, so not Hamming
                if (hamming && is_any_start(v, g)) {
                    DEBUG_PRINTF("No helper for node ID: %zu (level %u)\n",
                                 g[shadow_v].index, dist);
                    helper_map[make_pair(v, dist)] = shadow_v;
                    continue;
                }

                auto helper_v = clone_vertex(g, v);
                DEBUG_PRINTF("New helper node ID: %zu (level %u)\n",
                             g[helper_v].index, dist);

                // this is a helper, so make it a dot
                g[helper_v].char_reach = CharReach::dot();
                // do not copy virtual start's assert flags
                if (is_virtual_start(v, g)) {
                    DEBUG_PRINTF("Helper node ID is virtual start: %zu (level %u)\n",
                                 g[helper_v].index, dist);
                    g[helper_v].assert_flags = 0;
                }
                helper_map[make_pair(v, dist)] = helper_v;
            }
        }
    }

    // wire up successors according to the original graph, wire helpers
    // to shadow successors (insert/replace)
    void connect_succs(NFAVertex v, u32 dist) {
        DEBUG_PRINTF("Wiring up successors for node %zu shadow level %u\n",
                     g[v].index, dist);
        const auto &cur_shadow_v = shadow_map[make_pair(v, dist)];
        const auto &cur_shadow_helper = helper_map[make_pair(v, dist)];

        // multiple insert
        if (!hamming && dist > 1) {
            const auto &prev_level_helper = helper_map[make_pair(v, dist - 1)];
            connect_to_clones(prev_level_helper, cur_shadow_helper);
        }

        for (auto orig_dst : adjacent_vertices_range(v, g)) {
            const auto &shadow_dst = shadow_map[make_pair(orig_dst, dist)];

            connect_to_clones(cur_shadow_v, shadow_dst);

            // ignore startDs for insert/replace
            if (orig_dst == g.startDs) {
                continue;
            }

            connect_to_clones(cur_shadow_helper, shadow_dst);
        }
    }

    // wire up predecessors according to the original graph, wire
    // predecessors to helpers (replace), wire predecessor helpers to
    // helpers (multiple replace)
    void connect_preds(NFAVertex v, u32 dist) {
        DEBUG_PRINTF("Wiring up predecessors for node %zu shadow level %u\n",
                     g[v].index, dist);
        const auto &cur_shadow_v = shadow_map[make_pair(v, dist)];
        const auto &cur_shadow_helper = helper_map[make_pair(v, dist)];

        auto orig_src_vertices = inv_adjacent_vertices_range(v, g);
        for (auto orig_src : orig_src_vertices) {
            // ignore edges from start to startDs
            if (v == g.startDs && orig_src == g.start) {
                continue;
            }
            // ignore self-loops for replace
            if (orig_src != v) {
                // do not wire a replace node for start vertices if we
                // have a virtual start
                if (is_virtual_start(v, g) && is_any_start(orig_src, g)) {
                    continue;
                }

                if (dist) {
                    const auto &prev_level_src =
                        shadow_map[make_pair(orig_src, dist - 1)];
                    const auto &prev_level_helper =
                        helper_map[make_pair(orig_src, dist - 1)];

                    connect_to_clones(prev_level_src, cur_shadow_helper);
                    connect_to_clones(prev_level_helper, cur_shadow_helper);
                }
            }
            // wire predecessor according to original graph
            const auto &shadow_src = shadow_map[make_pair(orig_src, dist)];

            connect_to_clones(shadow_src, cur_shadow_v);
        }
    }

    // wire up previous level helper to current shadow (insert)
    void connect_helpers(NFAVertex v, u32 dist) {
        DEBUG_PRINTF("Wiring up helpers for node %zu shadow level %u\n",
                     g[v].index, dist);
        const auto &cur_shadow_helper = helper_map[make_pair(v, dist)];
        auto prev_level_v = shadow_map[make_pair(v, dist - 1)];

        connect_to_clones(prev_level_v, cur_shadow_helper);
    }

    /*
     * wiring edges for removal is a special case.
     *
     * when wiring edges for removal, as well as wiring up immediate
     * predecessors to immediate successors, we also need to wire up more
     * distant successors to their respective shadow graph levels.
     *
     * for example, consider graph start->a->b->c->d->accept.
     *
     * at edit distance 1, we need remove edges start->b, a->c, b->d, and
     * c->accept, all going from original graph (level 0) to shadow graph
     * level 1.
     *
     * at edit distance 2, we also need edges start->c, a->d and b->accept,
     * all going from level 0 to shadow graph level 2.
     *
     * this is propagated to all shadow levels; that is, given edit
     * distance 3, we will have edges from shadow levels 0->1, 0->2,
     * 0->3, 1->2, 1->3, and 2->3.
     *
     * therefore, we wire them in steps: first wire with step 1 (0->1, 1->2,
     * 2->3) at depth 1, then wire with step 2 (0->2, 1->3) at depth 2, etc.
     *
     * we also have to wire helpers to their removal successors, to
     * accommodate for a replace followed by a remove, on all shadow levels.
     *
     * and finally, we also have to wire source shadows into removal
     * successor helpers on a level above, to accommodate for a remove
     * followed by a replace.
     */
    void connect_removals(NFAVertex v) {
        DEBUG_PRINTF("Wiring up remove edges for node %zu\n", g[v].index);

        // vertices returned by this function don't include self-loops
        auto dst_vertices_by_depth =
            gatherSuccessorsByDepth(g, v, edit_distance);
        auto orig_src_vertices = inv_adjacent_vertices_range(v, g);
        for (auto orig_src : orig_src_vertices) {
            // ignore self-loops
            if (orig_src == v) {
                continue;
            }
            // step is the number of vertices being removed; dist is the
            // shadow level the removal edge lands on (source level is
            // dist - step)
            for (unsigned step = 1; step <= edit_distance; step++) {
                for (unsigned dist = step; dist <= edit_distance; dist++) {
                    auto &dst_vertices = dst_vertices_by_depth[step - 1];
                    for (auto &orig_dst : dst_vertices) {
                        const auto &shadow_src =
                            shadow_map[make_pair(orig_src, dist - step)];
                        const auto &shadow_helper =
                            helper_map[make_pair(orig_src, dist - step)];
                        const auto &shadow_dst =
                            shadow_map[make_pair(orig_dst, dist)];

                        // removal
                        connect_to_clones(shadow_src, shadow_dst);

                        // removal from helper vertex
                        connect_to_clones(shadow_helper, shadow_dst);

                        // removal into helper, requires additional edit
                        if ((dist + 1) <= edit_distance) {
                            const auto &next_level_helper =
                                helper_map[make_pair(orig_dst, dist + 1)];

                            connect_to_clones(shadow_src, next_level_helper);
                        }
                    }
                }
            }
        }
    }

    // top-level wiring: for every original vertex, wire successors,
    // predecessors and helpers at each level, plus removal edges (the
    // latter two only for non-Hamming fuzzing)
    void connect_shadow_graph() {
        DEBUG_PRINTF("Wiring up the graph\n");

        for (auto v : orig) {

            DEBUG_PRINTF("Wiring up edges for node %zu\n", g[v].index);

            for (unsigned dist = 0; dist <= edit_distance; dist++) {

                // handle insert/replace
                connect_succs(v, dist);

                // handle replace/multiple insert
                connect_preds(v, dist);

                // handle helpers
                if (!hamming && dist > 0) {
                    connect_helpers(v, dist);
                }
            }

            // handle removals
            if (!hamming) {
                connect_removals(v);
            }
        }
    }

    // queue edges from src to each vertex in targets (the accepts the
    // source report-carrying vertex connects to)
    void connect_to_targets(NFAVertex src, const flat_set<NFAVertex> &targets) {
        for (auto dst : targets) {
            DEBUG_PRINTF("Adding edge: %zu -> %zu\n", g[src].index,
                         g[dst].index);
            edges_to_be_added.emplace_back(src, dst);
        }
    }

    // create a clone of the vertex, but overwrite its report set
    void create_clone(NFAVertex v, const flat_set<ReportID> &reports,
                      unsigned max_edit_distance,
                      const flat_set<NFAVertex> &targets) {
        // some vertices may have the same reports, but different successors;
        // therefore, we may need to connect them multiple times, but still only
        // clone once
        bool needs_cloning = !contains(clones, v);

        DEBUG_PRINTF("Cloning node %zu\n", g[v].index);
        // go through all shadows and helpers, including
        // original vertex
        for (unsigned d = 0; d < max_edit_distance; d++) {
            auto shadow_v = shadow_map[make_pair(v, d)];
            auto helper_v = helper_map[make_pair(v, d)];

            NFAVertex new_shadow_v, new_helper_v;

            // make sure we don't clone the same vertex twice
            if (needs_cloning) {
                new_shadow_v = clone_vertex(g, shadow_v);
                DEBUG_PRINTF("New shadow node ID: %zu (level %u)\n",
                             g[new_shadow_v].index, d);
                clones[shadow_v] = new_shadow_v;
            } else {
                new_shadow_v = clones[shadow_v];
            }
            g[new_shadow_v].reports = reports;

            connect_to_targets(new_shadow_v, targets);

            // shadow == helper means no separate helper exists at this level
            if (shadow_v == helper_v) {
                continue;
            }
            if (needs_cloning) {
                new_helper_v = clone_vertex(g, helper_v);
                DEBUG_PRINTF("New helper node ID: %zu (level %u)\n",
                             g[new_helper_v].index, d);
                clones[helper_v] = new_helper_v;
            } else {
                new_helper_v = clones[helper_v];
            }
            g[new_helper_v].reports = reports;

            connect_to_targets(new_helper_v, targets);
        }
    }

    // assign the given report set to all shadows/helpers of v up to
    // max_edit_distance, and wire them to the given accept targets
    void write_reports(NFAVertex v, const flat_set<ReportID> &reports,
                       unsigned max_edit_distance,
                       const flat_set<NFAVertex> &targets) {
        // we're overwriting reports, but we're not losing any
        // information as we already cached all the different report
        // sets, so vertices having different reports will be cloned and set up
        // with the correct report set

        // go through all shadows and helpers, including original
        // vertex
        for (unsigned d = 0; d < max_edit_distance; d++) {
            auto shadow_v = shadow_map[make_pair(v, d)];
            auto helper_v = helper_map[make_pair(v, d)];
            DEBUG_PRINTF("Setting up reports for shadow node: %zu "
                         "(level %u)\n",
                         g[shadow_v].index, d);
            DEBUG_PRINTF("Setting up reports for helper node: %zu "
                         "(level %u)\n",
                         g[helper_v].index, d);
            g[shadow_v].reports = reports;
            g[helper_v].reports = reports;

            connect_to_targets(shadow_v, targets);
            connect_to_targets(helper_v, targets);
        }
    }

    /*
     * we may have multiple report sets per graph. that means, whenever we
     * construct additional paths through the graph (alternations, removals), we
     * have to account for the fact that some vertices are predecessors to
     * vertices with different report sets.
     *
     * whenever that happens, we have to clone the paths for both report sets,
     * and set up these new vertices with their respective report sets as well.
     *
     * in order to do that, we first have to get all the predecessors for accept
     * and acceptEod vertices. then, go through them one by one, and take note
     * of the report lists. the first report set we find, wins, the rest we
     * clone.
     *
     * we also have to do this in two passes, because there may be vertices that
     * are predecessors to vertices with different report sets, so to avoid
     * overwriting reports we will be caching reports info instead.
     */
    void create_reports() {
        map<flat_set<ReportID>, flat_set<NFAVertex>> reports_to_vertices;
        flat_set<NFAVertex> accepts{g.accept, g.acceptEod};

        // gather reports info from all vertices connected to accept
        for (auto accept : accepts) {
            for (auto src : inv_adjacent_vertices_range(accept, g)) {
                // skip special vertices
                if (is_special(src, g)) {
                    continue;
                }
                reports_to_vertices[g[src].reports].insert(src);
            }
        }

        // we expect to see at most two report sets
        assert(reports_to_vertices.size() > 0 &&
               reports_to_vertices.size() <= 2);

        // set up all reports
        bool clone = false;
        for (auto &pair : reports_to_vertices) {
            const auto &reports = pair.first;
            const auto &vertices = pair.second;

            for (auto src : vertices) {
                // get all predecessors up to edit distance
                auto src_vertices_by_depth =
                    gatherPredecessorsByDepth(g, src, edit_distance);

                // find which accepts source vertex connects to
                flat_set<NFAVertex> targets;
                for (const auto &accept : accepts) {
                    NFAEdge e = edge(src, accept, g);
                    if (e) {
                        targets.insert(accept);
                    }
                }
                assert(targets.size());

                for (unsigned d = 0; d < src_vertices_by_depth.size(); d++) {
                    const auto &preds = src_vertices_by_depth[d];
                    for (auto v : preds) {
                        // only clone a node if it already contains reports
                        if (clone && !g[v].reports.empty()) {
                            create_clone(v, reports, edit_distance - d,
                                         targets);
                        } else {
                            write_reports(v, reports, edit_distance - d,
                                          targets);
                        }
                    }
                }
            }
            // clone vertices only if it's not our first report set
            clone = true;
        }
    }
};
623 | |
624 | // check if we will edit our way into a vacuous pattern |
625 | static |
626 | bool will_turn_vacuous(const NGHolder &g, u32 edit_distance) { |
627 | auto depths = calcRevDepths(g); |
628 | |
629 | depth min_depth = depth::infinity(); |
630 | auto idx = g[g.start].index; |
631 | |
632 | // check distance from start to accept/acceptEod |
633 | if (depths[idx].toAccept.min.is_finite()) { |
634 | min_depth = min(depths[idx].toAccept.min, min_depth); |
635 | } |
636 | if (depths[idx].toAcceptEod.min.is_finite()) { |
637 | min_depth = min(depths[idx].toAcceptEod.min, min_depth); |
638 | } |
639 | |
640 | idx = g[g.startDs].index; |
641 | |
642 | // check distance from startDs to accept/acceptEod |
643 | if (depths[idx].toAccept.min.is_finite()) { |
644 | min_depth = min(depths[idx].toAccept.min, min_depth); |
645 | } |
646 | if (depths[idx].toAcceptEod.min.is_finite()) { |
647 | min_depth = min(depths[idx].toAcceptEod.min, min_depth); |
648 | } |
649 | |
650 | assert(min_depth.is_finite()); |
651 | |
652 | // now, check if we can edit our way into a vacuous pattern |
653 | if (min_depth <= (u64a) edit_distance + 1) { |
654 | DEBUG_PRINTF("Pattern will turn vacuous if approximately matched\n" ); |
655 | return true; |
656 | } |
657 | return false; |
658 | } |
659 | |
660 | void validate_fuzzy_compile(const NGHolder &g, u32 edit_distance, bool hamming, |
661 | bool utf8, const Grey &grey) { |
662 | if (edit_distance == 0) { |
663 | return; |
664 | } |
665 | if (!grey.allowApproximateMatching) { |
666 | throw CompileError("Approximate matching is disabled." ); |
667 | } |
668 | if (edit_distance > grey.maxEditDistance) { |
669 | throw CompileError("Edit distance is too big." ); |
670 | } |
671 | if (utf8) { |
672 | throw CompileError("UTF-8 is disallowed for approximate matching." ); |
673 | } |
674 | // graph isn't fuzzable if there are edge assertions anywhere in the graph |
675 | for (auto e : edges_range(g)) { |
676 | if (g[e].assert_flags) { |
677 | throw CompileError("Zero-width assertions are disallowed for " |
678 | "approximate matching." ); |
679 | } |
680 | } |
681 | if (!hamming && will_turn_vacuous(g, edit_distance)) { |
682 | throw CompileError("Approximate matching patterns that reduce to " |
683 | "vacuous patterns are disallowed." ); |
684 | } |
685 | } |
686 | |
687 | void make_fuzzy(NGHolder &g, u32 edit_distance, bool hamming, |
688 | const Grey &grey) { |
689 | if (edit_distance == 0) { |
690 | return; |
691 | } |
692 | |
693 | assert(grey.allowApproximateMatching); |
694 | assert(grey.maxEditDistance >= edit_distance); |
695 | |
696 | ShadowGraph sg(g, edit_distance, hamming); |
697 | sg.fuzz_graph(); |
698 | |
699 | // For safety, enforce limit on actual vertex count. |
700 | if (num_vertices(g) > grey.limitApproxMatchingVertices) { |
701 | DEBUG_PRINTF("built %zu vertices > limit of %u\n" , num_vertices(g), |
702 | grey.limitApproxMatchingVertices); |
703 | throw ResourceLimitError(); |
704 | } |
705 | } |
706 | |
707 | } // namespace ue2 |
708 | |