1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Equivalence class graph reduction pass.
31 */
32
33#include "ng_equivalence.h"
34
35#include "grey.h"
36#include "ng_depth.h"
37#include "ng_holder.h"
38#include "ng_util.h"
39#include "util/compile_context.h"
40#include "util/flat_containers.h"
41#include "util/graph_range.h"
42#include "util/make_unique.h"
43#include "util/unordered.h"
44
45#include <algorithm>
46#include <memory>
47#include <set>
48#include <stack>
49#include <vector>
50
51using namespace std;
52
53namespace ue2 {
54
55enum EquivalenceType {
56 LEFT_EQUIVALENCE,
57 RIGHT_EQUIVALENCE,
58};
59
60namespace {
61class VertexInfo;
62
63// custom comparison functor for unordered_set and flat_set
64struct VertexInfoPtrCmp {
65 // for flat_set
66 bool operator()(const VertexInfo *a, const VertexInfo *b) const;
67};
68
69using VertexInfoSet = flat_set<VertexInfo *, VertexInfoPtrCmp>;
70
71/** Precalculated (and maintained) information about a vertex. */
72class VertexInfo {
73public:
74 VertexInfo(NFAVertex v_in, const NGHolder &g)
75 : v(v_in), vert_index(g[v].index), cr(g[v].char_reach),
76 equivalence_class(~0), vertex_flags(g[v].assert_flags) {}
77
78 VertexInfoSet pred; //!< predecessors of this vertex
79 VertexInfoSet succ; //!< successors of this vertex
80 NFAVertex v;
81 size_t vert_index;
82 CharReach cr;
83 CharReach pred_cr;
84 CharReach succ_cr;
85 flat_set<u32> edge_tops; /**< tops on edge from start */
86 unsigned equivalence_class;
87 unsigned vertex_flags;
88};
89
90// compare two vertex info pointers on their vertex index
91bool VertexInfoPtrCmp::operator()(const VertexInfo *a,
92 const VertexInfo *b) const {
93 return a->vert_index < b->vert_index;
94}
95
96// to avoid traversing infomap each time we need to check the class during
97// partitioning, we will cache the information pertaining to a particular class
98class ClassInfo {
99public:
100 struct ClassDepth {
101 ClassDepth() {}
102 ClassDepth(const NFAVertexDepth &d)
103 : d1(d.fromStart), d2(d.fromStartDotStar) {}
104 ClassDepth(const NFAVertexRevDepth &rd)
105 : d1(rd.toAccept), d2(rd.toAcceptEod) {}
106 DepthMinMax d1;
107 DepthMinMax d2;
108 };
109 ClassInfo(const NGHolder &g, const VertexInfo &vi, const ClassDepth &d_in,
110 EquivalenceType eq)
111 : /* reports only matter for right-equiv */
112 rs(eq == RIGHT_EQUIVALENCE ? g[vi.v].reports : flat_set<ReportID>()),
113 vertex_flags(vi.vertex_flags), edge_tops(vi.edge_tops), cr(vi.cr),
114 adjacent_cr(eq == LEFT_EQUIVALENCE ? vi.pred_cr : vi.succ_cr),
115 /* treat non-special vertices the same */
116 node_type(min(g[vi.v].index, size_t{N_SPECIALS})), depth(d_in) {}
117
118 bool operator==(const ClassInfo &b) const {
119 return node_type == b.node_type && depth.d1 == b.depth.d1 &&
120 depth.d2 == b.depth.d2 && cr == b.cr &&
121 adjacent_cr == b.adjacent_cr && edge_tops == b.edge_tops &&
122 vertex_flags == b.vertex_flags && rs == b.rs;
123 }
124
125 size_t hash() const {
126 return hash_all(rs, vertex_flags, cr, adjacent_cr, node_type, depth.d1,
127 depth.d2);
128 }
129
130private:
131 flat_set<ReportID> rs; /* for right equiv only */
132 unsigned vertex_flags;
133 flat_set<u32> edge_tops;
134 CharReach cr;
135 CharReach adjacent_cr;
136 unsigned node_type;
137 ClassDepth depth;
138};
139
140// work queue class. this contraption has two goals:
141// 1. uniqueness of elements
142// 2. FILO operation
143class WorkQueue {
144public:
145 explicit WorkQueue(unsigned c) {
146 q.reserve(c);
147 }
148 // unique push
149 void push(unsigned id) {
150 if (ids.insert(id).second) {
151 q.push_back(id);
152 }
153 }
154
155 // pop
156 unsigned pop() {
157 unsigned id = q.back();
158 ids.erase(id);
159 q.pop_back();
160 return id;
161 }
162
163 void append(WorkQueue &other) {
164 for (const auto &e : other) {
165 push(e);
166 }
167 }
168
169 void clear() {
170 ids.clear();
171 q.clear();
172 }
173
174 bool empty() const {
175 return ids.empty();
176 }
177
178 vector<unsigned>::const_iterator begin() const {
179 return q.begin();
180 }
181
182 vector<unsigned>::const_iterator end() const {
183 return q.end();
184 }
185
186 size_t capacity() const {
187 return q.capacity();
188 }
189private:
190 unordered_set<unsigned> ids; //!< stores id's, for uniqueness
191 vector<unsigned> q; //!< vector of id's that we use as FILO.
192};
193
194}
195
196static
197bool outIsIrreducible(NFAVertex &v, const NGHolder &g) {
198 unsigned nonSpecialVertices = 0;
199 for (auto w : adjacent_vertices_range(v, g)) {
200 if (!is_special(w, g) && w != v) {
201 nonSpecialVertices++;
202 }
203 }
204 return nonSpecialVertices == 1;
205}
206
207static
208bool inIsIrreducible(NFAVertex &v, const NGHolder &g) {
209 unsigned nonSpecialVertices = 0;
210 for (auto u : inv_adjacent_vertices_range(v, g)) {
211 if (!is_special(u, g) && u != v) {
212 nonSpecialVertices++;
213 }
214 }
215 return nonSpecialVertices == 1;
216}
217
218/** Cheaply check whether this graph can't be reduced at all, because it is
219 * just a chain of vertices with no other edges. */
220static
221bool isIrreducible(const NGHolder &g) {
222 for (auto v : vertices_range(g)) {
223 // skip specials
224 if (is_special(v, g)) {
225 continue;
226 }
227
228 // we want meaningful in_degree to be 1. we also want to make sure we
229 // don't count self-loop + 1 incoming edge as not irreducible
230 if (in_degree(v, g) != 1 && !inIsIrreducible(v, g)) {
231 return false;
232 }
233 // we want meaningful out_degree to be 1. we also want to make sure we
234 // don't count self-loop + 1 outgoing edge as not irreducible
235 if (out_degree(v, g) != 1 && !outIsIrreducible(v, g)) {
236 return false;
237 }
238 }
239
240 return true;
241}
242
243#ifndef NDEBUG
244static
245bool hasEdgeAsserts(NFAVertex v, const NGHolder &g) {
246 for (const auto &e : in_edges_range(v, g)) {
247 if (g[e].assert_flags != 0) {
248 return true;
249 }
250 }
251 for (const auto &e : out_edges_range(v, g)) {
252 if (g[e].assert_flags != 0) {
253 return true;
254 }
255 }
256 return false;
257}
258#endif
259
260// populate VertexInfo table
261static
262vector<unique_ptr<VertexInfo>> getVertexInfos(const NGHolder &g) {
263 const size_t num_verts = num_vertices(g);
264
265 vector<unique_ptr<VertexInfo>> infos;
266 infos.reserve(num_verts * 2);
267
268 vector<VertexInfo *> vertex_map; // indexed by vertex_index property
269 vertex_map.resize(num_verts);
270
271 for (auto v : vertices_range(g)) {
272 infos.push_back(make_unique<VertexInfo>(v, g));
273 vertex_map[g[v].index] = infos.back().get();
274 }
275
276 // now, go through each vertex and populate its predecessor and successor
277 // lists
278 for (auto &vi : infos) {
279 assert(vi);
280 NFAVertex v = vi->v;
281
282 // find predecessors
283 for (const auto &e : in_edges_range(v, g)) {
284 NFAVertex u = source(e, g);
285 VertexInfo *u_vi = vertex_map[g[u].index];
286
287 vi->pred_cr |= u_vi->cr;
288 vi->pred.insert(u_vi);
289
290 // also set up edge tops
291 if (is_triggered(g) && u == g.start) {
292 vi->edge_tops = g[e].tops;
293 }
294 }
295
296 // find successors
297 for (auto w : adjacent_vertices_range(v, g)) {
298 VertexInfo *w_vi = vertex_map[g[w].index];
299 vi->succ_cr |= w_vi->cr;
300 vi->succ.insert(w_vi);
301 }
302 assert(!hasEdgeAsserts(vi->v, g));
303 }
304
305 return infos;
306}
307
308// store equivalence class in VertexInfo for each vertex
309static
310vector<VertexInfoSet> partitionGraph(vector<unique_ptr<VertexInfo>> &infos,
311 WorkQueue &work_queue, const NGHolder &g,
312 EquivalenceType eq) {
313 const size_t num_verts = infos.size();
314
315 vector<VertexInfoSet> classes;
316 ue2_unordered_map<ClassInfo, unsigned> classinfomap;
317
318 // assume we will have lots of classes, so we don't waste time resizing
319 // these structures.
320 classes.reserve(num_verts);
321 classinfomap.reserve(num_verts);
322
323 // get distances from start (or accept) for all vertices
324 // only one of them is used at a time, never both
325 vector<NFAVertexDepth> depths;
326 vector<NFAVertexRevDepth> rdepths;
327
328 if (eq == LEFT_EQUIVALENCE) {
329 depths = calcDepths(g);
330 } else {
331 rdepths = calcRevDepths(g);
332 }
333
334 // partition the graph based on CharReach
335 for (auto &vi : infos) {
336 assert(vi);
337
338 ClassInfo::ClassDepth depth;
339
340 if (eq == LEFT_EQUIVALENCE) {
341 depth = depths[vi->vert_index];
342 } else {
343 depth = rdepths[vi->vert_index];
344 }
345 ClassInfo ci(g, *vi, depth, eq);
346
347 auto ii = classinfomap.find(ci);
348 if (ii == classinfomap.end()) {
349 // vertex is in a new equivalence class by itself.
350 unsigned eq_class = classes.size();
351 vi->equivalence_class = eq_class;
352 classes.push_back({vi.get()});
353 classinfomap.emplace(move(ci), eq_class);
354 } else {
355 // vertex is added to an existing class.
356 unsigned eq_class = ii->second;
357 vi->equivalence_class = eq_class;
358 classes.at(eq_class).insert(vi.get());
359
360 // we now know that this particular class has more than one
361 // vertex, so we add it to the work queue
362 work_queue.push(eq_class);
363 }
364 }
365
366 DEBUG_PRINTF("partitioned, %zu equivalence classes\n", classes.size());
367 return classes;
368}
369
370// generalized equivalence processing (left and right)
371// basically, goes through every vertex in a class and checks if all successor or
372// predecessor classes match in all vertices. if classes mismatch, a vertex is
373// split into a separate class, along with all vertices having the same set of
374// successor/predecessor classes. the opposite side (successors for left
375// equivalence, predecessors for right equivalence) classes get revalidated in
376// case of a split.
377static
378void equivalence(vector<VertexInfoSet> &classes, WorkQueue &work_queue,
379 EquivalenceType eq_type) {
380 // now, go through the work queue until it's empty
381 map<flat_set<unsigned>, VertexInfoSet> tentative_classmap;
382 flat_set<unsigned> cur_classes;
383 // local work queue, to store classes we want to revalidate in case of split
384 WorkQueue reval_queue(work_queue.capacity());
385
386 while (!work_queue.empty()) {
387 // dequeue our class from the work queue
388 unsigned cur_class = work_queue.pop();
389
390 // get all vertices in current equivalence class
391 VertexInfoSet &cur_class_vertices = classes.at(cur_class);
392
393 if (cur_class_vertices.size() < 2) {
394 continue;
395 }
396
397 // clear data from previous iterations
398 tentative_classmap.clear();
399
400 DEBUG_PRINTF("doing equivalence pass for class %u, %zd vertices\n",
401 cur_class, cur_class_vertices.size());
402
403 // go through vertices in this class
404 for (VertexInfo *vi : cur_class_vertices) {
405 cur_classes.clear();
406
407 // get vertex lists for equivalence vertices and vertices for
408 // revalidation in case of split
409 const auto &eq_vertices =
410 (eq_type == LEFT_EQUIVALENCE) ? vi->pred : vi->succ;
411 const auto &reval_vertices =
412 (eq_type == LEFT_EQUIVALENCE) ? vi->succ : vi->pred;
413
414 // go through equivalence and note the classes
415 for (const VertexInfo *tmp : eq_vertices) {
416 cur_classes.insert(tmp->equivalence_class);
417 }
418
419 // note all the classes that need to be reevaluated
420 for (const VertexInfo *tmp : reval_vertices) {
421 reval_queue.push(tmp->equivalence_class);
422 }
423
424 VertexInfoSet &tentative_classes = tentative_classmap[cur_classes];
425 tentative_classes.insert(vi);
426 }
427
428 // if we found more than one class, split and revalidate everything
429 if (tentative_classmap.size() > 1) {
430 auto tmi = tentative_classmap.begin();
431
432 // start from the second class
433 for (++tmi; tmi != tentative_classmap.end(); ++tmi) {
434 const VertexInfoSet &vertices_to_split = tmi->second;
435 unsigned new_class = classes.size();
436 VertexInfoSet new_class_vertices;
437
438 for (VertexInfo *vi : vertices_to_split) {
439 vi->equivalence_class = new_class;
440 // note: we cannot use the cur_class_vertices ref, as it is
441 // invalidated by modifications to the classes vector.
442 classes[cur_class].erase(vi);
443 new_class_vertices.insert(vi);
444 }
445 classes.push_back(move(new_class_vertices));
446
447 if (contains(tmi->first, cur_class)) {
448 reval_queue.push(new_class);
449 }
450 }
451 work_queue.append(reval_queue);
452 }
453 reval_queue.clear();
454 }
455}
456
457static
458bool require_separate_eod_vertex(const VertexInfoSet &vert_infos,
459 const NGHolder &g) {
460 /* We require separate eod and normal accept vertices for a class if we have
461 * both normal accepts and eod accepts AND the reports are different for eod
462 * and non-eod reports. */
463
464 flat_set<ReportID> non_eod;
465 flat_set<ReportID> eod;
466
467 for (const VertexInfo *vi : vert_infos) {
468 NFAVertex v = vi->v;
469
470 if (edge(v, g.accept, g).second) {
471 insert(&non_eod, g[v].reports);
472 }
473
474 if (edge(v, g.acceptEod, g).second) {
475 insert(&eod, g[v].reports);
476 }
477 }
478
479 if (non_eod.empty() || eod.empty()) {
480 return false;
481 }
482
483 return non_eod != eod;
484
485}
486
487static
488void mergeClass(vector<unique_ptr<VertexInfo>> &infos, NGHolder &g,
489 unsigned eq_class, VertexInfoSet &cur_class_vertices,
490 set<NFAVertex> *toRemove) {
491 DEBUG_PRINTF("Replacing %zd vertices from equivalence class %u with a "
492 "single vertex.\n", cur_class_vertices.size(), eq_class);
493
494 // replace equivalence class with a single vertex:
495 // 1. create new vertex with matching properties
496 // 2. wire all predecessors to new vertex
497 // 2a. update info for new vertex with new predecessors
498 // 2b. update each predecessor's successor list
499 // 3. wire all successors to new vertex
500 // 3a. update info for new vertex with new successors
501 // 3b. update each successor's predecessor list
502 // 4. remove old vertex
503
504 // any differences between vertex properties were resolved during
505 // initial partitioning, so we assume that every vertex in equivalence
506 // class has the same CharReach et al.
507 // so, we find the first vertex in our class and get all its properties
508
509 /* For left equivalence, if the members have different reporting behaviour
510 * we sometimes require two vertices to be created (one connected to accept
511 * and one to accepteod) */
512
513 NFAVertex old_v = (*cur_class_vertices.begin())->v;
514 NFAVertex new_v = clone_vertex(g, old_v); /* set up new vertex with same
515 * props */
516 g[new_v].reports.clear(); /* populated as we pull in succs */
517
518 // store this vertex in our global vertex list
519 infos.push_back(make_unique<VertexInfo>(new_v, g));
520 VertexInfo *new_vertex_info = infos.back().get();
521
522 NFAVertex new_v_eod = NGHolder::null_vertex();
523 VertexInfo *new_vertex_info_eod = nullptr;
524
525 if (require_separate_eod_vertex(cur_class_vertices, g)) {
526 new_v_eod = clone_vertex(g, old_v);
527 g[new_v_eod].reports.clear();
528 infos.push_back(make_unique<VertexInfo>(new_v_eod, g));
529 new_vertex_info_eod = infos.back().get();
530 }
531
532 const auto &edgetops = (*cur_class_vertices.begin())->edge_tops;
533 for (VertexInfo *old_vertex_info : cur_class_vertices) {
534 assert(old_vertex_info->equivalence_class == eq_class);
535
536 // mark this vertex for removal
537 toRemove->insert(old_vertex_info->v);
538
539 // for each predecessor, add edge to new vertex and update info
540 for (VertexInfo *pred_info : old_vertex_info->pred) {
541 // update info for new vertex
542 new_vertex_info->pred.insert(pred_info);
543 if (new_vertex_info_eod) {
544 new_vertex_info_eod->pred.insert(pred_info);
545 }
546
547 // update info for predecessor
548 pred_info->succ.erase(old_vertex_info);
549
550 // if edge doesn't exist, create it
551 NFAEdge e = add_edge_if_not_present(pred_info->v, new_v, g);
552
553 // put edge tops, if applicable
554 if (!edgetops.empty()) {
555 assert(g[e].tops.empty() || g[e].tops == edgetops);
556 g[e].tops = edgetops;
557 }
558
559 pred_info->succ.insert(new_vertex_info);
560
561 if (new_v_eod) {
562 NFAEdge ee = add_edge_if_not_present(pred_info->v, new_v_eod,
563 g);
564
565 // put edge tops, if applicable
566 if (!edgetops.empty()) {
567 assert(g[e].tops.empty() || g[e].tops == edgetops);
568 g[ee].tops = edgetops;
569 }
570
571 pred_info->succ.insert(new_vertex_info_eod);
572 }
573 }
574
575 // for each successor, add edge from new vertex and update info
576 for (VertexInfo *succ_info : old_vertex_info->succ) {
577 NFAVertex succ_v = succ_info->v;
578
579 // update info for successor
580 succ_info->pred.erase(old_vertex_info);
581
582 if (new_v_eod && succ_v == g.acceptEod) {
583 // update info for new vertex
584 new_vertex_info_eod->succ.insert(succ_info);
585 insert(&g[new_v_eod].reports,
586 g[old_vertex_info->v].reports);
587
588 add_edge_if_not_present(new_v_eod, succ_v, g);
589 succ_info->pred.insert(new_vertex_info_eod);
590 } else {
591 // update info for new vertex
592 new_vertex_info->succ.insert(succ_info);
593
594 // if edge doesn't exist, create it
595 add_edge_if_not_present(new_v, succ_v, g);
596 succ_info->pred.insert(new_vertex_info);
597
598 if (is_any_accept(succ_v, g)) {
599 insert(&g[new_v].reports,
600 g[old_vertex_info->v].reports);
601 }
602 }
603 }
604 }
605
606 // update classmap
607 new_vertex_info->equivalence_class = eq_class;
608 cur_class_vertices.insert(new_vertex_info);
609}
610
611// walk through vertices of an equivalence class and replace them with a single
612// vertex (or, in rare cases for left equiv, a pair if we cannot satisfy the
613// report behaviour with a single vertex).
614static
615bool mergeEquivalentClasses(vector<VertexInfoSet> &classes,
616 vector<unique_ptr<VertexInfo>> &infos,
617 NGHolder &g) {
618 bool merged = false;
619 set<NFAVertex> toRemove;
620
621 // go through all classes and merge classes with more than one vertex
622 for (unsigned eq_class = 0; eq_class < classes.size(); eq_class++) {
623 // get all vertices in current equivalence class
624 VertexInfoSet &cur_class_vertices = classes[eq_class];
625
626 // we don't care for single-vertex classes
627 if (cur_class_vertices.size() > 1) {
628 merged = true;
629 mergeClass(infos, g, eq_class, cur_class_vertices, &toRemove);
630 }
631 }
632
633 // remove all dead vertices
634 DEBUG_PRINTF("removing %zd vertices.\n", toRemove.size());
635 remove_vertices(toRemove, g);
636
637 return merged;
638}
639
640static
641bool reduceGraphEquivalences(NGHolder &g, EquivalenceType eq_type) {
642 // create a list of equivalence classes to check
643 WorkQueue work_queue(num_vertices(g));
644
645 // get information on every vertex in the graph
646 // new vertices are allocated here, and stored in infos
647 auto infos = getVertexInfos(g);
648
649 // partition the graph
650 auto classes = partitionGraph(infos, work_queue, g, eq_type);
651
652 // do equivalence processing
653 equivalence(classes, work_queue, eq_type);
654
655 // replace equivalent classes with single vertices
656 // new vertices are (possibly) allocated here, and stored in infos
657 return mergeEquivalentClasses(classes, infos, g);
658}
659
660bool reduceGraphEquivalences(NGHolder &g, const CompileContext &cc) {
661 if (!cc.grey.equivalenceEnable) {
662 DEBUG_PRINTF("equivalence processing disabled in grey box\n");
663 return false;
664 }
665 renumber_vertices(g);
666
667 // Cheap check: if all the non-special vertices have in-degree one and
668 // out-degree one, there's no redundancy in this here graph and we can
669 // vamoose.
670 if (isIrreducible(g)) {
671 DEBUG_PRINTF("skipping equivalence processing, graph is irreducible\n");
672 return false;
673 }
674
675 // take note if we have merged any vertices
676 bool merge = false;
677 merge |= reduceGraphEquivalences(g, LEFT_EQUIVALENCE);
678 merge |= reduceGraphEquivalences(g, RIGHT_EQUIVALENCE);
679 return merge;
680}
681
682} // namespace ue2
683