1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Miscellaneous NFA graph utilities.
31 */
32#ifndef NG_UTIL_H
33#define NG_UTIL_H
34
35#include "ng_depth.h"
36#include "ng_holder.h"
37#include "ue2common.h"
38#include "util/flat_containers.h"
39#include "util/graph.h"
40#include "util/graph_range.h"
41
42#include <boost/graph/depth_first_search.hpp> // for default_dfs_visitor
43
44#include <algorithm>
45#include <map>
46#include <unordered_map>
47#include <vector>
48
49namespace ue2 {
50
51struct Grey;
52struct ue2_literal;
53class ReportManager;
54
55template<class VertexDepth>
56depth maxDistFromInit(const VertexDepth &vd) {
57 if (vd.fromStart.max.is_unreachable()) {
58 return vd.fromStartDotStar.max;
59 } else if (vd.fromStartDotStar.max.is_unreachable()) {
60 return vd.fromStart.max;
61 } else {
62 return std::max(vd.fromStartDotStar.max, vd.fromStart.max);
63 }
64}
65
66template<class VertexDepth>
67depth maxDistFromStartOfData(const VertexDepth &vd) {
68 if (vd.fromStartDotStar.max.is_reachable()) {
69 /* the irrepressible nature of floating literals cannot be contained */
70 return depth::infinity();
71 } else {
72 return vd.fromStart.max;
73 }
74}
75
76/** True if the given vertex is a dot (reachable on any character). */
77template<class GraphT>
78static really_inline
79bool is_dot(NFAVertex v, const GraphT &g) {
80 return g[v].char_reach.all();
81}
82
83/** adds successors of v to s */
84template<class U>
85static really_inline
86void succ(const NGHolder &g, NFAVertex v, U *s) {
87 auto rv = adjacent_vertices(v, g);
88 s->insert(rv.first, rv.second);
89}
90
91template<class ContTemp = flat_set<NFAVertex>>
92ContTemp succs(NFAVertex u, const NGHolder &g) {
93 ContTemp rv;
94 succ(g, u, &rv);
95 return rv;
96}
97
98/** adds predecessors of v to s */
99template<class U>
100static really_inline
101void pred(const NGHolder &g, NFAVertex v, U *p) {
102 auto rv = inv_adjacent_vertices(v, g);
103 p->insert(rv.first, rv.second);
104}
105
106template<class ContTemp = flat_set<NFAVertex>>
107ContTemp preds(NFAVertex u, const NGHolder &g) {
108 ContTemp rv;
109 pred(g, u, &rv);
110 return rv;
111}
112
113/** returns a vertex with an out edge from v and is not v.
114 * v must have exactly one out-edge excluding self-loops.
115 * will return NGHolder::null_vertex() if the preconditions don't hold.
116 */
117NFAVertex getSoleDestVertex(const NGHolder &g, NFAVertex v);
118
119/** Like getSoleDestVertex but for in-edges */
120NFAVertex getSoleSourceVertex(const NGHolder &g, NFAVertex v);
121
122/** \brief edge filtered graph.
123 *
124 * This will give you a view over the graph that has none of the edges from
125 * the provided set included.
126 *
127 * If this is provided with the back edges of the graph, this will result in an
128 * acyclic subgraph view. This is useful for topological_sort and other
129 * algorithms that require a DAG.
130 */
131template<typename EdgeSet>
132struct bad_edge_filter {
133 bad_edge_filter() {}
134 explicit bad_edge_filter(const EdgeSet *bad_e) : bad_edges(bad_e) {}
135 bool operator()(const typename EdgeSet::value_type &e) const {
136 return !contains(*bad_edges, e); /* keep edges not in the bad set */
137 }
138 const EdgeSet *bad_edges = nullptr;
139};
140
141template<typename EdgeSet>
142bad_edge_filter<EdgeSet> make_bad_edge_filter(const EdgeSet *e) {
143 return bad_edge_filter<EdgeSet>(e);
144}
145
146/** \brief vertex graph filter. */
147template<typename VertexSet>
148struct bad_vertex_filter {
149 bad_vertex_filter() = default;
150 explicit bad_vertex_filter(const VertexSet *bad_v) : bad_vertices(bad_v) {}
151 bool operator()(const typename VertexSet::value_type &v) const {
152 return !contains(*bad_vertices, v); /* keep vertices not in bad set */
153 }
154 const VertexSet *bad_vertices = nullptr;
155};
156
157template<typename VertexSet>
158bad_vertex_filter<VertexSet> make_bad_vertex_filter(const VertexSet *v) {
159 return bad_vertex_filter<VertexSet>(v);
160}
161
162/** Visitor that records back edges */
163template <typename BackEdgeSet>
164class BackEdges : public boost::default_dfs_visitor {
165public:
166 explicit BackEdges(BackEdgeSet &edges) : backEdges(edges) {}
167 template <class EdgeT, class GraphT>
168 void back_edge(const EdgeT &e, const GraphT &) {
169 backEdges.insert(e); // Remove this back edge only
170 }
171 BackEdgeSet &backEdges;
172};
173
174/** Returns true if the vertex is either of the real starts (NODE_START,
175 * NODE_START_DOTSTAR). */
176template <typename GraphT>
177static really_inline
178bool is_any_start(typename GraphT::vertex_descriptor v, const GraphT &g) {
179 u32 i = g[v].index;
180 return i == NODE_START || i == NODE_START_DOTSTAR;
181}
182
183bool is_virtual_start(NFAVertex v, const NGHolder &g);
184
185template <typename GraphT>
186bool is_any_accept(typename GraphT::vertex_descriptor v, const GraphT &g) {
187 u32 i = g[v].index;
188 return i == NODE_ACCEPT || i == NODE_ACCEPT_EOD;
189}
190
191/** returns true iff v has an edge to accept or acceptEod */
192template <typename GraphT>
193bool is_match_vertex(typename GraphT::vertex_descriptor v, const GraphT &g) {
194 return edge(v, g.accept, g).second || edge(v, g.acceptEod, g).second;
195}
196
197/** Generate a reverse topological ordering for a back-edge filtered version of
198 * our graph (as it must be a DAG and correctly numbered).
199 *
200 * Note: we ensure that we produce a topo ordering that begins with acceptEod
201 * and accept (if present) and ends with startDs followed by start.
202 */
203std::vector<NFAVertex> getTopoOrdering(const NGHolder &g);
204
205bool onlyOneTop(const NGHolder &g);
206
207/** Return the set of the tops on the given graph. */
208flat_set<u32> getTops(const NGHolder &h);
209
210/** Initialise the tops on h to the provide top. Assumes that h is triggered and
211 * no tops have been set on h. */
212void setTops(NGHolder &h, u32 top = DEFAULT_TOP);
213
214/** adds a vertex to g with all the same vertex properties as \p v (aside from
215 * index) */
216NFAVertex clone_vertex(NGHolder &g, NFAVertex v);
217
218/**
219 * \brief Copies all out-edges from source to target.
220 *
221 * Edge properties (aside from index) are preserved and duplicate edges are
222 * skipped.
223 */
224void clone_out_edges(NGHolder &g, NFAVertex source, NFAVertex dest);
225
226/**
227 * \brief Copies all in-edges from source to target.
228 *
229 * Edge properties (aside from index) are preserved.
230 */
231void clone_in_edges(NGHolder &g, NFAVertex source, NFAVertex dest);
232
233/** \brief True if the graph contains an edge from one of {start, startDs} to
234 * one of {accept, acceptEod}. */
235bool isVacuous(const NGHolder &h);
236
237/** \brief True if the graph contains no floating vertices (startDs has no
238 * proper successors). */
239bool isAnchored(const NGHolder &h);
240
241/** \brief True if the graph contains no anchored vertices (start has no
242 * successors aside from startDs or vertices connected to startDs). */
243bool isFloating(const NGHolder &h);
244
245/** True if the graph contains no back-edges at all, other than the
246 * startDs self-loop. */
247bool isAcyclic(const NGHolder &g);
248
249/** True if the graph has a cycle reachable from the given source vertex. */
250bool hasReachableCycle(const NGHolder &g, NFAVertex src);
251
252/** True if g has any cycles which are not self-loops. */
253bool hasBigCycles(const NGHolder &g);
254
255/**
256 * \brief True if g has at least one non-special vertex with reach smaller than
257 * max_reach_count. The default of 200 is pretty conservative.
258 */
259bool hasNarrowReachVertex(const NGHolder &g, size_t max_reach_count = 200);
260
261/** Returns the set of all vertices that appear in any of the graph's cycles. */
262std::set<NFAVertex> findVerticesInCycles(const NGHolder &g);
263
264bool can_never_match(const NGHolder &g);
265
266/* \brief Does the graph have any edges leading into acceptEod (aside from
267 * accept) or will it have after resolving asserts? */
268bool can_match_at_eod(const NGHolder &h);
269
270bool can_only_match_at_eod(const NGHolder &g);
271
272/** \brief Does this graph become a "firehose", matching between every
273 * byte? */
274bool matches_everywhere(const NGHolder &h);
275
276
277struct mbsb_cache {
278 explicit mbsb_cache(const NGHolder &gg) : g(gg) {}
279 std::map<std::pair<u32, u32>, bool> cache;
280 const NGHolder &g;
281};
282
283/* weaker than straight domination as allows jump edges */
284bool mustBeSetBefore(NFAVertex u, NFAVertex v, const NGHolder &g,
285 mbsb_cache &cache);
286
287/* adds the literal 's' to the end of the graph before h.accept */
288void appendLiteral(NGHolder &h, const ue2_literal &s);
289
290/** \brief Fill graph \a outp with a subset of the vertices in \a in (given in
291 * \a in). A vertex mapping is returned in \a v_map_out. */
292void fillHolder(NGHolder *outp, const NGHolder &in,
293 const std::deque<NFAVertex> &vv,
294 std::unordered_map<NFAVertex, NFAVertex> *v_map_out);
295
296/** \brief Clone the graph in \a in into graph \a out, returning a vertex
297 * mapping in \a v_map_out. */
298void cloneHolder(NGHolder &out, const NGHolder &in,
299 std::unordered_map<NFAVertex, NFAVertex> *v_map_out);
300
301/** \brief Clone the graph in \a in into graph \a out. */
302void cloneHolder(NGHolder &out, const NGHolder &in);
303
304/** \brief Build a clone of graph \a in and return a pointer to it. */
305std::unique_ptr<NGHolder> cloneHolder(const NGHolder &in);
306
307/** \brief Clear all reports on vertices that do not have an edge to accept or
308 * acceptEod. */
309void clearReports(NGHolder &g);
310
311/** \brief Add report \a r_new to every vertex that already has report \a
312 * r_old. */
313void duplicateReport(NGHolder &g, ReportID r_old, ReportID r_new);
314
315/** Construct a reversed copy of an arbitrary NGHolder, mapping starts to
316 * accepts. */
317void reverseHolder(const NGHolder &g, NGHolder &out);
318
319/** \brief Returns the delay or ~0U if the graph cannot match with
320 * the trailing literal. */
321u32 removeTrailingLiteralStates(NGHolder &g, const ue2_literal &lit,
322 u32 max_delay, bool overhang_ok = true);
323
324#ifndef NDEBUG
325
326// Assertions: only available in internal builds.
327
328/**
329 * Used in sanity-checking assertions: returns true if all vertices
330 * with edges to accept or acceptEod have at least one report ID. Additionally,
331 * checks that ONLY vertices with edges to accept or acceptEod has reports.
332 */
333bool allMatchStatesHaveReports(const NGHolder &g);
334
335/**
336 * Assertion: returns true if the graph is triggered and all edges out of start
337 * have tops OR if the graph is not-triggered and all edges out of start have no
338 * tops.
339 */
340bool isCorrectlyTopped(const NGHolder &g);
341#endif // NDEBUG
342
343} // namespace ue2
344
345#endif
346