1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "rose_in_util.h"
30
31#include "rose_build_util.h"
32#include "nfa/goughcompile.h"
33#include "nfagraph/ng_depth.h"
34#include "nfagraph/ng_util.h"
35#include "nfagraph/ng_width.h"
36#include "util/container.h"
37#include "util/graph_range.h"
38#include "util/make_unique.h"
39
40#include <vector>
41
42#include <boost/graph/copy.hpp>
43#include <boost/graph/reverse_graph.hpp>
44#include <boost/graph/topological_sort.hpp>
45
46using namespace std;
47
48namespace ue2 {
49
50/* Returns a topological ordering of the vertices in g. That is the starts are
51 * at the front and all the predecessors of a vertex occur earlier in the list
52 * than the vertex. */
53vector<RoseInVertex> topo_order(const RoseInGraph &g) {
54 assert(hasCorrectlyNumberedVertices(g));
55 vector<RoseInVertex> v_order;
56 v_order.reserve(num_vertices(g));
57
58 boost::topological_sort(g, back_inserter(v_order));
59
60 reverse(v_order.begin(), v_order.end()); /* put starts at the front */
61
62 return v_order;
63}
64
65namespace {
66struct RoseEdgeCopier {
67 typedef unordered_map<const NGHolder *, shared_ptr<NGHolder>> GraphMap;
68 typedef unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> HaigMap;
69
70 RoseEdgeCopier(const RoseInGraph &g1, RoseInGraph &g2,
71 const GraphMap &graph_map_in, const HaigMap &haig_map_in)
72 : ig(g1), out(g2), graph_map(graph_map_in), haig_map(haig_map_in) {}
73
74 void operator()(const RoseInEdge &e1, RoseInEdge &e2) {
75 // Clone all properties.
76 put(boost::edge_all, out, e2, get(boost::edge_all, ig, e1));
77 // Substitute in cloned graphs.
78 if (ig[e1].graph) {
79 out[e2].graph = graph_map.at(ig[e1].graph.get());
80 }
81 if (ig[e1].haig) {
82 out[e2].haig = haig_map.at(ig[e1].haig.get());
83 }
84 }
85
86private:
87 const RoseInGraph &ig;
88 RoseInGraph &out;
89 const GraphMap &graph_map;
90 const HaigMap &haig_map;
91};
92}
93
94unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig) {
95 assert(hasCorrectlyNumberedVertices(ig));
96 unique_ptr<RoseInGraph> out = make_unique<RoseInGraph>();
97
98 unordered_map<const NGHolder *, shared_ptr<NGHolder>> graph_map;
99 unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> haig_map;
100
101 for (const auto &e : edges_range(ig)) {
102 const RoseInEdgeProps &ep = ig[e];
103 if (ep.graph && !contains(graph_map, ep.graph.get())) {
104 graph_map[ep.graph.get()] = cloneHolder(*ep.graph);
105 }
106 if (ep.haig && !contains(haig_map, ep.haig.get())) {
107 haig_map[ep.haig.get()] = make_shared<raw_som_dfa>(*ep.haig);
108 }
109 }
110
111 copy_graph(ig, *out,
112 boost::edge_copy(RoseEdgeCopier(ig, *out, graph_map, haig_map)));
113 return out;
114}
115
116void calcVertexOffsets(RoseInGraph &g) {
117 vector<RoseInVertex> v_order = topo_order(g);
118
119 for (RoseInVertex v : v_order) {
120 if (g[v].type == RIV_START) {
121 g[v].min_offset = 0;
122 g[v].max_offset = ROSE_BOUND_INF;
123 continue;
124 } else if (g[v].type == RIV_ANCHORED_START) {
125 g[v].min_offset = 0;
126 g[v].max_offset = 0;
127 continue;
128 }
129
130 DEBUG_PRINTF("vertex '%s'\n", dumpString(g[v].s).c_str());
131
132 // Min and max predecessor depths.
133 u32 min_d = ROSE_BOUND_INF;
134 u32 max_d = 0;
135
136 for (const auto &e : in_edges_range(v, g)) {
137 RoseInVertex u = source(e, g);
138 u32 e_min = g[u].min_offset;
139 u32 e_max = g[u].max_offset;
140
141 DEBUG_PRINTF("in-edge from u with offsets [%u,%u]\n", e_min, e_max);
142
143 if (g[e].graph) {
144 const NGHolder &h = *g[e].graph;
145 depth g_min_width = findMinWidth(h);
146 depth g_max_width =
147 isAnchored(h) ? findMaxWidth(h) : depth::infinity();
148 u32 graph_lag = g[e].graph_lag;
149
150 DEBUG_PRINTF("edge has graph, depths [%s,%s] and lag %u\n",
151 g_min_width.str().c_str(),
152 g_max_width.str().c_str(), graph_lag);
153 g_min_width += graph_lag;
154 g_max_width += graph_lag;
155 e_min = add_rose_depth(e_min, g_min_width);
156 if (g_max_width.is_finite()) {
157 e_max = add_rose_depth(e_max, g_max_width);
158 } else {
159 e_max = ROSE_BOUND_INF;
160 }
161 } else {
162 DEBUG_PRINTF("edge has bounds [%u,%u]\n", g[e].minBound,
163 g[e].maxBound);
164 e_min = add_rose_depth(e_min, g[e].minBound);
165 e_max = add_rose_depth(e_max, g[e].maxBound);
166 if (g[v].type == RIV_LITERAL) {
167 u32 len = g[v].s.length();
168 DEBUG_PRINTF("lit len %u\n", len);
169 e_min = add_rose_depth(e_min, len);
170 e_max = add_rose_depth(e_max, len);
171 }
172 }
173
174 min_d = min(min_d, e_min);
175 max_d = max(max_d, e_max);
176 }
177
178 DEBUG_PRINTF("vertex depths [%u,%u]\n", min_d, max_d);
179
180 assert(max_d >= min_d);
181 g[v].min_offset = min_d;
182 g[v].max_offset = max_d;
183 }
184
185 // It's possible that we may have literal delays assigned to vertices here
186 // as well. If so, these need to be added to the min/max offsets.
187 for (RoseInVertex v : v_order) {
188 const u32 delay = g[v].delay;
189 g[v].min_offset = add_rose_depth(g[v].min_offset, delay);
190 g[v].max_offset = add_rose_depth(g[v].max_offset, delay);
191 }
192}
193
194nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e) {
195 RoseInVertex u = source(e, in);
196 RoseInVertex v = target(e, in);
197
198 bool start = in[u].type == RIV_START || in[u].type == RIV_ANCHORED_START;
199 bool end = in[v].type == RIV_ACCEPT || in[v].type == RIV_ACCEPT_EOD;
200
201 if (start && !end) {
202 return NFA_PREFIX;
203 } else if (!start && end) {
204 return NFA_SUFFIX;
205 } else if (!start && !end) {
206 return NFA_INFIX;
207 } else {
208 assert(in[v].type == RIV_ACCEPT_EOD);
209 return NFA_OUTFIX;
210 }
211}
212
213void pruneUseless(RoseInGraph &g) {
214 DEBUG_PRINTF("pruning useless vertices\n");
215
216 set<RoseInVertex> dead;
217 RoseInVertex dummy_start
218 = add_vertex(RoseInVertexProps::makeStart(true), g);
219 RoseInVertex dummy_end
220 = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), g);
221 dead.insert(dummy_start);
222 dead.insert(dummy_end);
223 for (auto v : vertices_range(g)) {
224 if (v == dummy_start || v == dummy_end) {
225 continue;
226 }
227 switch (g[v].type) {
228 case RIV_ANCHORED_START:
229 case RIV_START:
230 add_edge(dummy_start, v, g);
231 break;
232 case RIV_ACCEPT:
233 case RIV_ACCEPT_EOD:
234 add_edge(v, dummy_end, g);
235 break;
236 default:
237 break;
238 }
239 }
240
241 find_unreachable(g, vector<RoseInVertex>(1, dummy_start), &dead);
242 find_unreachable(boost::reverse_graph<RoseInGraph, RoseInGraph &>(g),
243 vector<RoseInVertex>(1, dummy_end), &dead);
244
245 for (auto v : dead) {
246 clear_vertex(v, g);
247 remove_vertex(v, g);
248 }
249}
250
251}
252