1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "rose_in_util.h" |
30 | |
31 | #include "rose_build_util.h" |
32 | #include "nfa/goughcompile.h" |
33 | #include "nfagraph/ng_depth.h" |
34 | #include "nfagraph/ng_util.h" |
35 | #include "nfagraph/ng_width.h" |
36 | #include "util/container.h" |
37 | #include "util/graph_range.h" |
38 | #include "util/make_unique.h" |
39 | |
40 | #include <vector> |
41 | |
42 | #include <boost/graph/copy.hpp> |
43 | #include <boost/graph/reverse_graph.hpp> |
44 | #include <boost/graph/topological_sort.hpp> |
45 | |
46 | using namespace std; |
47 | |
48 | namespace ue2 { |
49 | |
50 | /* Returns a topological ordering of the vertices in g. That is the starts are |
51 | * at the front and all the predecessors of a vertex occur earlier in the list |
52 | * than the vertex. */ |
53 | vector<RoseInVertex> topo_order(const RoseInGraph &g) { |
54 | assert(hasCorrectlyNumberedVertices(g)); |
55 | vector<RoseInVertex> v_order; |
56 | v_order.reserve(num_vertices(g)); |
57 | |
58 | boost::topological_sort(g, back_inserter(v_order)); |
59 | |
60 | reverse(v_order.begin(), v_order.end()); /* put starts at the front */ |
61 | |
62 | return v_order; |
63 | } |
64 | |
65 | namespace { |
66 | struct RoseEdgeCopier { |
67 | typedef unordered_map<const NGHolder *, shared_ptr<NGHolder>> GraphMap; |
68 | typedef unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> HaigMap; |
69 | |
70 | RoseEdgeCopier(const RoseInGraph &g1, RoseInGraph &g2, |
71 | const GraphMap &graph_map_in, const HaigMap &haig_map_in) |
72 | : ig(g1), out(g2), graph_map(graph_map_in), haig_map(haig_map_in) {} |
73 | |
74 | void operator()(const RoseInEdge &e1, RoseInEdge &e2) { |
75 | // Clone all properties. |
76 | put(boost::edge_all, out, e2, get(boost::edge_all, ig, e1)); |
77 | // Substitute in cloned graphs. |
78 | if (ig[e1].graph) { |
79 | out[e2].graph = graph_map.at(ig[e1].graph.get()); |
80 | } |
81 | if (ig[e1].haig) { |
82 | out[e2].haig = haig_map.at(ig[e1].haig.get()); |
83 | } |
84 | } |
85 | |
86 | private: |
87 | const RoseInGraph &ig; |
88 | RoseInGraph &out; |
89 | const GraphMap &graph_map; |
90 | const HaigMap &haig_map; |
91 | }; |
92 | } |
93 | |
94 | unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig) { |
95 | assert(hasCorrectlyNumberedVertices(ig)); |
96 | unique_ptr<RoseInGraph> out = make_unique<RoseInGraph>(); |
97 | |
98 | unordered_map<const NGHolder *, shared_ptr<NGHolder>> graph_map; |
99 | unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> haig_map; |
100 | |
101 | for (const auto &e : edges_range(ig)) { |
102 | const RoseInEdgeProps &ep = ig[e]; |
103 | if (ep.graph && !contains(graph_map, ep.graph.get())) { |
104 | graph_map[ep.graph.get()] = cloneHolder(*ep.graph); |
105 | } |
106 | if (ep.haig && !contains(haig_map, ep.haig.get())) { |
107 | haig_map[ep.haig.get()] = make_shared<raw_som_dfa>(*ep.haig); |
108 | } |
109 | } |
110 | |
111 | copy_graph(ig, *out, |
112 | boost::edge_copy(RoseEdgeCopier(ig, *out, graph_map, haig_map))); |
113 | return out; |
114 | } |
115 | |
116 | void calcVertexOffsets(RoseInGraph &g) { |
117 | vector<RoseInVertex> v_order = topo_order(g); |
118 | |
119 | for (RoseInVertex v : v_order) { |
120 | if (g[v].type == RIV_START) { |
121 | g[v].min_offset = 0; |
122 | g[v].max_offset = ROSE_BOUND_INF; |
123 | continue; |
124 | } else if (g[v].type == RIV_ANCHORED_START) { |
125 | g[v].min_offset = 0; |
126 | g[v].max_offset = 0; |
127 | continue; |
128 | } |
129 | |
130 | DEBUG_PRINTF("vertex '%s'\n" , dumpString(g[v].s).c_str()); |
131 | |
132 | // Min and max predecessor depths. |
133 | u32 min_d = ROSE_BOUND_INF; |
134 | u32 max_d = 0; |
135 | |
136 | for (const auto &e : in_edges_range(v, g)) { |
137 | RoseInVertex u = source(e, g); |
138 | u32 e_min = g[u].min_offset; |
139 | u32 e_max = g[u].max_offset; |
140 | |
141 | DEBUG_PRINTF("in-edge from u with offsets [%u,%u]\n" , e_min, e_max); |
142 | |
143 | if (g[e].graph) { |
144 | const NGHolder &h = *g[e].graph; |
145 | depth g_min_width = findMinWidth(h); |
146 | depth g_max_width = |
147 | isAnchored(h) ? findMaxWidth(h) : depth::infinity(); |
148 | u32 graph_lag = g[e].graph_lag; |
149 | |
150 | DEBUG_PRINTF("edge has graph, depths [%s,%s] and lag %u\n" , |
151 | g_min_width.str().c_str(), |
152 | g_max_width.str().c_str(), graph_lag); |
153 | g_min_width += graph_lag; |
154 | g_max_width += graph_lag; |
155 | e_min = add_rose_depth(e_min, g_min_width); |
156 | if (g_max_width.is_finite()) { |
157 | e_max = add_rose_depth(e_max, g_max_width); |
158 | } else { |
159 | e_max = ROSE_BOUND_INF; |
160 | } |
161 | } else { |
162 | DEBUG_PRINTF("edge has bounds [%u,%u]\n" , g[e].minBound, |
163 | g[e].maxBound); |
164 | e_min = add_rose_depth(e_min, g[e].minBound); |
165 | e_max = add_rose_depth(e_max, g[e].maxBound); |
166 | if (g[v].type == RIV_LITERAL) { |
167 | u32 len = g[v].s.length(); |
168 | DEBUG_PRINTF("lit len %u\n" , len); |
169 | e_min = add_rose_depth(e_min, len); |
170 | e_max = add_rose_depth(e_max, len); |
171 | } |
172 | } |
173 | |
174 | min_d = min(min_d, e_min); |
175 | max_d = max(max_d, e_max); |
176 | } |
177 | |
178 | DEBUG_PRINTF("vertex depths [%u,%u]\n" , min_d, max_d); |
179 | |
180 | assert(max_d >= min_d); |
181 | g[v].min_offset = min_d; |
182 | g[v].max_offset = max_d; |
183 | } |
184 | |
185 | // It's possible that we may have literal delays assigned to vertices here |
186 | // as well. If so, these need to be added to the min/max offsets. |
187 | for (RoseInVertex v : v_order) { |
188 | const u32 delay = g[v].delay; |
189 | g[v].min_offset = add_rose_depth(g[v].min_offset, delay); |
190 | g[v].max_offset = add_rose_depth(g[v].max_offset, delay); |
191 | } |
192 | } |
193 | |
194 | nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e) { |
195 | RoseInVertex u = source(e, in); |
196 | RoseInVertex v = target(e, in); |
197 | |
198 | bool start = in[u].type == RIV_START || in[u].type == RIV_ANCHORED_START; |
199 | bool end = in[v].type == RIV_ACCEPT || in[v].type == RIV_ACCEPT_EOD; |
200 | |
201 | if (start && !end) { |
202 | return NFA_PREFIX; |
203 | } else if (!start && end) { |
204 | return NFA_SUFFIX; |
205 | } else if (!start && !end) { |
206 | return NFA_INFIX; |
207 | } else { |
208 | assert(in[v].type == RIV_ACCEPT_EOD); |
209 | return NFA_OUTFIX; |
210 | } |
211 | } |
212 | |
213 | void pruneUseless(RoseInGraph &g) { |
214 | DEBUG_PRINTF("pruning useless vertices\n" ); |
215 | |
216 | set<RoseInVertex> dead; |
217 | RoseInVertex dummy_start |
218 | = add_vertex(RoseInVertexProps::makeStart(true), g); |
219 | RoseInVertex dummy_end |
220 | = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), g); |
221 | dead.insert(dummy_start); |
222 | dead.insert(dummy_end); |
223 | for (auto v : vertices_range(g)) { |
224 | if (v == dummy_start || v == dummy_end) { |
225 | continue; |
226 | } |
227 | switch (g[v].type) { |
228 | case RIV_ANCHORED_START: |
229 | case RIV_START: |
230 | add_edge(dummy_start, v, g); |
231 | break; |
232 | case RIV_ACCEPT: |
233 | case RIV_ACCEPT_EOD: |
234 | add_edge(v, dummy_end, g); |
235 | break; |
236 | default: |
237 | break; |
238 | } |
239 | } |
240 | |
241 | find_unreachable(g, vector<RoseInVertex>(1, dummy_start), &dead); |
242 | find_unreachable(boost::reverse_graph<RoseInGraph, RoseInGraph &>(g), |
243 | vector<RoseInVertex>(1, dummy_end), &dead); |
244 | |
245 | for (auto v : dead) { |
246 | clear_vertex(v, g); |
247 | remove_vertex(v, g); |
248 | } |
249 | } |
250 | |
251 | } |
252 | |