1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief State numbering and late graph restructuring code. |
31 | */ |
32 | #include "ng_restructuring.h" |
33 | |
34 | #include "grey.h" |
35 | #include "ng_holder.h" |
36 | #include "ng_util.h" |
37 | #include "ue2common.h" |
38 | #include "util/graph_range.h" |
39 | |
40 | #include <algorithm> |
41 | #include <cassert> |
42 | |
43 | #include <boost/graph/transpose_graph.hpp> |
44 | |
45 | using namespace std; |
46 | |
47 | namespace ue2 { |
48 | |
49 | /** Connect the start vertex to each of the vertices in \p tops. This is useful |
50 | * temporarily for when we need to run a graph algorithm that expects a single |
51 | * source vertex. */ |
52 | static |
53 | void wireStartToTops(NGHolder &g, const flat_set<NFAVertex> &tops, |
54 | vector<NFAEdge> &tempEdges) { |
55 | for (NFAVertex v : tops) { |
56 | assert(!isLeafNode(v, g)); |
57 | |
58 | const NFAEdge &e = add_edge(g.start, v, g); |
59 | tempEdges.push_back(e); |
60 | } |
61 | } |
62 | |
63 | /** |
64 | * Returns true if start's successors (aside from startDs) are subset of |
65 | * startDs's proper successors or if start has no successors other than startDs. |
66 | */ |
67 | static |
68 | bool startIsRedundant(const NGHolder &g) { |
69 | /* We ignore startDs as the self-loop may have been stripped as an |
70 | * optimisation for repeats (improveLeadingRepeats()). */ |
71 | set<NFAVertex> start; |
72 | insert(&start, adjacent_vertices_range(g.start, g)); |
73 | start.erase(g.startDs); |
74 | |
75 | // Trivial case: start has no successors other than startDs. |
76 | if (start.empty()) { |
77 | DEBUG_PRINTF("start has no out-edges other than to startDs\n" ); |
78 | return true; |
79 | } |
80 | |
81 | set<NFAVertex> startDs; |
82 | insert(&startDs, adjacent_vertices_range(g.startDs, g)); |
83 | startDs.erase(g.startDs); |
84 | |
85 | if (!is_subset_of(start, startDs)) { |
86 | DEBUG_PRINTF("out-edges of start and startDs aren't equivalent\n" ); |
87 | return false; |
88 | } |
89 | |
90 | return true; |
91 | } |
92 | |
93 | static |
94 | void getStateOrdering(NGHolder &g, const flat_set<NFAVertex> &tops, |
95 | vector<NFAVertex> &ordering) { |
96 | // First, wire up our "tops" to start so that we have a single source, |
97 | // which will give a nicer topo order. |
98 | vector<NFAEdge> tempEdges; |
99 | wireStartToTops(g, tops, tempEdges); |
100 | |
101 | renumber_vertices(g); |
102 | |
103 | vector<NFAVertex> temp = getTopoOrdering(g); |
104 | |
105 | remove_edges(tempEdges, g); |
106 | |
107 | // Move {start, startDs} to the end, so they'll be first when we reverse |
108 | // the ordering (if they are required). |
109 | temp.erase(remove(temp.begin(), temp.end(), g.startDs)); |
110 | temp.erase(remove(temp.begin(), temp.end(), g.start)); |
111 | if (proper_out_degree(g.startDs, g)) { |
112 | temp.push_back(g.startDs); |
113 | } |
114 | if (!startIsRedundant(g)) { |
115 | temp.push_back(g.start); |
116 | } |
117 | |
118 | // Walk ordering, remove vertices that shouldn't be participating in state |
119 | // numbering, such as accepts. |
120 | for (auto v : temp) { |
121 | if (is_any_accept(v, g)) { |
122 | continue; // accepts don't need states |
123 | } |
124 | |
125 | ordering.push_back(v); |
126 | } |
127 | |
128 | // Output of topo order was in reverse. |
129 | reverse(ordering.begin(), ordering.end()); |
130 | } |
131 | |
132 | // Returns the number of states. |
133 | static |
134 | unordered_map<NFAVertex, u32> |
135 | getStateIndices(const NGHolder &h, const vector<NFAVertex> &ordering) { |
136 | unordered_map<NFAVertex, u32> states; |
137 | for (const auto &v : vertices_range(h)) { |
138 | states[v] = NO_STATE; |
139 | } |
140 | |
141 | u32 stateNum = 0; |
142 | for (auto v : ordering) { |
143 | DEBUG_PRINTF("assigning state num %u to vertex %zu\n" , stateNum, |
144 | h[v].index); |
145 | states[v] = stateNum++; |
146 | } |
147 | return states; |
148 | } |
149 | |
150 | /** UE-1648: A state with a single successor that happens to be a predecessor |
151 | * can be given any ol' state ID by the topological ordering, so we sink it |
152 | * next to its pred. This enables better merging. */ |
153 | static |
154 | void optimiseTightLoops(const NGHolder &g, vector<NFAVertex> &ordering) { |
155 | deque<pair<NFAVertex, NFAVertex>> candidates; |
156 | |
157 | auto start = ordering.begin(); |
158 | for (auto it = ordering.begin(), ite = ordering.end(); it != ite; ++it) { |
159 | NFAVertex v = *it; |
160 | if (is_special(v, g)) { |
161 | continue; |
162 | } |
163 | |
164 | if (out_degree(v, g) == 1) { |
165 | NFAVertex t = *(adjacent_vertices(v, g).first); |
166 | if (v == t) { |
167 | continue; |
168 | } |
169 | if (edge(t, v, g).second && find(start, it, t) != ite) { |
170 | candidates.push_back(make_pair(v, t)); |
171 | } |
172 | } |
173 | } |
174 | |
175 | for (const auto &cand : candidates) { |
176 | NFAVertex v = cand.first, u = cand.second; |
177 | auto u_it = find(ordering.begin(), ordering.end(), u); |
178 | auto v_it = find(ordering.begin(), ordering.end(), v); |
179 | |
180 | // Only move candidates backwards in the ordering, and only move them |
181 | // when necessary. |
182 | if (u_it >= v_it || distance(u_it, v_it) == 1) { |
183 | continue; |
184 | } |
185 | |
186 | DEBUG_PRINTF("moving vertex %zu next to %zu\n" , g[v].index, g[u].index); |
187 | |
188 | ordering.erase(v_it); |
189 | ordering.insert(++u_it, v); |
190 | } |
191 | } |
192 | |
193 | unordered_map<NFAVertex, u32> |
194 | numberStates(NGHolder &h, const flat_set<NFAVertex> &tops) { |
195 | DEBUG_PRINTF("numbering states for holder %p\n" , &h); |
196 | |
197 | vector<NFAVertex> ordering; |
198 | getStateOrdering(h, tops, ordering); |
199 | |
200 | optimiseTightLoops(h, ordering); |
201 | |
202 | return getStateIndices(h, ordering); |
203 | } |
204 | |
205 | u32 countStates(const unordered_map<NFAVertex, u32> &state_ids) { |
206 | if (state_ids.empty()) { |
207 | return 0; |
208 | } |
209 | |
210 | u32 max_state = 0; |
211 | for (const auto &m : state_ids) { |
212 | if (m.second != NO_STATE) { |
213 | max_state = max(m.second, max_state); |
214 | } |
215 | } |
216 | u32 num_states = max_state + 1; |
217 | |
218 | return num_states; |
219 | } |
220 | |
221 | } // namespace ue2 |
222 | |