1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief State numbering and late graph restructuring code.
31 */
32#include "ng_restructuring.h"
33
34#include "grey.h"
35#include "ng_holder.h"
36#include "ng_util.h"
37#include "ue2common.h"
38#include "util/graph_range.h"
39
40#include <algorithm>
41#include <cassert>
42
43#include <boost/graph/transpose_graph.hpp>
44
45using namespace std;
46
47namespace ue2 {
48
49/** Connect the start vertex to each of the vertices in \p tops. This is useful
50 * temporarily for when we need to run a graph algorithm that expects a single
51 * source vertex. */
52static
53void wireStartToTops(NGHolder &g, const flat_set<NFAVertex> &tops,
54 vector<NFAEdge> &tempEdges) {
55 for (NFAVertex v : tops) {
56 assert(!isLeafNode(v, g));
57
58 const NFAEdge &e = add_edge(g.start, v, g);
59 tempEdges.push_back(e);
60 }
61}
62
63/**
64 * Returns true if start's successors (aside from startDs) are subset of
65 * startDs's proper successors or if start has no successors other than startDs.
66 */
67static
68bool startIsRedundant(const NGHolder &g) {
69 /* We ignore startDs as the self-loop may have been stripped as an
70 * optimisation for repeats (improveLeadingRepeats()). */
71 set<NFAVertex> start;
72 insert(&start, adjacent_vertices_range(g.start, g));
73 start.erase(g.startDs);
74
75 // Trivial case: start has no successors other than startDs.
76 if (start.empty()) {
77 DEBUG_PRINTF("start has no out-edges other than to startDs\n");
78 return true;
79 }
80
81 set<NFAVertex> startDs;
82 insert(&startDs, adjacent_vertices_range(g.startDs, g));
83 startDs.erase(g.startDs);
84
85 if (!is_subset_of(start, startDs)) {
86 DEBUG_PRINTF("out-edges of start and startDs aren't equivalent\n");
87 return false;
88 }
89
90 return true;
91}
92
93static
94void getStateOrdering(NGHolder &g, const flat_set<NFAVertex> &tops,
95 vector<NFAVertex> &ordering) {
96 // First, wire up our "tops" to start so that we have a single source,
97 // which will give a nicer topo order.
98 vector<NFAEdge> tempEdges;
99 wireStartToTops(g, tops, tempEdges);
100
101 renumber_vertices(g);
102
103 vector<NFAVertex> temp = getTopoOrdering(g);
104
105 remove_edges(tempEdges, g);
106
107 // Move {start, startDs} to the end, so they'll be first when we reverse
108 // the ordering (if they are required).
109 temp.erase(remove(temp.begin(), temp.end(), g.startDs));
110 temp.erase(remove(temp.begin(), temp.end(), g.start));
111 if (proper_out_degree(g.startDs, g)) {
112 temp.push_back(g.startDs);
113 }
114 if (!startIsRedundant(g)) {
115 temp.push_back(g.start);
116 }
117
118 // Walk ordering, remove vertices that shouldn't be participating in state
119 // numbering, such as accepts.
120 for (auto v : temp) {
121 if (is_any_accept(v, g)) {
122 continue; // accepts don't need states
123 }
124
125 ordering.push_back(v);
126 }
127
128 // Output of topo order was in reverse.
129 reverse(ordering.begin(), ordering.end());
130}
131
132// Returns the number of states.
133static
134unordered_map<NFAVertex, u32>
135getStateIndices(const NGHolder &h, const vector<NFAVertex> &ordering) {
136 unordered_map<NFAVertex, u32> states;
137 for (const auto &v : vertices_range(h)) {
138 states[v] = NO_STATE;
139 }
140
141 u32 stateNum = 0;
142 for (auto v : ordering) {
143 DEBUG_PRINTF("assigning state num %u to vertex %zu\n", stateNum,
144 h[v].index);
145 states[v] = stateNum++;
146 }
147 return states;
148}
149
150/** UE-1648: A state with a single successor that happens to be a predecessor
151 * can be given any ol' state ID by the topological ordering, so we sink it
152 * next to its pred. This enables better merging. */
153static
154void optimiseTightLoops(const NGHolder &g, vector<NFAVertex> &ordering) {
155 deque<pair<NFAVertex, NFAVertex>> candidates;
156
157 auto start = ordering.begin();
158 for (auto it = ordering.begin(), ite = ordering.end(); it != ite; ++it) {
159 NFAVertex v = *it;
160 if (is_special(v, g)) {
161 continue;
162 }
163
164 if (out_degree(v, g) == 1) {
165 NFAVertex t = *(adjacent_vertices(v, g).first);
166 if (v == t) {
167 continue;
168 }
169 if (edge(t, v, g).second && find(start, it, t) != ite) {
170 candidates.push_back(make_pair(v, t));
171 }
172 }
173 }
174
175 for (const auto &cand : candidates) {
176 NFAVertex v = cand.first, u = cand.second;
177 auto u_it = find(ordering.begin(), ordering.end(), u);
178 auto v_it = find(ordering.begin(), ordering.end(), v);
179
180 // Only move candidates backwards in the ordering, and only move them
181 // when necessary.
182 if (u_it >= v_it || distance(u_it, v_it) == 1) {
183 continue;
184 }
185
186 DEBUG_PRINTF("moving vertex %zu next to %zu\n", g[v].index, g[u].index);
187
188 ordering.erase(v_it);
189 ordering.insert(++u_it, v);
190 }
191}
192
193unordered_map<NFAVertex, u32>
194numberStates(NGHolder &h, const flat_set<NFAVertex> &tops) {
195 DEBUG_PRINTF("numbering states for holder %p\n", &h);
196
197 vector<NFAVertex> ordering;
198 getStateOrdering(h, tops, ordering);
199
200 optimiseTightLoops(h, ordering);
201
202 return getStateIndices(h, ordering);
203}
204
205u32 countStates(const unordered_map<NFAVertex, u32> &state_ids) {
206 if (state_ids.empty()) {
207 return 0;
208 }
209
210 u32 max_state = 0;
211 for (const auto &m : state_ids) {
212 if (m.second != NO_STATE) {
213 max_state = max(m.second, max_state);
214 }
215 }
216 u32 num_states = max_state + 1;
217
218 return num_states;
219}
220
221} // namespace ue2
222