1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Region Redundancy optimisation pass.
31 *
32 * Identifies and removes entire regions that are adjacent to a cyclic state
33 * with a superset of their character reachability.
34 */
35#include "ng_region_redundancy.h"
36
37#include "ng_holder.h"
38#include "ng_region.h"
39#include "ng_util.h"
40#include "ue2common.h"
41#include "util/container.h"
42#include "util/graph_range.h"
43
44#include <set>
45
46using namespace std;
47
48namespace ue2 {
49
50namespace {
51
52/** Precalculated information about a region. */
53struct RegionInfo {
54 NFAVertex entry; //!< arbitrary entry vertex
55 CharReach cr; //!< union of the reach of all vertices in region
56};
57
58} // namespace
59
60static
61bool regionHasUnexpectedAccept(const NGHolder &g, const u32 region,
62 const flat_set<ReportID> &expected_reports,
63 const unordered_map<NFAVertex, u32> &region_map) {
64 /* TODO: only check vertices connected to accept/acceptEOD */
65 for (auto v : vertices_range(g)) {
66 if (region != region_map.at(v)) {
67 continue;
68 }
69
70 if (is_any_accept(v, g)) {
71 return true; /* encountering an actual special in the region is
72 * possible but definitely unexpected */
73 }
74
75 for (auto w : adjacent_vertices_range(v, g)) {
76 if (is_any_accept(w, g) && g[v].reports != expected_reports) {
77 return true;
78 }
79 }
80 }
81 return false;
82}
83
84static
85void processCyclicStateForward(NGHolder &h, NFAVertex cyc,
86 const map<u32, RegionInfo> &info,
87 const unordered_map<NFAVertex, u32> &region_map,
88 set<u32> &deadRegions) {
89 u32 region = region_map.at(cyc);
90 CharReach cr = h[cyc].char_reach;
91 auto reports = h[cyc].reports;
92
93 DEBUG_PRINTF("going forward from %zu/%u\n", h[cyc].index,
94 region);
95
96 map<u32, RegionInfo>::const_iterator it;
97 while ((it = info.find(++region)) != info.end()) {
98 NFAVertex v = it->second.entry;
99 const CharReach &region_cr = it->second.cr;
100 assert(isRegionEntry(h, v, region_map) && !is_special(v, h));
101 DEBUG_PRINTF("checking %zu\n", h[v].index);
102
103 if (!region_cr.isSubsetOf(cr)) {
104 DEBUG_PRINTF("doesn't cover the reach of region %u\n", region);
105 break;
106 }
107
108 if (isOptionalRegion(h, v, region_map)
109 && !regionHasUnexpectedAccept(h, region, reports, region_map)) {
110 DEBUG_PRINTF("cyclic state %zu leads to optional region leader"
111 " %zu\n", h[cyc].index, h[v].index);
112 deadRegions.insert(region);
113 } else if (isSingletonRegion(h, v, region_map)) {
114 /* we can use this region as straw and suck in optional regions on
115 * the other side. This allows us to transform /a{n,m}/ to /a{n}/ */
116 cr = h[v].char_reach;
117 reports = h[v].reports;
118 DEBUG_PRINTF("%u is straw\n", region);
119 assert(cr.isSubsetOf(h[cyc].char_reach));
120 if (hasSelfLoop(v, h)) {
121 DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region);
122 remove_edge(v, v, h);
123 }
124 } else {
125 break;
126 }
127 }
128}
129
130static
131void processCyclicStateReverse(NGHolder &h, NFAVertex cyc,
132 const map<u32, RegionInfo> &info,
133 const unordered_map<NFAVertex, u32> &region_map,
134 set<u32> &deadRegions) {
135 u32 region = region_map.at(cyc);
136 CharReach cr = h[cyc].char_reach;
137 auto reports = h[cyc].reports;
138
139 DEBUG_PRINTF("going back from %zu/%u\n", h[cyc].index, region);
140
141 map<u32, RegionInfo>::const_iterator it;
142 while ((it = info.find(--region)) != info.end()) {
143 NFAVertex v = it->second.entry;
144 const CharReach &region_cr = it->second.cr;
145 assert(isRegionEntry(h, v, region_map) && !is_special(v, h));
146 DEBUG_PRINTF("checking %zu\n", h[v].index);
147
148 if (!region_cr.isSubsetOf(cr)) {
149 DEBUG_PRINTF("doesn't cover the reach of region %u\n", region);
150 break;
151 }
152
153 if (isOptionalRegion(h, v, region_map)
154 && !regionHasUnexpectedAccept(h, region, reports, region_map)) {
155 DEBUG_PRINTF("cyclic state %zu trails optional region leader %zu\n",
156 h[cyc].index, h[v].index);
157 deadRegions.insert(region);
158 } else if (isSingletonRegion(h, v, region_map)) {
159 /* we can use this region as a reverse straw and suck in optional
160 * regions on the other side. This allows us to transform
161 * /^a?a{n}.*b/ to /^a{n}.*b/ */
162 cr = h[v].char_reach;
163 reports = h[v].reports;
164 DEBUG_PRINTF("%u is straw\n", region);
165 assert(cr.isSubsetOf(h[cyc].char_reach));
166 if (hasSelfLoop(v, h)) {
167 DEBUG_PRINTF("%u is straw has a self-loop - kill\n", region);
168 remove_edge(v, v, h);
169 }
170 } else {
171 break;
172 }
173
174 if (!region) { // No wrapping
175 break;
176 }
177 }
178}
179
180static
181map<u32, RegionInfo> buildRegionInfoMap(const NGHolder &g,
182 const unordered_map<NFAVertex, u32> &region_map) {
183 map<u32, RegionInfo> info;
184
185 for (auto v : vertices_range(g)) {
186 u32 region = region_map.at(v);
187 if (is_special(v, g) || region == 0) {
188 continue;
189 }
190
191 RegionInfo &ri = info[region];
192 ri.cr |= g[v].char_reach;
193 if (isRegionEntry(g, v, region_map)) {
194 ri.entry = v;
195 }
196 }
197
198 return info;
199}
200
201static
202bool hasNoStartAnchoring(const NGHolder &h) {
203 for (auto v : adjacent_vertices_range(h.start, h)) {
204 if (!edge(h.startDs, v, h).second) {
205 return false;
206 }
207 }
208 return true;
209}
210
211void removeRegionRedundancy(NGHolder &g, som_type som) {
212 auto region_map = assignRegions(g);
213
214 map<u32, RegionInfo> info = buildRegionInfoMap(g, region_map);
215
216 set<u32> deadRegions;
217
218 /* if we are not tracking som, we can treat sds as a cyclic region if there
219 * is no anchoring */
220 if (!som && hasNoStartAnchoring(g)) {
221 processCyclicStateForward(g, g.startDs, info, region_map, deadRegions);
222 }
223
224 // Walk the region mapping, looking for regions that consist of a single
225 // cyclic node.
226
227 for (const auto &m : info) {
228 // Must not have already been removed
229 if (contains(deadRegions, m.first)) {
230 continue;
231 }
232
233 NFAVertex v = m.second.entry;
234 /* require a singleton cyclic region */
235 if (!hasSelfLoop(v, g) || !isSingletonRegion(g, v, region_map)) {
236 continue;
237 }
238
239 if (som && is_virtual_start(v, g)) {
240 continue;
241 }
242
243 processCyclicStateForward(g, v, info, region_map, deadRegions);
244 processCyclicStateReverse(g, v, info, region_map, deadRegions);
245 }
246
247 if (deadRegions.empty()) {
248 return;
249 }
250
251 vector<NFAVertex> dead;
252
253 for (auto v : vertices_range(g)) {
254 if (is_special(v, g)) {
255 continue;
256 }
257 u32 region = region_map.at(v);
258 if (contains(deadRegions, region)) {
259 dead.push_back(v);
260 }
261 }
262
263 if (!dead.empty()) {
264 DEBUG_PRINTF("removing %zu vertices from %zu dead regions\n",
265 dead.size(), deadRegions.size());
266 remove_vertices(dead, g);
267 }
268}
269
270} // namespace ue2
271