1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Region Redundancy optimisation pass. |
31 | * |
32 | * Identifies and removes entire regions that are adjacent to a cyclic state |
33 | * with a superset of their character reachability. |
34 | */ |
35 | #include "ng_region_redundancy.h" |
36 | |
37 | #include "ng_holder.h" |
38 | #include "ng_region.h" |
39 | #include "ng_util.h" |
40 | #include "ue2common.h" |
41 | #include "util/container.h" |
42 | #include "util/graph_range.h" |
43 | |
44 | #include <set> |
45 | |
46 | using namespace std; |
47 | |
48 | namespace ue2 { |
49 | |
50 | namespace { |
51 | |
52 | /** Precalculated information about a region. */ |
53 | struct RegionInfo { |
54 | NFAVertex entry; //!< arbitrary entry vertex |
55 | CharReach cr; //!< union of the reach of all vertices in region |
56 | }; |
57 | |
58 | } // namespace |
59 | |
60 | static |
61 | bool regionHasUnexpectedAccept(const NGHolder &g, const u32 region, |
62 | const flat_set<ReportID> &expected_reports, |
63 | const unordered_map<NFAVertex, u32> ®ion_map) { |
64 | /* TODO: only check vertices connected to accept/acceptEOD */ |
65 | for (auto v : vertices_range(g)) { |
66 | if (region != region_map.at(v)) { |
67 | continue; |
68 | } |
69 | |
70 | if (is_any_accept(v, g)) { |
71 | return true; /* encountering an actual special in the region is |
72 | * possible but definitely unexpected */ |
73 | } |
74 | |
75 | for (auto w : adjacent_vertices_range(v, g)) { |
76 | if (is_any_accept(w, g) && g[v].reports != expected_reports) { |
77 | return true; |
78 | } |
79 | } |
80 | } |
81 | return false; |
82 | } |
83 | |
84 | static |
85 | void processCyclicStateForward(NGHolder &h, NFAVertex cyc, |
86 | const map<u32, RegionInfo> &info, |
87 | const unordered_map<NFAVertex, u32> ®ion_map, |
88 | set<u32> &deadRegions) { |
89 | u32 region = region_map.at(cyc); |
90 | CharReach cr = h[cyc].char_reach; |
91 | auto reports = h[cyc].reports; |
92 | |
93 | DEBUG_PRINTF("going forward from %zu/%u\n" , h[cyc].index, |
94 | region); |
95 | |
96 | map<u32, RegionInfo>::const_iterator it; |
97 | while ((it = info.find(++region)) != info.end()) { |
98 | NFAVertex v = it->second.entry; |
99 | const CharReach ®ion_cr = it->second.cr; |
100 | assert(isRegionEntry(h, v, region_map) && !is_special(v, h)); |
101 | DEBUG_PRINTF("checking %zu\n" , h[v].index); |
102 | |
103 | if (!region_cr.isSubsetOf(cr)) { |
104 | DEBUG_PRINTF("doesn't cover the reach of region %u\n" , region); |
105 | break; |
106 | } |
107 | |
108 | if (isOptionalRegion(h, v, region_map) |
109 | && !regionHasUnexpectedAccept(h, region, reports, region_map)) { |
110 | DEBUG_PRINTF("cyclic state %zu leads to optional region leader" |
111 | " %zu\n" , h[cyc].index, h[v].index); |
112 | deadRegions.insert(region); |
113 | } else if (isSingletonRegion(h, v, region_map)) { |
114 | /* we can use this region as straw and suck in optional regions on |
115 | * the other side. This allows us to transform /a{n,m}/ to /a{n}/ */ |
116 | cr = h[v].char_reach; |
117 | reports = h[v].reports; |
118 | DEBUG_PRINTF("%u is straw\n" , region); |
119 | assert(cr.isSubsetOf(h[cyc].char_reach)); |
120 | if (hasSelfLoop(v, h)) { |
121 | DEBUG_PRINTF("%u is straw has a self-loop - kill\n" , region); |
122 | remove_edge(v, v, h); |
123 | } |
124 | } else { |
125 | break; |
126 | } |
127 | } |
128 | } |
129 | |
130 | static |
131 | void processCyclicStateReverse(NGHolder &h, NFAVertex cyc, |
132 | const map<u32, RegionInfo> &info, |
133 | const unordered_map<NFAVertex, u32> ®ion_map, |
134 | set<u32> &deadRegions) { |
135 | u32 region = region_map.at(cyc); |
136 | CharReach cr = h[cyc].char_reach; |
137 | auto reports = h[cyc].reports; |
138 | |
139 | DEBUG_PRINTF("going back from %zu/%u\n" , h[cyc].index, region); |
140 | |
141 | map<u32, RegionInfo>::const_iterator it; |
142 | while ((it = info.find(--region)) != info.end()) { |
143 | NFAVertex v = it->second.entry; |
144 | const CharReach ®ion_cr = it->second.cr; |
145 | assert(isRegionEntry(h, v, region_map) && !is_special(v, h)); |
146 | DEBUG_PRINTF("checking %zu\n" , h[v].index); |
147 | |
148 | if (!region_cr.isSubsetOf(cr)) { |
149 | DEBUG_PRINTF("doesn't cover the reach of region %u\n" , region); |
150 | break; |
151 | } |
152 | |
153 | if (isOptionalRegion(h, v, region_map) |
154 | && !regionHasUnexpectedAccept(h, region, reports, region_map)) { |
155 | DEBUG_PRINTF("cyclic state %zu trails optional region leader %zu\n" , |
156 | h[cyc].index, h[v].index); |
157 | deadRegions.insert(region); |
158 | } else if (isSingletonRegion(h, v, region_map)) { |
159 | /* we can use this region as a reverse straw and suck in optional |
160 | * regions on the other side. This allows us to transform |
161 | * /^a?a{n}.*b/ to /^a{n}.*b/ */ |
162 | cr = h[v].char_reach; |
163 | reports = h[v].reports; |
164 | DEBUG_PRINTF("%u is straw\n" , region); |
165 | assert(cr.isSubsetOf(h[cyc].char_reach)); |
166 | if (hasSelfLoop(v, h)) { |
167 | DEBUG_PRINTF("%u is straw has a self-loop - kill\n" , region); |
168 | remove_edge(v, v, h); |
169 | } |
170 | } else { |
171 | break; |
172 | } |
173 | |
174 | if (!region) { // No wrapping |
175 | break; |
176 | } |
177 | } |
178 | } |
179 | |
180 | static |
181 | map<u32, RegionInfo> buildRegionInfoMap(const NGHolder &g, |
182 | const unordered_map<NFAVertex, u32> ®ion_map) { |
183 | map<u32, RegionInfo> info; |
184 | |
185 | for (auto v : vertices_range(g)) { |
186 | u32 region = region_map.at(v); |
187 | if (is_special(v, g) || region == 0) { |
188 | continue; |
189 | } |
190 | |
191 | RegionInfo &ri = info[region]; |
192 | ri.cr |= g[v].char_reach; |
193 | if (isRegionEntry(g, v, region_map)) { |
194 | ri.entry = v; |
195 | } |
196 | } |
197 | |
198 | return info; |
199 | } |
200 | |
201 | static |
202 | bool hasNoStartAnchoring(const NGHolder &h) { |
203 | for (auto v : adjacent_vertices_range(h.start, h)) { |
204 | if (!edge(h.startDs, v, h).second) { |
205 | return false; |
206 | } |
207 | } |
208 | return true; |
209 | } |
210 | |
211 | void removeRegionRedundancy(NGHolder &g, som_type som) { |
212 | auto region_map = assignRegions(g); |
213 | |
214 | map<u32, RegionInfo> info = buildRegionInfoMap(g, region_map); |
215 | |
216 | set<u32> deadRegions; |
217 | |
218 | /* if we are not tracking som, we can treat sds as a cyclic region if there |
219 | * is no anchoring */ |
220 | if (!som && hasNoStartAnchoring(g)) { |
221 | processCyclicStateForward(g, g.startDs, info, region_map, deadRegions); |
222 | } |
223 | |
224 | // Walk the region mapping, looking for regions that consist of a single |
225 | // cyclic node. |
226 | |
227 | for (const auto &m : info) { |
228 | // Must not have already been removed |
229 | if (contains(deadRegions, m.first)) { |
230 | continue; |
231 | } |
232 | |
233 | NFAVertex v = m.second.entry; |
234 | /* require a singleton cyclic region */ |
235 | if (!hasSelfLoop(v, g) || !isSingletonRegion(g, v, region_map)) { |
236 | continue; |
237 | } |
238 | |
239 | if (som && is_virtual_start(v, g)) { |
240 | continue; |
241 | } |
242 | |
243 | processCyclicStateForward(g, v, info, region_map, deadRegions); |
244 | processCyclicStateReverse(g, v, info, region_map, deadRegions); |
245 | } |
246 | |
247 | if (deadRegions.empty()) { |
248 | return; |
249 | } |
250 | |
251 | vector<NFAVertex> dead; |
252 | |
253 | for (auto v : vertices_range(g)) { |
254 | if (is_special(v, g)) { |
255 | continue; |
256 | } |
257 | u32 region = region_map.at(v); |
258 | if (contains(deadRegions, region)) { |
259 | dead.push_back(v); |
260 | } |
261 | } |
262 | |
263 | if (!dead.empty()) { |
264 | DEBUG_PRINTF("removing %zu vertices from %zu dead regions\n" , |
265 | dead.size(), deadRegions.size()); |
266 | remove_vertices(dead, g); |
267 | } |
268 | } |
269 | |
270 | } // namespace ue2 |
271 | |