| 1 | /* | 
| 2 |  * Copyright (c) 2015-2017, Intel Corporation | 
| 3 |  * | 
| 4 |  * Redistribution and use in source and binary forms, with or without | 
| 5 |  * modification, are permitted provided that the following conditions are met: | 
| 6 |  * | 
| 7 |  *  * Redistributions of source code must retain the above copyright notice, | 
| 8 |  *    this list of conditions and the following disclaimer. | 
| 9 |  *  * Redistributions in binary form must reproduce the above copyright | 
| 10 |  *    notice, this list of conditions and the following disclaimer in the | 
| 11 |  *    documentation and/or other materials provided with the distribution. | 
| 12 |  *  * Neither the name of Intel Corporation nor the names of its contributors | 
| 13 |  *    may be used to endorse or promote products derived from this software | 
| 14 |  *    without specific prior written permission. | 
| 15 |  * | 
| 16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
| 17 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
| 18 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
| 19 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 
| 20 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 
| 21 |  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 
| 22 |  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 
| 23 |  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 
| 24 |  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
| 25 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
| 26 |  * POSSIBILITY OF SUCH DAMAGE. | 
| 27 |  */ | 
| 28 |  | 
| 29 | /** \file | 
| 30 |  * \brief Region Redundancy optimisation pass. | 
| 31 |  * | 
| 32 |  * Identifies and removes entire regions that are adjacent to a cyclic state | 
| 33 |  * with a superset of their character reachability. | 
| 34 |  */ | 
| 35 | #include "ng_region_redundancy.h" | 
| 36 |  | 
| 37 | #include "ng_holder.h" | 
| 38 | #include "ng_region.h" | 
| 39 | #include "ng_util.h" | 
| 40 | #include "ue2common.h" | 
| 41 | #include "util/container.h" | 
| 42 | #include "util/graph_range.h" | 
| 43 |  | 
| 44 | #include <set> | 
| 45 |  | 
| 46 | using namespace std; | 
| 47 |  | 
| 48 | namespace ue2 { | 
| 49 |  | 
| 50 | namespace { | 
| 51 |  | 
| 52 | /** Precalculated information about a region. */ | 
| 53 | struct RegionInfo { | 
| 54 |     NFAVertex entry; //!< arbitrary entry vertex | 
| 55 |     CharReach cr;    //!< union of the reach of all vertices in region | 
| 56 | }; | 
| 57 |  | 
| 58 | } // namespace | 
| 59 |  | 
| 60 | static | 
| 61 | bool regionHasUnexpectedAccept(const NGHolder &g, const u32 region, | 
| 62 |                        const flat_set<ReportID> &expected_reports, | 
| 63 |                        const unordered_map<NFAVertex, u32> ®ion_map) { | 
| 64 |     /* TODO: only check vertices connected to accept/acceptEOD */ | 
| 65 |     for (auto v : vertices_range(g)) { | 
| 66 |         if (region != region_map.at(v)) { | 
| 67 |             continue; | 
| 68 |         } | 
| 69 |  | 
| 70 |         if (is_any_accept(v, g)) { | 
| 71 |             return true; /* encountering an actual special in the region is | 
| 72 |                           * possible but definitely unexpected */ | 
| 73 |         } | 
| 74 |  | 
| 75 |         for (auto w : adjacent_vertices_range(v, g)) { | 
| 76 |             if (is_any_accept(w, g) && g[v].reports != expected_reports) { | 
| 77 |                 return true; | 
| 78 |             } | 
| 79 |         } | 
| 80 |     } | 
| 81 |     return false; | 
| 82 | } | 
| 83 |  | 
| 84 | static | 
| 85 | void processCyclicStateForward(NGHolder &h, NFAVertex cyc, | 
| 86 |                          const map<u32, RegionInfo> &info, | 
| 87 |                          const unordered_map<NFAVertex, u32> ®ion_map, | 
| 88 |                          set<u32> &deadRegions) { | 
| 89 |     u32 region = region_map.at(cyc); | 
| 90 |     CharReach cr = h[cyc].char_reach; | 
| 91 |     auto reports = h[cyc].reports; | 
| 92 |  | 
| 93 |     DEBUG_PRINTF("going forward from %zu/%u\n" , h[cyc].index, | 
| 94 |                  region); | 
| 95 |  | 
| 96 |     map<u32, RegionInfo>::const_iterator it; | 
| 97 |     while ((it = info.find(++region)) != info.end()) { | 
| 98 |         NFAVertex v = it->second.entry; | 
| 99 |         const CharReach ®ion_cr = it->second.cr; | 
| 100 |         assert(isRegionEntry(h, v, region_map) && !is_special(v, h)); | 
| 101 |         DEBUG_PRINTF("checking %zu\n" , h[v].index); | 
| 102 |  | 
| 103 |         if (!region_cr.isSubsetOf(cr)) { | 
| 104 |             DEBUG_PRINTF("doesn't cover the reach of region %u\n" , region); | 
| 105 |             break; | 
| 106 |         } | 
| 107 |  | 
| 108 |         if (isOptionalRegion(h, v, region_map) | 
| 109 |             && !regionHasUnexpectedAccept(h, region, reports, region_map)) { | 
| 110 |             DEBUG_PRINTF("cyclic state %zu leads to optional region leader"  | 
| 111 |                          " %zu\n" , h[cyc].index, h[v].index); | 
| 112 |             deadRegions.insert(region); | 
| 113 |         } else if (isSingletonRegion(h, v, region_map)) { | 
| 114 |             /* we can use this region as straw and suck in optional regions on | 
| 115 |              * the other side. This allows us to transform /a{n,m}/ to /a{n}/ */ | 
| 116 |             cr = h[v].char_reach; | 
| 117 |             reports = h[v].reports; | 
| 118 |             DEBUG_PRINTF("%u is straw\n" , region); | 
| 119 |             assert(cr.isSubsetOf(h[cyc].char_reach)); | 
| 120 |             if (hasSelfLoop(v, h)) { | 
| 121 |                 DEBUG_PRINTF("%u is straw has a self-loop - kill\n" , region); | 
| 122 |                 remove_edge(v, v, h); | 
| 123 |             } | 
| 124 |         } else { | 
| 125 |             break; | 
| 126 |         } | 
| 127 |     } | 
| 128 | } | 
| 129 |  | 
| 130 | static | 
| 131 | void processCyclicStateReverse(NGHolder &h, NFAVertex cyc, | 
| 132 |                          const map<u32, RegionInfo> &info, | 
| 133 |                          const unordered_map<NFAVertex, u32> ®ion_map, | 
| 134 |                          set<u32> &deadRegions) { | 
| 135 |     u32 region = region_map.at(cyc); | 
| 136 |     CharReach cr = h[cyc].char_reach; | 
| 137 |     auto reports = h[cyc].reports; | 
| 138 |  | 
| 139 |     DEBUG_PRINTF("going back from %zu/%u\n" , h[cyc].index, region); | 
| 140 |  | 
| 141 |     map<u32, RegionInfo>::const_iterator it; | 
| 142 |     while ((it = info.find(--region)) != info.end()) { | 
| 143 |         NFAVertex v = it->second.entry; | 
| 144 |         const CharReach ®ion_cr = it->second.cr; | 
| 145 |         assert(isRegionEntry(h, v, region_map) && !is_special(v, h)); | 
| 146 |         DEBUG_PRINTF("checking %zu\n" , h[v].index); | 
| 147 |  | 
| 148 |         if (!region_cr.isSubsetOf(cr)) { | 
| 149 |             DEBUG_PRINTF("doesn't cover the reach of region %u\n" , region); | 
| 150 |             break; | 
| 151 |         } | 
| 152 |  | 
| 153 |         if (isOptionalRegion(h, v, region_map) | 
| 154 |             && !regionHasUnexpectedAccept(h, region, reports, region_map)) { | 
| 155 |             DEBUG_PRINTF("cyclic state %zu trails optional region leader %zu\n" , | 
| 156 |                          h[cyc].index, h[v].index); | 
| 157 |             deadRegions.insert(region); | 
| 158 |         } else if (isSingletonRegion(h, v, region_map)) { | 
| 159 |             /* we can use this region as a reverse straw and suck in optional | 
| 160 |              * regions on the other side. This allows us to transform | 
| 161 |              * /^a?a{n}.*b/ to /^a{n}.*b/ */ | 
| 162 |             cr = h[v].char_reach; | 
| 163 |             reports = h[v].reports; | 
| 164 |             DEBUG_PRINTF("%u is straw\n" , region); | 
| 165 |             assert(cr.isSubsetOf(h[cyc].char_reach)); | 
| 166 |             if (hasSelfLoop(v, h)) { | 
| 167 |                 DEBUG_PRINTF("%u is straw has a self-loop - kill\n" , region); | 
| 168 |                 remove_edge(v, v, h); | 
| 169 |             } | 
| 170 |         } else { | 
| 171 |             break; | 
| 172 |         } | 
| 173 |  | 
| 174 |         if (!region) { // No wrapping | 
| 175 |             break; | 
| 176 |         } | 
| 177 |     } | 
| 178 | } | 
| 179 |  | 
| 180 | static | 
| 181 | map<u32, RegionInfo> buildRegionInfoMap(const NGHolder &g, | 
| 182 |                    const unordered_map<NFAVertex, u32> ®ion_map) { | 
| 183 |     map<u32, RegionInfo> info; | 
| 184 |  | 
| 185 |     for (auto v : vertices_range(g)) { | 
| 186 |         u32 region = region_map.at(v); | 
| 187 |         if (is_special(v, g) || region == 0) { | 
| 188 |             continue; | 
| 189 |         } | 
| 190 |  | 
| 191 |         RegionInfo &ri = info[region]; | 
| 192 |         ri.cr |= g[v].char_reach; | 
| 193 |         if (isRegionEntry(g, v, region_map)) { | 
| 194 |             ri.entry = v; | 
| 195 |         } | 
| 196 |     } | 
| 197 |  | 
| 198 |     return info; | 
| 199 | } | 
| 200 |  | 
| 201 | static | 
| 202 | bool hasNoStartAnchoring(const NGHolder &h) { | 
| 203 |     for (auto v : adjacent_vertices_range(h.start, h)) { | 
| 204 |         if (!edge(h.startDs, v, h).second) { | 
| 205 |             return false; | 
| 206 |         } | 
| 207 |     } | 
| 208 |     return true; | 
| 209 | } | 
| 210 |  | 
| 211 | void removeRegionRedundancy(NGHolder &g, som_type som) { | 
| 212 |     auto region_map = assignRegions(g); | 
| 213 |  | 
| 214 |     map<u32, RegionInfo> info = buildRegionInfoMap(g, region_map); | 
| 215 |  | 
| 216 |     set<u32> deadRegions; | 
| 217 |  | 
| 218 |     /* if we are not tracking som, we can treat sds as a cyclic region if there | 
| 219 |      * is no anchoring */ | 
| 220 |     if (!som && hasNoStartAnchoring(g)) { | 
| 221 |         processCyclicStateForward(g, g.startDs, info, region_map, deadRegions); | 
| 222 |     } | 
| 223 |  | 
| 224 |     // Walk the region mapping, looking for regions that consist of a single | 
| 225 |     // cyclic node. | 
| 226 |  | 
| 227 |     for (const auto &m : info) { | 
| 228 |         // Must not have already been removed | 
| 229 |         if (contains(deadRegions, m.first)) { | 
| 230 |             continue; | 
| 231 |         } | 
| 232 |  | 
| 233 |         NFAVertex v = m.second.entry; | 
| 234 |         /* require a singleton cyclic region */ | 
| 235 |         if (!hasSelfLoop(v, g) || !isSingletonRegion(g, v, region_map)) { | 
| 236 |             continue; | 
| 237 |         } | 
| 238 |  | 
| 239 |         if (som && is_virtual_start(v, g)) { | 
| 240 |             continue; | 
| 241 |         } | 
| 242 |  | 
| 243 |         processCyclicStateForward(g, v, info, region_map, deadRegions); | 
| 244 |         processCyclicStateReverse(g, v, info, region_map, deadRegions); | 
| 245 |     } | 
| 246 |  | 
| 247 |     if (deadRegions.empty()) { | 
| 248 |         return; | 
| 249 |     } | 
| 250 |  | 
| 251 |     vector<NFAVertex> dead; | 
| 252 |  | 
| 253 |     for (auto v : vertices_range(g)) { | 
| 254 |         if (is_special(v, g)) { | 
| 255 |             continue; | 
| 256 |         } | 
| 257 |         u32 region = region_map.at(v); | 
| 258 |         if (contains(deadRegions, region)) { | 
| 259 |             dead.push_back(v); | 
| 260 |         } | 
| 261 |     } | 
| 262 |  | 
| 263 |     if (!dead.empty()) { | 
| 264 |         DEBUG_PRINTF("removing %zu vertices from %zu dead regions\n" , | 
| 265 |                      dead.size(), deadRegions.size()); | 
| 266 |         remove_vertices(dead, g); | 
| 267 |     } | 
| 268 | } | 
| 269 |  | 
| 270 | } // namespace ue2 | 
| 271 |  |