1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Analysis for literals decorated by leading/trailing assertions or |
31 | * character classes. |
32 | */ |
33 | #include "ng_literal_decorated.h" |
34 | |
35 | #include "nfagraph/ng_holder.h" |
36 | #include "nfagraph/ng_util.h" |
37 | #include "rose/rose_build.h" |
38 | #include "rose/rose_in_graph.h" |
39 | #include "rose/rose_in_util.h" |
40 | #include "util/compile_context.h" |
41 | #include "util/dump_charclass.h" |
42 | #include "util/make_unique.h" |
43 | |
44 | #include <algorithm> |
45 | #include <memory> |
46 | #include <sstream> |
47 | |
48 | using namespace std; |
49 | |
50 | namespace ue2 { |
51 | |
52 | namespace { |
53 | |
54 | /** \brief Max fixed-width paths to generate from a graph. */ |
55 | static constexpr size_t MAX_PATHS = 10; |
56 | |
57 | /** \brief Max degree for any non-special vertex in the graph. */ |
58 | static constexpr size_t MAX_VERTEX_DEGREE = 6; |
59 | |
60 | using Path = vector<NFAVertex>; |
61 | |
62 | } // namespace |
63 | |
64 | static |
65 | bool findPaths(const NGHolder &g, vector<Path> &paths) { |
66 | vector<NFAVertex> order = getTopoOrdering(g); |
67 | |
68 | vector<size_t> read_count(num_vertices(g)); |
69 | vector<vector<Path>> built(num_vertices(g)); |
70 | |
71 | for (auto it = order.rbegin(); it != order.rend(); ++it) { |
72 | NFAVertex v = *it; |
73 | auto &out = built[g[v].index]; |
74 | assert(out.empty()); |
75 | |
76 | read_count[g[v].index] = out_degree(v, g); |
77 | |
78 | DEBUG_PRINTF("setting read_count to %zu for %zu\n" , |
79 | read_count[g[v].index], g[v].index); |
80 | |
81 | if (v == g.start || v == g.startDs) { |
82 | out.push_back({v}); |
83 | continue; |
84 | } |
85 | |
86 | // The paths to v are the paths to v's predecessors, with v added to |
87 | // the end of each. |
88 | for (auto u : inv_adjacent_vertices_range(v, g)) { |
89 | // We have a stylized connection from start -> startDs, but we |
90 | // don't need anchored and unanchored versions of the same path. |
91 | if (u == g.start && edge(g.startDs, v, g).second) { |
92 | continue; |
93 | } |
94 | |
95 | // Similarly, avoid the accept->acceptEod edge. |
96 | if (u == g.accept) { |
97 | assert(v == g.acceptEod); |
98 | continue; |
99 | } |
100 | |
101 | assert(!built[g[u].index].empty()); |
102 | assert(read_count[g[u].index]); |
103 | |
104 | for (const auto &p : built[g[u].index]) { |
105 | out.push_back(p); |
106 | out.back().push_back(v); |
107 | |
108 | if (out.size() > MAX_PATHS) { |
109 | // All these paths should eventually end up at a sink, so |
110 | // we've blown past our limit. |
111 | DEBUG_PRINTF("path limit exceeded\n" ); |
112 | return false; |
113 | } |
114 | } |
115 | |
116 | read_count[g[u].index]--; |
117 | if (!read_count[g[u].index]) { |
118 | DEBUG_PRINTF("clearing %zu as finished reading\n" , g[u].index); |
119 | built[g[u].index].clear(); |
120 | built[g[u].index].shrink_to_fit(); |
121 | } |
122 | } |
123 | } |
124 | |
125 | insert(&paths, paths.end(), built[NODE_ACCEPT]); |
126 | insert(&paths, paths.end(), built[NODE_ACCEPT_EOD]); |
127 | |
128 | DEBUG_PRINTF("%zu paths generated\n" , paths.size()); |
129 | |
130 | return paths.size() <= MAX_PATHS; |
131 | } |
132 | |
133 | static |
134 | bool hasLargeDegreeVertex(const NGHolder &g) { |
135 | for (const auto &v : vertices_range(g)) { |
136 | if (is_special(v, g)) { // specials can have large degree |
137 | continue; |
138 | } |
139 | if (degree(v, g) > MAX_VERTEX_DEGREE) { |
140 | DEBUG_PRINTF("vertex %zu has degree %zu\n" , g[v].index, |
141 | degree(v, g)); |
142 | return true; |
143 | } |
144 | } |
145 | return false; |
146 | } |
147 | |
148 | #if defined(DEBUG) || defined(DUMP_SUPPORT) |
149 | static UNUSED |
150 | string dumpPath(const NGHolder &g, const Path &path) { |
151 | ostringstream oss; |
152 | for (const auto &v : path) { |
153 | switch (g[v].index) { |
154 | case NODE_START: |
155 | oss << "<start>" ; |
156 | break; |
157 | case NODE_START_DOTSTAR: |
158 | oss << "<startDs>" ; |
159 | break; |
160 | case NODE_ACCEPT: |
161 | oss << "<accept>" ; |
162 | break; |
163 | case NODE_ACCEPT_EOD: |
164 | oss << "<acceptEod>" ; |
165 | break; |
166 | default: |
167 | oss << describeClass(g[v].char_reach); |
168 | break; |
169 | } |
170 | } |
171 | return oss.str(); |
172 | } |
173 | #endif |
174 | |
175 | struct PathMask { |
176 | PathMask(const NGHolder &g, const Path &path) |
177 | : is_anchored(path.front() == g.start), |
178 | is_eod(path.back() == g.acceptEod) { |
179 | assert(path.size() >= 2); |
180 | mask.reserve(path.size() - 2); |
181 | for (const auto &v : path) { |
182 | if (is_special(v, g)) { |
183 | continue; |
184 | } |
185 | mask.push_back(g[v].char_reach); |
186 | } |
187 | |
188 | // Reports are attached to the second-to-last vertex. |
189 | NFAVertex u = *std::next(path.rbegin()); |
190 | reports = g[u].reports; |
191 | assert(!reports.empty()); |
192 | } |
193 | |
194 | vector<CharReach> mask; |
195 | flat_set<ReportID> reports; |
196 | bool is_anchored; |
197 | bool is_eod; |
198 | }; |
199 | |
200 | bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g, |
201 | const CompileContext &cc) { |
202 | if (!cc.grey.allowDecoratedLiteral) { |
203 | return false; |
204 | } |
205 | |
206 | if (!isAcyclic(g)) { |
207 | DEBUG_PRINTF("not acyclic\n" ); |
208 | return false; |
209 | } |
210 | |
211 | if (!hasNarrowReachVertex(g)) { |
212 | DEBUG_PRINTF("no narrow reach vertices\n" ); |
213 | return false; |
214 | } |
215 | |
216 | if (hasLargeDegreeVertex(g)) { |
217 | DEBUG_PRINTF("large degree\n" ); |
218 | return false; |
219 | } |
220 | |
221 | vector<Path> paths; |
222 | if (!findPaths(g, paths)) { |
223 | DEBUG_PRINTF("couldn't split into a small number of paths\n" ); |
224 | return false; |
225 | } |
226 | |
227 | assert(!paths.empty()); |
228 | assert(paths.size() <= MAX_PATHS); |
229 | |
230 | vector<PathMask> masks; |
231 | masks.reserve(paths.size()); |
232 | |
233 | for (const auto &path : paths) { |
234 | DEBUG_PRINTF("path: %s\n" , dumpPath(g, path).c_str()); |
235 | PathMask pm(g, path); |
236 | if (!rose.validateMask(pm.mask, pm.reports, pm.is_anchored, |
237 | pm.is_eod)) { |
238 | DEBUG_PRINTF("failed validation\n" ); |
239 | return false; |
240 | } |
241 | masks.push_back(move(pm)); |
242 | } |
243 | |
244 | for (const auto &pm : masks) { |
245 | rose.addMask(pm.mask, pm.reports, pm.is_anchored, pm.is_eod); |
246 | } |
247 | |
248 | DEBUG_PRINTF("all ok, %zu masks added\n" , masks.size()); |
249 | return true; |
250 | } |
251 | |
252 | } // namespace ue2 |
253 | |