1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Analysis for literals decorated by leading/trailing assertions or
31 * character classes.
32 */
33#include "ng_literal_decorated.h"
34
35#include "nfagraph/ng_holder.h"
36#include "nfagraph/ng_util.h"
37#include "rose/rose_build.h"
38#include "rose/rose_in_graph.h"
39#include "rose/rose_in_util.h"
40#include "util/compile_context.h"
41#include "util/dump_charclass.h"
42#include "util/make_unique.h"
43
44#include <algorithm>
45#include <memory>
46#include <sstream>
47
48using namespace std;
49
50namespace ue2 {
51
52namespace {
53
54/** \brief Max fixed-width paths to generate from a graph. */
55static constexpr size_t MAX_PATHS = 10;
56
57/** \brief Max degree for any non-special vertex in the graph. */
58static constexpr size_t MAX_VERTEX_DEGREE = 6;
59
60using Path = vector<NFAVertex>;
61
62} // namespace
63
64static
65bool findPaths(const NGHolder &g, vector<Path> &paths) {
66 vector<NFAVertex> order = getTopoOrdering(g);
67
68 vector<size_t> read_count(num_vertices(g));
69 vector<vector<Path>> built(num_vertices(g));
70
71 for (auto it = order.rbegin(); it != order.rend(); ++it) {
72 NFAVertex v = *it;
73 auto &out = built[g[v].index];
74 assert(out.empty());
75
76 read_count[g[v].index] = out_degree(v, g);
77
78 DEBUG_PRINTF("setting read_count to %zu for %zu\n",
79 read_count[g[v].index], g[v].index);
80
81 if (v == g.start || v == g.startDs) {
82 out.push_back({v});
83 continue;
84 }
85
86 // The paths to v are the paths to v's predecessors, with v added to
87 // the end of each.
88 for (auto u : inv_adjacent_vertices_range(v, g)) {
89 // We have a stylized connection from start -> startDs, but we
90 // don't need anchored and unanchored versions of the same path.
91 if (u == g.start && edge(g.startDs, v, g).second) {
92 continue;
93 }
94
95 // Similarly, avoid the accept->acceptEod edge.
96 if (u == g.accept) {
97 assert(v == g.acceptEod);
98 continue;
99 }
100
101 assert(!built[g[u].index].empty());
102 assert(read_count[g[u].index]);
103
104 for (const auto &p : built[g[u].index]) {
105 out.push_back(p);
106 out.back().push_back(v);
107
108 if (out.size() > MAX_PATHS) {
109 // All these paths should eventually end up at a sink, so
110 // we've blown past our limit.
111 DEBUG_PRINTF("path limit exceeded\n");
112 return false;
113 }
114 }
115
116 read_count[g[u].index]--;
117 if (!read_count[g[u].index]) {
118 DEBUG_PRINTF("clearing %zu as finished reading\n", g[u].index);
119 built[g[u].index].clear();
120 built[g[u].index].shrink_to_fit();
121 }
122 }
123 }
124
125 insert(&paths, paths.end(), built[NODE_ACCEPT]);
126 insert(&paths, paths.end(), built[NODE_ACCEPT_EOD]);
127
128 DEBUG_PRINTF("%zu paths generated\n", paths.size());
129
130 return paths.size() <= MAX_PATHS;
131}
132
133static
134bool hasLargeDegreeVertex(const NGHolder &g) {
135 for (const auto &v : vertices_range(g)) {
136 if (is_special(v, g)) { // specials can have large degree
137 continue;
138 }
139 if (degree(v, g) > MAX_VERTEX_DEGREE) {
140 DEBUG_PRINTF("vertex %zu has degree %zu\n", g[v].index,
141 degree(v, g));
142 return true;
143 }
144 }
145 return false;
146}
147
148#if defined(DEBUG) || defined(DUMP_SUPPORT)
149static UNUSED
150string dumpPath(const NGHolder &g, const Path &path) {
151 ostringstream oss;
152 for (const auto &v : path) {
153 switch (g[v].index) {
154 case NODE_START:
155 oss << "<start>";
156 break;
157 case NODE_START_DOTSTAR:
158 oss << "<startDs>";
159 break;
160 case NODE_ACCEPT:
161 oss << "<accept>";
162 break;
163 case NODE_ACCEPT_EOD:
164 oss << "<acceptEod>";
165 break;
166 default:
167 oss << describeClass(g[v].char_reach);
168 break;
169 }
170 }
171 return oss.str();
172}
173#endif
174
175struct PathMask {
176 PathMask(const NGHolder &g, const Path &path)
177 : is_anchored(path.front() == g.start),
178 is_eod(path.back() == g.acceptEod) {
179 assert(path.size() >= 2);
180 mask.reserve(path.size() - 2);
181 for (const auto &v : path) {
182 if (is_special(v, g)) {
183 continue;
184 }
185 mask.push_back(g[v].char_reach);
186 }
187
188 // Reports are attached to the second-to-last vertex.
189 NFAVertex u = *std::next(path.rbegin());
190 reports = g[u].reports;
191 assert(!reports.empty());
192 }
193
194 vector<CharReach> mask;
195 flat_set<ReportID> reports;
196 bool is_anchored;
197 bool is_eod;
198};
199
200bool handleDecoratedLiterals(RoseBuild &rose, const NGHolder &g,
201 const CompileContext &cc) {
202 if (!cc.grey.allowDecoratedLiteral) {
203 return false;
204 }
205
206 if (!isAcyclic(g)) {
207 DEBUG_PRINTF("not acyclic\n");
208 return false;
209 }
210
211 if (!hasNarrowReachVertex(g)) {
212 DEBUG_PRINTF("no narrow reach vertices\n");
213 return false;
214 }
215
216 if (hasLargeDegreeVertex(g)) {
217 DEBUG_PRINTF("large degree\n");
218 return false;
219 }
220
221 vector<Path> paths;
222 if (!findPaths(g, paths)) {
223 DEBUG_PRINTF("couldn't split into a small number of paths\n");
224 return false;
225 }
226
227 assert(!paths.empty());
228 assert(paths.size() <= MAX_PATHS);
229
230 vector<PathMask> masks;
231 masks.reserve(paths.size());
232
233 for (const auto &path : paths) {
234 DEBUG_PRINTF("path: %s\n", dumpPath(g, path).c_str());
235 PathMask pm(g, path);
236 if (!rose.validateMask(pm.mask, pm.reports, pm.is_anchored,
237 pm.is_eod)) {
238 DEBUG_PRINTF("failed validation\n");
239 return false;
240 }
241 masks.push_back(move(pm));
242 }
243
244 for (const auto &pm : masks) {
245 rose.addMask(pm.mask, pm.reports, pm.is_anchored, pm.is_eod);
246 }
247
248 DEBUG_PRINTF("all ok, %zu masks added\n", masks.size());
249 return true;
250}
251
252} // namespace ue2
253