/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "rose_build_castle.h"

#include "rose_build_impl.h"
#include "ue2common.h"
#include "nfa/castlecompile.h"
#include "nfagraph/ng_holder.h"
#include "nfagraph/ng_puff.h"
#include "util/charreach.h"
#include "util/compile_context.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/graph_range.h"
#include "util/ue2string.h"

#include <map>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>

#include <boost/range/adaptor/map.hpp>

using namespace std;
using boost::adaptors::map_values;

namespace ue2 {

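/* Attempt to replace the graph on the given leftfix with a CastleProto.
 * Succeeds only if the graph is a pure repeat with a single report; results
 * are cached so that identical graphs share a single CastleProto. */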
static
void makeCastle(LeftEngInfo &left,
        unordered_map<const NGHolder *, shared_ptr<CastleProto>> &cache) {
    if (left.dfa || left.haig || left.castle) {
        return;
    }
    if (!left.graph) {
        return;
    }

    const NGHolder &h = *left.graph;
    DEBUG_PRINTF("prefix %p\n", &h);

    if (contains(cache, &h)) {
        DEBUG_PRINTF("using cached CastleProto\n");
        left.castle = cache[&h];
        left.graph.reset();
        return;
    }

    PureRepeat pr;
    if (isPureRepeat(h, pr) && pr.reports.size() == 1) {
        DEBUG_PRINTF("vertex preceded by infix repeat %s\n",
                     pr.bounds.str().c_str());
        left.castle = make_shared<CastleProto>(h.kind, pr);
        cache[&h] = left.castle;
        left.graph.reset();
    }
}

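/* Attempt to replace the suffix graph on the given vertex with a
 * CastleProto. Suffixes that the puff analysis can handle are left for the
 * MPV, and repeats with a max bound <= 1 are left to other engines, which
 * represent them with less stream state. */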
static
void makeCastleSuffix(RoseBuildImpl &tbi, RoseVertex v,
        unordered_map<const NGHolder *, shared_ptr<CastleProto>> &cache) {
    RoseSuffixInfo &suffix = tbi.g[v].suffix;
    if (!suffix.graph) {
        return;
    }
    const NGHolder &h = *suffix.graph;
    DEBUG_PRINTF("suffix %p\n", &h);

    if (contains(cache, &h)) {
        DEBUG_PRINTF("using cached CastleProto\n");
        suffix.castle = cache[&h];
        suffix.graph.reset();
        return;
    }

    // The MPV will probably do a better job on the cases it's designed for.
    const bool fixed_depth = tbi.g[v].min_offset == tbi.g[v].max_offset;
    if (isPuffable(h, fixed_depth, tbi.rm, tbi.cc.grey)) {
        DEBUG_PRINTF("leaving suffix for puff\n");
        return;
    }

    PureRepeat pr;
    if (isPureRepeat(h, pr) && pr.reports.size() == 1) {
        DEBUG_PRINTF("suffix repeat %s\n", pr.bounds.str().c_str());

        // Right now, the Castle uses much more stream state to represent a
        // {m,1} repeat than just leaving it to an NFA.
        if (pr.bounds.max <= depth(1)) {
            DEBUG_PRINTF("leaving for other engines\n");
            return;
        }

        suffix.castle = make_shared<CastleProto>(h.kind, pr);
        cache[&h] = suffix.castle;
        suffix.graph.reset();
    }
}

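/* Collect the rose_literal_id entries for the literal IDs on the given
 * vertex. */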
static
vector<rose_literal_id> literals_for_vertex(const RoseBuildImpl &tbi,
                                            RoseVertex v) {
    vector<rose_literal_id> rv;

    for (const u32 id : tbi.g[v].literals) {
        rv.push_back(tbi.literals.at(id));
    }

    return rv;
}

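/* Attempt to reduce the lag on the vertices guarding this castle: trailing
 * trigger literal characters that lie within the castle's reach can be
 * consumed by the castle itself. Only applies to a single repeat with an
 * infinite max bound; the repeat bounds are widened to compensate for the
 * removed lag. */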
static
void renovateCastle(RoseBuildImpl &tbi, CastleProto *castle,
                    const vector<RoseVertex> &verts) {
    DEBUG_PRINTF("looking to renovate\n");

    if (castle->repeats.size() != 1) {
        assert(0); /* should not have merged castles yet */
        return;
    }

    PureRepeat &pr = castle->repeats.begin()->second;
    if (pr.bounds.max.is_finite()) {
        /* repeat cannot be turned into pseudo .* */
        return;
    }

    RoseGraph &g = tbi.g;
    const CharReach &cr = castle->reach();

    DEBUG_PRINTF("cr || %zu\n", cr.count());

    u32 allowed_to_remove = ~0;
    size_t min_succ_lit_len = 0;

    for (RoseVertex v : verts) {
        assert(g[v].left.castle.get() == castle);
        DEBUG_PRINTF("%zu checks at lag %u\n", g[v].index, g[v].left.lag);
        vector<rose_literal_id> lits = literals_for_vertex(tbi, v);
        for (const auto &e : lits) {
            DEBUG_PRINTF("%s +%u\n", dumpString(e.s).c_str(), e.delay);
            if (e.delay) {
                return; /* bail - TODO: be less lazy */
            }

            // Count the trailing literal characters (those inside the lag
            // region) that fall within the castle's reach.
            u32 ok_count = 0;
            for (auto it = e.s.end() - g[v].left.lag; it != e.s.end(); ++it) {
                if (!isSubsetOf(*it, cr)) {
                    break;
                }

                ok_count++;
            }
            LIMIT_TO_AT_MOST(&allowed_to_remove, ok_count);
            ENSURE_AT_LEAST(&min_succ_lit_len, e.elength());
        }
    }

    DEBUG_PRINTF("possible to decrease lag by %u\n", allowed_to_remove);

    for (RoseVertex v : verts) {
        assert(g[v].left.lag >= allowed_to_remove);
        g[v].left.lag -= allowed_to_remove;
    }

    assert(castle->repeats.size() == 1); /* should not have merged castles yet */

    pr.bounds.max += allowed_to_remove;

    /* Although it is always safe to increase the min bound as well, we would
     * rather not, as a min bound > 0 means that we have to store state as
     * well.
     *
     * As it was legal to run with the original lag, we know that it is not
     * possible to have an overlapping match which finishes within the trigger
     * literal past the original lag point. However, if there is already a min
     * bound constraint, this would be broken if we did not also increase the
     * min bound. */
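    /* Illustrative example (assumed values): a castle with reach [ab] and
     * repeat {1,inf}, guarded at lag 2 by the trigger literal "xxab". Both
     * trailing characters lie in [ab], so the lag drops to 0 and, since the
     * min bound is already > 0, the repeat becomes {3,inf}. */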
    if (pr.bounds.min > 0ULL || allowed_to_remove > min_succ_lit_len) {
        pr.bounds.min += allowed_to_remove;
    }
}

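/* Top-level pass: convert pure-repeat infix leftfixes and suffixes into
 * castles, then try to reduce the lag on the newly-built castles. */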
void makeCastles(RoseBuildImpl &tbi) {
    if (!tbi.cc.grey.allowCastle && !tbi.cc.grey.allowLbr) {
        return;
    }

    RoseGraph &g = tbi.g;

    // Caches so that we can reuse analysis on graphs we've seen already.
    unordered_map<const NGHolder *, shared_ptr<CastleProto>> left_cache;
    unordered_map<const NGHolder *, shared_ptr<CastleProto>> suffix_cache;

    unordered_map<CastleProto *, vector<RoseVertex>> rev;

    for (RoseVertex v : vertices_range(g)) {
        if (g[v].left && !tbi.isRootSuccessor(v)) {
            makeCastle(g[v].left, left_cache);
            if (g[v].left.castle) {
                rev[g[v].left.castle.get()].push_back(v);
            }
        }

        if (g[v].suffix) {
            makeCastleSuffix(tbi, v, suffix_cache);
        }
    }

    for (const auto &e : rev) {
        renovateCastle(tbi, e.first, e.second);
    }
}

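/* Convert castles that would be better served by an NFA back into holder
 * graphs: castles with more than one repeat whose equivalent graph is small
 * enough. Returns true if any engines were replaced. */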
bool unmakeCastles(RoseBuildImpl &tbi) {
    RoseGraph &g = tbi.g;

    const size_t MAX_UNMAKE_VERTICES = 64;

    map<left_id, vector<RoseVertex>> left_castles;
    map<suffix_id, vector<RoseVertex>> suffix_castles;
    bool changed = false;

    for (auto v : vertices_range(g)) {
        const LeftEngInfo &left = g[v].left;
        if (left.castle && left.castle->repeats.size() > 1) {
            left_castles[left].push_back(v);
        }
        const RoseSuffixInfo &suffix = g[v].suffix;
        if (suffix.castle && suffix.castle->repeats.size() > 1) {
            suffix_castles[suffix].push_back(v);
        }
    }

    for (const auto &e : left_castles) {
        assert(e.first.castle());
        shared_ptr<NGHolder> h = makeHolder(*e.first.castle(), tbi.cc);
        if (!h || num_vertices(*h) > MAX_UNMAKE_VERTICES) {
            continue;
        }
        DEBUG_PRINTF("replace rose with holder (%zu vertices)\n",
                     num_vertices(*h));
        for (auto v : e.second) {
            assert(g[v].left.castle.get() == e.first.castle());
            g[v].left.graph = h;
            g[v].left.castle.reset();
            changed = true;
        }
    }

    for (const auto &e : suffix_castles) {
        assert(e.first.castle());
        shared_ptr<NGHolder> h = makeHolder(*e.first.castle(), tbi.cc);
        if (!h || num_vertices(*h) > MAX_UNMAKE_VERTICES) {
            continue;
        }
        DEBUG_PRINTF("replace suffix with holder (%zu vertices)\n",
                     num_vertices(*h));
        for (auto v : e.second) {
            assert(g[v].suffix.castle.get() == e.first.castle());
            g[v].suffix.graph = h;
            g[v].suffix.castle.reset();
            changed = true;
        }
    }

    return changed;
}

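/* Castle tops may be sparse after merging. Remap each castle's tops to a
 * dense range, then rewrite the rose_top values on in-edges (for leftfixes)
 * and the suffix tops to match. */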
void remapCastleTops(RoseBuildImpl &tbi) {
    unordered_map<CastleProto *, vector<RoseVertex>> rose_castles;
    unordered_map<CastleProto *, vector<RoseVertex>> suffix_castles;

    RoseGraph &g = tbi.g;
    for (auto v : vertices_range(g)) {
        if (g[v].left.castle) {
            rose_castles[g[v].left.castle.get()].push_back(v);
        }
        if (g[v].suffix.castle) {
            suffix_castles[g[v].suffix.castle.get()].push_back(v);
        }
    }

    DEBUG_PRINTF("%zu rose castles, %zu suffix castles\n", rose_castles.size(),
                 suffix_castles.size());

    map<u32, u32> top_map;

    // Remap rose castles.
    for (const auto &rc : rose_castles) {
        CastleProto *c = rc.first;
        const vector<RoseVertex> &verts = rc.second;

        DEBUG_PRINTF("rose castle %p (%zu repeats) has %zu verts\n", c,
                     c->repeats.size(), verts.size());

        top_map.clear();
        remapCastleTops(*c, top_map);

        // Update the tops on the edges leading into the vertices in verts.
        for (auto v : verts) {
            for (const auto &e : in_edges_range(v, g)) {
                g[e].rose_top = top_map.at(g[e].rose_top);
            }
        }
    }

    // Remap suffix castles.
    for (const auto &e : suffix_castles) {
        CastleProto *c = e.first;
        const vector<RoseVertex> &verts = e.second;

        DEBUG_PRINTF("suffix castle %p (%zu repeats) has %zu verts\n", c,
                     c->repeats.size(), verts.size());

        top_map.clear();
        remapCastleTops(*c, top_map);

        // Update the tops on the suffixes.
        for (auto v : verts) {
            assert(g[v].suffix);
            g[v].suffix.top = top_map.at(g[v].suffix.top);
        }
    }
}

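/* Returns true if every pred literal of the given edge is guaranteed to
 * squash all previous state in the castle: each literal must either contain
 * a character outside the castle's reach or have a non-overlapping tail
 * longer than the castle's max width. */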
bool triggerKillsRoseCastle(const RoseBuildImpl &tbi, const left_id &left,
                            const set<ue2_literal> &all_lits,
                            const RoseEdge &e) {
    assert(left.castle());
    const CastleProto &c = *left.castle();
    const CharReach &cr = c.reach();

    const depth max_width = findMaxWidth(c);
    DEBUG_PRINTF("castle max width is %s\n", max_width.str().c_str());

    /* check that each pred literal kills all previous castle state */
    for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) {
        const rose_literal_id &pred_lit = tbi.literals.at(lit_id);
        const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s);

        DEBUG_PRINTF("s=%s, castle reach=%s\n", dumpString(s).c_str(),
                     describeClass(cr).c_str());

        for (const auto &s_cr : s) {
            if (!overlaps(cr, s_cr)) {
                DEBUG_PRINTF("reach %s kills castle\n",
                             describeClass(s_cr).c_str());
                goto next_pred;
            }
        }

        if (max_width < depth(s.length())) {
            DEBUG_PRINTF("literal width > castle max width\n");
            goto next_pred;
        }

        return false;

    next_pred:;
    }

    return true;
}

} // namespace ue2