1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "accel_dfa_build_strat.h"
30
31#include "accel.h"
32#include "grey.h"
33#include "nfagraph/ng_limex_accel.h"
34#include "shufticompile.h"
35#include "trufflecompile.h"
36#include "util/accel_scheme.h"
37#include "util/charreach.h"
38#include "util/container.h"
39#include "util/dump_charclass.h"
40#include "util/small_vector.h"
41#include "util/verify_types.h"
42
43#include <sstream>
44#include <unordered_map>
45#include <unordered_set>
46#include <vector>
47
48#define PATHS_LIMIT 500
49
50using namespace std;
51
52namespace ue2 {
53
54namespace {
55struct path {
56 small_vector<CharReach, MAX_ACCEL_DEPTH + 1> reach;
57 dstate_id_t dest = DEAD_STATE;
58 explicit path(dstate_id_t base) : dest(base) {}
59};
60};
61
62template<typename Container>
63void dump_paths(const Container &paths) {
64 for (UNUSED const path &p : paths) {
65 DEBUG_PRINTF("[%s] -> %u\n", describeClasses(p.reach).c_str(), p.dest);
66 }
67 DEBUG_PRINTF("%zu paths\n", paths.size());
68}
69
70static
71vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
72 vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */
73
74 for (u32 i = 0; i < N_CHARS; i++) {
75 rv.at(rdfa.alpha_remap[i]).set(i);
76 }
77
78 return rv;
79}
80
81static
82bool is_useful_path(const vector<path> &good, const path &p) {
83 for (const auto &g : good) {
84 assert(g.dest == p.dest);
85 assert(g.reach.size() <= p.reach.size());
86 auto git = g.reach.rbegin();
87 auto pit = p.reach.rbegin();
88
89 for (; git != g.reach.rend(); ++git, ++pit) {
90 if (!pit->isSubsetOf(*git)) {
91 goto next;
92 }
93 }
94 DEBUG_PRINTF("better: [%s] -> %u\n", describeClasses(g.reach).c_str(),
95 g.dest);
96
97 return false;
98 next:;
99 }
100
101 return true;
102}
103
104static
105path append(const path &orig, const CharReach &cr, u32 new_dest) {
106 path p(new_dest);
107 p.reach = orig.reach;
108 p.reach.push_back(cr);
109
110 return p;
111}
112
/* Extend path p by one symbol class to each distinct successor of its
 * destination state, appending the resulting paths to out. `all` records,
 * per destination state, the useful paths seen so far and is used to prune
 * dominated extensions (see is_useful_path). Report states terminate a path
 * by appending an empty (none) class as a sentinel. */
static
void extend(const raw_dfa &rdfa, const vector<CharReach> &rev_map,
            const path &p, unordered_map<u32, vector<path>> &all,
            vector<path> &out) {
    const dstate &s = rdfa.states[p.dest];

    /* a trailing empty class marks an already-terminated path: keep as-is */
    if (!p.reach.empty() && p.reach.back().none()) {
        out.push_back(p);
        return;
    }

    if (!s.reports.empty()) {
        if (generates_callbacks(rdfa.kind)) {
            /* cannot scan past a callback-raising report: stop here */
            out.push_back(p);
            return;
        } else {
            /* record a terminated variant but continue extending below */
            path pp = append(p, CharReach(), p.dest);
            all[p.dest].push_back(pp);
            out.push_back(move(pp));
        }
    }

    if (!s.reports_eod.empty()) {
        /* EOD reports also produce a terminated variant of the path */
        path pp = append(p, CharReach(), p.dest);
        all[p.dest].push_back(pp);
        out.push_back(move(pp));
    }

    /* merge all symbols leading to the same successor into one class */
    flat_map<u32, CharReach> dest;
    for (u32 i = 0; i < rev_map.size(); i++) {
        u32 succ = s.next[i];
        dest[succ] |= rev_map[i];
    }

    for (const auto &e : dest) {
        path pp = append(p, e.second, e.first);
        /* skip extensions dominated by an existing path to the same state */
        if (!is_useful_path(all[e.first], pp)) {
            DEBUG_PRINTF("not useful: [%s] -> %u\n",
                         describeClasses(pp.reach).c_str(), pp.dest);
            continue;
        }

        DEBUG_PRINTF("----good: [%s] -> %u\n",
                     describeClasses(pp.reach).c_str(), pp.dest);
        all[e.first].push_back(pp);
        out.push_back(move(pp));
    }
}
161
162static
163vector<vector<CharReach>> generate_paths(const raw_dfa &rdfa,
164 dstate_id_t base, u32 len) {
165 const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa);
166 vector<path> paths{path(base)};
167 unordered_map<u32, vector<path>> all;
168 all[base].push_back(path(base));
169 for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) {
170 vector<path> next_gen;
171 for (const auto &p : paths) {
172 extend(rdfa, rev_map, p, all, next_gen);
173 }
174
175 paths = move(next_gen);
176 }
177
178 dump_paths(paths);
179
180 vector<vector<CharReach>> rv;
181 rv.reserve(paths.size());
182 for (auto &p : paths) {
183 rv.push_back(vector<CharReach>(std::make_move_iterator(p.reach.begin()),
184 std::make_move_iterator(p.reach.end())));
185 }
186 return rv;
187}
188
189static
190AccelScheme look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
191 u32 max_allowed_accel_offset) {
192 DEBUG_PRINTF("looking for accel for %hu\n", base);
193 vector<vector<CharReach>> paths =
194 generate_paths(rdfa, base, max_allowed_accel_offset + 1);
195 AccelScheme as = findBestAccelScheme(paths, CharReach(), true);
196 DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset);
197 return as;
198}
199
200static UNUSED
201bool better(const AccelScheme &a, const AccelScheme &b) {
202 if (!a.double_byte.empty() && b.double_byte.empty()) {
203 return true;
204 }
205
206 if (!b.double_byte.empty()) {
207 return false;
208 }
209
210 return a.cr.count() < b.cr.count();
211}
212
213static
214bool double_byte_ok(const AccelScheme &info) {
215 return !info.double_byte.empty() &&
216 info.double_cr.count() < info.double_byte.size() &&
217 info.double_cr.count() <= 2 && !info.double_byte.empty();
218}
219
220static
221bool has_self_loop(dstate_id_t s, const raw_dfa &raw) {
222 u16 top_remap = raw.alpha_remap[TOP];
223 for (u32 i = 0; i < raw.states[s].next.size(); i++) {
224 if (i != top_remap && raw.states[s].next[i] == s) {
225 return true;
226 }
227 }
228 return false;
229}
230
231static
232flat_set<u16> find_nonexit_symbols(const raw_dfa &rdfa,
233 const CharReach &escape) {
234 flat_set<u16> rv;
235 CharReach nonexit = ~escape;
236 for (auto i = nonexit.find_first(); i != nonexit.npos;
237 i = nonexit.find_next(i)) {
238 rv.insert(rdfa.alpha_remap[i]);
239 }
240
241 return rv;
242}
243
/* Find the start-of-data-stream state, or a proxy for it: the floating start
 * state if there is one, otherwise a nearby state with a self loop reached by
 * walking forward from the anchored start. Returns DEAD_STATE if no suitable
 * proxy can be found. */
static
dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
    if (raw.start_floating != DEAD_STATE) {
        DEBUG_PRINTF("has floating start\n");
        return raw.start_floating;
    }

    DEBUG_PRINTF("looking for SDS proxy\n");

    dstate_id_t s = raw.start_anchored;

    if (has_self_loop(s, raw)) {
        return s;
    }

    u16 top_remap = raw.alpha_remap[TOP];

    /* seen guards against walking in cycles */
    std::unordered_set<dstate_id_t> seen;
    while (true) {
        seen.insert(s);
        DEBUG_PRINTF("basis %hu\n", s);

        /* check if we are connected to a state with a self loop */
        for (u32 i = 0; i < raw.states[s].next.size(); i++) {
            dstate_id_t t = raw.states[s].next[i];
            if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) {
                return t;
            }
        }

        /* find a neighbour to use as a basis for looking for the sds proxy */
        dstate_id_t t = DEAD_STATE;
        for (u32 i = 0; i < raw.states[s].next.size(); i++) {
            dstate_id_t tt = raw.states[s].next[i];
            if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) {
                t = tt;
                break;
            }
        }

        if (t == DEAD_STATE) {
            /* we were unable to find a state to use as a SDS proxy */
            return DEAD_STATE;
        }

        s = t;
    }
}
292
293static
294set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
295 const AccelScheme &ei) {
296 DEBUG_PRINTF("looking for region around %hu\n", base);
297
298 set<dstate_id_t> region = {base};
299
300 if (!ei.double_byte.empty()) {
301 return region;
302 }
303
304 DEBUG_PRINTF("accel %s+%u\n", describeClass(ei.cr).c_str(), ei.offset);
305
306 const CharReach &escape = ei.cr;
307 auto nonexit_symbols = find_nonexit_symbols(rdfa, escape);
308
309 vector<dstate_id_t> pending = {base};
310 while (!pending.empty()) {
311 dstate_id_t curr = pending.back();
312 pending.pop_back();
313 for (auto s : nonexit_symbols) {
314 dstate_id_t t = rdfa.states[curr].next[s];
315 if (contains(region, t)) {
316 continue;
317 }
318
319 DEBUG_PRINTF(" %hu is in region\n", t);
320 region.insert(t);
321 pending.push_back(t);
322 }
323 }
324
325 return region;
326}
327
/* Build an AccelScheme for state this_idx: rv.cr is the set of characters
 * which exit the state (the "escape" set for single-byte acceleration), and
 * rv.double_byte/double_cr describe a two-byte scheme built from the state's
 * one-step successors. Falls back to offset acceleration for the floating
 * start state when no good double-byte scheme exists. */
AccelScheme
accel_dfa_build_strat::find_escape_strings(dstate_id_t this_idx) const {
    AccelScheme rv;
    const raw_dfa &rdfa = get_raw();
    rv.cr.clear();
    rv.offset = 0;
    const dstate &raw = rdfa.states[this_idx];
    const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa);
    bool outs2_broken = false; /* set when no double-byte scheme is viable */
    flat_map<dstate_id_t, CharReach> succs;

    /* pass 1: gather exit chars and the char class reaching each successor */
    for (u32 i = 0; i < rev_map.size(); i++) {
        if (raw.next[i] == this_idx) {
            continue; /* self loops do not exit the state */
        }

        const CharReach &cr_i = rev_map.at(i);

        rv.cr |= cr_i;
        dstate_id_t next_id = raw.next[i];

        DEBUG_PRINTF("next is %hu\n", next_id);
        const dstate &raw_next = rdfa.states[next_id];

        if (outs2_broken) {
            continue; /* still need to finish accumulating rv.cr */
        }

        if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) {
            DEBUG_PRINTF("leads to report\n");
            outs2_broken = true; /* cannot accelerate over reports */
            continue;
        }
        succs[next_id] |= cr_i;
    }

    /* pass 2: for each successor, find the second-byte classes which leave
     * the (state, successor) pair behaving differently from staying put */
    if (!outs2_broken) {
        for (const auto &e : succs) {
            const CharReach &cr_i = e.second;
            const dstate &raw_next = rdfa.states[e.first];

            CharReach cr_all_j;
            for (u32 j = 0; j < rev_map.size(); j++) {
                if (raw_next.next[j] == raw.next[j]) {
                    continue; /* same outcome either way: not an escape */
                }

                DEBUG_PRINTF("state %hu: adding sym %u -> %hu to 2 \n", e.first,
                             j, raw_next.next[j]);
                cr_all_j |= rev_map.at(j);
            }

            /* too many explicit pairs: fold the first byte into the
             * double-byte wildcard class instead */
            if (cr_i.count() * cr_all_j.count() > 8) {
                DEBUG_PRINTF("adding %zu to double_cr\n", cr_i.count());
                rv.double_cr |= cr_i;
            } else {
                /* enumerate explicit (first, second) byte pairs */
                for (auto ii = cr_i.find_first(); ii != CharReach::npos;
                     ii = cr_i.find_next(ii)) {
                    for (auto jj = cr_all_j.find_first(); jj != CharReach::npos;
                         jj = cr_all_j.find_next(jj)) {
                        rv.double_byte.emplace((u8)ii, (u8)jj);
                        if (rv.double_byte.size() > 8) {
                            DEBUG_PRINTF("outs2 too big\n");
                            outs2_broken = true;
                            goto done;
                        }
                    }
                }
            }
        }

    done:
        assert(outs2_broken || rv.double_byte.size() <= 8);
        if (outs2_broken) {
            rv.double_byte.clear();
        }
    }

    /* for the floating start of non-triggered dfas, consider offset accel as
     * an alternative when no double-byte scheme worked out */
    DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa));
    DEBUG_PRINTF("broken %d\n", outs2_broken);
    if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) &&
        this_idx == rdfa.start_floating && this_idx != DEAD_STATE) {
        DEBUG_PRINTF("looking for offset accel at %u\n", this_idx);
        auto offset =
            look_for_offset_accel(rdfa, this_idx, max_allowed_offset_accel());
        DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(), rv.cr.count());
        if (double_byte_ok(offset) || offset.cr.count() < rv.cr.count()) {
            DEBUG_PRINTF("using offset accel\n");
            rv = offset;
        }
    }

    return rv;
}
422
/* Compile the AccelScheme info into an AccelAux structure, choosing the most
 * specific accelerator that fits: double vermicelli (exact, caseless, or
 * masked), double shufti, red tape (no escapes), vermicelli (1 or caseless
 * 2 chars), shufti, and finally truffle as the catch-all. Order matters:
 * earlier cases are cheaper at scan time. */
void
accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
                                  const AccelScheme &info,
                                  void *accel_out) {
    AccelAux *accel = (AccelAux *)accel_out;

    DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset,
                 info.double_offset);
    accel->generic.offset = verify_u8(info.offset);

    /* single literal pair, no wildcard class: double vermicelli */
    if (double_byte_ok(info) && info.double_cr.none() &&
        info.double_byte.size() == 1) {
        accel->accel_type = ACCEL_DVERM;
        accel->dverm.c1 = info.double_byte.begin()->first;
        accel->dverm.c2 = info.double_byte.begin()->second;
        accel->dverm.offset = verify_u8(info.double_offset);
        DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx);
        return;
    }

    /* 2 or 4 pairs may collapse to a caseless or masked double vermicelli */
    if (double_byte_ok(info) && info.double_cr.none() &&
        (info.double_byte.size() == 2 || info.double_byte.size() == 4)) {
        bool ok = true;

        assert(!info.double_byte.empty());
        u8 firstC = info.double_byte.begin()->first & CASE_CLEAR;
        u8 secondC = info.double_byte.begin()->second & CASE_CLEAR;

        /* caseless requires all pairs to agree modulo the case bit */
        for (const pair<u8, u8> &p : info.double_byte) {
            if ((p.first & CASE_CLEAR) != firstC ||
                (p.second & CASE_CLEAR) != secondC) {
                ok = false;
                break;
            }
        }

        if (ok) {
            accel->accel_type = ACCEL_DVERM_NOCASE;
            accel->dverm.c1 = firstC;
            accel->dverm.c2 = secondC;
            accel->dverm.offset = verify_u8(info.double_offset);
            DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx);
            return;
        }

        /* otherwise try to derive a common bit mask over the pairs */
        u8 m1;
        u8 m2;
        if (buildDvermMask(info.double_byte, &m1, &m2)) {
            accel->accel_type = ACCEL_DVERM_MASKED;
            accel->dverm.offset = verify_u8(info.double_offset);
            accel->dverm.c1 = info.double_byte.begin()->first & m1;
            accel->dverm.c2 = info.double_byte.begin()->second & m2;
            accel->dverm.m1 = m1;
            accel->dverm.m2 = m2;
            DEBUG_PRINTF(
                "building maskeddouble-vermicelli for 0x%02hhx%02hhx\n",
                accel->dverm.c1, accel->dverm.c2);
            return;
        }
    }

    /* general double-byte case: double shufti (may fail on mask build) */
    if (double_byte_ok(info) &&
        shuftiBuildDoubleMasks(
            info.double_cr, info.double_byte, (u8 *)&accel->dshufti.lo1,
            (u8 *)&accel->dshufti.hi1, (u8 *)&accel->dshufti.lo2,
            (u8 *)&accel->dshufti.hi2)) {
        accel->accel_type = ACCEL_DSHUFTI;
        accel->dshufti.offset = verify_u8(info.double_offset);
        DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
        return;
    }

    /* no escape characters at all: the state can never be left */
    if (info.cr.none()) {
        accel->accel_type = ACCEL_RED_TAPE;
        DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape"
                     " from which there is no escape\n",
                     this_idx);
        return;
    }

    if (info.cr.count() == 1) {
        accel->accel_type = ACCEL_VERM;
        accel->verm.c = info.cr.find_first();
        DEBUG_PRINTF("state %hu is vermicelli\n", this_idx);
        return;
    }

    if (info.cr.count() == 2 && info.cr.isCaselessChar()) {
        accel->accel_type = ACCEL_VERM_NOCASE;
        accel->verm.c = info.cr.find_first() & CASE_CLEAR;
        DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx);
        return;
    }

    /* too many stop chars makes acceleration counter-productive */
    if (info.cr.count() > max_floating_stop_char()) {
        accel->accel_type = ACCEL_NONE;
        DEBUG_PRINTF("state %hu is too broad\n", this_idx);
        return;
    }

    accel->accel_type = ACCEL_SHUFTI;
    if (-1 != shuftiBuildMasks(info.cr, (u8 *)&accel->shufti.lo,
                               (u8 *)&accel->shufti.hi)) {
        DEBUG_PRINTF("state %hu is shufti\n", this_idx);
        return;
    }

    /* truffle handles any class shufti cannot represent */
    assert(!info.cr.none());
    accel->accel_type = ACCEL_TRUFFLE;
    truffleBuildMasks(info.cr, (u8 *)&accel->truffle.mask1,
                      (u8 *)&accel->truffle.mask2);
    DEBUG_PRINTF("state %hu is truffle\n", this_idx);
}
536
/* Compute accel schemes for the dfa's states: either just the init states
 * (when only_accel_init is set) or all of them. States in the non-exit
 * region around the sds proxy inherit the proxy's (single-byte) scheme when
 * it is at least as good as their own. */
map<dstate_id_t, AccelScheme>
accel_dfa_build_strat::getAccelInfo(const Grey &grey) {
    map<dstate_id_t, AccelScheme> rv;
    raw_dfa &rdfa = get_raw();
    if (!grey.accelerateDFA) {
        return rv;
    }

    dstate_id_t sds_proxy = get_sds_or_proxy(rdfa);
    DEBUG_PRINTF("sds %hu\n", sds_proxy);

    /* Find accel info for a single state. */
    auto do_state = [&](size_t i) {
        if (i == DEAD_STATE) {
            return;
        }

        /* Note on report acceleration states: While we can't accelerate while
         * we are spamming out callbacks, the QR code paths don't raise reports
         * during scanning so they can accelerate report states. */
        if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) {
            return;
        }

        /* the sds proxy gets a wider stop-char budget than ordinary states */
        size_t single_limit =
            i == sds_proxy ? max_floating_stop_char() : max_stop_char();
        DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit);

        AccelScheme ei = find_escape_strings(i);
        if (ei.cr.count() > single_limit) {
            DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i,
                         ei.cr.count());
            return;
        }

        DEBUG_PRINTF("state %zu should be accelerable %zu\n", i, ei.cr.count());

        rv[i] = ei;
    };

    if (only_accel_init) {
        DEBUG_PRINTF("only computing accel for init states\n");
        do_state(rdfa.start_anchored);
        if (rdfa.start_floating != rdfa.start_anchored) {
            do_state(rdfa.start_floating);
        }
    } else {
        DEBUG_PRINTF("computing accel for all states\n");
        for (size_t i = 0; i < rdfa.states.size(); i++) {
            do_state(i);
        }
    }

    /* provide acceleration states to states in the region of sds */
    if (contains(rv, sds_proxy)) {
        AccelScheme sds_ei = rv[sds_proxy];
        sds_ei.double_byte.clear(); /* region based on single byte scheme
                                     * may differ from double byte */
        DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n",
                     sds_ei.cr.count());
        auto sds_region = find_region(rdfa, sds_proxy, sds_ei);
        for (auto s : sds_region) {
            if (!contains(rv, s) || better(sds_ei, rv[s])) {
                rv[s] = sds_ei;
            }
        }
    }

    return rv;
}
}; // namespace ue2
608