1/*
2 * Copyright (c) 2016-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "shengcompile.h"
30
31#include "accel.h"
32#include "accelcompile.h"
33#include "shufticompile.h"
34#include "trufflecompile.h"
35#include "util/alloc.h"
36#include "util/bitutils.h"
37#include "util/charreach.h"
38#include "util/compare.h"
39#include "util/container.h"
40#include "util/order_check.h"
41#include "util/report_manager.h"
42#include "util/unaligned.h"
43
44#include "grey.h"
45#include "nfa_internal.h"
46#include "sheng_internal.h"
47#include "ue2common.h"
48#include "util/compile_context.h"
49#include "util/make_unique.h"
50#include "util/verify_types.h"
51#include "util/simd_types.h"
52
53#include <map>
54#include <vector>
55#include <sstream>
56
57#include <boost/range/adaptor/map.hpp>
58
59using namespace std;
60using boost::adaptors::map_keys;
61
62namespace ue2 {
63
64#define ACCEL_DFA_MAX_OFFSET_DEPTH 4
65
66/** Maximum tolerated number of escape character from an accel state.
67 * This is larger than nfa, as we don't have a budget and the nfa cheats on stop
68 * characters for sets of states */
69#define ACCEL_DFA_MAX_STOP_CHAR 160
70
71/** Maximum tolerated number of escape character from a sds accel state. Larger
72 * than normal states as accelerating sds is important. Matches NFA value */
73#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192
74
75struct dfa_info {
76 accel_dfa_build_strat &strat;
77 raw_dfa &raw;
78 vector<dstate> &states;
79 dstate &floating;
80 dstate &anchored;
81 bool can_die;
82
83 explicit dfa_info(accel_dfa_build_strat &s)
84 : strat(s), raw(strat.get_raw()), states(raw.states),
85 floating(states[raw.start_floating]),
86 anchored(states[raw.start_anchored]), can_die(dfaCanDie(raw)) {}
87
88 // returns adjusted size
89 size_t size() const {
90 return can_die ? states.size() : states.size() - 1;
91 }
92 // expects adjusted index
93 dstate &operator[](dstate_id_t idx) {
94 return states[raw_id(idx)];
95 }
96 dstate &top(dstate_id_t idx) {
97 if (isDead(idx)) {
98 return floating;
99 }
100 return next(idx, TOP);
101 }
102 dstate &next(dstate_id_t idx, u16 chr) {
103 auto &src = (*this)[idx];
104 auto next_id = src.next[raw.alpha_remap[chr]];
105 return states[next_id];
106 }
107 // get original idx from adjusted idx
108 dstate_id_t raw_id(dstate_id_t idx) {
109 assert(idx < size());
110 // if DFA can't die, shift all indices left by 1
111 return can_die ? idx : idx + 1;
112 }
113 bool isDead(dstate &state) {
114 return raw_id(state.impl_id) == DEAD_STATE;
115 }
116 bool isDead(dstate_id_t idx) {
117 return raw_id(idx) == DEAD_STATE;
118 }
119
120private:
121 static bool dfaCanDie(raw_dfa &rdfa) {
122 for (unsigned chr = 0; chr < 256; chr++) {
123 for (dstate_id_t state = 0; state < rdfa.states.size(); state++) {
124 auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]];
125 if (succ == DEAD_STATE) {
126 return true;
127 }
128 }
129 }
130 return false;
131 }
132};
133
134namespace {
135
136struct raw_report_list {
137 flat_set<ReportID> reports;
138
139 raw_report_list(const flat_set<ReportID> &reports_in,
140 const ReportManager &rm, bool do_remap) {
141 if (do_remap) {
142 for (auto &id : reports_in) {
143 reports.insert(rm.getProgramOffset(id));
144 }
145 } else {
146 reports = reports_in;
147 }
148 }
149
150 bool operator<(const raw_report_list &b) const {
151 return reports < b.reports;
152 }
153};
154
155struct raw_report_info_impl : public raw_report_info {
156 vector<raw_report_list> rl;
157 u32 getReportListSize() const override;
158 size_t size() const override;
159 void fillReportLists(NFA *n, size_t base_offset,
160 std::vector<u32> &ro /* out */) const override;
161};
162}
163
164u32 raw_report_info_impl::getReportListSize() const {
165 u32 rv = 0;
166
167 for (const auto &reps : rl) {
168 rv += sizeof(report_list);
169 rv += sizeof(ReportID) * reps.reports.size();
170 }
171
172 return rv;
173}
174
175size_t raw_report_info_impl::size() const {
176 return rl.size();
177}
178
179void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
180 vector<u32> &ro) const {
181 for (const auto &reps : rl) {
182 ro.push_back(base_offset);
183
184 report_list *p = (report_list *)((char *)n + base_offset);
185
186 u32 i = 0;
187 for (const ReportID report : reps.reports) {
188 p->report[i++] = report;
189 }
190 p->count = verify_u32(reps.reports.size());
191
192 base_offset += sizeof(report_list);
193 base_offset += sizeof(ReportID) * reps.reports.size();
194 }
195}
196
197unique_ptr<raw_report_info> sheng_build_strat::gatherReports(
198 vector<u32> &reports,
199 vector<u32> &reports_eod,
200 u8 *isSingleReport,
201 ReportID *arbReport) const {
202 DEBUG_PRINTF("gathering reports\n");
203
204 const bool remap_reports = has_managed_reports(rdfa.kind);
205
206 auto ri = ue2::make_unique<raw_report_info_impl>();
207 map<raw_report_list, u32> rev;
208
209 for (const dstate &s : rdfa.states) {
210 if (s.reports.empty()) {
211 reports.push_back(MO_INVALID_IDX);
212 continue;
213 }
214
215 raw_report_list rrl(s.reports, rm, remap_reports);
216 DEBUG_PRINTF("non empty r\n");
217 if (rev.find(rrl) != rev.end()) {
218 reports.push_back(rev[rrl]);
219 } else {
220 DEBUG_PRINTF("adding to rl %zu\n", ri->size());
221 rev[rrl] = ri->size();
222 reports.push_back(ri->size());
223 ri->rl.push_back(rrl);
224 }
225 }
226
227 for (const dstate &s : rdfa.states) {
228 if (s.reports_eod.empty()) {
229 reports_eod.push_back(MO_INVALID_IDX);
230 continue;
231 }
232
233 DEBUG_PRINTF("non empty r eod\n");
234 raw_report_list rrl(s.reports_eod, rm, remap_reports);
235 if (rev.find(rrl) != rev.end()) {
236 reports_eod.push_back(rev[rrl]);
237 continue;
238 }
239
240 DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size());
241 rev[rrl] = ri->size();
242 reports_eod.push_back(ri->size());
243 ri->rl.push_back(rrl);
244 }
245
246 assert(!ri->rl.empty()); /* all components should be able to generate
247 reports */
248 if (!ri->rl.empty()) {
249 *arbReport = *ri->rl.begin()->reports.begin();
250 } else {
251 *arbReport = 0;
252 }
253
254 /* if we have only a single report id generated from all accepts (not eod)
255 * we can take some short cuts */
256 set<ReportID> reps;
257
258 for (u32 rl_index : reports) {
259 if (rl_index == MO_INVALID_IDX) {
260 continue;
261 }
262 assert(rl_index < ri->size());
263 insert(&reps, ri->rl[rl_index].reports);
264 }
265
266 if (reps.size() == 1) {
267 *isSingleReport = 1;
268 *arbReport = *reps.begin();
269 DEBUG_PRINTF("single -- %u\n", *arbReport);
270 } else {
271 *isSingleReport = 0;
272 }
273
274 return ri;
275}
276
277u32 sheng_build_strat::max_allowed_offset_accel() const {
278 return ACCEL_DFA_MAX_OFFSET_DEPTH;
279}
280
281u32 sheng_build_strat::max_stop_char() const {
282 return ACCEL_DFA_MAX_STOP_CHAR;
283}
284
285u32 sheng_build_strat::max_floating_stop_char() const {
286 return ACCEL_DFA_MAX_FLOATING_STOP_CHAR;
287}
288
289size_t sheng_build_strat::accelSize() const {
290 return sizeof(AccelAux);
291}
292
293#ifdef DEBUG
294static really_inline
295void dumpShuffleMask(const u8 chr, const u8 *buf, unsigned sz) {
296 stringstream o;
297
298 for (unsigned i = 0; i < sz; i++) {
299 o.width(2);
300 o << (buf[i] & SHENG_STATE_MASK) << " ";
301 }
302 DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str());
303}
304#endif
305
306static
307void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info,
308 set<dstate_id_t> *accel_states) {
309 for (dstate_id_t i : accel_escape_info | map_keys) {
310 accel_states->insert(i);
311 }
312}
313
314static
315u8 getShengState(dstate &state, dfa_info &info,
316 map<dstate_id_t, AccelScheme> &accelInfo) {
317 u8 s = state.impl_id;
318 if (!state.reports.empty()) {
319 s |= SHENG_STATE_ACCEPT;
320 }
321 if (info.isDead(state)) {
322 s |= SHENG_STATE_DEAD;
323 }
324 if (accelInfo.find(info.raw_id(state.impl_id)) != accelInfo.end()) {
325 s |= SHENG_STATE_ACCEL;
326 }
327 return s;
328}
329
330static
331void fillAccelAux(struct NFA *n, dfa_info &info,
332 map<dstate_id_t, AccelScheme> &accelInfo) {
333 DEBUG_PRINTF("Filling accel aux structures\n");
334 sheng *s = (sheng *)getMutableImplNfa(n);
335 u32 offset = s->accel_offset;
336
337 for (dstate_id_t i = 0; i < info.size(); i++) {
338 dstate_id_t state_id = info.raw_id(i);
339 if (accelInfo.find(state_id) != accelInfo.end()) {
340 s->flags |= SHENG_FLAG_HAS_ACCEL;
341 AccelAux *aux = (AccelAux *)((char *)n + offset);
342 info.strat.buildAccel(state_id, accelInfo[state_id], aux);
343 sstate_aux *saux =
344 (sstate_aux *)((char *)n + s->aux_offset) + state_id;
345 saux->accel = offset;
346 DEBUG_PRINTF("Accel offset: %u\n", offset);
347 offset += ROUNDUP_N(sizeof(AccelAux), alignof(AccelAux));
348 }
349 }
350}
351
352static
353void populateBasicInfo(struct NFA *n, dfa_info &info,
354 map<dstate_id_t, AccelScheme> &accelInfo, u32 aux_offset,
355 u32 report_offset, u32 accel_offset, u32 total_size,
356 u32 dfa_size) {
357 n->length = total_size;
358 n->scratchStateSize = 1;
359 n->streamStateSize = 1;
360 n->nPositions = info.size();
361 n->type = SHENG_NFA;
362 n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0;
363
364 sheng *s = (sheng *)getMutableImplNfa(n);
365 s->aux_offset = aux_offset;
366 s->report_offset = report_offset;
367 s->accel_offset = accel_offset;
368 s->n_states = info.size();
369 s->length = dfa_size;
370 s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0;
371
372 s->anchored = getShengState(info.anchored, info, accelInfo);
373 s->floating = getShengState(info.floating, info, accelInfo);
374}
375
376static
377void fillTops(NFA *n, dfa_info &info, dstate_id_t id,
378 map<dstate_id_t, AccelScheme> &accelInfo) {
379 sheng *s = (sheng *)getMutableImplNfa(n);
380 u32 aux_base = s->aux_offset;
381
382 DEBUG_PRINTF("Filling tops for state %u\n", id);
383
384 sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id;
385
386 DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id,
387 (char *)aux - (char *)n);
388
389 /* we could conceivably end up in an accept/dead state on a top event,
390 * so mark top as accept/dead state if it indeed is.
391 */
392 auto &top_state = info.top(id);
393
394 DEBUG_PRINTF("Top transition for state %u: %u\n", id, top_state.impl_id);
395
396 aux->top = getShengState(top_state, info, accelInfo);
397}
398
399static
400void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector<u32> &reports,
401 vector<u32> &reports_eod, vector<u32> &report_offsets) {
402 sheng *s = (sheng *)getMutableImplNfa(n);
403 u32 aux_base = s->aux_offset;
404 auto raw_id = info.raw_id(id);
405
406 auto &state = info[id];
407
408 sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id;
409
410 DEBUG_PRINTF("Filling aux and report structures for state %u\n", id);
411 DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id,
412 (char *)aux - (char *)n);
413
414 aux->accept = state.reports.empty() ? 0 : report_offsets[reports[raw_id]];
415 aux->accept_eod =
416 state.reports_eod.empty() ? 0 : report_offsets[reports_eod[raw_id]];
417
418 DEBUG_PRINTF("Report list offset: %u\n", aux->accept);
419 DEBUG_PRINTF("EOD report list offset: %u\n", aux->accept_eod);
420}
421
422static
423void fillSingleReport(NFA *n, ReportID r_id) {
424 sheng *s = (sheng *)getMutableImplNfa(n);
425
426 DEBUG_PRINTF("Single report ID: %u\n", r_id);
427 s->report = r_id;
428 s->flags |= SHENG_FLAG_SINGLE_REPORT;
429}
430
431static
432void createShuffleMasks(sheng *s, dfa_info &info,
433 map<dstate_id_t, AccelScheme> &accelInfo) {
434 for (u16 chr = 0; chr < 256; chr++) {
435 u8 buf[16] = {0};
436
437 for (dstate_id_t idx = 0; idx < info.size(); idx++) {
438 auto &succ_state = info.next(idx, chr);
439
440 buf[idx] = getShengState(succ_state, info, accelInfo);
441 }
442#ifdef DEBUG
443 dumpShuffleMask(chr, buf, sizeof(buf));
444#endif
445 memcpy(&s->shuffle_masks[chr], buf, sizeof(m128));
446 }
447}
448
449bool has_accel_sheng(const NFA *) {
450 return true; /* consider the sheng region as accelerated */
451}
452
453bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc,
454 const ReportManager &rm, bool only_accel_init,
455 set<dstate_id_t> *accel_states) {
456 if (!cc.grey.allowSheng) {
457 DEBUG_PRINTF("Sheng is not allowed!\n");
458 return nullptr;
459 }
460
461 sheng_build_strat strat(raw, rm, only_accel_init);
462 dfa_info info(strat);
463
464 DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size());
465
466 DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n",
467 raw.start_anchored, raw.start_floating);
468
469 DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n",
470 info.can_die ? "can" : "cannot", info.size());
471 if (info.size() > 16) {
472 DEBUG_PRINTF("Too many states\n");
473 return nullptr;
474 }
475
476 if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
477 * mode with our semantics */
478 raw.stripExtraEodReports();
479 }
480 auto accelInfo = strat.getAccelInfo(cc.grey);
481
482 // set impl_id of each dfa state
483 for (dstate_id_t i = 0; i < info.size(); i++) {
484 info[i].impl_id = i;
485 }
486
487 DEBUG_PRINTF("Anchored start state: %u, floating start state: %u\n",
488 info.anchored.impl_id, info.floating.impl_id);
489
490 u32 nfa_size = ROUNDUP_16(sizeof(NFA) + sizeof(sheng));
491 vector<u32> reports, eod_reports, report_offsets;
492 u8 isSingle = 0;
493 ReportID single_report = 0;
494
495 auto ri =
496 strat.gatherReports(reports, eod_reports, &isSingle, &single_report);
497
498 u32 total_aux = sizeof(sstate_aux) * info.size();
499 u32 total_accel = strat.accelSize() * accelInfo.size();
500 u32 total_reports = ri->getReportListSize();
501
502 u32 reports_offset = nfa_size + total_aux;
503 u32 accel_offset =
504 ROUNDUP_N(reports_offset + total_reports, alignof(AccelAux));
505 u32 total_size = ROUNDUP_N(accel_offset + total_accel, 64);
506
507 DEBUG_PRINTF("NFA: %u, aux: %u, reports: %u, accel: %u, total: %u\n",
508 nfa_size, total_aux, total_reports, total_accel, total_size);
509
510 auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
511
512 populateBasicInfo(nfa.get(), info, accelInfo, nfa_size, reports_offset,
513 accel_offset, total_size, total_size - sizeof(NFA));
514
515 DEBUG_PRINTF("Setting up aux and report structures\n");
516
517 ri->fillReportLists(nfa.get(), reports_offset, report_offsets);
518
519 for (dstate_id_t idx = 0; idx < info.size(); idx++) {
520 fillTops(nfa.get(), info, idx, accelInfo);
521 fillAux(nfa.get(), info, idx, reports, eod_reports, report_offsets);
522 }
523 if (isSingle) {
524 fillSingleReport(nfa.get(), single_report);
525 }
526
527 fillAccelAux(nfa.get(), info, accelInfo);
528
529 if (accel_states) {
530 fillAccelOut(accelInfo, accel_states);
531 }
532
533 createShuffleMasks((sheng *)getMutableImplNfa(nfa.get()), info, accelInfo);
534
535 return nfa;
536}
537
538} // namespace ue2
539