1 | /* |
2 | * Copyright (c) 2016-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "shengcompile.h" |
30 | |
31 | #include "accel.h" |
32 | #include "accelcompile.h" |
33 | #include "shufticompile.h" |
34 | #include "trufflecompile.h" |
35 | #include "util/alloc.h" |
36 | #include "util/bitutils.h" |
37 | #include "util/charreach.h" |
38 | #include "util/compare.h" |
39 | #include "util/container.h" |
40 | #include "util/order_check.h" |
41 | #include "util/report_manager.h" |
42 | #include "util/unaligned.h" |
43 | |
44 | #include "grey.h" |
45 | #include "nfa_internal.h" |
46 | #include "sheng_internal.h" |
47 | #include "ue2common.h" |
48 | #include "util/compile_context.h" |
49 | #include "util/make_unique.h" |
50 | #include "util/verify_types.h" |
51 | #include "util/simd_types.h" |
52 | |
53 | #include <map> |
54 | #include <vector> |
55 | #include <sstream> |
56 | |
57 | #include <boost/range/adaptor/map.hpp> |
58 | |
59 | using namespace std; |
60 | using boost::adaptors::map_keys; |
61 | |
62 | namespace ue2 { |
63 | |
64 | #define ACCEL_DFA_MAX_OFFSET_DEPTH 4 |
65 | |
66 | /** Maximum tolerated number of escape character from an accel state. |
67 | * This is larger than nfa, as we don't have a budget and the nfa cheats on stop |
68 | * characters for sets of states */ |
69 | #define ACCEL_DFA_MAX_STOP_CHAR 160 |
70 | |
71 | /** Maximum tolerated number of escape character from a sds accel state. Larger |
72 | * than normal states as accelerating sds is important. Matches NFA value */ |
73 | #define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 |
74 | |
75 | struct dfa_info { |
76 | accel_dfa_build_strat &strat; |
77 | raw_dfa &raw; |
78 | vector<dstate> &states; |
79 | dstate &floating; |
80 | dstate &anchored; |
81 | bool can_die; |
82 | |
83 | explicit dfa_info(accel_dfa_build_strat &s) |
84 | : strat(s), raw(strat.get_raw()), states(raw.states), |
85 | floating(states[raw.start_floating]), |
86 | anchored(states[raw.start_anchored]), can_die(dfaCanDie(raw)) {} |
87 | |
88 | // returns adjusted size |
89 | size_t size() const { |
90 | return can_die ? states.size() : states.size() - 1; |
91 | } |
92 | // expects adjusted index |
93 | dstate &operator[](dstate_id_t idx) { |
94 | return states[raw_id(idx)]; |
95 | } |
96 | dstate &top(dstate_id_t idx) { |
97 | if (isDead(idx)) { |
98 | return floating; |
99 | } |
100 | return next(idx, TOP); |
101 | } |
102 | dstate &next(dstate_id_t idx, u16 chr) { |
103 | auto &src = (*this)[idx]; |
104 | auto next_id = src.next[raw.alpha_remap[chr]]; |
105 | return states[next_id]; |
106 | } |
107 | // get original idx from adjusted idx |
108 | dstate_id_t raw_id(dstate_id_t idx) { |
109 | assert(idx < size()); |
110 | // if DFA can't die, shift all indices left by 1 |
111 | return can_die ? idx : idx + 1; |
112 | } |
113 | bool isDead(dstate &state) { |
114 | return raw_id(state.impl_id) == DEAD_STATE; |
115 | } |
116 | bool isDead(dstate_id_t idx) { |
117 | return raw_id(idx) == DEAD_STATE; |
118 | } |
119 | |
120 | private: |
121 | static bool dfaCanDie(raw_dfa &rdfa) { |
122 | for (unsigned chr = 0; chr < 256; chr++) { |
123 | for (dstate_id_t state = 0; state < rdfa.states.size(); state++) { |
124 | auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]]; |
125 | if (succ == DEAD_STATE) { |
126 | return true; |
127 | } |
128 | } |
129 | } |
130 | return false; |
131 | } |
132 | }; |
133 | |
134 | namespace { |
135 | |
136 | struct raw_report_list { |
137 | flat_set<ReportID> reports; |
138 | |
139 | raw_report_list(const flat_set<ReportID> &reports_in, |
140 | const ReportManager &rm, bool do_remap) { |
141 | if (do_remap) { |
142 | for (auto &id : reports_in) { |
143 | reports.insert(rm.getProgramOffset(id)); |
144 | } |
145 | } else { |
146 | reports = reports_in; |
147 | } |
148 | } |
149 | |
150 | bool operator<(const raw_report_list &b) const { |
151 | return reports < b.reports; |
152 | } |
153 | }; |
154 | |
155 | struct raw_report_info_impl : public raw_report_info { |
156 | vector<raw_report_list> rl; |
157 | u32 getReportListSize() const override; |
158 | size_t size() const override; |
159 | void fillReportLists(NFA *n, size_t base_offset, |
160 | std::vector<u32> &ro /* out */) const override; |
161 | }; |
162 | } |
163 | |
164 | u32 raw_report_info_impl::getReportListSize() const { |
165 | u32 rv = 0; |
166 | |
167 | for (const auto &reps : rl) { |
168 | rv += sizeof(report_list); |
169 | rv += sizeof(ReportID) * reps.reports.size(); |
170 | } |
171 | |
172 | return rv; |
173 | } |
174 | |
175 | size_t raw_report_info_impl::size() const { |
176 | return rl.size(); |
177 | } |
178 | |
179 | void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset, |
180 | vector<u32> &ro) const { |
181 | for (const auto &reps : rl) { |
182 | ro.push_back(base_offset); |
183 | |
184 | report_list *p = (report_list *)((char *)n + base_offset); |
185 | |
186 | u32 i = 0; |
187 | for (const ReportID report : reps.reports) { |
188 | p->report[i++] = report; |
189 | } |
190 | p->count = verify_u32(reps.reports.size()); |
191 | |
192 | base_offset += sizeof(report_list); |
193 | base_offset += sizeof(ReportID) * reps.reports.size(); |
194 | } |
195 | } |
196 | |
197 | unique_ptr<raw_report_info> sheng_build_strat::gatherReports( |
198 | vector<u32> &reports, |
199 | vector<u32> &reports_eod, |
200 | u8 *isSingleReport, |
201 | ReportID *arbReport) const { |
202 | DEBUG_PRINTF("gathering reports\n" ); |
203 | |
204 | const bool remap_reports = has_managed_reports(rdfa.kind); |
205 | |
206 | auto ri = ue2::make_unique<raw_report_info_impl>(); |
207 | map<raw_report_list, u32> rev; |
208 | |
209 | for (const dstate &s : rdfa.states) { |
210 | if (s.reports.empty()) { |
211 | reports.push_back(MO_INVALID_IDX); |
212 | continue; |
213 | } |
214 | |
215 | raw_report_list rrl(s.reports, rm, remap_reports); |
216 | DEBUG_PRINTF("non empty r\n" ); |
217 | if (rev.find(rrl) != rev.end()) { |
218 | reports.push_back(rev[rrl]); |
219 | } else { |
220 | DEBUG_PRINTF("adding to rl %zu\n" , ri->size()); |
221 | rev[rrl] = ri->size(); |
222 | reports.push_back(ri->size()); |
223 | ri->rl.push_back(rrl); |
224 | } |
225 | } |
226 | |
227 | for (const dstate &s : rdfa.states) { |
228 | if (s.reports_eod.empty()) { |
229 | reports_eod.push_back(MO_INVALID_IDX); |
230 | continue; |
231 | } |
232 | |
233 | DEBUG_PRINTF("non empty r eod\n" ); |
234 | raw_report_list rrl(s.reports_eod, rm, remap_reports); |
235 | if (rev.find(rrl) != rev.end()) { |
236 | reports_eod.push_back(rev[rrl]); |
237 | continue; |
238 | } |
239 | |
240 | DEBUG_PRINTF("adding to rl eod %zu\n" , s.reports_eod.size()); |
241 | rev[rrl] = ri->size(); |
242 | reports_eod.push_back(ri->size()); |
243 | ri->rl.push_back(rrl); |
244 | } |
245 | |
246 | assert(!ri->rl.empty()); /* all components should be able to generate |
247 | reports */ |
248 | if (!ri->rl.empty()) { |
249 | *arbReport = *ri->rl.begin()->reports.begin(); |
250 | } else { |
251 | *arbReport = 0; |
252 | } |
253 | |
254 | /* if we have only a single report id generated from all accepts (not eod) |
255 | * we can take some short cuts */ |
256 | set<ReportID> reps; |
257 | |
258 | for (u32 rl_index : reports) { |
259 | if (rl_index == MO_INVALID_IDX) { |
260 | continue; |
261 | } |
262 | assert(rl_index < ri->size()); |
263 | insert(&reps, ri->rl[rl_index].reports); |
264 | } |
265 | |
266 | if (reps.size() == 1) { |
267 | *isSingleReport = 1; |
268 | *arbReport = *reps.begin(); |
269 | DEBUG_PRINTF("single -- %u\n" , *arbReport); |
270 | } else { |
271 | *isSingleReport = 0; |
272 | } |
273 | |
274 | return ri; |
275 | } |
276 | |
277 | u32 sheng_build_strat::max_allowed_offset_accel() const { |
278 | return ACCEL_DFA_MAX_OFFSET_DEPTH; |
279 | } |
280 | |
281 | u32 sheng_build_strat::max_stop_char() const { |
282 | return ACCEL_DFA_MAX_STOP_CHAR; |
283 | } |
284 | |
285 | u32 sheng_build_strat::max_floating_stop_char() const { |
286 | return ACCEL_DFA_MAX_FLOATING_STOP_CHAR; |
287 | } |
288 | |
289 | size_t sheng_build_strat::accelSize() const { |
290 | return sizeof(AccelAux); |
291 | } |
292 | |
293 | #ifdef DEBUG |
294 | static really_inline |
295 | void dumpShuffleMask(const u8 chr, const u8 *buf, unsigned sz) { |
296 | stringstream o; |
297 | |
298 | for (unsigned i = 0; i < sz; i++) { |
299 | o.width(2); |
300 | o << (buf[i] & SHENG_STATE_MASK) << " " ; |
301 | } |
302 | DEBUG_PRINTF("chr %3u: %s\n" , chr, o.str().c_str()); |
303 | } |
304 | #endif |
305 | |
306 | static |
307 | void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info, |
308 | set<dstate_id_t> *accel_states) { |
309 | for (dstate_id_t i : accel_escape_info | map_keys) { |
310 | accel_states->insert(i); |
311 | } |
312 | } |
313 | |
314 | static |
315 | u8 getShengState(dstate &state, dfa_info &info, |
316 | map<dstate_id_t, AccelScheme> &accelInfo) { |
317 | u8 s = state.impl_id; |
318 | if (!state.reports.empty()) { |
319 | s |= SHENG_STATE_ACCEPT; |
320 | } |
321 | if (info.isDead(state)) { |
322 | s |= SHENG_STATE_DEAD; |
323 | } |
324 | if (accelInfo.find(info.raw_id(state.impl_id)) != accelInfo.end()) { |
325 | s |= SHENG_STATE_ACCEL; |
326 | } |
327 | return s; |
328 | } |
329 | |
330 | static |
331 | void fillAccelAux(struct NFA *n, dfa_info &info, |
332 | map<dstate_id_t, AccelScheme> &accelInfo) { |
333 | DEBUG_PRINTF("Filling accel aux structures\n" ); |
334 | sheng *s = (sheng *)getMutableImplNfa(n); |
335 | u32 offset = s->accel_offset; |
336 | |
337 | for (dstate_id_t i = 0; i < info.size(); i++) { |
338 | dstate_id_t state_id = info.raw_id(i); |
339 | if (accelInfo.find(state_id) != accelInfo.end()) { |
340 | s->flags |= SHENG_FLAG_HAS_ACCEL; |
341 | AccelAux *aux = (AccelAux *)((char *)n + offset); |
342 | info.strat.buildAccel(state_id, accelInfo[state_id], aux); |
343 | sstate_aux *saux = |
344 | (sstate_aux *)((char *)n + s->aux_offset) + state_id; |
345 | saux->accel = offset; |
346 | DEBUG_PRINTF("Accel offset: %u\n" , offset); |
347 | offset += ROUNDUP_N(sizeof(AccelAux), alignof(AccelAux)); |
348 | } |
349 | } |
350 | } |
351 | |
352 | static |
353 | void populateBasicInfo(struct NFA *n, dfa_info &info, |
354 | map<dstate_id_t, AccelScheme> &accelInfo, u32 aux_offset, |
355 | u32 report_offset, u32 accel_offset, u32 total_size, |
356 | u32 dfa_size) { |
357 | n->length = total_size; |
358 | n->scratchStateSize = 1; |
359 | n->streamStateSize = 1; |
360 | n->nPositions = info.size(); |
361 | n->type = SHENG_NFA; |
362 | n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0; |
363 | |
364 | sheng *s = (sheng *)getMutableImplNfa(n); |
365 | s->aux_offset = aux_offset; |
366 | s->report_offset = report_offset; |
367 | s->accel_offset = accel_offset; |
368 | s->n_states = info.size(); |
369 | s->length = dfa_size; |
370 | s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0; |
371 | |
372 | s->anchored = getShengState(info.anchored, info, accelInfo); |
373 | s->floating = getShengState(info.floating, info, accelInfo); |
374 | } |
375 | |
376 | static |
377 | void fillTops(NFA *n, dfa_info &info, dstate_id_t id, |
378 | map<dstate_id_t, AccelScheme> &accelInfo) { |
379 | sheng *s = (sheng *)getMutableImplNfa(n); |
380 | u32 aux_base = s->aux_offset; |
381 | |
382 | DEBUG_PRINTF("Filling tops for state %u\n" , id); |
383 | |
384 | sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id; |
385 | |
386 | DEBUG_PRINTF("Aux structure for state %u, offset %zd\n" , id, |
387 | (char *)aux - (char *)n); |
388 | |
389 | /* we could conceivably end up in an accept/dead state on a top event, |
390 | * so mark top as accept/dead state if it indeed is. |
391 | */ |
392 | auto &top_state = info.top(id); |
393 | |
394 | DEBUG_PRINTF("Top transition for state %u: %u\n" , id, top_state.impl_id); |
395 | |
396 | aux->top = getShengState(top_state, info, accelInfo); |
397 | } |
398 | |
399 | static |
400 | void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector<u32> &reports, |
401 | vector<u32> &reports_eod, vector<u32> &report_offsets) { |
402 | sheng *s = (sheng *)getMutableImplNfa(n); |
403 | u32 aux_base = s->aux_offset; |
404 | auto raw_id = info.raw_id(id); |
405 | |
406 | auto &state = info[id]; |
407 | |
408 | sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id; |
409 | |
410 | DEBUG_PRINTF("Filling aux and report structures for state %u\n" , id); |
411 | DEBUG_PRINTF("Aux structure for state %u, offset %zd\n" , id, |
412 | (char *)aux - (char *)n); |
413 | |
414 | aux->accept = state.reports.empty() ? 0 : report_offsets[reports[raw_id]]; |
415 | aux->accept_eod = |
416 | state.reports_eod.empty() ? 0 : report_offsets[reports_eod[raw_id]]; |
417 | |
418 | DEBUG_PRINTF("Report list offset: %u\n" , aux->accept); |
419 | DEBUG_PRINTF("EOD report list offset: %u\n" , aux->accept_eod); |
420 | } |
421 | |
422 | static |
423 | void fillSingleReport(NFA *n, ReportID r_id) { |
424 | sheng *s = (sheng *)getMutableImplNfa(n); |
425 | |
426 | DEBUG_PRINTF("Single report ID: %u\n" , r_id); |
427 | s->report = r_id; |
428 | s->flags |= SHENG_FLAG_SINGLE_REPORT; |
429 | } |
430 | |
431 | static |
432 | void createShuffleMasks(sheng *s, dfa_info &info, |
433 | map<dstate_id_t, AccelScheme> &accelInfo) { |
434 | for (u16 chr = 0; chr < 256; chr++) { |
435 | u8 buf[16] = {0}; |
436 | |
437 | for (dstate_id_t idx = 0; idx < info.size(); idx++) { |
438 | auto &succ_state = info.next(idx, chr); |
439 | |
440 | buf[idx] = getShengState(succ_state, info, accelInfo); |
441 | } |
442 | #ifdef DEBUG |
443 | dumpShuffleMask(chr, buf, sizeof(buf)); |
444 | #endif |
445 | memcpy(&s->shuffle_masks[chr], buf, sizeof(m128)); |
446 | } |
447 | } |
448 | |
449 | bool has_accel_sheng(const NFA *) { |
450 | return true; /* consider the sheng region as accelerated */ |
451 | } |
452 | |
453 | bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc, |
454 | const ReportManager &rm, bool only_accel_init, |
455 | set<dstate_id_t> *accel_states) { |
456 | if (!cc.grey.allowSheng) { |
457 | DEBUG_PRINTF("Sheng is not allowed!\n" ); |
458 | return nullptr; |
459 | } |
460 | |
461 | sheng_build_strat strat(raw, rm, only_accel_init); |
462 | dfa_info info(strat); |
463 | |
464 | DEBUG_PRINTF("Trying to compile a %zu state Sheng\n" , raw.states.size()); |
465 | |
466 | DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n" , |
467 | raw.start_anchored, raw.start_floating); |
468 | |
469 | DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n" , |
470 | info.can_die ? "can" : "cannot" , info.size()); |
471 | if (info.size() > 16) { |
472 | DEBUG_PRINTF("Too many states\n" ); |
473 | return nullptr; |
474 | } |
475 | |
476 | if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming |
477 | * mode with our semantics */ |
478 | raw.stripExtraEodReports(); |
479 | } |
480 | auto accelInfo = strat.getAccelInfo(cc.grey); |
481 | |
482 | // set impl_id of each dfa state |
483 | for (dstate_id_t i = 0; i < info.size(); i++) { |
484 | info[i].impl_id = i; |
485 | } |
486 | |
487 | DEBUG_PRINTF("Anchored start state: %u, floating start state: %u\n" , |
488 | info.anchored.impl_id, info.floating.impl_id); |
489 | |
490 | u32 nfa_size = ROUNDUP_16(sizeof(NFA) + sizeof(sheng)); |
491 | vector<u32> reports, eod_reports, report_offsets; |
492 | u8 isSingle = 0; |
493 | ReportID single_report = 0; |
494 | |
495 | auto ri = |
496 | strat.gatherReports(reports, eod_reports, &isSingle, &single_report); |
497 | |
498 | u32 total_aux = sizeof(sstate_aux) * info.size(); |
499 | u32 total_accel = strat.accelSize() * accelInfo.size(); |
500 | u32 total_reports = ri->getReportListSize(); |
501 | |
502 | u32 reports_offset = nfa_size + total_aux; |
503 | u32 accel_offset = |
504 | ROUNDUP_N(reports_offset + total_reports, alignof(AccelAux)); |
505 | u32 total_size = ROUNDUP_N(accel_offset + total_accel, 64); |
506 | |
507 | DEBUG_PRINTF("NFA: %u, aux: %u, reports: %u, accel: %u, total: %u\n" , |
508 | nfa_size, total_aux, total_reports, total_accel, total_size); |
509 | |
510 | auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); |
511 | |
512 | populateBasicInfo(nfa.get(), info, accelInfo, nfa_size, reports_offset, |
513 | accel_offset, total_size, total_size - sizeof(NFA)); |
514 | |
515 | DEBUG_PRINTF("Setting up aux and report structures\n" ); |
516 | |
517 | ri->fillReportLists(nfa.get(), reports_offset, report_offsets); |
518 | |
519 | for (dstate_id_t idx = 0; idx < info.size(); idx++) { |
520 | fillTops(nfa.get(), info, idx, accelInfo); |
521 | fillAux(nfa.get(), info, idx, reports, eod_reports, report_offsets); |
522 | } |
523 | if (isSingle) { |
524 | fillSingleReport(nfa.get(), single_report); |
525 | } |
526 | |
527 | fillAccelAux(nfa.get(), info, accelInfo); |
528 | |
529 | if (accel_states) { |
530 | fillAccelOut(accelInfo, accel_states); |
531 | } |
532 | |
533 | createShuffleMasks((sheng *)getMutableImplNfa(nfa.get()), info, accelInfo); |
534 | |
535 | return nfa; |
536 | } |
537 | |
538 | } // namespace ue2 |
539 | |