1 | /* |
2 | * Copyright (c) 2015-2018, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "mcclellancompile.h" |
30 | |
31 | #include "accel.h" |
32 | #include "accelcompile.h" |
33 | #include "grey.h" |
34 | #include "mcclellan_internal.h" |
35 | #include "mcclellancompile_util.h" |
36 | #include "nfa_internal.h" |
37 | #include "shufticompile.h" |
38 | #include "trufflecompile.h" |
39 | #include "ue2common.h" |
40 | #include "util/alloc.h" |
41 | #include "util/bitutils.h" |
42 | #include "util/charreach.h" |
43 | #include "util/compare.h" |
44 | #include "util/compile_context.h" |
45 | #include "util/container.h" |
46 | #include "util/make_unique.h" |
47 | #include "util/order_check.h" |
48 | #include "util/report_manager.h" |
49 | #include "util/flat_containers.h" |
50 | #include "util/unaligned.h" |
51 | #include "util/verify_types.h" |
52 | |
53 | #include <algorithm> |
54 | #include <cstdio> |
55 | #include <cstdlib> |
56 | #include <cstring> |
57 | #include <map> |
58 | #include <memory> |
59 | #include <queue> |
60 | #include <set> |
61 | #include <vector> |
62 | |
63 | #include <boost/range/adaptor/map.hpp> |
64 | |
65 | #include "mcclellandump.h" |
66 | #include "util/dump_util.h" |
67 | #include "util/dump_charclass.h" |
68 | |
69 | using namespace std; |
70 | using boost::adaptors::map_keys; |
71 | using boost::dynamic_bitset; |
72 | |
73 | #define ACCEL_DFA_MAX_OFFSET_DEPTH 4 |
74 | |
75 | /** Maximum tolerated number of escape character from an accel state. |
76 | * This is larger than nfa, as we don't have a budget and the nfa cheats on stop |
77 | * characters for sets of states */ |
78 | #define ACCEL_DFA_MAX_STOP_CHAR 160 |
79 | |
80 | /** Maximum tolerated number of escape character from a sds accel state. Larger |
81 | * than normal states as accelerating sds is important. Matches NFA value */ |
82 | #define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 |
83 | |
84 | namespace ue2 { |
85 | |
86 | namespace /* anon */ { |
87 | |
88 | struct { |
89 | u16 = 0; |
90 | bool = false; |
91 | bool = false; |
92 | bool = false; |
93 | }; |
94 | |
95 | struct dfa_info { |
96 | accel_dfa_build_strat &strat; |
97 | raw_dfa &raw; |
98 | vector<dstate> &states; |
99 | vector<dstate_extra> ; |
100 | vector<vector<dstate_id_t>> wide_state_chain; |
101 | vector<vector<symbol_t>> wide_symbol_chain; |
102 | const u16 alpha_size; /* including special symbols */ |
103 | const array<u16, ALPHABET_SIZE> &alpha_remap; |
104 | const u16 impl_alpha_size; |
105 | |
106 | u8 getAlphaShift() const; |
107 | |
108 | explicit dfa_info(accel_dfa_build_strat &s) |
109 | : strat(s), |
110 | raw(s.get_raw()), |
111 | states(raw.states), |
112 | extra(raw.states.size()), |
113 | alpha_size(raw.alpha_size), |
114 | alpha_remap(raw.alpha_remap), |
115 | impl_alpha_size(raw.getImplAlphaSize()) {} |
116 | |
117 | dstate_id_t implId(dstate_id_t raw_id) const { |
118 | return states[raw_id].impl_id; |
119 | } |
120 | |
121 | bool is_sherman(dstate_id_t raw_id) const { |
122 | return extra[raw_id].shermanState; |
123 | } |
124 | |
125 | bool is_widestate(dstate_id_t raw_id) const { |
126 | return extra[raw_id].wideState; |
127 | } |
128 | |
129 | bool is_widehead(dstate_id_t raw_id) const { |
130 | return extra[raw_id].wideHead; |
131 | } |
132 | |
133 | size_t size(void) const { return states.size(); } |
134 | }; |
135 | |
136 | u8 dfa_info::getAlphaShift() const { |
137 | if (impl_alpha_size < 2) { |
138 | return 1; |
139 | } else { |
140 | /* log2 round up */ |
141 | return 32 - clz32(impl_alpha_size - 1); |
142 | } |
143 | } |
144 | |
/* For a single state: its predecessor states, bucketed by the symbol on
 * which the predecessor transitions into it. */
struct state_prev_info {
    vector<vector<dstate_id_t>> prev_vec; /* indexed by symbol */
    explicit state_prev_info(size_t alpha_size) : prev_vec(alpha_size) {}
};
149 | |
/* Reverse-edge view of a raw_dfa (who transitions into each state, per
 * symbol) plus the set of accepting states. Built by the constructor. */
struct DfaPrevInfo {
    u16 impl_alpha_size;
    u16 state_num;
    vector<state_prev_info> states; /* predecessor info per state */
    set<dstate_id_t> accepts;       /* states with reports or eod reports */

    explicit DfaPrevInfo(raw_dfa &rdfa);
};
158 | |
159 | DfaPrevInfo::DfaPrevInfo(raw_dfa &rdfa) |
160 | : impl_alpha_size(rdfa.getImplAlphaSize()), state_num(rdfa.states.size()), |
161 | states(state_num, state_prev_info(impl_alpha_size)){ |
162 | for (size_t i = 0; i < states.size(); i++) { |
163 | for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { |
164 | dstate_id_t curr = rdfa.states[i].next[sym]; |
165 | states[curr].prev_vec[sym].push_back(i); |
166 | } |
167 | if (!rdfa.states[i].reports.empty() |
168 | || !rdfa.states[i].reports_eod.empty()) { |
169 | DEBUG_PRINTF("accept raw state: %ld\n" , i); |
170 | accepts.insert(i); |
171 | } |
172 | } |
173 | } |
174 | } // namespace |
175 | |
176 | static |
177 | mstate_aux *getAux(NFA *n, dstate_id_t i) { |
178 | assert(isMcClellanType(n->type)); |
179 | |
180 | mcclellan *m = (mcclellan *)getMutableImplNfa(n); |
181 | mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); |
182 | |
183 | mstate_aux *aux = aux_base + i; |
184 | assert((const char *)aux < (const char *)n + m->length); |
185 | return aux; |
186 | } |
187 | |
/* Patches the 16-bit successor tables in place once the aux structures are
 * known: each successor entry gains ACCEPT_FLAG if the target state accepts
 * and ACCEL_FLAG if it has an acceleration scheme. Covers normal states,
 * sherman states, and wide states. */
static
void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) {
    assert((size_t)succ_table % 2 == 0); /* table must be u16 aligned */
    assert(n->type == MCCLELLAN_NFA_16);
    u8 alphaShift = info.getAlphaShift();
    u16 alphaSize = info.impl_alpha_size;
    mcclellan *m = (mcclellan *)getMutableImplNfa(n);

    /* handle the normal states */
    for (u32 i = 0; i < m->sherman_limit; i++) {
        for (size_t j = 0; j < alphaSize; j++) {
            size_t c_prime = (i << alphaShift) + j;

            // wide state has no aux structure.
            if (m->has_wide && succ_table[c_prime] >= m->wide_limit) {
                continue;
            }

            mstate_aux *aux = getAux(n, succ_table[c_prime]);

            if (aux->accept) {
                succ_table[c_prime] |= ACCEPT_FLAG;
            }

            if (aux->accel_offset) {
                succ_table[c_prime] |= ACCEL_FLAG;
            }
        }
    }

    /* handle the sherman states */
    char *sherman_base_offset = (char *)n + m->sherman_offset;
    /* when wide states exist they are numbered from wide_limit upwards, so
     * the sherman range is capped there rather than at state_count */
    u16 sherman_ceil = m->has_wide == 1 ? m->wide_limit : m->state_count;
    for (u16 j = m->sherman_limit; j < sherman_ceil; j++) {
        char *sherman_cur
            = findMutableShermanState(sherman_base_offset, m->sherman_limit, j);
        assert(*(sherman_cur + SHERMAN_TYPE_OFFSET) == SHERMAN_STATE);
        u8 len = *(u8 *)(sherman_cur + SHERMAN_LEN_OFFSET);
        u16 *succs = (u16 *)(sherman_cur + SHERMAN_STATES_OFFSET(len));

        /* sherman successor arrays are not necessarily 2-byte aligned, hence
         * the unaligned loads/stores */
        for (u8 i = 0; i < len; i++) {
            u16 succ_i = unaligned_load_u16((u8 *)&succs[i]);
            // wide state has no aux structure.
            if (m->has_wide && succ_i >= m->wide_limit) {
                continue;
            }

            mstate_aux *aux = getAux(n, succ_i);

            if (aux->accept) {
                succ_i |= ACCEPT_FLAG;
            }

            if (aux->accel_offset) {
                succ_i |= ACCEL_FLAG;
            }

            unaligned_store_u16((u8 *)&succs[i], succ_i);
        }
    }

    /* handle the wide states */
    if (m->has_wide) {
        u32 wide_limit = m->wide_limit;
        char *wide_base = (char *)n + m->wide_offset;
        assert(*wide_base == WIDE_STATE);
        u16 wide_number = verify_u16(info.wide_symbol_chain.size());
        // traverse over wide head states.
        for (u16 j = wide_limit; j < wide_limit + wide_number; j++) {
            char *wide_cur
                = findMutableWideEntry16(wide_base, wide_limit, j);
            u16 width = *(const u16 *)(wide_cur + WIDE_WIDTH_OFFSET);
            u16 *trans = (u16 *)(wide_cur + WIDE_TRANSITION_OFFSET16(width));

            // check successful transition
            u16 next = unaligned_load_u16((u8 *)trans);
            if (next < wide_limit) { /* only non-wide targets have aux */
                mstate_aux *aux = getAux(n, next);
                if (aux->accept) {
                    next |= ACCEPT_FLAG;
                }
                if (aux->accel_offset) {
                    next |= ACCEL_FLAG;
                }
                unaligned_store_u16((u8 *)trans, next);
            }
            trans++;

            // check failure transition
            for (symbol_t k = 0; k < alphaSize; k++) {
                u16 next_k = unaligned_load_u16((u8 *)&trans[k]);
                if (next_k >= wide_limit) {
                    continue;
                }
                mstate_aux *aux_k = getAux(n, next_k);
                if (aux_k->accept) {
                    next_k |= ACCEPT_FLAG;
                }
                if (aux_k->accel_offset) {
                    next_k |= ACCEL_FLAG;
                }
                unaligned_store_u16((u8 *)&trans[k], next_k);
            }
        }
    }
}
294 | |
/* Maximum offset depth to consider when building offset accel schemes. */
u32 mcclellan_build_strat::max_allowed_offset_accel() const {
    return ACCEL_DFA_MAX_OFFSET_DEPTH;
}
298 | |
/* Maximum tolerated number of escape characters from an accel state; see
 * ACCEL_DFA_MAX_STOP_CHAR above. */
u32 mcclellan_build_strat::max_stop_char() const {
    return ACCEL_DFA_MAX_STOP_CHAR;
}
302 | |
/* Maximum tolerated number of escape characters from a sds accel state; see
 * ACCEL_DFA_MAX_FLOATING_STOP_CHAR above. */
u32 mcclellan_build_strat::max_floating_stop_char() const {
    return ACCEL_DFA_MAX_FLOATING_STOP_CHAR;
}
306 | |
/* Fills in the NFA header and the common fields of the mcclellan header.
 * state_size selects the model: sizeof(u8) -> MCCLELLAN_NFA_8,
 * sizeof(u16) -> MCCLELLAN_NFA_16. */
static
void populateBasicInfo(size_t state_size, const dfa_info &info,
                       u32 total_size, u32 aux_offset, u32 accel_offset,
                       u32 accel_count, ReportID arb, bool single, NFA *nfa) {
    assert(state_size == sizeof(u16) || state_size == sizeof(u8));

    nfa->length = total_size;
    nfa->nPositions = info.states.size();

    nfa->scratchStateSize = verify_u32(state_size);
    nfa->streamStateSize = verify_u32(state_size);

    if (state_size == sizeof(u8)) {
        nfa->type = MCCLELLAN_NFA_8;
    } else {
        nfa->type = MCCLELLAN_NFA_16;
    }

    mcclellan *m = (mcclellan *)getMutableImplNfa(nfa);
    /* bake the 256-entry alphabet remap table into the bytecode */
    for (u32 i = 0; i < 256; i++) {
        m->remap[i] = verify_u8(info.alpha_remap[i]);
    }
    m->alphaShift = info.getAlphaShift();
    m->length = total_size;
    m->aux_offset = aux_offset;
    m->accel_offset = accel_offset;
    m->arb_report = arb;               /* arbitrary report id, see gatherReports */
    m->state_count = verify_u16(info.size());
    m->start_anchored = info.implId(info.raw.start_anchored);
    m->start_floating = info.implId(info.raw.start_floating);
    m->has_accel = accel_count ? 1 : 0;
    m->has_wide = info.wide_state_chain.size() > 0 ? 1 : 0;

    if (state_size == sizeof(u8) && m->has_wide == 1) {
        // allocate 1 more byte for wide state use.
        nfa->scratchStateSize += sizeof(u8);
        nfa->streamStateSize += sizeof(u8);
    }

    if (state_size == sizeof(u16) && m->has_wide == 1) {
        // allocate 2 more bytes for wide state use.
        nfa->scratchStateSize += sizeof(u16);
        nfa->streamStateSize += sizeof(u16);
    }

    if (single) {
        m->flags |= MCCLELLAN_FLAG_SINGLE;
    }
}
356 | |
357 | namespace { |
358 | |
359 | struct raw_report_list { |
360 | flat_set<ReportID> reports; |
361 | |
362 | raw_report_list(const flat_set<ReportID> &reports_in, |
363 | const ReportManager &rm, bool do_remap) { |
364 | if (do_remap) { |
365 | for (auto &id : reports_in) { |
366 | reports.insert(rm.getProgramOffset(id)); |
367 | } |
368 | } else { |
369 | reports = reports_in; |
370 | } |
371 | } |
372 | |
373 | bool operator<(const raw_report_list &b) const { |
374 | return reports < b.reports; |
375 | } |
376 | }; |
377 | |
/* Concrete raw_report_info: the deduplicated list of report lists gathered
 * by gatherReports(), later serialised into the bytecode by
 * fillReportLists(). */
struct raw_report_info_impl : public raw_report_info {
    vector<raw_report_list> rl;
    u32 getReportListSize() const override;
    size_t size() const override;
    void fillReportLists(NFA *n, size_t base_offset,
                         std::vector<u32> &ro /* out */) const override;
};
385 | } |
386 | |
/* Gathers and deduplicates report lists for every state. On return,
 * reports[i] / reports_eod[i] hold an index into the returned ri->rl (or
 * MO_INVALID_IDX if state i does not accept). *isSingleReport is set when a
 * single report id covers all non-eod accepts, which enables runtime
 * shortcuts; *arbReport receives an arbitrary representative report id. */
unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports(
                                                  vector<u32> &reports,
                                                  vector<u32> &reports_eod,
                                                  u8 *isSingleReport,
                                                  ReportID *arbReport) const {
    DEBUG_PRINTF("gathering reports\n");

    const bool remap_reports = has_managed_reports(rdfa.kind);

    auto ri = ue2::make_unique<raw_report_info_impl>();
    map<raw_report_list, u32> rev; /* dedupe: report list -> index in ri->rl */

    /* normal (non-eod) reports */
    for (const dstate &s : rdfa.states) {
        if (s.reports.empty()) {
            reports.push_back(MO_INVALID_IDX);
            continue;
        }

        raw_report_list rrl(s.reports, rm, remap_reports);
        DEBUG_PRINTF("non empty r\n");
        auto it = rev.find(rrl);
        if (it != rev.end()) {
            reports.push_back(it->second);
        } else {
            DEBUG_PRINTF("adding to rl %zu\n", ri->size());
            rev.emplace(rrl, ri->size());
            reports.push_back(ri->size());
            ri->rl.push_back(rrl);
        }
    }

    /* eod reports, sharing the same dedupe map */
    for (const dstate &s : rdfa.states) {
        if (s.reports_eod.empty()) {
            reports_eod.push_back(MO_INVALID_IDX);
            continue;
        }

        DEBUG_PRINTF("non empty r eod\n");
        raw_report_list rrl(s.reports_eod, rm, remap_reports);
        auto it = rev.find(rrl);
        if (it != rev.end()) {
            reports_eod.push_back(it->second);
            continue;
        }

        DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size());
        rev.emplace(rrl, ri->size());
        reports_eod.push_back(ri->size());
        ri->rl.push_back(rrl);
    }

    assert(!ri->rl.empty()); /* all components should be able to generate
                                reports */
    if (!ri->rl.empty()) {
        *arbReport = *ri->rl.begin()->reports.begin();
    } else {
        *arbReport = 0;
    }

    /* if we have only a single report id generated from all accepts (not eod)
     * we can take some short cuts */
    flat_set<ReportID> reps;

    for (u32 rl_index : reports) {
        if (rl_index == MO_INVALID_IDX) {
            continue;
        }
        assert(rl_index < ri->size());
        insert(&reps, ri->rl[rl_index].reports);
    }

    if (reps.size() == 1) {
        *isSingleReport = 1;
        *arbReport = *reps.begin();
        DEBUG_PRINTF("single -- %u\n", *arbReport);
    } else {
        *isSingleReport = 0;
    }

    return ri;
}
468 | |
469 | u32 raw_report_info_impl::getReportListSize() const { |
470 | u32 rv = 0; |
471 | |
472 | for (const auto &reps : rl) { |
473 | rv += sizeof(report_list); |
474 | rv += sizeof(ReportID) * reps.reports.size(); |
475 | } |
476 | |
477 | return rv; |
478 | } |
479 | |
/* Number of distinct report lists gathered. */
size_t raw_report_info_impl::size() const {
    return rl.size();
}
483 | |
/* Serialises each gathered report list into the bytecode starting at
 * base_offset, recording each list's offset (relative to the NFA start) in
 * ro. Lists are laid out contiguously: header followed by the ids. */
void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset,
                                           vector<u32> &ro) const {
    for (const auto &reps : rl) {
        ro.push_back(base_offset);

        report_list *p = (report_list *)((char *)n + base_offset);

        u32 i = 0;
        for (const ReportID report : reps.reports) {
            p->report[i++] = report;
        }
        p->count = verify_u32(reps.reports.size());

        /* advance past this list: header + flexible id array */
        base_offset += sizeof(report_list);
        base_offset += sizeof(ReportID) * reps.reports.size();
    }
}
501 | |
502 | static |
503 | void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info, |
504 | set<dstate_id_t> *accel_states) { |
505 | for (dstate_id_t i : accel_escape_info | map_keys) { |
506 | accel_states->insert(i); |
507 | } |
508 | } |
509 | |
510 | static |
511 | size_t calcShermanRegionSize(const dfa_info &info) { |
512 | size_t rv = 0; |
513 | |
514 | for (size_t i = 0; i < info.size(); i++) { |
515 | if (info.is_sherman(i)) { |
516 | rv += SHERMAN_FIXED_SIZE; |
517 | } |
518 | } |
519 | |
520 | return ROUNDUP_16(rv); |
521 | } |
522 | |
/* Size in bytes of the wide-state region; must agree with the layout written
 * by mcclellanCompile16's wide-state section. */
static
size_t calcWideRegionSize(const dfa_info &info) {
    if (info.wide_state_chain.empty()) {
        return 0;
    }

    // wide info header: one u32 entry offset per chain, plus 4 bytes for the
    // leading WIDE_STATE marker byte, padding and the u16 entry count
    size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 4;

    // wide info body, per chain: the symbol chain (padded to 2 bytes), the
    // transition table of (impl_alpha_size + 1) u16 entries, and 2 bytes for
    // the u16 width field
    for (const auto &chain : info.wide_symbol_chain) {
        rv += ROUNDUP_N(chain.size(), 2) +
              (info.impl_alpha_size + 1) * sizeof(u16) + 2;
    }

    return ROUNDUP_16(rv);
}
540 | |
541 | static |
542 | void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, |
543 | const vector<u32> &reports, const vector<u32> &reports_eod, |
544 | vector<u32> &reportOffsets) { |
545 | const dstate &raw_state = info.states[i]; |
546 | aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]]; |
547 | aux->accept_eod = raw_state.reports_eod.empty() ? 0 |
548 | : reportOffsets[reports_eod[i]]; |
549 | aux->top = info.implId(i ? raw_state.next[info.alpha_remap[TOP]] |
550 | : info.raw.start_floating); |
551 | } |
552 | |
/* Assigns 16-bit implementation state numbers. States are numbered in
 * representation order — normal, then sherman (from *sherman_base), then
 * wide heads (from *wide_limit), then wide body states — so the runtime can
 * classify a state by comparing its id against these thresholds.
 * Returns false on error (too many states to number). */
static
bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base,
                   dstate_id_t *wide_limit) {
    info.states[0].impl_id = 0; /* dead is always 0 */

    vector<dstate_id_t> norm;
    vector<dstate_id_t> sherm;
    vector<dstate_id_t> wideHead;
    vector<dstate_id_t> wideState;

    if (info.size() > (1 << 16)) {
        DEBUG_PRINTF("too many states\n");
        *wide_limit = 0;
        return false;
    }

    /* bucket states by representation; wide head takes priority as a wide
     * chain's head is still a wide state */
    for (u32 i = 1; i < info.size(); i++) {
        if (info.is_widehead(i)) {
            wideHead.push_back(i);
        } else if (info.is_widestate(i)) {
            wideState.push_back(i);
        } else if (info.is_sherman(i)) {
            sherm.push_back(i);
        } else {
            norm.push_back(i);
        }
    }

    dstate_id_t next = 1;
    for (const dstate_id_t &s : norm) {
        DEBUG_PRINTF("[norm] mapping state %u to %u\n", s, next);
        info.states[s].impl_id = next++;
    }

    *sherman_base = next;
    for (const dstate_id_t &s : sherm) {
        DEBUG_PRINTF("[sherm] mapping state %u to %u\n", s, next);
        info.states[s].impl_id = next++;
    }

    *wide_limit = next;
    for (const dstate_id_t &s : wideHead) {
        DEBUG_PRINTF("[widehead] mapping state %u to %u\n", s, next);
        info.states[s].impl_id = next++;
    }

    for (const dstate_id_t &s : wideState) {
        DEBUG_PRINTF("[wide] mapping state %u to %u\n", s, next);
        info.states[s].impl_id = next++;
    }

    /* Check to see if we haven't over allocated our states */
    DEBUG_PRINTF("next sherman %u masked %u\n", next,
                 (dstate_id_t)(next & STATE_MASK));
    return (next - 1) == ((next - 1) & STATE_MASK);
}
610 | |
/* Builds the 16-bit McClellan bytecode. Bytecode layout:
 *   NFA header | mcclellan header | succ table (normal states) | aux |
 *   report lists | accel schemes | sherman region | wide region.
 * Returns nullptr if 16-bit state numbering is not possible. */
static
bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc,
                                     set<dstate_id_t> *accel_states) {
    DEBUG_PRINTF("building mcclellan 16\n");

    vector<u32> reports; /* index in ri for the appropriate report list */
    vector<u32> reports_eod; /* as above */
    ReportID arb;
    u8 single;

    u8 alphaShift = info.getAlphaShift();
    assert(alphaShift <= 8);

    u16 count_real_states;
    u16 wide_limit;
    if (!allocateFSN16(info, &count_real_states, &wide_limit)) {
        DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
                     info.size());
        return nullptr;
    }

    DEBUG_PRINTF("count_real_states: %d\n", count_real_states);
    DEBUG_PRINTF("non_wide_states: %d\n", wide_limit);

    auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
    map<dstate_id_t, AccelScheme> accel_escape_info
        = info.strat.getAccelInfo(cc.grey);

    /* compute region sizes and offsets for the layout described above */
    size_t tran_size = (1 << info.getAlphaShift())
                        * sizeof(u16) * count_real_states;

    size_t aux_size = sizeof(mstate_aux) * wide_limit;

    size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size);
    size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
    size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
                                    + ri->getReportListSize(), 32);
    size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
    size_t sherman_size = calcShermanRegionSize(info);
    size_t wide_offset = ROUNDUP_16(sherman_offset + sherman_size);
    size_t wide_size = calcWideRegionSize(info);
    size_t total_size = wide_offset + wide_size;

    accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
    assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));

    DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
    DEBUG_PRINTF("aux_size %zu\n", aux_size);
    DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
    DEBUG_PRINTF("accel_offset %zu\n", accel_offset + sizeof(NFA));
    DEBUG_PRINTF("accel_size %zu\n", accel_size);
    DEBUG_PRINTF("sherman_offset %zu\n", sherman_offset);
    DEBUG_PRINTF("sherman_size %zu\n", sherman_size);
    DEBUG_PRINTF("wide_offset %zu\n", wide_offset);
    DEBUG_PRINTF("wide_size %zu\n", wide_size);
    DEBUG_PRINTF("total_size %zu\n", total_size);

    auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
    char *nfa_base = (char *)nfa.get();

    populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
                      accel_escape_info.size(), arb, single, nfa.get());

    vector<u32> reportOffsets;

    ri->fillReportLists(nfa.get(), aux_offset + aux_size, reportOffsets);

    u16 *succ_table = (u16 *)(nfa_base + sizeof(NFA) + sizeof(mcclellan));
    mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset);
    mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get());

    m->wide_limit = wide_limit;
    m->wide_offset = wide_offset;

    /* copy in the mc header information */
    m->sherman_offset = sherman_offset;
    m->sherman_end = total_size;
    m->sherman_limit = count_real_states;

    /* do normal states */
    for (size_t i = 0; i < info.size(); i++) {
        if (info.is_sherman(i) || info.is_widestate(i)) {
            continue;
        }

        u16 fs = info.implId(i);
        mstate_aux *this_aux = getAux(nfa.get(), fs);

        assert(fs < count_real_states);

        /* full transition row for this state */
        for (size_t j = 0; j < info.impl_alpha_size; j++) {
            succ_table[(fs << alphaShift) + j] =
                info.implId(info.states[i].next[j]);
        }

        fillInAux(&aux[fs], i, info, reports, reports_eod, reportOffsets);

        if (contains(accel_escape_info, i)) {
            this_aux->accel_offset = accel_offset;
            accel_offset += info.strat.accelSize();
            assert(accel_offset + sizeof(NFA) <= sherman_offset);
            assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
            info.strat.buildAccel(i, accel_escape_info.at(i),
                                  (void *)((char *)m + this_aux->accel_offset));
        }
    }

    /* do sherman states */
    char *sherman_table = nfa_base + m->sherman_offset;
    assert(ISALIGNED_16(sherman_table));
    for (size_t i = 0; i < info.size(); i++) {
        if (!info.is_sherman(i)) {
            continue;
        }

        u16 fs = verify_u16(info.implId(i));
        mstate_aux *this_aux = getAux(nfa.get(), fs);

        assert(fs >= count_real_states);
        assert(fs < wide_limit);

        char *curr_sherman_entry
            = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE;
        assert(curr_sherman_entry <= nfa_base + m->length);

        fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);

        if (contains(accel_escape_info, i)) {
            this_aux->accel_offset = accel_offset;
            accel_offset += info.strat.accelSize();
            assert(accel_offset + sizeof(NFA) <= sherman_offset);
            assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
            info.strat.buildAccel(i, accel_escape_info.at(i),
                                  (void *)((char *)m + this_aux->accel_offset));
        }

        /* a sherman entry only stores the transitions that differ from its
         * daddy state */
        u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken);
        assert(len <= 9);
        dstate_id_t d = info.states[i].daddy;

        *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
        *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
        *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
        u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);

        for (u16 s = 0; s < info.impl_alpha_size; s++) {
            if (info.states[i].next[s] != info.states[d].next[s]) {
                *(chars++) = (u8)s;
            }
        }

        u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
        for (u16 s = 0; s < info.impl_alpha_size; s++) {
            if (info.states[i].next[s] != info.states[d].next[s]) {
                DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n",
                             fs, info.implId(d),
                             info.implId(info.states[i].next[s]));
                /* successor array may be unaligned */
                unaligned_store_u16((u8 *)states++,
                                    info.implId(info.states[i].next[s]));
            }
        }
    }

    if (!info.wide_state_chain.empty()) {
        /* do wide states using info */
        u16 wide_number = verify_u16(info.wide_symbol_chain.size());
        char *wide_base = nfa_base + m->wide_offset;
        assert(ISALIGNED_16(wide_base));

        /* region header: marker byte, padding, u16 entry count */
        char *wide_top = wide_base;
        *(u8 *)(wide_top++) = WIDE_STATE;
        wide_top = ROUNDUP_PTR(wide_top, 2);
        *(u16 *)(wide_top) = wide_number;
        wide_top += 2;

        char *curr_wide_entry = wide_top + wide_number * sizeof(u32);
        u32 *wide_offset_list = (u32 *)wide_top;

        /* get the order of writing wide states */
        vector<size_t> order(wide_number);
        for (size_t i = 0; i < wide_number; i++) {
            dstate_id_t head = info.wide_state_chain[i].front();
            size_t pos = info.implId(head) - m->wide_limit;
            order[pos] = i;
        }

        for (size_t i : order) {
            vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
            vector<symbol_t> &symbol_chain = info.wide_symbol_chain[i];

            u16 width = verify_u16(symbol_chain.size());
            *(u16 *)(curr_wide_entry + WIDE_WIDTH_OFFSET) = width;
            u8 *chars = (u8 *)(curr_wide_entry + WIDE_SYMBOL_OFFSET16);

            // store wide state symbol chain
            for (size_t j = 0; j < width; j++) {
                *(chars++) = verify_u8(symbol_chain[j]);
            }

            // store wide state transition table
            u16 *trans = (u16 *)(curr_wide_entry
                                + WIDE_TRANSITION_OFFSET16(width));
            dstate_id_t tail = state_chain[width - 1];
            symbol_t last = symbol_chain[width - 1];
            dstate_id_t tran = info.states[tail].next[last];
            // 1. successful transition
            *trans++ = info.implId(tran);
            // 2. failure transition
            for (size_t j = 0; verify_u16(j) < width - 1; j++) {
                if (symbol_chain[j] != last) {
                    tran = info.states[state_chain[j]].next[last];
                }
            }
            for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) {
                if (sym != last) {
                    *trans++ = info.implId(info.states[tail].next[sym]);
                } else {
                    *trans++ = info.implId(tran);
                }
            }

            /* record this entry's offset in the header's offset list */
            *wide_offset_list++ = verify_u32(curr_wide_entry - wide_base);

            curr_wide_entry = (char *)trans;
        }
    }

    /* set ACCEPT/ACCEL flags on successor entries now that aux is complete */
    markEdges(nfa.get(), succ_table, info);

    if (accel_states && nfa) {
        fillAccelOut(accel_escape_info, accel_states);
    }

    return nfa;
}
847 | |
/* Writes the succ table row and the aux entry for raw state i in the 8-bit
 * model. */
static
void fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table,
                       const vector<u32> &reportOffsets,
                       const vector<u32> &reports,
                       const vector<u32> &reports_eod, u32 i) {
    dstate_id_t j = info.implId(i); /* implementation id of this state */
    u8 alphaShift = info.getAlphaShift();
    assert(alphaShift <= 8);

    for (size_t s = 0; s < info.impl_alpha_size; s++) {
        dstate_id_t raw_succ = info.states[i].next[s];
        succ_table[(j << alphaShift) + s] = info.implId(raw_succ);
    }

    /* report list offsets; zero means "not an accept" */
    aux[j].accept = 0;
    aux[j].accept_eod = 0;

    if (!info.states[i].reports.empty()) {
        DEBUG_PRINTF("i=%u r[i]=%u\n", i, reports[i]);
        assert(reports[i] != MO_INVALID_IDX);
        aux[j].accept = reportOffsets[reports[i]];
    }

    if (!info.states[i].reports_eod.empty()) {
        DEBUG_PRINTF("i=%u re[i]=%u\n", i, reports_eod[i]);
        aux[j].accept_eod = reportOffsets[reports_eod[i]];
    }

    /* dead state (0) tops to the floating start state */
    dstate_id_t raw_top = i ? info.states[i].next[info.alpha_remap[TOP]]
                            : info.raw.start_floating;

    aux[j].top = info.implId(raw_top);
}
881 | |
882 | static |
883 | void allocateFSN8(dfa_info &info, |
884 | const map<dstate_id_t, AccelScheme> &accel_escape_info, |
885 | u16 *accel_limit, u16 *accept_limit) { |
886 | info.states[0].impl_id = 0; /* dead is always 0 */ |
887 | |
888 | vector<dstate_id_t> norm; |
889 | vector<dstate_id_t> accel; |
890 | vector<dstate_id_t> accept; |
891 | |
892 | assert(info.size() <= (1 << 8)); |
893 | |
894 | for (u32 i = 1; i < info.size(); i++) { |
895 | if (!info.states[i].reports.empty()) { |
896 | accept.push_back(i); |
897 | } else if (contains(accel_escape_info, i)) { |
898 | accel.push_back(i); |
899 | } else { |
900 | norm.push_back(i); |
901 | } |
902 | } |
903 | |
904 | u32 j = 1; /* dead is already at 0 */ |
905 | for (const dstate_id_t &s : norm) { |
906 | assert(j <= 256); |
907 | DEBUG_PRINTF("mapping state %u to %u\n" , s, j); |
908 | info.states[s].impl_id = j++; |
909 | } |
910 | *accel_limit = j; |
911 | for (const dstate_id_t &s : accel) { |
912 | assert(j <= 256); |
913 | DEBUG_PRINTF("mapping state %u to %u\n" , s, j); |
914 | info.states[s].impl_id = j++; |
915 | } |
916 | *accept_limit = j; |
917 | for (const dstate_id_t &s : accept) { |
918 | assert(j <= 256); |
919 | DEBUG_PRINTF("mapping state %u to %u\n" , s, j); |
920 | info.states[s].impl_id = j++; |
921 | } |
922 | } |
923 | |
static
bytecode_ptr<NFA> mcclellanCompile8(dfa_info &info, const CompileContext &cc,
                                    set<dstate_id_t> *accel_states) {
    /* Builds the 8-bit McClellan implementation (all impl state ids fit
     * in a u8).
     *
     * Bytecode layout, in order:
     *   NFA header | mcclellan header | successor table (u8 entries)
     *   | mstate_aux array (at aux_offset) | report lists
     *   | accel structures (at accel_offset, which is stored relative to
     *     the mcclellan struct after the adjustment below).
     *
     * If accel_states is non-null it is filled with the raw dfa ids of
     * the accelerable states. */
    DEBUG_PRINTF("building mcclellan 8\n" );

    vector<u32> reports;     /* per-state report list ids */
    vector<u32> reports_eod; /* per-state eod report list ids */
    ReportID arb;            /* representative report for the single-report case */
    u8 single;               /* set if all accepts fire the same single report */

    auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);
    map<dstate_id_t, AccelScheme> accel_escape_info
        = info.strat.getAccelInfo(cc.grey);

    /* Size each region of the bytecode. */
    size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size();
    size_t aux_size = sizeof(mstate_aux) * info.size();
    size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size);
    size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
    size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
                                    + ri->getReportListSize(), 32);
    size_t total_size = accel_offset + accel_size;

    DEBUG_PRINTF("aux_size %zu\n" , aux_size);
    DEBUG_PRINTF("aux_offset %zu\n" , aux_offset);
    DEBUG_PRINTF("rl size %u\n" , ri->getReportListSize());
    DEBUG_PRINTF("accel_size %zu\n" , accel_size);
    DEBUG_PRINTF("accel_offset %zu\n" , accel_offset);
    DEBUG_PRINTF("total_size %zu\n" , total_size);

    accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
    assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));

    auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
    char *nfa_base = (char *)nfa.get();

    mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get());

    /* Assign impl ids so normal/accel/accept states form contiguous
     * ranges delimited by accel_limit_8 / accept_limit_8. */
    allocateFSN8(info, accel_escape_info, &m->accel_limit_8,
                 &m->accept_limit_8);
    populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset,
                      accel_escape_info.size(), arb, single, nfa.get());

    vector<u32> reportOffsets;

    /* Write the report lists just after the aux array; reportOffsets
     * maps report list ids to their offsets in the bytecode. */
    ri->fillReportLists(nfa.get(), aux_offset + aux_size, reportOffsets);

    /* copy in the state information */
    u8 *succ_table = (u8 *)(nfa_base + sizeof(NFA) + sizeof(mcclellan));
    mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset);

    for (size_t i = 0; i < info.size(); i++) {
        if (contains(accel_escape_info, i)) {
            u32 j = info.implId(i);

            /* Claim space for this state's accel structure and build it
             * in place; accel_offset advances as each one is consumed. */
            aux[j].accel_offset = accel_offset;
            accel_offset += info.strat.accelSize();

            info.strat.buildAccel(i, accel_escape_info.at(i),
                                  (void *)((char *)m + aux[j].accel_offset));
        }

        fillInBasicState8(info, aux, succ_table, reportOffsets, reports,
                          reports_eod, i);
    }

    assert(accel_offset + sizeof(NFA) <= total_size);

    DEBUG_PRINTF("rl size %zu\n" , ri->size());

    if (accel_states && nfa) {
        fillAccelOut(accel_escape_info, accel_states);
    }

    return nfa;
}
999 | |
1000 | #define MAX_SHERMAN_LIST_LEN 9 |
1001 | |
1002 | static |
1003 | void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate, |
1004 | dstate_id_t max) { |
1005 | if (candidate < max) { |
1006 | dest.insert(candidate); |
1007 | } |
1008 | } |
1009 | |
1010 | static |
1011 | void addSuccessors(flat_set<dstate_id_t> &dest, const dstate &source, |
1012 | u16 alphasize, dstate_id_t curr_id) { |
1013 | for (symbol_t s = 0; s < alphasize; s++) { |
1014 | addIfEarlier(dest, source.next[s], curr_id); |
1015 | } |
1016 | } |
1017 | |
1018 | /* \brief Returns a set of states to search for a better daddy. */ |
1019 | static |
1020 | flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info, |
1021 | dstate_id_t curr_id) { |
1022 | flat_set<dstate_id_t> hinted; |
1023 | |
1024 | addIfEarlier(hinted, 0, curr_id); |
1025 | addIfEarlier(hinted, info.raw.start_anchored, curr_id); |
1026 | addIfEarlier(hinted, info.raw.start_floating, curr_id); |
1027 | |
1028 | // Add existing daddy and his successors, then search back one generation. |
1029 | const u16 alphasize = info.impl_alpha_size; |
1030 | dstate_id_t daddy = info.states[curr_id].daddy; |
1031 | for (u32 level = 0; daddy && level < 2; level++) { |
1032 | addIfEarlier(hinted, daddy, curr_id); |
1033 | addSuccessors(hinted, info.states[daddy], alphasize, curr_id); |
1034 | daddy = info.states[daddy].daddy; |
1035 | } |
1036 | |
1037 | return hinted; |
1038 | } |
1039 | |
1040 | #define MAX_SHERMAN_SELF_LOOP 20 |
1041 | |
/* \brief Attempt to pick a daddy for curr_id and mark it as a sherman
 * state when profitable.
 *
 * A sherman state is a compressed representation that stores only the
 * transitions differing from its daddy. This routine scores candidate
 * daddies by the number of transitions they share with curr_id, records
 * the best one, and sets extra[curr_id].shermanState when the remaining
 * differences fit in a sherman list. Several heuristics ban states that
 * are expected to be visited frequently (near the start states or with
 * wide self loops). */
static
void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit,
                       bool any_cyclic_near_anchored_state,
                       bool trust_daddy_states, const Grey &grey) {
    if (!grey.allowShermanStates) {
        return;
    }

    /* Width of one transition entry in the impl table. */
    const u16 width = using8bit ? sizeof(u8) : sizeof(u16);
    const u16 alphasize = info.impl_alpha_size;

    if (info.raw.start_anchored != DEAD_STATE
        && any_cyclic_near_anchored_state
        && curr_id < alphasize * 3) {
        /* crude attempt to prevent frequent states from being sherman'ed
         * depends on the fact that states are numbers are currently in bfs
         * order */
        DEBUG_PRINTF("%hu is banned\n" , curr_id);
        return;
    }

    if (info.raw.start_floating != DEAD_STATE
        && curr_id >= info.raw.start_floating
        && curr_id < info.raw.start_floating + alphasize * 3) {
        /* crude attempt to prevent frequent states from being sherman'ed
         * depends on the fact that states are numbers are currently in bfs
         * order */
        DEBUG_PRINTF("%hu is banned (%hu)\n" , curr_id, info.raw.start_floating);
        return;
    }

    /* Longest sherman list that still fits in the space of a full state
     * row, capped at MAX_SHERMAN_LIST_LEN. */
    const u16 full_state_size = width * alphasize;
    const u16 max_list_len = MIN(MAX_SHERMAN_LIST_LEN,
                                 (full_state_size - 2)/(width + 1));
    u16 best_score = 0;
    dstate_id_t best_daddy = 0;
    dstate &currState = info.states[curr_id];

    flat_set<dstate_id_t> hinted;
    if (trust_daddy_states) {
        // Use the daddy already set for this state so long as it isn't already
        // a Sherman state.
        dstate_id_t daddy = currState.daddy;
        if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) {
            hinted.insert(currState.daddy);
        } else {
            // Fall back to granddaddy, which has already been processed (due
            // to BFS ordering) and cannot be a Sherman state.
            dstate_id_t granddaddy = info.states[currState.daddy].daddy;
            if (info.is_widestate(granddaddy)) {
                return;
            }
            assert(!info.is_sherman(granddaddy));
            hinted.insert(granddaddy);
        }
    } else {
        hinted = find_daddy_candidates(info, curr_id);
    }

    /* Score each candidate: number of symbols on which donor and
     * curr_id agree about their successor. */
    for (const dstate_id_t &donor : hinted) {
        assert(donor < curr_id);
        u32 score = 0;

        if (info.is_sherman(donor) || info.is_widestate(donor)) {
            continue;
        }

        const dstate &donorState = info.states[donor];
        for (symbol_t s = 0; s < alphasize; s++) {
            if (currState.next[s] == donorState.next[s]) {
                score++;
            }
        }

        /* prefer lower ids to provide some stability amongst potential
         * siblings */
        if (score > best_score || (score == best_score && donor < best_daddy)) {
            best_daddy = donor;
            best_score = score;

            if (score == alphasize) {
                break; /* perfect match; cannot do better */
            }
        }
    }

    currState.daddy = best_daddy;
    info.extra[curr_id].daddytaken = best_score;
    DEBUG_PRINTF("%hu -> daddy %hu: %u/%u BF\n" , curr_id, best_daddy,
                 best_score, alphasize);

    /* Too many differing transitions for a sherman list. */
    if (best_score + max_list_len < alphasize) {
        return; /* ??? */
    }

    /* Don't chain shermans off shermans. */
    if (info.is_sherman(currState.daddy)) {
        return;
    }

    /* Count how many input characters loop back to this state; a wide
     * self loop suggests a frequently-occupied state. */
    u32 self_loop_width = 0;
    const dstate &curr_raw = info.states[curr_id];
    for (unsigned i = 0; i < N_CHARS; i++) {
        if (curr_raw.next[info.alpha_remap[i]] == curr_id) {
            self_loop_width++;
        }
    }

    if (self_loop_width > MAX_SHERMAN_SELF_LOOP) {
        DEBUG_PRINTF("%hu is banned wide self loop (%u)\n" , curr_id,
                     self_loop_width);
        return;
    }

    DEBUG_PRINTF("%hu is sherman\n" , curr_id);
    info.extra[curr_id].shermanState = true;
}
1158 | |
1159 | static |
1160 | bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { |
1161 | symbol_t alphasize = raw.getImplAlphaSize(); |
1162 | for (symbol_t s = 0; s < alphasize; s++) { |
1163 | dstate_id_t succ_id = raw.states[root].next[s]; |
1164 | if (succ_id == DEAD_STATE) { |
1165 | continue; |
1166 | } |
1167 | |
1168 | const dstate &succ = raw.states[succ_id]; |
1169 | for (symbol_t t = 0; t < alphasize; t++) { |
1170 | if (succ.next[t] == root || succ.next[t] == succ_id) { |
1171 | return true; |
1172 | } |
1173 | } |
1174 | } |
1175 | return false; |
1176 | } |
1177 | |
1178 | /* \brief Test for only-one-predecessor property. */ |
1179 | static |
1180 | bool check_property1(const DfaPrevInfo &info, const u16 impl_alpha_size, |
1181 | const dstate_id_t curr_id, dstate_id_t &prev_id, |
1182 | symbol_t &prev_sym) { |
1183 | u32 num_prev = 0; |
1184 | bool test_p1 = false; |
1185 | |
1186 | for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { |
1187 | num_prev += info.states[curr_id].prev_vec[sym].size(); |
1188 | DEBUG_PRINTF("Check symbol: %u, with its vector size: %lu\n" , sym, |
1189 | info.states[curr_id].prev_vec[sym].size()); |
1190 | if (num_prev == 1 && !test_p1) { |
1191 | test_p1 = true; |
1192 | prev_id = info.states[curr_id].prev_vec[sym].front(); //[0] for sure??? |
1193 | prev_sym = sym; |
1194 | } |
1195 | } |
1196 | |
1197 | return num_prev == 1; |
1198 | } |
1199 | |
1200 | /* \brief Test for same-failure-action property. */ |
1201 | static |
1202 | bool check_property2(const raw_dfa &rdfa, const u16 impl_alpha_size, |
1203 | const dstate_id_t curr_id, const dstate_id_t prev_id, |
1204 | const symbol_t curr_sym, const symbol_t prev_sym) { |
1205 | const dstate &prevState = rdfa.states[prev_id]; |
1206 | const dstate &currState = rdfa.states[curr_id]; |
1207 | |
1208 | // Compare transition tables between currState and prevState. |
1209 | u16 score = 0; |
1210 | for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { |
1211 | if (currState.next[sym] == prevState.next[sym] |
1212 | && sym != curr_sym && sym != prev_sym) { |
1213 | score++; |
1214 | } |
1215 | } |
1216 | DEBUG_PRINTF("(Score: %u/%u)\n" , score, impl_alpha_size); |
1217 | |
1218 | // 2 cases. |
1219 | if (curr_sym != prev_sym && score >= impl_alpha_size - 2 |
1220 | && currState.next[prev_sym] == prevState.next[curr_sym]) { |
1221 | return true; |
1222 | } else if (curr_sym == prev_sym && score == impl_alpha_size - 1) { |
1223 | return true; |
1224 | } |
1225 | return false; |
1226 | } |
1227 | |
1228 | /* \brief Check whether adding current prev_id will generate a circle.*/ |
1229 | static |
1230 | bool check_circle(const DfaPrevInfo &info, const u16 impl_alpha_size, |
1231 | const vector<dstate_id_t> &chain, const dstate_id_t id) { |
1232 | const vector<vector<dstate_id_t>> &prev_vec = info.states[id].prev_vec; |
1233 | const dstate_id_t tail = chain.front(); |
1234 | for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { |
1235 | auto iter = find(prev_vec[sym].begin(), prev_vec[sym].end(), tail); |
1236 | if (iter != prev_vec[sym].end()) { |
1237 | // Tail is one of id's predecessors, forming a circle. |
1238 | return true; |
1239 | } |
1240 | } |
1241 | return false; |
1242 | } |
1243 | |
/* \brief Returns a chain of state ids and symbols.
 *
 * Walks backwards from curr_id through unique predecessors, appending
 * each visited state to temp_chain, and stops at the first state that
 * fails one of the chain tests. The returned id is that head state
 * (which is also temp_chain's last appended entry). */
static
dstate_id_t find_chain_candidate(const raw_dfa &rdfa, const DfaPrevInfo &info,
                                 const dstate_id_t curr_id,
                                 const symbol_t curr_sym,
                                 vector<dstate_id_t> &temp_chain) {
    //Record current id first.
    temp_chain.push_back(curr_id);

    const u16 size = info.impl_alpha_size;

    // Stop when entering root cloud.
    if (rdfa.start_anchored != DEAD_STATE
        && is_cyclic_near(rdfa, rdfa.start_anchored)
        && curr_id < size) {
        return curr_id;
    }
    if (rdfa.start_floating != DEAD_STATE
        && curr_id >= rdfa.start_floating
        && curr_id < rdfa.start_floating + size * 3) {
        return curr_id;
    }

    // Stop when reaching anchored or floating.
    if (curr_id == rdfa.start_anchored || curr_id == rdfa.start_floating) {
        return curr_id;
    }

    /* Sentinel values; overwritten by check_property1 on success. */
    dstate_id_t prev_id = 0;
    symbol_t prev_sym = ALPHABET_SIZE;

    // Check the only-one-predecessor property.
    if (!check_property1(info, size, curr_id, prev_id, prev_sym)) {
        return curr_id;
    }
    assert(prev_id != 0 && prev_sym != ALPHABET_SIZE);
    DEBUG_PRINTF("(P1 test passed.)\n" );

    // Circle testing for the prev_id that passes the P1 test.
    if (check_circle(info, size, temp_chain, prev_id)) {
        DEBUG_PRINTF("(A circle is found.)\n" );
        return curr_id;
    }

    // Check the same-failure-action property.
    if (!check_property2(rdfa, size, curr_id, prev_id, curr_sym, prev_sym)) {
        return curr_id;
    }
    DEBUG_PRINTF("(P2 test passed.)\n" );

    /* Accept states terminate the chain; otherwise recurse backwards
     * through the unique predecessor. */
    if (!rdfa.states[prev_id].reports.empty()
        || !rdfa.states[prev_id].reports_eod.empty()) {
        return curr_id;
    } else {
        return find_chain_candidate(rdfa, info, prev_id, prev_sym, temp_chain);
    }
}
1301 | |
/* \brief Always store the non-extensible chains found till now.
 *
 * If head_is_new, temp_chain is appended to candidate_chain and all of
 * its states are marked in 'added'; returns true in that case. Otherwise
 * the existing chain with the same head is replaced if temp_chain is
 * longer, with the per-state marks in 'added' adjusted accordingly;
 * returns false. */
static
bool store_chain_longest(vector<vector<dstate_id_t>> &candidate_chain,
                         vector<dstate_id_t> &temp_chain,
                         dynamic_bitset<> &added, bool head_is_new) {
    dstate_id_t head = temp_chain.front();
    u16 length = temp_chain.size();

    if (head_is_new) {
        DEBUG_PRINTF("This is a new chain!\n" );

        // Add this new chain and get it marked.
        candidate_chain.push_back(temp_chain);

        for (auto &id : temp_chain) {
            DEBUG_PRINTF("(Marking s%u ...)\n" , id);
            added.set(id);
        }

        return true;
    }

    DEBUG_PRINTF("This is a longer chain!\n" );
    assert(!candidate_chain.empty());

    /* Locate the stored chain that shares this head. */
    auto chain = find_if(candidate_chain.begin(), candidate_chain.end(),
                         [&](const vector<dstate_id_t> &it) {
                             return it.front() == head;
                         });

    // Not a valid head, just do nothing and return.
    if (chain == candidate_chain.end()) {
        return false;
    }

    u16 len = chain->size();

    if (length > len) {
        // Find out the branch node first.
        // NOTE(review): piv indexes into *chain which only has len
        // (< length) elements; if the stored chain is a strict prefix of
        // temp_chain this reads past its end — confirm that the chains
        // are guaranteed to diverge before position len.
        size_t piv = 0;
        for (; piv < length; piv++) {
            if ((*chain)[piv] != temp_chain[piv]) {
                break;
            }
        }

        /* Mark the states on the new, longer branch. */
        for (size_t j = piv + 1; j < length; j++) {
            DEBUG_PRINTF("(Marking s%u (new branch) ...)\n" , temp_chain[j]);
            added.set(temp_chain[j]);
        }

        // Unmark old unuseful nodes.
        // (Except the tail node, which is in working queue)
        for (size_t j = piv + 1; j < verify_u16(len - 1); j++) {
            DEBUG_PRINTF("(UnMarking s%u (old branch)...)\n" , (*chain)[j]);
            added.reset((*chain)[j]);
        }

        /* Replace the stored chain with the longer one. */
        chain->assign(temp_chain.begin(), temp_chain.end());
    }

    return false;
}
1365 | |
/* \brief Generate wide_symbol_chain from wide_state_chain.
 *
 * For each stored state chain, records the symbol taken between each
 * consecutive pair of states and marks the participating states as wide
 * head/body states. chain_tail supplies the final symbol for each chain
 * (one entry per chain, pushed by find_wide_state). */
static
void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) {
    raw_dfa &rdfa = info.raw;
    assert(chain_tail.size() == info.wide_state_chain.size());

    for (size_t i = 0; i < info.wide_state_chain.size(); i++) {
        vector<dstate_id_t> &state_chain = info.wide_state_chain[i];
        vector<symbol_t> symbol_chain;

        info.extra[state_chain[0]].wideHead = true;
        /* One symbol per transition: a chain of N states has N-1 edges. */
        size_t width = state_chain.size() - 1;

        for (size_t j = 0; j < width; j++) {
            dstate_id_t curr_id = state_chain[j];
            dstate_id_t next_id = state_chain[j + 1];

            // The last state of the chain doesn't belong to a wide state.
            info.extra[curr_id].wideState = true;

            // The tail symbol comes from vector chain_tail;
            if (j == width - 1) {
                symbol_chain.push_back(chain_tail[i]);
            } else {
                /* Find the symbol that takes curr_id to next_id. By
                 * construction (only-one-predecessor chains) one exists. */
                for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) {
                    if (rdfa.states[curr_id].next[sym] == next_id) {
                        symbol_chain.push_back(sym);
                        break;
                    }
                }
            }
        }

        info.wide_symbol_chain.push_back(symbol_chain);
    }
}
1402 | |
/* \brief Find potential regions of states to be packed into wide states.
 *
 * Works backwards from the accept states via a BFS-style worklist: for
 * each popped state, every unmarked predecessor is used as the tail of a
 * candidate chain (built by find_chain_candidate). Chains of at least 8
 * states are stored; the head of each stored chain joins the worklist so
 * that chains can extend further back. Finally the per-chain symbol
 * sequences are derived via generate_symbol_chain. */
static
void find_wide_state(dfa_info &info) {
    DfaPrevInfo dinfo(info.raw);
    queue<dstate_id_t> work_queue;

    /* 'added' marks states already claimed by some chain or queued. */
    dynamic_bitset<> added(info.raw.states.size());
    for (auto it : dinfo.accepts) {
        work_queue.push(it);
        added.set(it);
    }

    /* Final symbol of each stored chain, parallel to wide_state_chain. */
    vector<symbol_t> chain_tail;
    while (!work_queue.empty()) {
        dstate_id_t curr_id = work_queue.front();
        work_queue.pop();
        DEBUG_PRINTF("Newly popped state: s%u\n" , curr_id);

        for (symbol_t sym = 0; sym < dinfo.impl_alpha_size; sym++) {
            for (auto info_it : dinfo.states[curr_id].prev_vec[sym]) {
                if (added.test(info_it)) {
                    DEBUG_PRINTF("(s%u already marked.)\n" , info_it);
                    continue;
                }

                vector<dstate_id_t> temp_chain;
                // Head is a state failing the test of the chain.
                dstate_id_t head = find_chain_candidate(info.raw, dinfo,
                                                        info_it, sym,
                                                        temp_chain);

                // A candidate chain should contain 8 substates at least.
                if (temp_chain.size() < 8) {
                    DEBUG_PRINTF("(Not enough substates, continue.)\n" );
                    continue;
                }

                bool head_is_new = !added.test(head);
                if (head_is_new) {
                    added.set(head);
                    work_queue.push(head);
                    DEBUG_PRINTF("Newly pushed state: s%u\n" , head);
                }

                /* temp_chain was built tail-to-head; flip it and append
                 * the current state as the final link. */
                reverse(temp_chain.begin(), temp_chain.end());
                temp_chain.push_back(curr_id);

                assert(head > 0 && head == temp_chain.front());
                if (store_chain_longest(info.wide_state_chain, temp_chain,
                                        added, head_is_new)) {
                    chain_tail.push_back(sym);
                }
            }
        }
    }

    generate_symbol_chain(info, chain_tail);
}
1461 | |
/* \brief Common McClellan compilation path.
 *
 * Chooses between the 8-bit and 16-bit implementations based on state
 * count, optionally detects wide-state regions and sherman states for
 * the 16-bit case, and returns the finished engine bytecode. */
bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat,
                                     const CompileContext &cc,
                                     bool trust_daddy_states,
                                     set<dstate_id_t> *accel_states) {
    assert(!is_dead(raw));

    dfa_info info(strat);
    /* 8-bit impl requires all state ids to fit in a u8. */
    bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256;

    if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming
                          * mode with our semantics */
        raw.stripExtraEodReports();
    }

    /* Note: queried before compilation as stripping may have changed it. */
    bool has_eod_reports = raw.hasEodReports();

    bytecode_ptr<NFA> nfa;
    if (!using8bit) {
        /* Wide-state packing is only attempted for plain, untriggered
         * McClellan builds. */
        if (cc.grey.allowWideStates && strat.getType() == McClellan
            && !is_triggered(raw.kind)) {
            find_wide_state(info);
        }

        u16 total_daddy = 0;
        bool any_cyclic_near_anchored_state
            = is_cyclic_near(raw, raw.start_anchored);

        /* Pick daddies / sherman states; wide states are left alone. */
        for (u32 i = 0; i < info.size(); i++) {
            if (info.is_widestate(i)) {
                continue;
            }
            find_better_daddy(info, i, using8bit,
                              any_cyclic_near_anchored_state,
                              trust_daddy_states, cc.grey);
            total_daddy += info.extra[i].daddytaken;
        }

        DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n" , total_daddy,
                     info.size() * info.impl_alpha_size, info.size(),
                     info.impl_alpha_size);

        nfa = mcclellanCompile16(info, cc, accel_states);
    } else {
        nfa = mcclellanCompile8(info, cc, accel_states);
    }

    if (has_eod_reports) {
        nfa->flags |= NFA_ACCEPTS_EOD;
    }

    DEBUG_PRINTF("compile done\n" );
    return nfa;
}
1515 | |
1516 | bytecode_ptr<NFA> mcclellanCompile(raw_dfa &raw, const CompileContext &cc, |
1517 | const ReportManager &rm, |
1518 | bool only_accel_init, |
1519 | bool trust_daddy_states, |
1520 | set<dstate_id_t> *accel_states) { |
1521 | mcclellan_build_strat mbs(raw, rm, only_accel_init); |
1522 | return mcclellanCompile_i(raw, mbs, cc, trust_daddy_states, accel_states); |
1523 | } |
1524 | |
1525 | size_t mcclellan_build_strat::accelSize(void) const { |
1526 | return sizeof(AccelAux); /* McClellan accel structures are just bare |
1527 | * accelaux */ |
1528 | } |
1529 | |
1530 | u32 mcclellanStartReachSize(const raw_dfa *raw) { |
1531 | if (raw->states.size() < 2) { |
1532 | return 0; |
1533 | } |
1534 | |
1535 | const dstate &ds = raw->states[raw->start_anchored]; |
1536 | |
1537 | CharReach out; |
1538 | for (unsigned i = 0; i < N_CHARS; i++) { |
1539 | if (ds.next[raw->alpha_remap[i]] != DEAD_STATE) { |
1540 | out.set(i); |
1541 | } |
1542 | } |
1543 | |
1544 | return out.count(); |
1545 | } |
1546 | |
1547 | bool has_accel_mcclellan(const NFA *nfa) { |
1548 | const mcclellan *m = (const mcclellan *)getImplNfa(nfa); |
1549 | return m->has_accel; |
1550 | } |
1551 | |
1552 | } // namespace ue2 |
1553 | |