1 | /* |
2 | * Copyright (c) 2016-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "mcsheng_compile.h" |
30 | |
31 | #include "accel.h" |
32 | #include "accelcompile.h" |
33 | #include "grey.h" |
34 | #include "mcclellancompile.h" |
35 | #include "mcclellancompile_util.h" |
36 | #include "mcsheng_internal.h" |
37 | #include "nfa_internal.h" |
38 | #include "rdfa_graph.h" |
39 | #include "shufticompile.h" |
40 | #include "trufflecompile.h" |
41 | #include "ue2common.h" |
42 | #include "util/alloc.h" |
43 | #include "util/bitutils.h" |
44 | #include "util/charreach.h" |
45 | #include "util/compare.h" |
46 | #include "util/compile_context.h" |
47 | #include "util/container.h" |
48 | #include "util/flat_containers.h" |
49 | #include "util/graph.h" |
50 | #include "util/graph_range.h" |
51 | #include "util/make_unique.h" |
52 | #include "util/order_check.h" |
53 | #include "util/report_manager.h" |
54 | #include "util/unaligned.h" |
55 | #include "util/unordered.h" |
56 | #include "util/verify_types.h" |
57 | |
58 | #include <algorithm> |
59 | #include <cstdio> |
60 | #include <cstdlib> |
61 | #include <cstring> |
62 | #include <map> |
63 | #include <memory> |
64 | #include <set> |
65 | #include <deque> |
66 | #include <vector> |
67 | |
68 | #include <boost/range/adaptor/map.hpp> |
69 | |
70 | using namespace std; |
71 | using boost::adaptors::map_keys; |
72 | |
73 | namespace ue2 { |
74 | |
75 | namespace /* anon */ { |
76 | |
77 | #define MIN_SHENG_SIZE 6 |
78 | #define INVALID_SHENG_ID 255 |
79 | |
80 | struct { |
81 | u16 = 0; |
82 | bool = false; |
83 | bool = false; |
84 | u8 = INVALID_SHENG_ID; |
85 | }; |
86 | |
87 | struct dfa_info { |
88 | accel_dfa_build_strat &strat; |
89 | raw_dfa &raw; |
90 | vector<dstate> &states; |
91 | vector<dstate_extra> ; |
92 | const u16 alpha_size; /* including special symbols */ |
93 | const array<u16, ALPHABET_SIZE> &alpha_remap; |
94 | vector<CharReach> rev_alpha; |
95 | const u16 impl_alpha_size; |
96 | |
97 | u8 getAlphaShift() const; |
98 | |
99 | explicit dfa_info(accel_dfa_build_strat &s) |
100 | : strat(s), |
101 | raw(s.get_raw()), |
102 | states(raw.states), |
103 | extra(raw.states.size()), |
104 | alpha_size(raw.alpha_size), |
105 | alpha_remap(raw.alpha_remap), |
106 | impl_alpha_size(raw.getImplAlphaSize()) { |
107 | rev_alpha.resize(impl_alpha_size); |
108 | for (u32 i = 0; i < N_CHARS; i++) { |
109 | rev_alpha[alpha_remap[i]].set(i); |
110 | } |
111 | } |
112 | |
113 | dstate_id_t implId(dstate_id_t raw_id) const { |
114 | return states[raw_id].impl_id; |
115 | } |
116 | |
117 | bool is_sherman(dstate_id_t raw_id) const { |
118 | return extra[raw_id].shermanState; |
119 | } |
120 | |
121 | bool is_sheng(dstate_id_t raw_id) const { |
122 | return extra[raw_id].sheng_id != INVALID_SHENG_ID; |
123 | } |
124 | |
125 | bool is_sheng_succ(dstate_id_t raw_id) const { |
126 | return extra[raw_id].sheng_succ; |
127 | } |
128 | |
129 | /* states which use the normal transition/successor table */ |
130 | bool is_normal(dstate_id_t raw_id) const { |
131 | return raw_id != DEAD_STATE && !is_sheng(raw_id) && !is_sherman(raw_id); |
132 | } |
133 | size_t size(void) const { return states.size(); } |
134 | }; |
135 | |
136 | u8 dfa_info::getAlphaShift() const { |
137 | if (impl_alpha_size < 2) { |
138 | return 1; |
139 | } else { |
140 | /* log2 round up */ |
141 | return 32 - clz32(impl_alpha_size - 1); |
142 | } |
143 | } |
144 | |
145 | } // namespace |
146 | |
147 | static |
148 | mstate_aux *getAux(NFA *n, dstate_id_t i) { |
149 | mcsheng *m = (mcsheng *)getMutableImplNfa(n); |
150 | mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); |
151 | |
152 | mstate_aux *aux = aux_base + i; |
153 | assert((const char *)aux < (const char *)n + m->length); |
154 | return aux; |
155 | } |
156 | |
/* Builds the per-character shuffle masks for the sheng portion of the engine:
 * for each character, mask[sheng_id] holds the next sheng-local state id.
 * Also records the sheng region limits in the mcsheng header. */
static
void createShuffleMasks(mcsheng *m, const dfa_info &info,
                        dstate_id_t sheng_end,
                        const map<dstate_id_t, AccelScheme> &accel_escape_info) {
    DEBUG_PRINTF("using first %hu states for a sheng\n", sheng_end);
    assert(sheng_end > DEAD_STATE + 1);
    assert(sheng_end <= sizeof(m128) + 1);
    vector<array<u8, sizeof(m128)>> masks;
    masks.resize(info.alpha_size);
    /* -1 to avoid wasting a slot as we do not include dead state */
    vector<dstate_id_t> raw_ids;
    raw_ids.resize(sheng_end - 1);
    /* build the sheng id -> raw dfa state id mapping */
    for (dstate_id_t s = DEAD_STATE + 1; s < info.states.size(); s++) {
        assert(info.implId(s)); /* should not map to DEAD_STATE */
        if (info.is_sheng(s)) {
            raw_ids[info.extra[s].sheng_id] = s;
        }
    }
    for (u32 i = 0; i < info.alpha_size; i++) {
        if (i == info.alpha_remap[TOP]) {
            continue; /* TOP is not handled within the sheng */
        }
        auto &mask = masks[i];
        assert(sizeof(mask) == sizeof(m128));
        mask.fill(0);

        for (dstate_id_t sheng_id = 0; sheng_id < sheng_end - 1; sheng_id++) {
            dstate_id_t raw_id = raw_ids[sheng_id];
            dstate_id_t next_id = info.implId(info.states[raw_id].next[i]);
            if (next_id == DEAD_STATE) {
                /* dead is remapped to the last sheng slot */
                next_id = sheng_end - 1;
            } else if (next_id < sheng_end) {
                next_id--; /* adjust: dead does not occupy a mask slot */
            }
            DEBUG_PRINTF("%hu: %u->next %hu\n", sheng_id, i, next_id);
            mask[sheng_id] = verify_u8(next_id);
        }
    }
    /* expand per-symbol masks to one mask per raw character */
    for (u32 i = 0; i < N_CHARS; i++) {
        assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
        memcpy((u8 *)&m->sheng_masks[i],
               (u8 *)masks[info.alpha_remap[i]].data(), sizeof(m128));
    }
    m->sheng_end = sheng_end;
    m->sheng_accel_limit = sheng_end - 1;

    /* lower the accel limit to the smallest sheng id with an accel scheme */
    for (dstate_id_t s : raw_ids) {
        if (contains(accel_escape_info, s)) {
            LIMIT_TO_AT_MOST(&m->sheng_accel_limit, info.extra[s].sheng_id);
        }
    }
}
209 | |
/* Fills in the common NFA header and mcsheng header fields: sizes, offsets,
 * the character remap table, start states and flags. */
static
void populateBasicInfo(size_t state_size, const dfa_info &info,
                       u32 total_size, u32 aux_offset, u32 accel_offset,
                       u32 accel_count, ReportID arb, bool single, NFA *nfa) {
    assert(state_size == sizeof(u16) || state_size == sizeof(u8));

    nfa->length = total_size;
    nfa->nPositions = info.states.size();

    nfa->scratchStateSize = verify_u32(state_size);
    nfa->streamStateSize = verify_u32(state_size);

    /* state id width selects the engine variant */
    if (state_size == sizeof(u8)) {
        nfa->type = MCSHENG_NFA_8;
    } else {
        nfa->type = MCSHENG_NFA_16;
    }

    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
    for (u32 i = 0; i < 256; i++) {
        m->remap[i] = verify_u8(info.alpha_remap[i]);
    }
    m->alphaShift = info.getAlphaShift();
    m->length = total_size;
    m->aux_offset = aux_offset;
    m->accel_offset = accel_offset;
    m->arb_report = arb; /* arbitrary report id (see gatherReports) */
    m->state_count = verify_u16(info.size());
    m->start_anchored = info.implId(info.raw.start_anchored);
    m->start_floating = info.implId(info.raw.start_floating);
    m->has_accel = accel_count ? 1 : 0;

    if (single) {
        m->flags |= MCSHENG_FLAG_SINGLE;
    }
}
246 | |
247 | static |
248 | size_t calcShermanRegionSize(const dfa_info &info) { |
249 | size_t rv = 0; |
250 | |
251 | for (size_t i = 0; i < info.size(); i++) { |
252 | if (info.is_sherman(i)) { |
253 | rv += SHERMAN_FIXED_SIZE; |
254 | } |
255 | } |
256 | |
257 | return ROUNDUP_16(rv); |
258 | } |
259 | |
/* Populates the aux entry for state i: offsets of its report lists and its
 * TOP successor. */
static
void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info,
               const vector<u32> &reports, const vector<u32> &reports_eod,
               const vector<u32> &reportOffsets) {
    const dstate &raw_state = info.states[i];
    /* offset 0 means 'no reports' for both accept fields */
    aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]];
    aux->accept_eod = raw_state.reports_eod.empty() ? 0
                                                : reportOffsets[reports_eod[i]];
    /* the dead state (i == 0) tops to the floating start state */
    aux->top = info.implId(i ? raw_state.next[info.alpha_remap[TOP]]
                             : info.raw.start_floating);
}
271 | |
272 | /* returns false on error */ |
273 | static |
274 | bool allocateImplId16(dfa_info &info, dstate_id_t sheng_end, |
275 | dstate_id_t *sherman_base) { |
276 | info.states[0].impl_id = 0; /* dead is always 0 */ |
277 | |
278 | vector<dstate_id_t> norm; |
279 | vector<dstate_id_t> sherm; |
280 | vector<dstate_id_t> norm_sheng_succ; |
281 | vector<dstate_id_t> sherm_sheng_succ; |
282 | |
283 | if (info.size() > (1 << 16)) { |
284 | DEBUG_PRINTF("too many states\n" ); |
285 | *sherman_base = 0; |
286 | return false; |
287 | } |
288 | |
289 | for (u32 i = 1; i < info.size(); i++) { |
290 | if (info.is_sheng(i)) { |
291 | continue; /* sheng impl ids have already been allocated */ |
292 | } if (info.is_sherman(i)) { |
293 | if (info.is_sheng_succ(i)) { |
294 | sherm_sheng_succ.push_back(i); |
295 | } else { |
296 | sherm.push_back(i); |
297 | } |
298 | } else { |
299 | if (info.is_sheng_succ(i)) { |
300 | norm_sheng_succ.push_back(i); |
301 | } else { |
302 | norm.push_back(i); |
303 | } |
304 | } |
305 | } |
306 | |
307 | dstate_id_t next_norm = sheng_end; |
308 | for (dstate_id_t s : norm_sheng_succ) { |
309 | info.states[s].impl_id = next_norm++; |
310 | } |
311 | if (next_norm + norm.size() + sherm_sheng_succ.size() > UINT8_MAX) { |
312 | /* we need to give sheng_succs ids which fit into a u8 -- demote these |
313 | * to normal states */ |
314 | for (dstate_id_t s : sherm_sheng_succ) { |
315 | info.states[s].impl_id = next_norm++; |
316 | info.extra[s].shermanState = false; |
317 | } |
318 | sherm_sheng_succ.clear(); |
319 | } |
320 | for (dstate_id_t s : norm) { |
321 | info.states[s].impl_id = next_norm++; |
322 | } |
323 | |
324 | *sherman_base = next_norm; |
325 | dstate_id_t next_sherman = next_norm; |
326 | |
327 | for (dstate_id_t s : sherm_sheng_succ) { |
328 | info.states[s].impl_id = next_sherman++; |
329 | } |
330 | |
331 | for (dstate_id_t s : sherm) { |
332 | info.states[s].impl_id = next_sherman++; |
333 | } |
334 | |
335 | /* Check to see if we haven't over allocated our states */ |
336 | DEBUG_PRINTF("next sherman %u masked %u\n" , next_sherman, |
337 | (dstate_id_t)(next_sherman & STATE_MASK)); |
338 | return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK); |
339 | } |
340 | |
/* vertex handle into the graph view of the raw dfa */
typedef RdfaGraph::vertex_descriptor RdfaVertex;
342 | |
/* Marks every non-sheng state directly reachable from the sheng region as a
 * sheng successor. Returns false if there are too many such states for their
 * impl ids to fit in a byte. */
static
bool mark_sheng_succs(const RdfaGraph &g, dfa_info &info,
                      const flat_set<RdfaVertex> &sheng_states) {
    u32 exit_count = 0;

    for (auto v : sheng_states) {
        dstate_id_t s = g[v].index;
        for (u32 i = 0; i != info.alpha_size; i++) {
            if (i == info.alpha_remap[TOP]) {
                continue;
            }
            dstate_id_t next = info.states[s].next[i];
            /* skip dead, in-region, and already-marked successors */
            if (!next || info.is_sheng(next) || info.is_sheng_succ(next)) {
                continue;
            }
            exit_count++;
            info.extra[next].sheng_succ = true;
        }
    }

    /* sheng states plus their exits must all be numberable in a u8 */
    if (exit_count + sheng_states.size() < UINT8_MAX) {
        return true;
    } else {
        DEBUG_PRINTF("fail: unable to fit %u exits in byte", exit_count);
        return false;
    }
}
370 | |
371 | static |
372 | CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) { |
373 | CharReach rv; |
374 | for (u32 i = 0; i < info.impl_alpha_size; i++) { |
375 | if (info.raw.states[u].next[i] == v) { |
376 | assert(info.rev_alpha[i].any()); |
377 | rv |= info.rev_alpha[i]; |
378 | } |
379 | } |
380 | assert(rv.any()); |
381 | return rv; |
382 | } |
383 | |
384 | #define MAX_SHENG_STATES 16 |
385 | #define MAX_SHENG_LEAKINESS 0.05 |
386 | |
387 | using LeakinessCache = ue2_unordered_map<pair<RdfaVertex, u32>, double>; |
388 | |
/**
 * Returns the proportion of strings of length 'depth' which will leave the
 * sheng region when starting at state 'u'. Results are memoised in 'cache',
 * keyed on (vertex, depth).
 */
static
double leakiness(const RdfaGraph &g, dfa_info &info,
                 const flat_set<RdfaVertex> &sheng_states, RdfaVertex u,
                 u32 depth, LeakinessCache &cache) {
    double rv = 0;
    if (contains(cache, make_pair(u, depth))) {
        return cache[make_pair(u, depth)];
    }
    for (RdfaVertex v : adjacent_vertices_range(u, g)) {
        if (g[v].index == DEAD_STATE) {
            continue; /* dying is not leaving the region */
        }
        /* fraction of input characters which take this edge */
        double width = get_edge_reach(g[u].index, g[v].index, info).count();
        width /= N_CHARS;

        double weight;
        if (!contains(sheng_states, v)) {
            weight = 1; /* leaves the region immediately */
        } else if (depth > 1) {
            /* still inside: recurse with one fewer character to consume */
            weight = leakiness(g, info, sheng_states, v, depth - 1, cache);
        } else {
            continue; /* weight = 0 */
        }
        rv += width * weight;
    }

    cache[make_pair(u, depth)] = rv;
    DEBUG_PRINTF("%zu [%u] q = %g\n", g[u].index, depth, rv);
    return rv;
}
423 | |
424 | /** |
425 | * Returns the proportion of 8 byte strings which will leave the sheng region |
426 | * when starting at state 'u'. |
427 | */ |
428 | static |
429 | double leakiness(const RdfaGraph &g, dfa_info &info, |
430 | const flat_set<RdfaVertex> &sheng_states, RdfaVertex u) { |
431 | LeakinessCache cache; |
432 | double rv = leakiness(g, info, sheng_states, u, 8, cache); |
433 | return rv; |
434 | } |
435 | |
/* Attempts to select the set of states which will be implemented by the
 * sheng: picks a suitably 'sticky' cyclic state, grows a region around it by
 * BFS, allocates sheng impl ids (non-accel states first) and validates the
 * result. Returns the first impl id after the sheng region, or DEAD_STATE if
 * no usable region was found. */
static
dstate_id_t find_sheng_states(dfa_info &info,
                              map<dstate_id_t, AccelScheme> &accel_escape_info) {
    RdfaGraph g(info.raw);
    auto cyclics = find_vertices_in_cycles(g);

    auto base_cyclic = RdfaGraph::null_vertex();
    for (const auto &v : cyclics) {
        if (g[v].index == DEAD_STATE) {
            continue;
        }
        DEBUG_PRINTF("considering cyclic %zu\n", g[v].index);
        /* get an estimate of stickness of the cyclic: assume any edges from
         * states with larger state ids are back edges */
        CharReach est_back_reach;
        for (const auto &u : inv_adjacent_vertices_range(v, g)) {
            if (g[u].index < g[v].index) {
                continue;
            }
            est_back_reach |= get_edge_reach(g[u].index, g[v].index, info);
        }

        if (est_back_reach.count() < 30) {
            continue;
        }
        base_cyclic = v;
        break;
    }
    if (!base_cyclic) {
        return DEAD_STATE;
    }

    /* grow the region by BFS outwards from the base cyclic, up to the size
     * limit */
    flat_set<RdfaVertex> sheng_states;
    deque<RdfaVertex> to_consider = { base_cyclic };
    flat_set<dstate_id_t> considered = { DEAD_STATE };
    bool seen_back_edge = false;
    while (!to_consider.empty()
           && sheng_states.size() < MAX_SHENG_STATES) {
        auto v = to_consider.front();
        to_consider.pop_front();
        if (!considered.insert(g[v].index).second) {
            continue;
        }

        assert(!contains(sheng_states, v));

        if (generates_callbacks(info.raw.kind)
            && !info.states[g[v].index].reports.empty()) {
            /* cannot raise callbacks from sheng region */
            continue;
        }

        sheng_states.insert(v);
        for (const auto &t : adjacent_vertices_range(v, g)) {
            if (!contains(considered, g[t].index)) {
                to_consider.push_back(t);
            }
            if (t == base_cyclic) {
                seen_back_edge = true;
            }
        }
    }

    /* allocate normal ids */
    dstate_id_t sheng_end = DEAD_STATE + 1;
    for (auto v : sheng_states) {
        dstate_id_t s = g[v].index;
        if (!contains(accel_escape_info, s)) {
            info.states[s].impl_id = sheng_end++;
            info.extra[s].sheng_id = info.states[s].impl_id - 1;
        }
    }

    /* allocate accel ids */
    for (auto v : sheng_states) {
        dstate_id_t s = g[v].index;
        if (contains(accel_escape_info, s)) {
            assert(!info.states[s].impl_id);
            info.states[s].impl_id = sheng_end++;
            info.extra[s].sheng_id = info.states[s].impl_id - 1;
        }
    }

    if (sheng_states.size() < MIN_SHENG_SIZE) {
        DEBUG_PRINTF("sheng region too small\n");
        return DEAD_STATE;
    }

    if (!seen_back_edge) {
        DEBUG_PRINTF("did not include cyclic\n");
        return DEAD_STATE;
    }

    /* reject regions which most strings escape from quickly */
    double leak = leakiness(g, info, sheng_states, base_cyclic);
    if (leak > MAX_SHENG_LEAKINESS) {
        DEBUG_PRINTF("too leaky (%g)\n", leak);
        return DEAD_STATE;
    }

    if (!mark_sheng_succs(g, info, sheng_states)) {
        return DEAD_STATE;
    }

    /* TODO: ensure sufficiently 'sticky' */
    /* TODO: check not all states accel */
    DEBUG_PRINTF("sheng_end = %hu\n", sheng_end);
    return sheng_end;
}
544 | |
/* Fills in the aux entry for every state (report offsets, top successor) and
 * builds the accel structures for states with an accel scheme. */
static
void fill_in_aux_info(NFA *nfa, const dfa_info &info,
                      const map<dstate_id_t, AccelScheme> &accel_escape_info,
                      u32 accel_offset, UNUSED u32 accel_end_offset,
                      const vector<u32> &reports,
                      const vector<u32> &reports_eod,
                      u32 report_base_offset,
                      const raw_report_info &ri) {
    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);

    vector<u32> reportOffsets;

    ri.fillReportLists(nfa, report_base_offset, reportOffsets);

    for (u32 i = 0; i < info.size(); i++) {
        u16 impl_id = info.implId(i);
        mstate_aux *this_aux = getAux(nfa, impl_id);

        fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);
        if (contains(accel_escape_info, i)) {
            /* carve the next slot out of the accel region */
            this_aux->accel_offset = accel_offset;
            accel_offset += info.strat.accelSize();
            assert(accel_offset <= accel_end_offset);
            assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
            info.strat.buildAccel(i, accel_escape_info.at(i),
                                  (void *)((char *)m + this_aux->accel_offset));
        }
    }
}
574 | |
575 | static |
576 | u16 get_edge_flags(NFA *nfa, dstate_id_t target_impl_id) { |
577 | mstate_aux *aux = getAux(nfa, target_impl_id); |
578 | u16 flags = 0; |
579 | |
580 | if (aux->accept) { |
581 | flags |= ACCEPT_FLAG; |
582 | } |
583 | |
584 | if (aux->accel_offset) { |
585 | flags |= ACCEL_FLAG; |
586 | } |
587 | |
588 | return flags; |
589 | } |
590 | |
/* Fills in the 16-bit successor table for normal states; each entry is the
 * successor's impl id with accept/accel flags folded in. */
static
void fill_in_succ_table_16(NFA *nfa, const dfa_info &info,
                           dstate_id_t sheng_end,
                           UNUSED dstate_id_t sherman_base) {
    u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng));

    u8 alphaShift = info.getAlphaShift();
    assert(alphaShift <= 8);

    for (size_t i = 0; i < info.size(); i++) {
        if (!info.is_normal(i)) {
            /* sheng and sherman states do not use the normal table */
            assert(info.implId(i) < sheng_end || info.is_sherman(i));
            continue;
        }

        /* rows are indexed by impl id relative to the end of the sheng */
        assert(info.implId(i) < sherman_base);
        u16 normal_id = verify_u16(info.implId(i) - sheng_end);

        for (size_t s = 0; s < info.impl_alpha_size; s++) {
            dstate_id_t raw_succ = info.states[i].next[s];
            u16 &entry = succ_table[((size_t)normal_id << alphaShift) + s];

            entry = info.implId(raw_succ);
            entry |= get_edge_flags(nfa, entry);
        }
    }
}
618 | |
619 | #define MAX_SHERMAN_LIST_LEN 8 |
620 | |
621 | static |
622 | void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate, |
623 | dstate_id_t max) { |
624 | if (candidate < max) { |
625 | dest.insert(candidate); |
626 | } |
627 | } |
628 | |
629 | static |
630 | void addSuccessors(flat_set<dstate_id_t> &dest, const dstate &source, |
631 | u16 alphasize, dstate_id_t curr_id) { |
632 | for (symbol_t s = 0; s < alphasize; s++) { |
633 | addIfEarlier(dest, source.next[s], curr_id); |
634 | } |
635 | } |
636 | |
637 | /* \brief Returns a set of states to search for a better daddy. */ |
638 | static |
639 | flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info, |
640 | dstate_id_t curr_id) { |
641 | flat_set<dstate_id_t> hinted; |
642 | |
643 | addIfEarlier(hinted, 0, curr_id); |
644 | addIfEarlier(hinted, info.raw.start_anchored, curr_id); |
645 | addIfEarlier(hinted, info.raw.start_floating, curr_id); |
646 | |
647 | // Add existing daddy and his successors, then search back one generation. |
648 | const u16 alphasize = info.impl_alpha_size; |
649 | dstate_id_t daddy = info.states[curr_id].daddy; |
650 | for (u32 level = 0; daddy && level < 2; level++) { |
651 | addIfEarlier(hinted, daddy, curr_id); |
652 | addSuccessors(hinted, info.states[daddy], alphasize, curr_id); |
653 | daddy = info.states[daddy].daddy; |
654 | } |
655 | |
656 | return hinted; |
657 | } |
658 | |
659 | #define MAX_SHERMAN_SELF_LOOP 20 |
660 | |
/* Attempts to find a daddy for curr_id: an already-numbered normal state
 * which shares most of curr_id's successors. If enough successors match (and
 * the state is not banned for other reasons), curr_id is marked as a sherman
 * state, which will store only the successors that differ from its daddy. */
static
void find_better_daddy(dfa_info &info, dstate_id_t curr_id,
                       bool any_cyclic_near_anchored_state, const Grey &grey) {
    if (!grey.allowShermanStates) {
        return;
    }

    const u16 width = sizeof(u16);
    const u16 alphasize = info.impl_alpha_size;

    if (info.raw.start_anchored != DEAD_STATE
        && any_cyclic_near_anchored_state
        && curr_id < alphasize * 3) {
        /* crude attempt to prevent frequent states from being sherman'ed
         * depends on the fact that states are numbers are currently in bfs
         * order */
        DEBUG_PRINTF("%hu is banned\n", curr_id);
        return;
    }

    if (info.raw.start_floating != DEAD_STATE
        && curr_id >= info.raw.start_floating
        && curr_id < info.raw.start_floating + alphasize * 3) {
        /* crude attempt to prevent frequent states from being sherman'ed
         * depends on the fact that states are numbers are currently in bfs
         * order */
        DEBUG_PRINTF("%hu is banned (%hu)\n", curr_id, info.raw.start_floating);
        return;
    }

    /* maximum number of divergent successors a sherman entry can hold */
    const u16 full_state_size = width * alphasize;
    const u16 max_list_len = MIN(MAX_SHERMAN_LIST_LEN,
                                 (full_state_size - 2)/(width + 1));
    u16 best_score = 0;
    dstate_id_t best_daddy = 0;
    dstate &currState = info.states[curr_id];

    flat_set<dstate_id_t> hinted = find_daddy_candidates(info, curr_id);

    for (const dstate_id_t &donor : hinted) {
        assert(donor < curr_id);
        u32 score = 0;

        if (!info.is_normal(donor)) {
            continue;
        }

        /* score = number of successors shared with the donor */
        const dstate &donorState = info.states[donor];
        for (symbol_t s = 0; s < alphasize; s++) {
            if (currState.next[s] == donorState.next[s]) {
                score++;
            }
        }

        /* prefer lower ids to provide some stability amongst potential
         * siblings */
        if (score > best_score || (score == best_score && donor < best_daddy)) {
            best_daddy = donor;
            best_score = score;

            if (score == alphasize) {
                break; /* perfect match, cannot do better */
            }
        }
    }

    currState.daddy = best_daddy;
    info.extra[curr_id].daddytaken = best_score;
    DEBUG_PRINTF("%hu -> daddy %hu: %u/%u BF\n", curr_id, best_daddy,
                 best_score, alphasize);

    if (best_daddy == DEAD_STATE) {
        return; /* No good daddy */
    }

    if (best_score + max_list_len < alphasize) {
        return; /* ??? */
    }

    assert(info.is_normal(currState.daddy));

    /* count the characters which self-loop on this state */
    u32 self_loop_width = 0;
    const dstate &curr_raw = info.states[curr_id];
    for (unsigned i = 0; i < N_CHARS; i++) {
        if (curr_raw.next[info.alpha_remap[i]] == curr_id) {
            self_loop_width++;
        }
    }

    if (self_loop_width > MAX_SHERMAN_SELF_LOOP) {
        DEBUG_PRINTF("%hu is banned wide self loop (%u)\n", curr_id,
                     self_loop_width);
        return;
    }

    if (info.is_sheng(curr_id)) {
        return; /* sheng states are not built as shermans */
    }

    DEBUG_PRINTF("%hu is sherman\n", curr_id);
    info.extra[curr_id].shermanState = true;
}
763 | |
764 | static |
765 | bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { |
766 | symbol_t alphasize = raw.getImplAlphaSize(); |
767 | for (symbol_t s = 0; s < alphasize; s++) { |
768 | dstate_id_t succ_id = raw.states[root].next[s]; |
769 | if (succ_id == DEAD_STATE) { |
770 | continue; |
771 | } |
772 | |
773 | const dstate &succ = raw.states[succ_id]; |
774 | for (symbol_t t = 0; t < alphasize; t++) { |
775 | if (succ.next[t] == root || succ.next[t] == succ_id) { |
776 | return true; |
777 | } |
778 | } |
779 | } |
780 | return false; |
781 | } |
782 | |
/* Writes out the sherman state table. Each fixed-size entry records its
 * type, the count of characters which diverge from the daddy, the daddy's
 * impl id, the divergent characters and their successor entries. */
static
void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
    char *nfa_base = (char *)nfa;
    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
    char *sherman_table = nfa_base + m->sherman_offset;

    assert(ISALIGNED_16(sherman_table));
    for (size_t i = 0; i < info.size(); i++) {
        if (!info.is_sherman(i)) {
            continue;
        }
        u16 fs = verify_u16(info.implId(i));
        DEBUG_PRINTF("building sherman %zu impl %hu\n", i, fs);

        assert(fs >= sherman_limit);

        /* entries are fixed size, indexed by impl id above sherman_limit */
        char *curr_sherman_entry
            = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE;
        assert(curr_sherman_entry <= nfa_base + m->length);

        u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken);
        assert(len <= 9);
        dstate_id_t d = info.states[i].daddy;

        *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
        *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
        *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
        u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);

        /* list of characters whose successor differs from the daddy's */
        for (u16 s = 0; s < info.impl_alpha_size; s++) {
            if (info.states[i].next[s] != info.states[d].next[s]) {
                *(chars++) = (u8)s;
            }
        }

        /* matching successor entries, with edge flags folded in */
        u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
        for (u16 s = 0; s < info.impl_alpha_size; s++) {
            if (info.states[i].next[s] != info.states[d].next[s]) {
                DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs,
                             info.implId(d),
                             info.implId(info.states[i].next[s]));
                u16 entry_val = info.implId(info.states[i].next[s]);
                entry_val |= get_edge_flags(nfa, entry_val);
                unaligned_store_u16((u8 *)states++, entry_val);
            }
        }
    }
}
831 | |
/* Builds the 16-bit mcsheng engine: selects sherman states, allocates impl
 * ids, lays out the bytecode regions (successor table, aux, report lists,
 * accel, sherman region) and fills them in. Returns nullptr if 16-bit state
 * ids cannot be allocated. */
static
bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
                                   const map<dstate_id_t, AccelScheme> &accel_escape_info,
                                   const Grey &grey) {
    DEBUG_PRINTF("building mcsheng 16\n");

    vector<u32> reports; /* index in ri for the appropriate report list */
    vector<u32> reports_eod; /* as above */
    ReportID arb;
    u8 single;

    assert(info.getAlphaShift() <= 8);

    /* sherman selection: try to give every state a daddy */
    u16 total_daddy = 0;
    for (u32 i = 0; i < info.size(); i++) {
        find_better_daddy(info, i,
                          is_cyclic_near(info.raw, info.raw.start_anchored),
                          grey);
        total_daddy += info.extra[i].daddytaken;
    }

    DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy,
                 info.size() * info.impl_alpha_size, info.size(),
                 info.impl_alpha_size);

    u16 sherman_limit;
    if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
        DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n",
                     info.size());
        return nullptr;
    }
    u16 count_real_states = sherman_limit - sheng_end;

    auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);

    /* layout: NFA | mcsheng | succ table | aux | reports | accel | sherman */
    size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16)
                     * count_real_states;

    size_t aux_size = sizeof(mstate_aux) * info.size();

    size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size);
    size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
    size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
                                    + ri->getReportListSize(), 32);
    size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
    size_t sherman_size = calcShermanRegionSize(info);

    size_t total_size = sherman_offset + sherman_size;

    accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
    assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));

    auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());

    populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
                      accel_escape_info.size(), arb, single, nfa.get());
    createShuffleMasks(m, info, sheng_end, accel_escape_info);

    /* copy in the mc header information */
    m->sherman_offset = sherman_offset;
    m->sherman_end = total_size;
    m->sherman_limit = sherman_limit;

    DEBUG_PRINTF("%hu sheng, %hu norm, %zu total\n", sheng_end,
                 count_real_states, info.size());

    fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset,
                     sherman_offset - sizeof(NFA), reports, reports_eod,
                     aux_offset + aux_size, *ri);

    fill_in_succ_table_16(nfa.get(), info, sheng_end, sherman_limit);

    fill_in_sherman(nfa.get(), info, sherman_limit);

    return nfa;
}
909 | |
/* Fills in the 8-bit successor table for normal states; entries are plain
 * impl ids with no flag bits. */
static
void fill_in_succ_table_8(NFA *nfa, const dfa_info &info,
                          dstate_id_t sheng_end) {
    u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng);

    u8 alphaShift = info.getAlphaShift();
    assert(alphaShift <= 8);

    for (size_t i = 0; i < info.size(); i++) {
        assert(!info.is_sherman(i)); /* no shermans in the 8-bit variant */
        if (!info.is_normal(i)) {
            assert(info.implId(i) < sheng_end);
            continue;
        }
        /* rows are indexed by impl id relative to the end of the sheng */
        u8 normal_id = verify_u8(info.implId(i) - sheng_end);

        for (size_t s = 0; s < info.impl_alpha_size; s++) {
            dstate_id_t raw_succ = info.states[i].next[s];
            succ_table[((size_t)normal_id << alphaShift) + s]
                = info.implId(raw_succ);
        }
    }
}
933 | |
934 | static |
935 | void allocateImplId8(dfa_info &info, dstate_id_t sheng_end, |
936 | const map<dstate_id_t, AccelScheme> &accel_escape_info, |
937 | u16 *accel_limit, u16 *accept_limit) { |
938 | info.states[0].impl_id = 0; /* dead is always 0 */ |
939 | |
940 | vector<dstate_id_t> norm; |
941 | vector<dstate_id_t> accel; |
942 | vector<dstate_id_t> accept; |
943 | |
944 | assert(info.size() <= (1 << 8)); |
945 | |
946 | for (u32 i = 1; i < info.size(); i++) { |
947 | if (info.is_sheng(i)) { |
948 | continue; /* already allocated */ |
949 | } else if (!info.states[i].reports.empty()) { |
950 | accept.push_back(i); |
951 | } else if (contains(accel_escape_info, i)) { |
952 | accel.push_back(i); |
953 | } else { |
954 | norm.push_back(i); |
955 | } |
956 | } |
957 | |
958 | u32 j = sheng_end; |
959 | for (const dstate_id_t &s : norm) { |
960 | assert(j <= 256); |
961 | DEBUG_PRINTF("mapping state %u to %u\n" , s, j); |
962 | info.states[s].impl_id = j++; |
963 | } |
964 | *accel_limit = j; |
965 | for (const dstate_id_t &s : accel) { |
966 | assert(j <= 256); |
967 | DEBUG_PRINTF("mapping state %u to %u\n" , s, j); |
968 | info.states[s].impl_id = j++; |
969 | } |
970 | *accept_limit = j; |
971 | for (const dstate_id_t &s : accept) { |
972 | assert(j <= 256); |
973 | DEBUG_PRINTF("mapping state %u to %u\n" , s, j); |
974 | info.states[s].impl_id = j++; |
975 | } |
976 | } |
977 | |
/* Builds the 8-bit (<= 256 state) variant of the mcsheng engine: lays out
 * the bytecode (header, succ table, aux structs, report lists, accel
 * entries), allocates impl ids, and fills in all runtime tables. */
static
bytecode_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end,
                                  const map<dstate_id_t, AccelScheme> &accel_escape_info) {
    DEBUG_PRINTF("building mcsheng 8\n" );

    vector<u32> reports;
    vector<u32> reports_eod;
    ReportID arb;
    u8 single;

    /* gather report lists; ri holds the packed report-list layout info */
    auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);

    /* states outside the sheng region use the byte-wide succ table */
    size_t normal_count = info.size() - sheng_end;

    /* bytecode layout: NFA | mcsheng | succ table | aux | reports | accel */
    size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * normal_count;
    size_t aux_size = sizeof(mstate_aux) * info.size();
    size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size);
    size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
    size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
                                    + ri->getReportListSize(), 32);
    size_t total_size = accel_offset + accel_size;

    DEBUG_PRINTF("aux_size %zu\n" , aux_size);
    DEBUG_PRINTF("aux_offset %zu\n" , aux_offset);
    DEBUG_PRINTF("rl size %u\n" , ri->getReportListSize());
    DEBUG_PRINTF("accel_size %zu\n" , accel_size);
    DEBUG_PRINTF("accel_offset %zu\n" , accel_offset);
    DEBUG_PRINTF("total_size %zu\n" , total_size);

    accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
    assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));

    auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());

    /* must run before the fill_in_* passes below: they rely on each
     * state's impl_id being final */
    allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
                    &m->accept_limit_8);

    populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset,
                      accel_escape_info.size(), arb, single, nfa.get());
    createShuffleMasks(m, info, sheng_end, accel_escape_info);

    fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset,
                     total_size - sizeof(NFA), reports, reports_eod,
                     aux_offset + aux_size, *ri);

    fill_in_succ_table_8(nfa.get(), info, sheng_end);

    DEBUG_PRINTF("rl size %zu\n" , ri->size());

    return nfa;
}
1030 | |
1031 | bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc, |
1032 | const ReportManager &rm) { |
1033 | if (!cc.grey.allowMcSheng) { |
1034 | return nullptr; |
1035 | } |
1036 | |
1037 | mcclellan_build_strat mbs(raw, rm, false); |
1038 | dfa_info info(mbs); |
1039 | bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; |
1040 | |
1041 | if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming |
1042 | * mode with our semantics */ |
1043 | raw.stripExtraEodReports(); |
1044 | } |
1045 | |
1046 | bool has_eod_reports = raw.hasEodReports(); |
1047 | |
1048 | map<dstate_id_t, AccelScheme> accel_escape_info |
1049 | = info.strat.getAccelInfo(cc.grey); |
1050 | |
1051 | dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info); |
1052 | if (sheng_end <= DEAD_STATE + 1) { |
1053 | return nullptr; |
1054 | } |
1055 | |
1056 | bytecode_ptr<NFA> nfa; |
1057 | if (!using8bit) { |
1058 | nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey); |
1059 | } else { |
1060 | nfa = mcshengCompile8(info, sheng_end, accel_escape_info); |
1061 | } |
1062 | |
1063 | if (!nfa) { |
1064 | return nfa; |
1065 | } |
1066 | |
1067 | if (has_eod_reports) { |
1068 | nfa->flags |= NFA_ACCEPTS_EOD; |
1069 | } |
1070 | |
1071 | DEBUG_PRINTF("compile done\n" ); |
1072 | return nfa; |
1073 | } |
1074 | |
1075 | bool has_accel_mcsheng(const NFA *) { |
1076 | return true; /* consider the sheng region as accelerated */ |
1077 | } |
1078 | |
1079 | } // namespace ue2 |
1080 | |