| 1 | /* |
| 2 | * Copyright (c) 2016-2017, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #include "mcsheng_compile.h" |
| 30 | |
| 31 | #include "accel.h" |
| 32 | #include "accelcompile.h" |
| 33 | #include "grey.h" |
| 34 | #include "mcclellancompile.h" |
| 35 | #include "mcclellancompile_util.h" |
| 36 | #include "mcsheng_internal.h" |
| 37 | #include "nfa_internal.h" |
| 38 | #include "rdfa_graph.h" |
| 39 | #include "shufticompile.h" |
| 40 | #include "trufflecompile.h" |
| 41 | #include "ue2common.h" |
| 42 | #include "util/alloc.h" |
| 43 | #include "util/bitutils.h" |
| 44 | #include "util/charreach.h" |
| 45 | #include "util/compare.h" |
| 46 | #include "util/compile_context.h" |
| 47 | #include "util/container.h" |
| 48 | #include "util/flat_containers.h" |
| 49 | #include "util/graph.h" |
| 50 | #include "util/graph_range.h" |
| 51 | #include "util/make_unique.h" |
| 52 | #include "util/order_check.h" |
| 53 | #include "util/report_manager.h" |
| 54 | #include "util/unaligned.h" |
| 55 | #include "util/unordered.h" |
| 56 | #include "util/verify_types.h" |
| 57 | |
| 58 | #include <algorithm> |
| 59 | #include <cstdio> |
| 60 | #include <cstdlib> |
| 61 | #include <cstring> |
| 62 | #include <map> |
| 63 | #include <memory> |
| 64 | #include <set> |
| 65 | #include <deque> |
| 66 | #include <vector> |
| 67 | |
| 68 | #include <boost/range/adaptor/map.hpp> |
| 69 | |
| 70 | using namespace std; |
| 71 | using boost::adaptors::map_keys; |
| 72 | |
| 73 | namespace ue2 { |
| 74 | |
| 75 | namespace /* anon */ { |
| 76 | |
| 77 | #define MIN_SHENG_SIZE 6 |
| 78 | #define INVALID_SHENG_ID 255 |
| 79 | |
| 80 | struct { |
| 81 | u16 = 0; |
| 82 | bool = false; |
| 83 | bool = false; |
| 84 | u8 = INVALID_SHENG_ID; |
| 85 | }; |
| 86 | |
| 87 | struct dfa_info { |
| 88 | accel_dfa_build_strat &strat; |
| 89 | raw_dfa &raw; |
| 90 | vector<dstate> &states; |
| 91 | vector<dstate_extra> ; |
| 92 | const u16 alpha_size; /* including special symbols */ |
| 93 | const array<u16, ALPHABET_SIZE> &alpha_remap; |
| 94 | vector<CharReach> rev_alpha; |
| 95 | const u16 impl_alpha_size; |
| 96 | |
| 97 | u8 getAlphaShift() const; |
| 98 | |
| 99 | explicit dfa_info(accel_dfa_build_strat &s) |
| 100 | : strat(s), |
| 101 | raw(s.get_raw()), |
| 102 | states(raw.states), |
| 103 | extra(raw.states.size()), |
| 104 | alpha_size(raw.alpha_size), |
| 105 | alpha_remap(raw.alpha_remap), |
| 106 | impl_alpha_size(raw.getImplAlphaSize()) { |
| 107 | rev_alpha.resize(impl_alpha_size); |
| 108 | for (u32 i = 0; i < N_CHARS; i++) { |
| 109 | rev_alpha[alpha_remap[i]].set(i); |
| 110 | } |
| 111 | } |
| 112 | |
| 113 | dstate_id_t implId(dstate_id_t raw_id) const { |
| 114 | return states[raw_id].impl_id; |
| 115 | } |
| 116 | |
| 117 | bool is_sherman(dstate_id_t raw_id) const { |
| 118 | return extra[raw_id].shermanState; |
| 119 | } |
| 120 | |
| 121 | bool is_sheng(dstate_id_t raw_id) const { |
| 122 | return extra[raw_id].sheng_id != INVALID_SHENG_ID; |
| 123 | } |
| 124 | |
| 125 | bool is_sheng_succ(dstate_id_t raw_id) const { |
| 126 | return extra[raw_id].sheng_succ; |
| 127 | } |
| 128 | |
| 129 | /* states which use the normal transition/successor table */ |
| 130 | bool is_normal(dstate_id_t raw_id) const { |
| 131 | return raw_id != DEAD_STATE && !is_sheng(raw_id) && !is_sherman(raw_id); |
| 132 | } |
| 133 | size_t size(void) const { return states.size(); } |
| 134 | }; |
| 135 | |
| 136 | u8 dfa_info::getAlphaShift() const { |
| 137 | if (impl_alpha_size < 2) { |
| 138 | return 1; |
| 139 | } else { |
| 140 | /* log2 round up */ |
| 141 | return 32 - clz32(impl_alpha_size - 1); |
| 142 | } |
| 143 | } |
| 144 | |
| 145 | } // namespace |
| 146 | |
| 147 | static |
| 148 | mstate_aux *getAux(NFA *n, dstate_id_t i) { |
| 149 | mcsheng *m = (mcsheng *)getMutableImplNfa(n); |
| 150 | mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); |
| 151 | |
| 152 | mstate_aux *aux = aux_base + i; |
| 153 | assert((const char *)aux < (const char *)n + m->length); |
| 154 | return aux; |
| 155 | } |
| 156 | |
static
void createShuffleMasks(mcsheng *m, const dfa_info &info,
                        dstate_id_t sheng_end,
                        const map<dstate_id_t, AccelScheme> &accel_escape_info) {
    /* Build the per-character shuffle masks for the sheng region: entry
     * [sheng_id] of the mask for character c holds the next sheng id when
     * reading c. */
    DEBUG_PRINTF("using first %hu states for a sheng\n" , sheng_end);
    assert(sheng_end > DEAD_STATE + 1);
    assert(sheng_end <= sizeof(m128) + 1);
    vector<array<u8, sizeof(m128)>> masks;
    masks.resize(info.alpha_size);
    /* -1 to avoid wasting a slot as we do not include dead state */
    vector<dstate_id_t> raw_ids;
    raw_ids.resize(sheng_end - 1);
    /* invert the mapping: sheng id -> raw dfa state id */
    for (dstate_id_t s = DEAD_STATE + 1; s < info.states.size(); s++) {
        assert(info.implId(s)); /* should not map to DEAD_STATE */
        if (info.is_sheng(s)) {
            raw_ids[info.extra[s].sheng_id] = s;
        }
    }
    for (u32 i = 0; i < info.alpha_size; i++) {
        if (i == info.alpha_remap[TOP]) {
            continue;
        }
        auto &mask = masks[i];
        assert(sizeof(mask) == sizeof(m128));
        mask.fill(0);

        for (dstate_id_t sheng_id = 0; sheng_id < sheng_end - 1; sheng_id++) {
            dstate_id_t raw_id = raw_ids[sheng_id];
            dstate_id_t next_id = info.implId(info.states[raw_id].next[i]);
            if (next_id == DEAD_STATE) {
                /* dead is represented by the last slot in the region */
                next_id = sheng_end - 1;
            } else if (next_id < sheng_end) {
                /* sheng ids are impl ids shifted down by one (dead omitted) */
                next_id--;
            }
            DEBUG_PRINTF("%hu: %u->next %hu\n" , sheng_id, i, next_id);
            mask[sheng_id] = verify_u8(next_id);
        }
    }
    /* expand from compressed-alphabet indexing out to all 256 characters */
    for (u32 i = 0; i < N_CHARS; i++) {
        assert(info.alpha_remap[i] != info.alpha_remap[TOP]);
        memcpy((u8 *)&m->sheng_masks[i],
               (u8 *)masks[info.alpha_remap[i]].data(), sizeof(m128));
    }
    m->sheng_end = sheng_end;
    m->sheng_accel_limit = sheng_end - 1;

    /* accelerable sheng states were allocated the highest sheng ids (see
     * find_sheng_states); lower the accel limit to the first of them */
    for (dstate_id_t s : raw_ids) {
        if (contains(accel_escape_info, s)) {
            LIMIT_TO_AT_MOST(&m->sheng_accel_limit, info.extra[s].sheng_id);
        }
    }
}
| 209 | |
static
void populateBasicInfo(size_t state_size, const dfa_info &info,
                       u32 total_size, u32 aux_offset, u32 accel_offset,
                       u32 accel_count, ReportID arb, bool single, NFA *nfa) {
    /* Fill in the generic NFA header and the common mcsheng header fields.
     * state_size selects the 8 or 16 bit engine variant. */
    assert(state_size == sizeof(u16) || state_size == sizeof(u8));

    nfa->length = total_size;
    nfa->nPositions = info.states.size();

    nfa->scratchStateSize = verify_u32(state_size);
    nfa->streamStateSize = verify_u32(state_size);

    if (state_size == sizeof(u8)) {
        nfa->type = MCSHENG_NFA_8;
    } else {
        nfa->type = MCSHENG_NFA_16;
    }

    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
    /* copy the character -> compressed symbol mapping */
    for (u32 i = 0; i < 256; i++) {
        m->remap[i] = verify_u8(info.alpha_remap[i]);
    }
    m->alphaShift = info.getAlphaShift();
    m->length = total_size;
    m->aux_offset = aux_offset;
    m->accel_offset = accel_offset;
    m->arb_report = arb;
    m->state_count = verify_u16(info.size());
    m->start_anchored = info.implId(info.raw.start_anchored);
    m->start_floating = info.implId(info.raw.start_floating);
    m->has_accel = accel_count ? 1 : 0;

    if (single) {
        m->flags |= MCSHENG_FLAG_SINGLE;
    }
}
| 246 | |
| 247 | static |
| 248 | size_t calcShermanRegionSize(const dfa_info &info) { |
| 249 | size_t rv = 0; |
| 250 | |
| 251 | for (size_t i = 0; i < info.size(); i++) { |
| 252 | if (info.is_sherman(i)) { |
| 253 | rv += SHERMAN_FIXED_SIZE; |
| 254 | } |
| 255 | } |
| 256 | |
| 257 | return ROUNDUP_16(rv); |
| 258 | } |
| 259 | |
| 260 | static |
| 261 | void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, |
| 262 | const vector<u32> &reports, const vector<u32> &reports_eod, |
| 263 | const vector<u32> &reportOffsets) { |
| 264 | const dstate &raw_state = info.states[i]; |
| 265 | aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]]; |
| 266 | aux->accept_eod = raw_state.reports_eod.empty() ? 0 |
| 267 | : reportOffsets[reports_eod[i]]; |
| 268 | aux->top = info.implId(i ? raw_state.next[info.alpha_remap[TOP]] |
| 269 | : info.raw.start_floating); |
| 270 | } |
| 271 | |
/* returns false on error */
static
bool allocateImplId16(dfa_info &info, dstate_id_t sheng_end,
                      dstate_id_t *sherman_base) {
    /* Assign 16-bit implementation ids: sheng states already hold
     * [1, sheng_end), normal states follow, then sherman states. Successors
     * of the sheng region must receive ids which fit in a u8. */
    info.states[0].impl_id = 0; /* dead is always 0 */

    /* bucket the remaining states by sherman-ness and sheng-successor-ness */
    vector<dstate_id_t> norm;
    vector<dstate_id_t> sherm;
    vector<dstate_id_t> norm_sheng_succ;
    vector<dstate_id_t> sherm_sheng_succ;

    if (info.size() > (1 << 16)) {
        DEBUG_PRINTF("too many states\n" );
        *sherman_base = 0;
        return false;
    }

    for (u32 i = 1; i < info.size(); i++) {
        if (info.is_sheng(i)) {
            continue; /* sheng impl ids have already been allocated */
        } if (info.is_sherman(i)) {
            if (info.is_sheng_succ(i)) {
                sherm_sheng_succ.push_back(i);
            } else {
                sherm.push_back(i);
            }
        } else {
            if (info.is_sheng_succ(i)) {
                norm_sheng_succ.push_back(i);
            } else {
                norm.push_back(i);
            }
        }
    }

    /* normal ids start directly after the sheng region; sheng successors go
     * first so they land in the u8-addressable range */
    dstate_id_t next_norm = sheng_end;
    for (dstate_id_t s : norm_sheng_succ) {
        info.states[s].impl_id = next_norm++;
    }
    if (next_norm + norm.size() + sherm_sheng_succ.size() > UINT8_MAX) {
        /* we need to give sheng_succs ids which fit into a u8 -- demote these
         * to normal states */
        for (dstate_id_t s : sherm_sheng_succ) {
            info.states[s].impl_id = next_norm++;
            info.extra[s].shermanState = false;
        }
        sherm_sheng_succ.clear();
    }
    for (dstate_id_t s : norm) {
        info.states[s].impl_id = next_norm++;
    }

    /* sherman ids start where the normal ids finish */
    *sherman_base = next_norm;
    dstate_id_t next_sherman = next_norm;

    for (dstate_id_t s : sherm_sheng_succ) {
        info.states[s].impl_id = next_sherman++;
    }

    for (dstate_id_t s : sherm) {
        info.states[s].impl_id = next_sherman++;
    }

    /* Check to see if we haven't over allocated our states */
    DEBUG_PRINTF("next sherman %u masked %u\n" , next_sherman,
                 (dstate_id_t)(next_sherman & STATE_MASK));
    return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK);
}
| 340 | |
| 341 | typedef RdfaGraph::vertex_descriptor RdfaVertex; |
| 342 | |
| 343 | static |
| 344 | bool mark_sheng_succs(const RdfaGraph &g, dfa_info &info, |
| 345 | const flat_set<RdfaVertex> &sheng_states) { |
| 346 | u32 exit_count = 0; |
| 347 | |
| 348 | for (auto v : sheng_states) { |
| 349 | dstate_id_t s = g[v].index; |
| 350 | for (u32 i = 0; i != info.alpha_size; i++) { |
| 351 | if (i == info.alpha_remap[TOP]) { |
| 352 | continue; |
| 353 | } |
| 354 | dstate_id_t next = info.states[s].next[i]; |
| 355 | if (!next || info.is_sheng(next) || info.is_sheng_succ(next)) { |
| 356 | continue; |
| 357 | } |
| 358 | exit_count++; |
| 359 | info.extra[next].sheng_succ = true; |
| 360 | } |
| 361 | } |
| 362 | |
| 363 | if (exit_count + sheng_states.size() < UINT8_MAX) { |
| 364 | return true; |
| 365 | } else { |
| 366 | DEBUG_PRINTF("fail: unable to fit %u exits in byte" , exit_count); |
| 367 | return false; |
| 368 | } |
| 369 | } |
| 370 | |
| 371 | static |
| 372 | CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) { |
| 373 | CharReach rv; |
| 374 | for (u32 i = 0; i < info.impl_alpha_size; i++) { |
| 375 | if (info.raw.states[u].next[i] == v) { |
| 376 | assert(info.rev_alpha[i].any()); |
| 377 | rv |= info.rev_alpha[i]; |
| 378 | } |
| 379 | } |
| 380 | assert(rv.any()); |
| 381 | return rv; |
| 382 | } |
| 383 | |
| 384 | #define MAX_SHENG_STATES 16 |
| 385 | #define MAX_SHENG_LEAKINESS 0.05 |
| 386 | |
| 387 | using LeakinessCache = ue2_unordered_map<pair<RdfaVertex, u32>, double>; |
| 388 | |
| 389 | /** |
| 390 | * Returns the proportion of strings of length 'depth' which will leave the |
| 391 | * sheng region when starting at state 'u'. |
| 392 | */ |
| 393 | static |
| 394 | double leakiness(const RdfaGraph &g, dfa_info &info, |
| 395 | const flat_set<RdfaVertex> &sheng_states, RdfaVertex u, |
| 396 | u32 depth, LeakinessCache &cache) { |
| 397 | double rv = 0; |
| 398 | if (contains(cache, make_pair(u, depth))) { |
| 399 | return cache[make_pair(u, depth)]; |
| 400 | } |
| 401 | for (RdfaVertex v : adjacent_vertices_range(u, g)) { |
| 402 | if (g[v].index == DEAD_STATE) { |
| 403 | continue; |
| 404 | } |
| 405 | double width = get_edge_reach(g[u].index, g[v].index, info).count(); |
| 406 | width /= N_CHARS; |
| 407 | |
| 408 | double weight; |
| 409 | if (!contains(sheng_states, v)) { |
| 410 | weight = 1; |
| 411 | } else if (depth > 1) { |
| 412 | weight = leakiness(g, info, sheng_states, v, depth - 1, cache); |
| 413 | } else { |
| 414 | continue; /* weight = 0 */ |
| 415 | } |
| 416 | rv += width * weight; |
| 417 | } |
| 418 | |
| 419 | cache[make_pair(u, depth)] = rv; |
| 420 | DEBUG_PRINTF("%zu [%u] q = %g\n" , g[u].index, depth, rv); |
| 421 | return rv; |
| 422 | } |
| 423 | |
| 424 | /** |
| 425 | * Returns the proportion of 8 byte strings which will leave the sheng region |
| 426 | * when starting at state 'u'. |
| 427 | */ |
| 428 | static |
| 429 | double leakiness(const RdfaGraph &g, dfa_info &info, |
| 430 | const flat_set<RdfaVertex> &sheng_states, RdfaVertex u) { |
| 431 | LeakinessCache cache; |
| 432 | double rv = leakiness(g, info, sheng_states, u, 8, cache); |
| 433 | return rv; |
| 434 | } |
| 435 | |
static
dstate_id_t find_sheng_states(dfa_info &info,
                              map<dstate_id_t, AccelScheme> &accel_escape_info) {
    /* Choose the set of states to implement with the shuffle-based sheng
     * engine and allocate their impl ids. Returns one past the last sheng
     * impl id, or DEAD_STATE if no suitable region was found. */
    RdfaGraph g(info.raw);
    auto cyclics = find_vertices_in_cycles(g);

    /* pick a 'sticky' cyclic state to seed the region */
    auto base_cyclic = RdfaGraph::null_vertex();
    for (const auto &v : cyclics) {
        if (g[v].index == DEAD_STATE) {
            continue;
        }
        DEBUG_PRINTF("considering cyclic %zu\n" , g[v].index);
        /* get an estimate of stickness of the cyclic: assume any edges from
         * states with larger state ids are back edges */
        CharReach est_back_reach;
        for (const auto &u : inv_adjacent_vertices_range(v, g)) {
            if (g[u].index < g[v].index) {
                continue;
            }
            est_back_reach |= get_edge_reach(g[u].index, g[v].index, info);
        }

        if (est_back_reach.count() < 30) {
            continue;
        }
        base_cyclic = v;
        break;
    }
    if (!base_cyclic) {
        return DEAD_STATE;
    }

    /* grow the region outwards from the cyclic (BFS), bounded by the number
     * of states the shuffle masks can address */
    flat_set<RdfaVertex> sheng_states;
    deque<RdfaVertex> to_consider = { base_cyclic };
    flat_set<dstate_id_t> considered = { DEAD_STATE };
    bool seen_back_edge = false;
    while (!to_consider.empty()
           && sheng_states.size() < MAX_SHENG_STATES) {
        auto v = to_consider.front();
        to_consider.pop_front();
        if (!considered.insert(g[v].index).second) {
            continue;
        }

        assert(!contains(sheng_states, v));

        if (generates_callbacks(info.raw.kind)
            && !info.states[g[v].index].reports.empty()) {
            /* cannot raise callbacks from sheng region */
            continue;
        }

        sheng_states.insert(v);
        for (const auto &t : adjacent_vertices_range(v, g)) {
            if (!contains(considered, g[t].index)) {
                to_consider.push_back(t);
            }
            if (t == base_cyclic) {
                seen_back_edge = true;
            }
        }
    }

    /* allocate normal ids */
    dstate_id_t sheng_end = DEAD_STATE + 1;
    for (auto v : sheng_states) {
        dstate_id_t s = g[v].index;
        if (!contains(accel_escape_info, s)) {
            info.states[s].impl_id = sheng_end++;
            info.extra[s].sheng_id = info.states[s].impl_id - 1;
        }
    }

    /* allocate accel ids (deliberately after the normal ids so accelerable
     * states take the highest sheng ids; createShuffleMasks relies on this) */
    for (auto v : sheng_states) {
        dstate_id_t s = g[v].index;
        if (contains(accel_escape_info, s)) {
            assert(!info.states[s].impl_id);
            info.states[s].impl_id = sheng_end++;
            info.extra[s].sheng_id = info.states[s].impl_id - 1;
        }
    }

    if (sheng_states.size() < MIN_SHENG_SIZE) {
        DEBUG_PRINTF("sheng region too small\n" );
        return DEAD_STATE;
    }

    if (!seen_back_edge) {
        DEBUG_PRINTF("did not include cyclic\n" );
        return DEAD_STATE;
    }

    /* reject regions which typical input escapes too quickly */
    double leak = leakiness(g, info, sheng_states, base_cyclic);
    if (leak > MAX_SHENG_LEAKINESS) {
        DEBUG_PRINTF("too leaky (%g)\n" , leak);
        return DEAD_STATE;
    }

    if (!mark_sheng_succs(g, info, sheng_states)) {
        return DEAD_STATE;
    }

    /* TODO: ensure sufficiently 'sticky' */
    /* TODO: check not all states accel */
    DEBUG_PRINTF("sheng_end = %hu\n" , sheng_end);
    return sheng_end;
}
| 544 | |
static
void fill_in_aux_info(NFA *nfa, const dfa_info &info,
                      const map<dstate_id_t, AccelScheme> &accel_escape_info,
                      u32 accel_offset, UNUSED u32 accel_end_offset,
                      const vector<u32> &reports,
                      const vector<u32> &reports_eod,
                      u32 report_base_offset,
                      const raw_report_info &ri) {
    /* Write the per-state aux table (report offsets, tops) and build the
     * accel structures for accelerable states. */
    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);

    vector<u32> reportOffsets;

    ri.fillReportLists(nfa, report_base_offset, reportOffsets);

    for (u32 i = 0; i < info.size(); i++) {
        u16 impl_id = info.implId(i);
        mstate_aux *this_aux = getAux(nfa, impl_id);

        fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets);
        if (contains(accel_escape_info, i)) {
            /* accel structures are packed sequentially in the accel region */
            this_aux->accel_offset = accel_offset;
            accel_offset += info.strat.accelSize();
            assert(accel_offset <= accel_end_offset);
            assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));
            info.strat.buildAccel(i, accel_escape_info.at(i),
                                  (void *)((char *)m + this_aux->accel_offset));
        }
    }
}
| 574 | |
| 575 | static |
| 576 | u16 get_edge_flags(NFA *nfa, dstate_id_t target_impl_id) { |
| 577 | mstate_aux *aux = getAux(nfa, target_impl_id); |
| 578 | u16 flags = 0; |
| 579 | |
| 580 | if (aux->accept) { |
| 581 | flags |= ACCEPT_FLAG; |
| 582 | } |
| 583 | |
| 584 | if (aux->accel_offset) { |
| 585 | flags |= ACCEL_FLAG; |
| 586 | } |
| 587 | |
| 588 | return flags; |
| 589 | } |
| 590 | |
static
void fill_in_succ_table_16(NFA *nfa, const dfa_info &info,
                           dstate_id_t sheng_end,
                           UNUSED dstate_id_t sherman_base) {
    /* Populate the wide (u16) successor table for normal states; sheng
     * states use the shuffle masks and sherman states the sherman region. */
    u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng));

    u8 alphaShift = info.getAlphaShift();
    assert(alphaShift <= 8);

    for (size_t i = 0; i < info.size(); i++) {
        if (!info.is_normal(i)) {
            assert(info.implId(i) < sheng_end || info.is_sherman(i));
            continue;
        }

        assert(info.implId(i) < sherman_base);
        /* table rows are indexed relative to the end of the sheng region */
        u16 normal_id = verify_u16(info.implId(i) - sheng_end);

        for (size_t s = 0; s < info.impl_alpha_size; s++) {
            dstate_id_t raw_succ = info.states[i].next[s];
            u16 &entry = succ_table[((size_t)normal_id << alphaShift) + s];

            entry = info.implId(raw_succ);
            entry |= get_edge_flags(nfa, entry);
        }
    }
}
| 618 | |
| 619 | #define MAX_SHERMAN_LIST_LEN 8 |
| 620 | |
| 621 | static |
| 622 | void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate, |
| 623 | dstate_id_t max) { |
| 624 | if (candidate < max) { |
| 625 | dest.insert(candidate); |
| 626 | } |
| 627 | } |
| 628 | |
| 629 | static |
| 630 | void addSuccessors(flat_set<dstate_id_t> &dest, const dstate &source, |
| 631 | u16 alphasize, dstate_id_t curr_id) { |
| 632 | for (symbol_t s = 0; s < alphasize; s++) { |
| 633 | addIfEarlier(dest, source.next[s], curr_id); |
| 634 | } |
| 635 | } |
| 636 | |
| 637 | /* \brief Returns a set of states to search for a better daddy. */ |
| 638 | static |
| 639 | flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info, |
| 640 | dstate_id_t curr_id) { |
| 641 | flat_set<dstate_id_t> hinted; |
| 642 | |
| 643 | addIfEarlier(hinted, 0, curr_id); |
| 644 | addIfEarlier(hinted, info.raw.start_anchored, curr_id); |
| 645 | addIfEarlier(hinted, info.raw.start_floating, curr_id); |
| 646 | |
| 647 | // Add existing daddy and his successors, then search back one generation. |
| 648 | const u16 alphasize = info.impl_alpha_size; |
| 649 | dstate_id_t daddy = info.states[curr_id].daddy; |
| 650 | for (u32 level = 0; daddy && level < 2; level++) { |
| 651 | addIfEarlier(hinted, daddy, curr_id); |
| 652 | addSuccessors(hinted, info.states[daddy], alphasize, curr_id); |
| 653 | daddy = info.states[daddy].daddy; |
| 654 | } |
| 655 | |
| 656 | return hinted; |
| 657 | } |
| 658 | |
| 659 | #define MAX_SHERMAN_SELF_LOOP 20 |
| 660 | |
static
void find_better_daddy(dfa_info &info, dstate_id_t curr_id,
                       bool any_cyclic_near_anchored_state, const Grey &grey) {
    /* Search for a 'daddy' state sharing most of curr_id's transitions, so
     * curr_id can be stored compactly as a sherman state (daddy id plus a
     * short list of differing transitions). */
    if (!grey.allowShermanStates) {
        return;
    }

    const u16 width = sizeof(u16);
    const u16 alphasize = info.impl_alpha_size;

    if (info.raw.start_anchored != DEAD_STATE
        && any_cyclic_near_anchored_state
        && curr_id < alphasize * 3) {
        /* crude attempt to prevent frequent states from being sherman'ed
         * depends on the fact that states are numbers are currently in bfs
         * order */
        DEBUG_PRINTF("%hu is banned\n" , curr_id);
        return;
    }

    if (info.raw.start_floating != DEAD_STATE
        && curr_id >= info.raw.start_floating
        && curr_id < info.raw.start_floating + alphasize * 3) {
        /* crude attempt to prevent frequent states from being sherman'ed
         * depends on the fact that states are numbers are currently in bfs
         * order */
        DEBUG_PRINTF("%hu is banned (%hu)\n" , curr_id, info.raw.start_floating);
        return;
    }

    /* a sherman entry must fit its list of differences in the fixed slot */
    const u16 full_state_size = width * alphasize;
    const u16 max_list_len = MIN(MAX_SHERMAN_LIST_LEN,
                                 (full_state_size - 2)/(width + 1));
    u16 best_score = 0;
    dstate_id_t best_daddy = 0;
    dstate &currState = info.states[curr_id];

    flat_set<dstate_id_t> hinted = find_daddy_candidates(info, curr_id);

    /* score each candidate by the number of shared transitions */
    for (const dstate_id_t &donor : hinted) {
        assert(donor < curr_id);
        u32 score = 0;

        if (!info.is_normal(donor)) {
            continue;
        }

        const dstate &donorState = info.states[donor];
        for (symbol_t s = 0; s < alphasize; s++) {
            if (currState.next[s] == donorState.next[s]) {
                score++;
            }
        }

        /* prefer lower ids to provide some stability amongst potential
         * siblings */
        if (score > best_score || (score == best_score && donor < best_daddy)) {
            best_daddy = donor;
            best_score = score;

            if (score == alphasize) {
                break;
            }
        }
    }

    currState.daddy = best_daddy;
    info.extra[curr_id].daddytaken = best_score;
    DEBUG_PRINTF("%hu -> daddy %hu: %u/%u BF\n" , curr_id, best_daddy,
                 best_score, alphasize);

    if (best_daddy == DEAD_STATE) {
        return; /* No good daddy */
    }

    if (best_score + max_list_len < alphasize) {
        return; /* ??? */
    }

    assert(info.is_normal(currState.daddy));

    /* count how many characters self-loop on this state */
    u32 self_loop_width = 0;
    const dstate &curr_raw = info.states[curr_id];
    for (unsigned i = 0; i < N_CHARS; i++) {
        if (curr_raw.next[info.alpha_remap[i]] == curr_id) {
            self_loop_width++;
        }
    }

    if (self_loop_width > MAX_SHERMAN_SELF_LOOP) {
        DEBUG_PRINTF("%hu is banned wide self loop (%u)\n" , curr_id,
                     self_loop_width);
        return;
    }

    /* sheng states are implemented via the shuffle masks, never as shermans */
    if (info.is_sheng(curr_id)) {
        return;
    }

    DEBUG_PRINTF("%hu is sherman\n" , curr_id);
    info.extra[curr_id].shermanState = true;
}
| 763 | |
| 764 | static |
| 765 | bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { |
| 766 | symbol_t alphasize = raw.getImplAlphaSize(); |
| 767 | for (symbol_t s = 0; s < alphasize; s++) { |
| 768 | dstate_id_t succ_id = raw.states[root].next[s]; |
| 769 | if (succ_id == DEAD_STATE) { |
| 770 | continue; |
| 771 | } |
| 772 | |
| 773 | const dstate &succ = raw.states[succ_id]; |
| 774 | for (symbol_t t = 0; t < alphasize; t++) { |
| 775 | if (succ.next[t] == root || succ.next[t] == succ_id) { |
| 776 | return true; |
| 777 | } |
| 778 | } |
| 779 | } |
| 780 | return false; |
| 781 | } |
| 782 | |
static
void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) {
    /* Serialise each sherman state into the sherman region: type byte,
     * difference-list length, daddy impl id, the differing characters, then
     * the corresponding successor ids. */
    char *nfa_base = (char *)nfa;
    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa);
    char *sherman_table = nfa_base + m->sherman_offset;

    assert(ISALIGNED_16(sherman_table));
    for (size_t i = 0; i < info.size(); i++) {
        if (!info.is_sherman(i)) {
            continue;
        }
        u16 fs = verify_u16(info.implId(i));
        DEBUG_PRINTF("building sherman %zu impl %hu\n" , i, fs);

        assert(fs >= sherman_limit);

        /* entries are fixed size, indexed by impl id above sherman_limit */
        char *curr_sherman_entry
            = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE;
        assert(curr_sherman_entry <= nfa_base + m->length);

        /* daddytaken transitions are inherited; the rest are listed */
        u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken);
        assert(len <= 9);
        dstate_id_t d = info.states[i].daddy;

        *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE;
        *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len;
        *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d);
        u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET);

        /* first pass: symbols whose successor differs from the daddy's */
        for (u16 s = 0; s < info.impl_alpha_size; s++) {
            if (info.states[i].next[s] != info.states[d].next[s]) {
                *(chars++) = (u8)s;
            }
        }

        /* second pass: the corresponding successor ids, with edge flags */
        u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len));
        for (u16 s = 0; s < info.impl_alpha_size; s++) {
            if (info.states[i].next[s] != info.states[d].next[s]) {
                DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n" , fs,
                             info.implId(d),
                             info.implId(info.states[i].next[s]));
                u16 entry_val = info.implId(info.states[i].next[s]);
                entry_val |= get_edge_flags(nfa, entry_val);
                unaligned_store_u16((u8 *)states++, entry_val);
            }
        }
    }
}
| 831 | |
static
bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end,
                                   const map<dstate_id_t, AccelScheme> &accel_escape_info,
                                   const Grey &grey) {
    /* Build the 16-bit-state mcsheng engine: compute the blob layout
     * (header, succ table, aux, reports, accel, sherman region), allocate it
     * and fill in each section. Returns nullptr if ids cannot be allocated. */
    DEBUG_PRINTF("building mcsheng 16\n" );

    vector<u32> reports; /* index in ri for the appropriate report list */
    vector<u32> reports_eod; /* as above */
    ReportID arb;
    u8 single;

    assert(info.getAlphaShift() <= 8);

    /* try to compress states as shermans where profitable */
    u16 total_daddy = 0;
    for (u32 i = 0; i < info.size(); i++) {
        find_better_daddy(info, i,
                          is_cyclic_near(info.raw, info.raw.start_anchored),
                          grey);
        total_daddy += info.extra[i].daddytaken;
    }

    DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n" , total_daddy,
                 info.size() * info.impl_alpha_size, info.size(),
                 info.impl_alpha_size);

    u16 sherman_limit;
    if (!allocateImplId16(info, sheng_end, &sherman_limit)) {
        DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n" ,
                     info.size());
        return nullptr;
    }
    u16 count_real_states = sherman_limit - sheng_end;

    auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);

    /* succ table rows exist only for normal (non-sheng, non-sherman) states */
    size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16)
                     * count_real_states;

    size_t aux_size = sizeof(mstate_aux) * info.size();

    size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size);
    size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
    size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
                                    + ri->getReportListSize(), 32);
    size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size);
    size_t sherman_size = calcShermanRegionSize(info);

    size_t total_size = sherman_offset + sherman_size;

    accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
    assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));

    auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());

    populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset,
                      accel_escape_info.size(), arb, single, nfa.get());
    createShuffleMasks(m, info, sheng_end, accel_escape_info);

    /* copy in the mc header information */
    m->sherman_offset = sherman_offset;
    m->sherman_end = total_size;
    m->sherman_limit = sherman_limit;

    DEBUG_PRINTF("%hu sheng, %hu norm, %zu total\n" , sheng_end,
                 count_real_states, info.size());

    fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset,
                     sherman_offset - sizeof(NFA), reports, reports_eod,
                     aux_offset + aux_size, *ri);

    fill_in_succ_table_16(nfa.get(), info, sheng_end, sherman_limit);

    fill_in_sherman(nfa.get(), info, sherman_limit);

    return nfa;
}
| 909 | |
| 910 | static |
| 911 | void fill_in_succ_table_8(NFA *nfa, const dfa_info &info, |
| 912 | dstate_id_t sheng_end) { |
| 913 | u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng); |
| 914 | |
| 915 | u8 alphaShift = info.getAlphaShift(); |
| 916 | assert(alphaShift <= 8); |
| 917 | |
| 918 | for (size_t i = 0; i < info.size(); i++) { |
| 919 | assert(!info.is_sherman(i)); |
| 920 | if (!info.is_normal(i)) { |
| 921 | assert(info.implId(i) < sheng_end); |
| 922 | continue; |
| 923 | } |
| 924 | u8 normal_id = verify_u8(info.implId(i) - sheng_end); |
| 925 | |
| 926 | for (size_t s = 0; s < info.impl_alpha_size; s++) { |
| 927 | dstate_id_t raw_succ = info.states[i].next[s]; |
| 928 | succ_table[((size_t)normal_id << alphaShift) + s] |
| 929 | = info.implId(raw_succ); |
| 930 | } |
| 931 | } |
| 932 | } |
| 933 | |
| 934 | static |
| 935 | void allocateImplId8(dfa_info &info, dstate_id_t sheng_end, |
| 936 | const map<dstate_id_t, AccelScheme> &accel_escape_info, |
| 937 | u16 *accel_limit, u16 *accept_limit) { |
| 938 | info.states[0].impl_id = 0; /* dead is always 0 */ |
| 939 | |
| 940 | vector<dstate_id_t> norm; |
| 941 | vector<dstate_id_t> accel; |
| 942 | vector<dstate_id_t> accept; |
| 943 | |
| 944 | assert(info.size() <= (1 << 8)); |
| 945 | |
| 946 | for (u32 i = 1; i < info.size(); i++) { |
| 947 | if (info.is_sheng(i)) { |
| 948 | continue; /* already allocated */ |
| 949 | } else if (!info.states[i].reports.empty()) { |
| 950 | accept.push_back(i); |
| 951 | } else if (contains(accel_escape_info, i)) { |
| 952 | accel.push_back(i); |
| 953 | } else { |
| 954 | norm.push_back(i); |
| 955 | } |
| 956 | } |
| 957 | |
| 958 | u32 j = sheng_end; |
| 959 | for (const dstate_id_t &s : norm) { |
| 960 | assert(j <= 256); |
| 961 | DEBUG_PRINTF("mapping state %u to %u\n" , s, j); |
| 962 | info.states[s].impl_id = j++; |
| 963 | } |
| 964 | *accel_limit = j; |
| 965 | for (const dstate_id_t &s : accel) { |
| 966 | assert(j <= 256); |
| 967 | DEBUG_PRINTF("mapping state %u to %u\n" , s, j); |
| 968 | info.states[s].impl_id = j++; |
| 969 | } |
| 970 | *accept_limit = j; |
| 971 | for (const dstate_id_t &s : accept) { |
| 972 | assert(j <= 256); |
| 973 | DEBUG_PRINTF("mapping state %u to %u\n" , s, j); |
| 974 | info.states[s].impl_id = j++; |
| 975 | } |
| 976 | } |
| 977 | |
static
bytecode_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end,
                      const map<dstate_id_t, AccelScheme> &accel_escape_info) {
    DEBUG_PRINTF("building mcsheng 8\n");

    vector<u32> reports;     /* index in ri for the appropriate report list */
    vector<u32> reports_eod; /* as above, for EOD reports */
    ReportID arb;
    u8 single;

    auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb);

    /* only states outside the sheng region get rows in the succ table */
    size_t normal_count = info.size() - sheng_end;

    /* bytecode layout: NFA header | mcsheng header | succ table | aux |
     * report lists | accel entries. Offsets below follow that order. */
    size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * normal_count;
    size_t aux_size = sizeof(mstate_aux) * info.size();
    size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size);
    size_t accel_size = info.strat.accelSize() * accel_escape_info.size();
    size_t accel_offset = ROUNDUP_N(aux_offset + aux_size
                                    + ri->getReportListSize(), 32);
    size_t total_size = accel_offset + accel_size;

    DEBUG_PRINTF("aux_size %zu\n", aux_size);
    DEBUG_PRINTF("aux_offset %zu\n", aux_offset);
    DEBUG_PRINTF("rl size %u\n", ri->getReportListSize());
    DEBUG_PRINTF("accel_size %zu\n", accel_size);
    DEBUG_PRINTF("accel_offset %zu\n", accel_offset);
    DEBUG_PRINTF("total_size %zu\n", total_size);

    accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */
    assert(ISALIGNED_N(accel_offset, alignof(union AccelAux)));

    auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size);
    mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get());

    /* id allocation must happen before populateBasicInfo/fill_in_* consume
     * the impl ids; it also fills in the runtime's classification limits */
    allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8,
                    &m->accept_limit_8);

    populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset,
                      accel_escape_info.size(), arb, single, nfa.get());
    createShuffleMasks(m, info, sheng_end, accel_escape_info);

    fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset,
                     total_size - sizeof(NFA), reports, reports_eod,
                     aux_offset + aux_size, *ri);

    fill_in_succ_table_8(nfa.get(), info, sheng_end);

    DEBUG_PRINTF("rl size %zu\n", ri->size());

    return nfa;
}
| 1030 | |
| 1031 | bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc, |
| 1032 | const ReportManager &rm) { |
| 1033 | if (!cc.grey.allowMcSheng) { |
| 1034 | return nullptr; |
| 1035 | } |
| 1036 | |
| 1037 | mcclellan_build_strat mbs(raw, rm, false); |
| 1038 | dfa_info info(mbs); |
| 1039 | bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; |
| 1040 | |
| 1041 | if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming |
| 1042 | * mode with our semantics */ |
| 1043 | raw.stripExtraEodReports(); |
| 1044 | } |
| 1045 | |
| 1046 | bool has_eod_reports = raw.hasEodReports(); |
| 1047 | |
| 1048 | map<dstate_id_t, AccelScheme> accel_escape_info |
| 1049 | = info.strat.getAccelInfo(cc.grey); |
| 1050 | |
| 1051 | dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info); |
| 1052 | if (sheng_end <= DEAD_STATE + 1) { |
| 1053 | return nullptr; |
| 1054 | } |
| 1055 | |
| 1056 | bytecode_ptr<NFA> nfa; |
| 1057 | if (!using8bit) { |
| 1058 | nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey); |
| 1059 | } else { |
| 1060 | nfa = mcshengCompile8(info, sheng_end, accel_escape_info); |
| 1061 | } |
| 1062 | |
| 1063 | if (!nfa) { |
| 1064 | return nfa; |
| 1065 | } |
| 1066 | |
| 1067 | if (has_eod_reports) { |
| 1068 | nfa->flags |= NFA_ACCEPTS_EOD; |
| 1069 | } |
| 1070 | |
| 1071 | DEBUG_PRINTF("compile done\n" ); |
| 1072 | return nfa; |
| 1073 | } |
| 1074 | |
/* mcsheng engines always report themselves as accelerated: the sheng region
 * itself is treated as the acceleration, regardless of any AccelAux entries. */
bool has_accel_mcsheng(const NFA *) {
    return true; /* consider the sheng region as accelerated */
}
| 1078 | |
| 1079 | } // namespace ue2 |
| 1080 | |