/*
 * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "accel_dfa_build_strat.h"

#include "accel.h"
#include "grey.h"
#include "nfagraph/ng_limex_accel.h"
#include "shufticompile.h"
#include "trufflecompile.h"
#include "util/accel_scheme.h"
#include "util/charreach.h"
#include "util/container.h"
#include "util/dump_charclass.h"
#include "util/small_vector.h"
#include "util/verify_types.h"

#include <sstream>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#define PATHS_LIMIT 500

using namespace std;

namespace ue2 {

namespace {
struct path {
    small_vector<CharReach, MAX_ACCEL_DEPTH + 1> reach;
    dstate_id_t dest = DEAD_STATE;
    explicit path(dstate_id_t base) : dest(base) {}
};
} // namespace

template<typename Container>
void dump_paths(const Container &paths) {
    for (UNUSED const path &p : paths) {
        DEBUG_PRINTF("[%s] -> %u\n", describeClasses(p.reach).c_str(), p.dest);
    }
    DEBUG_PRINTF("%zu paths\n", paths.size());
}

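/* Builds the inverse of the DFA's alphabet remapping: for each remapped
 * symbol (TOP excluded), the CharReach of raw byte values that map to it. */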
static
vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) {
    vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */

    for (u32 i = 0; i < N_CHARS; i++) {
        rv.at(rdfa.alpha_remap[i]).set(i);
    }

    return rv;
}

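/* Returns false if some path already recorded in |good| (all paths there end
 * at the same state as |p|) dominates |p|: comparing reach classes from the
 * back, every class of |p| is a subset of the corresponding class of the
 * existing path, so |p| adds nothing new. */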
static
bool is_useful_path(const vector<path> &good, const path &p) {
    for (const auto &g : good) {
        assert(g.dest == p.dest);
        assert(g.reach.size() <= p.reach.size());
        auto git = g.reach.rbegin();
        auto pit = p.reach.rbegin();

        for (; git != g.reach.rend(); ++git, ++pit) {
            if (!pit->isSubsetOf(*git)) {
                goto next;
            }
        }
        DEBUG_PRINTF("better: [%s] -> %u\n", describeClasses(g.reach).c_str(),
                     g.dest);

        return false;
    next:;
    }

    return true;
}

static
path append(const path &orig, const CharReach &cr, u32 new_dest) {
    path p(new_dest);
    p.reach = orig.reach;
    p.reach.push_back(cr);

    return p;
}

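/* Extends path |p| by one symbol class towards each distinct successor of its
 * destination state, recording the new paths in |all| (keyed by destination)
 * and |out| and pruning any path already dominated per is_useful_path().
 * Paths whose last reach is empty stop here; report states either terminate
 * the path (when the DFA generates callbacks) or add an empty-reach capped
 * copy alongside the normal extensions. */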
static
void extend(const raw_dfa &rdfa, const vector<CharReach> &rev_map,
            const path &p, unordered_map<u32, vector<path>> &all,
            vector<path> &out) {
    const dstate &s = rdfa.states[p.dest];

    if (!p.reach.empty() && p.reach.back().none()) {
        out.push_back(p);
        return;
    }

    if (!s.reports.empty()) {
        if (generates_callbacks(rdfa.kind)) {
            out.push_back(p);
            return;
        } else {
            path pp = append(p, CharReach(), p.dest);
            all[p.dest].push_back(pp);
            out.push_back(std::move(pp));
        }
    }

    if (!s.reports_eod.empty()) {
        path pp = append(p, CharReach(), p.dest);
        all[p.dest].push_back(pp);
        out.push_back(std::move(pp));
    }

    flat_map<u32, CharReach> dest;
    for (u32 i = 0; i < rev_map.size(); i++) {
        u32 succ = s.next[i];
        dest[succ] |= rev_map[i];
    }

    for (const auto &e : dest) {
        path pp = append(p, e.second, e.first);
        if (!is_useful_path(all[e.first], pp)) {
            DEBUG_PRINTF("not useful: [%s] -> %u\n",
                         describeClasses(pp.reach).c_str(), pp.dest);
            continue;
        }

        DEBUG_PRINTF("----good: [%s] -> %u\n",
                     describeClasses(pp.reach).c_str(), pp.dest);
        all[e.first].push_back(pp);
        out.push_back(std::move(pp));
    }
}

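/* Generates the sequences of byte classes seen along paths of length |len|
 * leading out of |base|, stopping early once PATHS_LIMIT paths are in flight.
 * The result is fed to findBestAccelScheme(). */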
static
vector<vector<CharReach>> generate_paths(const raw_dfa &rdfa,
                                         dstate_id_t base, u32 len) {
    const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa);
    vector<path> paths{path(base)};
    unordered_map<u32, vector<path>> all;
    all[base].push_back(path(base));
    for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) {
        vector<path> next_gen;
        for (const auto &p : paths) {
            extend(rdfa, rev_map, p, all, next_gen);
        }

        paths = std::move(next_gen);
    }

    dump_paths(paths);

    vector<vector<CharReach>> rv;
    rv.reserve(paths.size());
    for (auto &p : paths) {
        rv.push_back(vector<CharReach>(std::make_move_iterator(p.reach.begin()),
                                       std::make_move_iterator(p.reach.end())));
    }
    return rv;
}

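/* Looks for an acceleration scheme that applies at an offset from |base| by
 * examining the paths of up to max_allowed_accel_offset + 1 byte classes
 * leading out of it. */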
static
AccelScheme look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base,
                                  u32 max_allowed_accel_offset) {
    DEBUG_PRINTF("looking for accel for %hu\n", base);
    vector<vector<CharReach>> paths =
        generate_paths(rdfa, base, max_allowed_accel_offset + 1);
    AccelScheme as = findBestAccelScheme(paths, CharReach(), true);
    DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset);
    return as;
}

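/* Returns true if scheme |a| is preferable to |b|: a double-byte scheme beats
 * a single-byte one, otherwise the smaller stop class wins. */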
static UNUSED
bool better(const AccelScheme &a, const AccelScheme &b) {
    if (!a.double_byte.empty() && b.double_byte.empty()) {
        return true;
    }

    if (!b.double_byte.empty()) {
        return false;
    }

    return a.cr.count() < b.cr.count();
}

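/* True if the scheme's double-byte component is usable: there are literal
 * pairs to search for and the single-byte remainder (double_cr) is small
 * enough (at most two characters, fewer than the number of pairs). */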
static
bool double_byte_ok(const AccelScheme &info) {
    return !info.double_byte.empty() &&
           info.double_cr.count() < info.double_byte.size() &&
           info.double_cr.count() <= 2;
}

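/* True if state |s| has a transition back to itself on any symbol other than
 * TOP. */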
static
bool has_self_loop(dstate_id_t s, const raw_dfa &raw) {
    u16 top_remap = raw.alpha_remap[TOP];
    for (u32 i = 0; i < raw.states[s].next.size(); i++) {
        if (i != top_remap && raw.states[s].next[i] == s) {
            return true;
        }
    }
    return false;
}

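/* Returns the remapped symbols for the bytes outside the escape class, i.e.
 * the symbols that do not stop acceleration. */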
static
flat_set<u16> find_nonexit_symbols(const raw_dfa &rdfa,
                                   const CharReach &escape) {
    flat_set<u16> rv;
    CharReach nonexit = ~escape;
    for (auto i = nonexit.find_first(); i != nonexit.npos;
         i = nonexit.find_next(i)) {
        rv.insert(rdfa.alpha_remap[i]);
    }

    return rv;
}

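/* Returns the floating start state if the DFA has one; otherwise walks out
 * from the anchored start looking for a nearby state with a (non-TOP)
 * self-loop to act as a proxy for the start-of-data state. Returns DEAD_STATE
 * if no suitable proxy exists. */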
static
dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
    if (raw.start_floating != DEAD_STATE) {
        DEBUG_PRINTF("has floating start\n");
        return raw.start_floating;
    }

    DEBUG_PRINTF("looking for SDS proxy\n");

    dstate_id_t s = raw.start_anchored;

    if (has_self_loop(s, raw)) {
        return s;
    }

    u16 top_remap = raw.alpha_remap[TOP];

    std::unordered_set<dstate_id_t> seen;
    while (true) {
        seen.insert(s);
        DEBUG_PRINTF("basis %hu\n", s);

        /* check if we are connected to a state with a self loop */
        for (u32 i = 0; i < raw.states[s].next.size(); i++) {
            dstate_id_t t = raw.states[s].next[i];
            if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) {
                return t;
            }
        }

        /* find a neighbour to use as a basis for looking for the sds proxy */
        dstate_id_t t = DEAD_STATE;
        for (u32 i = 0; i < raw.states[s].next.size(); i++) {
            dstate_id_t tt = raw.states[s].next[i];
            if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) {
                t = tt;
                break;
            }
        }

        if (t == DEAD_STATE) {
            /* we were unable to find a state to use as a SDS proxy */
            return DEAD_STATE;
        }

        s = t;
    }
}

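/* Returns the set of states reachable from |base| using only non-exit symbols
 * of the single-byte scheme |ei|; these states can share |base|'s
 * acceleration. For a double-byte scheme only |base| itself is returned. */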
static
set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base,
                             const AccelScheme &ei) {
    DEBUG_PRINTF("looking for region around %hu\n", base);

    set<dstate_id_t> region = {base};

    if (!ei.double_byte.empty()) {
        return region;
    }

    DEBUG_PRINTF("accel %s+%u\n", describeClass(ei.cr).c_str(), ei.offset);

    const CharReach &escape = ei.cr;
    auto nonexit_symbols = find_nonexit_symbols(rdfa, escape);

    vector<dstate_id_t> pending = {base};
    while (!pending.empty()) {
        dstate_id_t curr = pending.back();
        pending.pop_back();
        for (auto s : nonexit_symbols) {
            dstate_id_t t = rdfa.states[curr].next[s];
            if (contains(region, t)) {
                continue;
            }

            DEBUG_PRINTF(" %hu is in region\n", t);
            region.insert(t);
            pending.push_back(t);
        }
    }

    return region;
}

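/* Computes an acceleration scheme for the given state. The single-byte stop
 * class is the union of the symbols that leave the state; a double-byte
 * scheme is assembled from the state's successors as long as the resulting
 * set of byte pairs stays small. For the floating start state of an
 * untriggered DFA, offset acceleration is also considered and used if it
 * looks better. */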
AccelScheme
accel_dfa_build_strat::find_escape_strings(dstate_id_t this_idx) const {
    AccelScheme rv;
    const raw_dfa &rdfa = get_raw();
    rv.cr.clear();
    rv.offset = 0;
    const dstate &raw = rdfa.states[this_idx];
    const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa);
    bool outs2_broken = false;
    flat_map<dstate_id_t, CharReach> succs;

    for (u32 i = 0; i < rev_map.size(); i++) {
        if (raw.next[i] == this_idx) {
            continue;
        }

        const CharReach &cr_i = rev_map.at(i);

        rv.cr |= cr_i;
        dstate_id_t next_id = raw.next[i];

        DEBUG_PRINTF("next is %hu\n", next_id);
        const dstate &raw_next = rdfa.states[next_id];

        if (outs2_broken) {
            continue;
        }

        if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) {
            DEBUG_PRINTF("leads to report\n");
            outs2_broken = true; /* cannot accelerate over reports */
            continue;
        }
        succs[next_id] |= cr_i;
    }

    if (!outs2_broken) {
        for (const auto &e : succs) {
            const CharReach &cr_i = e.second;
            const dstate &raw_next = rdfa.states[e.first];

            CharReach cr_all_j;
            for (u32 j = 0; j < rev_map.size(); j++) {
                if (raw_next.next[j] == raw.next[j]) {
                    continue;
                }

                DEBUG_PRINTF("state %hu: adding sym %u -> %hu to 2\n", e.first,
                             j, raw_next.next[j]);
                cr_all_j |= rev_map.at(j);
            }

            if (cr_i.count() * cr_all_j.count() > 8) {
                DEBUG_PRINTF("adding %zu to double_cr\n", cr_i.count());
                rv.double_cr |= cr_i;
            } else {
                for (auto ii = cr_i.find_first(); ii != CharReach::npos;
                     ii = cr_i.find_next(ii)) {
                    for (auto jj = cr_all_j.find_first(); jj != CharReach::npos;
                         jj = cr_all_j.find_next(jj)) {
                        rv.double_byte.emplace((u8)ii, (u8)jj);
                        if (rv.double_byte.size() > 8) {
                            DEBUG_PRINTF("outs2 too big\n");
                            outs2_broken = true;
                            goto done;
                        }
                    }
                }
            }
        }

    done:
        assert(outs2_broken || rv.double_byte.size() <= 8);
        if (outs2_broken) {
            rv.double_byte.clear();
        }
    }

    DEBUG_PRINTF("this %hu, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa));
    DEBUG_PRINTF("broken %d\n", outs2_broken);
    if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) &&
        this_idx == rdfa.start_floating && this_idx != DEAD_STATE) {
        DEBUG_PRINTF("looking for offset accel at %hu\n", this_idx);
        auto offset =
            look_for_offset_accel(rdfa, this_idx, max_allowed_offset_accel());
        DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(), rv.cr.count());
        if (double_byte_ok(offset) || offset.cr.count() < rv.cr.count()) {
            DEBUG_PRINTF("using offset accel\n");
            rv = offset;
        }
    }

    return rv;
}

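/* Translates an AccelScheme into the runtime AccelAux structure, picking the
 * most specific applicable engine: double vermicelli (exact, caseless or
 * masked), double shufti, vermicelli, caseless vermicelli, shufti and finally
 * truffle. ACCEL_RED_TAPE is used for states with no escape at all and
 * ACCEL_NONE when the stop class is too broad to be worth accelerating. */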
void
accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
                                  const AccelScheme &info,
                                  void *accel_out) {
    AccelAux *accel = (AccelAux *)accel_out;

    DEBUG_PRINTF("acceleration scheme has offset s%u/d%u\n", info.offset,
                 info.double_offset);
    accel->generic.offset = verify_u8(info.offset);

    if (double_byte_ok(info) && info.double_cr.none() &&
        info.double_byte.size() == 1) {
        accel->accel_type = ACCEL_DVERM;
        accel->dverm.c1 = info.double_byte.begin()->first;
        accel->dverm.c2 = info.double_byte.begin()->second;
        accel->dverm.offset = verify_u8(info.double_offset);
        DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx);
        return;
    }

    if (double_byte_ok(info) && info.double_cr.none() &&
        (info.double_byte.size() == 2 || info.double_byte.size() == 4)) {
        bool ok = true;

        assert(!info.double_byte.empty());
        u8 firstC = info.double_byte.begin()->first & CASE_CLEAR;
        u8 secondC = info.double_byte.begin()->second & CASE_CLEAR;

        for (const pair<u8, u8> &p : info.double_byte) {
            if ((p.first & CASE_CLEAR) != firstC ||
                (p.second & CASE_CLEAR) != secondC) {
                ok = false;
                break;
            }
        }

        if (ok) {
            accel->accel_type = ACCEL_DVERM_NOCASE;
            accel->dverm.c1 = firstC;
            accel->dverm.c2 = secondC;
            accel->dverm.offset = verify_u8(info.double_offset);
            DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx);
            return;
        }

        u8 m1;
        u8 m2;
        if (buildDvermMask(info.double_byte, &m1, &m2)) {
            accel->accel_type = ACCEL_DVERM_MASKED;
            accel->dverm.offset = verify_u8(info.double_offset);
            accel->dverm.c1 = info.double_byte.begin()->first & m1;
            accel->dverm.c2 = info.double_byte.begin()->second & m2;
            accel->dverm.m1 = m1;
            accel->dverm.m2 = m2;
            DEBUG_PRINTF(
                "building masked double-vermicelli for 0x%02hhx%02hhx\n",
                accel->dverm.c1, accel->dverm.c2);
            return;
        }
    }

    if (double_byte_ok(info) &&
        shuftiBuildDoubleMasks(
            info.double_cr, info.double_byte, (u8 *)&accel->dshufti.lo1,
            (u8 *)&accel->dshufti.hi1, (u8 *)&accel->dshufti.lo2,
            (u8 *)&accel->dshufti.hi2)) {
        accel->accel_type = ACCEL_DSHUFTI;
        accel->dshufti.offset = verify_u8(info.double_offset);
        DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
        return;
    }

    if (info.cr.none()) {
        accel->accel_type = ACCEL_RED_TAPE;
        DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape"
                     " from which there is no escape\n",
                     this_idx);
        return;
    }

    if (info.cr.count() == 1) {
        accel->accel_type = ACCEL_VERM;
        accel->verm.c = info.cr.find_first();
        DEBUG_PRINTF("state %hu is vermicelli\n", this_idx);
        return;
    }

    if (info.cr.count() == 2 && info.cr.isCaselessChar()) {
        accel->accel_type = ACCEL_VERM_NOCASE;
        accel->verm.c = info.cr.find_first() & CASE_CLEAR;
        DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx);
        return;
    }

    if (info.cr.count() > max_floating_stop_char()) {
        accel->accel_type = ACCEL_NONE;
        DEBUG_PRINTF("state %hu is too broad\n", this_idx);
        return;
    }

    accel->accel_type = ACCEL_SHUFTI;
    if (-1 != shuftiBuildMasks(info.cr, (u8 *)&accel->shufti.lo,
                               (u8 *)&accel->shufti.hi)) {
        DEBUG_PRINTF("state %hu is shufti\n", this_idx);
        return;
    }

    assert(!info.cr.none());
    accel->accel_type = ACCEL_TRUFFLE;
    truffleBuildMasks(info.cr, (u8 *)&accel->truffle.mask1,
                      (u8 *)&accel->truffle.mask2);
    DEBUG_PRINTF("state %hu is truffle\n", this_idx);
}

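/* Computes acceleration schemes for the DFA's states (all of them, or only
 * the init states when only_accel_init is set), skipping report states when
 * the engine generates callbacks. The single-byte scheme found for the SDS
 * proxy is then propagated to the other states in its region wherever it
 * beats what they already have. */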
map<dstate_id_t, AccelScheme>
accel_dfa_build_strat::getAccelInfo(const Grey &grey) {
    map<dstate_id_t, AccelScheme> rv;
    raw_dfa &rdfa = get_raw();
    if (!grey.accelerateDFA) {
        return rv;
    }

    dstate_id_t sds_proxy = get_sds_or_proxy(rdfa);
    DEBUG_PRINTF("sds %hu\n", sds_proxy);

    /* Find accel info for a single state. */
    auto do_state = [&](size_t i) {
        if (i == DEAD_STATE) {
            return;
        }

        /* Note on report acceleration states: While we can't accelerate while
         * we are spamming out callbacks, the QR code paths don't raise reports
         * during scanning so they can accelerate report states. */
        if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) {
            return;
        }

        size_t single_limit =
            i == sds_proxy ? max_floating_stop_char() : max_stop_char();
        DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit);

        AccelScheme ei = find_escape_strings(i);
        if (ei.cr.count() > single_limit) {
            DEBUG_PRINTF("state %zu is not accelerable, has %zu\n", i,
                         ei.cr.count());
            return;
        }

        DEBUG_PRINTF("state %zu should be accelerable %zu\n", i, ei.cr.count());

        rv[i] = ei;
    };

    if (only_accel_init) {
        DEBUG_PRINTF("only computing accel for init states\n");
        do_state(rdfa.start_anchored);
        if (rdfa.start_floating != rdfa.start_anchored) {
            do_state(rdfa.start_floating);
        }
    } else {
        DEBUG_PRINTF("computing accel for all states\n");
        for (size_t i = 0; i < rdfa.states.size(); i++) {
            do_state(i);
        }
    }

    /* provide acceleration states to states in the region of sds */
    if (contains(rv, sds_proxy)) {
        AccelScheme sds_ei = rv[sds_proxy];
        sds_ei.double_byte.clear(); /* region based on single byte scheme
                                     * may differ from double byte */
        DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n",
                     sds_ei.cr.count());
        auto sds_region = find_region(rdfa, sds_proxy, sds_ei);
        for (auto s : sds_region) {
            if (!contains(rv, s) || better(sds_ei, rv[s])) {
                rv[s] = sds_ei;
            }
        }
    }

    return rv;
}

} // namespace ue2