1 | /* |
2 | * Copyright (c) 2016-2018, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #include "rose_build_program.h" |
30 | |
31 | #include "rose_build_engine_blob.h" |
32 | #include "rose_build_instructions.h" |
33 | #include "rose_build_lookaround.h" |
34 | #include "rose_build_resources.h" |
35 | #include "nfa/nfa_api_queue.h" |
36 | #include "nfa/nfa_build_util.h" |
37 | #include "nfa/tamaramacompile.h" |
38 | #include "nfagraph/ng_util.h" |
39 | #include "util/charreach_util.h" |
40 | #include "util/container.h" |
41 | #include "util/compile_context.h" |
42 | #include "util/compile_error.h" |
43 | #include "util/report_manager.h" |
44 | #include "util/unordered.h" |
45 | #include "util/verify_types.h" |
46 | |
47 | #include <boost/range/adaptor/map.hpp> |
48 | |
49 | #include <algorithm> |
50 | #include <cstring> |
51 | |
52 | using namespace std; |
53 | using boost::adaptors::map_values; |
54 | using boost::adaptors::map_keys; |
55 | |
56 | namespace ue2 { |
57 | |
58 | engine_info::engine_info(const NFA *nfa, bool trans) |
59 | : type((NFAEngineType)nfa->type), accepts_eod(nfaAcceptsEod(nfa)), |
60 | stream_size(nfa->streamStateSize), |
61 | scratch_size(nfa->scratchStateSize), |
62 | scratch_align(state_alignment(*nfa)), |
63 | transient(trans) { |
64 | assert(scratch_align); |
65 | } |
66 | |
67 | left_build_info::left_build_info(u32 q, u32 l, u32 t, rose_group sm, |
68 | const std::vector<u8> &stops, u32 max_ql, |
69 | u8 cm_count, const CharReach &cm_cr) |
70 | : queue(q), lag(l), transient(t), squash_mask(sm), stopAlphabet(stops), |
71 | max_queuelen(max_ql), countingMiracleCount(cm_count), |
72 | countingMiracleReach(cm_cr) { |
73 | } |
74 | |
75 | left_build_info::left_build_info(const vector<vector<LookEntry>> &looks) |
76 | : has_lookaround(true), lookaround(looks) { |
77 | } |
78 | |
79 | using OffsetMap = RoseInstruction::OffsetMap; |
80 | |
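/**
 * Assigns each instruction in the program a byte offset in the serialized
 * form, rounding each one up to ROSE_INSTR_MIN_ALIGN, and writes the total
 * serialized length out through total_len.
 */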
81 | static |
82 | OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) { |
83 | OffsetMap offset_map; |
84 | u32 offset = 0; |
85 | for (const auto &ri : program) { |
86 | offset = ROUNDUP_N(offset, ROSE_INSTR_MIN_ALIGN); |
        DEBUG_PRINTF("instr %p (opcode %d) -> offset %u\n", ri.get(),
                     ri->code(), offset);
89 | assert(!contains(offset_map, ri.get())); |
90 | offset_map.emplace(ri.get(), offset); |
91 | offset += ri->byte_length(); |
92 | } |
93 | *total_len = offset; |
94 | return offset_map; |
95 | } |
96 | |
97 | RoseProgram::RoseProgram() { |
98 | prog.push_back(ue2::make_unique<RoseInstrEnd>()); |
99 | } |
100 | |
101 | RoseProgram::~RoseProgram() = default; |
102 | |
103 | RoseProgram::RoseProgram(RoseProgram &&) = default; |
104 | RoseProgram &RoseProgram::operator=(RoseProgram &&) = default; |
105 | |
106 | bool RoseProgram::empty() const { |
107 | assert(!prog.empty()); |
108 | assert(prog.back()->code() == ROSE_INSTR_END); |
109 | // Empty if we only have one element, the END instruction. |
110 | return next(prog.begin()) == prog.end(); |
111 | } |
112 | |
113 | const RoseInstruction *RoseProgram::end_instruction() const { |
114 | assert(!prog.empty()); |
115 | assert(prog.back()->code() == ROSE_INSTR_END); |
116 | |
117 | return prog.back().get(); |
118 | } |
119 | |
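/**
 * Rewrites jump targets in the instruction range [it, it_end): any
 * instruction pointing at old_target is updated to point at new_target.
 * Used when splicing program fragments together below.
 */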
120 | void RoseProgram::update_targets(RoseProgram::iterator it, |
121 | RoseProgram::iterator it_end, |
122 | const RoseInstruction *old_target, |
123 | const RoseInstruction *new_target) { |
124 | assert(old_target && new_target && old_target != new_target); |
125 | for (; it != it_end; ++it) { |
126 | unique_ptr<RoseInstruction> &ri = *it; |
127 | assert(ri); |
128 | ri->update_target(old_target, new_target); |
129 | } |
130 | } |
131 | |
132 | RoseProgram::iterator RoseProgram::insert(RoseProgram::iterator it, |
133 | unique_ptr<RoseInstruction> ri) { |
134 | assert(!prog.empty()); |
135 | assert(it != end()); |
136 | assert(prog.back()->code() == ROSE_INSTR_END); |
137 | |
138 | return prog.insert(it, move(ri)); |
139 | } |
140 | |
141 | RoseProgram::iterator RoseProgram::insert(RoseProgram::iterator it, |
142 | RoseProgram &&block) { |
143 | assert(!prog.empty()); |
144 | assert(it != end()); |
145 | assert(prog.back()->code() == ROSE_INSTR_END); |
146 | |
147 | if (block.empty()) { |
148 | return it; |
149 | } |
150 | |
151 | const RoseInstruction *end_ptr = block.end_instruction(); |
152 | assert(end_ptr->code() == ROSE_INSTR_END); |
153 | block.prog.pop_back(); |
154 | |
155 | const RoseInstruction *new_target = it->get(); |
156 | update_targets(block.prog.begin(), block.prog.end(), end_ptr, new_target); |
157 | |
158 | // Workaround: container insert() for ranges doesn't return an iterator |
159 | // in the version of the STL distributed with gcc 4.8. |
160 | auto dist = distance(prog.begin(), it); |
161 | prog.insert(it, make_move_iterator(block.prog.begin()), |
162 | make_move_iterator(block.prog.end())); |
163 | it = prog.begin(); |
164 | advance(it, dist); |
165 | return it; |
166 | } |
167 | |
168 | RoseProgram::iterator RoseProgram::erase(RoseProgram::iterator first, |
169 | RoseProgram::iterator last) { |
170 | return prog.erase(first, last); |
171 | } |
172 | |
173 | void RoseProgram::add_before_end(std::unique_ptr<RoseInstruction> ri) { |
174 | assert(!prog.empty()); |
175 | insert(std::prev(prog.end()), std::move(ri)); |
176 | } |
177 | |
178 | void RoseProgram::add_before_end(RoseProgram &&block) { |
179 | assert(!prog.empty()); |
180 | assert(prog.back()->code() == ROSE_INSTR_END); |
181 | |
182 | if (block.empty()) { |
183 | return; |
184 | } |
185 | |
186 | insert(prev(prog.end()), move(block)); |
187 | } |
188 | |
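/**
 * Appends the given block to this program, replacing our current END
 * instruction with the block's first instruction. Unlike add_before_end(),
 * jumps that previously targeted our END will now fall through into the
 * appended block rather than skipping it.
 */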
189 | void RoseProgram::add_block(RoseProgram &&block) { |
190 | assert(!prog.empty()); |
191 | assert(prog.back()->code() == ROSE_INSTR_END); |
192 | |
193 | if (block.empty()) { |
194 | return; |
195 | } |
196 | |
197 | // Replace pointers to the current END with pointers to the first |
198 | // instruction in the new sequence. |
199 | const RoseInstruction *end_ptr = end_instruction(); |
200 | prog.pop_back(); |
201 | update_targets(prog.begin(), prog.end(), end_ptr, |
202 | block.prog.front().get()); |
203 | prog.insert(prog.end(), make_move_iterator(block.prog.begin()), |
204 | make_move_iterator(block.prog.end())); |
205 | } |
206 | |
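/**
 * Serializes a program into a zeroed, ROSE_INSTR_MIN_ALIGN-aligned buffer.
 * This is a two-pass process: first compute each instruction's offset with
 * makeOffsetMap(), then have each instruction write itself at that offset,
 * resolving jump targets through the offset map.
 */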
207 | bytecode_ptr<char> writeProgram(RoseEngineBlob &blob, |
208 | const RoseProgram &program) { |
209 | u32 total_len = 0; |
210 | const auto offset_map = makeOffsetMap(program, &total_len); |
    DEBUG_PRINTF("%zu instructions, len %u\n", program.size(), total_len);
212 | |
213 | auto bytecode = make_zeroed_bytecode_ptr<char>(total_len, |
214 | ROSE_INSTR_MIN_ALIGN); |
215 | char *ptr = bytecode.get(); |
216 | |
217 | for (const auto &ri : program) { |
218 | assert(contains(offset_map, ri.get())); |
219 | const u32 offset = offset_map.at(ri.get()); |
220 | ri->write(ptr + offset, blob, offset_map); |
221 | } |
222 | |
223 | return bytecode; |
224 | } |
225 | |
226 | size_t RoseProgramHash::operator()(const RoseProgram &program) const { |
227 | size_t v = 0; |
228 | for (const auto &ri : program) { |
229 | assert(ri); |
230 | hash_combine(v, ri->hash()); |
231 | } |
232 | return v; |
233 | } |
234 | |
235 | bool RoseProgramEquivalence::operator()(const RoseProgram &prog1, |
236 | const RoseProgram &prog2) const { |
237 | if (prog1.size() != prog2.size()) { |
238 | return false; |
239 | } |
240 | |
241 | u32 len_1 = 0, len_2 = 0; |
242 | const auto offset_map_1 = makeOffsetMap(prog1, &len_1); |
243 | const auto offset_map_2 = makeOffsetMap(prog2, &len_2); |
244 | |
245 | if (len_1 != len_2) { |
246 | return false; |
247 | } |
248 | |
249 | auto is_equiv = [&](const unique_ptr<RoseInstruction> &a, |
250 | const unique_ptr<RoseInstruction> &b) { |
251 | assert(a && b); |
252 | return a->equiv(*b, offset_map_1, offset_map_2); |
253 | }; |
254 | |
255 | return std::equal(prog1.begin(), prog1.end(), prog2.begin(), is_equiv); |
256 | } |
257 | |
/* Removes any CHECK_NOT_HANDLED instructions from the given program */
259 | static |
260 | void stripCheckHandledInstruction(RoseProgram &prog) { |
261 | for (auto it = prog.begin(); it != prog.end();) { |
262 | auto ins = dynamic_cast<const RoseInstrCheckNotHandled *>(it->get()); |
263 | if (!ins) { |
264 | ++it; |
265 | continue; |
266 | } |
267 | |
268 | auto next_it = next(it); |
269 | assert(next_it != prog.end()); /* there should always be an end ins */ |
270 | auto next_ins = next_it->get(); |
271 | |
272 | /* update all earlier instructions which point to ins to instead point |
273 | * to the next instruction. Only need to look at earlier as we only ever |
274 | * jump forward. */ |
275 | RoseProgram::update_targets(prog.begin(), it, ins, next_ins); |
276 | |
277 | /* remove check handled instruction */ |
278 | it = prog.erase(it, next_it); |
279 | } |
280 | } |
281 | |
282 | |
283 | /** Returns true if the program may read the interpreter's work_done flag */ |
284 | static |
285 | bool reads_work_done_flag(const RoseProgram &prog) { |
286 | for (const auto &ri : prog) { |
287 | if (dynamic_cast<const RoseInstrSquashGroups *>(ri.get())) { |
288 | return true; |
289 | } |
290 | } |
291 | return false; |
292 | } |
293 | |
294 | void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program) { |
295 | if (!eodNfaIterOffset) { |
296 | return; |
297 | } |
298 | |
299 | RoseProgram block; |
300 | block.add_before_end(ue2::make_unique<RoseInstrEnginesEod>(eodNfaIterOffset)); |
301 | program.add_block(move(block)); |
302 | } |
303 | |
304 | void addSuffixesEodProgram(RoseProgram &program) { |
305 | RoseProgram block; |
306 | block.add_before_end(ue2::make_unique<RoseInstrSuffixesEod>()); |
307 | program.add_block(move(block)); |
308 | } |
309 | |
310 | void addMatcherEodProgram(RoseProgram &program) { |
311 | RoseProgram block; |
312 | block.add_before_end(ue2::make_unique<RoseInstrMatcherEod>()); |
313 | program.add_block(move(block)); |
314 | } |
315 | |
316 | void addFlushCombinationProgram(RoseProgram &program) { |
317 | program.add_before_end(ue2::make_unique<RoseInstrFlushCombination>()); |
318 | } |
319 | |
320 | static |
321 | void makeRoleCheckLeftfix(const RoseBuildImpl &build, |
322 | const map<RoseVertex, left_build_info> &leftfix_info, |
323 | RoseVertex v, RoseProgram &program) { |
324 | auto it = leftfix_info.find(v); |
325 | if (it == end(leftfix_info)) { |
326 | return; |
327 | } |
328 | const left_build_info &lni = it->second; |
329 | if (lni.has_lookaround) { |
330 | return; // Leftfix completely implemented by lookaround. |
331 | } |
332 | |
333 | assert(!build.cc.streaming || |
334 | build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); |
335 | |
336 | bool is_prefix = build.isRootSuccessor(v); |
337 | const auto *end_inst = program.end_instruction(); |
338 | |
339 | unique_ptr<RoseInstruction> ri; |
340 | if (is_prefix) { |
341 | ri = ue2::make_unique<RoseInstrCheckPrefix>(lni.queue, build.g[v].left.lag, |
342 | build.g[v].left.leftfix_report, |
343 | end_inst); |
344 | } else { |
345 | ri = ue2::make_unique<RoseInstrCheckInfix>(lni.queue, build.g[v].left.lag, |
346 | build.g[v].left.leftfix_report, |
347 | end_inst); |
348 | } |
349 | program.add_before_end(move(ri)); |
350 | } |
351 | |
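/*
 * Anchored-table matches that end after floatingMinLiteralMatchOffset cannot
 * be processed until the floating table has caught up; the ANCHORED_DELAY
 * instruction sets the literal's groups and defers such a match for later
 * replay via its anchored program. If none of the literal's vertices can
 * match that late, the instruction is not needed at all.
 */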
352 | static |
353 | void makeAnchoredLiteralDelay(const RoseBuildImpl &build, |
354 | const ProgramBuild &prog_build, u32 lit_id, |
355 | RoseProgram &program) { |
356 | // Only relevant for literals in the anchored table. |
357 | const rose_literal_id &lit = build.literals.at(lit_id); |
358 | if (lit.table != ROSE_ANCHORED) { |
359 | return; |
360 | } |
361 | |
362 | // If this literal match cannot occur after floatingMinLiteralMatchOffset, |
363 | // we do not need this check. |
364 | bool all_too_early = true; |
365 | rose_group groups = 0; |
366 | |
367 | const auto &lit_vertices = build.literal_info.at(lit_id).vertices; |
368 | for (RoseVertex v : lit_vertices) { |
369 | if (build.g[v].max_offset > prog_build.floatingMinLiteralMatchOffset) { |
370 | all_too_early = false; |
371 | } |
372 | groups |= build.g[v].groups; |
373 | } |
374 | |
375 | if (all_too_early) { |
376 | return; |
377 | } |
378 | |
379 | assert(contains(prog_build.anchored_programs, lit_id)); |
380 | u32 anch_id = prog_build.anchored_programs.at(lit_id); |
381 | |
382 | const auto *end_inst = program.end_instruction(); |
383 | auto ri = ue2::make_unique<RoseInstrAnchoredDelay>(groups, anch_id, end_inst); |
384 | program.add_before_end(move(ri)); |
385 | } |
386 | |
387 | static |
388 | void makeDedupe(const ReportManager &rm, const Report &report, |
389 | RoseProgram &program) { |
390 | const auto *end_inst = program.end_instruction(); |
391 | auto ri = |
392 | ue2::make_unique<RoseInstrDedupe>(report.quashSom, rm.getDkey(report), |
393 | report.offsetAdjust, end_inst); |
394 | program.add_before_end(move(ri)); |
395 | } |
396 | |
397 | static |
398 | void makeDedupeSom(const ReportManager &rm, const Report &report, |
399 | RoseProgram &program) { |
400 | const auto *end_inst = program.end_instruction(); |
401 | auto ri = ue2::make_unique<RoseInstrDedupeSom>(report.quashSom, |
402 | rm.getDkey(report), |
403 | report.offsetAdjust, end_inst); |
404 | program.add_before_end(move(ri)); |
405 | } |
406 | |
407 | static |
408 | void makeCatchup(const ReportManager &rm, bool needs_catchup, |
409 | const flat_set<ReportID> &reports, RoseProgram &program) { |
410 | if (!needs_catchup) { |
411 | return; |
412 | } |
413 | |
414 | // Everything except the INTERNAL_ROSE_CHAIN report needs catchup to run |
415 | // before reports are triggered. |
416 | |
417 | auto report_needs_catchup = [&](const ReportID &id) { |
418 | const Report &report = rm.getReport(id); |
419 | return report.type != INTERNAL_ROSE_CHAIN; |
420 | }; |
421 | |
422 | if (!any_of(begin(reports), end(reports), report_needs_catchup)) { |
        DEBUG_PRINTF("none of the given reports needs catchup\n");
424 | return; |
425 | } |
426 | |
427 | program.add_before_end(ue2::make_unique<RoseInstrCatchUp>()); |
428 | } |
429 | |
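/*
 * Translates a SOM-related Report into the som_operation structure used by
 * the runtime. The aux field carries either a reverse NFA index or a fixed
 * SOM distance, depending on the report type.
 */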
430 | static |
431 | void writeSomOperation(const Report &report, som_operation *op) { |
432 | assert(op); |
433 | |
434 | memset(op, 0, sizeof(*op)); |
435 | |
436 | switch (report.type) { |
437 | case EXTERNAL_CALLBACK_SOM_REL: |
438 | op->type = SOM_EXTERNAL_CALLBACK_REL; |
439 | break; |
440 | case INTERNAL_SOM_LOC_SET: |
441 | op->type = SOM_INTERNAL_LOC_SET; |
442 | break; |
443 | case INTERNAL_SOM_LOC_SET_IF_UNSET: |
444 | op->type = SOM_INTERNAL_LOC_SET_IF_UNSET; |
445 | break; |
446 | case INTERNAL_SOM_LOC_SET_IF_WRITABLE: |
447 | op->type = SOM_INTERNAL_LOC_SET_IF_WRITABLE; |
448 | break; |
449 | case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: |
450 | op->type = SOM_INTERNAL_LOC_SET_REV_NFA; |
451 | break; |
452 | case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: |
453 | op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET; |
454 | break; |
455 | case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: |
456 | op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE; |
457 | break; |
458 | case INTERNAL_SOM_LOC_COPY: |
459 | op->type = SOM_INTERNAL_LOC_COPY; |
460 | break; |
461 | case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: |
462 | op->type = SOM_INTERNAL_LOC_COPY_IF_WRITABLE; |
463 | break; |
464 | case INTERNAL_SOM_LOC_MAKE_WRITABLE: |
465 | op->type = SOM_INTERNAL_LOC_MAKE_WRITABLE; |
466 | break; |
467 | case EXTERNAL_CALLBACK_SOM_STORED: |
468 | op->type = SOM_EXTERNAL_CALLBACK_STORED; |
469 | break; |
470 | case EXTERNAL_CALLBACK_SOM_ABS: |
471 | op->type = SOM_EXTERNAL_CALLBACK_ABS; |
472 | break; |
473 | case EXTERNAL_CALLBACK_SOM_REV_NFA: |
474 | op->type = SOM_EXTERNAL_CALLBACK_REV_NFA; |
475 | break; |
476 | case INTERNAL_SOM_LOC_SET_FROM: |
477 | op->type = SOM_INTERNAL_LOC_SET_FROM; |
478 | break; |
479 | case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: |
480 | op->type = SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE; |
481 | break; |
482 | default: |
483 | // This report doesn't correspond to a SOM operation. |
484 | assert(0); |
        throw CompileError("Unable to generate bytecode.");
486 | } |
487 | |
488 | op->onmatch = report.onmatch; |
489 | |
490 | switch (report.type) { |
491 | case EXTERNAL_CALLBACK_SOM_REV_NFA: |
492 | case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: |
493 | case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: |
494 | case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: |
495 | op->aux.revNfaIndex = report.revNfaIndex; |
496 | break; |
497 | default: |
498 | op->aux.somDistance = report.somDistance; |
499 | break; |
500 | } |
501 | } |
502 | |
503 | static |
504 | void addLogicalSetRequired(const Report &report, ReportManager &rm, |
505 | RoseProgram &program) { |
506 | if (report.lkey == INVALID_LKEY) { |
507 | return; |
508 | } |
    // Record that the current lkey has matched.
510 | auto risl = ue2::make_unique<RoseInstrSetLogical>(report.lkey, |
511 | report.offsetAdjust); |
512 | program.add_before_end(move(risl)); |
    // Mark the combination keys (ckeys) that use this lkey as active so they
    // are checked later.
514 | for (auto ckey : rm.getRelateCKeys(report.lkey)) { |
515 | auto risc = ue2::make_unique<RoseInstrSetCombination>(ckey); |
516 | program.add_before_end(move(risc)); |
517 | } |
518 | } |
519 | |
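/*
 * Builds the instruction block for a single report: precondition checks
 * (bounds, exhaustion, SOM calculation, min length) first, then the
 * report/dedupe instruction appropriate to the report type, then any
 * logical-combination bookkeeping.
 */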
520 | static |
521 | void makeReport(const RoseBuildImpl &build, const ReportID id, |
522 | const bool has_som, RoseProgram &program) { |
523 | assert(id < build.rm.numReports()); |
524 | const Report &report = build.rm.getReport(id); |
525 | |
526 | RoseProgram report_block; |
527 | const RoseInstruction *end_inst = report_block.end_instruction(); |
528 | |
529 | // Handle min/max offset checks. |
530 | if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) { |
531 | auto ri = ue2::make_unique<RoseInstrCheckBounds>(report.minOffset, |
532 | report.maxOffset, end_inst); |
533 | report_block.add_before_end(move(ri)); |
534 | } |
535 | |
536 | // If this report has an exhaustion key, we can check it in the program |
537 | // rather than waiting until we're in the callback adaptor. |
538 | if (report.ekey != INVALID_EKEY) { |
539 | auto ri = ue2::make_unique<RoseInstrCheckExhausted>(report.ekey, end_inst); |
540 | report_block.add_before_end(move(ri)); |
541 | } |
542 | |
543 | // External SOM reports that aren't passthrough need their SOM value |
544 | // calculated. |
545 | if (isExternalSomReport(report) && |
546 | report.type != EXTERNAL_CALLBACK_SOM_PASS) { |
547 | auto ri = ue2::make_unique<RoseInstrSomFromReport>(); |
548 | writeSomOperation(report, &ri->som); |
549 | report_block.add_before_end(move(ri)); |
550 | } |
551 | |
552 | // Min length constraint. |
553 | if (report.minLength > 0) { |
554 | assert(build.hasSom); |
555 | auto ri = ue2::make_unique<RoseInstrCheckMinLength>( |
556 | report.offsetAdjust, report.minLength, end_inst); |
557 | report_block.add_before_end(move(ri)); |
558 | } |
559 | |
560 | if (report.quashSom) { |
561 | report_block.add_before_end(ue2::make_unique<RoseInstrSomZero>()); |
562 | } |
563 | |
564 | switch (report.type) { |
565 | case EXTERNAL_CALLBACK: |
566 | if (build.rm.numCkeys()) { |
567 | addFlushCombinationProgram(report_block); |
568 | } |
569 | if (!has_som) { |
570 | // Dedupe is only necessary if this report has a dkey, or if there |
571 | // are SOM reports to catch up. |
572 | bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom; |
573 | if (report.ekey == INVALID_EKEY) { |
574 | if (needs_dedupe) { |
575 | if (!report.quiet) { |
576 | report_block.add_before_end( |
577 | ue2::make_unique<RoseInstrDedupeAndReport>( |
578 | report.quashSom, build.rm.getDkey(report), |
579 | report.onmatch, report.offsetAdjust, end_inst)); |
580 | } else { |
581 | makeDedupe(build.rm, report, report_block); |
582 | } |
583 | } else { |
584 | if (!report.quiet) { |
585 | report_block.add_before_end( |
586 | ue2::make_unique<RoseInstrReport>( |
587 | report.onmatch, report.offsetAdjust)); |
588 | } |
589 | } |
590 | } else { |
591 | if (needs_dedupe) { |
592 | makeDedupe(build.rm, report, report_block); |
593 | } |
594 | if (!report.quiet) { |
595 | report_block.add_before_end( |
596 | ue2::make_unique<RoseInstrReportExhaust>( |
597 | report.onmatch, report.offsetAdjust, report.ekey)); |
598 | } else { |
599 | report_block.add_before_end( |
600 | ue2::make_unique<RoseInstrSetExhaust>(report.ekey)); |
601 | } |
602 | } |
603 | } else { // has_som |
604 | makeDedupeSom(build.rm, report, report_block); |
605 | if (report.ekey == INVALID_EKEY) { |
606 | if (!report.quiet) { |
607 | report_block.add_before_end(ue2::make_unique<RoseInstrReportSom>( |
608 | report.onmatch, report.offsetAdjust)); |
609 | } |
610 | } else { |
611 | if (!report.quiet) { |
612 | report_block.add_before_end( |
613 | ue2::make_unique<RoseInstrReportSomExhaust>( |
614 | report.onmatch, report.offsetAdjust, report.ekey)); |
615 | } else { |
616 | report_block.add_before_end( |
617 | ue2::make_unique<RoseInstrSetExhaust>(report.ekey)); |
618 | } |
619 | } |
620 | } |
621 | addLogicalSetRequired(report, build.rm, report_block); |
622 | break; |
623 | case INTERNAL_SOM_LOC_SET: |
624 | case INTERNAL_SOM_LOC_SET_IF_UNSET: |
625 | case INTERNAL_SOM_LOC_SET_IF_WRITABLE: |
626 | case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: |
627 | case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: |
628 | case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: |
629 | case INTERNAL_SOM_LOC_COPY: |
630 | case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: |
631 | case INTERNAL_SOM_LOC_MAKE_WRITABLE: |
632 | case INTERNAL_SOM_LOC_SET_FROM: |
633 | case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: |
634 | if (build.rm.numCkeys()) { |
635 | addFlushCombinationProgram(report_block); |
636 | } |
637 | if (has_som) { |
638 | auto ri = ue2::make_unique<RoseInstrReportSomAware>(); |
639 | writeSomOperation(report, &ri->som); |
640 | report_block.add_before_end(move(ri)); |
641 | } else { |
642 | auto ri = ue2::make_unique<RoseInstrReportSomInt>(); |
643 | writeSomOperation(report, &ri->som); |
644 | report_block.add_before_end(move(ri)); |
645 | } |
646 | break; |
647 | case INTERNAL_ROSE_CHAIN: { |
648 | report_block.add_before_end(ue2::make_unique<RoseInstrReportChain>( |
649 | report.onmatch, report.topSquashDistance)); |
650 | break; |
651 | } |
652 | case EXTERNAL_CALLBACK_SOM_REL: |
653 | case EXTERNAL_CALLBACK_SOM_STORED: |
654 | case EXTERNAL_CALLBACK_SOM_ABS: |
655 | case EXTERNAL_CALLBACK_SOM_REV_NFA: |
656 | if (build.rm.numCkeys()) { |
657 | addFlushCombinationProgram(report_block); |
658 | } |
659 | makeDedupeSom(build.rm, report, report_block); |
660 | if (report.ekey == INVALID_EKEY) { |
661 | if (!report.quiet) { |
662 | report_block.add_before_end(ue2::make_unique<RoseInstrReportSom>( |
663 | report.onmatch, report.offsetAdjust)); |
664 | } |
665 | } else { |
666 | if (!report.quiet) { |
667 | report_block.add_before_end( |
668 | ue2::make_unique<RoseInstrReportSomExhaust>( |
669 | report.onmatch, report.offsetAdjust, report.ekey)); |
670 | } else { |
671 | report_block.add_before_end( |
672 | ue2::make_unique<RoseInstrSetExhaust>(report.ekey)); |
673 | } |
674 | } |
675 | addLogicalSetRequired(report, build.rm, report_block); |
676 | break; |
677 | case EXTERNAL_CALLBACK_SOM_PASS: |
678 | if (build.rm.numCkeys()) { |
679 | addFlushCombinationProgram(report_block); |
680 | } |
681 | makeDedupeSom(build.rm, report, report_block); |
682 | if (report.ekey == INVALID_EKEY) { |
683 | if (!report.quiet) { |
684 | report_block.add_before_end(ue2::make_unique<RoseInstrReportSom>( |
685 | report.onmatch, report.offsetAdjust)); |
686 | } |
687 | } else { |
688 | if (!report.quiet) { |
689 | report_block.add_before_end( |
690 | ue2::make_unique<RoseInstrReportSomExhaust>( |
691 | report.onmatch, report.offsetAdjust, report.ekey)); |
692 | } else { |
693 | report_block.add_before_end( |
694 | ue2::make_unique<RoseInstrSetExhaust>(report.ekey)); |
695 | } |
696 | } |
697 | addLogicalSetRequired(report, build.rm, report_block); |
698 | break; |
699 | |
700 | default: |
701 | assert(0); |
        throw CompileError("Unable to generate bytecode.");
703 | } |
704 | |
705 | program.add_block(move(report_block)); |
706 | } |
707 | |
708 | static |
709 | void makeRoleReports(const RoseBuildImpl &build, |
710 | const std::map<RoseVertex, left_build_info> &leftfix_info, |
711 | bool needs_catchup, RoseVertex v, RoseProgram &program) { |
712 | const auto &g = build.g; |
713 | |
714 | bool report_som = false; |
715 | if (g[v].left.tracksSom()) { |
        /* the leftfix tracks SOM, so this role must provide the som value to
         * the suffix. */
718 | assert(contains(leftfix_info, v)); |
719 | const left_build_info &lni = leftfix_info.at(v); |
720 | program.add_before_end( |
721 | ue2::make_unique<RoseInstrSomLeftfix>(lni.queue, g[v].left.lag)); |
722 | report_som = true; |
723 | } else if (g[v].som_adjust) { |
724 | program.add_before_end( |
725 | ue2::make_unique<RoseInstrSomAdjust>(g[v].som_adjust)); |
726 | report_som = true; |
727 | } |
728 | |
729 | makeCatchup(build.rm, needs_catchup, g[v].reports, program); |
730 | |
731 | RoseProgram report_block; |
732 | for (ReportID id : g[v].reports) { |
733 | makeReport(build, id, report_som, report_block); |
734 | } |
735 | program.add_before_end(move(report_block)); |
736 | } |
737 | |
738 | static |
739 | void makeRoleSetState(const unordered_map<RoseVertex, u32> &roleStateIndices, |
740 | RoseVertex v, RoseProgram &program) { |
741 | // We only need this instruction if a state index has been assigned to this |
742 | // vertex. |
743 | auto it = roleStateIndices.find(v); |
744 | if (it == end(roleStateIndices)) { |
745 | return; |
746 | } |
747 | program.add_before_end(ue2::make_unique<RoseInstrSetState>(it->second)); |
748 | } |
749 | |
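/*
 * Emits one PUSH_DELAYED instruction per delayed child literal, sorted (and
 * deduplicated) by delay and index so that the generated program is
 * deterministic.
 */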
750 | static |
751 | void makePushDelayedInstructions(const RoseLiteralMap &literals, |
752 | ProgramBuild &prog_build, |
753 | const flat_set<u32> &delayed_ids, |
754 | RoseProgram &program) { |
755 | vector<RoseInstrPushDelayed> delay_instructions; |
756 | |
757 | for (const auto &delayed_lit_id : delayed_ids) { |
        DEBUG_PRINTF("delayed lit id %u\n", delayed_lit_id);
759 | assert(contains(prog_build.delay_programs, delayed_lit_id)); |
760 | u32 delay_id = prog_build.delay_programs.at(delayed_lit_id); |
761 | const auto &delay_lit = literals.at(delayed_lit_id); |
762 | delay_instructions.emplace_back(verify_u8(delay_lit.delay), delay_id); |
763 | } |
764 | |
765 | sort_and_unique(delay_instructions, [](const RoseInstrPushDelayed &a, |
766 | const RoseInstrPushDelayed &b) { |
767 | return tie(a.delay, a.index) < tie(b.delay, b.index); |
768 | }); |
769 | |
770 | for (const auto &ri : delay_instructions) { |
771 | program.add_before_end(ue2::make_unique<RoseInstrPushDelayed>(ri)); |
772 | } |
773 | } |
774 | |
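/*
 * Literals longer than ROSE_SHORT_LITERAL_LEN_MAX cannot be confirmed by the
 * literal matcher alone and need an explicit confirm instruction:
 * medium-length literals (up to longLitLengthThreshold) use CHECK_MED_LIT,
 * while anything longer uses the streaming long literal table via
 * CHECK_LONG_LIT.
 */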
775 | static |
776 | void makeCheckLiteralInstruction(const rose_literal_id &lit, |
777 | size_t longLitLengthThreshold, |
778 | RoseProgram &program, |
779 | const CompileContext &cc) { |
780 | assert(longLitLengthThreshold > 0); |
781 | |
    DEBUG_PRINTF("lit=%s, long lit threshold %zu\n", dumpString(lit.s).c_str(),
                 longLitLengthThreshold);
784 | |
785 | if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) { |
        DEBUG_PRINTF("lit short enough to not need confirm\n");
787 | return; |
788 | } |
789 | |
790 | // Check resource limits as well. |
791 | if (lit.s.length() > cc.grey.limitLiteralLength) { |
792 | throw ResourceLimitError(); |
793 | } |
794 | |
795 | if (lit.s.length() <= longLitLengthThreshold) { |
        DEBUG_PRINTF("is a medium-length literal\n");
797 | const auto *end_inst = program.end_instruction(); |
798 | unique_ptr<RoseInstruction> ri; |
799 | if (lit.s.any_nocase()) { |
800 | ri = ue2::make_unique<RoseInstrCheckMedLitNocase>(lit.s.get_string(), |
801 | end_inst); |
802 | } else { |
803 | ri = ue2::make_unique<RoseInstrCheckMedLit>(lit.s.get_string(), |
804 | end_inst); |
805 | } |
806 | program.add_before_end(move(ri)); |
807 | return; |
808 | } |
809 | |
810 | // Long literal support should only really be used for the floating table |
811 | // in streaming mode. |
812 | assert(lit.table == ROSE_FLOATING && cc.streaming); |
813 | |
    DEBUG_PRINTF("is a long literal\n");
815 | |
816 | const auto *end_inst = program.end_instruction(); |
817 | unique_ptr<RoseInstruction> ri; |
818 | if (lit.s.any_nocase()) { |
819 | ri = ue2::make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string(), |
820 | end_inst); |
821 | } else { |
822 | ri = ue2::make_unique<RoseInstrCheckLongLit>(lit.s.get_string(), end_inst); |
823 | } |
824 | program.add_before_end(move(ri)); |
825 | } |
826 | |
827 | static |
828 | void makeRoleCheckNotHandled(ProgramBuild &prog_build, RoseVertex v, |
829 | RoseProgram &program) { |
830 | u32 handled_key; |
831 | if (contains(prog_build.handledKeys, v)) { |
832 | handled_key = prog_build.handledKeys.at(v); |
833 | } else { |
834 | handled_key = verify_u32(prog_build.handledKeys.size()); |
835 | prog_build.handledKeys.emplace(v, handled_key); |
836 | } |
837 | |
838 | const auto *end_inst = program.end_instruction(); |
839 | auto ri = ue2::make_unique<RoseInstrCheckNotHandled>(handled_key, end_inst); |
840 | program.add_before_end(move(ri)); |
841 | } |
842 | |
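/*
 * Emits a CHECK_BOUNDS on the absolute end-of-match offset for this edge.
 * The edge bounds exclude the literal itself, so the minimum literal length
 * is added; for anchored-history edges the predecessor's fixed offset is
 * also added to turn the relative bounds into absolute ones.
 */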
843 | static |
844 | void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, |
845 | const RoseEdge &e, RoseProgram &program) { |
846 | const RoseGraph &g = build.g; |
847 | const RoseVertex u = source(e, g); |
848 | |
849 | // We know that we can trust the anchored table (DFA) to always deliver us |
850 | // literals at the correct offset. |
851 | if (build.isAnchored(v)) { |
        DEBUG_PRINTF("literal in anchored table, skipping bounds check\n");
853 | return; |
854 | } |
855 | |
856 | // Use the minimum literal length. |
857 | u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v)); |
858 | |
859 | u64a min_bound = g[e].minBound + lit_length; |
860 | u64a max_bound = g[e].maxBound == ROSE_BOUND_INF |
861 | ? ROSE_BOUND_INF |
862 | : g[e].maxBound + lit_length; |
863 | |
864 | if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { |
865 | assert(g[u].fixedOffset()); |
866 | // Make offsets absolute. |
867 | min_bound += g[u].max_offset; |
868 | if (max_bound != ROSE_BOUND_INF) { |
869 | max_bound += g[u].max_offset; |
870 | } |
871 | } |
872 | |
873 | assert(max_bound <= ROSE_BOUND_INF); |
874 | assert(min_bound <= max_bound); |
875 | |
876 | // CHECK_BOUNDS instruction uses 64-bit bounds, so we can use MAX_OFFSET |
877 | // (max value of a u64a) to represent ROSE_BOUND_INF. |
878 | if (max_bound == ROSE_BOUND_INF) { |
879 | max_bound = MAX_OFFSET; |
880 | } |
881 | |
882 | // This instruction should be doing _something_ -- bounds should be tighter |
883 | // than just {length, inf}. |
884 | assert(min_bound > lit_length || max_bound < MAX_OFFSET); |
885 | |
886 | const auto *end_inst = program.end_instruction(); |
887 | program.add_before_end( |
888 | ue2::make_unique<RoseInstrCheckBounds>(min_bound, max_bound, end_inst)); |
889 | } |
890 | |
891 | static |
892 | void makeRoleGroups(const RoseGraph &g, ProgramBuild &prog_build, |
893 | RoseVertex v, RoseProgram &program) { |
894 | rose_group groups = g[v].groups; |
895 | if (!groups) { |
896 | return; |
897 | } |
898 | |
899 | // The set of "already on" groups as we process this vertex is the |
900 | // intersection of the groups set by our predecessors. |
901 | assert(in_degree(v, g) > 0); |
902 | rose_group already_on = ~rose_group{0}; |
903 | for (const auto &u : inv_adjacent_vertices_range(v, g)) { |
904 | already_on &= prog_build.vertex_group_map.at(u); |
905 | } |
906 | |
    DEBUG_PRINTF("already_on=0x%llx\n", already_on);
    DEBUG_PRINTF("squashable=0x%llx\n", prog_build.squashable_groups);
    DEBUG_PRINTF("groups=0x%llx\n", groups);
910 | |
911 | already_on &= ~prog_build.squashable_groups; |
    DEBUG_PRINTF("squashed already_on=0x%llx\n", already_on);
913 | |
914 | // We don't *have* to mask off the groups that we know are already on, but |
915 | // this will make bugs more apparent. |
916 | groups &= ~already_on; |
917 | |
918 | if (!groups) { |
        DEBUG_PRINTF("no new groups to set, skipping\n");
920 | return; |
921 | } |
922 | |
923 | program.add_before_end(ue2::make_unique<RoseInstrSetGroups>(groups)); |
924 | } |
925 | |
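/*
 * A reach can be tested with a single (byte & andmask) == cmpmask comparison
 * only if it is a complete block under some bit mask: the reach size must be
 * a power of two and toggling the wildcard bits (the zero bits of andmask)
 * must enumerate exactly the reach. For example, a nocase letter pair such
 * as {'A', 'a'} (0x41, 0x61) is covered by andmask 0xdf, cmpmask 0x41.
 */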
926 | static |
927 | bool checkReachMask(const CharReach &cr, u8 &andmask, u8 &cmpmask) { |
928 | size_t reach_size = cr.count(); |
929 | assert(reach_size > 0); |
    // Check whether reach_size is a power of 2.
931 | if ((reach_size - 1) & reach_size) { |
932 | return false; |
933 | } |
934 | make_and_cmp_mask(cr, &andmask, &cmpmask); |
935 | if ((1 << popcount32((u8)(~andmask))) ^ reach_size) { |
936 | return false; |
937 | } |
938 | return true; |
939 | } |
940 | |
941 | static |
942 | bool checkReachWithFlip(const CharReach &cr, u8 &andmask, |
943 | u8 &cmpmask, u8 &flip) { |
944 | if (checkReachMask(cr, andmask, cmpmask)) { |
945 | flip = 0; |
946 | return true; |
947 | } |
948 | if (checkReachMask(~cr, andmask, cmpmask)) { |
949 | flip = 1; |
950 | return true; |
951 | } |
952 | return false; |
953 | } |
954 | |
955 | static |
956 | bool makeRoleByte(const vector<LookEntry> &look, RoseProgram &program) { |
957 | if (look.size() == 1) { |
958 | const auto &entry = look[0]; |
959 | u8 andmask_u8, cmpmask_u8; |
960 | u8 flip; |
961 | if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) { |
962 | return false; |
963 | } |
964 | s32 checkbyte_offset = verify_s32(entry.offset); |
        DEBUG_PRINTF("CHECK BYTE offset=%d\n", checkbyte_offset);
966 | const auto *end_inst = program.end_instruction(); |
967 | auto ri = ue2::make_unique<RoseInstrCheckByte>(andmask_u8, cmpmask_u8, flip, |
968 | checkbyte_offset, end_inst); |
969 | program.add_before_end(move(ri)); |
970 | return true; |
971 | } |
972 | return false; |
973 | } |
974 | |
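/*
 * Fuses a lookaround spanning fewer than 8 bytes into a single 64-bit
 * CHECK_MASK: the entry at (offset - base_offset) contributes its and/cmp
 * bytes at bit position (offset - base_offset) * 8, and entries whose reach
 * had to be complemented have their byte lane recorded in neg_mask.
 */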
975 | static |
976 | bool makeRoleMask(const vector<LookEntry> &look, RoseProgram &program) { |
977 | if (look.back().offset < look.front().offset + 8) { |
978 | s32 base_offset = verify_s32(look.front().offset); |
979 | u64a and_mask = 0; |
980 | u64a cmp_mask = 0; |
981 | u64a neg_mask = 0; |
982 | for (const auto &entry : look) { |
983 | u8 andmask_u8, cmpmask_u8, flip; |
984 | if (!checkReachWithFlip(entry.reach, andmask_u8, |
985 | cmpmask_u8, flip)) { |
986 | return false; |
987 | } |
            DEBUG_PRINTF("entry offset %d\n", entry.offset);
989 | u32 shift = (entry.offset - base_offset) << 3; |
990 | and_mask |= (u64a)andmask_u8 << shift; |
991 | cmp_mask |= (u64a)cmpmask_u8 << shift; |
992 | if (flip) { |
993 | neg_mask |= 0xffLLU << shift; |
994 | } |
995 | } |
        DEBUG_PRINTF("CHECK MASK and_mask=%llx cmp_mask=%llx\n",
                     and_mask, cmp_mask);
998 | const auto *end_inst = program.end_instruction(); |
999 | auto ri = ue2::make_unique<RoseInstrCheckMask>(and_mask, cmp_mask, neg_mask, |
1000 | base_offset, end_inst); |
1001 | program.add_before_end(move(ri)); |
1002 | return true; |
1003 | } |
1004 | return false; |
1005 | } |
1006 | |
1007 | static UNUSED |
1008 | string convertMaskstoString(u8 *p, int byte_len) { |
1009 | string s; |
1010 | for (int i = 0; i < byte_len; i++) { |
1011 | u8 hi = *p >> 4; |
1012 | u8 lo = *p & 0xf; |
        s += (char)(hi + (hi < 10 ? 48 : 87)); // 48 == '0', 87 == 'a' - 10
        s += (char)(lo + (lo < 10 ? 48 : 87));
1015 | p++; |
1016 | } |
1017 | return s; |
1018 | } |
1019 | |
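/*
 * As makeRoleMask(), but for lookarounds spanning up to 32 bytes: per-byte
 * and/cmp masks plus a 32-bit bitmap of the lanes whose comparison must be
 * negated.
 */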
1020 | static |
1021 | bool makeRoleMask32(const vector<LookEntry> &look, |
1022 | RoseProgram &program) { |
1023 | if (look.back().offset >= look.front().offset + 32) { |
1024 | return false; |
1025 | } |
1026 | s32 base_offset = verify_s32(look.front().offset); |
1027 | array<u8, 32> and_mask, cmp_mask; |
1028 | and_mask.fill(0); |
1029 | cmp_mask.fill(0); |
1030 | u32 neg_mask = 0; |
1031 | for (const auto &entry : look) { |
1032 | u8 andmask_u8, cmpmask_u8, flip; |
1033 | if (!checkReachWithFlip(entry.reach, andmask_u8, |
1034 | cmpmask_u8, flip)) { |
1035 | return false; |
1036 | } |
1037 | u32 shift = entry.offset - base_offset; |
1038 | assert(shift < 32); |
1039 | and_mask[shift] = andmask_u8; |
1040 | cmp_mask[shift] = cmpmask_u8; |
1041 | if (flip) { |
1042 | neg_mask |= 1 << shift; |
1043 | } |
1044 | } |
1045 | |
    DEBUG_PRINTF("and_mask %s\n",
                 convertMaskstoString(and_mask.data(), 32).c_str());
    DEBUG_PRINTF("cmp_mask %s\n",
                 convertMaskstoString(cmp_mask.data(), 32).c_str());
    DEBUG_PRINTF("neg_mask %08x\n", neg_mask);
    DEBUG_PRINTF("base_offset %d\n", base_offset);
1052 | |
1053 | const auto *end_inst = program.end_instruction(); |
1054 | auto ri = ue2::make_unique<RoseInstrCheckMask32>(and_mask, cmp_mask, neg_mask, |
1055 | base_offset, end_inst); |
1056 | program.add_before_end(move(ri)); |
1057 | return true; |
1058 | } |
1059 | |
// Comparator that orders nibble-pair buckets by the number of byte values
// they cover (popcount of high nibbles * popcount of low nibbles).
// Used in map<u32, vector<s8>, cmpNibble>.
1062 | struct cmpNibble { |
1063 | bool operator()(const u32 data1, const u32 data2) const{ |
1064 | u32 size1 = popcount32(data1 >> 16) * popcount32(data1 << 16); |
1065 | u32 size2 = popcount32(data2 >> 16) * popcount32(data2 << 16); |
1066 | return std::tie(size1, data1) < std::tie(size2, data2); |
1067 | } |
1068 | }; |
1069 | |
1070 | // Insert all pairs of bucket and offset into buckets. |
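// Each bucket is keyed by a 32-bit "hi_lo" value: the high 16 bits are a
// bitmap of high nibbles (rows) and the low 16 bits a bitmap of low nibbles
// (columns). For example, a reach of {'a', 'b'} (0x61, 0x62) lies in row 6
// with low nibbles 1 and 2, giving hi_lo = ((1 << 6) << 16) | 0x0006.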
1071 | static really_inline |
1072 | void getAllBuckets(const vector<LookEntry> &look, |
1073 | map<u32, vector<s8>, cmpNibble> &buckets, u64a &neg_mask) { |
1074 | s32 base_offset = verify_s32(look.front().offset); |
1075 | for (const auto &entry : look) { |
1076 | CharReach cr = entry.reach; |
1077 | // Flip heavy character classes to save buckets. |
        if (cr.count() > 128) {
1079 | cr.flip(); |
1080 | } else { |
1081 | neg_mask ^= 1ULL << (entry.offset - base_offset); |
1082 | } |
        map<u16, u16> lo2hi;
        // Treat the byte range 0-255 as a 16x16 grid: the high nibble selects
        // the row and the low nibble the column. For each row present in cr,
        // record its low-nibble pattern in lo2hi and mark the row number.
1086 | for (size_t i = cr.find_first(); i != CharReach::npos;) { |
1087 | u8 it_hi = i >> 4; |
1088 | u16 low_encode = 0; |
1089 | while (i != CharReach::npos && (i >> 4) == it_hi) { |
1090 | low_encode |= 1 << (i & 0xf); |
1091 | i = cr.find_next(i); |
1092 | } |
1093 | lo2hi[low_encode] |= 1 << it_hi; |
1094 | } |
1095 | for (const auto &it : lo2hi) { |
1096 | u32 hi_lo = (it.second << 16) | it.first; |
1097 | buckets[hi_lo].push_back(entry.offset); |
1098 | } |
1099 | } |
1100 | } |
1101 | |
// When a new bucket appears, try to combine it with every existing bucket
// that shares either its high-nibble set or its low-nibble set.
1103 | static really_inline |
1104 | void nibUpdate(map<u32, u16> &nib, u32 hi_lo) { |
1105 | u16 hi = hi_lo >> 16; |
1106 | u16 lo = hi_lo & 0xffff; |
1107 | for (const auto pairs : nib) { |
1108 | u32 old = pairs.first; |
1109 | if ((old >> 16) == hi || (old & 0xffff) == lo) { |
1110 | if (!nib[old | hi_lo]) { |
1111 | nib[old | hi_lo] = nib[old] | nib[hi_lo]; |
1112 | } |
1113 | } |
1114 | } |
1115 | } |
1116 | |
1117 | static really_inline |
1118 | void nibMaskUpdate(array<u8, 32> &mask, u32 data, u8 bit_index) { |
1119 | for (u8 index = 0; data > 0; data >>= 1, index++) { |
1120 | if (data & 1) { |
            // Buckets 0-7 live in the first 16 bytes of the mask,
            // buckets 8-15 in the second 16 bytes.
1123 | if (bit_index >= 8) { |
1124 | mask[index + 16] |= 1 << (bit_index - 8); |
1125 | } else { |
1126 | mask[index] |= 1 << bit_index; |
1127 | } |
1128 | } |
1129 | } |
1130 | } |
1131 | |
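/*
 * Assigns each distinct (hi, lo) nibble-pair bucket a bit in the shufti
 * masks, merging compatible buckets as they are discovered, and fills in the
 * per-offset bucket select masks. Returns false if more buckets are required
 * than the target instruction can hold (8 for the 64-byte multipath variant,
 * 16 otherwise).
 */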
1132 | static |
1133 | bool getShuftiMasks(const vector<LookEntry> &look, array<u8, 32> &hi_mask, |
1134 | array<u8, 32> &lo_mask, u8 *bucket_select_hi, |
1135 | u8 *bucket_select_lo, u64a &neg_mask, |
1136 | u8 &bit_idx, size_t len) { |
    map<u32, u16> nib; // map each (hi, lo) nibble pair to its bucket bit(s).
1138 | map<u32, vector<s8>, cmpNibble> bucket2offsets; |
1139 | s32 base_offset = look.front().offset; |
1140 | |
1141 | bit_idx = 0; |
1142 | neg_mask = ~0ULL; |
1143 | |
1144 | getAllBuckets(look, bucket2offsets, neg_mask); |
1145 | |
1146 | for (const auto &it : bucket2offsets) { |
1147 | u32 hi_lo = it.first; |
1148 | // New bucket. |
1149 | if (!nib[hi_lo]) { |
1150 | if ((bit_idx >= 8 && len == 64) || bit_idx >= 16) { |
1151 | return false; |
1152 | } |
1153 | nib[hi_lo] = 1 << bit_idx; |
1154 | |
1155 | nibUpdate(nib, hi_lo); |
1156 | nibMaskUpdate(hi_mask, hi_lo >> 16, bit_idx); |
1157 | nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_idx); |
1158 | bit_idx++; |
1159 | } |
1160 | |
        DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]);
1162 | |
1163 | // Update bucket_select_mask. |
1164 | u8 nib_hi = nib[hi_lo] >> 8; |
1165 | u8 nib_lo = nib[hi_lo] & 0xff; |
1166 | for (const auto offset : it.second) { |
1167 | bucket_select_hi[offset - base_offset] |= nib_hi; |
1168 | bucket_select_lo[offset - base_offset] |= nib_lo; |
1169 | } |
1170 | } |
1171 | return true; |
1172 | } |
1173 | |
1174 | static |
1175 | unique_ptr<RoseInstruction> |
1176 | makeCheckShufti16x8(u32 offset_range, u8 bucket_idx, |
1177 | const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask, |
1178 | const array<u8, 32> &bucket_select_mask, |
1179 | u32 neg_mask, s32 base_offset, |
1180 | const RoseInstruction *end_inst) { |
1181 | if (offset_range > 16 || bucket_idx > 8) { |
1182 | return nullptr; |
1183 | } |
1184 | array<u8, 32> nib_mask; |
1185 | array<u8, 16> bucket_select_mask_16; |
1186 | copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin()); |
1187 | copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16); |
1188 | copy(bucket_select_mask.begin(), bucket_select_mask.begin() + 16, |
1189 | bucket_select_mask_16.begin()); |
1190 | return ue2::make_unique<RoseInstrCheckShufti16x8> |
1191 | (nib_mask, bucket_select_mask_16, |
1192 | neg_mask & 0xffff, base_offset, end_inst); |
1193 | } |
1194 | |
1195 | static |
1196 | unique_ptr<RoseInstruction> |
1197 | makeCheckShufti32x8(u32 offset_range, u8 bucket_idx, |
1198 | const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask, |
1199 | const array<u8, 32> &bucket_select_mask, |
1200 | u32 neg_mask, s32 base_offset, |
1201 | const RoseInstruction *end_inst) { |
1202 | if (offset_range > 32 || bucket_idx > 8) { |
1203 | return nullptr; |
1204 | } |
1205 | |
1206 | array<u8, 16> hi_mask_16; |
1207 | array<u8, 16> lo_mask_16; |
1208 | copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin()); |
1209 | copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin()); |
1210 | return ue2::make_unique<RoseInstrCheckShufti32x8> |
1211 | (hi_mask_16, lo_mask_16, bucket_select_mask, |
1212 | neg_mask, base_offset, end_inst); |
1213 | } |
1214 | |
1215 | static |
1216 | unique_ptr<RoseInstruction> |
1217 | makeCheckShufti16x16(u32 offset_range, u8 bucket_idx, |
1218 | const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask, |
1219 | const array<u8, 32> &bucket_select_mask_lo, |
1220 | const array<u8, 32> &bucket_select_mask_hi, |
1221 | u32 neg_mask, s32 base_offset, |
1222 | const RoseInstruction *end_inst) { |
1223 | if (offset_range > 16 || bucket_idx > 16) { |
1224 | return nullptr; |
1225 | } |
1226 | |
1227 | array<u8, 32> bucket_select_mask_32; |
1228 | copy(bucket_select_mask_lo.begin(), bucket_select_mask_lo.begin() + 16, |
1229 | bucket_select_mask_32.begin()); |
1230 | copy(bucket_select_mask_hi.begin(), bucket_select_mask_hi.begin() + 16, |
1231 | bucket_select_mask_32.begin() + 16); |
1232 | return ue2::make_unique<RoseInstrCheckShufti16x16> |
1233 | (hi_mask, lo_mask, bucket_select_mask_32, |
1234 | neg_mask & 0xffff, base_offset, end_inst); |
1235 | } |

static
1237 | unique_ptr<RoseInstruction> |
1238 | makeCheckShufti32x16(u32 offset_range, u8 bucket_idx, |
1239 | const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask, |
1240 | const array<u8, 32> &bucket_select_mask_lo, |
1241 | const array<u8, 32> &bucket_select_mask_hi, |
1242 | u32 neg_mask, s32 base_offset, |
1243 | const RoseInstruction *end_inst) { |
1244 | if (offset_range > 32 || bucket_idx > 16) { |
1245 | return nullptr; |
1246 | } |
1247 | |
1248 | return ue2::make_unique<RoseInstrCheckShufti32x16> |
1249 | (hi_mask, lo_mask, bucket_select_mask_hi, |
1250 | bucket_select_mask_lo, neg_mask, base_offset, end_inst); |
1251 | } |
1252 | |
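/*
 * Implements the lookaround with the cheapest shufti variant that fits,
 * trying 16x8 (16-byte window, up to 8 buckets), then 32x8, 16x16 and
 * finally 32x16.
 */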
1253 | static |
1254 | bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program) { |
1255 | |
1256 | s32 base_offset = verify_s32(look.front().offset); |
1257 | if (look.back().offset >= base_offset + 32) { |
1258 | return false; |
1259 | } |
1260 | |
1261 | u8 bucket_idx = 0; // number of buckets |
1262 | u64a neg_mask_64; |
1263 | array<u8, 32> hi_mask; |
1264 | array<u8, 32> lo_mask; |
1265 | array<u8, 32> bucket_select_hi; |
1266 | array<u8, 32> bucket_select_lo; |
1267 | hi_mask.fill(0); |
1268 | lo_mask.fill(0); |
1269 | bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8. |
1270 | bucket_select_lo.fill(0); |
1271 | |
1272 | if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(), |
1273 | bucket_select_lo.data(), neg_mask_64, bucket_idx, 32)) { |
1274 | return false; |
1275 | } |
1276 | u32 neg_mask = (u32)neg_mask_64; |
1277 | |
    DEBUG_PRINTF("hi_mask %s\n",
                 convertMaskstoString(hi_mask.data(), 32).c_str());
    DEBUG_PRINTF("lo_mask %s\n",
                 convertMaskstoString(lo_mask.data(), 32).c_str());
    DEBUG_PRINTF("bucket_select_hi %s\n",
                 convertMaskstoString(bucket_select_hi.data(), 32).c_str());
    DEBUG_PRINTF("bucket_select_lo %s\n",
                 convertMaskstoString(bucket_select_lo.data(), 32).c_str());
1286 | |
1287 | const auto *end_inst = program.end_instruction(); |
1288 | s32 offset_range = look.back().offset - base_offset + 1; |
1289 | |
1290 | auto ri = makeCheckShufti16x8(offset_range, bucket_idx, hi_mask, lo_mask, |
1291 | bucket_select_lo, neg_mask, base_offset, |
1292 | end_inst); |
1293 | if (!ri) { |
1294 | ri = makeCheckShufti32x8(offset_range, bucket_idx, hi_mask, lo_mask, |
1295 | bucket_select_lo, neg_mask, base_offset, |
1296 | end_inst); |
1297 | } |
1298 | if (!ri) { |
1299 | ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask, |
1300 | bucket_select_lo, bucket_select_hi, |
1301 | neg_mask, base_offset, end_inst); |
1302 | } |
1303 | if (!ri) { |
1304 | ri = makeCheckShufti32x16(offset_range, bucket_idx, hi_mask, lo_mask, |
1305 | bucket_select_lo, bucket_select_hi, |
1306 | neg_mask, base_offset, end_inst); |
1307 | } |
1308 | assert(ri); |
1309 | program.add_before_end(move(ri)); |
1310 | |
1311 | return true; |
1312 | } |
1313 | |
1314 | /** |
1315 | * Builds a lookaround instruction, or an appropriate specialization if one is |
1316 | * available. |
1317 | */ |
1318 | static |
1319 | void makeLookaroundInstruction(const vector<LookEntry> &look, |
1320 | RoseProgram &program) { |
1321 | assert(!look.empty()); |
1322 | |
1323 | if (makeRoleByte(look, program)) { |
1324 | return; |
1325 | } |
1326 | |
1327 | if (look.size() == 1) { |
1328 | s8 offset = look.begin()->offset; |
1329 | const CharReach &reach = look.begin()->reach; |
1330 | auto ri = ue2::make_unique<RoseInstrCheckSingleLookaround>(offset, reach, |
1331 | program.end_instruction()); |
1332 | program.add_before_end(move(ri)); |
1333 | return; |
1334 | } |
1335 | |
1336 | if (makeRoleMask(look, program)) { |
1337 | return; |
1338 | } |
1339 | |
1340 | if (makeRoleMask32(look, program)) { |
1341 | return; |
1342 | } |
1343 | |
1344 | if (makeRoleShufti(look, program)) { |
1345 | return; |
1346 | } |
1347 | |
1348 | auto ri = ue2::make_unique<RoseInstrCheckLookaround>(look, |
1349 | program.end_instruction()); |
1350 | program.add_before_end(move(ri)); |
1351 | } |
1352 | |
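/*
 * For literals flagged as requiring benefits, add a lookaround that
 * re-checks the caseful characters of the literal which are not already
 * covered by the HWLM mask in lit.msk.
 */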
1353 | static |
1354 | void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 lit_id, |
1355 | RoseProgram &program) { |
1356 | const auto &info = build.literal_info.at(lit_id); |
1357 | if (!info.requires_benefits) { |
1358 | return; |
1359 | } |
1360 | |
1361 | vector<LookEntry> look; |
1362 | |
1363 | const auto &lit = build.literals.at(lit_id); |
1364 | const ue2_literal &s = lit.s; |
1365 | const auto &msk = lit.msk; |
1366 | |
    DEBUG_PRINTF("building mask for lit %u: %s\n", lit_id,
                 dumpString(s).c_str());
1369 | |
1370 | assert(s.length() <= MAX_MASK2_WIDTH); |
1371 | |
1372 | // Note: the literal matcher will confirm the HWLM mask in lit.msk, so we |
1373 | // do not include those entries in the lookaround. |
1374 | auto it = s.begin(); |
1375 | for (s32 i = 0 - s.length(), i_end = 0 - msk.size(); i < i_end; ++i, ++it) { |
1376 | if (!it->nocase) { |
1377 | look.emplace_back(verify_s8(i), *it); |
1378 | } |
1379 | } |
1380 | |
1381 | if (look.empty()) { |
1382 | return; // all caseful chars handled by HWLM mask. |
1383 | } |
1384 | |
1385 | makeLookaroundInstruction(look, program); |
1386 | } |
1387 | |
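/*
 * If a floating literal is short enough that the matcher could report it
 * before its true minimum offset, emit a CHECK_LIT_EARLY so that any report
 * ending before min_offset is rejected.
 */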
1388 | static |
1389 | void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, u32 lit_id, |
1390 | const vector<RoseEdge> &lit_edges, |
1391 | u32 floatingMinLiteralMatchOffset, |
1392 | RoseProgram &prog) { |
1393 | if (lit_edges.empty()) { |
1394 | return; |
1395 | } |
1396 | |
1397 | if (floatingMinLiteralMatchOffset == 0) { |
1398 | return; |
1399 | } |
1400 | |
1401 | RoseVertex v = target(lit_edges.front(), build.g); |
1402 | if (!build.isFloating(v)) { |
1403 | return; |
1404 | } |
1405 | |
1406 | const auto &lit = build.literals.at(lit_id); |
1407 | size_t min_len = lit.elength(); |
1408 | u32 min_offset = findMinOffset(build, lit_id); |
    DEBUG_PRINTF("has min_len=%zu, min_offset=%u, global min is %u\n", min_len,
                 min_offset, floatingMinLiteralMatchOffset);
1411 | |
1412 | // If we can't match before the min offset, we don't need the check. |
1413 | if (min_len >= floatingMinLiteralMatchOffset) { |
        DEBUG_PRINTF("no need for check, min is %u\n",
                     floatingMinLiteralMatchOffset);
1416 | return; |
1417 | } |
1418 | |
1419 | assert(min_offset >= floatingMinLiteralMatchOffset); |
1420 | assert(min_offset < UINT32_MAX); |
1421 | |
    DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset);
1423 | const auto *end = prog.end_instruction(); |
1424 | prog.add_before_end(ue2::make_unique<RoseInstrCheckLitEarly>(min_offset, end)); |
1425 | } |
1426 | |
1427 | static |
1428 | void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 lit_id, |
1429 | RoseProgram &prog) { |
1430 | const auto &info = build.literal_info.at(lit_id); |
1431 | |
1432 | if (!info.group_mask) { |
1433 | return; |
1434 | } |
1435 | prog.add_before_end(ue2::make_unique<RoseInstrCheckGroups>(info.group_mask)); |
1436 | } |
1437 | |
1438 | static |
1439 | bool hasDelayedLiteral(const RoseBuildImpl &build, |
1440 | const vector<RoseEdge> &lit_edges) { |
1441 | auto is_delayed = [&build](u32 lit_id) { return build.isDelayed(lit_id); }; |
1442 | for (const auto &e : lit_edges) { |
1443 | auto v = target(e, build.g); |
1444 | const auto &lits = build.g[v].literals; |
1445 | if (any_of(begin(lits), end(lits), is_delayed)) { |
1446 | return true; |
1447 | } |
1448 | } |
1449 | return false; |
1450 | } |
1451 | |
1452 | static |
1453 | RoseProgram makeLitInitialProgram(const RoseBuildImpl &build, |
1454 | ProgramBuild &prog_build, u32 lit_id, |
1455 | const vector<RoseEdge> &lit_edges, |
1456 | bool is_anchored_replay_program) { |
1457 | RoseProgram program; |
1458 | |
1459 | // Check long literal info. |
1460 | if (!build.isDelayed(lit_id)) { |
1461 | makeCheckLiteralInstruction(build.literals.at(lit_id), |
1462 | prog_build.longLitLengthThreshold, |
1463 | program, build.cc); |
1464 | } |
1465 | |
1466 | // Check lit mask. |
1467 | makeCheckLitMaskInstruction(build, lit_id, program); |
1468 | |
1469 | // Check literal groups. This is an optimisation that we only perform for |
1470 | // delayed literals, as their groups may be switched off; ordinarily, we |
1471 | // can trust the HWLM matcher. |
1472 | if (hasDelayedLiteral(build, lit_edges)) { |
1473 | makeGroupCheckInstruction(build, lit_id, program); |
1474 | } |
1475 | |
1476 | // Add instructions for pushing delayed matches, if there are any. |
1477 | makePushDelayedInstructions(build.literals, prog_build, |
1478 | build.literal_info.at(lit_id).delayed_ids, |
1479 | program); |
1480 | |
1481 | // Add pre-check for early literals in the floating table. |
1482 | makeCheckLitEarlyInstruction(build, lit_id, lit_edges, |
1483 | prog_build.floatingMinLiteralMatchOffset, |
1484 | program); |
1485 | |
    /* Check if we are able to deliver matches from the anchored table now. */
1487 | if (!is_anchored_replay_program) { |
1488 | makeAnchoredLiteralDelay(build, prog_build, lit_id, program); |
1489 | } |
1490 | |
1491 | return program; |
1492 | } |
1493 | |
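/*
 * Flattens a multi-path lookaround into one linear sequence of reach checks:
 * data_select_mask[i] records which input byte (relative to base_offset)
 * feeds check i, while lo_bits_mask and hi_bits_mask mark the first and last
 * check of each path so the runtime can evaluate the paths independently.
 */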
1494 | static |
1495 | bool makeRoleMultipathShufti(const vector<vector<LookEntry>> &multi_look, |
1496 | RoseProgram &program) { |
1497 | if (multi_look.empty()) { |
1498 | return false; |
1499 | } |
1500 | |
1501 | // find the base offset |
1502 | assert(!multi_look[0].empty()); |
1503 | s32 base_offset = multi_look[0].front().offset; |
1504 | s32 last_start = base_offset; |
1505 | s32 end_offset = multi_look[0].back().offset; |
1506 | size_t multi_len = 0; |
1507 | |
1508 | for (const auto &look : multi_look) { |
1509 | assert(look.size() > 0); |
1510 | multi_len += look.size(); |
1511 | |
1512 | LIMIT_TO_AT_MOST(&base_offset, look.front().offset); |
1513 | ENSURE_AT_LEAST(&last_start, look.front().offset); |
1514 | ENSURE_AT_LEAST(&end_offset, look.back().offset); |
1515 | } |
1516 | |
1517 | assert(last_start < 0); |
1518 | |
1519 | if (end_offset - base_offset >= MULTIPATH_MAX_LEN) { |
1520 | return false; |
1521 | } |
1522 | |
1523 | if (multi_len <= 16) { |
1524 | multi_len = 16; |
1525 | } else if (multi_len <= 32) { |
1526 | multi_len = 32; |
1527 | } else if (multi_len <= 64) { |
1528 | multi_len = 64; |
1529 | } else { |
        DEBUG_PRINTF("too long for multi-path\n");
1531 | return false; |
1532 | } |
1533 | |
1534 | vector<LookEntry> linear_look; |
1535 | array<u8, 64> data_select_mask; |
1536 | data_select_mask.fill(0); |
1537 | u64a hi_bits_mask = 0; |
1538 | u64a lo_bits_mask = 0; |
1539 | |
1540 | for (const auto &look : multi_look) { |
1541 | assert(linear_look.size() < 64); |
1542 | lo_bits_mask |= 1LLU << linear_look.size(); |
1543 | for (const auto &entry : look) { |
1544 | assert(entry.offset - base_offset < MULTIPATH_MAX_LEN); |
1545 | data_select_mask[linear_look.size()] = |
1546 | verify_u8(entry.offset - base_offset); |
1547 | linear_look.emplace_back(verify_s8(linear_look.size()), entry.reach); |
1548 | } |
1549 | hi_bits_mask |= 1LLU << (linear_look.size() - 1); |
1550 | } |
1551 | |
1552 | u8 bit_index = 0; // number of buckets |
1553 | u64a neg_mask; |
1554 | array<u8, 32> hi_mask; |
1555 | array<u8, 32> lo_mask; |
1556 | array<u8, 64> bucket_select_hi; |
1557 | array<u8, 64> bucket_select_lo; |
1558 | hi_mask.fill(0); |
1559 | lo_mask.fill(0); |
1560 | bucket_select_hi.fill(0); |
1561 | bucket_select_lo.fill(0); |
1562 | |
1563 | if (!getShuftiMasks(linear_look, hi_mask, lo_mask, bucket_select_hi.data(), |
1564 | bucket_select_lo.data(), neg_mask, bit_index, |
1565 | multi_len)) { |
1566 | return false; |
1567 | } |
1568 | |
    DEBUG_PRINTF("hi_mask %s\n",
                 convertMaskstoString(hi_mask.data(), 16).c_str());
    DEBUG_PRINTF("lo_mask %s\n",
                 convertMaskstoString(lo_mask.data(), 16).c_str());
    DEBUG_PRINTF("bucket_select_hi %s\n",
                 convertMaskstoString(bucket_select_hi.data(), 64).c_str());
    DEBUG_PRINTF("bucket_select_lo %s\n",
                 convertMaskstoString(bucket_select_lo.data(), 64).c_str());
    DEBUG_PRINTF("data_select_mask %s\n",
                 convertMaskstoString(data_select_mask.data(), 64).c_str());
    DEBUG_PRINTF("hi_bits_mask %llx\n", hi_bits_mask);
    DEBUG_PRINTF("lo_bits_mask %llx\n", lo_bits_mask);
    DEBUG_PRINTF("neg_mask %llx\n", neg_mask);
    DEBUG_PRINTF("base_offset %d\n", base_offset);
    DEBUG_PRINTF("last_start %d\n", last_start);
1584 | |
    // There is no 16x16 multipath shufti variant, so use 32x16 instead.
1586 | if (bit_index > 8) { |
1587 | assert(multi_len <= 32); |
1588 | multi_len = 32; |
1589 | } |
1590 | |
1591 | const auto *end_inst = program.end_instruction(); |
1592 | assert(multi_len == 16 || multi_len == 32 || multi_len == 64); |
1593 | if (multi_len == 16) { |
1594 | neg_mask &= 0xffff; |
1595 | assert(!(hi_bits_mask & ~0xffffULL)); |
1596 | assert(!(lo_bits_mask & ~0xffffULL)); |
        assert(bit_index <= 8);
1598 | array<u8, 32> nib_mask; |
1599 | copy(begin(lo_mask), begin(lo_mask) + 16, nib_mask.begin()); |
1600 | copy(begin(hi_mask), begin(hi_mask) + 16, nib_mask.begin() + 16); |
1601 | |
1602 | auto ri = ue2::make_unique<RoseInstrCheckMultipathShufti16x8> |
1603 | (nib_mask, bucket_select_lo, data_select_mask, hi_bits_mask, |
1604 | lo_bits_mask, neg_mask, base_offset, last_start, end_inst); |
1605 | program.add_before_end(move(ri)); |
1606 | } else if (multi_len == 32) { |
1607 | neg_mask &= 0xffffffff; |
1608 | assert(!(hi_bits_mask & ~0xffffffffULL)); |
1609 | assert(!(lo_bits_mask & ~0xffffffffULL)); |
1610 | if (bit_index <= 8) { |
1611 | auto ri = ue2::make_unique<RoseInstrCheckMultipathShufti32x8> |
1612 | (hi_mask, lo_mask, bucket_select_lo, data_select_mask, |
1613 | hi_bits_mask, lo_bits_mask, neg_mask, base_offset, |
1614 | last_start, end_inst); |
1615 | program.add_before_end(move(ri)); |
1616 | } else { |
1617 | auto ri = ue2::make_unique<RoseInstrCheckMultipathShufti32x16> |
1618 | (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo, |
1619 | data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, |
1620 | base_offset, last_start, end_inst); |
1621 | program.add_before_end(move(ri)); |
1622 | } |
1623 | } else { |
1624 | auto ri = ue2::make_unique<RoseInstrCheckMultipathShufti64> |
1625 | (hi_mask, lo_mask, bucket_select_lo, data_select_mask, |
1626 | hi_bits_mask, lo_bits_mask, neg_mask, base_offset, |
1627 | last_start, end_inst); |
1628 | program.add_before_end(move(ri)); |
1629 | } |
1630 | return true; |
1631 | } |
1632 | |
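/**
 * \brief Build a generic MULTIPATH_LOOKAROUND instruction for the given paths
 * and add it to the program.
 *
 * This is the fallback used when the paths cannot be handled by a multipath
 * shufti instruction.
 */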
1633 | static |
1634 | void makeRoleMultipathLookaround(const vector<vector<LookEntry>> &multi_look, |
1635 | RoseProgram &program) { |
1636 | assert(!multi_look.empty()); |
1637 | assert(multi_look.size() <= MAX_LOOKAROUND_PATHS); |
1638 | vector<vector<LookEntry>> ordered_look; |
1639 | set<s32> look_offset; |
1640 | |
1641 | assert(!multi_look[0].empty()); |
1642 | s32 last_start = multi_look[0][0].offset; |
1643 | |
1644 | // build offset table. |
1645 | for (const auto &look : multi_look) { |
1646 | assert(look.size() > 0); |
1647 | last_start = max(last_start, (s32)look.begin()->offset); |
1648 | |
1649 | for (const auto &t : look) { |
1650 | look_offset.insert(t.offset); |
1651 | } |
1652 | } |
1653 | |
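    // Build the per-position start mask: after the prefix AND below, bit p of
    // start_mask[i] remains set only while path p has no lookaround entry
    // before relative position i.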
1654 | array<u8, MULTIPATH_MAX_LEN> start_mask; |
1655 | if (multi_look.size() < MAX_LOOKAROUND_PATHS) { |
1656 | start_mask.fill((1 << multi_look.size()) - 1); |
1657 | } else { |
1658 | start_mask.fill(0xff); |
1659 | } |
1660 | |
1661 | u32 path_idx = 0; |
1662 | for (const auto &look : multi_look) { |
1663 | for (const auto &t : look) { |
1664 | assert(t.offset >= (int)*look_offset.begin()); |
1665 | size_t update_offset = t.offset - *look_offset.begin() + 1; |
1666 | if (update_offset < start_mask.size()) { |
1667 | start_mask[update_offset] &= ~(1 << path_idx); |
1668 | } |
1669 | } |
1670 | path_idx++; |
1671 | } |
1672 | |
1673 | for (u32 i = 1; i < MULTIPATH_MAX_LEN; i++) { |
1674 | start_mask[i] &= start_mask[i - 1]; |
        DEBUG_PRINTF("start_mask[%u] = %x\n", i, start_mask[i]);
1676 | } |
1677 | |
1678 | assert(look_offset.size() <= MULTIPATH_MAX_LEN); |
1679 | |
1680 | assert(last_start < 0); |
1681 | |
1682 | for (const auto &offset : look_offset) { |
1683 | vector<LookEntry> multi_entry; |
1684 | multi_entry.resize(MAX_LOOKAROUND_PATHS); |
1685 | |
1686 | for (size_t i = 0; i < multi_look.size(); i++) { |
1687 | for (const auto &t : multi_look[i]) { |
1688 | if (t.offset == offset) { |
1689 | multi_entry[i] = t; |
1690 | } |
1691 | } |
1692 | } |
1693 | ordered_look.emplace_back(multi_entry); |
1694 | } |
1695 | |
1696 | auto ri = ue2::make_unique<RoseInstrMultipathLookaround>(move(ordered_look), |
1697 | last_start, start_mask, |
1698 | program.end_instruction()); |
1699 | program.add_before_end(move(ri)); |
1700 | } |
1701 | |
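/**
 * \brief Add lookaround check instructions for this role.
 *
 * Lookaround derived from the leftfix (if any) is mandatory; advisory
 * lookaround found from the graph may be merged in when there is at most one
 * path. Multi-path lookaround uses a multipath shufti instruction where
 * possible, falling back to the generic multipath lookaround instruction.
 */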
1702 | static |
1703 | void makeRoleLookaround(const RoseBuildImpl &build, |
1704 | const map<RoseVertex, left_build_info> &leftfix_info, |
1705 | RoseVertex v, RoseProgram &program) { |
1706 | if (!build.cc.grey.roseLookaroundMasks) { |
1707 | return; |
1708 | } |
1709 | |
1710 | vector<vector<LookEntry>> looks; |
1711 | |
1712 | // Lookaround from leftfix (mandatory). |
1713 | if (contains(leftfix_info, v) && leftfix_info.at(v).has_lookaround) { |
        DEBUG_PRINTF("using leftfix lookaround\n");
1715 | looks = leftfix_info.at(v).lookaround; |
1716 | } |
1717 | |
1718 | // We may be able to find more lookaround info (advisory) and merge it |
1719 | // in. |
1720 | if (looks.size() <= 1) { |
1721 | vector<LookEntry> look; |
1722 | vector<LookEntry> look_more; |
1723 | if (!looks.empty()) { |
1724 | look = move(looks.front()); |
1725 | } |
1726 | findLookaroundMasks(build, v, look_more); |
1727 | mergeLookaround(look, look_more); |
1728 | if (!look.empty()) { |
1729 | makeLookaroundInstruction(look, program); |
1730 | } |
1731 | return; |
1732 | } |
1733 | |
1734 | if (!makeRoleMultipathShufti(looks, program)) { |
1735 | assert(looks.size() <= 8); |
1736 | makeRoleMultipathLookaround(looks, program); |
1737 | } |
1738 | } |
1739 | |
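/**
 * \brief Add a TRIGGER_SUFFIX instruction for this role's suffix (if any),
 * selecting the appropriate top event for the suffix engine type.
 */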
1740 | static |
1741 | void makeRoleSuffix(const RoseBuildImpl &build, |
1742 | const map<suffix_id, u32> &suffixes, |
1743 | const map<u32, engine_info> &engine_info_by_queue, |
1744 | RoseVertex v, RoseProgram &prog) { |
1745 | const auto &g = build.g; |
1746 | if (!g[v].suffix) { |
1747 | return; |
1748 | } |
1749 | assert(contains(suffixes, g[v].suffix)); |
1750 | u32 queue = suffixes.at(g[v].suffix); |
1751 | u32 event; |
1752 | assert(contains(engine_info_by_queue, queue)); |
1753 | const auto eng_info = engine_info_by_queue.at(queue); |
1754 | if (isContainerType(eng_info.type)) { |
1755 | auto tamaProto = g[v].suffix.tamarama.get(); |
1756 | assert(tamaProto); |
1757 | event = (u32)MQE_TOP_FIRST + |
1758 | tamaProto->top_remap.at(make_pair(g[v].index, |
1759 | g[v].suffix.top)); |
1760 | assert(event < MQE_INVALID); |
1761 | } else if (isMultiTopType(eng_info.type)) { |
1762 | assert(!g[v].suffix.haig); |
1763 | event = (u32)MQE_TOP_FIRST + g[v].suffix.top; |
1764 | assert(event < MQE_INVALID); |
1765 | } else { |
1766 | // DFAs/Puffs have no MQE_TOP_N support, so they get a classic TOP |
1767 | // event. |
1768 | assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph)); |
1769 | event = MQE_TOP; |
1770 | } |
1771 | |
1772 | prog.add_before_end(ue2::make_unique<RoseInstrTriggerSuffix>(queue, event)); |
1773 | } |
1774 | |
1775 | static |
1776 | void addInfixTriggerInstructions(vector<TriggerInfo> triggers, |
1777 | RoseProgram &prog) { |
1778 | // Order, de-dupe and add instructions to the end of program. |
1779 | sort_and_unique(triggers, [](const TriggerInfo &a, const TriggerInfo &b) { |
1780 | return tie(a.cancel, a.queue, a.event) < |
1781 | tie(b.cancel, b.queue, b.event); |
1782 | }); |
1783 | for (const auto &ti : triggers) { |
1784 | prog.add_before_end( |
1785 | ue2::make_unique<RoseInstrTriggerInfix>(ti.cancel, ti.queue, ti.event)); |
1786 | } |
1787 | } |
1788 | |
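/**
 * \brief Add TRIGGER_INFIX instructions for the leftfixes of this role's
 * successors.
 */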
1789 | static |
1790 | void makeRoleInfixTriggers(const RoseBuildImpl &build, |
1791 | const map<RoseVertex, left_build_info> &leftfix_info, |
1792 | const map<u32, engine_info> &engine_info_by_queue, |
1793 | RoseVertex u, RoseProgram &program) { |
1794 | const auto &g = build.g; |
1795 | |
1796 | vector<TriggerInfo> triggers; |
1797 | |
1798 | for (const auto &e : out_edges_range(u, g)) { |
1799 | RoseVertex v = target(e, g); |
1800 | if (!g[v].left) { |
1801 | continue; |
1802 | } |
1803 | |
1804 | assert(contains(leftfix_info, v)); |
1805 | const left_build_info &lbi = leftfix_info.at(v); |
1806 | if (lbi.has_lookaround) { |
1807 | continue; |
1808 | } |
1809 | |
1810 | assert(contains(engine_info_by_queue, lbi.queue)); |
1811 | const auto &eng_info = engine_info_by_queue.at(lbi.queue); |
1812 | |
1813 | // DFAs have no TOP_N support, so they get a classic MQE_TOP event. |
1814 | u32 top; |
1815 | if (isContainerType(eng_info.type)) { |
1816 | auto tamaProto = g[v].left.tamarama.get(); |
1817 | assert(tamaProto); |
1818 | top = MQE_TOP_FIRST + tamaProto->top_remap.at( |
1819 | make_pair(g[v].index, g[e].rose_top)); |
1820 | assert(top < MQE_INVALID); |
1821 | } else if (!isMultiTopType(eng_info.type)) { |
1822 | assert(num_tops(g[v].left) == 1); |
1823 | top = MQE_TOP; |
1824 | } else { |
1825 | top = MQE_TOP_FIRST + g[e].rose_top; |
1826 | assert(top < MQE_INVALID); |
1827 | } |
1828 | |
1829 | triggers.emplace_back(g[e].rose_cancel_prev_top, lbi.queue, top); |
1830 | } |
1831 | |
1832 | addInfixTriggerInstructions(move(triggers), program); |
1833 | } |
1834 | |
1835 | |
1836 | /** |
1837 | * \brief True if the given vertex is a role that can only be switched on at |
1838 | * EOD. |
1839 | */ |
1840 | static |
1841 | bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) { |
1842 | const RoseGraph &g = tbi.g; |
1843 | |
    // All such roles have only (0,0) edges to vertices with the eod_accept
    // property, and no other effects (suffixes, ordinary reports, etc.).
1846 | |
1847 | if (isLeafNode(v, g) || !g[v].reports.empty() || g[v].suffix) { |
1848 | return false; |
1849 | } |
1850 | |
1851 | for (const auto &e : out_edges_range(v, g)) { |
1852 | RoseVertex w = target(e, g); |
1853 | if (!g[w].eod_accept) { |
1854 | return false; |
1855 | } |
1856 | assert(!g[w].reports.empty()); |
1857 | assert(g[w].literals.empty()); |
1858 | |
1859 | if (g[e].minBound || g[e].maxBound) { |
1860 | return false; |
1861 | } |
1862 | } |
1863 | |
    /* There is no point in enforcing this check at runtime if this role is
     * only fired by the EOD event literal. */
1866 | if (tbi.eod_event_literal_id != MO_INVALID_IDX && |
1867 | g[v].literals.size() == 1 && |
1868 | *g[v].literals.begin() == tbi.eod_event_literal_id) { |
1869 | return false; |
1870 | } |
1871 | |
1872 | return true; |
1873 | } |
1874 | |
1875 | static |
1876 | void addCheckOnlyEodInstruction(RoseProgram &prog) { |
    DEBUG_PRINTF("only at eod\n");
1878 | const auto *end_inst = prog.end_instruction(); |
1879 | prog.add_before_end(ue2::make_unique<RoseInstrCheckOnlyEod>(end_inst)); |
1880 | } |
1881 | |
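/**
 * \brief Add report blocks for successors whose reports can be raised eagerly
 * at EOD, guarded by a CHECK_ONLY_EOD instruction if the rest of the program
 * is not already EOD-only.
 */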
1882 | static |
1883 | void makeRoleEagerEodReports(const RoseBuildImpl &build, |
1884 | const map<RoseVertex, left_build_info> &leftfix_info, |
1885 | bool needs_catchup, RoseVertex v, |
1886 | RoseProgram &program) { |
1887 | RoseProgram eod_program; |
1888 | |
1889 | for (const auto &e : out_edges_range(v, build.g)) { |
1890 | if (canEagerlyReportAtEod(build, e)) { |
1891 | RoseProgram block; |
1892 | makeRoleReports(build, leftfix_info, needs_catchup, |
1893 | target(e, build.g), block); |
1894 | eod_program.add_block(move(block)); |
1895 | } |
1896 | } |
1897 | |
1898 | if (eod_program.empty()) { |
1899 | return; |
1900 | } |
1901 | |
1902 | if (!onlyAtEod(build, v)) { |
1903 | // The rest of our program wasn't EOD anchored, so we need to guard |
1904 | // these reports with a check. |
1905 | addCheckOnlyEodInstruction(program); |
1906 | } |
1907 | |
1908 | program.add_before_end(move(eod_program)); |
1909 | } |
1910 | |
1911 | /** Makes a program for a role/vertex given a specific pred/in_edge. */ |
1912 | static |
1913 | RoseProgram makeRoleProgram(const RoseBuildImpl &build, |
1914 | const map<RoseVertex, left_build_info> &leftfix_info, |
1915 | const map<suffix_id, u32> &suffixes, |
1916 | const map<u32, engine_info> &engine_info_by_queue, |
1917 | const unordered_map<RoseVertex, u32> &roleStateIndices, |
1918 | ProgramBuild &prog_build, const RoseEdge &e) { |
1919 | const RoseGraph &g = build.g; |
1920 | auto v = target(e, g); |
1921 | |
1922 | RoseProgram program; |
1923 | |
1924 | // First, add program instructions that enforce preconditions without |
1925 | // effects. |
1926 | |
1927 | if (onlyAtEod(build, v)) { |
1928 | addCheckOnlyEodInstruction(program); |
1929 | } |
1930 | |
1931 | if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { |
1932 | makeRoleCheckBounds(build, v, e, program); |
1933 | } |
1934 | |
1935 | // This role program may be triggered by different predecessors, with |
1936 | // different offset bounds. We must ensure we put this check/set operation |
1937 | // after the bounds check to deal with this case. |
1938 | if (in_degree(v, g) > 1) { |
1939 | assert(!build.isRootSuccessor(v)); |
1940 | makeRoleCheckNotHandled(prog_build, v, program); |
1941 | } |
1942 | |
1943 | makeRoleLookaround(build, leftfix_info, v, program); |
1944 | makeRoleCheckLeftfix(build, leftfix_info, v, program); |
1945 | |
1946 | // Next, we can add program instructions that have effects. This must be |
1947 | // done as a series of blocks, as some of them (like reports) are |
1948 | // escapable. |
1949 | |
1950 | RoseProgram effects_block; |
1951 | |
1952 | RoseProgram reports_block; |
1953 | makeRoleReports(build, leftfix_info, prog_build.needs_catchup, v, |
1954 | reports_block); |
1955 | effects_block.add_block(move(reports_block)); |
1956 | |
1957 | RoseProgram infix_block; |
1958 | makeRoleInfixTriggers(build, leftfix_info, engine_info_by_queue, v, |
1959 | infix_block); |
1960 | effects_block.add_block(move(infix_block)); |
1961 | |
1962 | // Note: SET_GROUPS instruction must be after infix triggers, as an infix |
1963 | // going dead may switch off groups. |
1964 | RoseProgram groups_block; |
1965 | makeRoleGroups(build.g, prog_build, v, groups_block); |
1966 | effects_block.add_block(move(groups_block)); |
1967 | |
1968 | RoseProgram suffix_block; |
1969 | makeRoleSuffix(build, suffixes, engine_info_by_queue, v, suffix_block); |
1970 | effects_block.add_block(move(suffix_block)); |
1971 | |
1972 | RoseProgram state_block; |
1973 | makeRoleSetState(roleStateIndices, v, state_block); |
1974 | effects_block.add_block(move(state_block)); |
1975 | |
1976 | // Note: EOD eager reports may generate a CHECK_ONLY_EOD instruction (if |
1977 | // the program doesn't have one already). |
1978 | RoseProgram eod_block; |
1979 | makeRoleEagerEodReports(build, leftfix_info, prog_build.needs_catchup, v, |
1980 | eod_block); |
1981 | effects_block.add_block(move(eod_block)); |
1982 | |
1983 | /* a 'ghost role' may do nothing if we know that its groups are already set |
1984 | * - in this case we can avoid producing a program at all. */ |
1985 | if (effects_block.empty()) { |
1986 | return {}; |
1987 | } |
1988 | |
1989 | program.add_before_end(move(effects_block)); |
1990 | return program; |
1991 | } |
1992 | |
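/**
 * \brief Add a SQUASH_GROUPS instruction if this literal squashes its group.
 */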
1993 | static |
1994 | void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 lit_id, |
1995 | RoseProgram &prog) { |
1996 | const auto &info = build.literal_info.at(lit_id); |
1997 | if (!info.squash_group) { |
1998 | return; |
1999 | } |
2000 | |
    DEBUG_PRINTF("squashes 0x%llx\n", info.group_mask);
2002 | assert(info.group_mask); |
2003 | /* Note: group_mask is negated. */ |
2004 | prog.add_before_end(ue2::make_unique<RoseInstrSquashGroups>(~info.group_mask)); |
2005 | } |
2006 | |
2007 | namespace { |
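/** \brief Wrapper that keys a RoseProgram on its hash and equivalence, used
 * to dedupe program blocks in assembleProgramBlocks() below. */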
2008 | struct ProgKey { |
2009 | ProgKey(const RoseProgram &p) : prog(&p) {} |
2010 | |
2011 | bool operator==(const ProgKey &b) const { |
2012 | return RoseProgramEquivalence()(*prog, *b.prog); |
2013 | } |
2014 | |
2015 | size_t hash() const { |
2016 | return RoseProgramHash()(*prog); |
2017 | } |
2018 | private: |
2019 | const RoseProgram *prog; |
2020 | }; |
2021 | } |
2022 | |
2023 | RoseProgram assembleProgramBlocks(vector<RoseProgram> &&blocks_in) { |
    DEBUG_PRINTF("%zu blocks before dedupe\n", blocks_in.size());
2025 | |
2026 | vector<RoseProgram> blocks; |
2027 | blocks.reserve(blocks_in.size()); /* to ensure stable reference for seen */ |
2028 | |
2029 | ue2_unordered_set<ProgKey> seen; |
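    // Dedupe: blocks that are equivalent (as determined by
    // RoseProgramEquivalence) are only kept once.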
2030 | for (auto &block : blocks_in) { |
2031 | if (contains(seen, block)) { |
2032 | continue; |
2033 | } |
2034 | |
2035 | blocks.push_back(move(block)); |
2036 | seen.emplace(blocks.back()); |
2037 | } |
2038 | |
    DEBUG_PRINTF("%zu blocks after dedupe\n", blocks.size());
2040 | |
2041 | RoseProgram prog; |
2042 | for (auto &block : blocks) { |
        /* If we have multiple blocks from different literals and any of them
         * squash groups, we will have to add a CLEAR_WORK_DONE instruction
         * before each block that reads the work_done flag, so that the flag
         * only reflects work done by that block. */
2047 | if (!prog.empty() && reads_work_done_flag(block)) { |
2048 | RoseProgram clear_block; |
2049 | clear_block.add_before_end(ue2::make_unique<RoseInstrClearWorkDone>()); |
2050 | prog.add_block(move(clear_block)); |
2051 | } |
2052 | |
2053 | prog.add_block(move(block)); |
2054 | } |
2055 | |
2056 | return prog; |
2057 | } |
2058 | |
2059 | RoseProgram makeLiteralProgram(const RoseBuildImpl &build, |
2060 | const map<RoseVertex, left_build_info> &leftfix_info, |
2061 | const map<suffix_id, u32> &suffixes, |
2062 | const map<u32, engine_info> &engine_info_by_queue, |
2063 | const unordered_map<RoseVertex, u32> &roleStateIndices, |
2064 | ProgramBuild &prog_build, u32 lit_id, |
2065 | const vector<RoseEdge> &lit_edges, |
2066 | bool is_anchored_replay_program) { |
2067 | const auto &g = build.g; |
2068 | |
    DEBUG_PRINTF("lit id=%u, %zu lit edges\n", lit_id, lit_edges.size());
2070 | |
2071 | // Construct initial program up front, as its early checks must be able |
2072 | // to jump to end and terminate processing for this literal. |
2073 | auto lit_program = makeLitInitialProgram(build, prog_build, lit_id, |
2074 | lit_edges, |
2075 | is_anchored_replay_program); |
2076 | |
2077 | RoseProgram role_programs; |
2078 | |
2079 | // Predecessor state id -> program block. |
2080 | map<u32, RoseProgram> pred_blocks; |
2081 | |
2082 | // Construct sparse iter sub-programs. |
2083 | for (const auto &e : lit_edges) { |
2084 | const auto &u = source(e, g); |
2085 | if (build.isAnyStart(u)) { |
2086 | continue; // Root roles are not handled with sparse iterator. |
2087 | } |
        DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].index,
2089 | g[target(e, g)].index); |
2090 | assert(contains(roleStateIndices, u)); |
2091 | u32 pred_state = roleStateIndices.at(u); |
2092 | auto role_prog = makeRoleProgram(build, leftfix_info, suffixes, |
2093 | engine_info_by_queue, roleStateIndices, |
2094 | prog_build, e); |
2095 | if (!role_prog.empty()) { |
2096 | pred_blocks[pred_state].add_block(move(role_prog)); |
2097 | } |
2098 | } |
2099 | |
2100 | // Add blocks to deal with non-root edges (triggered by sparse iterator or |
2101 | // mmbit_isset checks). |
2102 | addPredBlocks(pred_blocks, roleStateIndices.size(), role_programs); |
2103 | |
2104 | // Add blocks to handle root roles. |
2105 | for (const auto &e : lit_edges) { |
2106 | const auto &u = source(e, g); |
2107 | if (!build.isAnyStart(u)) { |
2108 | continue; |
2109 | } |
        DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index,
2111 | g[target(e, g)].index); |
2112 | auto role_prog = makeRoleProgram(build, leftfix_info, suffixes, |
2113 | engine_info_by_queue, roleStateIndices, |
2114 | prog_build, e); |
2115 | role_programs.add_block(move(role_prog)); |
2116 | } |
2117 | |
2118 | if (lit_id == build.eod_event_literal_id) { |
2119 | /* Note: does not require the lit initial program */ |
2120 | assert(build.eod_event_literal_id != MO_INVALID_IDX); |
2121 | return role_programs; |
2122 | } |
2123 | |
2124 | /* Instructions to run even if a role program bails out */ |
2125 | RoseProgram unconditional_block; |
2126 | |
2127 | // Literal may squash groups. |
2128 | makeGroupSquashInstruction(build, lit_id, unconditional_block); |
2129 | |
2130 | role_programs.add_block(move(unconditional_block)); |
2131 | lit_program.add_before_end(move(role_programs)); |
2132 | |
2133 | return lit_program; |
2134 | } |
2135 | |
2136 | RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build, |
2137 | ProgramBuild &prog_build, |
2138 | const vector<u32> &lit_ids) { |
2139 | assert(!lit_ids.empty()); |
2140 | assert(build.cc.streaming); |
2141 | |
2142 | vector<RoseProgram> blocks; |
2143 | |
2144 | for (const auto &lit_id : lit_ids) { |
        DEBUG_PRINTF("lit_id=%u\n", lit_id);
2146 | const auto &info = build.literal_info.at(lit_id); |
2147 | if (info.delayed_ids.empty()) { |
2148 | continue; // No delayed IDs, no work to do. |
2149 | } |
2150 | |
2151 | RoseProgram prog; |
2152 | if (!build.isDelayed(lit_id)) { |
2153 | makeCheckLiteralInstruction(build.literals.at(lit_id), |
2154 | prog_build.longLitLengthThreshold, prog, |
2155 | build.cc); |
2156 | } |
2157 | |
2158 | makeCheckLitMaskInstruction(build, lit_id, prog); |
2159 | makePushDelayedInstructions(build.literals, prog_build, |
2160 | build.literal_info.at(lit_id).delayed_ids, |
2161 | prog); |
2162 | blocks.push_back(move(prog)); |
2163 | } |
2164 | |
2165 | return assembleProgramBlocks(move(blocks)); |
2166 | } |
2167 | |
2168 | RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build, |
2169 | ProgramBuild &prog_build, const RoseEdge &e, |
2170 | const bool multiple_preds) { |
2171 | const RoseGraph &g = build.g; |
2172 | const RoseVertex v = target(e, g); |
2173 | |
2174 | RoseProgram program; |
2175 | |
2176 | if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { |
2177 | makeRoleCheckBounds(build, v, e, program); |
2178 | } |
2179 | |
2180 | if (multiple_preds) { |
2181 | // Only necessary when there is more than one pred. |
2182 | makeRoleCheckNotHandled(prog_build, v, program); |
2183 | } |
2184 | |
2185 | makeCatchup(build.rm, prog_build.needs_catchup, g[v].reports, program); |
2186 | |
2187 | const bool has_som = false; |
2188 | RoseProgram report_block; |
2189 | for (const auto &id : g[v].reports) { |
2190 | makeReport(build, id, has_som, report_block); |
2191 | } |
2192 | program.add_before_end(move(report_block)); |
2193 | |
2194 | return program; |
2195 | } |
2196 | |
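/**
 * \brief Add a CATCH_UP_MPV instruction if MPV catchup is required; chained
 * (INTERNAL_ROSE_CHAIN) reports do not need it.
 */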
2197 | static |
2198 | void makeCatchupMpv(const ReportManager &rm, bool needs_mpv_catchup, |
2199 | ReportID id, RoseProgram &program) { |
2200 | if (!needs_mpv_catchup) { |
2201 | return; |
2202 | } |
2203 | |
2204 | const Report &report = rm.getReport(id); |
2205 | if (report.type == INTERNAL_ROSE_CHAIN) { |
2206 | return; |
2207 | } |
2208 | |
2209 | program.add_before_end(ue2::make_unique<RoseInstrCatchUpMpv>()); |
2210 | } |
2211 | |
2212 | RoseProgram makeReportProgram(const RoseBuildImpl &build, |
2213 | bool needs_mpv_catchup, ReportID id) { |
2214 | RoseProgram prog; |
2215 | |
2216 | makeCatchupMpv(build.rm, needs_mpv_catchup, id, prog); |
2217 | |
2218 | const bool has_som = false; |
2219 | makeReport(build, id, has_som, prog); |
2220 | |
2221 | return prog; |
2222 | } |
2223 | |
2224 | RoseProgram makeBoundaryProgram(const RoseBuildImpl &build, |
2225 | const set<ReportID> &reports) { |
2226 | // Note: no CATCHUP instruction is necessary in the boundary case, as we |
2227 | // should always be caught up (and may not even have the resources in |
2228 | // scratch to support it). |
2229 | |
2230 | const bool has_som = false; |
2231 | RoseProgram prog; |
2232 | for (const auto &id : reports) { |
2233 | makeReport(build, id, has_som, prog); |
2234 | } |
2235 | |
2236 | return prog; |
2237 | } |
2238 | |
2239 | void addIncludedJumpProgram(RoseProgram &program, u32 child_offset, |
2240 | u8 squash) { |
2241 | RoseProgram block; |
2242 | block.add_before_end(ue2::make_unique<RoseInstrIncludedJump>(child_offset, |
2243 | squash)); |
2244 | program.add_block(move(block)); |
2245 | } |
2246 | |
2247 | static |
2248 | void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block, |
2249 | RoseProgram &program) { |
2250 | // Prepend an instruction to check the pred state is on. |
2251 | const auto *end_inst = pred_block.end_instruction(); |
2252 | pred_block.insert(begin(pred_block), |
2253 | ue2::make_unique<RoseInstrCheckState>(pred_state, end_inst)); |
2254 | program.add_block(move(pred_block)); |
2255 | } |
2256 | |
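/**
 * \brief Add a single block guarded by a SPARSE_ITER_ANY check, used when all
 * pred blocks are equivalent: one copy of the block runs if any of the pred
 * states is on.
 */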
2257 | static |
2258 | void addPredBlocksAny(map<u32, RoseProgram> &pred_blocks, u32 num_states, |
2259 | RoseProgram &program) { |
2260 | RoseProgram sparse_program; |
2261 | |
2262 | vector<u32> keys; |
2263 | for (const u32 &key : pred_blocks | map_keys) { |
2264 | keys.push_back(key); |
2265 | } |
2266 | |
2267 | const RoseInstruction *end_inst = sparse_program.end_instruction(); |
2268 | auto ri = ue2::make_unique<RoseInstrSparseIterAny>(num_states, keys, end_inst); |
2269 | sparse_program.add_before_end(move(ri)); |
2270 | |
2271 | RoseProgram &block = pred_blocks.begin()->second; |
2272 | |
2273 | /* we no longer need the check handled instruction as all the pred-role |
2274 | * blocks are being collapsed together */ |
2275 | stripCheckHandledInstruction(block); |
2276 | |
2277 | sparse_program.add_before_end(move(block)); |
2278 | program.add_block(move(sparse_program)); |
2279 | } |
2280 | |
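/**
 * \brief Add the pred blocks under a SPARSE_ITER_BEGIN/SPARSE_ITER_NEXT
 * structure, with a jump table mapping each pred state to its program block.
 */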
2281 | static |
2282 | void addPredBlocksMulti(map<u32, RoseProgram> &pred_blocks, |
2283 | u32 num_states, RoseProgram &program) { |
2284 | assert(!pred_blocks.empty()); |
2285 | |
2286 | RoseProgram sparse_program; |
2287 | const RoseInstruction *end_inst = sparse_program.end_instruction(); |
2288 | vector<pair<u32, const RoseInstruction *>> jump_table; |
2289 | |
2290 | // BEGIN instruction. |
2291 | auto ri_begin = ue2::make_unique<RoseInstrSparseIterBegin>(num_states, end_inst); |
2292 | RoseInstrSparseIterBegin *begin_inst = ri_begin.get(); |
2293 | sparse_program.add_before_end(move(ri_begin)); |
2294 | |
2295 | // NEXT instructions, one per pred program. |
2296 | u32 prev_key = pred_blocks.begin()->first; |
2297 | for (auto it = next(begin(pred_blocks)); it != end(pred_blocks); ++it) { |
2298 | auto ri = ue2::make_unique<RoseInstrSparseIterNext>(prev_key, begin_inst, |
2299 | end_inst); |
2300 | sparse_program.add_before_end(move(ri)); |
2301 | prev_key = it->first; |
2302 | } |
2303 | |
2304 | // Splice in each pred program after its BEGIN/NEXT. |
2305 | auto out_it = begin(sparse_program); |
2306 | for (auto &m : pred_blocks) { |
2307 | u32 key = m.first; |
2308 | RoseProgram &flat_prog = m.second; |
2309 | assert(!flat_prog.empty()); |
2310 | const size_t block_len = flat_prog.size() - 1; // without INSTR_END. |
2311 | |
2312 | assert(dynamic_cast<const RoseInstrSparseIterBegin *>(out_it->get()) || |
2313 | dynamic_cast<const RoseInstrSparseIterNext *>(out_it->get())); |
2314 | out_it = sparse_program.insert(++out_it, move(flat_prog)); |
2315 | |
2316 | // Jump table target for this key is the beginning of the block we just |
2317 | // spliced in. |
2318 | jump_table.emplace_back(key, out_it->get()); |
2319 | |
2320 | assert(distance(begin(sparse_program), out_it) + block_len <= |
2321 | sparse_program.size()); |
2322 | advance(out_it, block_len); |
2323 | } |
2324 | |
2325 | // Write the jump table back into the SPARSE_ITER_BEGIN instruction. |
2326 | begin_inst->jump_table = move(jump_table); |
2327 | |
2328 | program.add_block(move(sparse_program)); |
2329 | } |
2330 | |
2331 | void addPredBlocks(map<u32, RoseProgram> &pred_blocks, u32 num_states, |
2332 | RoseProgram &program) { |
2333 | // Trim empty blocks, if any exist. |
2334 | for (auto it = pred_blocks.begin(); it != pred_blocks.end();) { |
2335 | if (it->second.empty()) { |
2336 | it = pred_blocks.erase(it); |
2337 | } else { |
2338 | ++it; |
2339 | } |
2340 | } |
2341 | |
2342 | const size_t num_preds = pred_blocks.size(); |
2343 | if (num_preds == 0) { |
2344 | return; |
2345 | } |
2346 | |
2347 | if (num_preds == 1) { |
2348 | const auto head = pred_blocks.begin(); |
2349 | addPredBlockSingle(head->first, head->second, program); |
2350 | return; |
2351 | } |
2352 | |
2353 | // First, see if all our blocks are equivalent, in which case we can |
2354 | // collapse them down into one. |
2355 | const auto &blocks = pred_blocks | map_values; |
2356 | if (all_of(begin(blocks), end(blocks), [&](const RoseProgram &block) { |
2357 | return RoseProgramEquivalence()(*begin(blocks), block); |
2358 | })) { |
        DEBUG_PRINTF("all blocks equiv\n");
2360 | addPredBlocksAny(pred_blocks, num_states, program); |
2361 | return; |
2362 | } |
2363 | |
2364 | addPredBlocksMulti(pred_blocks, num_states, program); |
2365 | } |
2366 | |
2367 | void applyFinalSpecialisation(RoseProgram &program) { |
2368 | assert(!program.empty()); |
2369 | assert(program.back().code() == ROSE_INSTR_END); |
2370 | if (program.size() < 2) { |
2371 | return; |
2372 | } |
2373 | |
2374 | /* Replace the second-to-last instruction (before END) with a one-shot |
2375 | * specialisation if available. */ |
2376 | auto it = next(program.rbegin()); |
2377 | if (auto *ri = dynamic_cast<const RoseInstrReport *>(it->get())) { |
        DEBUG_PRINTF("replacing REPORT with FINAL_REPORT\n");
2379 | program.replace(it, ue2::make_unique<RoseInstrFinalReport>( |
2380 | ri->onmatch, ri->offset_adjust)); |
2381 | } |
2382 | } |
2383 | |
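/** \brief Collect the literals referenced by CHECK_LONG_LIT and
 * CHECK_LONG_LIT_NOCASE instructions in the given program. */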
2384 | void recordLongLiterals(vector<ue2_case_string> &longLiterals, |
2385 | const RoseProgram &program) { |
2386 | for (const auto &ri : program) { |
2387 | if (const auto *ri_check = |
2388 | dynamic_cast<const RoseInstrCheckLongLit *>(ri.get())) { |
            DEBUG_PRINTF("found CHECK_LONG_LIT for string '%s'\n",
2390 | escapeString(ri_check->literal).c_str()); |
2391 | longLiterals.emplace_back(ri_check->literal, false); |
2392 | continue; |
2393 | } |
2394 | if (const auto *ri_check = |
2395 | dynamic_cast<const RoseInstrCheckLongLitNocase *>(ri.get())) { |
            DEBUG_PRINTF("found CHECK_LONG_LIT_NOCASE for string '%s'\n",
2397 | escapeString(ri_check->literal).c_str()); |
2398 | longLiterals.emplace_back(ri_check->literal, true); |
2399 | } |
2400 | } |
2401 | } |
2402 | |
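/** \brief Record which runtime resources (suffixes, leftfixes, state bits,
 * group checks, delayed literals, long literal checks) are used by the given
 * program. */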
2403 | void recordResources(RoseResources &resources, const RoseProgram &program) { |
2404 | for (const auto &ri : program) { |
2405 | switch (ri->code()) { |
2406 | case ROSE_INSTR_TRIGGER_SUFFIX: |
2407 | resources.has_suffixes = true; |
2408 | break; |
2409 | case ROSE_INSTR_TRIGGER_INFIX: |
2410 | case ROSE_INSTR_CHECK_INFIX: |
2411 | case ROSE_INSTR_CHECK_PREFIX: |
2412 | case ROSE_INSTR_SOM_LEFTFIX: |
2413 | resources.has_leftfixes = true; |
2414 | break; |
2415 | case ROSE_INSTR_SET_STATE: |
2416 | case ROSE_INSTR_CHECK_STATE: |
2417 | case ROSE_INSTR_SPARSE_ITER_BEGIN: |
2418 | case ROSE_INSTR_SPARSE_ITER_NEXT: |
2419 | resources.has_states = true; |
2420 | break; |
2421 | case ROSE_INSTR_CHECK_GROUPS: |
2422 | resources.checks_groups = true; |
2423 | break; |
2424 | case ROSE_INSTR_PUSH_DELAYED: |
2425 | resources.has_lit_delay = true; |
2426 | break; |
2427 | case ROSE_INSTR_CHECK_LONG_LIT: |
2428 | case ROSE_INSTR_CHECK_LONG_LIT_NOCASE: |
2429 | resources.has_lit_check = true; |
2430 | break; |
2431 | default: |
2432 | break; |
2433 | } |
2434 | } |
2435 | } |
2436 | |
2437 | } // namespace ue2 |
2438 | |