1/*
2 * Copyright (c) 2016-2018, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "rose_build_program.h"
30
31#include "rose_build_engine_blob.h"
32#include "rose_build_instructions.h"
33#include "rose_build_lookaround.h"
34#include "rose_build_resources.h"
35#include "nfa/nfa_api_queue.h"
36#include "nfa/nfa_build_util.h"
37#include "nfa/tamaramacompile.h"
38#include "nfagraph/ng_util.h"
39#include "util/charreach_util.h"
40#include "util/container.h"
41#include "util/compile_context.h"
42#include "util/compile_error.h"
43#include "util/report_manager.h"
44#include "util/unordered.h"
45#include "util/verify_types.h"
46
47#include <boost/range/adaptor/map.hpp>
48
49#include <algorithm>
50#include <cstring>
51
52using namespace std;
53using boost::adaptors::map_values;
54using boost::adaptors::map_keys;
55
56namespace ue2 {
57
// Snapshot of the properties of a built NFA engine that the Rose program
// builder needs: engine type, EOD behaviour, state sizes/alignment, and
// whether the engine's state is transient (not stored between stream writes).
engine_info::engine_info(const NFA *nfa, bool trans)
    : type((NFAEngineType)nfa->type), accepts_eod(nfaAcceptsEod(nfa)),
      stream_size(nfa->streamStateSize),
      scratch_size(nfa->scratchStateSize),
      scratch_align(state_alignment(*nfa)),
      transient(trans) {
    // A zero alignment would make scratch layout impossible.
    assert(scratch_align);
}
66
// Leftfix implemented by a real engine: records its queue index, lag,
// transient length, squash mask, stop alphabet, max queue length and
// counting-miracle configuration.
left_build_info::left_build_info(u32 q, u32 l, u32 t, rose_group sm,
                                 const std::vector<u8> &stops, u32 max_ql,
                                 u8 cm_count, const CharReach &cm_cr)
    : queue(q), lag(l), transient(t), squash_mask(sm), stopAlphabet(stops),
      max_queuelen(max_ql), countingMiracleCount(cm_count),
      countingMiracleReach(cm_cr) {
}

// Leftfix implemented purely as a lookaround: no engine is constructed.
left_build_info::left_build_info(const vector<vector<LookEntry>> &looks)
    : has_lookaround(true), lookaround(looks) {
}
78
79using OffsetMap = RoseInstruction::OffsetMap;
80
static
OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) {
    // Lay out each instruction at its final byte offset (respecting
    // ROSE_INSTR_MIN_ALIGN) and record the mapping from instruction pointer
    // to offset. The total serialized length is written to *total_len.
    OffsetMap offset_map;
    u32 offset = 0;
    for (const auto &ri : program) {
        offset = ROUNDUP_N(offset, ROSE_INSTR_MIN_ALIGN);
        DEBUG_PRINTF("instr %p (opcode %d) -> offset %u\n", ri.get(),
                     ri->code(), offset);
        assert(!contains(offset_map, ri.get()));
        offset_map.emplace(ri.get(), offset);
        offset += ri->byte_length();
    }
    *total_len = offset;
    return offset_map;
}
96
RoseProgram::RoseProgram() {
    // Every program is created with a terminating END instruction; all other
    // instructions are inserted before it.
    prog.push_back(ue2::make_unique<RoseInstrEnd>());
}

RoseProgram::~RoseProgram() = default;

// Move-only: programs own their instructions via unique_ptr.
RoseProgram::RoseProgram(RoseProgram &&) = default;
RoseProgram &RoseProgram::operator=(RoseProgram &&) = default;
105
106bool RoseProgram::empty() const {
107 assert(!prog.empty());
108 assert(prog.back()->code() == ROSE_INSTR_END);
109 // Empty if we only have one element, the END instruction.
110 return next(prog.begin()) == prog.end();
111}
112
// Returns the pointer to this program's terminating END instruction, used as
// the jump target for instructions that bail out of the program.
const RoseInstruction *RoseProgram::end_instruction() const {
    assert(!prog.empty());
    assert(prog.back()->code() == ROSE_INSTR_END);

    return prog.back().get();
}
119
120void RoseProgram::update_targets(RoseProgram::iterator it,
121 RoseProgram::iterator it_end,
122 const RoseInstruction *old_target,
123 const RoseInstruction *new_target) {
124 assert(old_target && new_target && old_target != new_target);
125 for (; it != it_end; ++it) {
126 unique_ptr<RoseInstruction> &ri = *it;
127 assert(ri);
128 ri->update_target(old_target, new_target);
129 }
130}
131
// Insert a single instruction before position it. Insertion at end() is
// forbidden: the trailing END instruction must remain last.
RoseProgram::iterator RoseProgram::insert(RoseProgram::iterator it,
                                          unique_ptr<RoseInstruction> ri) {
    assert(!prog.empty());
    assert(it != end());
    assert(prog.back()->code() == ROSE_INSTR_END);

    return prog.insert(it, move(ri));
}
140
// Splice another program's instructions (minus its END) before position it.
// Jumps inside the block that targeted the block's END are redirected to the
// instruction at the insertion point. Returns an iterator to the first
// inserted instruction (or it, if the block was empty).
RoseProgram::iterator RoseProgram::insert(RoseProgram::iterator it,
                                          RoseProgram &&block) {
    assert(!prog.empty());
    assert(it != end());
    assert(prog.back()->code() == ROSE_INSTR_END);

    if (block.empty()) {
        return it;
    }

    // Drop the block's END; instructions pointing at it must be retargeted.
    const RoseInstruction *end_ptr = block.end_instruction();
    assert(end_ptr->code() == ROSE_INSTR_END);
    block.prog.pop_back();

    const RoseInstruction *new_target = it->get();
    update_targets(block.prog.begin(), block.prog.end(), end_ptr, new_target);

    // Workaround: container insert() for ranges doesn't return an iterator
    // in the version of the STL distributed with gcc 4.8.
    auto dist = distance(prog.begin(), it);
    prog.insert(it, make_move_iterator(block.prog.begin()),
                make_move_iterator(block.prog.end()));
    it = prog.begin();
    advance(it, dist);
    return it;
}
167
// Erase instructions in [first, last); thin wrapper over the container erase.
RoseProgram::iterator RoseProgram::erase(RoseProgram::iterator first,
                                         RoseProgram::iterator last) {
    return prog.erase(first, last);
}
172
// Append a single instruction immediately before the trailing END.
void RoseProgram::add_before_end(std::unique_ptr<RoseInstruction> ri) {
    assert(!prog.empty());
    insert(std::prev(prog.end()), std::move(ri));
}
177
// Append a whole block (minus its END) immediately before the trailing END.
void RoseProgram::add_before_end(RoseProgram &&block) {
    assert(!prog.empty());
    assert(prog.back()->code() == ROSE_INSTR_END);

    if (block.empty()) {
        return;
    }

    insert(prev(prog.end()), move(block));
}
188
// Append a block at the end of this program; the block's own END becomes the
// program's new terminator.
void RoseProgram::add_block(RoseProgram &&block) {
    assert(!prog.empty());
    assert(prog.back()->code() == ROSE_INSTR_END);

    if (block.empty()) {
        return;
    }

    // Replace pointers to the current END with pointers to the first
    // instruction in the new sequence.
    const RoseInstruction *end_ptr = end_instruction();
    prog.pop_back();
    update_targets(prog.begin(), prog.end(), end_ptr,
                   block.prog.front().get());
    prog.insert(prog.end(), make_move_iterator(block.prog.begin()),
                make_move_iterator(block.prog.end()));
}
206
// Serialize a RoseProgram into a flat bytecode buffer. Each instruction is
// written at the offset computed by makeOffsetMap; auxiliary data goes into
// the engine blob.
bytecode_ptr<char> writeProgram(RoseEngineBlob &blob,
                                const RoseProgram &program) {
    u32 total_len = 0;
    const auto offset_map = makeOffsetMap(program, &total_len);
    DEBUG_PRINTF("%zu instructions, len %u\n", program.size(), total_len);

    auto bytecode = make_zeroed_bytecode_ptr<char>(total_len,
                                                   ROSE_INSTR_MIN_ALIGN);
    char *ptr = bytecode.get();

    for (const auto &ri : program) {
        assert(contains(offset_map, ri.get()));
        const u32 offset = offset_map.at(ri.get());
        // The offset map lets instructions convert jump targets to offsets.
        ri->write(ptr + offset, blob, offset_map);
    }

    return bytecode;
}
225
226size_t RoseProgramHash::operator()(const RoseProgram &program) const {
227 size_t v = 0;
228 for (const auto &ri : program) {
229 assert(ri);
230 hash_combine(v, ri->hash());
231 }
232 return v;
233}
234
// Deep structural equality for programs: same instruction count, same total
// serialized length, and pairwise-equivalent instructions (with jump targets
// compared via each program's own offset map).
bool RoseProgramEquivalence::operator()(const RoseProgram &prog1,
                                        const RoseProgram &prog2) const {
    if (prog1.size() != prog2.size()) {
        return false;
    }

    u32 len_1 = 0, len_2 = 0;
    const auto offset_map_1 = makeOffsetMap(prog1, &len_1);
    const auto offset_map_2 = makeOffsetMap(prog2, &len_2);

    if (len_1 != len_2) {
        return false;
    }

    auto is_equiv = [&](const unique_ptr<RoseInstruction> &a,
                        const unique_ptr<RoseInstruction> &b) {
        assert(a && b);
        return a->equiv(*b, offset_map_1, offset_map_2);
    };

    return std::equal(prog1.begin(), prog1.end(), prog2.begin(), is_equiv);
}
257
/* Removes any CHECK_HANDLED instructions from the given program */
static
void stripCheckHandledInstruction(RoseProgram &prog) {
    for (auto it = prog.begin(); it != prog.end();) {
        auto ins = dynamic_cast<const RoseInstrCheckNotHandled *>(it->get());
        if (!ins) {
            ++it;
            continue;
        }

        auto next_it = next(it);
        assert(next_it != prog.end()); /* there should always be an end ins */
        auto next_ins = next_it->get();

        /* update all earlier instructions which point to ins to instead point
         * to the next instruction. Only need to look at earlier as we only ever
         * jump forward. */
        RoseProgram::update_targets(prog.begin(), it, ins, next_ins);

        /* remove check handled instruction */
        // erase() returns the iterator following the removed instruction,
        // so iteration continues at next_ins without re-advancing.
        it = prog.erase(it, next_it);
    }
}
281
282
283/** Returns true if the program may read the interpreter's work_done flag */
284static
285bool reads_work_done_flag(const RoseProgram &prog) {
286 for (const auto &ri : prog) {
287 if (dynamic_cast<const RoseInstrSquashGroups *>(ri.get())) {
288 return true;
289 }
290 }
291 return false;
292}
293
// Append an ENGINES_EOD instruction (run outfix/suffix engines at EOD) if an
// EOD NFA iterator was built; a zero offset means there is nothing to run.
void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program) {
    if (!eodNfaIterOffset) {
        return;
    }

    RoseProgram block;
    block.add_before_end(ue2::make_unique<RoseInstrEnginesEod>(eodNfaIterOffset));
    program.add_block(move(block));
}
303
304void addSuffixesEodProgram(RoseProgram &program) {
305 RoseProgram block;
306 block.add_before_end(ue2::make_unique<RoseInstrSuffixesEod>());
307 program.add_block(move(block));
308}
309
310void addMatcherEodProgram(RoseProgram &program) {
311 RoseProgram block;
312 block.add_before_end(ue2::make_unique<RoseInstrMatcherEod>());
313 program.add_block(move(block));
314}
315
// Append a FLUSH_COMBINATION instruction, which evaluates pending logical
// combination reports before further matches are processed.
void addFlushCombinationProgram(RoseProgram &program) {
    program.add_before_end(ue2::make_unique<RoseInstrFlushCombination>());
}
319
// Emit a CHECK_PREFIX/CHECK_INFIX instruction for vertex v's leftfix, if it
// has one implemented by an engine. On failure the check jumps to the
// program's END.
static
void makeRoleCheckLeftfix(const RoseBuildImpl &build,
                          const map<RoseVertex, left_build_info> &leftfix_info,
                          RoseVertex v, RoseProgram &program) {
    auto it = leftfix_info.find(v);
    if (it == end(leftfix_info)) {
        return;
    }
    const left_build_info &lni = it->second;
    if (lni.has_lookaround) {
        return; // Leftfix completely implemented by lookaround.
    }

    // In streaming mode the lag must be storable in stream state.
    assert(!build.cc.streaming ||
           build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG);

    // Root successors have prefix leftfixes; all others have infixes.
    bool is_prefix = build.isRootSuccessor(v);
    const auto *end_inst = program.end_instruction();

    unique_ptr<RoseInstruction> ri;
    if (is_prefix) {
        ri = ue2::make_unique<RoseInstrCheckPrefix>(lni.queue, build.g[v].left.lag,
                                                    build.g[v].left.leftfix_report,
                                                    end_inst);
    } else {
        ri = ue2::make_unique<RoseInstrCheckInfix>(lni.queue, build.g[v].left.lag,
                                                   build.g[v].left.leftfix_report,
                                                   end_inst);
    }
    program.add_before_end(move(ri));
}
351
// For a literal in the anchored table, emit an ANCHORED_DELAY instruction so
// that matches arriving before the floating minimum match offset are deferred
// (with their groups recorded) rather than handled immediately.
static
void makeAnchoredLiteralDelay(const RoseBuildImpl &build,
                              const ProgramBuild &prog_build, u32 lit_id,
                              RoseProgram &program) {
    // Only relevant for literals in the anchored table.
    const rose_literal_id &lit = build.literals.at(lit_id);
    if (lit.table != ROSE_ANCHORED) {
        return;
    }

    // If this literal match cannot occur after floatingMinLiteralMatchOffset,
    // we do not need this check.
    bool all_too_early = true;
    rose_group groups = 0;

    const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
    for (RoseVertex v : lit_vertices) {
        if (build.g[v].max_offset > prog_build.floatingMinLiteralMatchOffset) {
            all_too_early = false;
        }
        groups |= build.g[v].groups;
    }

    if (all_too_early) {
        return;
    }

    assert(contains(prog_build.anchored_programs, lit_id));
    u32 anch_id = prog_build.anchored_programs.at(lit_id);

    const auto *end_inst = program.end_instruction();
    auto ri = ue2::make_unique<RoseInstrAnchoredDelay>(groups, anch_id, end_inst);
    program.add_before_end(move(ri));
}
386
// Emit a DEDUPE instruction for the given report; on a duplicate the program
// jumps to END rather than reporting again.
static
void makeDedupe(const ReportManager &rm, const Report &report,
                RoseProgram &program) {
    const auto *end_inst = program.end_instruction();
    auto ri =
        ue2::make_unique<RoseInstrDedupe>(report.quashSom, rm.getDkey(report),
                                          report.offsetAdjust, end_inst);
    program.add_before_end(move(ri));
}
396
// Emit a DEDUPE_SOM instruction (dedupe keyed on start-of-match as well as
// end offset) for the given report.
static
void makeDedupeSom(const ReportManager &rm, const Report &report,
                   RoseProgram &program) {
    const auto *end_inst = program.end_instruction();
    auto ri = ue2::make_unique<RoseInstrDedupeSom>(report.quashSom,
                                                   rm.getDkey(report),
                                                   report.offsetAdjust, end_inst);
    program.add_before_end(move(ri));
}
406
// Emit a CATCH_UP instruction if any of the given reports requires other
// engines to be caught up to the current offset before it fires.
static
void makeCatchup(const ReportManager &rm, bool needs_catchup,
                 const flat_set<ReportID> &reports, RoseProgram &program) {
    if (!needs_catchup) {
        return;
    }

    // Everything except the INTERNAL_ROSE_CHAIN report needs catchup to run
    // before reports are triggered.

    auto report_needs_catchup = [&](const ReportID &id) {
        const Report &report = rm.getReport(id);
        return report.type != INTERNAL_ROSE_CHAIN;
    };

    if (!any_of(begin(reports), end(reports), report_needs_catchup)) {
        DEBUG_PRINTF("none of the given reports needs catchup\n");
        return;
    }

    program.add_before_end(ue2::make_unique<RoseInstrCatchUp>());
}
429
// Translate a report's type into the corresponding som_operation structure
// used by the runtime: maps the report type to a SOM op type, copies the
// onmatch id, and fills the aux field (rev NFA index for reverse-NFA ops,
// SOM distance otherwise). Throws CompileError for non-SOM report types.
static
void writeSomOperation(const Report &report, som_operation *op) {
    assert(op);

    memset(op, 0, sizeof(*op));

    switch (report.type) {
    case EXTERNAL_CALLBACK_SOM_REL:
        op->type = SOM_EXTERNAL_CALLBACK_REL;
        break;
    case INTERNAL_SOM_LOC_SET:
        op->type = SOM_INTERNAL_LOC_SET;
        break;
    case INTERNAL_SOM_LOC_SET_IF_UNSET:
        op->type = SOM_INTERNAL_LOC_SET_IF_UNSET;
        break;
    case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
        op->type = SOM_INTERNAL_LOC_SET_IF_WRITABLE;
        break;
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
        op->type = SOM_INTERNAL_LOC_SET_REV_NFA;
        break;
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
        op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET;
        break;
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
        op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE;
        break;
    case INTERNAL_SOM_LOC_COPY:
        op->type = SOM_INTERNAL_LOC_COPY;
        break;
    case INTERNAL_SOM_LOC_COPY_IF_WRITABLE:
        op->type = SOM_INTERNAL_LOC_COPY_IF_WRITABLE;
        break;
    case INTERNAL_SOM_LOC_MAKE_WRITABLE:
        op->type = SOM_INTERNAL_LOC_MAKE_WRITABLE;
        break;
    case EXTERNAL_CALLBACK_SOM_STORED:
        op->type = SOM_EXTERNAL_CALLBACK_STORED;
        break;
    case EXTERNAL_CALLBACK_SOM_ABS:
        op->type = SOM_EXTERNAL_CALLBACK_ABS;
        break;
    case EXTERNAL_CALLBACK_SOM_REV_NFA:
        op->type = SOM_EXTERNAL_CALLBACK_REV_NFA;
        break;
    case INTERNAL_SOM_LOC_SET_FROM:
        op->type = SOM_INTERNAL_LOC_SET_FROM;
        break;
    case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE:
        op->type = SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE;
        break;
    default:
        // This report doesn't correspond to a SOM operation.
        assert(0);
        throw CompileError("Unable to generate bytecode.");
    }

    op->onmatch = report.onmatch;

    // Reverse-NFA operations identify the NFA to run; everything else uses a
    // fixed SOM distance.
    switch (report.type) {
    case EXTERNAL_CALLBACK_SOM_REV_NFA:
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
        op->aux.revNfaIndex = report.revNfaIndex;
        break;
    default:
        op->aux.somDistance = report.somDistance;
        break;
    }
}
502
// For a report participating in logical combinations, emit SET_LOGICAL for
// its lkey and SET_COMBINATION for every combination key that depends on it.
static
void addLogicalSetRequired(const Report &report, ReportManager &rm,
                           RoseProgram &program) {
    if (report.lkey == INVALID_LKEY) {
        return;
    }
    // set matching status of current lkey
    auto risl = ue2::make_unique<RoseInstrSetLogical>(report.lkey,
                                                      report.offsetAdjust);
    program.add_before_end(move(risl));
    // set current lkey's corresponding ckeys active, pending to check
    for (auto ckey : rm.getRelateCKeys(report.lkey)) {
        auto risc = ue2::make_unique<RoseInstrSetCombination>(ckey);
        program.add_before_end(move(risc));
    }
}
519
// Build the program fragment that fires report `id`: bounds/exhaustion/min-
// length guards first, then the type-specific report instruction(s), with
// dedupe where required. `has_som` indicates a SOM value is already available
// to the program. The fragment is appended to `program` as a block.
static
void makeReport(const RoseBuildImpl &build, const ReportID id,
                const bool has_som, RoseProgram &program) {
    assert(id < build.rm.numReports());
    const Report &report = build.rm.getReport(id);

    RoseProgram report_block;
    const RoseInstruction *end_inst = report_block.end_instruction();

    // Handle min/max offset checks.
    if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) {
        auto ri = ue2::make_unique<RoseInstrCheckBounds>(report.minOffset,
                                                         report.maxOffset, end_inst);
        report_block.add_before_end(move(ri));
    }

    // If this report has an exhaustion key, we can check it in the program
    // rather than waiting until we're in the callback adaptor.
    if (report.ekey != INVALID_EKEY) {
        auto ri = ue2::make_unique<RoseInstrCheckExhausted>(report.ekey, end_inst);
        report_block.add_before_end(move(ri));
    }

    // External SOM reports that aren't passthrough need their SOM value
    // calculated.
    if (isExternalSomReport(report) &&
        report.type != EXTERNAL_CALLBACK_SOM_PASS) {
        auto ri = ue2::make_unique<RoseInstrSomFromReport>();
        writeSomOperation(report, &ri->som);
        report_block.add_before_end(move(ri));
    }

    // Min length constraint.
    if (report.minLength > 0) {
        assert(build.hasSom);
        auto ri = ue2::make_unique<RoseInstrCheckMinLength>(
            report.offsetAdjust, report.minLength, end_inst);
        report_block.add_before_end(move(ri));
    }

    if (report.quashSom) {
        report_block.add_before_end(ue2::make_unique<RoseInstrSomZero>());
    }

    switch (report.type) {
    case EXTERNAL_CALLBACK:
        // Logical combinations require pending combination state to be
        // flushed before any plain external report.
        if (build.rm.numCkeys()) {
            addFlushCombinationProgram(report_block);
        }
        if (!has_som) {
            // Dedupe is only necessary if this report has a dkey, or if there
            // are SOM reports to catch up.
            bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom;
            if (report.ekey == INVALID_EKEY) {
                if (needs_dedupe) {
                    if (!report.quiet) {
                        // Fused dedupe+report saves an instruction.
                        report_block.add_before_end(
                            ue2::make_unique<RoseInstrDedupeAndReport>(
                                report.quashSom, build.rm.getDkey(report),
                                report.onmatch, report.offsetAdjust, end_inst));
                    } else {
                        makeDedupe(build.rm, report, report_block);
                    }
                } else {
                    if (!report.quiet) {
                        report_block.add_before_end(
                            ue2::make_unique<RoseInstrReport>(
                                report.onmatch, report.offsetAdjust));
                    }
                }
            } else {
                // Exhaustible report: dedupe (if needed), then either report
                // with exhaustion or, for quiet reports, just set the ekey.
                if (needs_dedupe) {
                    makeDedupe(build.rm, report, report_block);
                }
                if (!report.quiet) {
                    report_block.add_before_end(
                        ue2::make_unique<RoseInstrReportExhaust>(
                            report.onmatch, report.offsetAdjust, report.ekey));
                } else {
                    report_block.add_before_end(
                        ue2::make_unique<RoseInstrSetExhaust>(report.ekey));
                }
            }
        } else { // has_som
            makeDedupeSom(build.rm, report, report_block);
            if (report.ekey == INVALID_EKEY) {
                if (!report.quiet) {
                    report_block.add_before_end(ue2::make_unique<RoseInstrReportSom>(
                        report.onmatch, report.offsetAdjust));
                }
            } else {
                if (!report.quiet) {
                    report_block.add_before_end(
                        ue2::make_unique<RoseInstrReportSomExhaust>(
                            report.onmatch, report.offsetAdjust, report.ekey));
                } else {
                    report_block.add_before_end(
                        ue2::make_unique<RoseInstrSetExhaust>(report.ekey));
                }
            }
        }
        addLogicalSetRequired(report, build.rm, report_block);
        break;
    case INTERNAL_SOM_LOC_SET:
    case INTERNAL_SOM_LOC_SET_IF_UNSET:
    case INTERNAL_SOM_LOC_SET_IF_WRITABLE:
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA:
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET:
    case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE:
    case INTERNAL_SOM_LOC_COPY:
    case INTERNAL_SOM_LOC_COPY_IF_WRITABLE:
    case INTERNAL_SOM_LOC_MAKE_WRITABLE:
    case INTERNAL_SOM_LOC_SET_FROM:
    case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE:
        // Internal SOM bookkeeping operation; choose SOM-aware variant when a
        // SOM value is already in flight.
        if (build.rm.numCkeys()) {
            addFlushCombinationProgram(report_block);
        }
        if (has_som) {
            auto ri = ue2::make_unique<RoseInstrReportSomAware>();
            writeSomOperation(report, &ri->som);
            report_block.add_before_end(move(ri));
        } else {
            auto ri = ue2::make_unique<RoseInstrReportSomInt>();
            writeSomOperation(report, &ri->som);
            report_block.add_before_end(move(ri));
        }
        break;
    case INTERNAL_ROSE_CHAIN: {
        // Chained (MPV) report: triggers another engine, never the user.
        report_block.add_before_end(ue2::make_unique<RoseInstrReportChain>(
            report.onmatch, report.topSquashDistance));
        break;
    }
    case EXTERNAL_CALLBACK_SOM_REL:
    case EXTERNAL_CALLBACK_SOM_STORED:
    case EXTERNAL_CALLBACK_SOM_ABS:
    case EXTERNAL_CALLBACK_SOM_REV_NFA:
        // External SOM report: SOM value was computed earlier (SomFromReport).
        if (build.rm.numCkeys()) {
            addFlushCombinationProgram(report_block);
        }
        makeDedupeSom(build.rm, report, report_block);
        if (report.ekey == INVALID_EKEY) {
            if (!report.quiet) {
                report_block.add_before_end(ue2::make_unique<RoseInstrReportSom>(
                    report.onmatch, report.offsetAdjust));
            }
        } else {
            if (!report.quiet) {
                report_block.add_before_end(
                    ue2::make_unique<RoseInstrReportSomExhaust>(
                        report.onmatch, report.offsetAdjust, report.ekey));
            } else {
                report_block.add_before_end(
                    ue2::make_unique<RoseInstrSetExhaust>(report.ekey));
            }
        }
        addLogicalSetRequired(report, build.rm, report_block);
        break;
    case EXTERNAL_CALLBACK_SOM_PASS:
        // Passthrough: the SOM value supplied by the caller is used directly.
        if (build.rm.numCkeys()) {
            addFlushCombinationProgram(report_block);
        }
        makeDedupeSom(build.rm, report, report_block);
        if (report.ekey == INVALID_EKEY) {
            if (!report.quiet) {
                report_block.add_before_end(ue2::make_unique<RoseInstrReportSom>(
                    report.onmatch, report.offsetAdjust));
            }
        } else {
            if (!report.quiet) {
                report_block.add_before_end(
                    ue2::make_unique<RoseInstrReportSomExhaust>(
                        report.onmatch, report.offsetAdjust, report.ekey));
            } else {
                report_block.add_before_end(
                    ue2::make_unique<RoseInstrSetExhaust>(report.ekey));
            }
        }
        addLogicalSetRequired(report, build.rm, report_block);
        break;

    default:
        assert(0);
        throw CompileError("Unable to generate bytecode.");
    }

    program.add_block(move(report_block));
}
707
// Emit the reports for role v: first set up the SOM value (from the leftfix
// or a fixed adjustment) if the role tracks SOM, then catch up engines if
// needed, then fire each report attached to the vertex.
static
void makeRoleReports(const RoseBuildImpl &build,
                     const std::map<RoseVertex, left_build_info> &leftfix_info,
                     bool needs_catchup, RoseVertex v, RoseProgram &program) {
    const auto &g = build.g;

    bool report_som = false;
    if (g[v].left.tracksSom()) {
        /* we are a suffaig - need to update role to provide som to the
         * suffix. */
        assert(contains(leftfix_info, v));
        const left_build_info &lni = leftfix_info.at(v);
        program.add_before_end(
            ue2::make_unique<RoseInstrSomLeftfix>(lni.queue, g[v].left.lag));
        report_som = true;
    } else if (g[v].som_adjust) {
        program.add_before_end(
            ue2::make_unique<RoseInstrSomAdjust>(g[v].som_adjust));
        report_som = true;
    }

    makeCatchup(build.rm, needs_catchup, g[v].reports, program);

    RoseProgram report_block;
    for (ReportID id : g[v].reports) {
        makeReport(build, id, report_som, report_block);
    }
    program.add_before_end(move(report_block));
}
737
738static
739void makeRoleSetState(const unordered_map<RoseVertex, u32> &roleStateIndices,
740 RoseVertex v, RoseProgram &program) {
741 // We only need this instruction if a state index has been assigned to this
742 // vertex.
743 auto it = roleStateIndices.find(v);
744 if (it == end(roleStateIndices)) {
745 return;
746 }
747 program.add_before_end(ue2::make_unique<RoseInstrSetState>(it->second));
748}
749
// Emit PUSH_DELAYED instructions for all delayed variants of a literal. The
// instructions are sorted and deduplicated so that equivalent programs hash
// and compare equal regardless of input order.
static
void makePushDelayedInstructions(const RoseLiteralMap &literals,
                                 ProgramBuild &prog_build,
                                 const flat_set<u32> &delayed_ids,
                                 RoseProgram &program) {
    vector<RoseInstrPushDelayed> delay_instructions;

    for (const auto &delayed_lit_id : delayed_ids) {
        DEBUG_PRINTF("delayed lit id %u\n", delayed_lit_id);
        assert(contains(prog_build.delay_programs, delayed_lit_id));
        u32 delay_id = prog_build.delay_programs.at(delayed_lit_id);
        const auto &delay_lit = literals.at(delayed_lit_id);
        delay_instructions.emplace_back(verify_u8(delay_lit.delay), delay_id);
    }

    sort_and_unique(delay_instructions, [](const RoseInstrPushDelayed &a,
                                           const RoseInstrPushDelayed &b) {
        return tie(a.delay, a.index) < tie(b.delay, b.index);
    });

    for (const auto &ri : delay_instructions) {
        program.add_before_end(ue2::make_unique<RoseInstrPushDelayed>(ri));
    }
}
774
// Emit a literal-confirm instruction when the literal is too long for the
// matcher to verify by itself: CHECK_MED_LIT for medium lengths, CHECK_LONG_LIT
// (backed by the long literal table) beyond longLitLengthThreshold. Short
// literals need no confirm. Throws ResourceLimitError for over-limit lengths.
static
void makeCheckLiteralInstruction(const rose_literal_id &lit,
                                 size_t longLitLengthThreshold,
                                 RoseProgram &program,
                                 const CompileContext &cc) {
    assert(longLitLengthThreshold > 0);

    DEBUG_PRINTF("lit=%s, long lit threshold %zu\n", dumpString(lit.s).c_str(),
                 longLitLengthThreshold);

    if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) {
        DEBUG_PRINTF("lit short enough to not need confirm\n");
        return;
    }

    // Check resource limits as well.
    if (lit.s.length() > cc.grey.limitLiteralLength) {
        throw ResourceLimitError();
    }

    if (lit.s.length() <= longLitLengthThreshold) {
        DEBUG_PRINTF("is a medium-length literal\n");
        const auto *end_inst = program.end_instruction();
        unique_ptr<RoseInstruction> ri;
        // Nocase literals use a case-insensitive compare variant.
        if (lit.s.any_nocase()) {
            ri = ue2::make_unique<RoseInstrCheckMedLitNocase>(lit.s.get_string(),
                                                              end_inst);
        } else {
            ri = ue2::make_unique<RoseInstrCheckMedLit>(lit.s.get_string(),
                                                        end_inst);
        }
        program.add_before_end(move(ri));
        return;
    }

    // Long literal support should only really be used for the floating table
    // in streaming mode.
    assert(lit.table == ROSE_FLOATING && cc.streaming);

    DEBUG_PRINTF("is a long literal\n");

    const auto *end_inst = program.end_instruction();
    unique_ptr<RoseInstruction> ri;
    if (lit.s.any_nocase()) {
        ri = ue2::make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string(),
                                                           end_inst);
    } else {
        ri = ue2::make_unique<RoseInstrCheckLongLit>(lit.s.get_string(), end_inst);
    }
    program.add_before_end(move(ri));
}
826
// Emit a CHECK_NOT_HANDLED instruction for vertex v, allocating a fresh
// handled-key on first sight of the vertex; prevents the same role being
// processed twice at one offset.
static
void makeRoleCheckNotHandled(ProgramBuild &prog_build, RoseVertex v,
                             RoseProgram &program) {
    u32 handled_key;
    if (contains(prog_build.handledKeys, v)) {
        handled_key = prog_build.handledKeys.at(v);
    } else {
        handled_key = verify_u32(prog_build.handledKeys.size());
        prog_build.handledKeys.emplace(v, handled_key);
    }

    const auto *end_inst = program.end_instruction();
    auto ri = ue2::make_unique<RoseInstrCheckNotHandled>(handled_key, end_inst);
    program.add_before_end(move(ri));
}
842
// Emit a CHECK_BOUNDS instruction enforcing the offset bounds of edge e on
// vertex v, translating graph-relative bounds into absolute offsets.
static
void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v,
                         const RoseEdge &e, RoseProgram &program) {
    const RoseGraph &g = build.g;
    const RoseVertex u = source(e, g);

    // We know that we can trust the anchored table (DFA) to always deliver us
    // literals at the correct offset.
    if (build.isAnchored(v)) {
        DEBUG_PRINTF("literal in anchored table, skipping bounds check\n");
        return;
    }

    // Use the minimum literal length.
    u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v));

    // Edge bounds are measured from the end of u's literal to the start of
    // v's literal; add the literal length to make them end-offset relative.
    u64a min_bound = g[e].minBound + lit_length;
    u64a max_bound = g[e].maxBound == ROSE_BOUND_INF
                         ? ROSE_BOUND_INF
                         : g[e].maxBound + lit_length;

    if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
        assert(g[u].fixedOffset());
        // Make offsets absolute.
        min_bound += g[u].max_offset;
        if (max_bound != ROSE_BOUND_INF) {
            max_bound += g[u].max_offset;
        }
    }

    assert(max_bound <= ROSE_BOUND_INF);
    assert(min_bound <= max_bound);

    // CHECK_BOUNDS instruction uses 64-bit bounds, so we can use MAX_OFFSET
    // (max value of a u64a) to represent ROSE_BOUND_INF.
    if (max_bound == ROSE_BOUND_INF) {
        max_bound = MAX_OFFSET;
    }

    // This instruction should be doing _something_ -- bounds should be tighter
    // than just {length, inf}.
    assert(min_bound > lit_length || max_bound < MAX_OFFSET);

    const auto *end_inst = program.end_instruction();
    program.add_before_end(
        ue2::make_unique<RoseInstrCheckBounds>(min_bound, max_bound, end_inst));
}
890
// Emit a SET_GROUPS instruction for vertex v, switching on only the groups
// that are not guaranteed to be on already (groups all predecessors set and
// that cannot be squashed in between).
static
void makeRoleGroups(const RoseGraph &g, ProgramBuild &prog_build,
                    RoseVertex v, RoseProgram &program) {
    rose_group groups = g[v].groups;
    if (!groups) {
        return;
    }

    // The set of "already on" groups as we process this vertex is the
    // intersection of the groups set by our predecessors.
    assert(in_degree(v, g) > 0);
    rose_group already_on = ~rose_group{0};
    for (const auto &u : inv_adjacent_vertices_range(v, g)) {
        already_on &= prog_build.vertex_group_map.at(u);
    }

    DEBUG_PRINTF("already_on=0x%llx\n", already_on);
    DEBUG_PRINTF("squashable=0x%llx\n", prog_build.squashable_groups);
    DEBUG_PRINTF("groups=0x%llx\n", groups);

    // Squashable groups may have been turned off again since the
    // predecessors fired, so we cannot rely on them being on.
    already_on &= ~prog_build.squashable_groups;
    DEBUG_PRINTF("squashed already_on=0x%llx\n", already_on);

    // We don't *have* to mask off the groups that we know are already on, but
    // this will make bugs more apparent.
    groups &= ~already_on;

    if (!groups) {
        DEBUG_PRINTF("no new groups to set, skipping\n");
        return;
    }

    program.add_before_end(ue2::make_unique<RoseInstrSetGroups>(groups));
}
925
// Attempt to express the character reach as a single (byte & andmask) ==
// cmpmask test. Only possible when the reach is a power-of-two-sized set
// whose members differ from each other only in the masked-off bits; returns
// false otherwise.
static
bool checkReachMask(const CharReach &cr, u8 &andmask, u8 &cmpmask) {
    size_t reach_size = cr.count();
    assert(reach_size > 0);
    // check whether entry_size is some power of 2.
    if ((reach_size - 1) & reach_size) {
        return false;
    }
    make_and_cmp_mask(cr, &andmask, &cmpmask);
    // The number of "don't care" bits in andmask must account for exactly
    // reach_size characters, i.e. 2^popcount(~andmask) == reach_size.
    if ((1 << popcount32((u8)(~andmask))) ^ reach_size) {
        return false;
    }
    return true;
}
940
941static
942bool checkReachWithFlip(const CharReach &cr, u8 &andmask,
943 u8 &cmpmask, u8 &flip) {
944 if (checkReachMask(cr, andmask, cmpmask)) {
945 flip = 0;
946 return true;
947 }
948 if (checkReachMask(~cr, andmask, cmpmask)) {
949 flip = 1;
950 return true;
951 }
952 return false;
953}
954
// Try to implement a single-entry lookaround as a CHECK_BYTE instruction
// (one masked byte compare, with optional polarity flip). Returns true on
// success.
static
bool makeRoleByte(const vector<LookEntry> &look, RoseProgram &program) {
    if (look.size() == 1) {
        const auto &entry = look[0];
        u8 andmask_u8, cmpmask_u8;
        u8 flip;
        if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) {
            return false;
        }
        s32 checkbyte_offset = verify_s32(entry.offset);
        DEBUG_PRINTF("CHECK BYTE offset=%d\n", checkbyte_offset);
        const auto *end_inst = program.end_instruction();
        auto ri = ue2::make_unique<RoseInstrCheckByte>(andmask_u8, cmpmask_u8, flip,
                                                       checkbyte_offset, end_inst);
        program.add_before_end(move(ri));
        return true;
    }
    return false;
}
974
// Try to implement a lookaround spanning at most 8 bytes as a CHECK_MASK
// instruction: one 64-bit masked compare, with a per-byte negation mask for
// entries that matched via their complement. Returns true on success.
static
bool makeRoleMask(const vector<LookEntry> &look, RoseProgram &program) {
    if (look.back().offset < look.front().offset + 8) {
        s32 base_offset = verify_s32(look.front().offset);
        u64a and_mask = 0;
        u64a cmp_mask = 0;
        u64a neg_mask = 0;
        for (const auto &entry : look) {
            u8 andmask_u8, cmpmask_u8, flip;
            if (!checkReachWithFlip(entry.reach, andmask_u8,
                                    cmpmask_u8, flip)) {
                return false;
            }
            DEBUG_PRINTF("entry offset %d\n", entry.offset);
            // Place this entry's byte masks at its position in the word.
            u32 shift = (entry.offset - base_offset) << 3;
            and_mask |= (u64a)andmask_u8 << shift;
            cmp_mask |= (u64a)cmpmask_u8 << shift;
            if (flip) {
                neg_mask |= 0xffLLU << shift;
            }
        }
        DEBUG_PRINTF("CHECK MASK and_mask=%llx cmp_mask=%llx\n",
                     and_mask, cmp_mask);
        const auto *end_inst = program.end_instruction();
        auto ri = ue2::make_unique<RoseInstrCheckMask>(and_mask, cmp_mask, neg_mask,
                                                       base_offset, end_inst);
        program.add_before_end(move(ri));
        return true;
    }
    return false;
}
1006
1007static UNUSED
1008string convertMaskstoString(u8 *p, int byte_len) {
1009 string s;
1010 for (int i = 0; i < byte_len; i++) {
1011 u8 hi = *p >> 4;
1012 u8 lo = *p & 0xf;
1013 s += (char)(hi + (hi < 10 ? 48 : 87));
1014 s += (char)(lo + (lo < 10 ? 48 : 87));
1015 p++;
1016 }
1017 return s;
1018}
1019
static
bool makeRoleMask32(const vector<LookEntry> &look,
                    RoseProgram &program) {
    // Only applicable if the whole lookaround fits in a 32-byte window.
    if (look.back().offset >= look.front().offset + 32) {
        return false;
    }
    s32 base_offset = verify_s32(look.front().offset);
    array<u8, 32> and_mask, cmp_mask;
    and_mask.fill(0);
    cmp_mask.fill(0);
    u32 neg_mask = 0; // bit set per offset whose comparison is negated
    for (const auto &entry : look) {
        u8 andmask_u8, cmpmask_u8, flip;
        // Each entry's reach must be expressible as an and/compare pair,
        // possibly on the complemented reach (flip).
        if (!checkReachWithFlip(entry.reach, andmask_u8,
                                cmpmask_u8, flip)) {
            return false;
        }
        u32 shift = entry.offset - base_offset;
        assert(shift < 32);
        and_mask[shift] = andmask_u8;
        cmp_mask[shift] = cmpmask_u8;
        if (flip) {
            neg_mask |= 1 << shift;
        }
    }

    DEBUG_PRINTF("and_mask %s\n",
                 convertMaskstoString(and_mask.data(), 32).c_str());
    DEBUG_PRINTF("cmp_mask %s\n",
                 convertMaskstoString(cmp_mask.data(), 32).c_str());
    DEBUG_PRINTF("neg_mask %08x\n", neg_mask);
    DEBUG_PRINTF("base_offset %d\n", base_offset);

    const auto *end_inst = program.end_instruction();
    auto ri = ue2::make_unique<RoseInstrCheckMask32>(and_mask, cmp_mask, neg_mask,
                                                     base_offset, end_inst);
    program.add_before_end(move(ri));
    return true;
}
1059
// Sorting by the size of every bucket.
// Used in map<u32, vector<s8>, cmpNibble>.
// A key encodes a bucket: the hi-nibble bitmap lives in the top 16 bits and
// the lo-nibble bitmap in the bottom 16 bits, so the number of characters a
// bucket covers is popcount(hi) * popcount(lo). Smaller buckets sort first;
// the raw key breaks ties to give a strict weak ordering.
struct cmpNibble {
    bool operator()(const u32 data1, const u32 data2) const{
        // Note: "data << 16" shifts the hi half out of the u32, so the
        // popcount below counts only the lo-nibble bitmap.
        u32 size1 = popcount32(data1 >> 16) * popcount32(data1 << 16);
        u32 size2 = popcount32(data2 >> 16) * popcount32(data2 << 16);
        return std::tie(size1, data1) < std::tie(size2, data2);
    }
};
1069
// Insert all pairs of bucket and offset into buckets.
static really_inline
void getAllBuckets(const vector<LookEntry> &look,
                   map<u32, vector<s8>, cmpNibble> &buckets, u64a &neg_mask) {
    s32 base_offset = verify_s32(look.front().offset);
    for (const auto &entry : look) {
        CharReach cr = entry.reach;
        // Flip heavy character classes to save buckets.
        // neg_mask arrives all-ones; clear the bit for offsets matched
        // directly and leave it set for offsets matched via the complement.
        if (cr.count() > 128 ) {
            cr.flip();
        } else {
            neg_mask ^= 1ULL << (entry.offset - base_offset);
        }
        map <u16, u16> lo2hi;
        // We treat Ascii Table as a 16x16 grid.
        // Push every row in cr into lo2hi and mark the row number.
        for (size_t i = cr.find_first(); i != CharReach::npos;) {
            u8 it_hi = i >> 4;
            u16 low_encode = 0;
            // Gather all columns (low nibbles) present in this row.
            while (i != CharReach::npos && (i >> 4) == it_hi) {
                low_encode |= 1 << (i & 0xf);
                i = cr.find_next(i);
            }
            lo2hi[low_encode] |= 1 << it_hi;
        }
        // Each distinct (rows, columns) pair is a candidate bucket; record
        // every offset that uses it.
        for (const auto &it : lo2hi) {
            u32 hi_lo = (it.second << 16) | it.first;
            buckets[hi_lo].push_back(entry.offset);
        }
    }
}
1101
1102// Once we have a new bucket, we'll try to combine it with all old buckets.
1103static really_inline
1104void nibUpdate(map<u32, u16> &nib, u32 hi_lo) {
1105 u16 hi = hi_lo >> 16;
1106 u16 lo = hi_lo & 0xffff;
1107 for (const auto pairs : nib) {
1108 u32 old = pairs.first;
1109 if ((old >> 16) == hi || (old & 0xffff) == lo) {
1110 if (!nib[old | hi_lo]) {
1111 nib[old | hi_lo] = nib[old] | nib[hi_lo];
1112 }
1113 }
1114 }
1115}
1116
// Sets bucket bit 'bit_index' in the shufti nibble mask for every nibble
// value present in the 16-bit bitmap 'data'.
static really_inline
void nibMaskUpdate(array<u8, 32> &mask, u32 data, u8 bit_index) {
    // Walk the set bits of 'data'; 'index' is the nibble value.
    for (u8 index = 0; data > 0; data >>= 1, index++) {
        if (data & 1) {
            // 0 ~ 7 bucket in first 16 bytes,
            // 8 ~ 15 bucket in second 16 bytes.
            if (bit_index >= 8) {
                mask[index + 16] |= 1 << (bit_index - 8);
            } else {
                mask[index] |= 1 << bit_index;
            }
        }
    }
}
1131
// Computes shufti nibble masks (hi_mask/lo_mask) and per-offset bucket
// select masks for a lookaround. bit_idx returns the number of buckets
// used; neg_mask returns the per-offset negation bits. Fails (returns
// false) if more buckets are needed than the instruction width allows:
// 8 buckets when len == 64, otherwise 16.
static
bool getShuftiMasks(const vector<LookEntry> &look, array<u8, 32> &hi_mask,
                    array<u8, 32> &lo_mask, u8 *bucket_select_hi,
                    u8 *bucket_select_lo, u64a &neg_mask,
                    u8 &bit_idx, size_t len) {
    map<u32, u16> nib; // map every bucket to its bucket number.
    map<u32, vector<s8>, cmpNibble> bucket2offsets;
    s32 base_offset = look.front().offset;

    bit_idx = 0;
    neg_mask = ~0ULL;

    getAllBuckets(look, bucket2offsets, neg_mask);

    // Buckets are visited smallest-first (see cmpNibble), so scarce bucket
    // numbers go to the cheapest character classes first.
    for (const auto &it : bucket2offsets) {
        u32 hi_lo = it.first;
        // New bucket.
        if (!nib[hi_lo]) {
            // Out of bucket numbers for this instruction width.
            if ((bit_idx >= 8 && len == 64) || bit_idx >= 16) {
                return false;
            }
            nib[hi_lo] = 1 << bit_idx;

            nibUpdate(nib, hi_lo);
            nibMaskUpdate(hi_mask, hi_lo >> 16, bit_idx);
            nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_idx);
            bit_idx++;
        }

        DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]);

        // Update bucket_select_mask.
        u8 nib_hi = nib[hi_lo] >> 8;
        u8 nib_lo = nib[hi_lo] & 0xff;
        for (const auto offset : it.second) {
            bucket_select_hi[offset - base_offset] |= nib_hi;
            bucket_select_lo[offset - base_offset] |= nib_lo;
        }
    }
    return true;
}
1173
1174static
1175unique_ptr<RoseInstruction>
1176makeCheckShufti16x8(u32 offset_range, u8 bucket_idx,
1177 const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
1178 const array<u8, 32> &bucket_select_mask,
1179 u32 neg_mask, s32 base_offset,
1180 const RoseInstruction *end_inst) {
1181 if (offset_range > 16 || bucket_idx > 8) {
1182 return nullptr;
1183 }
1184 array<u8, 32> nib_mask;
1185 array<u8, 16> bucket_select_mask_16;
1186 copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin());
1187 copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16);
1188 copy(bucket_select_mask.begin(), bucket_select_mask.begin() + 16,
1189 bucket_select_mask_16.begin());
1190 return ue2::make_unique<RoseInstrCheckShufti16x8>
1191 (nib_mask, bucket_select_mask_16,
1192 neg_mask & 0xffff, base_offset, end_inst);
1193}
1194
1195static
1196unique_ptr<RoseInstruction>
1197makeCheckShufti32x8(u32 offset_range, u8 bucket_idx,
1198 const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
1199 const array<u8, 32> &bucket_select_mask,
1200 u32 neg_mask, s32 base_offset,
1201 const RoseInstruction *end_inst) {
1202 if (offset_range > 32 || bucket_idx > 8) {
1203 return nullptr;
1204 }
1205
1206 array<u8, 16> hi_mask_16;
1207 array<u8, 16> lo_mask_16;
1208 copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin());
1209 copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin());
1210 return ue2::make_unique<RoseInstrCheckShufti32x8>
1211 (hi_mask_16, lo_mask_16, bucket_select_mask,
1212 neg_mask, base_offset, end_inst);
1213}
1214
1215static
1216unique_ptr<RoseInstruction>
1217makeCheckShufti16x16(u32 offset_range, u8 bucket_idx,
1218 const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
1219 const array<u8, 32> &bucket_select_mask_lo,
1220 const array<u8, 32> &bucket_select_mask_hi,
1221 u32 neg_mask, s32 base_offset,
1222 const RoseInstruction *end_inst) {
1223 if (offset_range > 16 || bucket_idx > 16) {
1224 return nullptr;
1225 }
1226
1227 array<u8, 32> bucket_select_mask_32;
1228 copy(bucket_select_mask_lo.begin(), bucket_select_mask_lo.begin() + 16,
1229 bucket_select_mask_32.begin());
1230 copy(bucket_select_mask_hi.begin(), bucket_select_mask_hi.begin() + 16,
1231 bucket_select_mask_32.begin() + 16);
1232 return ue2::make_unique<RoseInstrCheckShufti16x16>
1233 (hi_mask, lo_mask, bucket_select_mask_32,
1234 neg_mask & 0xffff, base_offset, end_inst);
1235}
1236static
1237unique_ptr<RoseInstruction>
1238makeCheckShufti32x16(u32 offset_range, u8 bucket_idx,
1239 const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask,
1240 const array<u8, 32> &bucket_select_mask_lo,
1241 const array<u8, 32> &bucket_select_mask_hi,
1242 u32 neg_mask, s32 base_offset,
1243 const RoseInstruction *end_inst) {
1244 if (offset_range > 32 || bucket_idx > 16) {
1245 return nullptr;
1246 }
1247
1248 return ue2::make_unique<RoseInstrCheckShufti32x16>
1249 (hi_mask, lo_mask, bucket_select_mask_hi,
1250 bucket_select_mask_lo, neg_mask, base_offset, end_inst);
1251}
1252
// Attempts to implement the lookaround with a shufti-based check
// instruction. Returns false if the look doesn't fit in 32 bytes or needs
// too many buckets.
static
bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program) {

    s32 base_offset = verify_s32(look.front().offset);
    if (look.back().offset >= base_offset + 32) {
        return false;
    }

    u8 bucket_idx = 0; // number of buckets
    u64a neg_mask_64;
    array<u8, 32> hi_mask;
    array<u8, 32> lo_mask;
    array<u8, 32> bucket_select_hi;
    array<u8, 32> bucket_select_lo;
    hi_mask.fill(0);
    lo_mask.fill(0);
    bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8.
    bucket_select_lo.fill(0);

    if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi.data(),
                        bucket_select_lo.data(), neg_mask_64, bucket_idx, 32)) {
        return false;
    }
    u32 neg_mask = (u32)neg_mask_64;

    DEBUG_PRINTF("hi_mask %s\n",
                 convertMaskstoString(hi_mask.data(), 32).c_str());
    DEBUG_PRINTF("lo_mask %s\n",
                 convertMaskstoString(lo_mask.data(), 32).c_str());
    DEBUG_PRINTF("bucket_select_hi %s\n",
                 convertMaskstoString(bucket_select_hi.data(), 32).c_str());
    DEBUG_PRINTF("bucket_select_lo %s\n",
                 convertMaskstoString(bucket_select_lo.data(), 32).c_str());

    const auto *end_inst = program.end_instruction();
    s32 offset_range = look.back().offset - base_offset + 1;

    // Try the instruction variants from narrowest to widest; each helper
    // returns nullptr when its range/bucket limits are exceeded.
    auto ri = makeCheckShufti16x8(offset_range, bucket_idx, hi_mask, lo_mask,
                                  bucket_select_lo, neg_mask, base_offset,
                                  end_inst);
    if (!ri) {
        ri = makeCheckShufti32x8(offset_range, bucket_idx, hi_mask, lo_mask,
                                 bucket_select_lo, neg_mask, base_offset,
                                 end_inst);
    }
    if (!ri) {
        ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask,
                                  bucket_select_lo, bucket_select_hi,
                                  neg_mask, base_offset, end_inst);
    }
    if (!ri) {
        ri = makeCheckShufti32x16(offset_range, bucket_idx, hi_mask, lo_mask,
                                  bucket_select_lo, bucket_select_hi,
                                  neg_mask, base_offset, end_inst);
    }
    // The 32x16 variant covers the full range/bucket space, so one of the
    // builders must have succeeded.
    assert(ri);
    program.add_before_end(move(ri));

    return true;
}
1313
1314/**
1315 * Builds a lookaround instruction, or an appropriate specialization if one is
1316 * available.
1317 */
1318static
1319void makeLookaroundInstruction(const vector<LookEntry> &look,
1320 RoseProgram &program) {
1321 assert(!look.empty());
1322
1323 if (makeRoleByte(look, program)) {
1324 return;
1325 }
1326
1327 if (look.size() == 1) {
1328 s8 offset = look.begin()->offset;
1329 const CharReach &reach = look.begin()->reach;
1330 auto ri = ue2::make_unique<RoseInstrCheckSingleLookaround>(offset, reach,
1331 program.end_instruction());
1332 program.add_before_end(move(ri));
1333 return;
1334 }
1335
1336 if (makeRoleMask(look, program)) {
1337 return;
1338 }
1339
1340 if (makeRoleMask32(look, program)) {
1341 return;
1342 }
1343
1344 if (makeRoleShufti(look, program)) {
1345 return;
1346 }
1347
1348 auto ri = ue2::make_unique<RoseInstrCheckLookaround>(look,
1349 program.end_instruction());
1350 program.add_before_end(move(ri));
1351}
1352
// Adds a lookaround confirming the caseful bytes of a "benefits" literal,
// whose match from the literal matcher may be approximate.
static
void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 lit_id,
                                 RoseProgram &program) {
    const auto &info = build.literal_info.at(lit_id);
    if (!info.requires_benefits) {
        return;
    }

    vector<LookEntry> look;

    const auto &lit = build.literals.at(lit_id);
    const ue2_literal &s = lit.s;
    const auto &msk = lit.msk;

    DEBUG_PRINTF("building mask for lit %u: %s\n", lit_id,
                 dumpString(s).c_str());

    assert(s.length() <= MAX_MASK2_WIDTH);

    // Note: the literal matcher will confirm the HWLM mask in lit.msk, so we
    // do not include those entries in the lookaround.
    // Offsets are negative, relative to the end of the literal match; the
    // loop covers every literal char except the trailing msk.size() bytes.
    auto it = s.begin();
    for (s32 i = 0 - s.length(), i_end = 0 - msk.size(); i < i_end; ++i, ++it) {
        if (!it->nocase) {
            look.emplace_back(verify_s8(i), *it);
        }
    }

    if (look.empty()) {
        return; // all caseful chars handled by HWLM mask.
    }

    makeLookaroundInstruction(look, program);
}
1387
// Adds a CHECK_LIT_EARLY guard for floating literals that the literal
// matcher could report before the global floating minimum match offset.
static
void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, u32 lit_id,
                                  const vector<RoseEdge> &lit_edges,
                                  u32 floatingMinLiteralMatchOffset,
                                  RoseProgram &prog) {
    if (lit_edges.empty()) {
        return;
    }

    // Zero means no floating minimum, so there is nothing to guard against.
    if (floatingMinLiteralMatchOffset == 0) {
        return;
    }

    RoseVertex v = target(lit_edges.front(), build.g);
    if (!build.isFloating(v)) {
        return;
    }

    const auto &lit = build.literals.at(lit_id);
    size_t min_len = lit.elength();
    u32 min_offset = findMinOffset(build, lit_id);
    DEBUG_PRINTF("has min_len=%zu, min_offset=%u, global min is %u\n", min_len,
                 min_offset, floatingMinLiteralMatchOffset);

    // If we can't match before the min offset, we don't need the check.
    if (min_len >= floatingMinLiteralMatchOffset) {
        DEBUG_PRINTF("no need for check, min is %u\n",
                     floatingMinLiteralMatchOffset);
        return;
    }

    assert(min_offset >= floatingMinLiteralMatchOffset);
    assert(min_offset < UINT32_MAX);

    DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset);
    const auto *end = prog.end_instruction();
    prog.add_before_end(ue2::make_unique<RoseInstrCheckLitEarly>(min_offset, end));
}
1426
1427static
1428void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 lit_id,
1429 RoseProgram &prog) {
1430 const auto &info = build.literal_info.at(lit_id);
1431
1432 if (!info.group_mask) {
1433 return;
1434 }
1435 prog.add_before_end(ue2::make_unique<RoseInstrCheckGroups>(info.group_mask));
1436}
1437
1438static
1439bool hasDelayedLiteral(const RoseBuildImpl &build,
1440 const vector<RoseEdge> &lit_edges) {
1441 auto is_delayed = [&build](u32 lit_id) { return build.isDelayed(lit_id); };
1442 for (const auto &e : lit_edges) {
1443 auto v = target(e, build.g);
1444 const auto &lits = build.g[v].literals;
1445 if (any_of(begin(lits), end(lits), is_delayed)) {
1446 return true;
1447 }
1448 }
1449 return false;
1450}
1451
// Builds the initial (pre-role) portion of a literal program: the checks
// that must pass before any role programs for this literal run. The order
// of the instructions below is significant.
static
RoseProgram makeLitInitialProgram(const RoseBuildImpl &build,
                                  ProgramBuild &prog_build, u32 lit_id,
                                  const vector<RoseEdge> &lit_edges,
                                  bool is_anchored_replay_program) {
    RoseProgram program;

    // Check long literal info.
    if (!build.isDelayed(lit_id)) {
        makeCheckLiteralInstruction(build.literals.at(lit_id),
                                    prog_build.longLitLengthThreshold,
                                    program, build.cc);
    }

    // Check lit mask.
    makeCheckLitMaskInstruction(build, lit_id, program);

    // Check literal groups. This is an optimisation that we only perform for
    // delayed literals, as their groups may be switched off; ordinarily, we
    // can trust the HWLM matcher.
    if (hasDelayedLiteral(build, lit_edges)) {
        makeGroupCheckInstruction(build, lit_id, program);
    }

    // Add instructions for pushing delayed matches, if there are any.
    makePushDelayedInstructions(build.literals, prog_build,
                                build.literal_info.at(lit_id).delayed_ids,
                                program);

    // Add pre-check for early literals in the floating table.
    makeCheckLitEarlyInstruction(build, lit_id, lit_edges,
                                 prog_build.floatingMinLiteralMatchOffset,
                                 program);

    /* Check if we are able to deliver matches from the anchored table now */
    if (!is_anchored_replay_program) {
        makeAnchoredLiteralDelay(build, prog_build, lit_id, program);
    }

    return program;
}
1493
// Attempts to implement a multi-path lookaround with a multipath shufti
// instruction. Returns false if the look is too long or needs too many
// buckets, in which case the caller falls back to the generic multipath
// lookaround.
static
bool makeRoleMultipathShufti(const vector<vector<LookEntry>> &multi_look,
                             RoseProgram &program) {
    if (multi_look.empty()) {
        return false;
    }

    // find the base offset
    assert(!multi_look[0].empty());
    s32 base_offset = multi_look[0].front().offset;
    s32 last_start = base_offset;
    s32 end_offset = multi_look[0].back().offset;
    size_t multi_len = 0;

    for (const auto &look : multi_look) {
        assert(look.size() > 0);
        multi_len += look.size();

        LIMIT_TO_AT_MOST(&base_offset, look.front().offset);
        ENSURE_AT_LEAST(&last_start, look.front().offset);
        ENSURE_AT_LEAST(&end_offset, look.back().offset);
    }

    assert(last_start < 0);

    if (end_offset - base_offset >= MULTIPATH_MAX_LEN) {
        return false;
    }

    // Round the total entry count up to the next supported instruction
    // width (16, 32 or 64 slots).
    if (multi_len <= 16) {
        multi_len = 16;
    } else if (multi_len <= 32) {
        multi_len = 32;
    } else if (multi_len <= 64) {
        multi_len = 64;
    } else {
        DEBUG_PRINTF("too long for multi-path\n");
        return false;
    }

    // Linearize the paths: each path's entries occupy consecutive slots,
    // and data_select_mask records which input byte feeds each slot.
    vector<LookEntry> linear_look;
    array<u8, 64> data_select_mask;
    data_select_mask.fill(0);
    u64a hi_bits_mask = 0; // bit set at the last slot of each path
    u64a lo_bits_mask = 0; // bit set at the first slot of each path

    for (const auto &look : multi_look) {
        assert(linear_look.size() < 64);
        lo_bits_mask |= 1LLU << linear_look.size();
        for (const auto &entry : look) {
            assert(entry.offset - base_offset < MULTIPATH_MAX_LEN);
            data_select_mask[linear_look.size()] =
                verify_u8(entry.offset - base_offset);
            linear_look.emplace_back(verify_s8(linear_look.size()), entry.reach);
        }
        hi_bits_mask |= 1LLU << (linear_look.size() - 1);
    }

    u8 bit_index = 0; // number of buckets
    u64a neg_mask;
    array<u8, 32> hi_mask;
    array<u8, 32> lo_mask;
    array<u8, 64> bucket_select_hi;
    array<u8, 64> bucket_select_lo;
    hi_mask.fill(0);
    lo_mask.fill(0);
    bucket_select_hi.fill(0);
    bucket_select_lo.fill(0);

    if (!getShuftiMasks(linear_look, hi_mask, lo_mask, bucket_select_hi.data(),
                        bucket_select_lo.data(), neg_mask, bit_index,
                        multi_len)) {
        return false;
    }

    DEBUG_PRINTF("hi_mask %s\n",
                 convertMaskstoString(hi_mask.data(), 16).c_str());
    DEBUG_PRINTF("lo_mask %s\n",
                 convertMaskstoString(lo_mask.data(), 16).c_str());
    DEBUG_PRINTF("bucket_select_hi %s\n",
                 convertMaskstoString(bucket_select_hi.data(), 64).c_str());
    DEBUG_PRINTF("bucket_select_lo %s\n",
                 convertMaskstoString(bucket_select_lo.data(), 64).c_str());
    DEBUG_PRINTF("data_select_mask %s\n",
                 convertMaskstoString(data_select_mask.data(), 64).c_str());
    DEBUG_PRINTF("hi_bits_mask %llx\n", hi_bits_mask);
    DEBUG_PRINTF("lo_bits_mask %llx\n", lo_bits_mask);
    DEBUG_PRINTF("neg_mask %llx\n", neg_mask);
    DEBUG_PRINTF("base_offset %d\n", base_offset);
    DEBUG_PRINTF("last_start %d\n", last_start);

    // Since we don't have 16x16 now, just call 32x16 instead.
    if (bit_index > 8) {
        assert(multi_len <= 32);
        multi_len = 32;
    }

    // Emit the variant matching the chosen width and bucket count.
    const auto *end_inst = program.end_instruction();
    assert(multi_len == 16 || multi_len == 32 || multi_len == 64);
    if (multi_len == 16) {
        neg_mask &= 0xffff;
        assert(!(hi_bits_mask & ~0xffffULL));
        assert(!(lo_bits_mask & ~0xffffULL));
        assert(bit_index <=8);
        array<u8, 32> nib_mask;
        copy(begin(lo_mask), begin(lo_mask) + 16, nib_mask.begin());
        copy(begin(hi_mask), begin(hi_mask) + 16, nib_mask.begin() + 16);

        auto ri = ue2::make_unique<RoseInstrCheckMultipathShufti16x8>
                  (nib_mask, bucket_select_lo, data_select_mask, hi_bits_mask,
                   lo_bits_mask, neg_mask, base_offset, last_start, end_inst);
        program.add_before_end(move(ri));
    } else if (multi_len == 32) {
        neg_mask &= 0xffffffff;
        assert(!(hi_bits_mask & ~0xffffffffULL));
        assert(!(lo_bits_mask & ~0xffffffffULL));
        if (bit_index <= 8) {
            auto ri = ue2::make_unique<RoseInstrCheckMultipathShufti32x8>
                      (hi_mask, lo_mask, bucket_select_lo, data_select_mask,
                       hi_bits_mask, lo_bits_mask, neg_mask, base_offset,
                       last_start, end_inst);
            program.add_before_end(move(ri));
        } else {
            auto ri = ue2::make_unique<RoseInstrCheckMultipathShufti32x16>
                      (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo,
                       data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask,
                       base_offset, last_start, end_inst);
            program.add_before_end(move(ri));
        }
    } else {
        auto ri = ue2::make_unique<RoseInstrCheckMultipathShufti64>
                  (hi_mask, lo_mask, bucket_select_lo, data_select_mask,
                   hi_bits_mask, lo_bits_mask, neg_mask, base_offset,
                   last_start, end_inst);
        program.add_before_end(move(ri));
    }
    return true;
}
1632
// Builds the generic MULTIPATH_LOOKAROUND instruction: entries from all
// paths are re-ordered by offset, and a start mask records which paths are
// still live at each position.
static
void makeRoleMultipathLookaround(const vector<vector<LookEntry>> &multi_look,
                                 RoseProgram &program) {
    assert(!multi_look.empty());
    assert(multi_look.size() <= MAX_LOOKAROUND_PATHS);
    vector<vector<LookEntry>> ordered_look;
    set<s32> look_offset;

    assert(!multi_look[0].empty());
    s32 last_start = multi_look[0][0].offset;

    // build offset table.
    for (const auto &look : multi_look) {
        assert(look.size() > 0);
        last_start = max(last_start, (s32)look.begin()->offset);

        for (const auto &t : look) {
            look_offset.insert(t.offset);
        }
    }

    // Initialise every position with a bit set per path; bits are cleared
    // below at positions each path covers.
    array<u8, MULTIPATH_MAX_LEN> start_mask;
    if (multi_look.size() < MAX_LOOKAROUND_PATHS) {
        start_mask.fill((1 << multi_look.size()) - 1);
    } else {
        start_mask.fill(0xff);
    }

    u32 path_idx = 0;
    for (const auto &look : multi_look) {
        for (const auto &t : look) {
            assert(t.offset >= (int)*look_offset.begin());
            size_t update_offset = t.offset - *look_offset.begin() + 1;
            if (update_offset < start_mask.size()) {
                start_mask[update_offset] &= ~(1 << path_idx);
            }
        }
        path_idx++;
    }

    // Propagate: a path inactive at position i-1 stays inactive at i.
    for (u32 i = 1; i < MULTIPATH_MAX_LEN; i++) {
        start_mask[i] &= start_mask[i - 1];
        DEBUG_PRINTF("start_mask[%u] = %x\n", i, start_mask[i]);
    }

    assert(look_offset.size() <= MULTIPATH_MAX_LEN);

    assert(last_start < 0);

    // Re-group entries by offset: one row per offset, one column per path.
    for (const auto &offset : look_offset) {
        vector<LookEntry> multi_entry;
        multi_entry.resize(MAX_LOOKAROUND_PATHS);

        for (size_t i = 0; i < multi_look.size(); i++) {
            for (const auto &t : multi_look[i]) {
                if (t.offset == offset) {
                    multi_entry[i] = t;
                }
            }
        }
        ordered_look.emplace_back(multi_entry);
    }

    auto ri = ue2::make_unique<RoseInstrMultipathLookaround>(move(ordered_look),
                                                             last_start, start_mask,
                                                             program.end_instruction());
    program.add_before_end(move(ri));
}
1701
// Adds lookaround checks for the given role, if the grey box allows them.
// Single-path lookaround (possibly merged with advisory info) produces a
// normal lookaround instruction; multi-path lookaround from the leftfix uses
// the multipath variants.
static
void makeRoleLookaround(const RoseBuildImpl &build,
                        const map<RoseVertex, left_build_info> &leftfix_info,
                        RoseVertex v, RoseProgram &program) {
    if (!build.cc.grey.roseLookaroundMasks) {
        return;
    }

    vector<vector<LookEntry>> looks;

    // Lookaround from leftfix (mandatory).
    if (contains(leftfix_info, v) && leftfix_info.at(v).has_lookaround) {
        DEBUG_PRINTF("using leftfix lookaround\n");
        looks = leftfix_info.at(v).lookaround;
    }

    // We may be able to find more lookaround info (advisory) and merge it
    // in.
    if (looks.size() <= 1) {
        vector<LookEntry> look;
        vector<LookEntry> look_more;
        if (!looks.empty()) {
            look = move(looks.front());
        }
        findLookaroundMasks(build, v, look_more);
        mergeLookaround(look, look_more);
        if (!look.empty()) {
            makeLookaroundInstruction(look, program);
        }
        return;
    }

    // Multi-path: prefer the shufti-based form; fall back to the generic
    // multipath lookaround instruction.
    if (!makeRoleMultipathShufti(looks, program)) {
        assert(looks.size() <= 8);
        makeRoleMultipathLookaround(looks, program);
    }
}
1739
// Adds a TRIGGER_SUFFIX instruction for the vertex's suffix engine (if any),
// selecting the top event appropriate to the engine type.
static
void makeRoleSuffix(const RoseBuildImpl &build,
                    const map<suffix_id, u32> &suffixes,
                    const map<u32, engine_info> &engine_info_by_queue,
                    RoseVertex v, RoseProgram &prog) {
    const auto &g = build.g;
    if (!g[v].suffix) {
        return;
    }
    assert(contains(suffixes, g[v].suffix));
    u32 queue = suffixes.at(g[v].suffix);
    u32 event;
    assert(contains(engine_info_by_queue, queue));
    const auto eng_info = engine_info_by_queue.at(queue);
    if (isContainerType(eng_info.type)) {
        // Tamarama container: the top is remapped per contained engine.
        auto tamaProto = g[v].suffix.tamarama.get();
        assert(tamaProto);
        event = (u32)MQE_TOP_FIRST +
                tamaProto->top_remap.at(make_pair(g[v].index,
                                                  g[v].suffix.top));
        assert(event < MQE_INVALID);
    } else if (isMultiTopType(eng_info.type)) {
        assert(!g[v].suffix.haig);
        event = (u32)MQE_TOP_FIRST + g[v].suffix.top;
        assert(event < MQE_INVALID);
    } else {
        // DFAs/Puffs have no MQE_TOP_N support, so they get a classic TOP
        // event.
        assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph));
        event = MQE_TOP;
    }

    prog.add_before_end(ue2::make_unique<RoseInstrTriggerSuffix>(queue, event));
}
1774
1775static
1776void addInfixTriggerInstructions(vector<TriggerInfo> triggers,
1777 RoseProgram &prog) {
1778 // Order, de-dupe and add instructions to the end of program.
1779 sort_and_unique(triggers, [](const TriggerInfo &a, const TriggerInfo &b) {
1780 return tie(a.cancel, a.queue, a.event) <
1781 tie(b.cancel, b.queue, b.event);
1782 });
1783 for (const auto &ti : triggers) {
1784 prog.add_before_end(
1785 ue2::make_unique<RoseInstrTriggerInfix>(ti.cancel, ti.queue, ti.event));
1786 }
1787}
1788
// Collects trigger info for every leftfix successor of u and emits the
// corresponding (sorted, de-duped) TRIGGER_INFIX instructions.
static
void makeRoleInfixTriggers(const RoseBuildImpl &build,
                           const map<RoseVertex, left_build_info> &leftfix_info,
                           const map<u32, engine_info> &engine_info_by_queue,
                           RoseVertex u, RoseProgram &program) {
    const auto &g = build.g;

    vector<TriggerInfo> triggers;

    for (const auto &e : out_edges_range(u, g)) {
        RoseVertex v = target(e, g);
        if (!g[v].left) {
            continue;
        }

        assert(contains(leftfix_info, v));
        const left_build_info &lbi = leftfix_info.at(v);
        // Lookaround-implemented leftfixes have no engine to trigger.
        if (lbi.has_lookaround) {
            continue;
        }

        assert(contains(engine_info_by_queue, lbi.queue));
        const auto &eng_info = engine_info_by_queue.at(lbi.queue);

        // DFAs have no TOP_N support, so they get a classic MQE_TOP event.
        u32 top;
        if (isContainerType(eng_info.type)) {
            // Tamarama container: the top is remapped per contained engine.
            auto tamaProto = g[v].left.tamarama.get();
            assert(tamaProto);
            top = MQE_TOP_FIRST + tamaProto->top_remap.at(
                                      make_pair(g[v].index, g[e].rose_top));
            assert(top < MQE_INVALID);
        } else if (!isMultiTopType(eng_info.type)) {
            assert(num_tops(g[v].left) == 1);
            top = MQE_TOP;
        } else {
            top = MQE_TOP_FIRST + g[e].rose_top;
            assert(top < MQE_INVALID);
        }

        triggers.emplace_back(g[e].rose_cancel_prev_top, lbi.queue, top);
    }

    addInfixTriggerInstructions(move(triggers), program);
}
1834
1835
1836/**
1837 * \brief True if the given vertex is a role that can only be switched on at
1838 * EOD.
1839 */
1840static
1841bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) {
1842 const RoseGraph &g = tbi.g;
1843
1844 // All such roles have only (0,0) edges to vertices with the eod_accept
1845 // property, and no other effects (suffixes, ordinary reports, etc, etc).
1846
1847 if (isLeafNode(v, g) || !g[v].reports.empty() || g[v].suffix) {
1848 return false;
1849 }
1850
1851 for (const auto &e : out_edges_range(v, g)) {
1852 RoseVertex w = target(e, g);
1853 if (!g[w].eod_accept) {
1854 return false;
1855 }
1856 assert(!g[w].reports.empty());
1857 assert(g[w].literals.empty());
1858
1859 if (g[e].minBound || g[e].maxBound) {
1860 return false;
1861 }
1862 }
1863
1864 /* There is no pointing enforcing this check at runtime if
1865 * this role is only fired by the eod event literal */
1866 if (tbi.eod_event_literal_id != MO_INVALID_IDX &&
1867 g[v].literals.size() == 1 &&
1868 *g[v].literals.begin() == tbi.eod_event_literal_id) {
1869 return false;
1870 }
1871
1872 return true;
1873}
1874
1875static
1876void addCheckOnlyEodInstruction(RoseProgram &prog) {
1877 DEBUG_PRINTF("only at eod\n");
1878 const auto *end_inst = prog.end_instruction();
1879 prog.add_before_end(ue2::make_unique<RoseInstrCheckOnlyEod>(end_inst));
1880}
1881
// Emits reports at EOD for successors of v that can be reported eagerly,
// guarded by a CHECK_ONLY_EOD instruction when the surrounding program is
// not itself EOD-anchored.
static
void makeRoleEagerEodReports(const RoseBuildImpl &build,
                             const map<RoseVertex, left_build_info> &leftfix_info,
                             bool needs_catchup, RoseVertex v,
                             RoseProgram &program) {
    RoseProgram eod_program;

    for (const auto &e : out_edges_range(v, build.g)) {
        if (canEagerlyReportAtEod(build, e)) {
            RoseProgram block;
            makeRoleReports(build, leftfix_info, needs_catchup,
                            target(e, build.g), block);
            eod_program.add_block(move(block));
        }
    }

    // Nothing to report eagerly; emit no instructions at all.
    if (eod_program.empty()) {
        return;
    }

    if (!onlyAtEod(build, v)) {
        // The rest of our program wasn't EOD anchored, so we need to guard
        // these reports with a check.
        addCheckOnlyEodInstruction(program);
    }

    program.add_before_end(move(eod_program));
}
1910
/** Makes a program for a role/vertex given a specific pred/in_edge.
 *
 * The program is built in two phases: precondition checks (which may fail
 * and abort the program) followed by effect blocks. The ordering of both
 * phases is significant — see the inline comments.
 */
static
RoseProgram makeRoleProgram(const RoseBuildImpl &build,
                            const map<RoseVertex, left_build_info> &leftfix_info,
                            const map<suffix_id, u32> &suffixes,
                            const map<u32, engine_info> &engine_info_by_queue,
                            const unordered_map<RoseVertex, u32> &roleStateIndices,
                            ProgramBuild &prog_build, const RoseEdge &e) {
    const RoseGraph &g = build.g;
    auto v = target(e, g);

    RoseProgram program;

    // First, add program instructions that enforce preconditions without
    // effects.

    if (onlyAtEod(build, v)) {
        addCheckOnlyEodInstruction(program);
    }

    if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
        makeRoleCheckBounds(build, v, e, program);
    }

    // This role program may be triggered by different predecessors, with
    // different offset bounds. We must ensure we put this check/set operation
    // after the bounds check to deal with this case.
    if (in_degree(v, g) > 1) {
        assert(!build.isRootSuccessor(v));
        makeRoleCheckNotHandled(prog_build, v, program);
    }

    makeRoleLookaround(build, leftfix_info, v, program);
    makeRoleCheckLeftfix(build, leftfix_info, v, program);

    // Next, we can add program instructions that have effects. This must be
    // done as a series of blocks, as some of them (like reports) are
    // escapable.

    RoseProgram effects_block;

    RoseProgram reports_block;
    makeRoleReports(build, leftfix_info, prog_build.needs_catchup, v,
                    reports_block);
    effects_block.add_block(move(reports_block));

    RoseProgram infix_block;
    makeRoleInfixTriggers(build, leftfix_info, engine_info_by_queue, v,
                          infix_block);
    effects_block.add_block(move(infix_block));

    // Note: SET_GROUPS instruction must be after infix triggers, as an infix
    // going dead may switch off groups.
    RoseProgram groups_block;
    makeRoleGroups(build.g, prog_build, v, groups_block);
    effects_block.add_block(move(groups_block));

    RoseProgram suffix_block;
    makeRoleSuffix(build, suffixes, engine_info_by_queue, v, suffix_block);
    effects_block.add_block(move(suffix_block));

    RoseProgram state_block;
    makeRoleSetState(roleStateIndices, v, state_block);
    effects_block.add_block(move(state_block));

    // Note: EOD eager reports may generate a CHECK_ONLY_EOD instruction (if
    // the program doesn't have one already).
    RoseProgram eod_block;
    makeRoleEagerEodReports(build, leftfix_info, prog_build.needs_catchup, v,
                            eod_block);
    effects_block.add_block(move(eod_block));

    /* a 'ghost role' may do nothing if we know that its groups are already set
     * - in this case we can avoid producing a program at all. */
    if (effects_block.empty()) {
        return {};
    }

    program.add_before_end(move(effects_block));
    return program;
}
1992
1993static
1994void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 lit_id,
1995 RoseProgram &prog) {
1996 const auto &info = build.literal_info.at(lit_id);
1997 if (!info.squash_group) {
1998 return;
1999 }
2000
2001 DEBUG_PRINTF("squashes 0x%llx\n", info.group_mask);
2002 assert(info.group_mask);
2003 /* Note: group_mask is negated. */
2004 prog.add_before_end(ue2::make_unique<RoseInstrSquashGroups>(~info.group_mask));
2005}
2006
2007namespace {
2008struct ProgKey {
2009 ProgKey(const RoseProgram &p) : prog(&p) {}
2010
2011 bool operator==(const ProgKey &b) const {
2012 return RoseProgramEquivalence()(*prog, *b.prog);
2013 }
2014
2015 size_t hash() const {
2016 return RoseProgramHash()(*prog);
2017 }
2018private:
2019 const RoseProgram *prog;
2020};
2021}
2022
2023RoseProgram assembleProgramBlocks(vector<RoseProgram> &&blocks_in) {
2024 DEBUG_PRINTF("%zu blocks before dedupe\n", blocks_in.size());
2025
2026 vector<RoseProgram> blocks;
2027 blocks.reserve(blocks_in.size()); /* to ensure stable reference for seen */
2028
2029 ue2_unordered_set<ProgKey> seen;
2030 for (auto &block : blocks_in) {
2031 if (contains(seen, block)) {
2032 continue;
2033 }
2034
2035 blocks.push_back(move(block));
2036 seen.emplace(blocks.back());
2037 }
2038
2039 DEBUG_PRINTF("%zu blocks after dedupe\n", blocks.size());
2040
2041 RoseProgram prog;
2042 for (auto &block : blocks) {
2043 /* If we have multiple blocks from different literals and any of them
2044 * squash groups, we will have to add a CLEAR_WORK_DONE instruction to
2045 * each literal program block to clear the work_done flags so that it's
2046 * only set if a state has been. */
2047 if (!prog.empty() && reads_work_done_flag(block)) {
2048 RoseProgram clear_block;
2049 clear_block.add_before_end(ue2::make_unique<RoseInstrClearWorkDone>());
2050 prog.add_block(move(clear_block));
2051 }
2052
2053 prog.add_block(move(block));
2054 }
2055
2056 return prog;
2057}
2058
/**
 * Builds the program run when the given literal matches. The program is
 * assembled from: an initial block of early checks (which may jump to END and
 * terminate processing), per-edge role programs for both non-root and root
 * predecessors, and a final unconditional block (currently group squashing)
 * that runs even if every role program bails out.
 *
 * For the EOD event literal, only the role programs are returned (no initial
 * program, no unconditional block).
 */
RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
                       const map<RoseVertex, left_build_info> &leftfix_info,
                       const map<suffix_id, u32> &suffixes,
                       const map<u32, engine_info> &engine_info_by_queue,
                       const unordered_map<RoseVertex, u32> &roleStateIndices,
                       ProgramBuild &prog_build, u32 lit_id,
                       const vector<RoseEdge> &lit_edges,
                       bool is_anchored_replay_program) {
    const auto &g = build.g;

    DEBUG_PRINTF("lit id=%u, %zu lit edges\n", lit_id, lit_edges.size());

    // Construct initial program up front, as its early checks must be able
    // to jump to end and terminate processing for this literal.
    auto lit_program = makeLitInitialProgram(build, prog_build, lit_id,
                                             lit_edges,
                                             is_anchored_replay_program);

    RoseProgram role_programs;

    // Predecessor state id -> program block.
    map<u32, RoseProgram> pred_blocks;

    // Construct sparse iter sub-programs. Edges whose source is a start
    // vertex are skipped here and handled as root roles below.
    for (const auto &e : lit_edges) {
        const auto &u = source(e, g);
        if (build.isAnyStart(u)) {
            continue; // Root roles are not handled with sparse iterator.
        }
        DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].index,
                     g[target(e, g)].index);
        assert(contains(roleStateIndices, u));
        u32 pred_state = roleStateIndices.at(u);
        auto role_prog = makeRoleProgram(build, leftfix_info, suffixes,
                                         engine_info_by_queue,
                                         roleStateIndices, prog_build, e);
        if (!role_prog.empty()) {
            pred_blocks[pred_state].add_block(move(role_prog));
        }
    }

    // Add blocks to deal with non-root edges (triggered by sparse iterator or
    // mmbit_isset checks).
    addPredBlocks(pred_blocks, roleStateIndices.size(), role_programs);

    // Add blocks to handle root roles.
    for (const auto &e : lit_edges) {
        const auto &u = source(e, g);
        if (!build.isAnyStart(u)) {
            continue;
        }
        DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index,
                     g[target(e, g)].index);
        auto role_prog = makeRoleProgram(build, leftfix_info, suffixes,
                                         engine_info_by_queue,
                                         roleStateIndices, prog_build, e);
        role_programs.add_block(move(role_prog));
    }

    if (lit_id == build.eod_event_literal_id) {
        /* Note: does not require the lit initial program */
        assert(build.eod_event_literal_id != MO_INVALID_IDX);
        return role_programs;
    }

    /* Instructions to run even if a role program bails out */
    RoseProgram unconditional_block;

    // Literal may squash groups.
    makeGroupSquashInstruction(build, lit_id, unconditional_block);

    role_programs.add_block(move(unconditional_block));
    lit_program.add_before_end(move(role_programs));

    return lit_program;
}
2135
2136RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build,
2137 ProgramBuild &prog_build,
2138 const vector<u32> &lit_ids) {
2139 assert(!lit_ids.empty());
2140 assert(build.cc.streaming);
2141
2142 vector<RoseProgram> blocks;
2143
2144 for (const auto &lit_id : lit_ids) {
2145 DEBUG_PRINTF("lit_id=%u\n", lit_id);
2146 const auto &info = build.literal_info.at(lit_id);
2147 if (info.delayed_ids.empty()) {
2148 continue; // No delayed IDs, no work to do.
2149 }
2150
2151 RoseProgram prog;
2152 if (!build.isDelayed(lit_id)) {
2153 makeCheckLiteralInstruction(build.literals.at(lit_id),
2154 prog_build.longLitLengthThreshold, prog,
2155 build.cc);
2156 }
2157
2158 makeCheckLitMaskInstruction(build, lit_id, prog);
2159 makePushDelayedInstructions(build.literals, prog_build,
2160 build.literal_info.at(lit_id).delayed_ids,
2161 prog);
2162 blocks.push_back(move(prog));
2163 }
2164
2165 return assembleProgramBlocks(move(blocks));
2166}
2167
2168RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build,
2169 ProgramBuild &prog_build, const RoseEdge &e,
2170 const bool multiple_preds) {
2171 const RoseGraph &g = build.g;
2172 const RoseVertex v = target(e, g);
2173
2174 RoseProgram program;
2175
2176 if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
2177 makeRoleCheckBounds(build, v, e, program);
2178 }
2179
2180 if (multiple_preds) {
2181 // Only necessary when there is more than one pred.
2182 makeRoleCheckNotHandled(prog_build, v, program);
2183 }
2184
2185 makeCatchup(build.rm, prog_build.needs_catchup, g[v].reports, program);
2186
2187 const bool has_som = false;
2188 RoseProgram report_block;
2189 for (const auto &id : g[v].reports) {
2190 makeReport(build, id, has_som, report_block);
2191 }
2192 program.add_before_end(move(report_block));
2193
2194 return program;
2195}
2196
2197static
2198void makeCatchupMpv(const ReportManager &rm, bool needs_mpv_catchup,
2199 ReportID id, RoseProgram &program) {
2200 if (!needs_mpv_catchup) {
2201 return;
2202 }
2203
2204 const Report &report = rm.getReport(id);
2205 if (report.type == INTERNAL_ROSE_CHAIN) {
2206 return;
2207 }
2208
2209 program.add_before_end(ue2::make_unique<RoseInstrCatchUpMpv>());
2210}
2211
2212RoseProgram makeReportProgram(const RoseBuildImpl &build,
2213 bool needs_mpv_catchup, ReportID id) {
2214 RoseProgram prog;
2215
2216 makeCatchupMpv(build.rm, needs_mpv_catchup, id, prog);
2217
2218 const bool has_som = false;
2219 makeReport(build, id, has_som, prog);
2220
2221 return prog;
2222}
2223
2224RoseProgram makeBoundaryProgram(const RoseBuildImpl &build,
2225 const set<ReportID> &reports) {
2226 // Note: no CATCHUP instruction is necessary in the boundary case, as we
2227 // should always be caught up (and may not even have the resources in
2228 // scratch to support it).
2229
2230 const bool has_som = false;
2231 RoseProgram prog;
2232 for (const auto &id : reports) {
2233 makeReport(build, id, has_som, prog);
2234 }
2235
2236 return prog;
2237}
2238
2239void addIncludedJumpProgram(RoseProgram &program, u32 child_offset,
2240 u8 squash) {
2241 RoseProgram block;
2242 block.add_before_end(ue2::make_unique<RoseInstrIncludedJump>(child_offset,
2243 squash));
2244 program.add_block(move(block));
2245}
2246
2247static
2248void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block,
2249 RoseProgram &program) {
2250 // Prepend an instruction to check the pred state is on.
2251 const auto *end_inst = pred_block.end_instruction();
2252 pred_block.insert(begin(pred_block),
2253 ue2::make_unique<RoseInstrCheckState>(pred_state, end_inst));
2254 program.add_block(move(pred_block));
2255}
2256
2257static
2258void addPredBlocksAny(map<u32, RoseProgram> &pred_blocks, u32 num_states,
2259 RoseProgram &program) {
2260 RoseProgram sparse_program;
2261
2262 vector<u32> keys;
2263 for (const u32 &key : pred_blocks | map_keys) {
2264 keys.push_back(key);
2265 }
2266
2267 const RoseInstruction *end_inst = sparse_program.end_instruction();
2268 auto ri = ue2::make_unique<RoseInstrSparseIterAny>(num_states, keys, end_inst);
2269 sparse_program.add_before_end(move(ri));
2270
2271 RoseProgram &block = pred_blocks.begin()->second;
2272
2273 /* we no longer need the check handled instruction as all the pred-role
2274 * blocks are being collapsed together */
2275 stripCheckHandledInstruction(block);
2276
2277 sparse_program.add_before_end(move(block));
2278 program.add_block(move(sparse_program));
2279}
2280
/*
 * Builds a sparse-iterator construct for distinct pred blocks: a
 * SPARSE_ITER_BEGIN instruction followed by one SPARSE_ITER_NEXT per
 * additional key, with each pred's program spliced in after the BEGIN/NEXT
 * that precedes it. The BEGIN instruction's jump table maps each state key to
 * the first instruction of its spliced-in block.
 */
static
void addPredBlocksMulti(map<u32, RoseProgram> &pred_blocks,
                        u32 num_states, RoseProgram &program) {
    assert(!pred_blocks.empty());

    RoseProgram sparse_program;
    const RoseInstruction *end_inst = sparse_program.end_instruction();
    vector<pair<u32, const RoseInstruction *>> jump_table;

    // BEGIN instruction. Keep a raw pointer so we can write the jump table
    // back into it once all blocks are in place.
    auto ri_begin =
        ue2::make_unique<RoseInstrSparseIterBegin>(num_states, end_inst);
    RoseInstrSparseIterBegin *begin_inst = ri_begin.get();
    sparse_program.add_before_end(move(ri_begin));

    // NEXT instructions, one per pred program. Each NEXT carries the
    // *previous* key, so the first key is consumed by BEGIN and the loop
    // starts from the second entry.
    u32 prev_key = pred_blocks.begin()->first;
    for (auto it = next(begin(pred_blocks)); it != end(pred_blocks); ++it) {
        auto ri = ue2::make_unique<RoseInstrSparseIterNext>(prev_key,
                                                            begin_inst,
                                                            end_inst);
        sparse_program.add_before_end(move(ri));
        prev_key = it->first;
    }

    // Splice in each pred program after its BEGIN/NEXT.
    auto out_it = begin(sparse_program);
    for (auto &m : pred_blocks) {
        u32 key = m.first;
        RoseProgram &flat_prog = m.second;
        assert(!flat_prog.empty());
        const size_t block_len = flat_prog.size() - 1; // without INSTR_END.

        // out_it should currently point at the BEGIN/NEXT instruction that
        // introduces this key's block.
        assert(dynamic_cast<const RoseInstrSparseIterBegin *>(out_it->get()) ||
               dynamic_cast<const RoseInstrSparseIterNext *>(out_it->get()));
        out_it = sparse_program.insert(++out_it, move(flat_prog));

        // Jump table target for this key is the beginning of the block we just
        // spliced in.
        jump_table.emplace_back(key, out_it->get());

        assert(distance(begin(sparse_program), out_it) + block_len <=
               sparse_program.size());
        // Skip over the spliced block to land on the next BEGIN/NEXT.
        advance(out_it, block_len);
    }

    // Write the jump table back into the SPARSE_ITER_BEGIN instruction.
    begin_inst->jump_table = move(jump_table);

    program.add_block(move(sparse_program));
}
2330
2331void addPredBlocks(map<u32, RoseProgram> &pred_blocks, u32 num_states,
2332 RoseProgram &program) {
2333 // Trim empty blocks, if any exist.
2334 for (auto it = pred_blocks.begin(); it != pred_blocks.end();) {
2335 if (it->second.empty()) {
2336 it = pred_blocks.erase(it);
2337 } else {
2338 ++it;
2339 }
2340 }
2341
2342 const size_t num_preds = pred_blocks.size();
2343 if (num_preds == 0) {
2344 return;
2345 }
2346
2347 if (num_preds == 1) {
2348 const auto head = pred_blocks.begin();
2349 addPredBlockSingle(head->first, head->second, program);
2350 return;
2351 }
2352
2353 // First, see if all our blocks are equivalent, in which case we can
2354 // collapse them down into one.
2355 const auto &blocks = pred_blocks | map_values;
2356 if (all_of(begin(blocks), end(blocks), [&](const RoseProgram &block) {
2357 return RoseProgramEquivalence()(*begin(blocks), block);
2358 })) {
2359 DEBUG_PRINTF("all blocks equiv\n");
2360 addPredBlocksAny(pred_blocks, num_states, program);
2361 return;
2362 }
2363
2364 addPredBlocksMulti(pred_blocks, num_states, program);
2365}
2366
2367void applyFinalSpecialisation(RoseProgram &program) {
2368 assert(!program.empty());
2369 assert(program.back().code() == ROSE_INSTR_END);
2370 if (program.size() < 2) {
2371 return;
2372 }
2373
2374 /* Replace the second-to-last instruction (before END) with a one-shot
2375 * specialisation if available. */
2376 auto it = next(program.rbegin());
2377 if (auto *ri = dynamic_cast<const RoseInstrReport *>(it->get())) {
2378 DEBUG_PRINTF("replacing REPORT with FINAL_REPORT\n");
2379 program.replace(it, ue2::make_unique<RoseInstrFinalReport>(
2380 ri->onmatch, ri->offset_adjust));
2381 }
2382}
2383
2384void recordLongLiterals(vector<ue2_case_string> &longLiterals,
2385 const RoseProgram &program) {
2386 for (const auto &ri : program) {
2387 if (const auto *ri_check =
2388 dynamic_cast<const RoseInstrCheckLongLit *>(ri.get())) {
2389 DEBUG_PRINTF("found CHECK_LONG_LIT for string '%s'\n",
2390 escapeString(ri_check->literal).c_str());
2391 longLiterals.emplace_back(ri_check->literal, false);
2392 continue;
2393 }
2394 if (const auto *ri_check =
2395 dynamic_cast<const RoseInstrCheckLongLitNocase *>(ri.get())) {
2396 DEBUG_PRINTF("found CHECK_LONG_LIT_NOCASE for string '%s'\n",
2397 escapeString(ri_check->literal).c_str());
2398 longLiterals.emplace_back(ri_check->literal, true);
2399 }
2400 }
2401}
2402
2403void recordResources(RoseResources &resources, const RoseProgram &program) {
2404 for (const auto &ri : program) {
2405 switch (ri->code()) {
2406 case ROSE_INSTR_TRIGGER_SUFFIX:
2407 resources.has_suffixes = true;
2408 break;
2409 case ROSE_INSTR_TRIGGER_INFIX:
2410 case ROSE_INSTR_CHECK_INFIX:
2411 case ROSE_INSTR_CHECK_PREFIX:
2412 case ROSE_INSTR_SOM_LEFTFIX:
2413 resources.has_leftfixes = true;
2414 break;
2415 case ROSE_INSTR_SET_STATE:
2416 case ROSE_INSTR_CHECK_STATE:
2417 case ROSE_INSTR_SPARSE_ITER_BEGIN:
2418 case ROSE_INSTR_SPARSE_ITER_NEXT:
2419 resources.has_states = true;
2420 break;
2421 case ROSE_INSTR_CHECK_GROUPS:
2422 resources.checks_groups = true;
2423 break;
2424 case ROSE_INSTR_PUSH_DELAYED:
2425 resources.has_lit_delay = true;
2426 break;
2427 case ROSE_INSTR_CHECK_LONG_LIT:
2428 case ROSE_INSTR_CHECK_LONG_LIT_NOCASE:
2429 resources.has_lit_check = true;
2430 break;
2431 default:
2432 break;
2433 }
2434 }
2435}
2436
2437} // namespace ue2
2438