1/*
2 * Copyright (c) 2016-2018, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#ifndef ROSE_BUILD_PROGRAM_H
30#define ROSE_BUILD_PROGRAM_H
31
32#include "rose_build_impl.h"
33#include "rose_program.h"
34#include "util/bytecode_ptr.h"
35#include "util/hash.h"
36#include "util/make_unique.h"
37
38#include <unordered_map>
39#include <vector>
40
41#include <boost/range/adaptor/map.hpp>
42
43namespace ue2 {
44
45struct LookEntry;
46class RoseEngineBlob;
47class RoseInstruction;
48struct RoseResources;
49
50/**
51 * \brief Container for a list of program instructions.
52 */
53class RoseProgram {
54private:
55 std::vector<std::unique_ptr<RoseInstruction>> prog;
56
57public:
58 RoseProgram();
59 ~RoseProgram();
60 RoseProgram(const RoseProgram &) = delete;
61 RoseProgram(RoseProgram &&);
62 RoseProgram &operator=(const RoseProgram &) = delete;
63 RoseProgram &operator=(RoseProgram &&);
64
65 bool empty() const;
66
67 size_t size() const { return prog.size(); }
68
69 const RoseInstruction &back() const { return *prog.back(); }
70 const RoseInstruction &front() const { return *prog.front(); }
71
72 using iterator = decltype(prog)::iterator;
73 iterator begin() { return prog.begin(); }
74 iterator end() { return prog.end(); }
75
76 using const_iterator = decltype(prog)::const_iterator;
77 const_iterator begin() const { return prog.begin(); }
78 const_iterator end() const { return prog.end(); }
79
80 using reverse_iterator = decltype(prog)::reverse_iterator;
81 reverse_iterator rbegin() { return prog.rbegin(); }
82 reverse_iterator rend() { return prog.rend(); }
83
84 using const_reverse_iterator = decltype(prog)::const_reverse_iterator;
85 const_reverse_iterator rbegin() const { return prog.rbegin(); }
86 const_reverse_iterator rend() const { return prog.rend(); }
87
88 /** \brief Retrieve a pointer to the terminating ROSE_INSTR_END. */
89 const RoseInstruction *end_instruction() const;
90
91 static void update_targets(iterator it, iterator it_end,
92 const RoseInstruction *old_target,
93 const RoseInstruction *new_target);
94
95 iterator insert(iterator it, std::unique_ptr<RoseInstruction> ri);
96
97 iterator insert(iterator it, RoseProgram &&block);
98
99 /* Note: takes iterator rather than const_iterator to support toolchains
100 * with pre-C++11 standard libraries (i.e., gcc-4.8). */
101 iterator erase(iterator first, iterator last);
102
103 /**
104 * \brief Adds this instruction to the program just before the terminating
105 * ROSE_INSTR_END.
106 */
107 void add_before_end(std::unique_ptr<RoseInstruction> ri);
108
109 /**
110 * \brief Adds this block to the program just before the terminating
111 * ROSE_INSTR_END.
112 *
113 * Any existing instruction that was jumping to end continues to do so.
114 */
115 void add_before_end(RoseProgram &&block);
116 /**
117 * \brief Append this program block, replacing our current ROSE_INSTR_END.
118 *
119 * Any existing instruction that was jumping to end, now leads to the newly
120 * added block.
121 */
122 void add_block(RoseProgram &&block);
123
124 /**
125 * \brief Replace the instruction pointed to by the given iterator.
126 */
127 template<class Iter>
128 void replace(Iter it, std::unique_ptr<RoseInstruction> ri) {
129 assert(!prog.empty());
130
131 const RoseInstruction *old_ptr = it->get();
132 *it = move(ri);
133 update_targets(prog.begin(), prog.end(), old_ptr, it->get());
134 }
135};
136
137bytecode_ptr<char> writeProgram(RoseEngineBlob &blob,
138 const RoseProgram &program);
139
140class RoseProgramHash {
141public:
142 size_t operator()(const RoseProgram &program) const;
143};
144
145class RoseProgramEquivalence {
146public:
147 bool operator()(const RoseProgram &prog1, const RoseProgram &prog2) const;
148};
149
150/** \brief Data only used during construction of various programs (literal,
151 * anchored, delay, etc). */
152struct ProgramBuild : noncopyable {
153 explicit ProgramBuild(u32 fMinLitOffset, size_t longLitThresh,
154 bool catchup)
155 : floatingMinLiteralMatchOffset(fMinLitOffset),
156 longLitLengthThreshold(longLitThresh), needs_catchup(catchup) {
157 }
158
159 /** \brief Minimum offset of a match from the floating table. */
160 const u32 floatingMinLiteralMatchOffset;
161
162 /** \brief Long literal length threshold, used in streaming mode. */
163 const size_t longLitLengthThreshold;
164
165 /** \brief True if reports need CATCH_UP instructions to catch up suffixes,
166 * outfixes etc. */
167 const bool needs_catchup;
168
169 /** \brief Mapping from vertex to key, for vertices with a
170 * CHECK_NOT_HANDLED instruction. */
171 std::unordered_map<RoseVertex, u32> handledKeys;
172
173 /** \brief Mapping from Rose literal ID to anchored program index. */
174 std::map<u32, u32> anchored_programs;
175
176 /** \brief Mapping from Rose literal ID to delayed program index. */
177 std::map<u32, u32> delay_programs;
178
179 /** \brief Mapping from every vertex to the groups that must be on for that
180 * vertex to be reached. */
181 std::unordered_map<RoseVertex, rose_group> vertex_group_map;
182
183 /** \brief Global bitmap of groups that can be squashed. */
184 rose_group squashable_groups = 0;
185};
186
187void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program);
188void addSuffixesEodProgram(RoseProgram &program);
189void addMatcherEodProgram(RoseProgram &program);
190void addFlushCombinationProgram(RoseProgram &program);
191
192static constexpr u32 INVALID_QUEUE = ~0U;
193
194struct left_build_info {
195 // Constructor for an engine implementation.
196 left_build_info(u32 q, u32 l, u32 t, rose_group sm,
197 const std::vector<u8> &stops, u32 max_ql, u8 cm_count,
198 const CharReach &cm_cr);
199
200 // Constructor for a lookaround implementation.
201 explicit left_build_info(const std::vector<std::vector<LookEntry>> &looks);
202
203 u32 queue = INVALID_QUEUE; /* uniquely idents the left_build_info */
204 u32 lag = 0;
205 u32 transient = 0;
206 rose_group squash_mask = ~rose_group{0};
207 std::vector<u8> stopAlphabet;
208 u32 max_queuelen = 0;
209 u8 countingMiracleCount = 0;
210 CharReach countingMiracleReach;
211 u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */
212 bool has_lookaround = false;
213
214 // alternative implementation to the NFA
215 std::vector<std::vector<LookEntry>> lookaround;
216};
217
218/**
219 * \brief Provides a brief summary of properties of an NFA that has already been
220 * finalised and stored in the blob.
221 */
222struct engine_info {
223 engine_info(const NFA *nfa, bool trans);
224
225 enum NFAEngineType type;
226 bool accepts_eod;
227 u32 stream_size;
228 u32 scratch_size;
229 u32 scratch_align;
230 bool transient;
231};
232
233/**
234 * \brief Consumes list of program blocks corresponding to different literals,
235 * checks them for duplicates and then concatenates them into one program.
236 *
237 * Note: if a block will squash groups, a CLEAR_WORK_DONE instruction is
238 * inserted to prevent the work_done flag being contaminated by early blocks.
239 */
240RoseProgram assembleProgramBlocks(std::vector<RoseProgram> &&blocks);
241
242RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
243 const std::map<RoseVertex, left_build_info> &leftfix_info,
244 const std::map<suffix_id, u32> &suffixes,
245 const std::map<u32, engine_info> &engine_info_by_queue,
246 const std::unordered_map<RoseVertex, u32> &roleStateIndices,
247 ProgramBuild &prog_build, u32 lit_id,
248 const std::vector<RoseEdge> &lit_edges,
249 bool is_anchored_replay_program);
250
251RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build,
252 ProgramBuild &prog_build,
253 const std::vector<u32> &lit_ids);
254
255RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build,
256 ProgramBuild &prog_build, const RoseEdge &e,
257 const bool multiple_preds);
258
259RoseProgram makeReportProgram(const RoseBuildImpl &build,
260 bool needs_mpv_catchup, ReportID id);
261
262RoseProgram makeBoundaryProgram(const RoseBuildImpl &build,
263 const std::set<ReportID> &reports);
264
265struct TriggerInfo {
266 TriggerInfo(bool c, u32 q, u32 e) : cancel(c), queue(q), event(e) {}
267 bool cancel;
268 u32 queue;
269 u32 event;
270
271 bool operator==(const TriggerInfo &b) const {
272 return cancel == b.cancel && queue == b.queue && event == b.event;
273 }
274};
275
276void addPredBlocks(std::map<u32, RoseProgram> &pred_blocks, u32 num_states,
277 RoseProgram &program);
278
279void applyFinalSpecialisation(RoseProgram &program);
280
281void recordLongLiterals(std::vector<ue2_case_string> &longLiterals,
282 const RoseProgram &program);
283
284void recordResources(RoseResources &resources, const RoseProgram &program);
285
286void addIncludedJumpProgram(RoseProgram &program, u32 child_offset, u8 squash);
287} // namespace ue2
288
289#endif // ROSE_BUILD_PROGRAM_H
290