1 | /* |
2 | * Copyright (c) 2016-2018, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #ifndef ROSE_BUILD_PROGRAM_H |
30 | #define ROSE_BUILD_PROGRAM_H |
31 | |
32 | #include "rose_build_impl.h" |
33 | #include "rose_program.h" |
34 | #include "util/bytecode_ptr.h" |
35 | #include "util/hash.h" |
36 | #include "util/make_unique.h" |
37 | |
38 | #include <unordered_map> |
39 | #include <vector> |
40 | |
41 | #include <boost/range/adaptor/map.hpp> |
42 | |
43 | namespace ue2 { |
44 | |
// Forward declarations: these types are only named in this header, not
// defined here.
class RoseEngineBlob;
class RoseInstruction;

struct LookEntry;
struct RoseResources;
49 | |
50 | /** |
51 | * \brief Container for a list of program instructions. |
52 | */ |
53 | class RoseProgram { |
54 | private: |
55 | std::vector<std::unique_ptr<RoseInstruction>> prog; |
56 | |
57 | public: |
58 | RoseProgram(); |
59 | ~RoseProgram(); |
60 | RoseProgram(const RoseProgram &) = delete; |
61 | RoseProgram(RoseProgram &&); |
62 | RoseProgram &operator=(const RoseProgram &) = delete; |
63 | RoseProgram &operator=(RoseProgram &&); |
64 | |
65 | bool empty() const; |
66 | |
67 | size_t size() const { return prog.size(); } |
68 | |
69 | const RoseInstruction &back() const { return *prog.back(); } |
70 | const RoseInstruction &front() const { return *prog.front(); } |
71 | |
72 | using iterator = decltype(prog)::iterator; |
73 | iterator begin() { return prog.begin(); } |
74 | iterator end() { return prog.end(); } |
75 | |
76 | using const_iterator = decltype(prog)::const_iterator; |
77 | const_iterator begin() const { return prog.begin(); } |
78 | const_iterator end() const { return prog.end(); } |
79 | |
80 | using reverse_iterator = decltype(prog)::reverse_iterator; |
81 | reverse_iterator rbegin() { return prog.rbegin(); } |
82 | reverse_iterator rend() { return prog.rend(); } |
83 | |
84 | using const_reverse_iterator = decltype(prog)::const_reverse_iterator; |
85 | const_reverse_iterator rbegin() const { return prog.rbegin(); } |
86 | const_reverse_iterator rend() const { return prog.rend(); } |
87 | |
88 | /** \brief Retrieve a pointer to the terminating ROSE_INSTR_END. */ |
89 | const RoseInstruction *end_instruction() const; |
90 | |
91 | static void update_targets(iterator it, iterator it_end, |
92 | const RoseInstruction *old_target, |
93 | const RoseInstruction *new_target); |
94 | |
95 | iterator insert(iterator it, std::unique_ptr<RoseInstruction> ri); |
96 | |
97 | iterator insert(iterator it, RoseProgram &&block); |
98 | |
99 | /* Note: takes iterator rather than const_iterator to support toolchains |
100 | * with pre-C++11 standard libraries (i.e., gcc-4.8). */ |
101 | iterator erase(iterator first, iterator last); |
102 | |
103 | /** |
104 | * \brief Adds this instruction to the program just before the terminating |
105 | * ROSE_INSTR_END. |
106 | */ |
107 | void add_before_end(std::unique_ptr<RoseInstruction> ri); |
108 | |
109 | /** |
110 | * \brief Adds this block to the program just before the terminating |
111 | * ROSE_INSTR_END. |
112 | * |
113 | * Any existing instruction that was jumping to end continues to do so. |
114 | */ |
115 | void add_before_end(RoseProgram &&block); |
116 | /** |
117 | * \brief Append this program block, replacing our current ROSE_INSTR_END. |
118 | * |
119 | * Any existing instruction that was jumping to end, now leads to the newly |
120 | * added block. |
121 | */ |
122 | void add_block(RoseProgram &&block); |
123 | |
124 | /** |
125 | * \brief Replace the instruction pointed to by the given iterator. |
126 | */ |
127 | template<class Iter> |
128 | void replace(Iter it, std::unique_ptr<RoseInstruction> ri) { |
129 | assert(!prog.empty()); |
130 | |
131 | const RoseInstruction *old_ptr = it->get(); |
132 | *it = move(ri); |
133 | update_targets(prog.begin(), prog.end(), old_ptr, it->get()); |
134 | } |
135 | }; |
136 | |
137 | bytecode_ptr<char> writeProgram(RoseEngineBlob &blob, |
138 | const RoseProgram &program); |
139 | |
140 | class RoseProgramHash { |
141 | public: |
142 | size_t operator()(const RoseProgram &program) const; |
143 | }; |
144 | |
145 | class RoseProgramEquivalence { |
146 | public: |
147 | bool operator()(const RoseProgram &prog1, const RoseProgram &prog2) const; |
148 | }; |
149 | |
150 | /** \brief Data only used during construction of various programs (literal, |
151 | * anchored, delay, etc). */ |
152 | struct ProgramBuild : noncopyable { |
153 | explicit ProgramBuild(u32 fMinLitOffset, size_t longLitThresh, |
154 | bool catchup) |
155 | : floatingMinLiteralMatchOffset(fMinLitOffset), |
156 | longLitLengthThreshold(longLitThresh), needs_catchup(catchup) { |
157 | } |
158 | |
159 | /** \brief Minimum offset of a match from the floating table. */ |
160 | const u32 floatingMinLiteralMatchOffset; |
161 | |
162 | /** \brief Long literal length threshold, used in streaming mode. */ |
163 | const size_t longLitLengthThreshold; |
164 | |
165 | /** \brief True if reports need CATCH_UP instructions to catch up suffixes, |
166 | * outfixes etc. */ |
167 | const bool needs_catchup; |
168 | |
169 | /** \brief Mapping from vertex to key, for vertices with a |
170 | * CHECK_NOT_HANDLED instruction. */ |
171 | std::unordered_map<RoseVertex, u32> handledKeys; |
172 | |
173 | /** \brief Mapping from Rose literal ID to anchored program index. */ |
174 | std::map<u32, u32> anchored_programs; |
175 | |
176 | /** \brief Mapping from Rose literal ID to delayed program index. */ |
177 | std::map<u32, u32> delay_programs; |
178 | |
179 | /** \brief Mapping from every vertex to the groups that must be on for that |
180 | * vertex to be reached. */ |
181 | std::unordered_map<RoseVertex, rose_group> vertex_group_map; |
182 | |
183 | /** \brief Global bitmap of groups that can be squashed. */ |
184 | rose_group squashable_groups = 0; |
185 | }; |
186 | |
187 | void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program); |
188 | void addSuffixesEodProgram(RoseProgram &program); |
189 | void addMatcherEodProgram(RoseProgram &program); |
190 | void addFlushCombinationProgram(RoseProgram &program); |
191 | |
192 | static constexpr u32 INVALID_QUEUE = ~0U; |
193 | |
194 | struct left_build_info { |
195 | // Constructor for an engine implementation. |
196 | left_build_info(u32 q, u32 l, u32 t, rose_group sm, |
197 | const std::vector<u8> &stops, u32 max_ql, u8 cm_count, |
198 | const CharReach &cm_cr); |
199 | |
200 | // Constructor for a lookaround implementation. |
201 | explicit left_build_info(const std::vector<std::vector<LookEntry>> &looks); |
202 | |
203 | u32 queue = INVALID_QUEUE; /* uniquely idents the left_build_info */ |
204 | u32 lag = 0; |
205 | u32 transient = 0; |
206 | rose_group squash_mask = ~rose_group{0}; |
207 | std::vector<u8> stopAlphabet; |
208 | u32 max_queuelen = 0; |
209 | u8 countingMiracleCount = 0; |
210 | CharReach countingMiracleReach; |
211 | u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */ |
212 | bool has_lookaround = false; |
213 | |
214 | // alternative implementation to the NFA |
215 | std::vector<std::vector<LookEntry>> lookaround; |
216 | }; |
217 | |
218 | /** |
219 | * \brief Provides a brief summary of properties of an NFA that has already been |
220 | * finalised and stored in the blob. |
221 | */ |
222 | struct engine_info { |
223 | engine_info(const NFA *nfa, bool trans); |
224 | |
225 | enum NFAEngineType type; |
226 | bool accepts_eod; |
227 | u32 stream_size; |
228 | u32 scratch_size; |
229 | u32 scratch_align; |
230 | bool transient; |
231 | }; |
232 | |
233 | /** |
234 | * \brief Consumes list of program blocks corresponding to different literals, |
235 | * checks them for duplicates and then concatenates them into one program. |
236 | * |
237 | * Note: if a block will squash groups, a CLEAR_WORK_DONE instruction is |
238 | * inserted to prevent the work_done flag being contaminated by early blocks. |
239 | */ |
240 | RoseProgram assembleProgramBlocks(std::vector<RoseProgram> &&blocks); |
241 | |
242 | RoseProgram makeLiteralProgram(const RoseBuildImpl &build, |
243 | const std::map<RoseVertex, left_build_info> &leftfix_info, |
244 | const std::map<suffix_id, u32> &suffixes, |
245 | const std::map<u32, engine_info> &engine_info_by_queue, |
246 | const std::unordered_map<RoseVertex, u32> &roleStateIndices, |
247 | ProgramBuild &prog_build, u32 lit_id, |
248 | const std::vector<RoseEdge> &lit_edges, |
249 | bool is_anchored_replay_program); |
250 | |
251 | RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build, |
252 | ProgramBuild &prog_build, |
253 | const std::vector<u32> &lit_ids); |
254 | |
255 | RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build, |
256 | ProgramBuild &prog_build, const RoseEdge &e, |
257 | const bool multiple_preds); |
258 | |
259 | RoseProgram makeReportProgram(const RoseBuildImpl &build, |
260 | bool needs_mpv_catchup, ReportID id); |
261 | |
262 | RoseProgram makeBoundaryProgram(const RoseBuildImpl &build, |
263 | const std::set<ReportID> &reports); |
264 | |
265 | struct TriggerInfo { |
266 | TriggerInfo(bool c, u32 q, u32 e) : cancel(c), queue(q), event(e) {} |
267 | bool cancel; |
268 | u32 queue; |
269 | u32 event; |
270 | |
271 | bool operator==(const TriggerInfo &b) const { |
272 | return cancel == b.cancel && queue == b.queue && event == b.event; |
273 | } |
274 | }; |
275 | |
276 | void addPredBlocks(std::map<u32, RoseProgram> &pred_blocks, u32 num_states, |
277 | RoseProgram &program); |
278 | |
279 | void applyFinalSpecialisation(RoseProgram &program); |
280 | |
281 | void recordLongLiterals(std::vector<ue2_case_string> &longLiterals, |
282 | const RoseProgram &program); |
283 | |
284 | void recordResources(RoseResources &resources, const RoseProgram &program); |
285 | |
286 | void addIncludedJumpProgram(RoseProgram &program, u32 child_offset, u8 squash); |
287 | } // namespace ue2 |
288 | |
289 | #endif // ROSE_BUILD_PROGRAM_H |
290 | |