| 1 | /* |
| 2 | * Copyright (c) 2016-2018, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #ifndef ROSE_BUILD_PROGRAM_H |
| 30 | #define ROSE_BUILD_PROGRAM_H |
| 31 | |
| 32 | #include "rose_build_impl.h" |
| 33 | #include "rose_program.h" |
| 34 | #include "util/bytecode_ptr.h" |
| 35 | #include "util/hash.h" |
| 36 | #include "util/make_unique.h" |
| 37 | |
| 38 | #include <unordered_map> |
| 39 | #include <vector> |
| 40 | |
| 41 | #include <boost/range/adaptor/map.hpp> |
| 42 | |
| 43 | namespace ue2 { |
| 44 | |
| 45 | struct LookEntry; |
| 46 | class RoseEngineBlob; |
| 47 | class RoseInstruction; |
| 48 | struct RoseResources; |
| 49 | |
| 50 | /** |
| 51 | * \brief Container for a list of program instructions. |
| 52 | */ |
| 53 | class RoseProgram { |
| 54 | private: |
| 55 | std::vector<std::unique_ptr<RoseInstruction>> prog; |
| 56 | |
| 57 | public: |
| 58 | RoseProgram(); |
| 59 | ~RoseProgram(); |
| 60 | RoseProgram(const RoseProgram &) = delete; |
| 61 | RoseProgram(RoseProgram &&); |
| 62 | RoseProgram &operator=(const RoseProgram &) = delete; |
| 63 | RoseProgram &operator=(RoseProgram &&); |
| 64 | |
| 65 | bool empty() const; |
| 66 | |
| 67 | size_t size() const { return prog.size(); } |
| 68 | |
| 69 | const RoseInstruction &back() const { return *prog.back(); } |
| 70 | const RoseInstruction &front() const { return *prog.front(); } |
| 71 | |
| 72 | using iterator = decltype(prog)::iterator; |
| 73 | iterator begin() { return prog.begin(); } |
| 74 | iterator end() { return prog.end(); } |
| 75 | |
| 76 | using const_iterator = decltype(prog)::const_iterator; |
| 77 | const_iterator begin() const { return prog.begin(); } |
| 78 | const_iterator end() const { return prog.end(); } |
| 79 | |
| 80 | using reverse_iterator = decltype(prog)::reverse_iterator; |
| 81 | reverse_iterator rbegin() { return prog.rbegin(); } |
| 82 | reverse_iterator rend() { return prog.rend(); } |
| 83 | |
| 84 | using const_reverse_iterator = decltype(prog)::const_reverse_iterator; |
| 85 | const_reverse_iterator rbegin() const { return prog.rbegin(); } |
| 86 | const_reverse_iterator rend() const { return prog.rend(); } |
| 87 | |
| 88 | /** \brief Retrieve a pointer to the terminating ROSE_INSTR_END. */ |
| 89 | const RoseInstruction *end_instruction() const; |
| 90 | |
| 91 | static void update_targets(iterator it, iterator it_end, |
| 92 | const RoseInstruction *old_target, |
| 93 | const RoseInstruction *new_target); |
| 94 | |
| 95 | iterator insert(iterator it, std::unique_ptr<RoseInstruction> ri); |
| 96 | |
| 97 | iterator insert(iterator it, RoseProgram &&block); |
| 98 | |
| 99 | /* Note: takes iterator rather than const_iterator to support toolchains |
| 100 | * with pre-C++11 standard libraries (i.e., gcc-4.8). */ |
| 101 | iterator erase(iterator first, iterator last); |
| 102 | |
| 103 | /** |
| 104 | * \brief Adds this instruction to the program just before the terminating |
| 105 | * ROSE_INSTR_END. |
| 106 | */ |
| 107 | void add_before_end(std::unique_ptr<RoseInstruction> ri); |
| 108 | |
| 109 | /** |
| 110 | * \brief Adds this block to the program just before the terminating |
| 111 | * ROSE_INSTR_END. |
| 112 | * |
| 113 | * Any existing instruction that was jumping to end continues to do so. |
| 114 | */ |
| 115 | void add_before_end(RoseProgram &&block); |
| 116 | /** |
| 117 | * \brief Append this program block, replacing our current ROSE_INSTR_END. |
| 118 | * |
| 119 | * Any existing instruction that was jumping to end, now leads to the newly |
| 120 | * added block. |
| 121 | */ |
| 122 | void add_block(RoseProgram &&block); |
| 123 | |
| 124 | /** |
| 125 | * \brief Replace the instruction pointed to by the given iterator. |
| 126 | */ |
| 127 | template<class Iter> |
| 128 | void replace(Iter it, std::unique_ptr<RoseInstruction> ri) { |
| 129 | assert(!prog.empty()); |
| 130 | |
| 131 | const RoseInstruction *old_ptr = it->get(); |
| 132 | *it = move(ri); |
| 133 | update_targets(prog.begin(), prog.end(), old_ptr, it->get()); |
| 134 | } |
| 135 | }; |
| 136 | |
| 137 | bytecode_ptr<char> writeProgram(RoseEngineBlob &blob, |
| 138 | const RoseProgram &program); |
| 139 | |
| 140 | class RoseProgramHash { |
| 141 | public: |
| 142 | size_t operator()(const RoseProgram &program) const; |
| 143 | }; |
| 144 | |
| 145 | class RoseProgramEquivalence { |
| 146 | public: |
| 147 | bool operator()(const RoseProgram &prog1, const RoseProgram &prog2) const; |
| 148 | }; |
| 149 | |
| 150 | /** \brief Data only used during construction of various programs (literal, |
| 151 | * anchored, delay, etc). */ |
| 152 | struct ProgramBuild : noncopyable { |
| 153 | explicit ProgramBuild(u32 fMinLitOffset, size_t longLitThresh, |
| 154 | bool catchup) |
| 155 | : floatingMinLiteralMatchOffset(fMinLitOffset), |
| 156 | longLitLengthThreshold(longLitThresh), needs_catchup(catchup) { |
| 157 | } |
| 158 | |
| 159 | /** \brief Minimum offset of a match from the floating table. */ |
| 160 | const u32 floatingMinLiteralMatchOffset; |
| 161 | |
| 162 | /** \brief Long literal length threshold, used in streaming mode. */ |
| 163 | const size_t longLitLengthThreshold; |
| 164 | |
| 165 | /** \brief True if reports need CATCH_UP instructions to catch up suffixes, |
| 166 | * outfixes etc. */ |
| 167 | const bool needs_catchup; |
| 168 | |
| 169 | /** \brief Mapping from vertex to key, for vertices with a |
| 170 | * CHECK_NOT_HANDLED instruction. */ |
| 171 | std::unordered_map<RoseVertex, u32> handledKeys; |
| 172 | |
| 173 | /** \brief Mapping from Rose literal ID to anchored program index. */ |
| 174 | std::map<u32, u32> anchored_programs; |
| 175 | |
| 176 | /** \brief Mapping from Rose literal ID to delayed program index. */ |
| 177 | std::map<u32, u32> delay_programs; |
| 178 | |
| 179 | /** \brief Mapping from every vertex to the groups that must be on for that |
| 180 | * vertex to be reached. */ |
| 181 | std::unordered_map<RoseVertex, rose_group> vertex_group_map; |
| 182 | |
| 183 | /** \brief Global bitmap of groups that can be squashed. */ |
| 184 | rose_group squashable_groups = 0; |
| 185 | }; |
| 186 | |
| 187 | void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program); |
| 188 | void addSuffixesEodProgram(RoseProgram &program); |
| 189 | void addMatcherEodProgram(RoseProgram &program); |
| 190 | void addFlushCombinationProgram(RoseProgram &program); |
| 191 | |
| 192 | static constexpr u32 INVALID_QUEUE = ~0U; |
| 193 | |
| 194 | struct left_build_info { |
| 195 | // Constructor for an engine implementation. |
| 196 | left_build_info(u32 q, u32 l, u32 t, rose_group sm, |
| 197 | const std::vector<u8> &stops, u32 max_ql, u8 cm_count, |
| 198 | const CharReach &cm_cr); |
| 199 | |
| 200 | // Constructor for a lookaround implementation. |
| 201 | explicit left_build_info(const std::vector<std::vector<LookEntry>> &looks); |
| 202 | |
| 203 | u32 queue = INVALID_QUEUE; /* uniquely idents the left_build_info */ |
| 204 | u32 lag = 0; |
| 205 | u32 transient = 0; |
| 206 | rose_group squash_mask = ~rose_group{0}; |
| 207 | std::vector<u8> stopAlphabet; |
| 208 | u32 max_queuelen = 0; |
| 209 | u8 countingMiracleCount = 0; |
| 210 | CharReach countingMiracleReach; |
| 211 | u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */ |
| 212 | bool has_lookaround = false; |
| 213 | |
| 214 | // alternative implementation to the NFA |
| 215 | std::vector<std::vector<LookEntry>> lookaround; |
| 216 | }; |
| 217 | |
| 218 | /** |
| 219 | * \brief Provides a brief summary of properties of an NFA that has already been |
| 220 | * finalised and stored in the blob. |
| 221 | */ |
| 222 | struct engine_info { |
| 223 | engine_info(const NFA *nfa, bool trans); |
| 224 | |
| 225 | enum NFAEngineType type; |
| 226 | bool accepts_eod; |
| 227 | u32 stream_size; |
| 228 | u32 scratch_size; |
| 229 | u32 scratch_align; |
| 230 | bool transient; |
| 231 | }; |
| 232 | |
| 233 | /** |
| 234 | * \brief Consumes list of program blocks corresponding to different literals, |
| 235 | * checks them for duplicates and then concatenates them into one program. |
| 236 | * |
| 237 | * Note: if a block will squash groups, a CLEAR_WORK_DONE instruction is |
| 238 | * inserted to prevent the work_done flag being contaminated by early blocks. |
| 239 | */ |
| 240 | RoseProgram assembleProgramBlocks(std::vector<RoseProgram> &&blocks); |
| 241 | |
| 242 | RoseProgram makeLiteralProgram(const RoseBuildImpl &build, |
| 243 | const std::map<RoseVertex, left_build_info> &leftfix_info, |
| 244 | const std::map<suffix_id, u32> &suffixes, |
| 245 | const std::map<u32, engine_info> &engine_info_by_queue, |
| 246 | const std::unordered_map<RoseVertex, u32> &roleStateIndices, |
| 247 | ProgramBuild &prog_build, u32 lit_id, |
| 248 | const std::vector<RoseEdge> &lit_edges, |
| 249 | bool is_anchored_replay_program); |
| 250 | |
| 251 | RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build, |
| 252 | ProgramBuild &prog_build, |
| 253 | const std::vector<u32> &lit_ids); |
| 254 | |
| 255 | RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build, |
| 256 | ProgramBuild &prog_build, const RoseEdge &e, |
| 257 | const bool multiple_preds); |
| 258 | |
| 259 | RoseProgram makeReportProgram(const RoseBuildImpl &build, |
| 260 | bool needs_mpv_catchup, ReportID id); |
| 261 | |
| 262 | RoseProgram makeBoundaryProgram(const RoseBuildImpl &build, |
| 263 | const std::set<ReportID> &reports); |
| 264 | |
| 265 | struct TriggerInfo { |
| 266 | TriggerInfo(bool c, u32 q, u32 e) : cancel(c), queue(q), event(e) {} |
| 267 | bool cancel; |
| 268 | u32 queue; |
| 269 | u32 event; |
| 270 | |
| 271 | bool operator==(const TriggerInfo &b) const { |
| 272 | return cancel == b.cancel && queue == b.queue && event == b.event; |
| 273 | } |
| 274 | }; |
| 275 | |
| 276 | void addPredBlocks(std::map<u32, RoseProgram> &pred_blocks, u32 num_states, |
| 277 | RoseProgram &program); |
| 278 | |
| 279 | void applyFinalSpecialisation(RoseProgram &program); |
| 280 | |
| 281 | void recordLongLiterals(std::vector<ue2_case_string> &longLiterals, |
| 282 | const RoseProgram &program); |
| 283 | |
| 284 | void recordResources(RoseResources &resources, const RoseProgram &program); |
| 285 | |
| 286 | void addIncludedJumpProgram(RoseProgram &program, u32 child_offset, u8 squash); |
| 287 | } // namespace ue2 |
| 288 | |
| 289 | #endif // ROSE_BUILD_PROGRAM_H |
| 290 | |