| 1 | /* |
| 2 | * Copyright (c) 2015-2017, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | /** \file |
| 30 | * \brief Bounded repeat analysis. |
| 31 | */ |
| 32 | |
| 33 | #ifndef NG_REPEAT_H |
| 34 | #define NG_REPEAT_H |
| 35 | |
| 36 | #include "ng_holder.h" |
| 37 | #include "ue2common.h" |
| 38 | #include "nfa/repeat_internal.h" |
| 39 | #include "util/depth.h" |
| 40 | #include "util/flat_containers.h" |
| 41 | |
| 42 | #include <map> |
| 43 | #include <vector> |
| 44 | |
| 45 | namespace ue2 { |
| 46 | |
| 47 | class NGHolder; |
| 48 | class ReportManager; |
| 49 | struct Grey; |
| 50 | |
| 51 | /** |
| 52 | * \brief Everything you need to know about a bounded repeat that we have |
| 53 | * transformed. |
| 54 | */ |
| 55 | struct BoundedRepeatData { |
| 56 | BoundedRepeatData(enum RepeatType type_in, const depth &a, const depth &z, |
| 57 | u32 minPeriod_in, NFAVertex cyc, NFAVertex pos, |
| 58 | const std::vector<NFAVertex> &tug_in) |
| 59 | : type(type_in), repeatMin(a), repeatMax(z), minPeriod(minPeriod_in), |
| 60 | cyclic(cyc), pos_trigger(pos), tug_triggers(tug_in) {} |
| 61 | |
| 62 | BoundedRepeatData() = delete; // no default construction allowed. |
| 63 | |
| 64 | enum RepeatType type; //!< selected type based on bounds and structure |
| 65 | depth repeatMin; //!< minimum repeat bound |
| 66 | depth repeatMax; //!< maximum repeat bound |
| 67 | u32 minPeriod; //!< min trigger period |
| 68 | NFAVertex cyclic; //!< cyclic vertex representing repeat in graph |
| 69 | NFAVertex pos_trigger; //!< positive trigger vertex |
| 70 | std::vector<NFAVertex> tug_triggers; //!< list of tug trigger vertices |
| 71 | }; |
| 72 | |
/**
 * \brief Run the bounded repeat analysis and transform the graph where
 * bounded repeats are found.
 *
 * \param[in,out] h
 *      Graph to operate on; it is transformed where bounded repeats are
 *      found.
 * \param[in] rm
 *      ReportManager, or nullptr if the graph's reports are internal (e.g. for
 *      Rose use).
 * \param[in] fixed_depth_tops
 *      Map of top to possible trigger depth.
 * \param[in] triggers
 *      Map of top to the vector of triggers (i.e. preceding literals/masks).
 *      Each trigger is expressed as a sequence of CharReach.
 * \param[out] repeats
 *      Repeat info is filled in for caller here, as BoundedRepeatData
 *      entries.
 * \param[in] streaming
 *      True if we're in streaming mode.
 * \param[in] simple_model_selection
 *      Don't perform complex (and slow) model selection analysis, e.g.
 *      determining whether the repeat is sole entry.
 * \param[in] grey
 *      Grey box object.
 * \param[out] reformed_start_ds
 *      Optional; defaults to nullptr. If supplied, this will be set to true
 *      if the graph was optimised for a leading first repeat, resulting in
 *      the output graph having no self-loop on startDs.
 */
void analyseRepeats(NGHolder &h, const ReportManager *rm,
                    const std::map<u32, u32> &fixed_depth_tops,
                    const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
                    std::vector<BoundedRepeatData> *repeats, bool streaming,
                    bool simple_model_selection, const Grey &grey,
                    bool *reformed_start_ds = nullptr);
| 106 | |
/**
 * \brief Information on repeats in a holder, returned from \ref findRepeats.
 *
 * \ref findRepeats hands these back to the caller through its output vector
 * argument.
 */
struct GraphRepeatInfo {
    depth repeatMin; /**< minimum bound of the repeat */
    depth repeatMax; /**< effective maximum bound of the repeat */
    std::vector<NFAVertex> vertices; /**< graph vertices involved in the repeat */
};
| 115 | |
/**
 * \brief Provides information on repeats in the graph.
 *
 * \param h
 *      Graph to inspect; taken by const reference.
 * \param minRepeatVertices
 *      NOTE(review): presumably the minimum number of vertices a repeat must
 *      involve in order to be reported -- confirm against the implementation.
 * \param repeats_out
 *      Output: receives the \ref GraphRepeatInfo entries describing the
 *      repeats found.
 */
void findRepeats(const NGHolder &h, u32 minRepeatVertices,
                 std::vector<GraphRepeatInfo> *repeats_out);
| 121 | |
| 122 | struct PureRepeat { |
| 123 | CharReach reach; |
| 124 | DepthMinMax bounds; |
| 125 | flat_set<ReportID> reports; |
| 126 | |
| 127 | bool operator==(const PureRepeat &a) const { |
| 128 | return reach == a.reach && bounds == a.bounds && reports == a.reports; |
| 129 | } |
| 130 | |
| 131 | bool operator!=(const PureRepeat &a) const { return !(*this == a); } |
| 132 | |
| 133 | bool operator<(const PureRepeat &a) const { |
| 134 | if (reach != a.reach) { |
| 135 | return reach < a.reach; |
| 136 | } |
| 137 | if (bounds != a.bounds) { |
| 138 | return bounds < a.bounds; |
| 139 | } |
| 140 | return reports < a.reports; |
| 141 | } |
| 142 | }; |
| 143 | |
/**
 * \brief Returns true and fills the given PureRepeat structure if the graph is
 * wholly a repeat over a single character class.
 *
 * For example, something like:
 *
 *     /^[a-z]{10,20}/
 *
 * - Note: graph must not use SDS or EOD.
 * - Note: \p PureRepeat::bounds::max is set to infinity if there is no upper
 *   bound on the repeat.
 *
 * \param h
 *      Graph to examine; taken by const reference.
 * \param r
 *      Output: filled in when the function returns true.
 *      NOTE(review): contents after a false return are not specified here --
 *      confirm against the implementation before relying on them.
 * \return
 *      True if the graph is wholly a repeat over a single character class.
 */
bool isPureRepeat(const NGHolder &h, PureRepeat &r);
| 157 | |
| 158 | } // namespace ue2 |
| 159 | |
| 160 | #endif // NG_REPEAT_H |
| 161 | |