1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Bounded repeat analysis. |
31 | */ |
32 | |
33 | #ifndef NG_REPEAT_H |
34 | #define NG_REPEAT_H |
35 | |
36 | #include "ng_holder.h" |
37 | #include "ue2common.h" |
38 | #include "nfa/repeat_internal.h" |
39 | #include "util/depth.h" |
40 | #include "util/flat_containers.h" |
41 | |
42 | #include <map> |
43 | #include <vector> |
44 | |
45 | namespace ue2 { |
46 | |
47 | class NGHolder; |
48 | class ReportManager; |
49 | struct Grey; |
50 | |
51 | /** |
52 | * \brief Everything you need to know about a bounded repeat that we have |
53 | * transformed. |
54 | */ |
55 | struct BoundedRepeatData { |
56 | BoundedRepeatData(enum RepeatType type_in, const depth &a, const depth &z, |
57 | u32 minPeriod_in, NFAVertex cyc, NFAVertex pos, |
58 | const std::vector<NFAVertex> &tug_in) |
59 | : type(type_in), repeatMin(a), repeatMax(z), minPeriod(minPeriod_in), |
60 | cyclic(cyc), pos_trigger(pos), tug_triggers(tug_in) {} |
61 | |
62 | BoundedRepeatData() = delete; // no default construction allowed. |
63 | |
64 | enum RepeatType type; //!< selected type based on bounds and structure |
65 | depth repeatMin; //!< minimum repeat bound |
66 | depth repeatMax; //!< maximum repeat bound |
67 | u32 minPeriod; //!< min trigger period |
68 | NFAVertex cyclic; //!< cyclic vertex representing repeat in graph |
69 | NFAVertex pos_trigger; //!< positive trigger vertex |
70 | std::vector<NFAVertex> tug_triggers; //!< list of tug trigger vertices |
71 | }; |
72 | |
73 | /** |
74 | * \brief Run the bounded repeat analysis and transform the graph where |
75 | * bounded repeats are found. |
76 | * |
77 | * \param h |
78 | * Graph to operate on. |
79 | * \param rm |
80 | * ReportManager, or nullptr if the graph's reports are internal (e.g. for |
81 | * Rose use). |
82 | * \param fixed_depth_tops |
83 | * Map of top to possible trigger depth. |
84 | * \param triggers |
85 | * Map of top to the vector of triggers (i.e. preceding literals/masks) |
86 | * \param repeats |
87 | * Repeat info is filled in for caller here. |
88 | * \param streaming |
89 | * True if we're in streaming mode. |
90 | * \param simple_model_selection |
91 | * Don't perform complex (and slow) model selection analysis, e.g. |
92 | * determining whether the repeat is sole entry. |
93 | * \param grey |
94 | * Grey box object. |
95 | * \param reformed_start_ds |
96 | * If supplied, this will be set to true if the graph was optimised for a |
97 | * leading first repeat, resulting in the output graph having no self-loop |
98 | * on startDs. |
99 | */ |
100 | void analyseRepeats(NGHolder &h, const ReportManager *rm, |
101 | const std::map<u32, u32> &fixed_depth_tops, |
102 | const std::map<u32, std::vector<std::vector<CharReach>>> &triggers, |
103 | std::vector<BoundedRepeatData> *repeats, bool streaming, |
104 | bool simple_model_selection, const Grey &grey, |
105 | bool *reformed_start_ds = nullptr); |
106 | |
107 | /** |
108 | * \brief Information on repeats in a holder, returned from \ref findRepeats. |
109 | */ |
110 | struct GraphRepeatInfo { |
111 | depth repeatMin; /**< minimum bound */ |
112 | depth repeatMax; /**< effective max bound */ |
113 | std::vector<NFAVertex> vertices; /**< vertices involved in repeat */ |
114 | }; |
115 | |
116 | /** |
117 | * \brief Provides information on repeats in the graph. |
118 | */ |
119 | void findRepeats(const NGHolder &h, u32 minRepeatVertices, |
120 | std::vector<GraphRepeatInfo> *repeats_out); |
121 | |
122 | struct PureRepeat { |
123 | CharReach reach; |
124 | DepthMinMax bounds; |
125 | flat_set<ReportID> reports; |
126 | |
127 | bool operator==(const PureRepeat &a) const { |
128 | return reach == a.reach && bounds == a.bounds && reports == a.reports; |
129 | } |
130 | |
131 | bool operator!=(const PureRepeat &a) const { return !(*this == a); } |
132 | |
133 | bool operator<(const PureRepeat &a) const { |
134 | if (reach != a.reach) { |
135 | return reach < a.reach; |
136 | } |
137 | if (bounds != a.bounds) { |
138 | return bounds < a.bounds; |
139 | } |
140 | return reports < a.reports; |
141 | } |
142 | }; |
143 | |
144 | /** |
145 | * \brief Returns true and fills the given PureRepeat structure if the graph is |
146 | * wholly a repeat over a single character class. |
147 | * |
148 | * For example, something like: |
149 | * |
150 | * /^[a-z]{10,20}/ |
151 | * |
152 | * - Note: graph must not use SDS or EOD. |
153 | * - Note: \p PureRepeat::bounds::max is set to infinity if there is no upper |
154 | * bound on the repeat. |
155 | */ |
156 | bool isPureRepeat(const NGHolder &h, PureRepeat &r); |
157 | |
158 | } // namespace ue2 |
159 | |
160 | #endif // NG_REPEAT_H |
161 | |