1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Bounded repeat analysis.
31 */
32
33#ifndef NG_REPEAT_H
34#define NG_REPEAT_H
35
36#include "ng_holder.h"
37#include "ue2common.h"
38#include "nfa/repeat_internal.h"
39#include "util/depth.h"
40#include "util/flat_containers.h"
41
42#include <map>
43#include <vector>
44
45namespace ue2 {
46
47class NGHolder;
48class ReportManager;
49struct Grey;
50
51/**
52 * \brief Everything you need to know about a bounded repeat that we have
53 * transformed.
54 */
55struct BoundedRepeatData {
56 BoundedRepeatData(enum RepeatType type_in, const depth &a, const depth &z,
57 u32 minPeriod_in, NFAVertex cyc, NFAVertex pos,
58 const std::vector<NFAVertex> &tug_in)
59 : type(type_in), repeatMin(a), repeatMax(z), minPeriod(minPeriod_in),
60 cyclic(cyc), pos_trigger(pos), tug_triggers(tug_in) {}
61
62 BoundedRepeatData() = delete; // no default construction allowed.
63
64 enum RepeatType type; //!< selected type based on bounds and structure
65 depth repeatMin; //!< minimum repeat bound
66 depth repeatMax; //!< maximum repeat bound
67 u32 minPeriod; //!< min trigger period
68 NFAVertex cyclic; //!< cyclic vertex representing repeat in graph
69 NFAVertex pos_trigger; //!< positive trigger vertex
70 std::vector<NFAVertex> tug_triggers; //!< list of tug trigger vertices
71};
72
73/**
74 * \brief Run the bounded repeat analysis and transform the graph where
75 * bounded repeats are found.
76 *
77 * \param h
78 * Graph to operate on.
79 * \param rm
80 * ReportManager, or nullptr if the graph's reports are internal (e.g. for
81 * Rose use).
82 * \param fixed_depth_tops
83 * Map of top to possible trigger depth.
84 * \param triggers
85 * Map of top to the vector of triggers (i.e. preceding literals/masks)
86 * \param repeats
87 * Repeat info is filled in for caller here.
88 * \param streaming
89 * True if we're in streaming mode.
90 * \param simple_model_selection
91 * Don't perform complex (and slow) model selection analysis, e.g.
92 * determining whether the repeat is sole entry.
93 * \param grey
94 * Grey box object.
95 * \param reformed_start_ds
96 * If supplied, this will be set to true if the graph was optimised for a
97 * leading first repeat, resulting in the output graph having no self-loop
98 * on startDs.
99 */
100void analyseRepeats(NGHolder &h, const ReportManager *rm,
101 const std::map<u32, u32> &fixed_depth_tops,
102 const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
103 std::vector<BoundedRepeatData> *repeats, bool streaming,
104 bool simple_model_selection, const Grey &grey,
105 bool *reformed_start_ds = nullptr);
106
107/**
108 * \brief Information on repeats in a holder, returned from \ref findRepeats.
109 */
110struct GraphRepeatInfo {
111 depth repeatMin; /**< minimum bound */
112 depth repeatMax; /**< effective max bound */
113 std::vector<NFAVertex> vertices; /**< vertices involved in repeat */
114};
115
116/**
117 * \brief Provides information on repeats in the graph.
118 */
119void findRepeats(const NGHolder &h, u32 minRepeatVertices,
120 std::vector<GraphRepeatInfo> *repeats_out);
121
122struct PureRepeat {
123 CharReach reach;
124 DepthMinMax bounds;
125 flat_set<ReportID> reports;
126
127 bool operator==(const PureRepeat &a) const {
128 return reach == a.reach && bounds == a.bounds && reports == a.reports;
129 }
130
131 bool operator!=(const PureRepeat &a) const { return !(*this == a); }
132
133 bool operator<(const PureRepeat &a) const {
134 if (reach != a.reach) {
135 return reach < a.reach;
136 }
137 if (bounds != a.bounds) {
138 return bounds < a.bounds;
139 }
140 return reports < a.reports;
141 }
142};
143
144/**
145 * \brief Returns true and fills the given PureRepeat structure if the graph is
146 * wholly a repeat over a single character class.
147 *
148 * For example, something like:
149 *
150 * /^[a-z]{10,20}/
151 *
152 * - Note: graph must not use SDS or EOD.
153 * - Note: \p PureRepeat::bounds::max is set to infinity if there is no upper
154 * bound on the repeat.
155 */
156bool isPureRepeat(const NGHolder &h, PureRepeat &r);
157
158} // namespace ue2
159
160#endif // NG_REPEAT_H
161