1 | /* |
2 | * Copyright (c) 2015, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Base class for all components. |
31 | */ |
32 | |
33 | #ifndef _RE_COMPONENT_H_ |
34 | #define _RE_COMPONENT_H_ |
35 | |
36 | #include "ComponentVisitor.h" |
37 | #include "ConstComponentVisitor.h" |
38 | |
39 | #include "position.h" |
40 | #include "ue2common.h" |
41 | |
42 | #include <set> |
43 | #include <string> |
44 | #include <vector> |
45 | |
46 | namespace ue2 { |
47 | |
48 | class GlushkovBuildState; |
49 | class PositionInfo; |
50 | |
51 | enum EmptyPathType { |
52 | NOT_EMPTY, /**< component must consume characters */ |
53 | EPS_ONLY_PATHS, /**< eps path with no overhanging asserts */ |
54 | BOUNDARY_PATHS /**< eps paths some with overhanging asserts */ |
55 | }; |
56 | |
57 | /** \brief Base class for regular expression parse tree components. */ |
58 | class Component { |
59 | friend class DumpVisitor; |
60 | public: |
61 | /** \brief Constructor. */ |
62 | Component(); |
63 | |
64 | /** \brief Destructor. */ |
65 | virtual ~Component(); |
66 | |
67 | /** \brief Returns a newly-allocated deep copy of this component. */ |
68 | virtual Component *clone() const = 0; |
69 | |
70 | /** \brief Apply the given visitor functor. */ |
71 | virtual Component *accept(ComponentVisitor &v) = 0; |
72 | |
73 | /** \brief Apply the given const visitor functor. */ |
74 | virtual void accept(ConstComponentVisitor &v) const = 0; |
75 | |
76 | /** \brief Glushkov construction First() function. |
77 | * \return set of initial positions in this component. */ |
78 | virtual std::vector<PositionInfo> first() const = 0; |
79 | |
80 | /** \brief Glushkov construction Last() function. |
81 | * \return set of final positions in this component. */ |
82 | virtual std::vector<PositionInfo> last() const = 0; |
83 | |
84 | /** \brief Glushkov construction Empty() function. |
85 | * \return true iff the component accepts epsilon. |
86 | * |
87 | * Note: ^, $, etc are considered empty. */ |
88 | virtual bool empty() const = 0; |
89 | |
90 | /** \brief True iff epsilon can pass through the component. |
91 | * |
92 | * Note: ^, $, etc are not vacuous everywhere. */ |
93 | virtual bool vacuous_everywhere(void) const; |
94 | |
95 | /** \brief True iff the component is repeatable on its own, without being |
96 | * encapsulated in a sequence first. |
97 | * |
98 | * This is true for most components, but not for repeats, anchors and word |
99 | * boundaries. */ |
100 | virtual bool repeatable() const; |
101 | |
102 | /** \brief Optimisation pass on the component tree. |
103 | * |
104 | * Called before \ref notePositions. May modify to the component tree. |
105 | * Assumes no start of match information is required. |
106 | */ |
107 | virtual void optimise(bool connected_to_sds); |
108 | |
109 | /** \brief Informs the Glushkov build process of the positions used by this |
110 | * component. */ |
111 | virtual void notePositions(GlushkovBuildState &bs) = 0; |
112 | |
113 | /** \brief Glushkov construction Follow() function. |
114 | * |
115 | * Constructs (in \a bs) the set of positions in this component reachable |
116 | * from the positions in \a lastPos. |
117 | * |
118 | * \throw ParseError on failure |
119 | */ |
120 | virtual void buildFollowSet(GlushkovBuildState &bs, |
121 | const std::vector<PositionInfo> &lastPos) = 0; |
122 | |
123 | /** \brief Return value is used for chaining, throws if finds embedded |
124 | * anchor. */ |
125 | virtual bool checkEmbeddedStartAnchor(bool at_start) const; |
126 | |
127 | /* \brief Return value is used for chaining, throws if finds embedded |
128 | * anchor. */ |
129 | virtual bool checkEmbeddedEndAnchor(bool at_end) const; |
130 | |
131 | protected: |
132 | /** \brief Called during \ref notePositions. */ |
133 | void recordPosBounds(u32 b, u32 e); |
134 | |
135 | u32 pos_begin; |
136 | u32 pos_end; |
137 | |
138 | // Protected copy ctor. Use clone instead. |
139 | Component(const Component &other) |
140 | : pos_begin(other.pos_begin), pos_end(other.pos_end) {} |
141 | }; |
142 | |
143 | } // namespace ue2 |
144 | |
145 | #endif |
146 | |