1 | /* |
2 | * Copyright (c) 2015, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Repeats ('*', '+', '?', '{M,N}', etc) |
31 | */ |
32 | |
33 | #ifndef RE_COMPONENTREPEAT_H |
34 | #define RE_COMPONENTREPEAT_H |
35 | |
36 | #include "Component.h" |
37 | #include "position.h" |
38 | #include "ue2common.h" |
39 | |
40 | #include <memory> |
41 | #include <utility> |
42 | |
43 | namespace ue2 { |
44 | |
45 | /** |
46 | * \brief Encapsulates a repeat of a subexpression ('*', '+', '?', '{M,N}', |
47 | * etc). |
48 | * |
49 | * ASCII Art Time: |
50 | * |
51 | * Our standard representation of standard repeats. Other constructions (fan-in |
52 | * vs fan-out) would also be possible and equivalent for our purposes. |
53 | * |
54 | * {n,m} |
55 | * |
56 | * S->M->M->M->O->O->O->T |
57 | * | ^ ^ ^ |
58 | * | | | | |
59 | * \-----------/ |
60 | * |
61 | * {0,m} |
62 | * |
63 | * /-----------\ |
64 | * | | |
65 | * | V |
66 | * S->O->O->O->T |
67 | * | ^ ^ ^ |
68 | * | | | | |
69 | * \--------/ |
70 | * |
71 | */ |
72 | class ComponentRepeat : public Component { |
73 | friend class ConstructLiteralVisitor; |
74 | friend class DumpVisitor; |
75 | friend class PrintVisitor; |
76 | friend class SimplifyVisitor; |
77 | public: |
78 | /** \brief Value representing no maximum bound. */ |
79 | static constexpr u32 NoLimit = 0xffffffff; |
80 | |
81 | /** \brief Type of this repeat, characterising its |
82 | * greediness/possessiveness. */ |
83 | enum RepeatType { |
84 | /** Minimising repeat, like 'a*?'. */ |
85 | REPEAT_NONGREEDY, |
86 | /** Maximising repeat, like 'a*'. This is the default in PCRE. */ |
87 | REPEAT_GREEDY, |
88 | /** Possessive, maximising repeat, like 'a*+'. Possessive repeats are |
89 | * only currently supported in prefiltering mode, where we treat them |
90 | * the same way we treat normal greedy repeats. */ |
91 | REPEAT_POSSESSIVE, |
92 | }; |
93 | |
94 | ComponentRepeat(std::unique_ptr<Component> sub_comp, u32 min, u32 max, |
95 | RepeatType t); |
96 | ~ComponentRepeat() override; |
97 | ComponentRepeat *clone() const override; |
98 | Component *accept(ComponentVisitor &v) override; |
99 | void accept(ConstComponentVisitor &v) const override; |
100 | |
101 | std::vector<PositionInfo> first() const override; |
102 | std::vector<PositionInfo> last() const override; |
103 | bool empty() const override; |
104 | bool repeatable() const override; |
105 | bool vacuous_everywhere() const override; |
106 | void notePositions(GlushkovBuildState &bs) override; |
107 | void buildFollowSet(GlushkovBuildState &bs, |
108 | const std::vector<PositionInfo> &lastPos) override; |
109 | bool checkEmbeddedStartAnchor(bool at_start) const override; |
110 | bool checkEmbeddedEndAnchor(bool at_end) const override; |
111 | |
112 | void optimise(bool connected_to_sds) override; |
113 | |
114 | virtual std::pair<u32, u32> getBounds() const { |
115 | return std::make_pair(m_min, m_max); |
116 | } |
117 | |
118 | /** \brief From declared behaviour (not taking into account the |
119 | * sub-component). */ |
120 | enum RepeatType type; |
121 | |
122 | protected: |
123 | void postSubNotePositionHook(); |
124 | void wireRepeats(GlushkovBuildState &bs); |
125 | |
126 | std::unique_ptr<Component> sub_comp; |
127 | u32 m_min; |
128 | u32 m_max; |
129 | |
130 | std::vector<std::vector<PositionInfo> > m_firsts; |
131 | std::vector<std::vector<PositionInfo> > m_lasts; |
132 | Position posFirst; |
133 | Position posLast; |
134 | |
135 | ComponentRepeat(const ComponentRepeat &other); |
136 | }; |
137 | |
138 | std::unique_ptr<ComponentRepeat> |
139 | makeComponentRepeat(std::unique_ptr<Component> sub_comp, u32 min, u32 max, |
140 | ComponentRepeat::RepeatType t); |
141 | |
142 | } // namespace ue2 |
143 | |
144 | #endif // _RE_COMPONENTREPEAT_H_ |
145 | |