1 | /* |
2 | * Copyright (c) 2015, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Boundary assertions (^, $, \\A, \\Z, \\z) |
31 | */ |
32 | |
33 | |
34 | #include "ComponentBoundary.h" |
35 | |
36 | #include "buildstate.h" |
37 | #include "parse_error.h" |
38 | #include "position.h" |
39 | #include "position_info.h" |
40 | #include "Parser.h" |
41 | #include "util/charreach.h" |
42 | #include "nfagraph/ng_builder.h" |
43 | |
44 | #include <cassert> |
45 | |
46 | using namespace std; |
47 | |
48 | namespace ue2 { |
49 | |
50 | ComponentBoundary::ComponentBoundary(enum Boundary bound) |
51 | : m_bound(bound), m_newline(GlushkovBuildState::POS_UNINITIALIZED) {} |
52 | |
53 | ComponentBoundary::~ComponentBoundary() { |
54 | } |
55 | |
56 | ComponentBoundary::ComponentBoundary(const ComponentBoundary &other) |
57 | : Component(other), m_bound(other.m_bound), m_newline(other.m_newline), |
58 | m_first(other.m_first), m_last(other.m_last) {} |
59 | |
60 | ComponentBoundary * ComponentBoundary::clone() const { |
61 | return new ComponentBoundary(*this); |
62 | } |
63 | |
64 | vector<PositionInfo> ComponentBoundary::first() const { |
65 | return m_first; |
66 | } |
67 | |
68 | vector<PositionInfo> ComponentBoundary::last() const { |
69 | return m_last; |
70 | } |
71 | |
72 | bool ComponentBoundary::empty() const { |
73 | return true; |
74 | } |
75 | |
76 | bool ComponentBoundary::repeatable() const { |
77 | return false; |
78 | } |
79 | |
80 | static |
81 | Position makeNewline(GlushkovBuildState &bs) { |
82 | NFABuilder &builder = bs.getBuilder(); |
83 | Position newline = builder.makePositions(1); |
84 | builder.addCharReach(newline, CharReach('\n')); |
85 | return newline; |
86 | } |
87 | |
88 | void ComponentBoundary::notePositions(GlushkovBuildState & bs) { |
89 | NFABuilder &builder = bs.getBuilder(); |
90 | const Position startState = builder.getStart(); |
91 | |
92 | switch (m_bound) { |
93 | case BEGIN_STRING: // beginning of data stream ('^') |
94 | { |
95 | PositionInfo epsilon(GlushkovBuildState::POS_EPSILON); |
96 | epsilon.flags = POS_FLAG_NOFLOAT; |
97 | m_first.push_back(epsilon); |
98 | |
99 | // We have the start vertex in firsts so that we can discourage |
100 | // the mid-pattern use of boundaries. |
101 | m_first.push_back(startState); |
102 | |
103 | break; |
104 | } |
105 | case BEGIN_LINE: // multiline anchor: beginning of stream or a newline |
106 | { |
107 | PositionInfo epsilon(GlushkovBuildState::POS_EPSILON); |
108 | epsilon.flags = POS_FLAG_NOFLOAT; |
109 | m_first.push_back(epsilon); |
110 | |
111 | // We have the start vertex in firsts so that we can discourage |
112 | // the mid-pattern use of boundaries. |
113 | m_first.push_back(startState); |
114 | |
115 | // Newline |
116 | m_newline = makeNewline(bs); |
117 | builder.setAssertFlag(m_newline, POS_FLAG_MULTILINE_START); |
118 | builder.setAssertFlag(m_newline, POS_FLAG_VIRTUAL_START); |
119 | PositionInfo nl(m_newline); |
120 | nl.flags = POS_FLAG_MUST_FLOAT | POS_FLAG_FIDDLE_ACCEPT; |
121 | m_first.push_back(nl); |
122 | m_last.push_back(nl); |
123 | recordPosBounds(m_newline, m_newline + 1); |
124 | break; |
125 | } |
126 | case END_STRING: // end of data stream ('\z') |
127 | { |
128 | PositionInfo epsilon(GlushkovBuildState::POS_EPSILON); |
129 | epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_NO_NL_EOD | |
130 | POS_FLAG_NO_NL_ACCEPT | POS_FLAG_ONLY_ENDS; |
131 | m_first.push_back(epsilon); |
132 | break; |
133 | } |
134 | case END_STRING_OPTIONAL_LF: // end of data with optional LF ('$') |
135 | { |
136 | PositionInfo epsilon(GlushkovBuildState::POS_EPSILON); |
137 | epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_WIRE_NL_EOD | |
138 | POS_FLAG_NO_NL_ACCEPT | POS_FLAG_ONLY_ENDS; |
139 | m_first.push_back(epsilon); |
140 | break; |
141 | } |
142 | case END_LINE: // multiline anchor: end of data or a newline |
143 | { |
144 | PositionInfo epsilon(GlushkovBuildState::POS_EPSILON); |
145 | epsilon.flags = POS_FLAG_WIRE_EOD | POS_FLAG_WIRE_NL_EOD | |
146 | POS_FLAG_WIRE_NL_ACCEPT | POS_FLAG_ONLY_ENDS; |
147 | m_first.push_back(epsilon); |
148 | break; |
149 | } |
150 | default: |
151 | // unsupported |
152 | assert(0); |
153 | break; |
154 | } |
155 | } |
156 | |
157 | void ComponentBoundary::buildFollowSet(GlushkovBuildState &, |
158 | const vector<PositionInfo> &) { |
159 | |
160 | } |
161 | |
162 | bool ComponentBoundary::checkEmbeddedStartAnchor(bool at_start) const { |
163 | if (at_start) { |
164 | return at_start; |
165 | } |
166 | |
167 | if (m_bound == BEGIN_STRING || m_bound == BEGIN_LINE) { |
168 | throw ParseError("Embedded start anchors not supported." ); |
169 | } |
170 | |
171 | return at_start; |
172 | } |
173 | |
174 | bool ComponentBoundary::checkEmbeddedEndAnchor(bool at_end) const { |
175 | if (at_end) { |
176 | return at_end; |
177 | } |
178 | |
179 | if (m_bound != BEGIN_STRING && m_bound != BEGIN_LINE) { |
180 | throw ParseError("Embedded end anchors not supported." ); |
181 | } |
182 | |
183 | return at_end; |
184 | } |
185 | |
186 | } // namespace |
187 | |