1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /** \file |
30 | * \brief Resolve special assert vertices. |
31 | * |
32 | * The assert resolution algorithm proceeds by iterating over those edges with |
33 | * assertion flags, considering source and target vertices of each edge. If a |
34 | * vertex has a superset of the reachability demanded by the assertion on the |
35 | * edge, it is split into alternatives providing the word and non-word paths |
36 | * through that vertex. |
37 | * |
38 | * A great deal of the complexity in the resolveAsserts pass is devoted to |
39 | * handling these assertions when the UCP flag is specified (meaning \\w and \\W |
40 | * are implemented with Unicode properties, rather than their ASCII |
41 | * interpretation) and the prefiltering flag is also used. Complete, |
42 | * non-prefiltering UCP support is not available yet. |
43 | */ |
44 | #include "ng_asserts.h" |
45 | |
46 | #include "ng.h" |
47 | #include "ng_prune.h" |
48 | #include "ng_redundancy.h" |
49 | #include "ng_util.h" |
50 | #include "compiler/compiler.h" |
51 | #include "parser/position.h" // for POS flags |
52 | #include "util/bitutils.h" // for findAndClearLSB_32 |
53 | #include "util/boundary_reports.h" |
54 | #include "util/container.h" |
55 | #include "util/compile_context.h" |
56 | #include "util/compile_error.h" |
57 | #include "util/graph_range.h" |
58 | #include "util/report_manager.h" |
59 | #include "util/unicode_def.h" |
60 | |
61 | #include <queue> |
62 | |
63 | using namespace std; |
64 | |
65 | namespace ue2 { |
66 | |
67 | /** \brief Hard limit on the maximum number of vertices we'll clone before we |
68 | * throw up our hands and report 'Pattern too large.' */ |
69 | static const size_t MAX_CLONED_VERTICES = 2048; |
70 | |
71 | /** \brief The definition of \\w, since we use it everywhere in here. */ |
72 | static const CharReach CHARREACH_WORD(CharReach('a', 'z') | |
73 | CharReach('A', 'Z') | CharReach('0', '9') | CharReach('_')); |
74 | |
75 | /** \brief \\W is the inverse of \\w */ |
76 | static const CharReach CHARREACH_NONWORD(~CHARREACH_WORD); |
77 | |
78 | /** \brief Prefiltering definition of \\w for UCP mode. |
79 | * |
80 | * Includes all high bytes as to capture all non-ASCII, however depending on |
81 | * direction only continuers or starters are strictly required - as the input |
82 | * is well-formed, this laxness will not cost us. */ |
83 | static const CharReach CHARREACH_WORD_UCP_PRE(CHARREACH_WORD |
84 | | CharReach(128, 255)); |
85 | |
86 | /** \brief Prefiltering definition of \\W for UCP Mode. |
87 | * |
88 | * (non-word already includes high bytes) */ |
89 | static const CharReach CHARREACH_NONWORD_UCP_PRE(CHARREACH_NONWORD); |
90 | |
91 | /** \brief Find all the edges with assertion flags. */ |
92 | static |
93 | vector<NFAEdge> getAsserts(const NGHolder &g) { |
94 | vector<NFAEdge> out; |
95 | for (const auto &e : edges_range(g)) { |
96 | if (g[e].assert_flags) { |
97 | out.push_back(e); |
98 | } |
99 | } |
100 | return out; |
101 | } |
102 | |
103 | static |
104 | void addToSplit(const NGHolder &g, NFAVertex v, map<u32, NFAVertex> *to_split) { |
105 | DEBUG_PRINTF("%zu needs splitting\n" , g[v].index); |
106 | to_split->emplace(g[v].index, v); |
107 | } |
108 | |
109 | /** \brief Find vertices that need to be split due to an assertion edge. |
110 | * |
111 | * A vertex needs to be split if has an edge to/from it with an assert with a |
112 | * restriction on the relevant end. */ |
113 | static |
114 | void findSplitters(const NGHolder &g, const vector<NFAEdge> &asserts, |
115 | map<u32, NFAVertex> *to_split, |
116 | map<u32, NFAVertex> *to_split_ucp) { |
117 | for (const auto &e : asserts) { |
118 | NFAVertex u = source(e, g); |
119 | NFAVertex v = target(e, g); |
120 | u32 flags = g[e].assert_flags; |
121 | assert(flags); |
122 | |
123 | const CharReach &u_cr = g[u].char_reach; |
124 | const CharReach &v_cr = g[v].char_reach; |
125 | |
126 | bool ucp_assert = flags & UCP_ASSERT_FLAGS; |
127 | bool normal_assert = flags & NON_UCP_ASSERT_FLAGS; |
128 | /* In reality, an expression can only be entirely ucp or not ucp */ |
129 | assert(ucp_assert != normal_assert); |
130 | |
131 | if (normal_assert) { |
132 | /* assume any flag results in us have to split if the vertex is not |
133 | * a subset of word or completely disjoint from it. We could be more |
134 | * nuanced if flags is a disjunction of multiple assertions. */ |
135 | if (!u_cr.isSubsetOf(CHARREACH_WORD) |
136 | && !u_cr.isSubsetOf(CHARREACH_NONWORD) |
137 | && u != g.start) { /* start is always considered a nonword */ |
138 | addToSplit(g, u, to_split); |
139 | } |
140 | |
141 | if (!v_cr.isSubsetOf(CHARREACH_WORD) |
142 | && !v_cr.isSubsetOf(CHARREACH_NONWORD) |
143 | && v != g.accept /* accept require special handling, done on a |
144 | * per edge basis in resolve asserts |
145 | */ |
146 | && v != g.acceptEod) { /* eod is always considered a nonword */ |
147 | addToSplit(g, v, to_split); |
148 | } |
149 | } |
150 | |
151 | if (ucp_assert) { |
152 | /* note: the ucp prefilter crs overlap - requires a bit more care */ |
153 | if (u == g.start) { /* start never needs to be split, |
154 | * treat nonword */ |
155 | } else if (flags & POS_FLAG_ASSERT_WORD_TO_ANY_UCP) { |
156 | if (!u_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE) |
157 | && !u_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) { |
158 | addToSplit(g, u, to_split_ucp); |
159 | } |
160 | } else { |
161 | assert(flags & POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP); |
162 | if (!u_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE) |
163 | && !u_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) { |
164 | addToSplit(g, u, to_split_ucp); |
165 | } |
166 | } |
167 | |
168 | if (v == g.acceptEod /* eod is always considered a nonword */ |
169 | || v == g.accept) { /* accept require special handling, done on |
170 | * a per edge basis in resolve asserts */ |
171 | } else if (flags & POS_FLAG_ASSERT_ANY_TO_WORD_UCP) { |
172 | if (!v_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE) |
173 | && !v_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) { |
174 | addToSplit(g, v, to_split_ucp); |
175 | } |
176 | } else { |
177 | assert(flags & POS_FLAG_ASSERT_ANY_TO_NONWORD_UCP); |
178 | if (!v_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE) |
179 | && !v_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) { |
180 | addToSplit(g, v, to_split_ucp); |
181 | } |
182 | } |
183 | } |
184 | } |
185 | } |
186 | |
187 | static |
188 | void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, |
189 | NFAVertex v, s32 adj) { |
190 | // Don't try and set the report ID of a special vertex. |
191 | assert(!is_special(v, g)); |
192 | |
193 | // If there's a report set already, we're replacing it. |
194 | g[v].reports.clear(); |
195 | |
196 | Report ir = rm.getBasicInternalReport(expr, adj); |
197 | |
198 | g[v].reports.insert(rm.getInternalId(ir)); |
199 | DEBUG_PRINTF("set report id for vertex %zu, adj %d\n" , g[v].index, adj); |
200 | } |
201 | |
202 | static |
203 | NFAVertex makeClone(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, |
204 | NFAVertex v, const CharReach &cr_mask) { |
205 | NFAVertex clone = clone_vertex(g, v); |
206 | g[clone].char_reach &= cr_mask; |
207 | clone_out_edges(g, v, clone); |
208 | clone_in_edges(g, v, clone); |
209 | |
210 | if (v == g.startDs) { |
211 | if (expr.utf8) { |
212 | g[clone].char_reach &= ~UTF_START_CR; |
213 | } |
214 | |
215 | DEBUG_PRINTF("marked as virt\n" ); |
216 | g[clone].assert_flags = POS_FLAG_VIRTUAL_START; |
217 | |
218 | setReportId(rm, g, expr, clone, 0); |
219 | } |
220 | |
221 | return clone; |
222 | } |
223 | |
224 | static |
225 | void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, |
226 | NFAVertex v, bool ucp) { |
227 | assert(v != g.start); |
228 | assert(v != g.accept); |
229 | assert(v != g.acceptEod); |
230 | DEBUG_PRINTF("partitioning vertex %zu ucp:%d\n" , g[v].index, (int)ucp); |
231 | |
232 | CharReach cr_word = ucp ? CHARREACH_WORD_UCP_PRE : CHARREACH_WORD; |
233 | CharReach cr_nonword = ucp ? CHARREACH_NONWORD_UCP_PRE : CHARREACH_NONWORD; |
234 | |
235 | auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; }; |
236 | |
237 | // Split v into word/nonword vertices with only asserting out-edges. |
238 | NFAVertex w_out = makeClone(rm, g, expr, v, cr_word); |
239 | NFAVertex nw_out = makeClone(rm, g, expr, v, cr_nonword); |
240 | remove_out_edge_if(w_out, has_no_assert, g); |
241 | remove_out_edge_if(nw_out, has_no_assert, g); |
242 | |
243 | // Split v into word/nonword vertices with only asserting in-edges. |
244 | NFAVertex w_in = makeClone(rm, g, expr, v, cr_word); |
245 | NFAVertex nw_in = makeClone(rm, g, expr, v, cr_nonword); |
246 | remove_in_edge_if(w_in, has_no_assert, g); |
247 | remove_in_edge_if(nw_in, has_no_assert, g); |
248 | |
249 | // Prune edges with asserts from original v. |
250 | auto has_assert = [&g](const NFAEdge &e) { return g[e].assert_flags; }; |
251 | remove_in_edge_if(v, has_assert, g); |
252 | remove_out_edge_if(v, has_assert, g); |
253 | } |
254 | |
255 | static |
256 | void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr, |
257 | set<NFAEdge> *dead) { |
258 | for (const auto &e : edges_range(g)) { |
259 | u32 flags = g[e].assert_flags; |
260 | if (!flags) { |
261 | continue; |
262 | } |
263 | |
264 | NFAVertex u = source(e, g); |
265 | NFAVertex v = target(e, g); |
266 | |
267 | assert(u != g.startDs); |
268 | |
269 | const CharReach &u_cr = g[u].char_reach; |
270 | const CharReach &v_cr = g[v].char_reach; |
271 | |
272 | bool impassable = true; |
273 | bool ucp = flags & UCP_ASSERT_FLAGS; |
274 | DEBUG_PRINTF("resolving edge %zu->%zu (flags=0x%x, ucp=%d)\n" , |
275 | g[u].index, g[v].index, flags, (int)ucp); |
276 | while (flags && impassable) { |
277 | u32 flag = 1U << findAndClearLSB_32(&flags); |
278 | switch (flag) { |
279 | case POS_FLAG_ASSERT_NONWORD_TO_NONWORD: |
280 | case POS_FLAG_ASSERT_NONWORD_TO_WORD: |
281 | if ((u_cr & CHARREACH_NONWORD).none() && u != g.start) { |
282 | continue; |
283 | } |
284 | break; |
285 | case POS_FLAG_ASSERT_WORD_TO_NONWORD: |
286 | case POS_FLAG_ASSERT_WORD_TO_WORD: |
287 | if ((u_cr & CHARREACH_WORD).none() || u == g.start) { |
288 | continue; |
289 | } |
290 | break; |
291 | case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP: |
292 | case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP: |
293 | if ((u_cr & ~CHARREACH_NONWORD_UCP_PRE).any() && u != g.start) { |
294 | continue; |
295 | } |
296 | break; |
297 | case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP: |
298 | case POS_FLAG_ASSERT_WORD_TO_WORD_UCP: |
299 | if ((u_cr & ~CHARREACH_WORD_UCP_PRE).any() || u == g.start) { |
300 | continue; |
301 | } |
302 | break; |
303 | default: |
304 | assert(0); |
305 | } |
306 | |
307 | if (v == g.accept) { |
308 | /* accept special will need to be treated specially later */ |
309 | impassable = false; |
310 | continue; |
311 | } |
312 | |
313 | switch (flag) { |
314 | case POS_FLAG_ASSERT_NONWORD_TO_NONWORD: |
315 | case POS_FLAG_ASSERT_WORD_TO_NONWORD: |
316 | if ((v_cr & CHARREACH_NONWORD).none() && v != g.acceptEod) { |
317 | continue; |
318 | } |
319 | break; |
320 | case POS_FLAG_ASSERT_WORD_TO_WORD: |
321 | case POS_FLAG_ASSERT_NONWORD_TO_WORD: |
322 | if ((v_cr & CHARREACH_WORD).none() || v == g.acceptEod) { |
323 | continue; |
324 | } |
325 | break; |
326 | case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP: |
327 | case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP: |
328 | if ((v_cr & ~CHARREACH_NONWORD_UCP_PRE).any() |
329 | && v != g.acceptEod) { |
330 | continue; |
331 | } |
332 | break; |
333 | case POS_FLAG_ASSERT_WORD_TO_WORD_UCP: |
334 | case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP: |
335 | if ((v_cr & ~CHARREACH_WORD_UCP_PRE).any() |
336 | || v == g.acceptEod) { |
337 | continue; |
338 | } |
339 | break; |
340 | default: |
341 | assert(0); |
342 | } |
343 | impassable = false; |
344 | } |
345 | |
346 | if (impassable) { |
347 | dead->insert(e); |
348 | } else if (v == g.accept && !ucp) { |
349 | bool u_w = (u_cr & CHARREACH_NONWORD).none() && u != g.start; |
350 | UNUSED bool u_nw = (u_cr & CHARREACH_WORD).none() || u == g.start; |
351 | assert(u_w != u_nw); |
352 | bool v_w = false; |
353 | bool v_nw = false; |
354 | |
355 | flags = g[e].assert_flags; |
356 | if (u_w) { |
357 | v_w = flags & POS_FLAG_ASSERT_WORD_TO_WORD; |
358 | v_nw = flags & POS_FLAG_ASSERT_WORD_TO_NONWORD; |
359 | } else { |
360 | v_w = flags & POS_FLAG_ASSERT_NONWORD_TO_WORD; |
361 | v_nw = flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD; |
362 | } |
363 | assert(v_w || v_nw); |
364 | if (v_w && v_nw) { |
365 | /* edge is effectively unconditional */ |
366 | g[e].assert_flags = 0; |
367 | } else if (v_w) { |
368 | /* need to add a word byte */ |
369 | NFAVertex vv = add_vertex(g); |
370 | setReportId(rm, g, expr, vv, -1); |
371 | g[vv].char_reach = CHARREACH_WORD; |
372 | add_edge(vv, g.accept, g); |
373 | g[e].assert_flags = 0; |
374 | add_edge(u, vv, g[e], g); |
375 | dead->insert(e); |
376 | } else { |
377 | /* need to add a non word byte or see eod */ |
378 | NFAVertex vv = add_vertex(g); |
379 | setReportId(rm, g, expr, vv, -1); |
380 | g[vv].char_reach = CHARREACH_NONWORD; |
381 | add_edge(vv, g.accept, g); |
382 | g[e].assert_flags = 0; |
383 | add_edge(u, vv, g[e], g); |
384 | /* there may already be a different edge from start to eod if so |
385 | * we need to make it unconditional and alive |
386 | */ |
387 | if (NFAEdge start_eod = edge(u, g.acceptEod, g)) { |
388 | g[start_eod].assert_flags = 0; |
389 | dead->erase(start_eod); |
390 | } else { |
391 | add_edge(u, g.acceptEod, g[e], g); |
392 | } |
393 | dead->insert(e); |
394 | } |
395 | } else if (v == g.accept && ucp) { |
396 | DEBUG_PRINTF("resolving ucp assert to accept\n" ); |
397 | assert(u_cr.any()); |
398 | bool u_w = (u_cr & CHARREACH_WORD_UCP_PRE).any() |
399 | && u != g.start; |
400 | bool u_nw = (u_cr & CHARREACH_NONWORD_UCP_PRE).any() |
401 | || u == g.start; |
402 | assert(u_w || u_nw); |
403 | |
404 | bool v_w = false; |
405 | bool v_nw = false; |
406 | |
407 | flags = g[e].assert_flags; |
408 | if (u_w) { |
409 | v_w |= flags & POS_FLAG_ASSERT_WORD_TO_WORD_UCP; |
410 | v_nw |= flags & POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP; |
411 | } |
412 | if (u_nw) { |
413 | v_w |= flags & POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP; |
414 | v_nw |= flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP; |
415 | } |
416 | assert(v_w || v_nw); |
417 | if (v_w && v_nw) { |
418 | /* edge is effectively unconditional */ |
419 | g[e].assert_flags = 0; |
420 | } else if (v_w) { |
421 | /* need to add a word byte */ |
422 | NFAVertex vv = add_vertex(g); |
423 | setReportId(rm, g, expr, vv, -1); |
424 | g[vv].char_reach = CHARREACH_WORD_UCP_PRE; |
425 | add_edge(vv, g.accept, g); |
426 | g[e].assert_flags = 0; |
427 | add_edge(u, vv, g[e], g); |
428 | dead->insert(e); |
429 | } else { |
430 | /* need to add a non word byte or see eod */ |
431 | NFAVertex vv = add_vertex(g); |
432 | setReportId(rm, g, expr, vv, -1); |
433 | g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE; |
434 | add_edge(vv, g.accept, g); |
435 | g[e].assert_flags = 0; |
436 | add_edge(u, vv, g[e], g); |
437 | /* there may already be a different edge from start to eod if so |
438 | * we need to make it unconditional and alive |
439 | */ |
440 | if (NFAEdge start_eod = edge(u, g.acceptEod, g)) { |
441 | g[start_eod].assert_flags = 0; |
442 | dead->erase(start_eod); |
443 | } else { |
444 | add_edge(u, g.acceptEod, g[e], g); |
445 | } |
446 | dead->insert(e); |
447 | } |
448 | } else { |
449 | /* we can remove the asserts as we have partitioned the vertices |
450 | * into w/nw around the assert edges |
451 | */ |
452 | g[e].assert_flags = 0; |
453 | } |
454 | } |
455 | } |
456 | |
457 | void resolveAsserts(ReportManager &rm, NGHolder &g, |
458 | const ExpressionInfo &expr) { |
459 | vector<NFAEdge> asserts = getAsserts(g); |
460 | if (asserts.empty()) { |
461 | return; |
462 | } |
463 | |
464 | map<u32, NFAVertex> to_split; /* by index, for determinism */ |
465 | map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */ |
466 | findSplitters(g, asserts, &to_split, &to_split_ucp); |
467 | if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) { |
468 | throw CompileError(expr.index, "Pattern is too large." ); |
469 | } |
470 | |
471 | for (const auto &m : to_split) { |
472 | assert(!contains(to_split_ucp, m.first)); |
473 | splitVertex(rm, g, expr, m.second, false); |
474 | } |
475 | |
476 | for (const auto &m : to_split_ucp) { |
477 | splitVertex(rm, g, expr, m.second, true); |
478 | } |
479 | |
480 | set<NFAEdge> dead; |
481 | resolveEdges(rm, g, expr, &dead); |
482 | |
483 | remove_edges(dead, g); |
484 | renumber_vertices(g); |
485 | pruneUseless(g); |
486 | pruneEmptyVertices(g); |
487 | |
488 | renumber_vertices(g); |
489 | renumber_edges(g); |
490 | clearReports(g); |
491 | } |
492 | |
493 | void ensureCodePointStart(ReportManager &rm, NGHolder &g, |
494 | const ExpressionInfo &expr) { |
495 | /* In utf8 mode there is an implicit assertion that we start at codepoint |
496 | * boundaries. Assert resolution handles the badness coming from asserts. |
497 | * The only other source of trouble is startDs->accept connections. |
498 | */ |
499 | NFAEdge orig = edge(g.startDs, g.accept, g); |
500 | if (expr.utf8 && orig) { |
501 | DEBUG_PRINTF("rectifying %u\n" , expr.report); |
502 | Report ir = rm.getBasicInternalReport(expr); |
503 | ReportID rep = rm.getInternalId(ir); |
504 | |
505 | NFAVertex v_a = add_vertex(g); |
506 | g[v_a].assert_flags = POS_FLAG_VIRTUAL_START; |
507 | g[v_a].char_reach = UTF_ASCII_CR; |
508 | add_edge(v_a, g.accept, g[orig], g); |
509 | |
510 | NFAVertex v_2 = add_vertex(g); |
511 | g[v_2].assert_flags = POS_FLAG_VIRTUAL_START; |
512 | g[v_2].char_reach = CharReach(UTF_TWO_BYTE_MIN, UTF_TWO_BYTE_MAX); |
513 | |
514 | NFAVertex v_3 = add_vertex(g); |
515 | g[v_3].assert_flags = POS_FLAG_VIRTUAL_START; |
516 | g[v_3].char_reach = CharReach(UTF_THREE_BYTE_MIN, UTF_THREE_BYTE_MAX); |
517 | |
518 | NFAVertex v_4 = add_vertex(g); |
519 | g[v_4].assert_flags = POS_FLAG_VIRTUAL_START; |
520 | g[v_4].char_reach = CharReach(UTF_FOUR_BYTE_MIN, UTF_FOUR_BYTE_MAX); |
521 | |
522 | NFAVertex v_c = add_vertex(g); |
523 | g[v_c].assert_flags = POS_FLAG_VIRTUAL_START; |
524 | g[v_c].char_reach = UTF_CONT_CR; |
525 | add_edge(v_c, g.accept, g[orig], g); |
526 | |
527 | add_edge(v_2, v_c, g); |
528 | |
529 | NFAVertex v_3c = add_vertex(g); |
530 | g[v_3c].assert_flags = POS_FLAG_VIRTUAL_START; |
531 | g[v_3c].char_reach = UTF_CONT_CR; |
532 | add_edge(v_3c, v_c, g); |
533 | add_edge(v_3, v_3c, g); |
534 | |
535 | NFAVertex v_4c = add_vertex(g); |
536 | g[v_4c].assert_flags = POS_FLAG_VIRTUAL_START; |
537 | g[v_4c].char_reach = UTF_CONT_CR; |
538 | add_edge(v_4c, v_3c, g); |
539 | add_edge(v_4, v_4c, g); |
540 | |
541 | g[v_a].reports.insert(rep); |
542 | g[v_c].reports.insert(rep); |
543 | |
544 | add_edge(g.start, v_a, g); |
545 | add_edge(g.startDs, v_a, g); |
546 | add_edge(g.start, v_2, g); |
547 | add_edge(g.startDs, v_2, g); |
548 | add_edge(g.start, v_3, g); |
549 | add_edge(g.startDs, v_3, g); |
550 | add_edge(g.start, v_4, g); |
551 | add_edge(g.startDs, v_4, g); |
552 | remove_edge(orig, g); |
553 | renumber_edges(g); |
554 | clearReports(g); |
555 | } |
556 | } |
557 | |
558 | } // namespace ue2 |
559 | |