1/*
2 * DFA routines
3 * This file is #included by regexec.c.
4 *
5 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
6 *
7 * Development of this software was funded, in part, by Cray Research Inc.,
8 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9 * Corporation, none of whom are responsible for the results. The author
10 * thanks all of them.
11 *
12 * Redistribution and use in source and binary forms -- with or without
13 * modification -- are permitted for any purpose, provided that
14 * redistributions in source form retain this entire copyright notice and
15 * indicate the origin and nature of any modifications.
16 *
17 * I'd appreciate being given credit for this package in the documentation
18 * of software which uses it, but that is not a requirement.
19 *
20 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 * src/backend/regex/rege_dfa.c
32 *
33 */
34
35/*
36 * longest - longest-preferred matching engine
37 *
38 * On success, returns match endpoint address. Returns NULL on no match.
39 * Internal errors also return NULL, with v->err set.
40 */
41static chr *
42longest(struct vars *v,
43 struct dfa *d,
44 chr *start, /* where the match should start */
45 chr *stop, /* match must end at or before here */
46 int *hitstopp) /* record whether hit v->stop, if non-NULL */
47{
48 chr *cp;
49 chr *realstop = (stop == v->stop) ? stop : stop + 1;
50 color co;
51 struct sset *css;
52 struct sset *ss;
53 chr *post;
54 int i;
55 struct colormap *cm = d->cm;
56
57 /* prevent "uninitialized variable" warnings */
58 if (hitstopp != NULL)
59 *hitstopp = 0;
60
61 /* initialize */
62 css = initialize(v, d, start);
63 if (css == NULL)
64 return NULL;
65 cp = start;
66
67 /* startup */
68 FDEBUG(("+++ startup +++\n"));
69 if (cp == v->start)
70 {
71 co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
72 FDEBUG(("color %ld\n", (long) co));
73 }
74 else
75 {
76 co = GETCOLOR(cm, *(cp - 1));
77 FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
78 }
79 css = miss(v, d, css, co, cp, start);
80 if (css == NULL)
81 return NULL;
82 css->lastseen = cp;
83
84 /*
85 * This is the main text-scanning loop. It seems worth having two copies
86 * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
87 * builds, when you're not actively tracing.
88 */
89#ifdef REG_DEBUG
90 if (v->eflags & REG_FTRACE)
91 {
92 while (cp < realstop)
93 {
94 FDEBUG(("+++ at c%d +++\n", (int) (css - d->ssets)));
95 co = GETCOLOR(cm, *cp);
96 FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
97 ss = css->outs[co];
98 if (ss == NULL)
99 {
100 ss = miss(v, d, css, co, cp + 1, start);
101 if (ss == NULL)
102 break; /* NOTE BREAK OUT */
103 }
104 cp++;
105 ss->lastseen = cp;
106 css = ss;
107 }
108 }
109 else
110#endif
111 {
112 while (cp < realstop)
113 {
114 co = GETCOLOR(cm, *cp);
115 ss = css->outs[co];
116 if (ss == NULL)
117 {
118 ss = miss(v, d, css, co, cp + 1, start);
119 if (ss == NULL)
120 break; /* NOTE BREAK OUT */
121 }
122 cp++;
123 ss->lastseen = cp;
124 css = ss;
125 }
126 }
127
128 if (ISERR())
129 return NULL;
130
131 /* shutdown */
132 FDEBUG(("+++ shutdown at c%d +++\n", (int) (css - d->ssets)));
133 if (cp == v->stop && stop == v->stop)
134 {
135 if (hitstopp != NULL)
136 *hitstopp = 1;
137 co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
138 FDEBUG(("color %ld\n", (long) co));
139 ss = miss(v, d, css, co, cp, start);
140 if (ISERR())
141 return NULL;
142 /* special case: match ended at eol? */
143 if (ss != NULL && (ss->flags & POSTSTATE))
144 return cp;
145 else if (ss != NULL)
146 ss->lastseen = cp; /* to be tidy */
147 }
148
149 /* find last match, if any */
150 post = d->lastpost;
151 for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
152 if ((ss->flags & POSTSTATE) && post != ss->lastseen &&
153 (post == NULL || post < ss->lastseen))
154 post = ss->lastseen;
155 if (post != NULL) /* found one */
156 return post - 1;
157
158 return NULL;
159}
160
161/*
162 * shortest - shortest-preferred matching engine
163 *
164 * On success, returns match endpoint address. Returns NULL on no match.
165 * Internal errors also return NULL, with v->err set.
166 */
167static chr *
168shortest(struct vars *v,
169 struct dfa *d,
170 chr *start, /* where the match should start */
171 chr *min, /* match must end at or after here */
172 chr *max, /* match must end at or before here */
173 chr **coldp, /* store coldstart pointer here, if non-NULL */
174 int *hitstopp) /* record whether hit v->stop, if non-NULL */
175{
176 chr *cp;
177 chr *realmin = (min == v->stop) ? min : min + 1;
178 chr *realmax = (max == v->stop) ? max : max + 1;
179 color co;
180 struct sset *css;
181 struct sset *ss;
182 struct colormap *cm = d->cm;
183
184 /* prevent "uninitialized variable" warnings */
185 if (coldp != NULL)
186 *coldp = NULL;
187 if (hitstopp != NULL)
188 *hitstopp = 0;
189
190 /* initialize */
191 css = initialize(v, d, start);
192 if (css == NULL)
193 return NULL;
194 cp = start;
195
196 /* startup */
197 FDEBUG(("--- startup ---\n"));
198 if (cp == v->start)
199 {
200 co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
201 FDEBUG(("color %ld\n", (long) co));
202 }
203 else
204 {
205 co = GETCOLOR(cm, *(cp - 1));
206 FDEBUG(("char %c, color %ld\n", (char) *(cp - 1), (long) co));
207 }
208 css = miss(v, d, css, co, cp, start);
209 if (css == NULL)
210 return NULL;
211 css->lastseen = cp;
212 ss = css;
213
214 /*
215 * This is the main text-scanning loop. It seems worth having two copies
216 * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
217 * builds, when you're not actively tracing.
218 */
219#ifdef REG_DEBUG
220 if (v->eflags & REG_FTRACE)
221 {
222 while (cp < realmax)
223 {
224 FDEBUG(("--- at c%d ---\n", (int) (css - d->ssets)));
225 co = GETCOLOR(cm, *cp);
226 FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
227 ss = css->outs[co];
228 if (ss == NULL)
229 {
230 ss = miss(v, d, css, co, cp + 1, start);
231 if (ss == NULL)
232 break; /* NOTE BREAK OUT */
233 }
234 cp++;
235 ss->lastseen = cp;
236 css = ss;
237 if ((ss->flags & POSTSTATE) && cp >= realmin)
238 break; /* NOTE BREAK OUT */
239 }
240 }
241 else
242#endif
243 {
244 while (cp < realmax)
245 {
246 co = GETCOLOR(cm, *cp);
247 ss = css->outs[co];
248 if (ss == NULL)
249 {
250 ss = miss(v, d, css, co, cp + 1, start);
251 if (ss == NULL)
252 break; /* NOTE BREAK OUT */
253 }
254 cp++;
255 ss->lastseen = cp;
256 css = ss;
257 if ((ss->flags & POSTSTATE) && cp >= realmin)
258 break; /* NOTE BREAK OUT */
259 }
260 }
261
262 if (ss == NULL)
263 return NULL;
264
265 if (coldp != NULL) /* report last no-progress state set, if any */
266 *coldp = lastcold(v, d);
267
268 if ((ss->flags & POSTSTATE) && cp > min)
269 {
270 assert(cp >= realmin);
271 cp--;
272 }
273 else if (cp == v->stop && max == v->stop)
274 {
275 co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
276 FDEBUG(("color %ld\n", (long) co));
277 ss = miss(v, d, css, co, cp, start);
278 /* match might have ended at eol */
279 if ((ss == NULL || !(ss->flags & POSTSTATE)) && hitstopp != NULL)
280 *hitstopp = 1;
281 }
282
283 if (ss == NULL || !(ss->flags & POSTSTATE))
284 return NULL;
285
286 return cp;
287}
288
289/*
290 * matchuntil - incremental matching engine
291 *
292 * This is meant for use with a search-style NFA (that is, the pattern is
293 * known to act as though it had a leading .*). We determine whether a
294 * match exists starting at v->start and ending at probe. Multiple calls
295 * require only O(N) time not O(N^2) so long as the probe values are
296 * nondecreasing. *lastcss and *lastcp must be initialized to NULL before
297 * starting a series of calls.
298 *
299 * Returns 1 if a match exists, 0 if not.
300 * Internal errors also return 0, with v->err set.
301 */
302static int
303matchuntil(struct vars *v,
304 struct dfa *d,
305 chr *probe, /* we want to know if a match ends here */
306 struct sset **lastcss, /* state storage across calls */
307 chr **lastcp) /* state storage across calls */
308{
309 chr *cp = *lastcp;
310 color co;
311 struct sset *css = *lastcss;
312 struct sset *ss;
313 struct colormap *cm = d->cm;
314
315 /* initialize and startup, or restart, if necessary */
316 if (cp == NULL || cp > probe)
317 {
318 cp = v->start;
319 css = initialize(v, d, cp);
320 if (css == NULL)
321 return 0;
322
323 FDEBUG((">>> startup >>>\n"));
324 co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
325 FDEBUG(("color %ld\n", (long) co));
326
327 css = miss(v, d, css, co, cp, v->start);
328 if (css == NULL)
329 return 0;
330 css->lastseen = cp;
331 }
332 else if (css == NULL)
333 {
334 /* we previously found that no match is possible beyond *lastcp */
335 return 0;
336 }
337 ss = css;
338
339 /*
340 * This is the main text-scanning loop. It seems worth having two copies
341 * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
342 * builds, when you're not actively tracing.
343 */
344#ifdef REG_DEBUG
345 if (v->eflags & REG_FTRACE)
346 {
347 while (cp < probe)
348 {
349 FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
350 co = GETCOLOR(cm, *cp);
351 FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
352 ss = css->outs[co];
353 if (ss == NULL)
354 {
355 ss = miss(v, d, css, co, cp + 1, v->start);
356 if (ss == NULL)
357 break; /* NOTE BREAK OUT */
358 }
359 cp++;
360 ss->lastseen = cp;
361 css = ss;
362 }
363 }
364 else
365#endif
366 {
367 while (cp < probe)
368 {
369 co = GETCOLOR(cm, *cp);
370 ss = css->outs[co];
371 if (ss == NULL)
372 {
373 ss = miss(v, d, css, co, cp + 1, v->start);
374 if (ss == NULL)
375 break; /* NOTE BREAK OUT */
376 }
377 cp++;
378 ss->lastseen = cp;
379 css = ss;
380 }
381 }
382
383 *lastcss = ss;
384 *lastcp = cp;
385
386 if (ss == NULL)
387 return 0; /* impossible match, or internal error */
388
389 /* We need to process one more chr, or the EOS symbol, to check match */
390 if (cp < v->stop)
391 {
392 FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
393 co = GETCOLOR(cm, *cp);
394 FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
395 ss = css->outs[co];
396 if (ss == NULL)
397 ss = miss(v, d, css, co, cp + 1, v->start);
398 }
399 else
400 {
401 assert(cp == v->stop);
402 co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
403 FDEBUG(("color %ld\n", (long) co));
404 ss = miss(v, d, css, co, cp, v->start);
405 }
406
407 if (ss == NULL || !(ss->flags & POSTSTATE))
408 return 0;
409
410 return 1;
411}
412
413/*
414 * lastcold - determine last point at which no progress had been made
415 */
416static chr * /* endpoint, or NULL */
417lastcold(struct vars *v,
418 struct dfa *d)
419{
420 struct sset *ss;
421 chr *nopr;
422 int i;
423
424 nopr = d->lastnopr;
425 if (nopr == NULL)
426 nopr = v->start;
427 for (ss = d->ssets, i = d->nssused; i > 0; ss++, i--)
428 if ((ss->flags & NOPROGRESS) && nopr < ss->lastseen)
429 nopr = ss->lastseen;
430 return nopr;
431}
432
433/*
434 * newdfa - set up a fresh DFA
435 */
436static struct dfa *
437newdfa(struct vars *v,
438 struct cnfa *cnfa,
439 struct colormap *cm,
440 struct smalldfa *sml) /* preallocated space, may be NULL */
441{
442 struct dfa *d;
443 size_t nss = cnfa->nstates * 2;
444 int wordsper = (cnfa->nstates + UBITS - 1) / UBITS;
445 struct smalldfa *smallwas = sml;
446
447 assert(cnfa != NULL && cnfa->nstates != 0);
448
449 if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS)
450 {
451 assert(wordsper == 1);
452 if (sml == NULL)
453 {
454 sml = (struct smalldfa *) MALLOC(sizeof(struct smalldfa));
455 if (sml == NULL)
456 {
457 ERR(REG_ESPACE);
458 return NULL;
459 }
460 }
461 d = &sml->dfa;
462 d->ssets = sml->ssets;
463 d->statesarea = sml->statesarea;
464 d->work = &d->statesarea[nss];
465 d->outsarea = sml->outsarea;
466 d->incarea = sml->incarea;
467 d->cptsmalloced = 0;
468 d->mallocarea = (smallwas == NULL) ? (char *) sml : NULL;
469 }
470 else
471 {
472 d = (struct dfa *) MALLOC(sizeof(struct dfa));
473 if (d == NULL)
474 {
475 ERR(REG_ESPACE);
476 return NULL;
477 }
478 d->ssets = (struct sset *) MALLOC(nss * sizeof(struct sset));
479 d->statesarea = (unsigned *) MALLOC((nss + WORK) * wordsper *
480 sizeof(unsigned));
481 d->work = &d->statesarea[nss * wordsper];
482 d->outsarea = (struct sset **) MALLOC(nss * cnfa->ncolors *
483 sizeof(struct sset *));
484 d->incarea = (struct arcp *) MALLOC(nss * cnfa->ncolors *
485 sizeof(struct arcp));
486 d->cptsmalloced = 1;
487 d->mallocarea = (char *) d;
488 if (d->ssets == NULL || d->statesarea == NULL ||
489 d->outsarea == NULL || d->incarea == NULL)
490 {
491 freedfa(d);
492 ERR(REG_ESPACE);
493 return NULL;
494 }
495 }
496
497 d->nssets = (v->eflags & REG_SMALL) ? 7 : nss;
498 d->nssused = 0;
499 d->nstates = cnfa->nstates;
500 d->ncolors = cnfa->ncolors;
501 d->wordsper = wordsper;
502 d->cnfa = cnfa;
503 d->cm = cm;
504 d->lastpost = NULL;
505 d->lastnopr = NULL;
506 d->search = d->ssets;
507
508 /* initialization of sset fields is done as needed */
509
510 return d;
511}
512
513/*
514 * freedfa - free a DFA
515 */
516static void
517freedfa(struct dfa *d)
518{
519 if (d->cptsmalloced)
520 {
521 if (d->ssets != NULL)
522 FREE(d->ssets);
523 if (d->statesarea != NULL)
524 FREE(d->statesarea);
525 if (d->outsarea != NULL)
526 FREE(d->outsarea);
527 if (d->incarea != NULL)
528 FREE(d->incarea);
529 }
530
531 if (d->mallocarea != NULL)
532 FREE(d->mallocarea);
533}
534
/*
 * hash - construct a hash code for a bitvector
 *
 * The code is simply the XOR of the first n words of uv; a non-positive n
 * yields 0.  There are probably better hashes, but they cost more.
 */
static unsigned
hash(unsigned *uv,
     int n)
{
    unsigned   *word = uv;
    unsigned    acc = 0;
    int         left;

    for (left = n; left > 0; left--)
        acc ^= *word++;

    return acc;
}
552
553/*
554 * initialize - hand-craft a cache entry for startup, otherwise get ready
555 */
556static struct sset *
557initialize(struct vars *v,
558 struct dfa *d,
559 chr *start)
560{
561 struct sset *ss;
562 int i;
563
564 /* is previous one still there? */
565 if (d->nssused > 0 && (d->ssets[0].flags & STARTER))
566 ss = &d->ssets[0];
567 else
568 { /* no, must (re)build it */
569 ss = getvacant(v, d, start, start);
570 if (ss == NULL)
571 return NULL;
572 for (i = 0; i < d->wordsper; i++)
573 ss->states[i] = 0;
574 BSET(ss->states, d->cnfa->pre);
575 ss->hash = HASH(ss->states, d->wordsper);
576 assert(d->cnfa->pre != d->cnfa->post);
577 ss->flags = STARTER | LOCKED | NOPROGRESS;
578 /* lastseen dealt with below */
579 }
580
581 for (i = 0; i < d->nssused; i++)
582 d->ssets[i].lastseen = NULL;
583 ss->lastseen = start; /* maybe untrue, but harmless */
584 d->lastpost = NULL;
585 d->lastnopr = NULL;
586 return ss;
587}
588
589/*
590 * miss - handle a stateset cache miss
591 *
592 * css is the current stateset, co is the color of the current input character,
593 * cp points to the character after that (which is where we may need to test
594 * LACONs). start does not affect matching behavior but is needed for pickss'
595 * heuristics about which stateset cache entry to replace.
596 *
597 * Ordinarily, returns the address of the next stateset (the one that is
598 * valid after consuming the input character). Returns NULL if no valid
599 * NFA states remain, ie we have a certain match failure.
600 * Internal errors also return NULL, with v->err set.
601 */
602static struct sset *
603miss(struct vars *v,
604 struct dfa *d,
605 struct sset *css,
606 color co,
607 chr *cp, /* next chr */
608 chr *start) /* where the attempt got started */
609{
610 struct cnfa *cnfa = d->cnfa;
611 int i;
612 unsigned h;
613 struct carc *ca;
614 struct sset *p;
615 int ispost;
616 int noprogress;
617 int gotstate;
618 int dolacons;
619 int sawlacons;
620
621 /* for convenience, we can be called even if it might not be a miss */
622 if (css->outs[co] != NULL)
623 {
624 FDEBUG(("hit\n"));
625 return css->outs[co];
626 }
627 FDEBUG(("miss\n"));
628
629 /*
630 * Checking for operation cancel in the inner text search loop seems
631 * unduly expensive. As a compromise, check during cache misses.
632 */
633 if (CANCEL_REQUESTED(v->re))
634 {
635 ERR(REG_CANCEL);
636 return NULL;
637 }
638
639 /*
640 * What set of states would we end up in after consuming the co character?
641 * We first consider PLAIN arcs that consume the character, and then look
642 * to see what LACON arcs could be traversed after consuming it.
643 */
644 for (i = 0; i < d->wordsper; i++)
645 d->work[i] = 0; /* build new stateset bitmap in d->work */
646 ispost = 0;
647 noprogress = 1;
648 gotstate = 0;
649 for (i = 0; i < d->nstates; i++)
650 if (ISBSET(css->states, i))
651 for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
652 if (ca->co == co)
653 {
654 BSET(d->work, ca->to);
655 gotstate = 1;
656 if (ca->to == cnfa->post)
657 ispost = 1;
658 if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
659 noprogress = 0;
660 FDEBUG(("%d -> %d\n", i, ca->to));
661 }
662 if (!gotstate)
663 return NULL; /* character cannot reach any new state */
664 dolacons = (cnfa->flags & HASLACONS);
665 sawlacons = 0;
666 /* outer loop handles transitive closure of reachable-by-LACON states */
667 while (dolacons)
668 {
669 dolacons = 0;
670 for (i = 0; i < d->nstates; i++)
671 if (ISBSET(d->work, i))
672 for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
673 {
674 if (ca->co < cnfa->ncolors)
675 continue; /* not a LACON arc */
676 if (ISBSET(d->work, ca->to))
677 continue; /* arc would be a no-op anyway */
678 sawlacons = 1; /* this LACON affects our result */
679 if (!lacon(v, cnfa, cp, ca->co))
680 {
681 if (ISERR())
682 return NULL;
683 continue; /* LACON arc cannot be traversed */
684 }
685 if (ISERR())
686 return NULL;
687 BSET(d->work, ca->to);
688 dolacons = 1;
689 if (ca->to == cnfa->post)
690 ispost = 1;
691 if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
692 noprogress = 0;
693 FDEBUG(("%d :> %d\n", i, ca->to));
694 }
695 }
696 h = HASH(d->work, d->wordsper);
697
698 /* Is this stateset already in the cache? */
699 for (p = d->ssets, i = d->nssused; i > 0; p++, i--)
700 if (HIT(h, d->work, p, d->wordsper))
701 {
702 FDEBUG(("cached c%d\n", (int) (p - d->ssets)));
703 break; /* NOTE BREAK OUT */
704 }
705 if (i == 0)
706 { /* nope, need a new cache entry */
707 p = getvacant(v, d, cp, start);
708 if (p == NULL)
709 return NULL;
710 assert(p != css);
711 for (i = 0; i < d->wordsper; i++)
712 p->states[i] = d->work[i];
713 p->hash = h;
714 p->flags = (ispost) ? POSTSTATE : 0;
715 if (noprogress)
716 p->flags |= NOPROGRESS;
717 /* lastseen to be dealt with by caller */
718 }
719
720 /*
721 * Link new stateset to old, unless a LACON affected the result, in which
722 * case we don't create the link. That forces future transitions across
723 * this same arc (same prior stateset and character color) to come through
724 * miss() again, so that we can recheck the LACON(s), which might or might
725 * not pass since context will be different.
726 */
727 if (!sawlacons)
728 {
729 FDEBUG(("c%d[%d]->c%d\n",
730 (int) (css - d->ssets), co, (int) (p - d->ssets)));
731 css->outs[co] = p;
732 css->inchain[co] = p->ins;
733 p->ins.ss = css;
734 p->ins.co = co;
735 }
736 return p;
737}
738
739/*
740 * lacon - lookaround-constraint checker for miss()
741 */
742static int /* predicate: constraint satisfied? */
743lacon(struct vars *v,
744 struct cnfa *pcnfa, /* parent cnfa */
745 chr *cp,
746 color co) /* "color" of the lookaround constraint */
747{
748 int n;
749 struct subre *sub;
750 struct dfa *d;
751 chr *end;
752 int satisfied;
753
754 /* Since this is recursive, it could be driven to stack overflow */
755 if (STACK_TOO_DEEP(v->re))
756 {
757 ERR(REG_ETOOBIG);
758 return 0;
759 }
760
761 n = co - pcnfa->ncolors;
762 assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
763 FDEBUG(("=== testing lacon %d\n", n));
764 sub = &v->g->lacons[n];
765 d = getladfa(v, n);
766 if (d == NULL)
767 return 0;
768 if (LATYPE_IS_AHEAD(sub->subno))
769 {
770 /* used to use longest() here, but shortest() could be much cheaper */
771 end = shortest(v, d, cp, cp, v->stop,
772 (chr **) NULL, (int *) NULL);
773 satisfied = LATYPE_IS_POS(sub->subno) ? (end != NULL) : (end == NULL);
774 }
775 else
776 {
777 /*
778 * To avoid doing O(N^2) work when repeatedly testing a lookbehind
779 * constraint in an N-character string, we use matchuntil() which can
780 * cache the DFA state across calls. We only need to restart if the
781 * probe point decreases, which is not common. The NFA we're using is
782 * a search NFA, so it doesn't mind scanning over stuff before the
783 * nominal match.
784 */
785 satisfied = matchuntil(v, d, cp, &v->lblastcss[n], &v->lblastcp[n]);
786 if (!LATYPE_IS_POS(sub->subno))
787 satisfied = !satisfied;
788 }
789 FDEBUG(("=== lacon %d satisfied %d\n", n, satisfied));
790 return satisfied;
791}
792
793/*
794 * getvacant - get a vacant state set
795 *
796 * This routine clears out the inarcs and outarcs, but does not otherwise
797 * clear the innards of the state set -- that's up to the caller.
798 */
799static struct sset *
800getvacant(struct vars *v,
801 struct dfa *d,
802 chr *cp,
803 chr *start)
804{
805 int i;
806 struct sset *ss;
807 struct sset *p;
808 struct arcp ap;
809 color co;
810
811 ss = pickss(v, d, cp, start);
812 if (ss == NULL)
813 return NULL;
814 assert(!(ss->flags & LOCKED));
815
816 /* clear out its inarcs, including self-referential ones */
817 ap = ss->ins;
818 while ((p = ap.ss) != NULL)
819 {
820 co = ap.co;
821 FDEBUG(("zapping c%d's %ld outarc\n", (int) (p - d->ssets), (long) co));
822 p->outs[co] = NULL;
823 ap = p->inchain[co];
824 p->inchain[co].ss = NULL; /* paranoia */
825 }
826 ss->ins.ss = NULL;
827
828 /* take it off the inarc chains of the ssets reached by its outarcs */
829 for (i = 0; i < d->ncolors; i++)
830 {
831 p = ss->outs[i];
832 assert(p != ss); /* not self-referential */
833 if (p == NULL)
834 continue; /* NOTE CONTINUE */
835 FDEBUG(("del outarc %d from c%d's in chn\n", i, (int) (p - d->ssets)));
836 if (p->ins.ss == ss && p->ins.co == i)
837 p->ins = ss->inchain[i];
838 else
839 {
840 struct arcp lastap = {NULL, 0};
841
842 assert(p->ins.ss != NULL);
843 for (ap = p->ins; ap.ss != NULL &&
844 !(ap.ss == ss && ap.co == i);
845 ap = ap.ss->inchain[ap.co])
846 lastap = ap;
847 assert(ap.ss != NULL);
848 lastap.ss->inchain[lastap.co] = ss->inchain[i];
849 }
850 ss->outs[i] = NULL;
851 ss->inchain[i].ss = NULL;
852 }
853
854 /* if ss was a success state, may need to remember location */
855 if ((ss->flags & POSTSTATE) && ss->lastseen != d->lastpost &&
856 (d->lastpost == NULL || d->lastpost < ss->lastseen))
857 d->lastpost = ss->lastseen;
858
859 /* likewise for a no-progress state */
860 if ((ss->flags & NOPROGRESS) && ss->lastseen != d->lastnopr &&
861 (d->lastnopr == NULL || d->lastnopr < ss->lastseen))
862 d->lastnopr = ss->lastseen;
863
864 return ss;
865}
866
867/*
868 * pickss - pick the next stateset to be used
869 */
870static struct sset *
871pickss(struct vars *v,
872 struct dfa *d,
873 chr *cp,
874 chr *start)
875{
876 int i;
877 struct sset *ss;
878 struct sset *end;
879 chr *ancient;
880
881 /* shortcut for cases where cache isn't full */
882 if (d->nssused < d->nssets)
883 {
884 i = d->nssused;
885 d->nssused++;
886 ss = &d->ssets[i];
887 FDEBUG(("new c%d\n", i));
888 /* set up innards */
889 ss->states = &d->statesarea[i * d->wordsper];
890 ss->flags = 0;
891 ss->ins.ss = NULL;
892 ss->ins.co = WHITE; /* give it some value */
893 ss->outs = &d->outsarea[i * d->ncolors];
894 ss->inchain = &d->incarea[i * d->ncolors];
895 for (i = 0; i < d->ncolors; i++)
896 {
897 ss->outs[i] = NULL;
898 ss->inchain[i].ss = NULL;
899 }
900 return ss;
901 }
902
903 /* look for oldest, or old enough anyway */
904 if (cp - start > d->nssets * 2 / 3) /* oldest 33% are expendable */
905 ancient = cp - d->nssets * 2 / 3;
906 else
907 ancient = start;
908 for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++)
909 if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
910 !(ss->flags & LOCKED))
911 {
912 d->search = ss + 1;
913 FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
914 return ss;
915 }
916 for (ss = d->ssets, end = d->search; ss < end; ss++)
917 if ((ss->lastseen == NULL || ss->lastseen < ancient) &&
918 !(ss->flags & LOCKED))
919 {
920 d->search = ss + 1;
921 FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
922 return ss;
923 }
924
925 /* nobody's old enough?!? -- something's really wrong */
926 FDEBUG(("cannot find victim to replace!\n"));
927 ERR(REG_ASSERT);
928 return NULL;
929}
930