1/*
2 * NFA utilities.
3 * This file is #included by regcomp.c.
4 *
5 * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
6 *
7 * Development of this software was funded, in part, by Cray Research Inc.,
8 * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9 * Corporation, none of whom are responsible for the results. The author
10 * thanks all of them.
11 *
12 * Redistribution and use in source and binary forms -- with or without
13 * modification -- are permitted for any purpose, provided that
14 * redistributions in source form retain this entire copyright notice and
15 * indicate the origin and nature of any modifications.
16 *
17 * I'd appreciate being given credit for this package in the documentation
18 * of software which uses it, but that is not a requirement.
19 *
20 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 * src/backend/regex/regc_nfa.c
32 *
33 *
34 * One or two things that technically ought to be in here
35 * are actually in color.c, thanks to some incestuous relationships in
36 * the color chains.
37 */
38
39#define NISERR() VISERR(nfa->v)
40#define NERR(e) VERR(nfa->v, (e))
41
42
43/*
44 * newnfa - set up an NFA
45 */
46static struct nfa * /* the NFA, or NULL */
47newnfa(struct vars *v,
48 struct colormap *cm,
49 struct nfa *parent) /* NULL if primary NFA */
50{
51 struct nfa *nfa;
52
53 nfa = (struct nfa *) MALLOC(sizeof(struct nfa));
54 if (nfa == NULL)
55 {
56 ERR(REG_ESPACE);
57 return NULL;
58 }
59
60 nfa->states = NULL;
61 nfa->slast = NULL;
62 nfa->free = NULL;
63 nfa->nstates = 0;
64 nfa->cm = cm;
65 nfa->v = v;
66 nfa->bos[0] = nfa->bos[1] = COLORLESS;
67 nfa->eos[0] = nfa->eos[1] = COLORLESS;
68 nfa->parent = parent; /* Precedes newfstate so parent is valid. */
69 nfa->post = newfstate(nfa, '@'); /* number 0 */
70 nfa->pre = newfstate(nfa, '>'); /* number 1 */
71
72 nfa->init = newstate(nfa); /* may become invalid later */
73 nfa->final = newstate(nfa);
74 if (ISERR())
75 {
76 freenfa(nfa);
77 return NULL;
78 }
79 rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
80 newarc(nfa, '^', 1, nfa->pre, nfa->init);
81 newarc(nfa, '^', 0, nfa->pre, nfa->init);
82 rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
83 newarc(nfa, '$', 1, nfa->final, nfa->post);
84 newarc(nfa, '$', 0, nfa->final, nfa->post);
85
86 if (ISERR())
87 {
88 freenfa(nfa);
89 return NULL;
90 }
91 return nfa;
92}
93
94/*
95 * freenfa - free an entire NFA
96 */
97static void
98freenfa(struct nfa *nfa)
99{
100 struct state *s;
101
102 while ((s = nfa->states) != NULL)
103 {
104 s->nins = s->nouts = 0; /* don't worry about arcs */
105 freestate(nfa, s);
106 }
107 while ((s = nfa->free) != NULL)
108 {
109 nfa->free = s->next;
110 destroystate(nfa, s);
111 }
112
113 nfa->slast = NULL;
114 nfa->nstates = -1;
115 nfa->pre = NULL;
116 nfa->post = NULL;
117 FREE(nfa);
118}
119
120/*
121 * newstate - allocate an NFA state, with zero flag value
122 */
123static struct state * /* NULL on error */
124newstate(struct nfa *nfa)
125{
126 struct state *s;
127
128 /*
129 * This is a handy place to check for operation cancel during regex
130 * compilation, since no code path will go very long without making a new
131 * state or arc.
132 */
133 if (CANCEL_REQUESTED(nfa->v->re))
134 {
135 NERR(REG_CANCEL);
136 return NULL;
137 }
138
139 if (nfa->free != NULL)
140 {
141 s = nfa->free;
142 nfa->free = s->next;
143 }
144 else
145 {
146 if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
147 {
148 NERR(REG_ETOOBIG);
149 return NULL;
150 }
151 s = (struct state *) MALLOC(sizeof(struct state));
152 if (s == NULL)
153 {
154 NERR(REG_ESPACE);
155 return NULL;
156 }
157 nfa->v->spaceused += sizeof(struct state);
158 s->oas.next = NULL;
159 s->free = NULL;
160 s->noas = 0;
161 }
162
163 assert(nfa->nstates >= 0);
164 s->no = nfa->nstates++;
165 s->flag = 0;
166 if (nfa->states == NULL)
167 nfa->states = s;
168 s->nins = 0;
169 s->ins = NULL;
170 s->nouts = 0;
171 s->outs = NULL;
172 s->tmp = NULL;
173 s->next = NULL;
174 if (nfa->slast != NULL)
175 {
176 assert(nfa->slast->next == NULL);
177 nfa->slast->next = s;
178 }
179 s->prev = nfa->slast;
180 nfa->slast = s;
181 return s;
182}
183
184/*
185 * newfstate - allocate an NFA state with a specified flag value
186 */
187static struct state * /* NULL on error */
188newfstate(struct nfa *nfa, int flag)
189{
190 struct state *s;
191
192 s = newstate(nfa);
193 if (s != NULL)
194 s->flag = (char) flag;
195 return s;
196}
197
198/*
199 * dropstate - delete a state's inarcs and outarcs and free it
200 */
201static void
202dropstate(struct nfa *nfa,
203 struct state *s)
204{
205 struct arc *a;
206
207 while ((a = s->ins) != NULL)
208 freearc(nfa, a);
209 while ((a = s->outs) != NULL)
210 freearc(nfa, a);
211 freestate(nfa, s);
212}
213
214/*
215 * freestate - free a state, which has no in-arcs or out-arcs
216 */
217static void
218freestate(struct nfa *nfa,
219 struct state *s)
220{
221 assert(s != NULL);
222 assert(s->nins == 0 && s->nouts == 0);
223
224 s->no = FREESTATE;
225 s->flag = 0;
226 if (s->next != NULL)
227 s->next->prev = s->prev;
228 else
229 {
230 assert(s == nfa->slast);
231 nfa->slast = s->prev;
232 }
233 if (s->prev != NULL)
234 s->prev->next = s->next;
235 else
236 {
237 assert(s == nfa->states);
238 nfa->states = s->next;
239 }
240 s->prev = NULL;
241 s->next = nfa->free; /* don't delete it, put it on the free list */
242 nfa->free = s;
243}
244
245/*
246 * destroystate - really get rid of an already-freed state
247 */
248static void
249destroystate(struct nfa *nfa,
250 struct state *s)
251{
252 struct arcbatch *ab;
253 struct arcbatch *abnext;
254
255 assert(s->no == FREESTATE);
256 for (ab = s->oas.next; ab != NULL; ab = abnext)
257 {
258 abnext = ab->next;
259 FREE(ab);
260 nfa->v->spaceused -= sizeof(struct arcbatch);
261 }
262 s->ins = NULL;
263 s->outs = NULL;
264 s->next = NULL;
265 FREE(s);
266 nfa->v->spaceused -= sizeof(struct state);
267}
268
269/*
270 * newarc - set up a new arc within an NFA
271 *
272 * This function checks to make sure that no duplicate arcs are created.
273 * In general we never want duplicates.
274 */
275static void
276newarc(struct nfa *nfa,
277 int t,
278 color co,
279 struct state *from,
280 struct state *to)
281{
282 struct arc *a;
283
284 assert(from != NULL && to != NULL);
285
286 /*
287 * This is a handy place to check for operation cancel during regex
288 * compilation, since no code path will go very long without making a new
289 * state or arc.
290 */
291 if (CANCEL_REQUESTED(nfa->v->re))
292 {
293 NERR(REG_CANCEL);
294 return;
295 }
296
297 /* check for duplicate arc, using whichever chain is shorter */
298 if (from->nouts <= to->nins)
299 {
300 for (a = from->outs; a != NULL; a = a->outchain)
301 if (a->to == to && a->co == co && a->type == t)
302 return;
303 }
304 else
305 {
306 for (a = to->ins; a != NULL; a = a->inchain)
307 if (a->from == from && a->co == co && a->type == t)
308 return;
309 }
310
311 /* no dup, so create the arc */
312 createarc(nfa, t, co, from, to);
313}
314
315/*
316 * createarc - create a new arc within an NFA
317 *
318 * This function must *only* be used after verifying that there is no existing
319 * identical arc (same type/color/from/to).
320 */
321static void
322createarc(struct nfa *nfa,
323 int t,
324 color co,
325 struct state *from,
326 struct state *to)
327{
328 struct arc *a;
329
330 /* the arc is physically allocated within its from-state */
331 a = allocarc(nfa, from);
332 if (NISERR())
333 return;
334 assert(a != NULL);
335
336 a->type = t;
337 a->co = co;
338 a->to = to;
339 a->from = from;
340
341 /*
342 * Put the new arc on the beginning, not the end, of the chains; it's
343 * simpler here, and freearc() is the same cost either way. See also the
344 * logic in moveins() and its cohorts, as well as fixempties().
345 */
346 a->inchain = to->ins;
347 a->inchainRev = NULL;
348 if (to->ins)
349 to->ins->inchainRev = a;
350 to->ins = a;
351 a->outchain = from->outs;
352 a->outchainRev = NULL;
353 if (from->outs)
354 from->outs->outchainRev = a;
355 from->outs = a;
356
357 from->nouts++;
358 to->nins++;
359
360 if (COLORED(a) && nfa->parent == NULL)
361 colorchain(nfa->cm, a);
362}
363
364/*
365 * allocarc - allocate a new out-arc within a state
366 */
367static struct arc * /* NULL for failure */
368allocarc(struct nfa *nfa,
369 struct state *s)
370{
371 struct arc *a;
372
373 /* shortcut */
374 if (s->free == NULL && s->noas < ABSIZE)
375 {
376 a = &s->oas.a[s->noas];
377 s->noas++;
378 return a;
379 }
380
381 /* if none at hand, get more */
382 if (s->free == NULL)
383 {
384 struct arcbatch *newAb;
385 int i;
386
387 if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
388 {
389 NERR(REG_ETOOBIG);
390 return NULL;
391 }
392 newAb = (struct arcbatch *) MALLOC(sizeof(struct arcbatch));
393 if (newAb == NULL)
394 {
395 NERR(REG_ESPACE);
396 return NULL;
397 }
398 nfa->v->spaceused += sizeof(struct arcbatch);
399 newAb->next = s->oas.next;
400 s->oas.next = newAb;
401
402 for (i = 0; i < ABSIZE; i++)
403 {
404 newAb->a[i].type = 0;
405 newAb->a[i].freechain = &newAb->a[i + 1];
406 }
407 newAb->a[ABSIZE - 1].freechain = NULL;
408 s->free = &newAb->a[0];
409 }
410 assert(s->free != NULL);
411
412 a = s->free;
413 s->free = a->freechain;
414 return a;
415}
416
417/*
418 * freearc - free an arc
419 */
420static void
421freearc(struct nfa *nfa,
422 struct arc *victim)
423{
424 struct state *from = victim->from;
425 struct state *to = victim->to;
426 struct arc *predecessor;
427
428 assert(victim->type != 0);
429
430 /* take it off color chain if necessary */
431 if (COLORED(victim) && nfa->parent == NULL)
432 uncolorchain(nfa->cm, victim);
433
434 /* take it off source's out-chain */
435 assert(from != NULL);
436 predecessor = victim->outchainRev;
437 if (predecessor == NULL)
438 {
439 assert(from->outs == victim);
440 from->outs = victim->outchain;
441 }
442 else
443 {
444 assert(predecessor->outchain == victim);
445 predecessor->outchain = victim->outchain;
446 }
447 if (victim->outchain != NULL)
448 {
449 assert(victim->outchain->outchainRev == victim);
450 victim->outchain->outchainRev = predecessor;
451 }
452 from->nouts--;
453
454 /* take it off target's in-chain */
455 assert(to != NULL);
456 predecessor = victim->inchainRev;
457 if (predecessor == NULL)
458 {
459 assert(to->ins == victim);
460 to->ins = victim->inchain;
461 }
462 else
463 {
464 assert(predecessor->inchain == victim);
465 predecessor->inchain = victim->inchain;
466 }
467 if (victim->inchain != NULL)
468 {
469 assert(victim->inchain->inchainRev == victim);
470 victim->inchain->inchainRev = predecessor;
471 }
472 to->nins--;
473
474 /* clean up and place on from-state's free list */
475 victim->type = 0;
476 victim->from = NULL; /* precautions... */
477 victim->to = NULL;
478 victim->inchain = NULL;
479 victim->inchainRev = NULL;
480 victim->outchain = NULL;
481 victim->outchainRev = NULL;
482 victim->freechain = from->free;
483 from->free = victim;
484}
485
486/*
487 * changearctarget - flip an arc to have a different to state
488 *
489 * Caller must have verified that there is no pre-existing duplicate arc.
490 *
491 * Note that because we store arcs in their from state, we can't easily have
492 * a similar changearcsource function.
493 */
494static void
495changearctarget(struct arc *a, struct state *newto)
496{
497 struct state *oldto = a->to;
498 struct arc *predecessor;
499
500 assert(oldto != newto);
501
502 /* take it off old target's in-chain */
503 assert(oldto != NULL);
504 predecessor = a->inchainRev;
505 if (predecessor == NULL)
506 {
507 assert(oldto->ins == a);
508 oldto->ins = a->inchain;
509 }
510 else
511 {
512 assert(predecessor->inchain == a);
513 predecessor->inchain = a->inchain;
514 }
515 if (a->inchain != NULL)
516 {
517 assert(a->inchain->inchainRev == a);
518 a->inchain->inchainRev = predecessor;
519 }
520 oldto->nins--;
521
522 a->to = newto;
523
524 /* prepend it to new target's in-chain */
525 a->inchain = newto->ins;
526 a->inchainRev = NULL;
527 if (newto->ins)
528 newto->ins->inchainRev = a;
529 newto->ins = a;
530 newto->nins++;
531}
532
533/*
534 * hasnonemptyout - Does state have a non-EMPTY out arc?
535 */
536static int
537hasnonemptyout(struct state *s)
538{
539 struct arc *a;
540
541 for (a = s->outs; a != NULL; a = a->outchain)
542 {
543 if (a->type != EMPTY)
544 return 1;
545 }
546 return 0;
547}
548
549/*
550 * findarc - find arc, if any, from given source with given type and color
551 * If there is more than one such arc, the result is random.
552 */
553static struct arc *
554findarc(struct state *s,
555 int type,
556 color co)
557{
558 struct arc *a;
559
560 for (a = s->outs; a != NULL; a = a->outchain)
561 if (a->type == type && a->co == co)
562 return a;
563 return NULL;
564}
565
566/*
567 * cparc - allocate a new arc within an NFA, copying details from old one
568 */
569static void
570cparc(struct nfa *nfa,
571 struct arc *oa,
572 struct state *from,
573 struct state *to)
574{
575 newarc(nfa, oa->type, oa->co, from, to);
576}
577
578/*
579 * sortins - sort the in arcs of a state by from/color/type
580 */
581static void
582sortins(struct nfa *nfa,
583 struct state *s)
584{
585 struct arc **sortarray;
586 struct arc *a;
587 int n = s->nins;
588 int i;
589
590 if (n <= 1)
591 return; /* nothing to do */
592 /* make an array of arc pointers ... */
593 sortarray = (struct arc **) MALLOC(n * sizeof(struct arc *));
594 if (sortarray == NULL)
595 {
596 NERR(REG_ESPACE);
597 return;
598 }
599 i = 0;
600 for (a = s->ins; a != NULL; a = a->inchain)
601 sortarray[i++] = a;
602 assert(i == n);
603 /* ... sort the array */
604 qsort(sortarray, n, sizeof(struct arc *), sortins_cmp);
605 /* ... and rebuild arc list in order */
606 /* it seems worth special-casing first and last items to simplify loop */
607 a = sortarray[0];
608 s->ins = a;
609 a->inchain = sortarray[1];
610 a->inchainRev = NULL;
611 for (i = 1; i < n - 1; i++)
612 {
613 a = sortarray[i];
614 a->inchain = sortarray[i + 1];
615 a->inchainRev = sortarray[i - 1];
616 }
617 a = sortarray[i];
618 a->inchain = NULL;
619 a->inchainRev = sortarray[i - 1];
620 FREE(sortarray);
621}
622
623static int
624sortins_cmp(const void *a, const void *b)
625{
626 const struct arc *aa = *((const struct arc *const *) a);
627 const struct arc *bb = *((const struct arc *const *) b);
628
629 /* we check the fields in the order they are most likely to be different */
630 if (aa->from->no < bb->from->no)
631 return -1;
632 if (aa->from->no > bb->from->no)
633 return 1;
634 if (aa->co < bb->co)
635 return -1;
636 if (aa->co > bb->co)
637 return 1;
638 if (aa->type < bb->type)
639 return -1;
640 if (aa->type > bb->type)
641 return 1;
642 return 0;
643}
644
645/*
646 * sortouts - sort the out arcs of a state by to/color/type
647 */
648static void
649sortouts(struct nfa *nfa,
650 struct state *s)
651{
652 struct arc **sortarray;
653 struct arc *a;
654 int n = s->nouts;
655 int i;
656
657 if (n <= 1)
658 return; /* nothing to do */
659 /* make an array of arc pointers ... */
660 sortarray = (struct arc **) MALLOC(n * sizeof(struct arc *));
661 if (sortarray == NULL)
662 {
663 NERR(REG_ESPACE);
664 return;
665 }
666 i = 0;
667 for (a = s->outs; a != NULL; a = a->outchain)
668 sortarray[i++] = a;
669 assert(i == n);
670 /* ... sort the array */
671 qsort(sortarray, n, sizeof(struct arc *), sortouts_cmp);
672 /* ... and rebuild arc list in order */
673 /* it seems worth special-casing first and last items to simplify loop */
674 a = sortarray[0];
675 s->outs = a;
676 a->outchain = sortarray[1];
677 a->outchainRev = NULL;
678 for (i = 1; i < n - 1; i++)
679 {
680 a = sortarray[i];
681 a->outchain = sortarray[i + 1];
682 a->outchainRev = sortarray[i - 1];
683 }
684 a = sortarray[i];
685 a->outchain = NULL;
686 a->outchainRev = sortarray[i - 1];
687 FREE(sortarray);
688}
689
690static int
691sortouts_cmp(const void *a, const void *b)
692{
693 const struct arc *aa = *((const struct arc *const *) a);
694 const struct arc *bb = *((const struct arc *const *) b);
695
696 /* we check the fields in the order they are most likely to be different */
697 if (aa->to->no < bb->to->no)
698 return -1;
699 if (aa->to->no > bb->to->no)
700 return 1;
701 if (aa->co < bb->co)
702 return -1;
703 if (aa->co > bb->co)
704 return 1;
705 if (aa->type < bb->type)
706 return -1;
707 if (aa->type > bb->type)
708 return 1;
709 return 0;
710}
711
712/*
713 * Common decision logic about whether to use arc-by-arc operations or
714 * sort/merge. If there's just a few source arcs we cannot recoup the
715 * cost of sorting the destination arc list, no matter how large it is.
716 * Otherwise, limit the number of arc-by-arc comparisons to about 1000
717 * (a somewhat arbitrary choice, but the breakeven point would probably
718 * be machine dependent anyway).
719 */
720#define BULK_ARC_OP_USE_SORT(nsrcarcs, ndestarcs) \
721 ((nsrcarcs) < 4 ? 0 : ((nsrcarcs) > 32 || (ndestarcs) > 32))
722
723/*
724 * moveins - move all in arcs of a state to another state
725 *
726 * You might think this could be done better by just updating the
727 * existing arcs, and you would be right if it weren't for the need
728 * for duplicate suppression, which makes it easier to just make new
729 * ones to exploit the suppression built into newarc.
730 *
731 * However, if we have a whole lot of arcs to deal with, retail duplicate
732 * checks become too slow. In that case we proceed by sorting and merging
733 * the arc lists, and then we can indeed just update the arcs in-place.
734 */
735static void
736moveins(struct nfa *nfa,
737 struct state *oldState,
738 struct state *newState)
739{
740 assert(oldState != newState);
741
742 if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins))
743 {
744 /* With not too many arcs, just do them one at a time */
745 struct arc *a;
746
747 while ((a = oldState->ins) != NULL)
748 {
749 cparc(nfa, a, a->from, newState);
750 freearc(nfa, a);
751 }
752 }
753 else
754 {
755 /*
756 * With many arcs, use a sort-merge approach. Note changearctarget()
757 * will put the arc onto the front of newState's chain, so it does not
758 * break our walk through the sorted part of the chain.
759 */
760 struct arc *oa;
761 struct arc *na;
762
763 /*
764 * Because we bypass newarc() in this code path, we'd better include a
765 * cancel check.
766 */
767 if (CANCEL_REQUESTED(nfa->v->re))
768 {
769 NERR(REG_CANCEL);
770 return;
771 }
772
773 sortins(nfa, oldState);
774 sortins(nfa, newState);
775 if (NISERR())
776 return; /* might have failed to sort */
777 oa = oldState->ins;
778 na = newState->ins;
779 while (oa != NULL && na != NULL)
780 {
781 struct arc *a = oa;
782
783 switch (sortins_cmp(&oa, &na))
784 {
785 case -1:
786 /* newState does not have anything matching oa */
787 oa = oa->inchain;
788
789 /*
790 * Rather than doing createarc+freearc, we can just unlink
791 * and relink the existing arc struct.
792 */
793 changearctarget(a, newState);
794 break;
795 case 0:
796 /* match, advance in both lists */
797 oa = oa->inchain;
798 na = na->inchain;
799 /* ... and drop duplicate arc from oldState */
800 freearc(nfa, a);
801 break;
802 case +1:
803 /* advance only na; oa might have a match later */
804 na = na->inchain;
805 break;
806 default:
807 assert(NOTREACHED);
808 }
809 }
810 while (oa != NULL)
811 {
812 /* newState does not have anything matching oa */
813 struct arc *a = oa;
814
815 oa = oa->inchain;
816 changearctarget(a, newState);
817 }
818 }
819
820 assert(oldState->nins == 0);
821 assert(oldState->ins == NULL);
822}
823
824/*
825 * copyins - copy in arcs of a state to another state
826 */
827static void
828copyins(struct nfa *nfa,
829 struct state *oldState,
830 struct state *newState)
831{
832 assert(oldState != newState);
833
834 if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins))
835 {
836 /* With not too many arcs, just do them one at a time */
837 struct arc *a;
838
839 for (a = oldState->ins; a != NULL; a = a->inchain)
840 cparc(nfa, a, a->from, newState);
841 }
842 else
843 {
844 /*
845 * With many arcs, use a sort-merge approach. Note that createarc()
846 * will put new arcs onto the front of newState's chain, so it does
847 * not break our walk through the sorted part of the chain.
848 */
849 struct arc *oa;
850 struct arc *na;
851
852 /*
853 * Because we bypass newarc() in this code path, we'd better include a
854 * cancel check.
855 */
856 if (CANCEL_REQUESTED(nfa->v->re))
857 {
858 NERR(REG_CANCEL);
859 return;
860 }
861
862 sortins(nfa, oldState);
863 sortins(nfa, newState);
864 if (NISERR())
865 return; /* might have failed to sort */
866 oa = oldState->ins;
867 na = newState->ins;
868 while (oa != NULL && na != NULL)
869 {
870 struct arc *a = oa;
871
872 switch (sortins_cmp(&oa, &na))
873 {
874 case -1:
875 /* newState does not have anything matching oa */
876 oa = oa->inchain;
877 createarc(nfa, a->type, a->co, a->from, newState);
878 break;
879 case 0:
880 /* match, advance in both lists */
881 oa = oa->inchain;
882 na = na->inchain;
883 break;
884 case +1:
885 /* advance only na; oa might have a match later */
886 na = na->inchain;
887 break;
888 default:
889 assert(NOTREACHED);
890 }
891 }
892 while (oa != NULL)
893 {
894 /* newState does not have anything matching oa */
895 struct arc *a = oa;
896
897 oa = oa->inchain;
898 createarc(nfa, a->type, a->co, a->from, newState);
899 }
900 }
901}
902
903/*
904 * mergeins - merge a list of inarcs into a state
905 *
906 * This is much like copyins, but the source arcs are listed in an array,
907 * and are not guaranteed unique. It's okay to clobber the array contents.
908 */
909static void
910mergeins(struct nfa *nfa,
911 struct state *s,
912 struct arc **arcarray,
913 int arccount)
914{
915 struct arc *na;
916 int i;
917 int j;
918
919 if (arccount <= 0)
920 return;
921
922 /*
923 * Because we bypass newarc() in this code path, we'd better include a
924 * cancel check.
925 */
926 if (CANCEL_REQUESTED(nfa->v->re))
927 {
928 NERR(REG_CANCEL);
929 return;
930 }
931
932 /* Sort existing inarcs as well as proposed new ones */
933 sortins(nfa, s);
934 if (NISERR())
935 return; /* might have failed to sort */
936
937 qsort(arcarray, arccount, sizeof(struct arc *), sortins_cmp);
938
939 /*
940 * arcarray very likely includes dups, so we must eliminate them. (This
941 * could be folded into the next loop, but it's not worth the trouble.)
942 */
943 j = 0;
944 for (i = 1; i < arccount; i++)
945 {
946 switch (sortins_cmp(&arcarray[j], &arcarray[i]))
947 {
948 case -1:
949 /* non-dup */
950 arcarray[++j] = arcarray[i];
951 break;
952 case 0:
953 /* dup */
954 break;
955 default:
956 /* trouble */
957 assert(NOTREACHED);
958 }
959 }
960 arccount = j + 1;
961
962 /*
963 * Now merge into s' inchain. Note that createarc() will put new arcs
964 * onto the front of s's chain, so it does not break our walk through the
965 * sorted part of the chain.
966 */
967 i = 0;
968 na = s->ins;
969 while (i < arccount && na != NULL)
970 {
971 struct arc *a = arcarray[i];
972
973 switch (sortins_cmp(&a, &na))
974 {
975 case -1:
976 /* s does not have anything matching a */
977 createarc(nfa, a->type, a->co, a->from, s);
978 i++;
979 break;
980 case 0:
981 /* match, advance in both lists */
982 i++;
983 na = na->inchain;
984 break;
985 case +1:
986 /* advance only na; array might have a match later */
987 na = na->inchain;
988 break;
989 default:
990 assert(NOTREACHED);
991 }
992 }
993 while (i < arccount)
994 {
995 /* s does not have anything matching a */
996 struct arc *a = arcarray[i];
997
998 createarc(nfa, a->type, a->co, a->from, s);
999 i++;
1000 }
1001}
1002
1003/*
1004 * moveouts - move all out arcs of a state to another state
1005 */
1006static void
1007moveouts(struct nfa *nfa,
1008 struct state *oldState,
1009 struct state *newState)
1010{
1011 assert(oldState != newState);
1012
1013 if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts))
1014 {
1015 /* With not too many arcs, just do them one at a time */
1016 struct arc *a;
1017
1018 while ((a = oldState->outs) != NULL)
1019 {
1020 cparc(nfa, a, newState, a->to);
1021 freearc(nfa, a);
1022 }
1023 }
1024 else
1025 {
1026 /*
1027 * With many arcs, use a sort-merge approach. Note that createarc()
1028 * will put new arcs onto the front of newState's chain, so it does
1029 * not break our walk through the sorted part of the chain.
1030 */
1031 struct arc *oa;
1032 struct arc *na;
1033
1034 /*
1035 * Because we bypass newarc() in this code path, we'd better include a
1036 * cancel check.
1037 */
1038 if (CANCEL_REQUESTED(nfa->v->re))
1039 {
1040 NERR(REG_CANCEL);
1041 return;
1042 }
1043
1044 sortouts(nfa, oldState);
1045 sortouts(nfa, newState);
1046 if (NISERR())
1047 return; /* might have failed to sort */
1048 oa = oldState->outs;
1049 na = newState->outs;
1050 while (oa != NULL && na != NULL)
1051 {
1052 struct arc *a = oa;
1053
1054 switch (sortouts_cmp(&oa, &na))
1055 {
1056 case -1:
1057 /* newState does not have anything matching oa */
1058 oa = oa->outchain;
1059 createarc(nfa, a->type, a->co, newState, a->to);
1060 freearc(nfa, a);
1061 break;
1062 case 0:
1063 /* match, advance in both lists */
1064 oa = oa->outchain;
1065 na = na->outchain;
1066 /* ... and drop duplicate arc from oldState */
1067 freearc(nfa, a);
1068 break;
1069 case +1:
1070 /* advance only na; oa might have a match later */
1071 na = na->outchain;
1072 break;
1073 default:
1074 assert(NOTREACHED);
1075 }
1076 }
1077 while (oa != NULL)
1078 {
1079 /* newState does not have anything matching oa */
1080 struct arc *a = oa;
1081
1082 oa = oa->outchain;
1083 createarc(nfa, a->type, a->co, newState, a->to);
1084 freearc(nfa, a);
1085 }
1086 }
1087
1088 assert(oldState->nouts == 0);
1089 assert(oldState->outs == NULL);
1090}
1091
1092/*
1093 * copyouts - copy out arcs of a state to another state
1094 */
1095static void
1096copyouts(struct nfa *nfa,
1097 struct state *oldState,
1098 struct state *newState)
1099{
1100 assert(oldState != newState);
1101
1102 if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts))
1103 {
1104 /* With not too many arcs, just do them one at a time */
1105 struct arc *a;
1106
1107 for (a = oldState->outs; a != NULL; a = a->outchain)
1108 cparc(nfa, a, newState, a->to);
1109 }
1110 else
1111 {
1112 /*
1113 * With many arcs, use a sort-merge approach. Note that createarc()
1114 * will put new arcs onto the front of newState's chain, so it does
1115 * not break our walk through the sorted part of the chain.
1116 */
1117 struct arc *oa;
1118 struct arc *na;
1119
1120 /*
1121 * Because we bypass newarc() in this code path, we'd better include a
1122 * cancel check.
1123 */
1124 if (CANCEL_REQUESTED(nfa->v->re))
1125 {
1126 NERR(REG_CANCEL);
1127 return;
1128 }
1129
1130 sortouts(nfa, oldState);
1131 sortouts(nfa, newState);
1132 if (NISERR())
1133 return; /* might have failed to sort */
1134 oa = oldState->outs;
1135 na = newState->outs;
1136 while (oa != NULL && na != NULL)
1137 {
1138 struct arc *a = oa;
1139
1140 switch (sortouts_cmp(&oa, &na))
1141 {
1142 case -1:
1143 /* newState does not have anything matching oa */
1144 oa = oa->outchain;
1145 createarc(nfa, a->type, a->co, newState, a->to);
1146 break;
1147 case 0:
1148 /* match, advance in both lists */
1149 oa = oa->outchain;
1150 na = na->outchain;
1151 break;
1152 case +1:
1153 /* advance only na; oa might have a match later */
1154 na = na->outchain;
1155 break;
1156 default:
1157 assert(NOTREACHED);
1158 }
1159 }
1160 while (oa != NULL)
1161 {
1162 /* newState does not have anything matching oa */
1163 struct arc *a = oa;
1164
1165 oa = oa->outchain;
1166 createarc(nfa, a->type, a->co, newState, a->to);
1167 }
1168 }
1169}
1170
1171/*
1172 * cloneouts - copy out arcs of a state to another state pair, modifying type
1173 */
1174static void
1175cloneouts(struct nfa *nfa,
1176 struct state *old,
1177 struct state *from,
1178 struct state *to,
1179 int type)
1180{
1181 struct arc *a;
1182
1183 assert(old != from);
1184
1185 for (a = old->outs; a != NULL; a = a->outchain)
1186 newarc(nfa, type, a->co, from, to);
1187}
1188
1189/*
1190 * delsub - delete a sub-NFA, updating subre pointers if necessary
1191 *
1192 * This uses a recursive traversal of the sub-NFA, marking already-seen
1193 * states using their tmp pointer.
1194 */
1195static void
1196delsub(struct nfa *nfa,
1197 struct state *lp, /* the sub-NFA goes from here... */
1198 struct state *rp) /* ...to here, *not* inclusive */
1199{
1200 assert(lp != rp);
1201
1202 rp->tmp = rp; /* mark end */
1203
1204 deltraverse(nfa, lp, lp);
1205 if (NISERR())
1206 return; /* asserts might not hold after failure */
1207 assert(lp->nouts == 0 && rp->nins == 0); /* did the job */
1208 assert(lp->no != FREESTATE && rp->no != FREESTATE); /* no more */
1209
1210 rp->tmp = NULL; /* unmark end */
1211 lp->tmp = NULL; /* and begin, marked by deltraverse */
1212}
1213
1214/*
1215 * deltraverse - the recursive heart of delsub
1216 * This routine's basic job is to destroy all out-arcs of the state.
1217 */
1218static void
1219deltraverse(struct nfa *nfa,
1220 struct state *leftend,
1221 struct state *s)
1222{
1223 struct arc *a;
1224 struct state *to;
1225
1226 /* Since this is recursive, it could be driven to stack overflow */
1227 if (STACK_TOO_DEEP(nfa->v->re))
1228 {
1229 NERR(REG_ETOOBIG);
1230 return;
1231 }
1232
1233 if (s->nouts == 0)
1234 return; /* nothing to do */
1235 if (s->tmp != NULL)
1236 return; /* already in progress */
1237
1238 s->tmp = s; /* mark as in progress */
1239
1240 while ((a = s->outs) != NULL)
1241 {
1242 to = a->to;
1243 deltraverse(nfa, leftend, to);
1244 if (NISERR())
1245 return; /* asserts might not hold after failure */
1246 assert(to->nouts == 0 || to->tmp != NULL);
1247 freearc(nfa, a);
1248 if (to->nins == 0 && to->tmp == NULL)
1249 {
1250 assert(to->nouts == 0);
1251 freestate(nfa, to);
1252 }
1253 }
1254
1255 assert(s->no != FREESTATE); /* we're still here */
1256 assert(s == leftend || s->nins != 0); /* and still reachable */
1257 assert(s->nouts == 0); /* but have no outarcs */
1258
1259 s->tmp = NULL; /* we're done here */
1260}
1261
1262/*
1263 * dupnfa - duplicate sub-NFA
1264 *
1265 * Another recursive traversal, this time using tmp to point to duplicates
1266 * as well as mark already-seen states. (You knew there was a reason why
1267 * it's a state pointer, didn't you? :-))
1268 */
1269static void
1270dupnfa(struct nfa *nfa,
1271 struct state *start, /* duplicate of subNFA starting here */
1272 struct state *stop, /* and stopping here */
1273 struct state *from, /* stringing duplicate from here */
1274 struct state *to) /* to here */
1275{
1276 if (start == stop)
1277 {
1278 newarc(nfa, EMPTY, 0, from, to);
1279 return;
1280 }
1281
1282 stop->tmp = to;
1283 duptraverse(nfa, start, from);
1284 /* done, except for clearing out the tmp pointers */
1285
1286 stop->tmp = NULL;
1287 cleartraverse(nfa, start);
1288}
1289
1290/*
1291 * duptraverse - recursive heart of dupnfa
1292 */
1293static void
1294duptraverse(struct nfa *nfa,
1295 struct state *s,
1296 struct state *stmp) /* s's duplicate, or NULL */
1297{
1298 struct arc *a;
1299
1300 /* Since this is recursive, it could be driven to stack overflow */
1301 if (STACK_TOO_DEEP(nfa->v->re))
1302 {
1303 NERR(REG_ETOOBIG);
1304 return;
1305 }
1306
1307 if (s->tmp != NULL)
1308 return; /* already done */
1309
1310 s->tmp = (stmp == NULL) ? newstate(nfa) : stmp;
1311 if (s->tmp == NULL)
1312 {
1313 assert(NISERR());
1314 return;
1315 }
1316
1317 for (a = s->outs; a != NULL && !NISERR(); a = a->outchain)
1318 {
1319 duptraverse(nfa, a->to, (struct state *) NULL);
1320 if (NISERR())
1321 break;
1322 assert(a->to->tmp != NULL);
1323 cparc(nfa, a, s->tmp, a->to->tmp);
1324 }
1325}
1326
1327/*
1328 * cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
1329 */
1330static void
1331cleartraverse(struct nfa *nfa,
1332 struct state *s)
1333{
1334 struct arc *a;
1335
1336 /* Since this is recursive, it could be driven to stack overflow */
1337 if (STACK_TOO_DEEP(nfa->v->re))
1338 {
1339 NERR(REG_ETOOBIG);
1340 return;
1341 }
1342
1343 if (s->tmp == NULL)
1344 return;
1345 s->tmp = NULL;
1346
1347 for (a = s->outs; a != NULL; a = a->outchain)
1348 cleartraverse(nfa, a->to);
1349}
1350
1351/*
1352 * single_color_transition - does getting from s1 to s2 cross one PLAIN arc?
1353 *
1354 * If traversing from s1 to s2 requires a single PLAIN match (possibly of any
1355 * of a set of colors), return a state whose outarc list contains only PLAIN
1356 * arcs of those color(s). Otherwise return NULL.
1357 *
1358 * This is used before optimizing the NFA, so there may be EMPTY arcs, which
1359 * we should ignore; the possibility of an EMPTY is why the result state could
1360 * be different from s1.
1361 *
1362 * It's worth troubling to handle multiple parallel PLAIN arcs here because a
1363 * bracket construct such as [abc] might yield either one or several parallel
1364 * PLAIN arcs depending on earlier atoms in the expression. We'd rather that
1365 * that implementation detail not create user-visible performance differences.
1366 */
1367static struct state *
1368single_color_transition(struct state *s1, struct state *s2)
1369{
1370 struct arc *a;
1371
1372 /* Ignore leading EMPTY arc, if any */
1373 if (s1->nouts == 1 && s1->outs->type == EMPTY)
1374 s1 = s1->outs->to;
1375 /* Likewise for any trailing EMPTY arc */
1376 if (s2->nins == 1 && s2->ins->type == EMPTY)
1377 s2 = s2->ins->from;
1378 /* Perhaps we could have a single-state loop in between, if so reject */
1379 if (s1 == s2)
1380 return NULL;
1381 /* s1 must have at least one outarc... */
1382 if (s1->outs == NULL)
1383 return NULL;
1384 /* ... and they must all be PLAIN arcs to s2 */
1385 for (a = s1->outs; a != NULL; a = a->outchain)
1386 {
1387 if (a->type != PLAIN || a->to != s2)
1388 return NULL;
1389 }
1390 /* OK, return s1 as the possessor of the relevant outarcs */
1391 return s1;
1392}
1393
1394/*
1395 * specialcolors - fill in special colors for an NFA
1396 */
1397static void
1398specialcolors(struct nfa *nfa)
1399{
1400 /* false colors for BOS, BOL, EOS, EOL */
1401 if (nfa->parent == NULL)
1402 {
1403 nfa->bos[0] = pseudocolor(nfa->cm);
1404 nfa->bos[1] = pseudocolor(nfa->cm);
1405 nfa->eos[0] = pseudocolor(nfa->cm);
1406 nfa->eos[1] = pseudocolor(nfa->cm);
1407 }
1408 else
1409 {
1410 assert(nfa->parent->bos[0] != COLORLESS);
1411 nfa->bos[0] = nfa->parent->bos[0];
1412 assert(nfa->parent->bos[1] != COLORLESS);
1413 nfa->bos[1] = nfa->parent->bos[1];
1414 assert(nfa->parent->eos[0] != COLORLESS);
1415 nfa->eos[0] = nfa->parent->eos[0];
1416 assert(nfa->parent->eos[1] != COLORLESS);
1417 nfa->eos[1] = nfa->parent->eos[1];
1418 }
1419}
1420
1421/*
1422 * optimize - optimize an NFA
1423 *
1424 * The main goal of this function is not so much "optimization" (though it
1425 * does try to get rid of useless NFA states) as reducing the NFA to a form
1426 * the regex executor can handle. The executor, and indeed the cNFA format
1427 * that is its input, can only handle PLAIN and LACON arcs. The output of
1428 * the regex parser also includes EMPTY (do-nothing) arcs, as well as
1429 * ^, $, AHEAD, and BEHIND constraint arcs, which we must get rid of here.
1430 * We first get rid of EMPTY arcs and then deal with the constraint arcs.
1431 * The hardest part of either job is to get rid of circular loops of the
1432 * target arc type. We would have to do that in any case, though, as such a
1433 * loop would otherwise allow the executor to cycle through the loop endlessly
1434 * without making any progress in the input string.
1435 */
1436static long /* re_info bits */
1437optimize(struct nfa *nfa,
1438 FILE *f) /* for debug output; NULL none */
1439{
1440#ifdef REG_DEBUG
1441 int verbose = (f != NULL) ? 1 : 0;
1442
1443 if (verbose)
1444 fprintf(f, "\ninitial cleanup:\n");
1445#endif
1446 cleanup(nfa); /* may simplify situation */
1447#ifdef REG_DEBUG
1448 if (verbose)
1449 dumpnfa(nfa, f);
1450 if (verbose)
1451 fprintf(f, "\nempties:\n");
1452#endif
1453 fixempties(nfa, f); /* get rid of EMPTY arcs */
1454#ifdef REG_DEBUG
1455 if (verbose)
1456 fprintf(f, "\nconstraints:\n");
1457#endif
1458 fixconstraintloops(nfa, f); /* get rid of constraint loops */
1459 pullback(nfa, f); /* pull back constraints backward */
1460 pushfwd(nfa, f); /* push fwd constraints forward */
1461#ifdef REG_DEBUG
1462 if (verbose)
1463 fprintf(f, "\nfinal cleanup:\n");
1464#endif
1465 cleanup(nfa); /* final tidying */
1466#ifdef REG_DEBUG
1467 if (verbose)
1468 dumpnfa(nfa, f);
1469#endif
1470 return analyze(nfa); /* and analysis */
1471}
1472
1473/*
1474 * pullback - pull back constraints backward to eliminate them
1475 */
1476static void
1477pullback(struct nfa *nfa,
1478 FILE *f) /* for debug output; NULL none */
1479{
1480 struct state *s;
1481 struct state *nexts;
1482 struct arc *a;
1483 struct arc *nexta;
1484 struct state *intermediates;
1485 int progress;
1486
1487 /* find and pull until there are no more */
1488 do
1489 {
1490 progress = 0;
1491 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
1492 {
1493 nexts = s->next;
1494 intermediates = NULL;
1495 for (a = s->outs; a != NULL && !NISERR(); a = nexta)
1496 {
1497 nexta = a->outchain;
1498 if (a->type == '^' || a->type == BEHIND)
1499 if (pull(nfa, a, &intermediates))
1500 progress = 1;
1501 }
1502 /* clear tmp fields of intermediate states created here */
1503 while (intermediates != NULL)
1504 {
1505 struct state *ns = intermediates->tmp;
1506
1507 intermediates->tmp = NULL;
1508 intermediates = ns;
1509 }
1510 /* if s is now useless, get rid of it */
1511 if ((s->nins == 0 || s->nouts == 0) && !s->flag)
1512 dropstate(nfa, s);
1513 }
1514 if (progress && f != NULL)
1515 dumpnfa(nfa, f);
1516 } while (progress && !NISERR());
1517 if (NISERR())
1518 return;
1519
1520 /*
1521 * Any ^ constraints we were able to pull to the start state can now be
1522 * replaced by PLAIN arcs referencing the BOS or BOL colors. There should
1523 * be no other ^ or BEHIND arcs left in the NFA, though we do not check
1524 * that here (compact() will fail if so).
1525 */
1526 for (a = nfa->pre->outs; a != NULL; a = nexta)
1527 {
1528 nexta = a->outchain;
1529 if (a->type == '^')
1530 {
1531 assert(a->co == 0 || a->co == 1);
1532 newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to);
1533 freearc(nfa, a);
1534 }
1535 }
1536}
1537
1538/*
1539 * pull - pull a back constraint backward past its source state
1540 *
1541 * Returns 1 if successful (which it always is unless the source is the
1542 * start state or we have an internal error), 0 if nothing happened.
1543 *
1544 * A significant property of this function is that it deletes no pre-existing
1545 * states, and no outarcs of the constraint's from state other than the given
1546 * constraint arc. This makes the loops in pullback() safe, at the cost that
1547 * we may leave useless states behind. Therefore, we leave it to pullback()
1548 * to delete such states.
1549 *
1550 * If the from state has multiple back-constraint outarcs, and/or multiple
1551 * compatible constraint inarcs, we only need to create one new intermediate
1552 * state per combination of predecessor and successor states. *intermediates
1553 * points to a list of such intermediate states for this from state (chained
1554 * through their tmp fields).
1555 */
1556static int
1557pull(struct nfa *nfa,
1558 struct arc *con,
1559 struct state **intermediates)
1560{
1561 struct state *from = con->from;
1562 struct state *to = con->to;
1563 struct arc *a;
1564 struct arc *nexta;
1565 struct state *s;
1566
1567 assert(from != to); /* should have gotten rid of this earlier */
1568 if (from->flag) /* can't pull back beyond start */
1569 return 0;
1570 if (from->nins == 0)
1571 { /* unreachable */
1572 freearc(nfa, con);
1573 return 1;
1574 }
1575
1576 /*
1577 * First, clone from state if necessary to avoid other outarcs. This may
1578 * seem wasteful, but it simplifies the logic, and we'll get rid of the
1579 * clone state again at the bottom.
1580 */
1581 if (from->nouts > 1)
1582 {
1583 s = newstate(nfa);
1584 if (NISERR())
1585 return 0;
1586 copyins(nfa, from, s); /* duplicate inarcs */
1587 cparc(nfa, con, s, to); /* move constraint arc */
1588 freearc(nfa, con);
1589 if (NISERR())
1590 return 0;
1591 from = s;
1592 con = from->outs;
1593 }
1594 assert(from->nouts == 1);
1595
1596 /* propagate the constraint into the from state's inarcs */
1597 for (a = from->ins; a != NULL && !NISERR(); a = nexta)
1598 {
1599 nexta = a->inchain;
1600 switch (combine(con, a))
1601 {
1602 case INCOMPATIBLE: /* destroy the arc */
1603 freearc(nfa, a);
1604 break;
1605 case SATISFIED: /* no action needed */
1606 break;
1607 case COMPATIBLE: /* swap the two arcs, more or less */
1608 /* need an intermediate state, but might have one already */
1609 for (s = *intermediates; s != NULL; s = s->tmp)
1610 {
1611 assert(s->nins > 0 && s->nouts > 0);
1612 if (s->ins->from == a->from && s->outs->to == to)
1613 break;
1614 }
1615 if (s == NULL)
1616 {
1617 s = newstate(nfa);
1618 if (NISERR())
1619 return 0;
1620 s->tmp = *intermediates;
1621 *intermediates = s;
1622 }
1623 cparc(nfa, con, a->from, s);
1624 cparc(nfa, a, s, to);
1625 freearc(nfa, a);
1626 break;
1627 default:
1628 assert(NOTREACHED);
1629 break;
1630 }
1631 }
1632
1633 /* remaining inarcs, if any, incorporate the constraint */
1634 moveins(nfa, from, to);
1635 freearc(nfa, con);
1636 /* from state is now useless, but we leave it to pullback() to clean up */
1637 return 1;
1638}
1639
1640/*
1641 * pushfwd - push forward constraints forward to eliminate them
1642 */
1643static void
1644pushfwd(struct nfa *nfa,
1645 FILE *f) /* for debug output; NULL none */
1646{
1647 struct state *s;
1648 struct state *nexts;
1649 struct arc *a;
1650 struct arc *nexta;
1651 struct state *intermediates;
1652 int progress;
1653
1654 /* find and push until there are no more */
1655 do
1656 {
1657 progress = 0;
1658 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
1659 {
1660 nexts = s->next;
1661 intermediates = NULL;
1662 for (a = s->ins; a != NULL && !NISERR(); a = nexta)
1663 {
1664 nexta = a->inchain;
1665 if (a->type == '$' || a->type == AHEAD)
1666 if (push(nfa, a, &intermediates))
1667 progress = 1;
1668 }
1669 /* clear tmp fields of intermediate states created here */
1670 while (intermediates != NULL)
1671 {
1672 struct state *ns = intermediates->tmp;
1673
1674 intermediates->tmp = NULL;
1675 intermediates = ns;
1676 }
1677 /* if s is now useless, get rid of it */
1678 if ((s->nins == 0 || s->nouts == 0) && !s->flag)
1679 dropstate(nfa, s);
1680 }
1681 if (progress && f != NULL)
1682 dumpnfa(nfa, f);
1683 } while (progress && !NISERR());
1684 if (NISERR())
1685 return;
1686
1687 /*
1688 * Any $ constraints we were able to push to the post state can now be
1689 * replaced by PLAIN arcs referencing the EOS or EOL colors. There should
1690 * be no other $ or AHEAD arcs left in the NFA, though we do not check
1691 * that here (compact() will fail if so).
1692 */
1693 for (a = nfa->post->ins; a != NULL; a = nexta)
1694 {
1695 nexta = a->inchain;
1696 if (a->type == '$')
1697 {
1698 assert(a->co == 0 || a->co == 1);
1699 newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to);
1700 freearc(nfa, a);
1701 }
1702 }
1703}
1704
1705/*
1706 * push - push a forward constraint forward past its destination state
1707 *
1708 * Returns 1 if successful (which it always is unless the destination is the
1709 * post state or we have an internal error), 0 if nothing happened.
1710 *
1711 * A significant property of this function is that it deletes no pre-existing
1712 * states, and no inarcs of the constraint's to state other than the given
1713 * constraint arc. This makes the loops in pushfwd() safe, at the cost that
1714 * we may leave useless states behind. Therefore, we leave it to pushfwd()
1715 * to delete such states.
1716 *
1717 * If the to state has multiple forward-constraint inarcs, and/or multiple
1718 * compatible constraint outarcs, we only need to create one new intermediate
1719 * state per combination of predecessor and successor states. *intermediates
1720 * points to a list of such intermediate states for this to state (chained
1721 * through their tmp fields).
1722 */
1723static int
1724push(struct nfa *nfa,
1725 struct arc *con,
1726 struct state **intermediates)
1727{
1728 struct state *from = con->from;
1729 struct state *to = con->to;
1730 struct arc *a;
1731 struct arc *nexta;
1732 struct state *s;
1733
1734 assert(to != from); /* should have gotten rid of this earlier */
1735 if (to->flag) /* can't push forward beyond end */
1736 return 0;
1737 if (to->nouts == 0)
1738 { /* dead end */
1739 freearc(nfa, con);
1740 return 1;
1741 }
1742
1743 /*
1744 * First, clone to state if necessary to avoid other inarcs. This may
1745 * seem wasteful, but it simplifies the logic, and we'll get rid of the
1746 * clone state again at the bottom.
1747 */
1748 if (to->nins > 1)
1749 {
1750 s = newstate(nfa);
1751 if (NISERR())
1752 return 0;
1753 copyouts(nfa, to, s); /* duplicate outarcs */
1754 cparc(nfa, con, from, s); /* move constraint arc */
1755 freearc(nfa, con);
1756 if (NISERR())
1757 return 0;
1758 to = s;
1759 con = to->ins;
1760 }
1761 assert(to->nins == 1);
1762
1763 /* propagate the constraint into the to state's outarcs */
1764 for (a = to->outs; a != NULL && !NISERR(); a = nexta)
1765 {
1766 nexta = a->outchain;
1767 switch (combine(con, a))
1768 {
1769 case INCOMPATIBLE: /* destroy the arc */
1770 freearc(nfa, a);
1771 break;
1772 case SATISFIED: /* no action needed */
1773 break;
1774 case COMPATIBLE: /* swap the two arcs, more or less */
1775 /* need an intermediate state, but might have one already */
1776 for (s = *intermediates; s != NULL; s = s->tmp)
1777 {
1778 assert(s->nins > 0 && s->nouts > 0);
1779 if (s->ins->from == from && s->outs->to == a->to)
1780 break;
1781 }
1782 if (s == NULL)
1783 {
1784 s = newstate(nfa);
1785 if (NISERR())
1786 return 0;
1787 s->tmp = *intermediates;
1788 *intermediates = s;
1789 }
1790 cparc(nfa, con, s, a->to);
1791 cparc(nfa, a, from, s);
1792 freearc(nfa, a);
1793 break;
1794 default:
1795 assert(NOTREACHED);
1796 break;
1797 }
1798 }
1799
1800 /* remaining outarcs, if any, incorporate the constraint */
1801 moveouts(nfa, to, from);
1802 freearc(nfa, con);
1803 /* to state is now useless, but we leave it to pushfwd() to clean up */
1804 return 1;
1805}
1806
1807/*
1808 * combine - constraint lands on an arc, what happens?
1809 *
1810 * #def INCOMPATIBLE 1 // destroys arc
1811 * #def SATISFIED 2 // constraint satisfied
1812 * #def COMPATIBLE 3 // compatible but not satisfied yet
1813 */
1814static int
1815combine(struct arc *con,
1816 struct arc *a)
1817{
1818#define CA(ct,at) (((ct)<<CHAR_BIT) | (at))
1819
1820 switch (CA(con->type, a->type))
1821 {
1822 case CA('^', PLAIN): /* newlines are handled separately */
1823 case CA('$', PLAIN):
1824 return INCOMPATIBLE;
1825 break;
1826 case CA(AHEAD, PLAIN): /* color constraints meet colors */
1827 case CA(BEHIND, PLAIN):
1828 if (con->co == a->co)
1829 return SATISFIED;
1830 return INCOMPATIBLE;
1831 break;
1832 case CA('^', '^'): /* collision, similar constraints */
1833 case CA('$', '$'):
1834 case CA(AHEAD, AHEAD):
1835 case CA(BEHIND, BEHIND):
1836 if (con->co == a->co) /* true duplication */
1837 return SATISFIED;
1838 return INCOMPATIBLE;
1839 break;
1840 case CA('^', BEHIND): /* collision, dissimilar constraints */
1841 case CA(BEHIND, '^'):
1842 case CA('$', AHEAD):
1843 case CA(AHEAD, '$'):
1844 return INCOMPATIBLE;
1845 break;
1846 case CA('^', '$'): /* constraints passing each other */
1847 case CA('^', AHEAD):
1848 case CA(BEHIND, '$'):
1849 case CA(BEHIND, AHEAD):
1850 case CA('$', '^'):
1851 case CA('$', BEHIND):
1852 case CA(AHEAD, '^'):
1853 case CA(AHEAD, BEHIND):
1854 case CA('^', LACON):
1855 case CA(BEHIND, LACON):
1856 case CA('$', LACON):
1857 case CA(AHEAD, LACON):
1858 return COMPATIBLE;
1859 break;
1860 }
1861 assert(NOTREACHED);
1862 return INCOMPATIBLE; /* for benefit of blind compilers */
1863}
1864
1865/*
1866 * fixempties - get rid of EMPTY arcs
1867 */
1868static void
1869fixempties(struct nfa *nfa,
1870 FILE *f) /* for debug output; NULL none */
1871{
1872 struct state *s;
1873 struct state *s2;
1874 struct state *nexts;
1875 struct arc *a;
1876 struct arc *nexta;
1877 int totalinarcs;
1878 struct arc **inarcsorig;
1879 struct arc **arcarray;
1880 int arccount;
1881 int prevnins;
1882 int nskip;
1883
1884 /*
1885 * First, get rid of any states whose sole out-arc is an EMPTY, since
1886 * they're basically just aliases for their successor. The parsing
1887 * algorithm creates enough of these that it's worth special-casing this.
1888 */
1889 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
1890 {
1891 nexts = s->next;
1892 if (s->flag || s->nouts != 1)
1893 continue;
1894 a = s->outs;
1895 assert(a != NULL && a->outchain == NULL);
1896 if (a->type != EMPTY)
1897 continue;
1898 if (s != a->to)
1899 moveins(nfa, s, a->to);
1900 dropstate(nfa, s);
1901 }
1902
1903 /*
1904 * Similarly, get rid of any state with a single EMPTY in-arc, by folding
1905 * it into its predecessor.
1906 */
1907 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
1908 {
1909 nexts = s->next;
1910 /* while we're at it, ensure tmp fields are clear for next step */
1911 assert(s->tmp == NULL);
1912 if (s->flag || s->nins != 1)
1913 continue;
1914 a = s->ins;
1915 assert(a != NULL && a->inchain == NULL);
1916 if (a->type != EMPTY)
1917 continue;
1918 if (s != a->from)
1919 moveouts(nfa, s, a->from);
1920 dropstate(nfa, s);
1921 }
1922
1923 if (NISERR())
1924 return;
1925
1926 /*
1927 * For each remaining NFA state, find all other states from which it is
1928 * reachable by a chain of one or more EMPTY arcs. Then generate new arcs
1929 * that eliminate the need for each such chain.
1930 *
1931 * We could replace a chain of EMPTY arcs that leads from a "from" state
1932 * to a "to" state either by pushing non-EMPTY arcs forward (linking
1933 * directly from "from"'s predecessors to "to") or by pulling them back
1934 * (linking directly from "from" to "to"'s successors). We choose to
1935 * always do the former; this choice is somewhat arbitrary, but the
1936 * approach below requires that we uniformly do one or the other.
1937 *
1938 * Suppose we have a chain of N successive EMPTY arcs (where N can easily
1939 * approach the size of the NFA). All of the intermediate states must
1940 * have additional inarcs and outarcs, else they'd have been removed by
1941 * the steps above. Assuming their inarcs are mostly not empties, we will
1942 * add O(N^2) arcs to the NFA, since a non-EMPTY inarc leading to any one
1943 * state in the chain must be duplicated to lead to all its successor
1944 * states as well. So there is no hope of doing less than O(N^2) work;
1945 * however, we should endeavor to keep the big-O cost from being even
1946 * worse than that, which it can easily become without care. In
1947 * particular, suppose we were to copy all S1's inarcs forward to S2, and
1948 * then also to S3, and then later we consider pushing S2's inarcs forward
1949 * to S3. If we include the arcs already copied from S1 in that, we'd be
1950 * doing O(N^3) work. (The duplicate-arc elimination built into newarc()
1951 * and its cohorts would get rid of the extra arcs, but not without cost.)
1952 *
1953 * We can avoid this cost by treating only arcs that existed at the start
1954 * of this phase as candidates to be pushed forward. To identify those,
1955 * we remember the first inarc each state had to start with. We rely on
1956 * the fact that newarc() and friends put new arcs on the front of their
1957 * to-states' inchains, and that this phase never deletes arcs, so that
1958 * the original arcs must be the last arcs in their to-states' inchains.
1959 *
1960 * So the process here is that, for each state in the NFA, we gather up
1961 * all non-EMPTY inarcs of states that can reach the target state via
1962 * EMPTY arcs. We then sort, de-duplicate, and merge these arcs into the
1963 * target state's inchain. (We can safely use sort-merge for this as long
1964 * as we update each state's original-arcs pointer after we add arcs to
1965 * it; the sort step of mergeins probably changed the order of the old
1966 * arcs.)
1967 *
1968 * Another refinement worth making is that, because we only add non-EMPTY
1969 * arcs during this phase, and all added arcs have the same from-state as
1970 * the non-EMPTY arc they were cloned from, we know ahead of time that any
1971 * states having only EMPTY outarcs will be useless for lack of outarcs
1972 * after we drop the EMPTY arcs. (They cannot gain non-EMPTY outarcs if
1973 * they had none to start with.) So we need not bother to update the
1974 * inchains of such states at all.
1975 */
1976
1977 /* Remember the states' first original inarcs */
1978 /* ... and while at it, count how many old inarcs there are altogether */
1979 inarcsorig = (struct arc **) MALLOC(nfa->nstates * sizeof(struct arc *));
1980 if (inarcsorig == NULL)
1981 {
1982 NERR(REG_ESPACE);
1983 return;
1984 }
1985 totalinarcs = 0;
1986 for (s = nfa->states; s != NULL; s = s->next)
1987 {
1988 inarcsorig[s->no] = s->ins;
1989 totalinarcs += s->nins;
1990 }
1991
1992 /*
1993 * Create a workspace for accumulating the inarcs to be added to the
1994 * current target state. totalinarcs is probably a considerable
1995 * overestimate of the space needed, but the NFA is unlikely to be large
1996 * enough at this point to make it worth being smarter.
1997 */
1998 arcarray = (struct arc **) MALLOC(totalinarcs * sizeof(struct arc *));
1999 if (arcarray == NULL)
2000 {
2001 NERR(REG_ESPACE);
2002 FREE(inarcsorig);
2003 return;
2004 }
2005
2006 /* And iterate over the target states */
2007 for (s = nfa->states; s != NULL && !NISERR(); s = s->next)
2008 {
2009 /* Ignore target states without non-EMPTY outarcs, per note above */
2010 if (!s->flag && !hasnonemptyout(s))
2011 continue;
2012
2013 /* Find predecessor states and accumulate their original inarcs */
2014 arccount = 0;
2015 for (s2 = emptyreachable(nfa, s, s, inarcsorig); s2 != s; s2 = nexts)
2016 {
2017 /* Add s2's original inarcs to arcarray[], but ignore empties */
2018 for (a = inarcsorig[s2->no]; a != NULL; a = a->inchain)
2019 {
2020 if (a->type != EMPTY)
2021 arcarray[arccount++] = a;
2022 }
2023
2024 /* Reset the tmp fields as we walk back */
2025 nexts = s2->tmp;
2026 s2->tmp = NULL;
2027 }
2028 s->tmp = NULL;
2029 assert(arccount <= totalinarcs);
2030
2031 /* Remember how many original inarcs this state has */
2032 prevnins = s->nins;
2033
2034 /* Add non-duplicate inarcs to target state */
2035 mergeins(nfa, s, arcarray, arccount);
2036
2037 /* Now we must update the state's inarcsorig pointer */
2038 nskip = s->nins - prevnins;
2039 a = s->ins;
2040 while (nskip-- > 0)
2041 a = a->inchain;
2042 inarcsorig[s->no] = a;
2043 }
2044
2045 FREE(arcarray);
2046 FREE(inarcsorig);
2047
2048 if (NISERR())
2049 return;
2050
2051 /*
2052 * Now remove all the EMPTY arcs, since we don't need them anymore.
2053 */
2054 for (s = nfa->states; s != NULL; s = s->next)
2055 {
2056 for (a = s->outs; a != NULL; a = nexta)
2057 {
2058 nexta = a->outchain;
2059 if (a->type == EMPTY)
2060 freearc(nfa, a);
2061 }
2062 }
2063
2064 /*
2065 * And remove any states that have become useless. (This cleanup is not
2066 * very thorough, and would be even less so if we tried to combine it with
2067 * the previous step; but cleanup() will take care of anything we miss.)
2068 */
2069 for (s = nfa->states; s != NULL; s = nexts)
2070 {
2071 nexts = s->next;
2072 if ((s->nins == 0 || s->nouts == 0) && !s->flag)
2073 dropstate(nfa, s);
2074 }
2075
2076 if (f != NULL)
2077 dumpnfa(nfa, f);
2078}
2079
2080/*
2081 * emptyreachable - recursively find all states that can reach s by EMPTY arcs
2082 *
2083 * The return value is the last such state found. Its tmp field links back
2084 * to the next-to-last such state, and so on back to s, so that all these
2085 * states can be located without searching the whole NFA.
2086 *
2087 * Since this is only used in fixempties(), we pass in the inarcsorig[] array
2088 * maintained by that function. This lets us skip over all new inarcs, which
2089 * are certainly not EMPTY arcs.
2090 *
2091 * The maximum recursion depth here is equal to the length of the longest
2092 * loop-free chain of EMPTY arcs, which is surely no more than the size of
2093 * the NFA ... but that could still be enough to cause trouble.
2094 */
2095static struct state *
2096emptyreachable(struct nfa *nfa,
2097 struct state *s,
2098 struct state *lastfound,
2099 struct arc **inarcsorig)
2100{
2101 struct arc *a;
2102
2103 /* Since this is recursive, it could be driven to stack overflow */
2104 if (STACK_TOO_DEEP(nfa->v->re))
2105 {
2106 NERR(REG_ETOOBIG);
2107 return lastfound;
2108 }
2109
2110 s->tmp = lastfound;
2111 lastfound = s;
2112 for (a = inarcsorig[s->no]; a != NULL; a = a->inchain)
2113 {
2114 if (a->type == EMPTY && a->from->tmp == NULL)
2115 lastfound = emptyreachable(nfa, a->from, lastfound, inarcsorig);
2116 }
2117 return lastfound;
2118}
2119
2120/*
2121 * isconstraintarc - detect whether an arc is of a constraint type
2122 */
2123static inline int
2124isconstraintarc(struct arc *a)
2125{
2126 switch (a->type)
2127 {
2128 case '^':
2129 case '$':
2130 case BEHIND:
2131 case AHEAD:
2132 case LACON:
2133 return 1;
2134 }
2135 return 0;
2136}
2137
2138/*
2139 * hasconstraintout - does state have a constraint out arc?
2140 */
2141static int
2142hasconstraintout(struct state *s)
2143{
2144 struct arc *a;
2145
2146 for (a = s->outs; a != NULL; a = a->outchain)
2147 {
2148 if (isconstraintarc(a))
2149 return 1;
2150 }
2151 return 0;
2152}
2153
2154/*
2155 * fixconstraintloops - get rid of loops containing only constraint arcs
2156 *
2157 * A loop of states that contains only constraint arcs is useless, since
2158 * passing around the loop represents no forward progress. Moreover, it
2159 * would cause infinite looping in pullback/pushfwd, so we need to get rid
2160 * of such loops before doing that.
2161 */
2162static void
2163fixconstraintloops(struct nfa *nfa,
2164 FILE *f) /* for debug output; NULL none */
2165{
2166 struct state *s;
2167 struct state *nexts;
2168 struct arc *a;
2169 struct arc *nexta;
2170 int hasconstraints;
2171
2172 /*
2173 * In the trivial case of a state that loops to itself, we can just drop
2174 * the constraint arc altogether. This is worth special-casing because
2175 * such loops are far more common than loops containing multiple states.
2176 * While we're at it, note whether any constraint arcs survive.
2177 */
2178 hasconstraints = 0;
2179 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
2180 {
2181 nexts = s->next;
2182 /* while we're at it, ensure tmp fields are clear for next step */
2183 assert(s->tmp == NULL);
2184 for (a = s->outs; a != NULL && !NISERR(); a = nexta)
2185 {
2186 nexta = a->outchain;
2187 if (isconstraintarc(a))
2188 {
2189 if (a->to == s)
2190 freearc(nfa, a);
2191 else
2192 hasconstraints = 1;
2193 }
2194 }
2195 /* If we removed all the outarcs, the state is useless. */
2196 if (s->nouts == 0 && !s->flag)
2197 dropstate(nfa, s);
2198 }
2199
2200 /* Nothing to do if no remaining constraint arcs */
2201 if (NISERR() || !hasconstraints)
2202 return;
2203
2204 /*
2205 * Starting from each remaining NFA state, search outwards for a
2206 * constraint loop. If we find a loop, break the loop, then start the
2207 * search over. (We could possibly retain some state from the first scan,
2208 * but it would complicate things greatly, and multi-state constraint
2209 * loops are rare enough that it's not worth optimizing the case.)
2210 */
2211restart:
2212 for (s = nfa->states; s != NULL && !NISERR(); s = s->next)
2213 {
2214 if (findconstraintloop(nfa, s))
2215 goto restart;
2216 }
2217
2218 if (NISERR())
2219 return;
2220
2221 /*
2222 * Now remove any states that have become useless. (This cleanup is not
2223 * very thorough, and would be even less so if we tried to combine it with
2224 * the previous step; but cleanup() will take care of anything we miss.)
2225 *
2226 * Because findconstraintloop intentionally doesn't reset all tmp fields,
2227 * we have to clear them after it's done. This is a convenient place to
2228 * do that, too.
2229 */
2230 for (s = nfa->states; s != NULL; s = nexts)
2231 {
2232 nexts = s->next;
2233 s->tmp = NULL;
2234 if ((s->nins == 0 || s->nouts == 0) && !s->flag)
2235 dropstate(nfa, s);
2236 }
2237
2238 if (f != NULL)
2239 dumpnfa(nfa, f);
2240}
2241
2242/*
2243 * findconstraintloop - recursively find a loop of constraint arcs
2244 *
2245 * If we find a loop, break it by calling breakconstraintloop(), then
2246 * return 1; otherwise return 0.
2247 *
2248 * State tmp fields are guaranteed all NULL on a success return, because
2249 * breakconstraintloop does that. After a failure return, any state that
2250 * is known not to be part of a loop is marked with s->tmp == s; this allows
2251 * us not to have to re-prove that fact on later calls. (This convention is
2252 * workable because we already eliminated single-state loops.)
2253 *
2254 * Note that the found loop doesn't necessarily include the first state we
2255 * are called on. Any loop reachable from that state will do.
2256 *
2257 * The maximum recursion depth here is one more than the length of the longest
2258 * loop-free chain of constraint arcs, which is surely no more than the size
2259 * of the NFA ... but that could still be enough to cause trouble.
2260 */
2261static int
2262findconstraintloop(struct nfa *nfa, struct state *s)
2263{
2264 struct arc *a;
2265
2266 /* Since this is recursive, it could be driven to stack overflow */
2267 if (STACK_TOO_DEEP(nfa->v->re))
2268 {
2269 NERR(REG_ETOOBIG);
2270 return 1; /* to exit as quickly as possible */
2271 }
2272
2273 if (s->tmp != NULL)
2274 {
2275 /* Already proven uninteresting? */
2276 if (s->tmp == s)
2277 return 0;
2278 /* Found a loop involving s */
2279 breakconstraintloop(nfa, s);
2280 /* The tmp fields have been cleaned up by breakconstraintloop */
2281 return 1;
2282 }
2283 for (a = s->outs; a != NULL; a = a->outchain)
2284 {
2285 if (isconstraintarc(a))
2286 {
2287 struct state *sto = a->to;
2288
2289 assert(sto != s);
2290 s->tmp = sto;
2291 if (findconstraintloop(nfa, sto))
2292 return 1;
2293 }
2294 }
2295
2296 /*
2297 * If we get here, no constraint loop exists leading out from s. Mark it
2298 * with s->tmp == s so we need not rediscover that fact again later.
2299 */
2300 s->tmp = s;
2301 return 0;
2302}
2303
2304/*
2305 * breakconstraintloop - break a loop of constraint arcs
2306 *
2307 * sinitial is any one member state of the loop. Each loop member's tmp
2308 * field links to its successor within the loop. (Note that this function
2309 * will reset all the tmp fields to NULL.)
2310 *
2311 * We can break the loop by, for any one state S1 in the loop, cloning its
2312 * loop successor state S2 (and possibly following states), and then moving
2313 * all S1->S2 constraint arcs to point to the cloned S2. The cloned S2 should
2314 * copy any non-constraint outarcs of S2. Constraint outarcs should be
2315 * dropped if they point back to S1, else they need to be copied as arcs to
2316 * similarly cloned states S3, S4, etc. In general, each cloned state copies
2317 * non-constraint outarcs, drops constraint outarcs that would lead to itself
2318 * or any earlier cloned state, and sends other constraint outarcs to newly
2319 * cloned states. No cloned state will have any inarcs that aren't constraint
2320 * arcs or do not lead from S1 or earlier-cloned states. It's okay to drop
2321 * constraint back-arcs since they would not take us to any state we've not
2322 * already been in; therefore, no new constraint loop is created. In this way
2323 * we generate a modified NFA that can still represent every useful state
2324 * sequence, but not sequences that represent state loops with no consumption
2325 * of input data. Note that the set of cloned states will certainly include
2326 * all of the loop member states other than S1, and it may also include
2327 * non-loop states that are reachable from S2 via constraint arcs. This is
2328 * important because there is no guarantee that findconstraintloop found a
2329 * maximal loop (and searching for one would be NP-hard, so don't try).
2330 * Frequently the "non-loop states" are actually part of a larger loop that
2331 * we didn't notice, and indeed there may be several overlapping loops.
2332 * This technique ensures convergence in such cases, while considering only
2333 * the originally-found loop does not.
2334 *
2335 * If there is only one S1->S2 constraint arc, then that constraint is
2336 * certainly satisfied when we enter any of the clone states. This means that
2337 * in the common case where many of the constraint arcs are identically
2338 * labeled, we can merge together clone states linked by a similarly-labeled
2339 * constraint: if we can get to the first one we can certainly get to the
2340 * second, so there's no need to distinguish. This greatly reduces the number
2341 * of new states needed, so we preferentially break the given loop at a state
2342 * pair where this is true.
2343 *
2344 * Furthermore, it's fairly common to find that a cloned successor state has
2345 * no outarcs, especially if we're a bit aggressive about removing unnecessary
2346 * outarcs. If that happens, then there is simply not any interesting state
2347 * that can be reached through the predecessor's loop arcs, which means we can
2348 * break the loop just by removing those loop arcs, with no new states added.
2349 */
2350static void
2351breakconstraintloop(struct nfa *nfa, struct state *sinitial)
2352{
2353 struct state *s;
2354 struct state *shead;
2355 struct state *stail;
2356 struct state *sclone;
2357 struct state *nexts;
2358 struct arc *refarc;
2359 struct arc *a;
2360 struct arc *nexta;
2361
2362 /*
2363 * Start by identifying which loop step we want to break at.
2364 * Preferentially this is one with only one constraint arc. (XXX are
2365 * there any other secondary heuristics we want to use here?) Set refarc
2366 * to point to the selected lone constraint arc, if there is one.
2367 */
2368 refarc = NULL;
2369 s = sinitial;
2370 do
2371 {
2372 nexts = s->tmp;
2373 assert(nexts != s); /* should not see any one-element loops */
2374 if (refarc == NULL)
2375 {
2376 int narcs = 0;
2377
2378 for (a = s->outs; a != NULL; a = a->outchain)
2379 {
2380 if (a->to == nexts && isconstraintarc(a))
2381 {
2382 refarc = a;
2383 narcs++;
2384 }
2385 }
2386 assert(narcs > 0);
2387 if (narcs > 1)
2388 refarc = NULL; /* multiple constraint arcs here, no good */
2389 }
2390 s = nexts;
2391 } while (s != sinitial);
2392
2393 if (refarc)
2394 {
2395 /* break at the refarc */
2396 shead = refarc->from;
2397 stail = refarc->to;
2398 assert(stail == shead->tmp);
2399 }
2400 else
2401 {
2402 /* for lack of a better idea, break after sinitial */
2403 shead = sinitial;
2404 stail = sinitial->tmp;
2405 }
2406
2407 /*
2408 * Reset the tmp fields so that we can use them for local storage in
2409 * clonesuccessorstates. (findconstraintloop won't mind, since it's just
2410 * going to abandon its search anyway.)
2411 */
2412 for (s = nfa->states; s != NULL; s = s->next)
2413 s->tmp = NULL;
2414
2415 /*
2416 * Recursively build clone state(s) as needed.
2417 */
2418 sclone = newstate(nfa);
2419 if (sclone == NULL)
2420 {
2421 assert(NISERR());
2422 return;
2423 }
2424
2425 clonesuccessorstates(nfa, stail, sclone, shead, refarc,
2426 NULL, NULL, nfa->nstates);
2427
2428 if (NISERR())
2429 return;
2430
2431 /*
2432 * It's possible that sclone has no outarcs at all, in which case it's
2433 * useless. (We don't try extremely hard to get rid of useless states
2434 * here, but this is an easy and fairly common case.)
2435 */
2436 if (sclone->nouts == 0)
2437 {
2438 freestate(nfa, sclone);
2439 sclone = NULL;
2440 }
2441
2442 /*
2443 * Move shead's constraint-loop arcs to point to sclone, or just drop them
2444 * if we discovered we don't need sclone.
2445 */
2446 for (a = shead->outs; a != NULL; a = nexta)
2447 {
2448 nexta = a->outchain;
2449 if (a->to == stail && isconstraintarc(a))
2450 {
2451 if (sclone)
2452 cparc(nfa, a, shead, sclone);
2453 freearc(nfa, a);
2454 if (NISERR())
2455 break;
2456 }
2457 }
2458}
2459
2460/*
2461 * clonesuccessorstates - create a tree of constraint-arc successor states
2462 *
2463 * ssource is the state to be cloned, and sclone is the state to copy its
2464 * outarcs into. sclone's inarcs, if any, should already be set up.
2465 *
2466 * spredecessor is the original predecessor state that we are trying to build
2467 * successors for (it may not be the immediate predecessor of ssource).
2468 * refarc, if not NULL, is the original constraint arc that is known to have
2469 * been traversed out of spredecessor to reach the successor(s).
2470 *
2471 * For each cloned successor state, we transiently create a "donemap" that is
2472 * a boolean array showing which source states we've already visited for this
2473 * clone state. This prevents infinite recursion as well as useless repeat
2474 * visits to the same state subtree (which can add up fast, since typical NFAs
2475 * have multiple redundant arc pathways). Each donemap is a char array
2476 * indexed by state number. The donemaps are all of the same size "nstates",
2477 * which is nfa->nstates as of the start of the recursion. This is enough to
2478 * have entries for all pre-existing states, but *not* entries for clone
2479 * states created during the recursion. That's okay since we have no need to
2480 * mark those.
2481 *
2482 * curdonemap is NULL when recursing to a new sclone state, or sclone's
2483 * donemap when we are recursing without having created a new state (which we
2484 * do when we decide we can merge a successor state into the current clone
2485 * state). outerdonemap is NULL at the top level and otherwise the parent
2486 * clone state's donemap.
2487 *
2488 * The successor states we create and fill here form a strict tree structure,
2489 * with each state having exactly one predecessor, except that the toplevel
2490 * state has no inarcs as yet (breakconstraintloop will add its inarcs from
2491 * spredecessor after we're done). Thus, we can examine sclone's inarcs back
2492 * to the root, plus refarc if any, to identify the set of constraints already
2493 * known valid at the current point. This allows us to avoid generating extra
2494 * successor states.
2495 */
2496static void
2497clonesuccessorstates(struct nfa *nfa,
2498 struct state *ssource,
2499 struct state *sclone,
2500 struct state *spredecessor,
2501 struct arc *refarc,
2502 char *curdonemap,
2503 char *outerdonemap,
2504 int nstates)
2505{
2506 char *donemap;
2507 struct arc *a;
2508
2509 /* Since this is recursive, it could be driven to stack overflow */
2510 if (STACK_TOO_DEEP(nfa->v->re))
2511 {
2512 NERR(REG_ETOOBIG);
2513 return;
2514 }
2515
2516 /* If this state hasn't already got a donemap, create one */
2517 donemap = curdonemap;
2518 if (donemap == NULL)
2519 {
2520 donemap = (char *) MALLOC(nstates * sizeof(char));
2521 if (donemap == NULL)
2522 {
2523 NERR(REG_ESPACE);
2524 return;
2525 }
2526
2527 if (outerdonemap != NULL)
2528 {
2529 /*
2530 * Not at outermost recursion level, so copy the outer level's
2531 * donemap; this ensures that we see states in process of being
2532 * visited at outer levels, or already merged into predecessor
2533 * states, as ones we shouldn't traverse back to.
2534 */
2535 memcpy(donemap, outerdonemap, nstates * sizeof(char));
2536 }
2537 else
2538 {
2539 /* At outermost level, only spredecessor is off-limits */
2540 memset(donemap, 0, nstates * sizeof(char));
2541 assert(spredecessor->no < nstates);
2542 donemap[spredecessor->no] = 1;
2543 }
2544 }
2545
2546 /* Mark ssource as visited in the donemap */
2547 assert(ssource->no < nstates);
2548 assert(donemap[ssource->no] == 0);
2549 donemap[ssource->no] = 1;
2550
2551 /*
2552 * We proceed by first cloning all of ssource's outarcs, creating new
2553 * clone states as needed but not doing more with them than that. Then in
2554 * a second pass, recurse to process the child clone states. This allows
2555 * us to have only one child clone state per reachable source state, even
2556 * when there are multiple outarcs leading to the same state. Also, when
2557 * we do visit a child state, its set of inarcs is known exactly, which
2558 * makes it safe to apply the constraint-is-already-checked optimization.
2559 * Also, this ensures that we've merged all the states we can into the
2560 * current clone before we recurse to any children, thus possibly saving
2561 * them from making extra images of those states.
2562 *
2563 * While this function runs, child clone states of the current state are
2564 * marked by setting their tmp fields to point to the original state they
2565 * were cloned from. This makes it possible to detect multiple outarcs
2566 * leading to the same state, and also makes it easy to distinguish clone
2567 * states from original states (which will have tmp == NULL).
2568 */
2569 for (a = ssource->outs; a != NULL && !NISERR(); a = a->outchain)
2570 {
2571 struct state *sto = a->to;
2572
2573 /*
2574 * We do not consider cloning successor states that have no constraint
2575 * outarcs; just link to them as-is. They cannot be part of a
2576 * constraint loop so there is no need to make copies. In particular,
2577 * this rule keeps us from trying to clone the post state, which would
2578 * be a bad idea.
2579 */
2580 if (isconstraintarc(a) && hasconstraintout(sto))
2581 {
2582 struct state *prevclone;
2583 int canmerge;
2584 struct arc *a2;
2585
2586 /*
2587 * Back-link constraint arcs must not be followed. Nor is there a
2588 * need to revisit states previously merged into this clone.
2589 */
2590 assert(sto->no < nstates);
2591 if (donemap[sto->no] != 0)
2592 continue;
2593
2594 /*
2595 * Check whether we already have a child clone state for this
2596 * source state.
2597 */
2598 prevclone = NULL;
2599 for (a2 = sclone->outs; a2 != NULL; a2 = a2->outchain)
2600 {
2601 if (a2->to->tmp == sto)
2602 {
2603 prevclone = a2->to;
2604 break;
2605 }
2606 }
2607
2608 /*
2609 * If this arc is labeled the same as refarc, or the same as any
2610 * arc we must have traversed to get to sclone, then no additional
2611 * constraints need to be met to get to sto, so we should just
2612 * merge its outarcs into sclone.
2613 */
2614 if (refarc && a->type == refarc->type && a->co == refarc->co)
2615 canmerge = 1;
2616 else
2617 {
2618 struct state *s;
2619
2620 canmerge = 0;
2621 for (s = sclone; s->ins; s = s->ins->from)
2622 {
2623 if (s->nins == 1 &&
2624 a->type == s->ins->type && a->co == s->ins->co)
2625 {
2626 canmerge = 1;
2627 break;
2628 }
2629 }
2630 }
2631
2632 if (canmerge)
2633 {
2634 /*
2635 * We can merge into sclone. If we previously made a child
2636 * clone state, drop it; there's no need to visit it. (This
2637 * can happen if ssource has multiple pathways to sto, and we
2638 * only just now found one that is provably a no-op.)
2639 */
2640 if (prevclone)
2641 dropstate(nfa, prevclone); /* kills our outarc, too */
2642
2643 /* Recurse to merge sto's outarcs into sclone */
2644 clonesuccessorstates(nfa,
2645 sto,
2646 sclone,
2647 spredecessor,
2648 refarc,
2649 donemap,
2650 outerdonemap,
2651 nstates);
2652 /* sto should now be marked as previously visited */
2653 assert(NISERR() || donemap[sto->no] == 1);
2654 }
2655 else if (prevclone)
2656 {
2657 /*
2658 * We already have a clone state for this successor, so just
2659 * make another arc to it.
2660 */
2661 cparc(nfa, a, sclone, prevclone);
2662 }
2663 else
2664 {
2665 /*
2666 * We need to create a new successor clone state.
2667 */
2668 struct state *stoclone;
2669
2670 stoclone = newstate(nfa);
2671 if (stoclone == NULL)
2672 {
2673 assert(NISERR());
2674 break;
2675 }
2676 /* Mark it as to what it's a clone of */
2677 stoclone->tmp = sto;
2678 /* ... and add the outarc leading to it */
2679 cparc(nfa, a, sclone, stoclone);
2680 }
2681 }
2682 else
2683 {
2684 /*
2685 * Non-constraint outarcs just get copied to sclone, as do outarcs
2686 * leading to states with no constraint outarc.
2687 */
2688 cparc(nfa, a, sclone, sto);
2689 }
2690 }
2691
2692 /*
2693 * If we are at outer level for this clone state, recurse to all its child
2694 * clone states, clearing their tmp fields as we go. (If we're not
2695 * outermost for sclone, leave this to be done by the outer call level.)
2696 * Note that if we have multiple outarcs leading to the same clone state,
2697 * it will only be recursed-to once.
2698 */
2699 if (curdonemap == NULL)
2700 {
2701 for (a = sclone->outs; a != NULL && !NISERR(); a = a->outchain)
2702 {
2703 struct state *stoclone = a->to;
2704 struct state *sto = stoclone->tmp;
2705
2706 if (sto != NULL)
2707 {
2708 stoclone->tmp = NULL;
2709 clonesuccessorstates(nfa,
2710 sto,
2711 stoclone,
2712 spredecessor,
2713 refarc,
2714 NULL,
2715 donemap,
2716 nstates);
2717 }
2718 }
2719
2720 /* Don't forget to free sclone's donemap when done with it */
2721 FREE(donemap);
2722 }
2723}
2724
2725/*
2726 * cleanup - clean up NFA after optimizations
2727 */
2728static void
2729cleanup(struct nfa *nfa)
2730{
2731 struct state *s;
2732 struct state *nexts;
2733 int n;
2734
2735 if (NISERR())
2736 return;
2737
2738 /* clear out unreachable or dead-end states */
2739 /* use pre to mark reachable, then post to mark can-reach-post */
2740 markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre);
2741 markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
2742 for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
2743 {
2744 nexts = s->next;
2745 if (s->tmp != nfa->post && !s->flag)
2746 dropstate(nfa, s);
2747 }
2748 assert(NISERR() || nfa->post->nins == 0 || nfa->post->tmp == nfa->post);
2749 cleartraverse(nfa, nfa->pre);
2750 assert(NISERR() || nfa->post->nins == 0 || nfa->post->tmp == NULL);
2751 /* the nins==0 (final unreachable) case will be caught later */
2752
2753 /* renumber surviving states */
2754 n = 0;
2755 for (s = nfa->states; s != NULL; s = s->next)
2756 s->no = n++;
2757 nfa->nstates = n;
2758}
2759
2760/*
2761 * markreachable - recursive marking of reachable states
2762 */
2763static void
2764markreachable(struct nfa *nfa,
2765 struct state *s,
2766 struct state *okay, /* consider only states with this mark */
2767 struct state *mark) /* the value to mark with */
2768{
2769 struct arc *a;
2770
2771 /* Since this is recursive, it could be driven to stack overflow */
2772 if (STACK_TOO_DEEP(nfa->v->re))
2773 {
2774 NERR(REG_ETOOBIG);
2775 return;
2776 }
2777
2778 if (s->tmp != okay)
2779 return;
2780 s->tmp = mark;
2781
2782 for (a = s->outs; a != NULL; a = a->outchain)
2783 markreachable(nfa, a->to, okay, mark);
2784}
2785
2786/*
2787 * markcanreach - recursive marking of states which can reach here
2788 */
2789static void
2790markcanreach(struct nfa *nfa,
2791 struct state *s,
2792 struct state *okay, /* consider only states with this mark */
2793 struct state *mark) /* the value to mark with */
2794{
2795 struct arc *a;
2796
2797 /* Since this is recursive, it could be driven to stack overflow */
2798 if (STACK_TOO_DEEP(nfa->v->re))
2799 {
2800 NERR(REG_ETOOBIG);
2801 return;
2802 }
2803
2804 if (s->tmp != okay)
2805 return;
2806 s->tmp = mark;
2807
2808 for (a = s->ins; a != NULL; a = a->inchain)
2809 markcanreach(nfa, a->from, okay, mark);
2810}
2811
2812/*
2813 * analyze - ascertain potentially-useful facts about an optimized NFA
2814 */
2815static long /* re_info bits to be ORed in */
2816analyze(struct nfa *nfa)
2817{
2818 struct arc *a;
2819 struct arc *aa;
2820
2821 if (NISERR())
2822 return 0;
2823
2824 if (nfa->pre->outs == NULL)
2825 return REG_UIMPOSSIBLE;
2826 for (a = nfa->pre->outs; a != NULL; a = a->outchain)
2827 for (aa = a->to->outs; aa != NULL; aa = aa->outchain)
2828 if (aa->to == nfa->post)
2829 return REG_UEMPTYMATCH;
2830 return 0;
2831}
2832
2833/*
2834 * compact - construct the compact representation of an NFA
2835 */
2836static void
2837compact(struct nfa *nfa,
2838 struct cnfa *cnfa)
2839{
2840 struct state *s;
2841 struct arc *a;
2842 size_t nstates;
2843 size_t narcs;
2844 struct carc *ca;
2845 struct carc *first;
2846
2847 assert(!NISERR());
2848
2849 nstates = 0;
2850 narcs = 0;
2851 for (s = nfa->states; s != NULL; s = s->next)
2852 {
2853 nstates++;
2854 narcs += s->nouts + 1; /* need one extra for endmarker */
2855 }
2856
2857 cnfa->stflags = (char *) MALLOC(nstates * sizeof(char));
2858 cnfa->states = (struct carc **) MALLOC(nstates * sizeof(struct carc *));
2859 cnfa->arcs = (struct carc *) MALLOC(narcs * sizeof(struct carc));
2860 if (cnfa->stflags == NULL || cnfa->states == NULL || cnfa->arcs == NULL)
2861 {
2862 if (cnfa->stflags != NULL)
2863 FREE(cnfa->stflags);
2864 if (cnfa->states != NULL)
2865 FREE(cnfa->states);
2866 if (cnfa->arcs != NULL)
2867 FREE(cnfa->arcs);
2868 NERR(REG_ESPACE);
2869 return;
2870 }
2871 cnfa->nstates = nstates;
2872 cnfa->pre = nfa->pre->no;
2873 cnfa->post = nfa->post->no;
2874 cnfa->bos[0] = nfa->bos[0];
2875 cnfa->bos[1] = nfa->bos[1];
2876 cnfa->eos[0] = nfa->eos[0];
2877 cnfa->eos[1] = nfa->eos[1];
2878 cnfa->ncolors = maxcolor(nfa->cm) + 1;
2879 cnfa->flags = 0;
2880
2881 ca = cnfa->arcs;
2882 for (s = nfa->states; s != NULL; s = s->next)
2883 {
2884 assert((size_t) s->no < nstates);
2885 cnfa->stflags[s->no] = 0;
2886 cnfa->states[s->no] = ca;
2887 first = ca;
2888 for (a = s->outs; a != NULL; a = a->outchain)
2889 switch (a->type)
2890 {
2891 case PLAIN:
2892 ca->co = a->co;
2893 ca->to = a->to->no;
2894 ca++;
2895 break;
2896 case LACON:
2897 assert(s->no != cnfa->pre);
2898 ca->co = (color) (cnfa->ncolors + a->co);
2899 ca->to = a->to->no;
2900 ca++;
2901 cnfa->flags |= HASLACONS;
2902 break;
2903 default:
2904 NERR(REG_ASSERT);
2905 break;
2906 }
2907 carcsort(first, ca - first);
2908 ca->co = COLORLESS;
2909 ca->to = 0;
2910 ca++;
2911 }
2912 assert(ca == &cnfa->arcs[narcs]);
2913 assert(cnfa->nstates != 0);
2914
2915 /* mark no-progress states */
2916 for (a = nfa->pre->outs; a != NULL; a = a->outchain)
2917 cnfa->stflags[a->to->no] = CNFA_NOPROGRESS;
2918 cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS;
2919}
2920
2921/*
2922 * carcsort - sort compacted-NFA arcs by color
2923 */
2924static void
2925carcsort(struct carc *first, size_t n)
2926{
2927 if (n > 1)
2928 qsort(first, n, sizeof(struct carc), carc_cmp);
2929}
2930
2931static int
2932carc_cmp(const void *a, const void *b)
2933{
2934 const struct carc *aa = (const struct carc *) a;
2935 const struct carc *bb = (const struct carc *) b;
2936
2937 if (aa->co < bb->co)
2938 return -1;
2939 if (aa->co > bb->co)
2940 return +1;
2941 if (aa->to < bb->to)
2942 return -1;
2943 if (aa->to > bb->to)
2944 return +1;
2945 return 0;
2946}
2947
2948/*
2949 * freecnfa - free a compacted NFA
2950 */
2951static void
2952freecnfa(struct cnfa *cnfa)
2953{
2954 assert(cnfa->nstates != 0); /* not empty already */
2955 cnfa->nstates = 0;
2956 FREE(cnfa->stflags);
2957 FREE(cnfa->states);
2958 FREE(cnfa->arcs);
2959}
2960
2961/*
2962 * dumpnfa - dump an NFA in human-readable form
2963 */
2964static void
2965dumpnfa(struct nfa *nfa,
2966 FILE *f)
2967{
2968#ifdef REG_DEBUG
2969 struct state *s;
2970 int nstates = 0;
2971 int narcs = 0;
2972
2973 fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
2974 if (nfa->bos[0] != COLORLESS)
2975 fprintf(f, ", bos [%ld]", (long) nfa->bos[0]);
2976 if (nfa->bos[1] != COLORLESS)
2977 fprintf(f, ", bol [%ld]", (long) nfa->bos[1]);
2978 if (nfa->eos[0] != COLORLESS)
2979 fprintf(f, ", eos [%ld]", (long) nfa->eos[0]);
2980 if (nfa->eos[1] != COLORLESS)
2981 fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
2982 fprintf(f, "\n");
2983 for (s = nfa->states; s != NULL; s = s->next)
2984 {
2985 dumpstate(s, f);
2986 nstates++;
2987 narcs += s->nouts;
2988 }
2989 fprintf(f, "total of %d states, %d arcs\n", nstates, narcs);
2990 if (nfa->parent == NULL)
2991 dumpcolors(nfa->cm, f);
2992 fflush(f);
2993#endif
2994}
2995
2996#ifdef REG_DEBUG /* subordinates of dumpnfa */
2997
2998/*
2999 * dumpstate - dump an NFA state in human-readable form
3000 */
3001static void
3002dumpstate(struct state *s,
3003 FILE *f)
3004{
3005 struct arc *a;
3006
3007 fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "",
3008 (s->flag) ? s->flag : '.');
3009 if (s->prev != NULL && s->prev->next != s)
3010 fprintf(f, "\tstate chain bad\n");
3011 if (s->nouts == 0)
3012 fprintf(f, "\tno out arcs\n");
3013 else
3014 dumparcs(s, f);
3015 fflush(f);
3016 for (a = s->ins; a != NULL; a = a->inchain)
3017 {
3018 if (a->to != s)
3019 fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
3020 a->from->no, a->to->no, s->no);
3021 }
3022}
3023
3024/*
3025 * dumparcs - dump out-arcs in human-readable form
3026 */
3027static void
3028dumparcs(struct state *s,
3029 FILE *f)
3030{
3031 int pos;
3032 struct arc *a;
3033
3034 /* printing oldest arcs first is usually clearer */
3035 a = s->outs;
3036 assert(a != NULL);
3037 while (a->outchain != NULL)
3038 a = a->outchain;
3039 pos = 1;
3040 do
3041 {
3042 dumparc(a, s, f);
3043 if (pos == 5)
3044 {
3045 fprintf(f, "\n");
3046 pos = 1;
3047 }
3048 else
3049 pos++;
3050 a = a->outchainRev;
3051 } while (a != NULL);
3052 if (pos != 1)
3053 fprintf(f, "\n");
3054}
3055
3056/*
3057 * dumparc - dump one outarc in readable form, including prefixing tab
3058 */
3059static void
3060dumparc(struct arc *a,
3061 struct state *s,
3062 FILE *f)
3063{
3064 struct arc *aa;
3065 struct arcbatch *ab;
3066
3067 fprintf(f, "\t");
3068 switch (a->type)
3069 {
3070 case PLAIN:
3071 fprintf(f, "[%ld]", (long) a->co);
3072 break;
3073 case AHEAD:
3074 fprintf(f, ">%ld>", (long) a->co);
3075 break;
3076 case BEHIND:
3077 fprintf(f, "<%ld<", (long) a->co);
3078 break;
3079 case LACON:
3080 fprintf(f, ":%ld:", (long) a->co);
3081 break;
3082 case '^':
3083 case '$':
3084 fprintf(f, "%c%d", a->type, (int) a->co);
3085 break;
3086 case EMPTY:
3087 break;
3088 default:
3089 fprintf(f, "0x%x/0%lo", a->type, (long) a->co);
3090 break;
3091 }
3092 if (a->from != s)
3093 fprintf(f, "?%d?", a->from->no);
3094 for (ab = &a->from->oas; ab != NULL; ab = ab->next)
3095 {
3096 for (aa = &ab->a[0]; aa < &ab->a[ABSIZE]; aa++)
3097 if (aa == a)
3098 break; /* NOTE BREAK OUT */
3099 if (aa < &ab->a[ABSIZE]) /* propagate break */
3100 break; /* NOTE BREAK OUT */
3101 }
3102 if (ab == NULL)
3103 fprintf(f, "?!?"); /* not in allocated space */
3104 fprintf(f, "->");
3105 if (a->to == NULL)
3106 {
3107 fprintf(f, "NULL");
3108 return;
3109 }
3110 fprintf(f, "%d", a->to->no);
3111 for (aa = a->to->ins; aa != NULL; aa = aa->inchain)
3112 if (aa == a)
3113 break; /* NOTE BREAK OUT */
3114 if (aa == NULL)
3115 fprintf(f, "?!?"); /* missing from in-chain */
3116}
3117#endif /* REG_DEBUG */
3118
3119/*
3120 * dumpcnfa - dump a compacted NFA in human-readable form
3121 */
3122#ifdef REG_DEBUG
3123static void
3124dumpcnfa(struct cnfa *cnfa,
3125 FILE *f)
3126{
3127 int st;
3128
3129 fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
3130 if (cnfa->bos[0] != COLORLESS)
3131 fprintf(f, ", bos [%ld]", (long) cnfa->bos[0]);
3132 if (cnfa->bos[1] != COLORLESS)
3133 fprintf(f, ", bol [%ld]", (long) cnfa->bos[1]);
3134 if (cnfa->eos[0] != COLORLESS)
3135 fprintf(f, ", eos [%ld]", (long) cnfa->eos[0]);
3136 if (cnfa->eos[1] != COLORLESS)
3137 fprintf(f, ", eol [%ld]", (long) cnfa->eos[1]);
3138 if (cnfa->flags & HASLACONS)
3139 fprintf(f, ", haslacons");
3140 fprintf(f, "\n");
3141 for (st = 0; st < cnfa->nstates; st++)
3142 dumpcstate(st, cnfa, f);
3143 fflush(f);
3144}
3145#endif
3146
3147#ifdef REG_DEBUG /* subordinates of dumpcnfa */
3148
3149/*
3150 * dumpcstate - dump a compacted-NFA state in human-readable form
3151 */
3152static void
3153dumpcstate(int st,
3154 struct cnfa *cnfa,
3155 FILE *f)
3156{
3157 struct carc *ca;
3158 int pos;
3159
3160 fprintf(f, "%d%s", st, (cnfa->stflags[st] & CNFA_NOPROGRESS) ? ":" : ".");
3161 pos = 1;
3162 for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
3163 {
3164 if (ca->co < cnfa->ncolors)
3165 fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to);
3166 else
3167 fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to);
3168 if (pos == 5)
3169 {
3170 fprintf(f, "\n");
3171 pos = 1;
3172 }
3173 else
3174 pos++;
3175 }
3176 if (ca == cnfa->states[st] || pos != 1)
3177 fprintf(f, "\n");
3178 fflush(f);
3179}
3180
3181#endif /* REG_DEBUG */
3182