regc_nfa.c source code [PostgreSQL/src/backend/regex/regc_nfa.c]

1	/*
2	* NFA utilities.
3	* This file is #included by regcomp.c.
4	*
5	* Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
6	*
7	* Development of this software was funded, in part, by Cray Research Inc.,
8	* UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9	* Corporation, none of whom are responsible for the results. The author
10	* thanks all of them.
11	*
12	* Redistribution and use in source and binary forms -- with or without
13	* modification -- are permitted for any purpose, provided that
14	* redistributions in source form retain this entire copyright notice and
15	* indicate the origin and nature of any modifications.
16	*
17	* I'd appreciate being given credit for this package in the documentation
18	* of software which uses it, but that is not a requirement.
19	*
20	* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21	* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22	* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23	* HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26	* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27	* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28	* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29	* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30	*
31	* src/backend/regex/regc_nfa.c
32	*
33	*
34	* One or two things that technically ought to be in here
35	* are actually in color.c, thanks to some incestuous relationships in
36	* the color chains.
37	*/
38
39	#define NISERR() VISERR(nfa->v)
40	#define NERR(e) VERR(nfa->v, (e))
41
42
43	/*
44	* newnfa - set up an NFA
45	*/
46	static struct nfa * / the NFA, or NULL /
47	newnfa(struct vars *v,
48	struct colormap *cm,
49	struct nfa parent) /* NULL if primary NFA /
50	{
51	struct nfa *nfa;
52
53	nfa = (struct nfa ) MALLOC(sizeof(struct* nfa));
54	if (nfa == NULL)
55	{
56	ERR(REG_ESPACE);
57	return NULL;
58	}
59
60	nfa->states = NULL;
61	nfa->slast = NULL;
62	nfa->free = NULL;
63	nfa->nstates = `0`;
64	nfa->cm = cm;
65	nfa->v = v;
66	nfa->bos[`0`] = nfa->bos[`1`] = COLORLESS;
67	nfa->eos[`0`] = nfa->eos[`1`] = COLORLESS;
68	nfa->parent = parent; / Precedes newfstate so parent is valid. /
69	nfa->post = newfstate(nfa, `'@'`); / number 0 /
70	nfa->pre = newfstate(nfa, `'>'`); / number 1 /
71
72	nfa->init = newstate(nfa); / may become invalid later /
73	nfa->final = newstate(nfa);
74	if (ISERR())
75	{
76	freenfa(nfa);
77	return NULL;
78	}
79	rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->pre, nfa->init);
80	newarc(nfa, `'^'`, `1`, nfa->pre, nfa->init);
81	newarc(nfa, `'^'`, `0`, nfa->pre, nfa->init);
82	rainbow(nfa, nfa->cm, PLAIN, COLORLESS, nfa->final, nfa->post);
83	newarc(nfa, `'$'`, `1`, nfa->final, nfa->post);
84	newarc(nfa, `'$'`, `0`, nfa->final, nfa->post);
85
86	if (ISERR())
87	{
88	freenfa(nfa);
89	return NULL;
90	}
91	return nfa;
92	}
93
94	/*
95	* freenfa - free an entire NFA
96	*/
97	static void
98	freenfa(struct nfa *nfa)
99	{
100	struct state *s;
101
102	while ((s = nfa->states) != NULL)
103	{
104	s->nins = s->nouts = `0`; / don't worry about arcs /
105	freestate(nfa, s);
106	}
107	while ((s = nfa->free) != NULL)
108	{
109	nfa->free = s->next;
110	destroystate(nfa, s);
111	}
112
113	nfa->slast = NULL;
114	nfa->nstates = -`1`;
115	nfa->pre = NULL;
116	nfa->post = NULL;
117	FREE(nfa);
118	}
119
120	/*
121	* newstate - allocate an NFA state, with zero flag value
122	*/
123	static struct state * / NULL on error /
124	newstate(struct nfa *nfa)
125	{
126	struct state *s;
127
128	/*
129	* This is a handy place to check for operation cancel during regex
130	* compilation, since no code path will go very long without making a new
131	* state or arc.
132	*/
133	if (CANCEL_REQUESTED(nfa->v->re))
134	{
135	NERR(REG_CANCEL);
136	return NULL;
137	}
138
139	if (nfa->free != NULL)
140	{
141	s = nfa->free;
142	nfa->free = s->next;
143	}
144	else
145	{
146	if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
147	{
148	NERR(REG_ETOOBIG);
149	return NULL;
150	}
151	s = (struct state ) MALLOC(sizeof(struct* state));
152	if (s == NULL)
153	{
154	NERR(REG_ESPACE);
155	return NULL;
156	}
157	nfa->v->spaceused += sizeof(struct state);
158	s->oas.next = NULL;
159	s->free = NULL;
160	s->noas = `0`;
161	}
162
163	assert(nfa->nstates >= `0`);
164	s->no = nfa->nstates++;
165	s->flag = `0`;
166	if (nfa->states == NULL)
167	nfa->states = s;
168	s->nins = `0`;
169	s->ins = NULL;
170	s->nouts = `0`;
171	s->outs = NULL;
172	s->tmp = NULL;
173	s->next = NULL;
174	if (nfa->slast != NULL)
175	{
176	assert(nfa->slast->next == NULL);
177	nfa->slast->next = s;
178	}
179	s->prev = nfa->slast;
180	nfa->slast = s;
181	return s;
182	}
183
184	/*
185	* newfstate - allocate an NFA state with a specified flag value
186	*/
187	static struct state * / NULL on error /
188	newfstate(struct nfa nfa, int* flag)
189	{
190	struct state *s;
191
192	s = newstate(nfa);
193	if (s != NULL)
194	s->flag = (char) flag;
195	return s;
196	}
197
198	/*
199	* dropstate - delete a state's inarcs and outarcs and free it
200	*/
201	static void
202	dropstate(struct nfa *nfa,
203	struct state *s)
204	{
205	struct arc *a;
206
207	while ((a = s->ins) != NULL)
208	freearc(nfa, a);
209	while ((a = s->outs) != NULL)
210	freearc(nfa, a);
211	freestate(nfa, s);
212	}
213
214	/*
215	* freestate - free a state, which has no in-arcs or out-arcs
216	*/
217	static void
218	freestate(struct nfa *nfa,
219	struct state *s)
220	{
221	assert(s != NULL);
222	assert(s->nins == `0` && s->nouts == `0`);
223
224	s->no = FREESTATE;
225	s->flag = `0`;
226	if (s->next != NULL)
227	s->next->prev = s->prev;
228	else
229	{
230	assert(s == nfa->slast);
231	nfa->slast = s->prev;
232	}
233	if (s->prev != NULL)
234	s->prev->next = s->next;
235	else
236	{
237	assert(s == nfa->states);
238	nfa->states = s->next;
239	}
240	s->prev = NULL;
241	s->next = nfa->free; / don't delete it, put it on the free list /
242	nfa->free = s;
243	}
244
245	/*
246	* destroystate - really get rid of an already-freed state
247	*/
248	static void
249	destroystate(struct nfa *nfa,
250	struct state *s)
251	{
252	struct arcbatch *ab;
253	struct arcbatch *abnext;
254
255	assert(s->no == FREESTATE);
256	for (ab = s->oas.next; ab != NULL; ab = abnext)
257	{
258	abnext = ab->next;
259	FREE(ab);
260	nfa->v->spaceused -= sizeof(struct arcbatch);
261	}
262	s->ins = NULL;
263	s->outs = NULL;
264	s->next = NULL;
265	FREE(s);
266	nfa->v->spaceused -= sizeof(struct state);
267	}
268
269	/*
270	* newarc - set up a new arc within an NFA
271	*
272	* This function checks to make sure that no duplicate arcs are created.
273	* In general we never want duplicates.
274	*/
275	static void
276	newarc(struct nfa *nfa,
277	int t,
278	color co,
279	struct state *from,
280	struct state *to)
281	{
282	struct arc *a;
283
284	assert(from != NULL && to != NULL);
285
286	/*
287	* This is a handy place to check for operation cancel during regex
288	* compilation, since no code path will go very long without making a new
289	* state or arc.
290	*/
291	if (CANCEL_REQUESTED(nfa->v->re))
292	{
293	NERR(REG_CANCEL);
294	return;
295	}
296
297	/ check for duplicate arc, using whichever chain is shorter /
298	if (from->nouts <= to->nins)
299	{
300	for (a = from->outs; a != NULL; a = a->outchain)
301	if (a->to == to && a->co == co && a->type == t)
302	return;
303	}
304	else
305	{
306	for (a = to->ins; a != NULL; a = a->inchain)
307	if (a->from == from && a->co == co && a->type == t)
308	return;
309	}
310
311	/ no dup, so create the arc /
312	createarc(nfa, t, co, from, to);
313	}
314
315	/*
316	* createarc - create a new arc within an NFA
317	*
318	* This function must only be used after verifying that there is no existing
319	* identical arc (same type/color/from/to).
320	*/
321	static void
322	createarc(struct nfa *nfa,
323	int t,
324	color co,
325	struct state *from,
326	struct state *to)
327	{
328	struct arc *a;
329
330	/ the arc is physically allocated within its from-state /
331	a = allocarc(nfa, from);
332	if (NISERR())
333	return;
334	assert(a != NULL);
335
336	a->type = t;
337	a->co = co;
338	a->to = to;
339	a->from = from;
340
341	/*
342	* Put the new arc on the beginning, not the end, of the chains; it's
343	* simpler here, and freearc() is the same cost either way. See also the
344	* logic in moveins() and its cohorts, as well as fixempties().
345	*/
346	a->inchain = to->ins;
347	a->inchainRev = NULL;
348	if (to->ins)
349	to->ins->inchainRev = a;
350	to->ins = a;
351	a->outchain = from->outs;
352	a->outchainRev = NULL;
353	if (from->outs)
354	from->outs->outchainRev = a;
355	from->outs = a;
356
357	from->nouts++;
358	to->nins++;
359
360	if (COLORED(a) && nfa->parent == NULL)
361	colorchain(nfa->cm, a);
362	}
363
364	/*
365	* allocarc - allocate a new out-arc within a state
366	*/
367	static struct arc * / NULL for failure /
368	allocarc(struct nfa *nfa,
369	struct state *s)
370	{
371	struct arc *a;
372
373	/ shortcut /
374	if (s->free == NULL && s->noas < ABSIZE)
375	{
376	a = &s->oas.a[s->noas];
377	s->noas++;
378	return a;
379	}
380
381	/ if none at hand, get more /
382	if (s->free == NULL)
383	{
384	struct arcbatch *newAb;
385	int i;
386
387	if (nfa->v->spaceused >= REG_MAX_COMPILE_SPACE)
388	{
389	NERR(REG_ETOOBIG);
390	return NULL;
391	}
392	newAb = (struct arcbatch ) MALLOC(sizeof(struct* arcbatch));
393	if (newAb == NULL)
394	{
395	NERR(REG_ESPACE);
396	return NULL;
397	}
398	nfa->v->spaceused += sizeof(struct arcbatch);
399	newAb->next = s->oas.next;
400	s->oas.next = newAb;
401
402	for (i = `0`; i < ABSIZE; i++)
403	{
404	newAb->a[i].type = `0`;
405	newAb->a[i].freechain = &newAb->a[i + `1`];
406	}
407	newAb->a[ABSIZE - `1`].freechain = NULL;
408	s->free = &newAb->a[`0`];
409	}
410	assert(s->free != NULL);
411
412	a = s->free;
413	s->free = a->freechain;
414	return a;
415	}
416
417	/*
418	* freearc - free an arc
419	*/
420	static void
421	freearc(struct nfa *nfa,
422	struct arc *victim)
423	{
424	struct state *from = victim->from;
425	struct state *to = victim->to;
426	struct arc *predecessor;
427
428	assert(victim->type != `0`);
429
430	/ take it off color chain if necessary /
431	if (COLORED(victim) && nfa->parent == NULL)
432	uncolorchain(nfa->cm, victim);
433
434	/ take it off source's out-chain /
435	assert(from != NULL);
436	predecessor = victim->outchainRev;
437	if (predecessor == NULL)
438	{
439	assert(from->outs == victim);
440	from->outs = victim->outchain;
441	}
442	else
443	{
444	assert(predecessor->outchain == victim);
445	predecessor->outchain = victim->outchain;
446	}
447	if (victim->outchain != NULL)
448	{
449	assert(victim->outchain->outchainRev == victim);
450	victim->outchain->outchainRev = predecessor;
451	}
452	from->nouts--;
453
454	/ take it off target's in-chain /
455	assert(to != NULL);
456	predecessor = victim->inchainRev;
457	if (predecessor == NULL)
458	{
459	assert(to->ins == victim);
460	to->ins = victim->inchain;
461	}
462	else
463	{
464	assert(predecessor->inchain == victim);
465	predecessor->inchain = victim->inchain;
466	}
467	if (victim->inchain != NULL)
468	{
469	assert(victim->inchain->inchainRev == victim);
470	victim->inchain->inchainRev = predecessor;
471	}
472	to->nins--;
473
474	/ clean up and place on from-state's free list /
475	victim->type = `0`;
476	victim->from = NULL; / precautions... /
477	victim->to = NULL;
478	victim->inchain = NULL;
479	victim->inchainRev = NULL;
480	victim->outchain = NULL;
481	victim->outchainRev = NULL;
482	victim->freechain = from->free;
483	from->free = victim;
484	}
485
486	/*
487	* changearctarget - flip an arc to have a different to state
488	*
489	* Caller must have verified that there is no pre-existing duplicate arc.
490	*
491	* Note that because we store arcs in their from state, we can't easily have
492	* a similar changearcsource function.
493	*/
494	static void
495	changearctarget(struct arc a, struct* state *newto)
496	{
497	struct state *oldto = a->to;
498	struct arc *predecessor;
499
500	assert(oldto != newto);
501
502	/ take it off old target's in-chain /
503	assert(oldto != NULL);
504	predecessor = a->inchainRev;
505	if (predecessor == NULL)
506	{
507	assert(oldto->ins == a);
508	oldto->ins = a->inchain;
509	}
510	else
511	{
512	assert(predecessor->inchain == a);
513	predecessor->inchain = a->inchain;
514	}
515	if (a->inchain != NULL)
516	{
517	assert(a->inchain->inchainRev == a);
518	a->inchain->inchainRev = predecessor;
519	}
520	oldto->nins--;
521
522	a->to = newto;
523
524	/ prepend it to new target's in-chain /
525	a->inchain = newto->ins;
526	a->inchainRev = NULL;
527	if (newto->ins)
528	newto->ins->inchainRev = a;
529	newto->ins = a;
530	newto->nins++;
531	}
532
533	/*
534	* hasnonemptyout - Does state have a non-EMPTY out arc?
535	*/
536	static int
537	hasnonemptyout(struct state *s)
538	{
539	struct arc *a;
540
541	for (a = s->outs; a != NULL; a = a->outchain)
542	{
543	if (a->type != EMPTY)
544	return `1`;
545	}
546	return `0`;
547	}
548
549	/*
550	* findarc - find arc, if any, from given source with given type and color
551	* If there is more than one such arc, the result is random.
552	*/
553	static struct arc *
554	findarc(struct state *s,
555	int type,
556	color co)
557	{
558	struct arc *a;
559
560	for (a = s->outs; a != NULL; a = a->outchain)
561	if (a->type == type && a->co == co)
562	return a;
563	return NULL;
564	}
565
566	/*
567	* cparc - allocate a new arc within an NFA, copying details from old one
568	*/
569	static void
570	cparc(struct nfa *nfa,
571	struct arc *oa,
572	struct state *from,
573	struct state *to)
574	{
575	newarc(nfa, oa->type, oa->co, from, to);
576	}
577
578	/*
579	* sortins - sort the in arcs of a state by from/color/type
580	*/
581	static void
582	sortins(struct nfa *nfa,
583	struct state *s)
584	{
585	struct arc **sortarray;
586	struct arc *a;
587	int n = s->nins;
588	int i;
589
590	if (n <= `1`)
591	return; / nothing to do /
592	/ make an array of arc pointers ... /
593	sortarray = (struct arc *) MALLOC(n sizeof(struct arc *));
594	if (sortarray == NULL)
595	{
596	NERR(REG_ESPACE);
597	return;
598	}
599	i = `0`;
600	for (a = s->ins; a != NULL; a = a->inchain)
601	sortarray[i++] = a;
602	assert(i == n);
603	/ ... sort the array /
604	qsort(sortarray, n, sizeof(struct arc *), sortins_cmp);
605	/ ... and rebuild arc list in order /
606	/ it seems worth special-casing first and last items to simplify loop /
607	a = sortarray[`0`];
608	s->ins = a;
609	a->inchain = sortarray[`1`];
610	a->inchainRev = NULL;
611	for (i = `1`; i < n - `1`; i++)
612	{
613	a = sortarray[i];
614	a->inchain = sortarray[i + `1`];
615	a->inchainRev = sortarray[i - `1`];
616	}
617	a = sortarray[i];
618	a->inchain = NULL;
619	a->inchainRev = sortarray[i - `1`];
620	FREE(sortarray);
621	}
622
623	static int
624	sortins_cmp(const void a, const* void *b)
625	{
626	const struct arc aa = ((const struct arc *const *) a);
627	const struct arc bb = ((const struct arc *const *) b);
628
629	/ we check the fields in the order they are most likely to be different /
630	if (aa->from->no < bb->from->no)
631	return -`1`;
632	if (aa->from->no > bb->from->no)
633	return `1`;
634	if (aa->co < bb->co)
635	return -`1`;
636	if (aa->co > bb->co)
637	return `1`;
638	if (aa->type < bb->type)
639	return -`1`;
640	if (aa->type > bb->type)
641	return `1`;
642	return `0`;
643	}
644
645	/*
646	* sortouts - sort the out arcs of a state by to/color/type
647	*/
648	static void
649	sortouts(struct nfa *nfa,
650	struct state *s)
651	{
652	struct arc **sortarray;
653	struct arc *a;
654	int n = s->nouts;
655	int i;
656
657	if (n <= `1`)
658	return; / nothing to do /
659	/ make an array of arc pointers ... /
660	sortarray = (struct arc *) MALLOC(n sizeof(struct arc *));
661	if (sortarray == NULL)
662	{
663	NERR(REG_ESPACE);
664	return;
665	}
666	i = `0`;
667	for (a = s->outs; a != NULL; a = a->outchain)
668	sortarray[i++] = a;
669	assert(i == n);
670	/ ... sort the array /
671	qsort(sortarray, n, sizeof(struct arc *), sortouts_cmp);
672	/ ... and rebuild arc list in order /
673	/ it seems worth special-casing first and last items to simplify loop /
674	a = sortarray[`0`];
675	s->outs = a;
676	a->outchain = sortarray[`1`];
677	a->outchainRev = NULL;
678	for (i = `1`; i < n - `1`; i++)
679	{
680	a = sortarray[i];
681	a->outchain = sortarray[i + `1`];
682	a->outchainRev = sortarray[i - `1`];
683	}
684	a = sortarray[i];
685	a->outchain = NULL;
686	a->outchainRev = sortarray[i - `1`];
687	FREE(sortarray);
688	}
689
690	static int
691	sortouts_cmp(const void a, const* void *b)
692	{
693	const struct arc aa = ((const struct arc *const *) a);
694	const struct arc bb = ((const struct arc *const *) b);
695
696	/ we check the fields in the order they are most likely to be different /
697	if (aa->to->no < bb->to->no)
698	return -`1`;
699	if (aa->to->no > bb->to->no)
700	return `1`;
701	if (aa->co < bb->co)
702	return -`1`;
703	if (aa->co > bb->co)
704	return `1`;
705	if (aa->type < bb->type)
706	return -`1`;
707	if (aa->type > bb->type)
708	return `1`;
709	return `0`;
710	}
711
712	/*
713	* Common decision logic about whether to use arc-by-arc operations or
714	* sort/merge. If there's just a few source arcs we cannot recoup the
715	* cost of sorting the destination arc list, no matter how large it is.
716	* Otherwise, limit the number of arc-by-arc comparisons to about 1000
717	* (a somewhat arbitrary choice, but the breakeven point would probably
718	* be machine dependent anyway).
719	*/
720	#define BULK_ARC_OP_USE_SORT(nsrcarcs, ndestarcs) \
721	((nsrcarcs) < 4 ? 0 : ((nsrcarcs) > 32 \|\| (ndestarcs) > 32))
722
723	/*
724	* moveins - move all in arcs of a state to another state
725	*
726	* You might think this could be done better by just updating the
727	* existing arcs, and you would be right if it weren't for the need
728	* for duplicate suppression, which makes it easier to just make new
729	* ones to exploit the suppression built into newarc.
730	*
731	* However, if we have a whole lot of arcs to deal with, retail duplicate
732	* checks become too slow. In that case we proceed by sorting and merging
733	* the arc lists, and then we can indeed just update the arcs in-place.
734	*/
735	static void
736	moveins(struct nfa *nfa,
737	struct state *oldState,
738	struct state *newState)
739	{
740	assert(oldState != newState);
741
742	if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins))
743	{
744	/ With not too many arcs, just do them one at a time /
745	struct arc *a;
746
747	while ((a = oldState->ins) != NULL)
748	{
749	cparc(nfa, a, a->from, newState);
750	freearc(nfa, a);
751	}
752	}
753	else
754	{
755	/*
756	* With many arcs, use a sort-merge approach. Note changearctarget()
757	* will put the arc onto the front of newState's chain, so it does not
758	* break our walk through the sorted part of the chain.
759	*/
760	struct arc *oa;
761	struct arc *na;
762
763	/*
764	* Because we bypass newarc() in this code path, we'd better include a
765	* cancel check.
766	*/
767	if (CANCEL_REQUESTED(nfa->v->re))
768	{
769	NERR(REG_CANCEL);
770	return;
771	}
772
773	sortins(nfa, oldState);
774	sortins(nfa, newState);
775	if (NISERR())
776	return; / might have failed to sort /
777	oa = oldState->ins;
778	na = newState->ins;
779	while (oa != NULL && na != NULL)
780	{
781	struct arc *a = oa;
782
783	switch (sortins_cmp(&oa, &na))
784	{
785	case -`1`:
786	/ newState does not have anything matching oa /
787	oa = oa->inchain;
788
789	/*
790	* Rather than doing createarc+freearc, we can just unlink
791	* and relink the existing arc struct.
792	*/
793	changearctarget(a, newState);
794	break;
795	case `0`:
796	/ match, advance in both lists /
797	oa = oa->inchain;
798	na = na->inchain;
799	/ ... and drop duplicate arc from oldState /
800	freearc(nfa, a);
801	break;
802	case +`1`:
803	/ advance only na; oa might have a match later /
804	na = na->inchain;
805	break;
806	default:
807	assert(NOTREACHED);
808	}
809	}
810	while (oa != NULL)
811	{
812	/ newState does not have anything matching oa /
813	struct arc *a = oa;
814
815	oa = oa->inchain;
816	changearctarget(a, newState);
817	}
818	}
819
820	assert(oldState->nins == `0`);
821	assert(oldState->ins == NULL);
822	}
823
824	/*
825	* copyins - copy in arcs of a state to another state
826	*/
827	static void
828	copyins(struct nfa *nfa,
829	struct state *oldState,
830	struct state *newState)
831	{
832	assert(oldState != newState);
833
834	if (!BULK_ARC_OP_USE_SORT(oldState->nins, newState->nins))
835	{
836	/ With not too many arcs, just do them one at a time /
837	struct arc *a;
838
839	for (a = oldState->ins; a != NULL; a = a->inchain)
840	cparc(nfa, a, a->from, newState);
841	}
842	else
843	{
844	/*
845	* With many arcs, use a sort-merge approach. Note that createarc()
846	* will put new arcs onto the front of newState's chain, so it does
847	* not break our walk through the sorted part of the chain.
848	*/
849	struct arc *oa;
850	struct arc *na;
851
852	/*
853	* Because we bypass newarc() in this code path, we'd better include a
854	* cancel check.
855	*/
856	if (CANCEL_REQUESTED(nfa->v->re))
857	{
858	NERR(REG_CANCEL);
859	return;
860	}
861
862	sortins(nfa, oldState);
863	sortins(nfa, newState);
864	if (NISERR())
865	return; / might have failed to sort /
866	oa = oldState->ins;
867	na = newState->ins;
868	while (oa != NULL && na != NULL)
869	{
870	struct arc *a = oa;
871
872	switch (sortins_cmp(&oa, &na))
873	{
874	case -`1`:
875	/ newState does not have anything matching oa /
876	oa = oa->inchain;
877	createarc(nfa, a->type, a->co, a->from, newState);
878	break;
879	case `0`:
880	/ match, advance in both lists /
881	oa = oa->inchain;
882	na = na->inchain;
883	break;
884	case +`1`:
885	/ advance only na; oa might have a match later /
886	na = na->inchain;
887	break;
888	default:
889	assert(NOTREACHED);
890	}
891	}
892	while (oa != NULL)
893	{
894	/ newState does not have anything matching oa /
895	struct arc *a = oa;
896
897	oa = oa->inchain;
898	createarc(nfa, a->type, a->co, a->from, newState);
899	}
900	}
901	}
902
903	/*
904	* mergeins - merge a list of inarcs into a state
905	*
906	* This is much like copyins, but the source arcs are listed in an array,
907	* and are not guaranteed unique. It's okay to clobber the array contents.
908	*/
909	static void
910	mergeins(struct nfa *nfa,
911	struct state *s,
912	struct arc **arcarray,
913	int arccount)
914	{
915	struct arc *na;
916	int i;
917	int j;
918
919	if (arccount <= `0`)
920	return;
921
922	/*
923	* Because we bypass newarc() in this code path, we'd better include a
924	* cancel check.
925	*/
926	if (CANCEL_REQUESTED(nfa->v->re))
927	{
928	NERR(REG_CANCEL);
929	return;
930	}
931
932	/ Sort existing inarcs as well as proposed new ones /
933	sortins(nfa, s);
934	if (NISERR())
935	return; / might have failed to sort /
936
937	qsort(arcarray, arccount, sizeof(struct arc *), sortins_cmp);
938
939	/*
940	* arcarray very likely includes dups, so we must eliminate them. (This
941	* could be folded into the next loop, but it's not worth the trouble.)
942	*/
943	j = `0`;
944	for (i = `1`; i < arccount; i++)
945	{
946	switch (sortins_cmp(&arcarray[j], &arcarray[i]))
947	{
948	case -`1`:
949	/ non-dup /
950	arcarray[++j] = arcarray[i];
951	break;
952	case `0`:
953	/ dup /
954	break;
955	default:
956	/ trouble /
957	assert(NOTREACHED);
958	}
959	}
960	arccount = j + `1`;
961
962	/*
963	* Now merge into s' inchain. Note that createarc() will put new arcs
964	* onto the front of s's chain, so it does not break our walk through the
965	* sorted part of the chain.
966	*/
967	i = `0`;
968	na = s->ins;
969	while (i < arccount && na != NULL)
970	{
971	struct arc *a = arcarray[i];
972
973	switch (sortins_cmp(&a, &na))
974	{
975	case -`1`:
976	/ s does not have anything matching a /
977	createarc(nfa, a->type, a->co, a->from, s);
978	i++;
979	break;
980	case `0`:
981	/ match, advance in both lists /
982	i++;
983	na = na->inchain;
984	break;
985	case +`1`:
986	/ advance only na; array might have a match later /
987	na = na->inchain;
988	break;
989	default:
990	assert(NOTREACHED);
991	}
992	}
993	while (i < arccount)
994	{
995	/ s does not have anything matching a /
996	struct arc *a = arcarray[i];
997
998	createarc(nfa, a->type, a->co, a->from, s);
999	i++;
1000	}
1001	}
1002
1003	/*
1004	* moveouts - move all out arcs of a state to another state
1005	*/
1006	static void
1007	moveouts(struct nfa *nfa,
1008	struct state *oldState,
1009	struct state *newState)
1010	{
1011	assert(oldState != newState);
1012
1013	if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts))
1014	{
1015	/ With not too many arcs, just do them one at a time /
1016	struct arc *a;
1017
1018	while ((a = oldState->outs) != NULL)
1019	{
1020	cparc(nfa, a, newState, a->to);
1021	freearc(nfa, a);
1022	}
1023	}
1024	else
1025	{
1026	/*
1027	* With many arcs, use a sort-merge approach. Note that createarc()
1028	* will put new arcs onto the front of newState's chain, so it does
1029	* not break our walk through the sorted part of the chain.
1030	*/
1031	struct arc *oa;
1032	struct arc *na;
1033
1034	/*
1035	* Because we bypass newarc() in this code path, we'd better include a
1036	* cancel check.
1037	*/
1038	if (CANCEL_REQUESTED(nfa->v->re))
1039	{
1040	NERR(REG_CANCEL);
1041	return;
1042	}
1043
1044	sortouts(nfa, oldState);
1045	sortouts(nfa, newState);
1046	if (NISERR())
1047	return; / might have failed to sort /
1048	oa = oldState->outs;
1049	na = newState->outs;
1050	while (oa != NULL && na != NULL)
1051	{
1052	struct arc *a = oa;
1053
1054	switch (sortouts_cmp(&oa, &na))
1055	{
1056	case -`1`:
1057	/ newState does not have anything matching oa /
1058	oa = oa->outchain;
1059	createarc(nfa, a->type, a->co, newState, a->to);
1060	freearc(nfa, a);
1061	break;
1062	case `0`:
1063	/ match, advance in both lists /
1064	oa = oa->outchain;
1065	na = na->outchain;
1066	/ ... and drop duplicate arc from oldState /
1067	freearc(nfa, a);
1068	break;
1069	case +`1`:
1070	/ advance only na; oa might have a match later /
1071	na = na->outchain;
1072	break;
1073	default:
1074	assert(NOTREACHED);
1075	}
1076	}
1077	while (oa != NULL)
1078	{
1079	/ newState does not have anything matching oa /
1080	struct arc *a = oa;
1081
1082	oa = oa->outchain;
1083	createarc(nfa, a->type, a->co, newState, a->to);
1084	freearc(nfa, a);
1085	}
1086	}
1087
1088	assert(oldState->nouts == `0`);
1089	assert(oldState->outs == NULL);
1090	}
1091
1092	/*
1093	* copyouts - copy out arcs of a state to another state
1094	*/
1095	static void
1096	copyouts(struct nfa *nfa,
1097	struct state *oldState,
1098	struct state *newState)
1099	{
1100	assert(oldState != newState);
1101
1102	if (!BULK_ARC_OP_USE_SORT(oldState->nouts, newState->nouts))
1103	{
1104	/ With not too many arcs, just do them one at a time /
1105	struct arc *a;
1106
1107	for (a = oldState->outs; a != NULL; a = a->outchain)
1108	cparc(nfa, a, newState, a->to);
1109	}
1110	else
1111	{
1112	/*
1113	* With many arcs, use a sort-merge approach. Note that createarc()
1114	* will put new arcs onto the front of newState's chain, so it does
1115	* not break our walk through the sorted part of the chain.
1116	*/
1117	struct arc *oa;
1118	struct arc *na;
1119
1120	/*
1121	* Because we bypass newarc() in this code path, we'd better include a
1122	* cancel check.
1123	*/
1124	if (CANCEL_REQUESTED(nfa->v->re))
1125	{
1126	NERR(REG_CANCEL);
1127	return;
1128	}
1129
1130	sortouts(nfa, oldState);
1131	sortouts(nfa, newState);
1132	if (NISERR())
1133	return; / might have failed to sort /
1134	oa = oldState->outs;
1135	na = newState->outs;
1136	while (oa != NULL && na != NULL)
1137	{
1138	struct arc *a = oa;
1139
1140	switch (sortouts_cmp(&oa, &na))
1141	{
1142	case -`1`:
1143	/ newState does not have anything matching oa /
1144	oa = oa->outchain;
1145	createarc(nfa, a->type, a->co, newState, a->to);
1146	break;
1147	case `0`:
1148	/ match, advance in both lists /
1149	oa = oa->outchain;
1150	na = na->outchain;
1151	break;
1152	case +`1`:
1153	/ advance only na; oa might have a match later /
1154	na = na->outchain;
1155	break;
1156	default:
1157	assert(NOTREACHED);
1158	}
1159	}
1160	while (oa != NULL)
1161	{
1162	/ newState does not have anything matching oa /
1163	struct arc *a = oa;
1164
1165	oa = oa->outchain;
1166	createarc(nfa, a->type, a->co, newState, a->to);
1167	}
1168	}
1169	}
1170
1171	/*
1172	* cloneouts - copy out arcs of a state to another state pair, modifying type
1173	*/
1174	static void
1175	cloneouts(struct nfa *nfa,
1176	struct state *old,
1177	struct state *from,
1178	struct state *to,
1179	int type)
1180	{
1181	struct arc *a;
1182
1183	assert(old != from);
1184
1185	for (a = old->outs; a != NULL; a = a->outchain)
1186	newarc(nfa, type, a->co, from, to);
1187	}
1188
1189	/*
1190	* delsub - delete a sub-NFA, updating subre pointers if necessary
1191	*
1192	* This uses a recursive traversal of the sub-NFA, marking already-seen
1193	* states using their tmp pointer.
1194	*/
1195	static void
1196	delsub(struct nfa *nfa,
1197	struct state lp, /* the sub-NFA goes from here... /
1198	struct state rp) /* ...to here, not inclusive /
1199	{
1200	assert(lp != rp);
1201
1202	rp->tmp = rp; / mark end /
1203
1204	deltraverse(nfa, lp, lp);
1205	if (NISERR())
1206	return; / asserts might not hold after failure /
1207	assert(lp->nouts == `0` && rp->nins == `0`); / did the job /
1208	assert(lp->no != FREESTATE && rp->no != FREESTATE); / no more /
1209
1210	rp->tmp = NULL; / unmark end /
1211	lp->tmp = NULL; / and begin, marked by deltraverse /
1212	}
1213
1214	/*
1215	* deltraverse - the recursive heart of delsub
1216	* This routine's basic job is to destroy all out-arcs of the state.
1217	*/
1218	static void
1219	deltraverse(struct nfa *nfa,
1220	struct state *leftend,
1221	struct state *s)
1222	{
1223	struct arc *a;
1224	struct state *to;
1225
1226	/ Since this is recursive, it could be driven to stack overflow /
1227	if (STACK_TOO_DEEP(nfa->v->re))
1228	{
1229	NERR(REG_ETOOBIG);
1230	return;
1231	}
1232
1233	if (s->nouts == `0`)
1234	return; / nothing to do /
1235	if (s->tmp != NULL)
1236	return; / already in progress /
1237
1238	s->tmp = s; / mark as in progress /
1239
1240	while ((a = s->outs) != NULL)
1241	{
1242	to = a->to;
1243	deltraverse(nfa, leftend, to);
1244	if (NISERR())
1245	return; / asserts might not hold after failure /
1246	assert(to->nouts == `0` \|\| to->tmp != NULL);
1247	freearc(nfa, a);
1248	if (to->nins == `0` && to->tmp == NULL)
1249	{
1250	assert(to->nouts == `0`);
1251	freestate(nfa, to);
1252	}
1253	}
1254
1255	assert(s->no != FREESTATE); / we're still here /
1256	assert(s == leftend \|\| s->nins != `0`); / and still reachable /
1257	assert(s->nouts == `0`); / but have no outarcs /
1258
1259	s->tmp = NULL; / we're done here /
1260	}
1261
1262	/*
1263	* dupnfa - duplicate sub-NFA
1264	*
1265	* Another recursive traversal, this time using tmp to point to duplicates
1266	* as well as mark already-seen states. (You knew there was a reason why
1267	* it's a state pointer, didn't you? :-))
1268	*/
1269	static void
1270	dupnfa(struct nfa *nfa,
1271	struct state start, /* duplicate of subNFA starting here /
1272	struct state stop, /* and stopping here /
1273	struct state from, /* stringing duplicate from here /
1274	struct state to) /* to here /
1275	{
1276	if (start == stop)
1277	{
1278	newarc(nfa, EMPTY, `0`, from, to);
1279	return;
1280	}
1281
1282	stop->tmp = to;
1283	duptraverse(nfa, start, from);
1284	/ done, except for clearing out the tmp pointers /
1285
1286	stop->tmp = NULL;
1287	cleartraverse(nfa, start);
1288	}
1289
1290	/*
1291	* duptraverse - recursive heart of dupnfa
1292	*/
1293	static void
1294	duptraverse(struct nfa *nfa,
1295	struct state *s,
1296	struct state stmp) /* s's duplicate, or NULL /
1297	{
1298	struct arc *a;
1299
1300	/ Since this is recursive, it could be driven to stack overflow /
1301	if (STACK_TOO_DEEP(nfa->v->re))
1302	{
1303	NERR(REG_ETOOBIG);
1304	return;
1305	}
1306
1307	if (s->tmp != NULL)
1308	return; / already done /
1309
1310	s->tmp = (stmp == NULL) ? newstate(nfa) : stmp;
1311	if (s->tmp == NULL)
1312	{
1313	assert(NISERR());
1314	return;
1315	}
1316
1317	for (a = s->outs; a != NULL && !NISERR(); a = a->outchain)
1318	{
1319	duptraverse(nfa, a->to, (struct state *) NULL);
1320	if (NISERR())
1321	break;
1322	assert(a->to->tmp != NULL);
1323	cparc(nfa, a, s->tmp, a->to->tmp);
1324	}
1325	}
1326
1327	/*
1328	* cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
1329	*/
1330	static void
1331	cleartraverse(struct nfa *nfa,
1332	struct state *s)
1333	{
1334	struct arc *a;
1335
1336	/ Since this is recursive, it could be driven to stack overflow /
1337	if (STACK_TOO_DEEP(nfa->v->re))
1338	{
1339	NERR(REG_ETOOBIG);
1340	return;
1341	}
1342
1343	if (s->tmp == NULL)
1344	return;
1345	s->tmp = NULL;
1346
1347	for (a = s->outs; a != NULL; a = a->outchain)
1348	cleartraverse(nfa, a->to);
1349	}
1350
1351	/*
1352	* single_color_transition - does getting from s1 to s2 cross one PLAIN arc?
1353	*
1354	* If traversing from s1 to s2 requires a single PLAIN match (possibly of any
1355	* of a set of colors), return a state whose outarc list contains only PLAIN
1356	* arcs of those color(s). Otherwise return NULL.
1357	*
1358	* This is used before optimizing the NFA, so there may be EMPTY arcs, which
1359	* we should ignore; the possibility of an EMPTY is why the result state could
1360	* be different from s1.
1361	*
1362	* It's worth troubling to handle multiple parallel PLAIN arcs here because a
1363	* bracket construct such as [abc] might yield either one or several parallel
1364	* PLAIN arcs depending on earlier atoms in the expression. We'd rather that
1365	* that implementation detail not create user-visible performance differences.
1366	*/
1367	static struct state *
1368	single_color_transition(struct state s1, struct* state *s2)
1369	{
1370	struct arc *a;
1371
1372	/ Ignore leading EMPTY arc, if any /
1373	if (s1->nouts == `1` && s1->outs->type == EMPTY)
1374	s1 = s1->outs->to;
1375	/ Likewise for any trailing EMPTY arc /
1376	if (s2->nins == `1` && s2->ins->type == EMPTY)
1377	s2 = s2->ins->from;
1378	/ Perhaps we could have a single-state loop in between, if so reject /
1379	if (s1 == s2)
1380	return NULL;
1381	/ s1 must have at least one outarc... /
1382	if (s1->outs == NULL)
1383	return NULL;
1384	/ ... and they must all be PLAIN arcs to s2 /
1385	for (a = s1->outs; a != NULL; a = a->outchain)
1386	{
1387	if (a->type != PLAIN \|\| a->to != s2)
1388	return NULL;
1389	}
1390	/ OK, return s1 as the possessor of the relevant outarcs /
1391	return s1;
1392	}
1393
1394	/*
1395	* specialcolors - fill in special colors for an NFA
1396	*/
1397	static void
1398	specialcolors(struct nfa *nfa)
1399	{
1400	/ false colors for BOS, BOL, EOS, EOL /
1401	if (nfa->parent == NULL)
1402	{
1403	nfa->bos[`0`] = pseudocolor(nfa->cm);
1404	nfa->bos[`1`] = pseudocolor(nfa->cm);
1405	nfa->eos[`0`] = pseudocolor(nfa->cm);
1406	nfa->eos[`1`] = pseudocolor(nfa->cm);
1407	}
1408	else
1409	{
1410	assert(nfa->parent->bos[`0`] != COLORLESS);
1411	nfa->bos[`0`] = nfa->parent->bos[`0`];
1412	assert(nfa->parent->bos[`1`] != COLORLESS);
1413	nfa->bos[`1`] = nfa->parent->bos[`1`];
1414	assert(nfa->parent->eos[`0`] != COLORLESS);
1415	nfa->eos[`0`] = nfa->parent->eos[`0`];
1416	assert(nfa->parent->eos[`1`] != COLORLESS);
1417	nfa->eos[`1`] = nfa->parent->eos[`1`];
1418	}
1419	}
1420
1421	/*
1422	* optimize - optimize an NFA
1423	*
1424	* The main goal of this function is not so much "optimization" (though it
1425	* does try to get rid of useless NFA states) as reducing the NFA to a form
1426	* the regex executor can handle. The executor, and indeed the cNFA format
1427	* that is its input, can only handle PLAIN and LACON arcs. The output of
1428	* the regex parser also includes EMPTY (do-nothing) arcs, as well as
1429	* ^, $, AHEAD, and BEHIND constraint arcs, which we must get rid of here.
1430	* We first get rid of EMPTY arcs and then deal with the constraint arcs.
1431	* The hardest part of either job is to get rid of circular loops of the
1432	* target arc type. We would have to do that in any case, though, as such a
1433	* loop would otherwise allow the executor to cycle through the loop endlessly
1434	* without making any progress in the input string.
1435	*/
1436	static long / re_info bits /
1437	optimize(struct nfa *nfa,
1438	FILE f) /* for debug output; NULL none /
1439	{
1440	#ifdef REG_DEBUG
1441	int verbose = (f != NULL) ? `1` : `0`;
1442
1443	if (verbose)
1444	fprintf(f, "\ninitial cleanup:\n");
1445	#endif
1446	cleanup(nfa); / may simplify situation /
1447	#ifdef REG_DEBUG
1448	if (verbose)
1449	dumpnfa(nfa, f);
1450	if (verbose)
1451	fprintf(f, "\nempties:\n");
1452	#endif
1453	fixempties(nfa, f); / get rid of EMPTY arcs /
1454	#ifdef REG_DEBUG
1455	if (verbose)
1456	fprintf(f, "\nconstraints:\n");
1457	#endif
1458	fixconstraintloops(nfa, f); / get rid of constraint loops /
1459	pullback(nfa, f); / pull back constraints backward /
1460	pushfwd(nfa, f); / push fwd constraints forward /
1461	#ifdef REG_DEBUG
1462	if (verbose)
1463	fprintf(f, "\nfinal cleanup:\n");
1464	#endif
1465	cleanup(nfa); / final tidying /
1466	#ifdef REG_DEBUG
1467	if (verbose)
1468	dumpnfa(nfa, f);
1469	#endif
1470	return analyze(nfa); / and analysis /
1471	}
1472
1473	/*
1474	* pullback - pull back constraints backward to eliminate them
1475	*/
1476	static void
1477	pullback(struct nfa *nfa,
1478	FILE f) /* for debug output; NULL none /
1479	{
1480	struct state *s;
1481	struct state *nexts;
1482	struct arc *a;
1483	struct arc *nexta;
1484	struct state *intermediates;
1485	int progress;
1486
1487	/ find and pull until there are no more /
1488	do
1489	{
1490	progress = `0`;
1491	for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
1492	{
1493	nexts = s->next;
1494	intermediates = NULL;
1495	for (a = s->outs; a != NULL && !NISERR(); a = nexta)
1496	{
1497	nexta = a->outchain;
1498	if (a->type == `'^'` \|\| a->type == BEHIND)
1499	if (pull(nfa, a, &intermediates))
1500	progress = `1`;
1501	}
1502	/ clear tmp fields of intermediate states created here /
1503	while (intermediates != NULL)
1504	{
1505	struct state *ns = intermediates->tmp;
1506
1507	intermediates->tmp = NULL;
1508	intermediates = ns;
1509	}
1510	/ if s is now useless, get rid of it /
1511	if ((s->nins == `0` \|\| s->nouts == `0`) && !s->flag)
1512	dropstate(nfa, s);
1513	}
1514	if (progress && f != NULL)
1515	dumpnfa(nfa, f);
1516	} while (progress && !NISERR());
1517	if (NISERR())
1518	return;
1519
1520	/*
1521	* Any ^ constraints we were able to pull to the start state can now be
1522	* replaced by PLAIN arcs referencing the BOS or BOL colors. There should
1523	* be no other ^ or BEHIND arcs left in the NFA, though we do not check
1524	* that here (compact() will fail if so).
1525	*/
1526	for (a = nfa->pre->outs; a != NULL; a = nexta)
1527	{
1528	nexta = a->outchain;
1529	if (a->type == `'^'`)
1530	{
1531	assert(a->co == `0` \|\| a->co == `1`);
1532	newarc(nfa, PLAIN, nfa->bos[a->co], a->from, a->to);
1533	freearc(nfa, a);
1534	}
1535	}
1536	}
1537
1538	/*
1539	* pull - pull a back constraint backward past its source state
1540	*
1541	* Returns 1 if successful (which it always is unless the source is the
1542	* start state or we have an internal error), 0 if nothing happened.
1543	*
1544	* A significant property of this function is that it deletes no pre-existing
1545	* states, and no outarcs of the constraint's from state other than the given
1546	* constraint arc. This makes the loops in pullback() safe, at the cost that
1547	* we may leave useless states behind. Therefore, we leave it to pullback()
1548	* to delete such states.
1549	*
1550	* If the from state has multiple back-constraint outarcs, and/or multiple
1551	* compatible constraint inarcs, we only need to create one new intermediate
1552	* state per combination of predecessor and successor states. *intermediates
1553	* points to a list of such intermediate states for this from state (chained
1554	* through their tmp fields).
1555	*/
1556	static int
1557	pull(struct nfa *nfa,
1558	struct arc *con,
1559	struct state **intermediates)
1560	{
1561	struct state *from = con->from;
1562	struct state *to = con->to;
1563	struct arc *a;
1564	struct arc *nexta;
1565	struct state *s;
1566
1567	assert(from != to); / should have gotten rid of this earlier /
1568	if (from->flag) / can't pull back beyond start /
1569	return `0`;
1570	if (from->nins == `0`)
1571	{ / unreachable /
1572	freearc(nfa, con);
1573	return `1`;
1574	}
1575
1576	/*
1577	* First, clone from state if necessary to avoid other outarcs. This may
1578	* seem wasteful, but it simplifies the logic, and we'll get rid of the
1579	* clone state again at the bottom.
1580	*/
1581	if (from->nouts > `1`)
1582	{
1583	s = newstate(nfa);
1584	if (NISERR())
1585	return `0`;
1586	copyins(nfa, from, s); / duplicate inarcs /
1587	cparc(nfa, con, s, to); / move constraint arc /
1588	freearc(nfa, con);
1589	if (NISERR())
1590	return `0`;
1591	from = s;
1592	con = from->outs;
1593	}
1594	assert(from->nouts == `1`);
1595
1596	/ propagate the constraint into the from state's inarcs /
1597	for (a = from->ins; a != NULL && !NISERR(); a = nexta)
1598	{
1599	nexta = a->inchain;
1600	switch (combine(con, a))
1601	{
1602	case INCOMPATIBLE: / destroy the arc /
1603	freearc(nfa, a);
1604	break;
1605	case SATISFIED: / no action needed /
1606	break;
1607	case COMPATIBLE: / swap the two arcs, more or less /
1608	/ need an intermediate state, but might have one already /
1609	for (s = *intermediates; s != NULL; s = s->tmp)
1610	{
1611	assert(s->nins > `0` && s->nouts > `0`);
1612	if (s->ins->from == a->from && s->outs->to == to)
1613	break;
1614	}
1615	if (s == NULL)
1616	{
1617	s = newstate(nfa);
1618	if (NISERR())
1619	return `0`;
1620	s->tmp = *intermediates;
1621	*intermediates = s;
1622	}
1623	cparc(nfa, con, a->from, s);
1624	cparc(nfa, a, s, to);
1625	freearc(nfa, a);
1626	break;
1627	default:
1628	assert(NOTREACHED);
1629	break;
1630	}
1631	}
1632
1633	/ remaining inarcs, if any, incorporate the constraint /
1634	moveins(nfa, from, to);
1635	freearc(nfa, con);
1636	/ from state is now useless, but we leave it to pullback() to clean up /
1637	return `1`;
1638	}
1639
1640	/*
1641	* pushfwd - push forward constraints forward to eliminate them
1642	*/
1643	static void
1644	pushfwd(struct nfa *nfa,
1645	FILE f) /* for debug output; NULL none /
1646	{
1647	struct state *s;
1648	struct state *nexts;
1649	struct arc *a;
1650	struct arc *nexta;
1651	struct state *intermediates;
1652	int progress;
1653
1654	/ find and push until there are no more /
1655	do
1656	{
1657	progress = `0`;
1658	for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
1659	{
1660	nexts = s->next;
1661	intermediates = NULL;
1662	for (a = s->ins; a != NULL && !NISERR(); a = nexta)
1663	{
1664	nexta = a->inchain;
1665	if (a->type == `'$'` \|\| a->type == AHEAD)
1666	if (push(nfa, a, &intermediates))
1667	progress = `1`;
1668	}
1669	/ clear tmp fields of intermediate states created here /
1670	while (intermediates != NULL)
1671	{
1672	struct state *ns = intermediates->tmp;
1673
1674	intermediates->tmp = NULL;
1675	intermediates = ns;
1676	}
1677	/ if s is now useless, get rid of it /
1678	if ((s->nins == `0` \|\| s->nouts == `0`) && !s->flag)
1679	dropstate(nfa, s);
1680	}
1681	if (progress && f != NULL)
1682	dumpnfa(nfa, f);
1683	} while (progress && !NISERR());
1684	if (NISERR())
1685	return;
1686
1687	/*
1688	* Any $ constraints we were able to push to the post state can now be
1689	* replaced by PLAIN arcs referencing the EOS or EOL colors. There should
1690	* be no other $ or AHEAD arcs left in the NFA, though we do not check
1691	* that here (compact() will fail if so).
1692	*/
1693	for (a = nfa->post->ins; a != NULL; a = nexta)
1694	{
1695	nexta = a->inchain;
1696	if (a->type == `'$'`)
1697	{
1698	assert(a->co == `0` \|\| a->co == `1`);
1699	newarc(nfa, PLAIN, nfa->eos[a->co], a->from, a->to);
1700	freearc(nfa, a);
1701	}
1702	}
1703	}
1704
1705	/*
1706	* push - push a forward constraint forward past its destination state
1707	*
1708	* Returns 1 if successful (which it always is unless the destination is the
1709	* post state or we have an internal error), 0 if nothing happened.
1710	*
1711	* A significant property of this function is that it deletes no pre-existing
1712	* states, and no inarcs of the constraint's to state other than the given
1713	* constraint arc. This makes the loops in pushfwd() safe, at the cost that
1714	* we may leave useless states behind. Therefore, we leave it to pushfwd()
1715	* to delete such states.
1716	*
1717	* If the to state has multiple forward-constraint inarcs, and/or multiple
1718	* compatible constraint outarcs, we only need to create one new intermediate
1719	* state per combination of predecessor and successor states. *intermediates
1720	* points to a list of such intermediate states for this to state (chained
1721	* through their tmp fields).
1722	*/
1723	static int
1724	push(struct nfa *nfa,
1725	struct arc *con,
1726	struct state **intermediates)
1727	{
1728	struct state *from = con->from;
1729	struct state *to = con->to;
1730	struct arc *a;
1731	struct arc *nexta;
1732	struct state *s;
1733
1734	assert(to != from); / should have gotten rid of this earlier /
1735	if (to->flag) / can't push forward beyond end /
1736	return `0`;
1737	if (to->nouts == `0`)
1738	{ / dead end /
1739	freearc(nfa, con);
1740	return `1`;
1741	}
1742
1743	/*
1744	* First, clone to state if necessary to avoid other inarcs. This may
1745	* seem wasteful, but it simplifies the logic, and we'll get rid of the
1746	* clone state again at the bottom.
1747	*/
1748	if (to->nins > `1`)
1749	{
1750	s = newstate(nfa);
1751	if (NISERR())
1752	return `0`;
1753	copyouts(nfa, to, s); / duplicate outarcs /
1754	cparc(nfa, con, from, s); / move constraint arc /
1755	freearc(nfa, con);
1756	if (NISERR())
1757	return `0`;
1758	to = s;
1759	con = to->ins;
1760	}
1761	assert(to->nins == `1`);
1762
1763	/ propagate the constraint into the to state's outarcs /
1764	for (a = to->outs; a != NULL && !NISERR(); a = nexta)
1765	{
1766	nexta = a->outchain;
1767	switch (combine(con, a))
1768	{
1769	case INCOMPATIBLE: / destroy the arc /
1770	freearc(nfa, a);
1771	break;
1772	case SATISFIED: / no action needed /
1773	break;
1774	case COMPATIBLE: / swap the two arcs, more or less /
1775	/ need an intermediate state, but might have one already /
1776	for (s = *intermediates; s != NULL; s = s->tmp)
1777	{
1778	assert(s->nins > `0` && s->nouts > `0`);
1779	if (s->ins->from == from && s->outs->to == a->to)
1780	break;
1781	}
1782	if (s == NULL)
1783	{
1784	s = newstate(nfa);
1785	if (NISERR())
1786	return `0`;
1787	s->tmp = *intermediates;
1788	*intermediates = s;
1789	}
1790	cparc(nfa, con, s, a->to);
1791	cparc(nfa, a, from, s);
1792	freearc(nfa, a);
1793	break;
1794	default:
1795	assert(NOTREACHED);
1796	break;
1797	}
1798	}
1799
1800	/ remaining outarcs, if any, incorporate the constraint /
1801	moveouts(nfa, to, from);
1802	freearc(nfa, con);
1803	/ to state is now useless, but we leave it to pushfwd() to clean up /
1804	return `1`;
1805	}
1806
1807	/*
1808	* combine - constraint lands on an arc, what happens?
1809	*
1810	* #def INCOMPATIBLE 1 // destroys arc
1811	* #def SATISFIED 2 // constraint satisfied
1812	* #def COMPATIBLE 3 // compatible but not satisfied yet
1813	*/
1814	static int
1815	combine(struct arc *con,
1816	struct arc *a)
1817	{
1818	#define CA(ct,at) (((ct)<<CHAR_BIT) \| (at))
1819
1820	switch (CA(con->type, a->type))
1821	{
1822	case CA(`'^'`, PLAIN): / newlines are handled separately /
1823	case CA(`'$'`, PLAIN):
1824	return INCOMPATIBLE;
1825	break;
1826	case CA(AHEAD, PLAIN): / color constraints meet colors /
1827	case CA(BEHIND, PLAIN):
1828	if (con->co == a->co)
1829	return SATISFIED;
1830	return INCOMPATIBLE;
1831	break;
1832	case CA(`'^'`, `'^'`): / collision, similar constraints /
1833	case CA(`'$'`, `'$'`):
1834	case CA(AHEAD, AHEAD):
1835	case CA(BEHIND, BEHIND):
1836	if (con->co == a->co) / true duplication /
1837	return SATISFIED;
1838	return INCOMPATIBLE;
1839	break;
1840	case CA(`'^'`, BEHIND): / collision, dissimilar constraints /
1841	case CA(BEHIND, `'^'`):
1842	case CA(`'$'`, AHEAD):
1843	case CA(AHEAD, `'$'`):
1844	return INCOMPATIBLE;
1845	break;
1846	case CA(`'^'`, `'$'`): / constraints passing each other /
1847	case CA(`'^'`, AHEAD):
1848	case CA(BEHIND, `'$'`):
1849	case CA(BEHIND, AHEAD):
1850	case CA(`'$'`, `'^'`):
1851	case CA(`'$'`, BEHIND):
1852	case CA(AHEAD, `'^'`):
1853	case CA(AHEAD, BEHIND):
1854	case CA(`'^'`, LACON):
1855	case CA(BEHIND, LACON):
1856	case CA(`'$'`, LACON):
1857	case CA(AHEAD, LACON):
1858	return COMPATIBLE;
1859	break;
1860	}
1861	assert(NOTREACHED);
1862	return INCOMPATIBLE; / for benefit of blind compilers /
1863	}
1864
1865	/*
1866	* fixempties - get rid of EMPTY arcs
1867	*/
1868	static void
1869	fixempties(struct nfa *nfa,
1870	FILE f) /* for debug output; NULL none /
1871	{
1872	struct state *s;
1873	struct state *s2;
1874	struct state *nexts;
1875	struct arc *a;
1876	struct arc *nexta;
1877	int totalinarcs;
1878	struct arc **inarcsorig;
1879	struct arc **arcarray;
1880	int arccount;
1881	int prevnins;
1882	int nskip;
1883
1884	/*
1885	* First, get rid of any states whose sole out-arc is an EMPTY, since
1886	* they're basically just aliases for their successor. The parsing
1887	* algorithm creates enough of these that it's worth special-casing this.
1888	*/
1889	for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
1890	{
1891	nexts = s->next;
1892	if (s->flag \|\| s->nouts != `1`)
1893	continue;
1894	a = s->outs;
1895	assert(a != NULL && a->outchain == NULL);
1896	if (a->type != EMPTY)
1897	continue;
1898	if (s != a->to)
1899	moveins(nfa, s, a->to);
1900	dropstate(nfa, s);
1901	}
1902
1903	/*
1904	* Similarly, get rid of any state with a single EMPTY in-arc, by folding
1905	* it into its predecessor.
1906	*/
1907	for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
1908	{
1909	nexts = s->next;
1910	/ while we're at it, ensure tmp fields are clear for next step /
1911	assert(s->tmp == NULL);
1912	if (s->flag \|\| s->nins != `1`)
1913	continue;
1914	a = s->ins;
1915	assert(a != NULL && a->inchain == NULL);
1916	if (a->type != EMPTY)
1917	continue;
1918	if (s != a->from)
1919	moveouts(nfa, s, a->from);
1920	dropstate(nfa, s);
1921	}
1922
1923	if (NISERR())
1924	return;
1925
1926	/*
1927	* For each remaining NFA state, find all other states from which it is
1928	* reachable by a chain of one or more EMPTY arcs. Then generate new arcs
1929	* that eliminate the need for each such chain.
1930	*
1931	* We could replace a chain of EMPTY arcs that leads from a "from" state
1932	* to a "to" state either by pushing non-EMPTY arcs forward (linking
1933	* directly from "from"'s predecessors to "to") or by pulling them back
1934	* (linking directly from "from" to "to"'s successors). We choose to
1935	* always do the former; this choice is somewhat arbitrary, but the
1936	* approach below requires that we uniformly do one or the other.
1937	*
1938	* Suppose we have a chain of N successive EMPTY arcs (where N can easily
1939	* approach the size of the NFA). All of the intermediate states must
1940	* have additional inarcs and outarcs, else they'd have been removed by
1941	* the steps above. Assuming their inarcs are mostly not empties, we will
1942	* add O(N^2) arcs to the NFA, since a non-EMPTY inarc leading to any one
1943	* state in the chain must be duplicated to lead to all its successor
1944	* states as well. So there is no hope of doing less than O(N^2) work;
1945	* however, we should endeavor to keep the big-O cost from being even
1946	* worse than that, which it can easily become without care. In
1947	* particular, suppose we were to copy all S1's inarcs forward to S2, and
1948	* then also to S3, and then later we consider pushing S2's inarcs forward
1949	* to S3. If we include the arcs already copied from S1 in that, we'd be
1950	* doing O(N^3) work. (The duplicate-arc elimination built into newarc()
1951	* and its cohorts would get rid of the extra arcs, but not without cost.)
1952	*
1953	* We can avoid this cost by treating only arcs that existed at the start
1954	* of this phase as candidates to be pushed forward. To identify those,
1955	* we remember the first inarc each state had to start with. We rely on
1956	* the fact that newarc() and friends put new arcs on the front of their
1957	* to-states' inchains, and that this phase never deletes arcs, so that
1958	* the original arcs must be the last arcs in their to-states' inchains.
1959	*
1960	* So the process here is that, for each state in the NFA, we gather up
1961	* all non-EMPTY inarcs of states that can reach the target state via
1962	* EMPTY arcs. We then sort, de-duplicate, and merge these arcs into the
1963	* target state's inchain. (We can safely use sort-merge for this as long
1964	* as we update each state's original-arcs pointer after we add arcs to
1965	* it; the sort step of mergeins probably changed the order of the old
1966	* arcs.)
1967	*
1968	* Another refinement worth making is that, because we only add non-EMPTY
1969	* arcs during this phase, and all added arcs have the same from-state as
1970	* the non-EMPTY arc they were cloned from, we know ahead of time that any
1971	* states having only EMPTY outarcs will be useless for lack of outarcs
1972	* after we drop the EMPTY arcs. (They cannot gain non-EMPTY outarcs if
1973	* they had none to start with.) So we need not bother to update the
1974	* inchains of such states at all.
1975	*/
1976
1977	/ Remember the states' first original inarcs /
1978	/ ... and while at it, count how many old inarcs there are altogether /
1979	inarcsorig = (struct arc *) MALLOC(nfa->nstates sizeof(struct arc *));
1980	if (inarcsorig == NULL)
1981	{
1982	NERR(REG_ESPACE);
1983	return;
1984	}
1985	totalinarcs = `0`;
1986	for (s = nfa->states; s != NULL; s = s->next)
1987	{
1988	inarcsorig[s->no] = s->ins;
1989	totalinarcs += s->nins;
1990	}
1991
1992	/*
1993	* Create a workspace for accumulating the inarcs to be added to the
1994	* current target state. totalinarcs is probably a considerable
1995	* overestimate of the space needed, but the NFA is unlikely to be large
1996	* enough at this point to make it worth being smarter.
1997	*/
1998	arcarray = (struct arc *) MALLOC(totalinarcs sizeof(struct arc *));
1999	if (arcarray == NULL)
2000	{
2001	NERR(REG_ESPACE);
2002	FREE(inarcsorig);
2003	return;
2004	}
2005
2006	/ And iterate over the target states /
2007	for (s = nfa->states; s != NULL && !NISERR(); s = s->next)
2008	{
2009	/ Ignore target states without non-EMPTY outarcs, per note above /
2010	if (!s->flag && !hasnonemptyout(s))
2011	continue;
2012
2013	/ Find predecessor states and accumulate their original inarcs /
2014	arccount = `0`;
2015	for (s2 = emptyreachable(nfa, s, s, inarcsorig); s2 != s; s2 = nexts)
2016	{
2017	/ Add s2's original inarcs to arcarray[], but ignore empties /
2018	for (a = inarcsorig[s2->no]; a != NULL; a = a->inchain)
2019	{
2020	if (a->type != EMPTY)
2021	arcarray[arccount++] = a;
2022	}
2023
2024	/ Reset the tmp fields as we walk back /
2025	nexts = s2->tmp;
2026	s2->tmp = NULL;
2027	}
2028	s->tmp = NULL;
2029	assert(arccount <= totalinarcs);
2030
2031	/ Remember how many original inarcs this state has /
2032	prevnins = s->nins;
2033
2034	/ Add non-duplicate inarcs to target state /
2035	mergeins(nfa, s, arcarray, arccount);
2036
2037	/ Now we must update the state's inarcsorig pointer /
2038	nskip = s->nins - prevnins;
2039	a = s->ins;
2040	while (nskip-- > `0`)
2041	a = a->inchain;
2042	inarcsorig[s->no] = a;
2043	}
2044
2045	FREE(arcarray);
2046	FREE(inarcsorig);
2047
2048	if (NISERR())
2049	return;
2050
2051	/*
2052	* Now remove all the EMPTY arcs, since we don't need them anymore.
2053	*/
2054	for (s = nfa->states; s != NULL; s = s->next)
2055	{
2056	for (a = s->outs; a != NULL; a = nexta)
2057	{
2058	nexta = a->outchain;
2059	if (a->type == EMPTY)
2060	freearc(nfa, a);
2061	}
2062	}
2063
2064	/*
2065	* And remove any states that have become useless. (This cleanup is not
2066	* very thorough, and would be even less so if we tried to combine it with
2067	* the previous step; but cleanup() will take care of anything we miss.)
2068	*/
2069	for (s = nfa->states; s != NULL; s = nexts)
2070	{
2071	nexts = s->next;
2072	if ((s->nins == `0` \|\| s->nouts == `0`) && !s->flag)
2073	dropstate(nfa, s);
2074	}
2075
2076	if (f != NULL)
2077	dumpnfa(nfa, f);
2078	}
2079
2080	/*
2081	* emptyreachable - recursively find all states that can reach s by EMPTY arcs
2082	*
2083	* The return value is the last such state found. Its tmp field links back
2084	* to the next-to-last such state, and so on back to s, so that all these
2085	* states can be located without searching the whole NFA.
2086	*
2087	* Since this is only used in fixempties(), we pass in the inarcsorig[] array
2088	* maintained by that function. This lets us skip over all new inarcs, which
2089	* are certainly not EMPTY arcs.
2090	*
2091	* The maximum recursion depth here is equal to the length of the longest
2092	* loop-free chain of EMPTY arcs, which is surely no more than the size of
2093	* the NFA ... but that could still be enough to cause trouble.
2094	*/
2095	static struct state *
2096	emptyreachable(struct nfa *nfa,
2097	struct state *s,
2098	struct state *lastfound,
2099	struct arc **inarcsorig)
2100	{
2101	struct arc *a;
2102
2103	/ Since this is recursive, it could be driven to stack overflow /
2104	if (STACK_TOO_DEEP(nfa->v->re))
2105	{
2106	NERR(REG_ETOOBIG);
2107	return lastfound;
2108	}
2109
2110	s->tmp = lastfound;
2111	lastfound = s;
2112	for (a = inarcsorig[s->no]; a != NULL; a = a->inchain)
2113	{
2114	if (a->type == EMPTY && a->from->tmp == NULL)
2115	lastfound = emptyreachable(nfa, a->from, lastfound, inarcsorig);
2116	}
2117	return lastfound;
2118	}
2119
2120	/*
2121	* isconstraintarc - detect whether an arc is of a constraint type
2122	*/
2123	static inline int
2124	isconstraintarc(struct arc *a)
2125	{
2126	switch (a->type)
2127	{
2128	case `'^'`:
2129	case `'$'`:
2130	case BEHIND:
2131	case AHEAD:
2132	case LACON:
2133	return `1`;
2134	}
2135	return `0`;
2136	}
2137
2138	/*
2139	* hasconstraintout - does state have a constraint out arc?
2140	*/
2141	static int
2142	hasconstraintout(struct state *s)
2143	{
2144	struct arc *a;
2145
2146	for (a = s->outs; a != NULL; a = a->outchain)
2147	{
2148	if (isconstraintarc(a))
2149	return `1`;
2150	}
2151	return `0`;
2152	}
2153
2154	/*
2155	* fixconstraintloops - get rid of loops containing only constraint arcs
2156	*
2157	* A loop of states that contains only constraint arcs is useless, since
2158	* passing around the loop represents no forward progress. Moreover, it
2159	* would cause infinite looping in pullback/pushfwd, so we need to get rid
2160	* of such loops before doing that.
2161	*/
2162	static void
2163	fixconstraintloops(struct nfa *nfa,
2164	FILE f) /* for debug output; NULL none /
2165	{
2166	struct state *s;
2167	struct state *nexts;
2168	struct arc *a;
2169	struct arc *nexta;
2170	int hasconstraints;
2171
2172	/*
2173	* In the trivial case of a state that loops to itself, we can just drop
2174	* the constraint arc altogether. This is worth special-casing because
2175	* such loops are far more common than loops containing multiple states.
2176	* While we're at it, note whether any constraint arcs survive.
2177	*/
2178	hasconstraints = `0`;
2179	for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
2180	{
2181	nexts = s->next;
2182	/ while we're at it, ensure tmp fields are clear for next step /
2183	assert(s->tmp == NULL);
2184	for (a = s->outs; a != NULL && !NISERR(); a = nexta)
2185	{
2186	nexta = a->outchain;
2187	if (isconstraintarc(a))
2188	{
2189	if (a->to == s)
2190	freearc(nfa, a);
2191	else
2192	hasconstraints = `1`;
2193	}
2194	}
2195	/ If we removed all the outarcs, the state is useless. /
2196	if (s->nouts == `0` && !s->flag)
2197	dropstate(nfa, s);
2198	}
2199
2200	/ Nothing to do if no remaining constraint arcs /
2201	if (NISERR() \|\| !hasconstraints)
2202	return;
2203
2204	/*
2205	* Starting from each remaining NFA state, search outwards for a
2206	* constraint loop. If we find a loop, break the loop, then start the
2207	* search over. (We could possibly retain some state from the first scan,
2208	* but it would complicate things greatly, and multi-state constraint
2209	* loops are rare enough that it's not worth optimizing the case.)
2210	*/
2211	restart:
2212	for (s = nfa->states; s != NULL && !NISERR(); s = s->next)
2213	{
2214	if (findconstraintloop(nfa, s))
2215	goto restart;
2216	}
2217
2218	if (NISERR())
2219	return;
2220
2221	/*
2222	* Now remove any states that have become useless. (This cleanup is not
2223	* very thorough, and would be even less so if we tried to combine it with
2224	* the previous step; but cleanup() will take care of anything we miss.)
2225	*
2226	* Because findconstraintloop intentionally doesn't reset all tmp fields,
2227	* we have to clear them after it's done. This is a convenient place to
2228	* do that, too.
2229	*/
2230	for (s = nfa->states; s != NULL; s = nexts)
2231	{
2232	nexts = s->next;
2233	s->tmp = NULL;
2234	if ((s->nins == `0` \|\| s->nouts == `0`) && !s->flag)
2235	dropstate(nfa, s);
2236	}
2237
2238	if (f != NULL)
2239	dumpnfa(nfa, f);
2240	}
2241
2242	/*
2243	* findconstraintloop - recursively find a loop of constraint arcs
2244	*
2245	* If we find a loop, break it by calling breakconstraintloop(), then
2246	* return 1; otherwise return 0.
2247	*
2248	* State tmp fields are guaranteed all NULL on a success return, because
2249	* breakconstraintloop does that. After a failure return, any state that
2250	* is known not to be part of a loop is marked with s->tmp == s; this allows
2251	* us not to have to re-prove that fact on later calls. (This convention is
2252	* workable because we already eliminated single-state loops.)
2253	*
2254	* Note that the found loop doesn't necessarily include the first state we
2255	* are called on. Any loop reachable from that state will do.
2256	*
2257	* The maximum recursion depth here is one more than the length of the longest
2258	* loop-free chain of constraint arcs, which is surely no more than the size
2259	* of the NFA ... but that could still be enough to cause trouble.
2260	*/
2261	static int
2262	findconstraintloop(struct nfa nfa, struct* state *s)
2263	{
2264	struct arc *a;
2265
2266	/ Since this is recursive, it could be driven to stack overflow /
2267	if (STACK_TOO_DEEP(nfa->v->re))
2268	{
2269	NERR(REG_ETOOBIG);
2270	return `1`; / to exit as quickly as possible /
2271	}
2272
2273	if (s->tmp != NULL)
2274	{
2275	/ Already proven uninteresting? /
2276	if (s->tmp == s)
2277	return `0`;
2278	/ Found a loop involving s /
2279	breakconstraintloop(nfa, s);
2280	/ The tmp fields have been cleaned up by breakconstraintloop /
2281	return `1`;
2282	}
2283	for (a = s->outs; a != NULL; a = a->outchain)
2284	{
2285	if (isconstraintarc(a))
2286	{
2287	struct state *sto = a->to;
2288
2289	assert(sto != s);
2290	s->tmp = sto;
2291	if (findconstraintloop(nfa, sto))
2292	return `1`;
2293	}
2294	}
2295
2296	/*
2297	* If we get here, no constraint loop exists leading out from s. Mark it
2298	* with s->tmp == s so we need not rediscover that fact again later.
2299	*/
2300	s->tmp = s;
2301	return `0`;
2302	}
2303
2304	/*
2305	* breakconstraintloop - break a loop of constraint arcs
2306	*
2307	* sinitial is any one member state of the loop. Each loop member's tmp
2308	* field links to its successor within the loop. (Note that this function
2309	* will reset all the tmp fields to NULL.)
2310	*
2311	* We can break the loop by, for any one state S1 in the loop, cloning its
2312	* loop successor state S2 (and possibly following states), and then moving
2313	* all S1->S2 constraint arcs to point to the cloned S2. The cloned S2 should
2314	* copy any non-constraint outarcs of S2. Constraint outarcs should be
2315	* dropped if they point back to S1, else they need to be copied as arcs to
2316	* similarly cloned states S3, S4, etc. In general, each cloned state copies
2317	* non-constraint outarcs, drops constraint outarcs that would lead to itself
2318	* or any earlier cloned state, and sends other constraint outarcs to newly
2319	* cloned states. No cloned state will have any inarcs that aren't constraint
2320	* arcs or do not lead from S1 or earlier-cloned states. It's okay to drop
2321	* constraint back-arcs since they would not take us to any state we've not
2322	* already been in; therefore, no new constraint loop is created. In this way
2323	* we generate a modified NFA that can still represent every useful state
2324	* sequence, but not sequences that represent state loops with no consumption
2325	* of input data. Note that the set of cloned states will certainly include
2326	* all of the loop member states other than S1, and it may also include
2327	* non-loop states that are reachable from S2 via constraint arcs. This is
2328	* important because there is no guarantee that findconstraintloop found a
2329	* maximal loop (and searching for one would be NP-hard, so don't try).
2330	* Frequently the "non-loop states" are actually part of a larger loop that
2331	* we didn't notice, and indeed there may be several overlapping loops.
2332	* This technique ensures convergence in such cases, while considering only
2333	* the originally-found loop does not.
2334	*
2335	* If there is only one S1->S2 constraint arc, then that constraint is
2336	* certainly satisfied when we enter any of the clone states. This means that
2337	* in the common case where many of the constraint arcs are identically
2338	* labeled, we can merge together clone states linked by a similarly-labeled
2339	* constraint: if we can get to the first one we can certainly get to the
2340	* second, so there's no need to distinguish. This greatly reduces the number
2341	* of new states needed, so we preferentially break the given loop at a state
2342	* pair where this is true.
2343	*
2344	* Furthermore, it's fairly common to find that a cloned successor state has
2345	* no outarcs, especially if we're a bit aggressive about removing unnecessary
2346	* outarcs. If that happens, then there is simply not any interesting state
2347	* that can be reached through the predecessor's loop arcs, which means we can
2348	* break the loop just by removing those loop arcs, with no new states added.
2349	*/
2350	static void
2351	breakconstraintloop(struct nfa nfa, struct* state *sinitial)
2352	{
2353	struct state *s;
2354	struct state *shead;
2355	struct state *stail;
2356	struct state *sclone;
2357	struct state *nexts;
2358	struct arc *refarc;
2359	struct arc *a;
2360	struct arc *nexta;
2361
2362	/*
2363	* Start by identifying which loop step we want to break at.
2364	* Preferentially this is one with only one constraint arc. (XXX are
2365	* there any other secondary heuristics we want to use here?) Set refarc
2366	* to point to the selected lone constraint arc, if there is one.
2367	*/
2368	refarc = NULL;
2369	s = sinitial;
2370	do
2371	{
2372	nexts = s->tmp;
2373	assert(nexts != s); / should not see any one-element loops /
2374	if (refarc == NULL)
2375	{
2376	int narcs = `0`;
2377
2378	for (a = s->outs; a != NULL; a = a->outchain)
2379	{
2380	if (a->to == nexts && isconstraintarc(a))
2381	{
2382	refarc = a;
2383	narcs++;
2384	}
2385	}
2386	assert(narcs > `0`);
2387	if (narcs > `1`)
2388	refarc = NULL; / multiple constraint arcs here, no good /
2389	}
2390	s = nexts;
2391	} while (s != sinitial);
2392
2393	if (refarc)
2394	{
2395	/ break at the refarc /
2396	shead = refarc->from;
2397	stail = refarc->to;
2398	assert(stail == shead->tmp);
2399	}
2400	else
2401	{
2402	/ for lack of a better idea, break after sinitial /
2403	shead = sinitial;
2404	stail = sinitial->tmp;
2405	}
2406
2407	/*
2408	* Reset the tmp fields so that we can use them for local storage in
2409	* clonesuccessorstates. (findconstraintloop won't mind, since it's just
2410	* going to abandon its search anyway.)
2411	*/
2412	for (s = nfa->states; s != NULL; s = s->next)
2413	s->tmp = NULL;
2414
2415	/*
2416	* Recursively build clone state(s) as needed.
2417	*/
2418	sclone = newstate(nfa);
2419	if (sclone == NULL)
2420	{
2421	assert(NISERR());
2422	return;
2423	}
2424
2425	clonesuccessorstates(nfa, stail, sclone, shead, refarc,
2426	NULL, NULL, nfa->nstates);
2427
2428	if (NISERR())
2429	return;
2430
2431	/*
2432	* It's possible that sclone has no outarcs at all, in which case it's
2433	* useless. (We don't try extremely hard to get rid of useless states
2434	* here, but this is an easy and fairly common case.)
2435	*/
2436	if (sclone->nouts == `0`)
2437	{
2438	freestate(nfa, sclone);
2439	sclone = NULL;
2440	}
2441
2442	/*
2443	* Move shead's constraint-loop arcs to point to sclone, or just drop them
2444	* if we discovered we don't need sclone.
2445	*/
2446	for (a = shead->outs; a != NULL; a = nexta)
2447	{
2448	nexta = a->outchain;
2449	if (a->to == stail && isconstraintarc(a))
2450	{
2451	if (sclone)
2452	cparc(nfa, a, shead, sclone);
2453	freearc(nfa, a);
2454	if (NISERR())
2455	break;
2456	}
2457	}
2458	}
2459
2460	/*
2461	* clonesuccessorstates - create a tree of constraint-arc successor states
2462	*
2463	* ssource is the state to be cloned, and sclone is the state to copy its
2464	* outarcs into. sclone's inarcs, if any, should already be set up.
2465	*
2466	* spredecessor is the original predecessor state that we are trying to build
2467	* successors for (it may not be the immediate predecessor of ssource).
2468	* refarc, if not NULL, is the original constraint arc that is known to have
2469	* been traversed out of spredecessor to reach the successor(s).
2470	*
2471	* For each cloned successor state, we transiently create a "donemap" that is
2472	* a boolean array showing which source states we've already visited for this
2473	* clone state. This prevents infinite recursion as well as useless repeat
2474	* visits to the same state subtree (which can add up fast, since typical NFAs
2475	* have multiple redundant arc pathways). Each donemap is a char array
2476	* indexed by state number. The donemaps are all of the same size "nstates",
2477	* which is nfa->nstates as of the start of the recursion. This is enough to
2478	* have entries for all pre-existing states, but not entries for clone
2479	* states created during the recursion. That's okay since we have no need to
2480	* mark those.
2481	*
2482	* curdonemap is NULL when recursing to a new sclone state, or sclone's
2483	* donemap when we are recursing without having created a new state (which we
2484	* do when we decide we can merge a successor state into the current clone
2485	* state). outerdonemap is NULL at the top level and otherwise the parent
2486	* clone state's donemap.
2487	*
2488	* The successor states we create and fill here form a strict tree structure,
2489	* with each state having exactly one predecessor, except that the toplevel
2490	* state has no inarcs as yet (breakconstraintloop will add its inarcs from
2491	* spredecessor after we're done). Thus, we can examine sclone's inarcs back
2492	* to the root, plus refarc if any, to identify the set of constraints already
2493	* known valid at the current point. This allows us to avoid generating extra
2494	* successor states.
2495	*/
2496	static void
2497	clonesuccessorstates(struct nfa *nfa,
2498	struct state *ssource,
2499	struct state *sclone,
2500	struct state *spredecessor,
2501	struct arc *refarc,
2502	char *curdonemap,
2503	char *outerdonemap,
2504	int nstates)
2505	{
2506	char *donemap;
2507	struct arc *a;
2508
2509	/ Since this is recursive, it could be driven to stack overflow /
2510	if (STACK_TOO_DEEP(nfa->v->re))
2511	{
2512	NERR(REG_ETOOBIG);
2513	return;
2514	}
2515
2516	/ If this state hasn't already got a donemap, create one /
2517	donemap = curdonemap;
2518	if (donemap == NULL)
2519	{
2520	donemap = (char ) MALLOC(nstates sizeof(char));
2521	if (donemap == NULL)
2522	{
2523	NERR(REG_ESPACE);
2524	return;
2525	}
2526
2527	if (outerdonemap != NULL)
2528	{
2529	/*
2530	* Not at outermost recursion level, so copy the outer level's
2531	* donemap; this ensures that we see states in process of being
2532	* visited at outer levels, or already merged into predecessor
2533	* states, as ones we shouldn't traverse back to.
2534	*/
2535	memcpy(donemap, outerdonemap, nstates * sizeof(char));
2536	}
2537	else
2538	{
2539	/ At outermost level, only spredecessor is off-limits /
2540	memset(donemap, `0`, nstates * sizeof(char));
2541	assert(spredecessor->no < nstates);
2542	donemap[spredecessor->no] = `1`;
2543	}
2544	}
2545
2546	/ Mark ssource as visited in the donemap /
2547	assert(ssource->no < nstates);
2548	assert(donemap[ssource->no] == `0`);
2549	donemap[ssource->no] = `1`;
2550
2551	/*
2552	* We proceed by first cloning all of ssource's outarcs, creating new
2553	* clone states as needed but not doing more with them than that. Then in
2554	* a second pass, recurse to process the child clone states. This allows
2555	* us to have only one child clone state per reachable source state, even
2556	* when there are multiple outarcs leading to the same state. Also, when
2557	* we do visit a child state, its set of inarcs is known exactly, which
2558	* makes it safe to apply the constraint-is-already-checked optimization.
2559	* Also, this ensures that we've merged all the states we can into the
2560	* current clone before we recurse to any children, thus possibly saving
2561	* them from making extra images of those states.
2562	*
2563	* While this function runs, child clone states of the current state are
2564	* marked by setting their tmp fields to point to the original state they
2565	* were cloned from. This makes it possible to detect multiple outarcs
2566	* leading to the same state, and also makes it easy to distinguish clone
2567	* states from original states (which will have tmp == NULL).
2568	*/
2569	for (a = ssource->outs; a != NULL && !NISERR(); a = a->outchain)
2570	{
2571	struct state *sto = a->to;
2572
2573	/*
2574	* We do not consider cloning successor states that have no constraint
2575	* outarcs; just link to them as-is. They cannot be part of a
2576	* constraint loop so there is no need to make copies. In particular,
2577	* this rule keeps us from trying to clone the post state, which would
2578	* be a bad idea.
2579	*/
2580	if (isconstraintarc(a) && hasconstraintout(sto))
2581	{
2582	struct state *prevclone;
2583	int canmerge;
2584	struct arc *a2;
2585
2586	/*
2587	* Back-link constraint arcs must not be followed. Nor is there a
2588	* need to revisit states previously merged into this clone.
2589	*/
2590	assert(sto->no < nstates);
2591	if (donemap[sto->no] != `0`)
2592	continue;
2593
2594	/*
2595	* Check whether we already have a child clone state for this
2596	* source state.
2597	*/
2598	prevclone = NULL;
2599	for (a2 = sclone->outs; a2 != NULL; a2 = a2->outchain)
2600	{
2601	if (a2->to->tmp == sto)
2602	{
2603	prevclone = a2->to;
2604	break;
2605	}
2606	}
2607
2608	/*
2609	* If this arc is labeled the same as refarc, or the same as any
2610	* arc we must have traversed to get to sclone, then no additional
2611	* constraints need to be met to get to sto, so we should just
2612	* merge its outarcs into sclone.
2613	*/
2614	if (refarc && a->type == refarc->type && a->co == refarc->co)
2615	canmerge = `1`;
2616	else
2617	{
2618	struct state *s;
2619
2620	canmerge = `0`;
2621	for (s = sclone; s->ins; s = s->ins->from)
2622	{
2623	if (s->nins == `1` &&
2624	a->type == s->ins->type && a->co == s->ins->co)
2625	{
2626	canmerge = `1`;
2627	break;
2628	}
2629	}
2630	}
2631
2632	if (canmerge)
2633	{
2634	/*
2635	* We can merge into sclone. If we previously made a child
2636	* clone state, drop it; there's no need to visit it. (This
2637	* can happen if ssource has multiple pathways to sto, and we
2638	* only just now found one that is provably a no-op.)
2639	*/
2640	if (prevclone)
2641	dropstate(nfa, prevclone); / kills our outarc, too /
2642
2643	/ Recurse to merge sto's outarcs into sclone /
2644	clonesuccessorstates(nfa,
2645	sto,
2646	sclone,
2647	spredecessor,
2648	refarc,
2649	donemap,
2650	outerdonemap,
2651	nstates);
2652	/ sto should now be marked as previously visited /
2653	assert(NISERR() \|\| donemap[sto->no] == `1`);
2654	}
2655	else if (prevclone)
2656	{
2657	/*
2658	* We already have a clone state for this successor, so just
2659	* make another arc to it.
2660	*/
2661	cparc(nfa, a, sclone, prevclone);
2662	}
2663	else
2664	{
2665	/*
2666	* We need to create a new successor clone state.
2667	*/
2668	struct state *stoclone;
2669
2670	stoclone = newstate(nfa);
2671	if (stoclone == NULL)
2672	{
2673	assert(NISERR());
2674	break;
2675	}
2676	/ Mark it as to what it's a clone of /
2677	stoclone->tmp = sto;
2678	/ ... and add the outarc leading to it /
2679	cparc(nfa, a, sclone, stoclone);
2680	}
2681	}
2682	else
2683	{
2684	/*
2685	* Non-constraint outarcs just get copied to sclone, as do outarcs
2686	* leading to states with no constraint outarc.
2687	*/
2688	cparc(nfa, a, sclone, sto);
2689	}
2690	}
2691
2692	/*
2693	* If we are at outer level for this clone state, recurse to all its child
2694	* clone states, clearing their tmp fields as we go. (If we're not
2695	* outermost for sclone, leave this to be done by the outer call level.)
2696	* Note that if we have multiple outarcs leading to the same clone state,
2697	* it will only be recursed-to once.
2698	*/
2699	if (curdonemap == NULL)
2700	{
2701	for (a = sclone->outs; a != NULL && !NISERR(); a = a->outchain)
2702	{
2703	struct state *stoclone = a->to;
2704	struct state *sto = stoclone->tmp;
2705
2706	if (sto != NULL)
2707	{
2708	stoclone->tmp = NULL;
2709	clonesuccessorstates(nfa,
2710	sto,
2711	stoclone,
2712	spredecessor,
2713	refarc,
2714	NULL,
2715	donemap,
2716	nstates);
2717	}
2718	}
2719
2720	/ Don't forget to free sclone's donemap when done with it /
2721	FREE(donemap);
2722	}
2723	}
2724
2725	/*
2726	* cleanup - clean up NFA after optimizations
2727	*/
2728	static void
2729	cleanup(struct nfa *nfa)
2730	{
2731	struct state *s;
2732	struct state *nexts;
2733	int n;
2734
2735	if (NISERR())
2736	return;
2737
2738	/ clear out unreachable or dead-end states /
2739	/ use pre to mark reachable, then post to mark can-reach-post /
2740	markreachable(nfa, nfa->pre, (struct state *) NULL, nfa->pre);
2741	markcanreach(nfa, nfa->post, nfa->pre, nfa->post);
2742	for (s = nfa->states; s != NULL && !NISERR(); s = nexts)
2743	{
2744	nexts = s->next;
2745	if (s->tmp != nfa->post && !s->flag)
2746	dropstate(nfa, s);
2747	}
2748	assert(NISERR() \|\| nfa->post->nins == `0` \|\| nfa->post->tmp == nfa->post);
2749	cleartraverse(nfa, nfa->pre);
2750	assert(NISERR() \|\| nfa->post->nins == `0` \|\| nfa->post->tmp == NULL);
2751	/ the nins==0 (final unreachable) case will be caught later /
2752
2753	/ renumber surviving states /
2754	n = `0`;
2755	for (s = nfa->states; s != NULL; s = s->next)
2756	s->no = n++;
2757	nfa->nstates = n;
2758	}
2759
2760	/*
2761	* markreachable - recursive marking of reachable states
2762	*/
2763	static void
2764	markreachable(struct nfa *nfa,
2765	struct state *s,
2766	struct state okay, /* consider only states with this mark /
2767	struct state mark) /* the value to mark with /
2768	{
2769	struct arc *a;
2770
2771	/ Since this is recursive, it could be driven to stack overflow /
2772	if (STACK_TOO_DEEP(nfa->v->re))
2773	{
2774	NERR(REG_ETOOBIG);
2775	return;
2776	}
2777
2778	if (s->tmp != okay)
2779	return;
2780	s->tmp = mark;
2781
2782	for (a = s->outs; a != NULL; a = a->outchain)
2783	markreachable(nfa, a->to, okay, mark);
2784	}
2785
2786	/*
2787	* markcanreach - recursive marking of states which can reach here
2788	*/
2789	static void
2790	markcanreach(struct nfa *nfa,
2791	struct state *s,
2792	struct state okay, /* consider only states with this mark /
2793	struct state mark) /* the value to mark with /
2794	{
2795	struct arc *a;
2796
2797	/ Since this is recursive, it could be driven to stack overflow /
2798	if (STACK_TOO_DEEP(nfa->v->re))
2799	{
2800	NERR(REG_ETOOBIG);
2801	return;
2802	}
2803
2804	if (s->tmp != okay)
2805	return;
2806	s->tmp = mark;
2807
2808	for (a = s->ins; a != NULL; a = a->inchain)
2809	markcanreach(nfa, a->from, okay, mark);
2810	}
2811
2812	/*
2813	* analyze - ascertain potentially-useful facts about an optimized NFA
2814	*/
2815	static long / re_info bits to be ORed in /
2816	analyze(struct nfa *nfa)
2817	{
2818	struct arc *a;
2819	struct arc *aa;
2820
2821	if (NISERR())
2822	return `0`;
2823
2824	if (nfa->pre->outs == NULL)
2825	return REG_UIMPOSSIBLE;
2826	for (a = nfa->pre->outs; a != NULL; a = a->outchain)
2827	for (aa = a->to->outs; aa != NULL; aa = aa->outchain)
2828	if (aa->to == nfa->post)
2829	return REG_UEMPTYMATCH;
2830	return `0`;
2831	}
2832
2833	/*
2834	* compact - construct the compact representation of an NFA
2835	*/
2836	static void
2837	compact(struct nfa *nfa,
2838	struct cnfa *cnfa)
2839	{
2840	struct state *s;
2841	struct arc *a;
2842	size_t nstates;
2843	size_t narcs;
2844	struct carc *ca;
2845	struct carc *first;
2846
2847	assert(!NISERR());
2848
2849	nstates = `0`;
2850	narcs = `0`;
2851	for (s = nfa->states; s != NULL; s = s->next)
2852	{
2853	nstates++;
2854	narcs += s->nouts + `1`; / need one extra for endmarker /
2855	}
2856
2857	cnfa->stflags = (char ) MALLOC(nstates sizeof(char));
2858	cnfa->states = (struct carc *) MALLOC(nstates sizeof(struct carc *));
2859	cnfa->arcs = (struct carc ) MALLOC(narcs sizeof(struct carc));
2860	if (cnfa->stflags == NULL \|\| cnfa->states == NULL \|\| cnfa->arcs == NULL)
2861	{
2862	if (cnfa->stflags != NULL)
2863	FREE(cnfa->stflags);
2864	if (cnfa->states != NULL)
2865	FREE(cnfa->states);
2866	if (cnfa->arcs != NULL)
2867	FREE(cnfa->arcs);
2868	NERR(REG_ESPACE);
2869	return;
2870	}
2871	cnfa->nstates = nstates;
2872	cnfa->pre = nfa->pre->no;
2873	cnfa->post = nfa->post->no;
2874	cnfa->bos[`0`] = nfa->bos[`0`];
2875	cnfa->bos[`1`] = nfa->bos[`1`];
2876	cnfa->eos[`0`] = nfa->eos[`0`];
2877	cnfa->eos[`1`] = nfa->eos[`1`];
2878	cnfa->ncolors = maxcolor(nfa->cm) + `1`;
2879	cnfa->flags = `0`;
2880
2881	ca = cnfa->arcs;
2882	for (s = nfa->states; s != NULL; s = s->next)
2883	{
2884	assert((size_t) s->no < nstates);
2885	cnfa->stflags[s->no] = `0`;
2886	cnfa->states[s->no] = ca;
2887	first = ca;
2888	for (a = s->outs; a != NULL; a = a->outchain)
2889	switch (a->type)
2890	{
2891	case PLAIN:
2892	ca->co = a->co;
2893	ca->to = a->to->no;
2894	ca++;
2895	break;
2896	case LACON:
2897	assert(s->no != cnfa->pre);
2898	ca->co = (color) (cnfa->ncolors + a->co);
2899	ca->to = a->to->no;
2900	ca++;
2901	cnfa->flags \|= HASLACONS;
2902	break;
2903	default:
2904	NERR(REG_ASSERT);
2905	break;
2906	}
2907	carcsort(first, ca - first);
2908	ca->co = COLORLESS;
2909	ca->to = `0`;
2910	ca++;
2911	}
2912	assert(ca == &cnfa->arcs[narcs]);
2913	assert(cnfa->nstates != `0`);
2914
2915	/ mark no-progress states /
2916	for (a = nfa->pre->outs; a != NULL; a = a->outchain)
2917	cnfa->stflags[a->to->no] = CNFA_NOPROGRESS;
2918	cnfa->stflags[nfa->pre->no] = CNFA_NOPROGRESS;
2919	}
2920
2921	/*
2922	* carcsort - sort compacted-NFA arcs by color
2923	*/
2924	static void
2925	carcsort(struct carc *first, size_t n)
2926	{
2927	if (n > `1`)
2928	qsort(first, n, sizeof(struct carc), carc_cmp);
2929	}
2930
2931	static int
2932	carc_cmp(const void a, const* void *b)
2933	{
2934	const struct carc aa = (const* struct carc *) a;
2935	const struct carc bb = (const* struct carc *) b;
2936
2937	if (aa->co < bb->co)
2938	return -`1`;
2939	if (aa->co > bb->co)
2940	return +`1`;
2941	if (aa->to < bb->to)
2942	return -`1`;
2943	if (aa->to > bb->to)
2944	return +`1`;
2945	return `0`;
2946	}
2947
2948	/*
2949	* freecnfa - free a compacted NFA
2950	*/
2951	static void
2952	freecnfa(struct cnfa *cnfa)
2953	{
2954	assert(cnfa->nstates != `0`); / not empty already /
2955	cnfa->nstates = `0`;
2956	FREE(cnfa->stflags);
2957	FREE(cnfa->states);
2958	FREE(cnfa->arcs);
2959	}
2960
2961	/*
2962	* dumpnfa - dump an NFA in human-readable form
2963	*/
2964	static void
2965	dumpnfa(struct nfa *nfa,
2966	FILE *f)
2967	{
2968	#ifdef REG_DEBUG
2969	struct state *s;
2970	int nstates = `0`;
2971	int narcs = `0`;
2972
2973	fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no);
2974	if (nfa->bos[`0`] != COLORLESS)
2975	fprintf(f, ", bos [%ld]", (long) nfa->bos[`0`]);
2976	if (nfa->bos[`1`] != COLORLESS)
2977	fprintf(f, ", bol [%ld]", (long) nfa->bos[`1`]);
2978	if (nfa->eos[`0`] != COLORLESS)
2979	fprintf(f, ", eos [%ld]", (long) nfa->eos[`0`]);
2980	if (nfa->eos[`1`] != COLORLESS)
2981	fprintf(f, ", eol [%ld]", (long) nfa->eos[`1`]);
2982	fprintf(f, "\n");
2983	for (s = nfa->states; s != NULL; s = s->next)
2984	{
2985	dumpstate(s, f);
2986	nstates++;
2987	narcs += s->nouts;
2988	}
2989	fprintf(f, "total of %d states, %d arcs\n", nstates, narcs);
2990	if (nfa->parent == NULL)
2991	dumpcolors(nfa->cm, f);
2992	fflush(f);
2993	#endif
2994	}
2995
2996	#ifdef REG_DEBUG /* subordinates of dumpnfa */
2997
2998	/*
2999	* dumpstate - dump an NFA state in human-readable form
3000	*/
3001	static void
3002	dumpstate(struct state *s,
3003	FILE *f)
3004	{
3005	struct arc *a;
3006
3007	fprintf(f, "%d%s%c", s->no, (s->tmp != NULL) ? "T" : "",
3008	(s->flag) ? s->flag : `'.'`);
3009	if (s->prev != NULL && s->prev->next != s)
3010	fprintf(f, "\tstate chain bad\n");
3011	if (s->nouts == `0`)
3012	fprintf(f, "\tno out arcs\n");
3013	else
3014	dumparcs(s, f);
3015	fflush(f);
3016	for (a = s->ins; a != NULL; a = a->inchain)
3017	{
3018	if (a->to != s)
3019	fprintf(f, "\tlink from %d to %d on %d's in-chain\n",
3020	a->from->no, a->to->no, s->no);
3021	}
3022	}
3023
3024	/*
3025	* dumparcs - dump out-arcs in human-readable form
3026	*/
3027	static void
3028	dumparcs(struct state *s,
3029	FILE *f)
3030	{
3031	int pos;
3032	struct arc *a;
3033
3034	/ printing oldest arcs first is usually clearer /
3035	a = s->outs;
3036	assert(a != NULL);
3037	while (a->outchain != NULL)
3038	a = a->outchain;
3039	pos = `1`;
3040	do
3041	{
3042	dumparc(a, s, f);
3043	if (pos == `5`)
3044	{
3045	fprintf(f, "\n");
3046	pos = `1`;
3047	}
3048	else
3049	pos++;
3050	a = a->outchainRev;
3051	} while (a != NULL);
3052	if (pos != `1`)
3053	fprintf(f, "\n");
3054	}
3055
3056	/*
3057	* dumparc - dump one outarc in readable form, including prefixing tab
3058	*/
3059	static void
3060	dumparc(struct arc *a,
3061	struct state *s,
3062	FILE *f)
3063	{
3064	struct arc *aa;
3065	struct arcbatch *ab;
3066
3067	fprintf(f, "\t");
3068	switch (a->type)
3069	{
3070	case PLAIN:
3071	fprintf(f, "[%ld]", (long) a->co);
3072	break;
3073	case AHEAD:
3074	fprintf(f, ">%ld>", (long) a->co);
3075	break;
3076	case BEHIND:
3077	fprintf(f, "<%ld<", (long) a->co);
3078	break;
3079	case LACON:
3080	fprintf(f, ":%ld:", (long) a->co);
3081	break;
3082	case `'^'`:
3083	case `'$'`:
3084	fprintf(f, "%c%d", a->type, (int) a->co);
3085	break;
3086	case EMPTY:
3087	break;
3088	default:
3089	fprintf(f, "0x%x/0%lo", a->type, (long) a->co);
3090	break;
3091	}
3092	if (a->from != s)
3093	fprintf(f, "?%d?", a->from->no);
3094	for (ab = &a->from->oas; ab != NULL; ab = ab->next)
3095	{
3096	for (aa = &ab->a[`0`]; aa < &ab->a[ABSIZE]; aa++)
3097	if (aa == a)
3098	break; / NOTE BREAK OUT /
3099	if (aa < &ab->a[ABSIZE]) / propagate break /
3100	break; / NOTE BREAK OUT /
3101	}
3102	if (ab == NULL)
3103	fprintf(f, "?!?"); / not in allocated space /
3104	fprintf(f, "->");
3105	if (a->to == NULL)
3106	{
3107	fprintf(f, "NULL");
3108	return;
3109	}
3110	fprintf(f, "%d", a->to->no);
3111	for (aa = a->to->ins; aa != NULL; aa = aa->inchain)
3112	if (aa == a)
3113	break; / NOTE BREAK OUT /
3114	if (aa == NULL)
3115	fprintf(f, "?!?"); / missing from in-chain /
3116	}
3117	#endif /* REG_DEBUG */
3118
3119	/*
3120	* dumpcnfa - dump a compacted NFA in human-readable form
3121	*/
3122	#ifdef REG_DEBUG
3123	static void
3124	dumpcnfa(struct cnfa *cnfa,
3125	FILE *f)
3126	{
3127	int st;
3128
3129	fprintf(f, "pre %d, post %d", cnfa->pre, cnfa->post);
3130	if (cnfa->bos[`0`] != COLORLESS)
3131	fprintf(f, ", bos [%ld]", (long) cnfa->bos[`0`]);
3132	if (cnfa->bos[`1`] != COLORLESS)
3133	fprintf(f, ", bol [%ld]", (long) cnfa->bos[`1`]);
3134	if (cnfa->eos[`0`] != COLORLESS)
3135	fprintf(f, ", eos [%ld]", (long) cnfa->eos[`0`]);
3136	if (cnfa->eos[`1`] != COLORLESS)
3137	fprintf(f, ", eol [%ld]", (long) cnfa->eos[`1`]);
3138	if (cnfa->flags & HASLACONS)
3139	fprintf(f, ", haslacons");
3140	fprintf(f, "\n");
3141	for (st = `0`; st < cnfa->nstates; st++)
3142	dumpcstate(st, cnfa, f);
3143	fflush(f);
3144	}
3145	#endif
3146
3147	#ifdef REG_DEBUG /* subordinates of dumpcnfa */
3148
3149	/*
3150	* dumpcstate - dump a compacted-NFA state in human-readable form
3151	*/
3152	static void
3153	dumpcstate(int st,
3154	struct cnfa *cnfa,
3155	FILE *f)
3156	{
3157	struct carc *ca;
3158	int pos;
3159
3160	fprintf(f, "%d%s", st, (cnfa->stflags[st] & CNFA_NOPROGRESS) ? ":" : ".");
3161	pos = `1`;
3162	for (ca = cnfa->states[st]; ca->co != COLORLESS; ca++)
3163	{
3164	if (ca->co < cnfa->ncolors)
3165	fprintf(f, "\t[%ld]->%d", (long) ca->co, ca->to);
3166	else
3167	fprintf(f, "\t:%ld:->%d", (long) (ca->co - cnfa->ncolors), ca->to);
3168	if (pos == `5`)
3169	{
3170	fprintf(f, "\n");
3171	pos = `1`;
3172	}
3173	else
3174	pos++;
3175	}
3176	if (ca == cnfa->states[st] \|\| pos != `1`)
3177	fprintf(f, "\n");
3178	fflush(f);
3179	}
3180
3181	#endif /* REG_DEBUG */
3182

Browse the source code of PostgreSQL/src/backend/regex/regc_nfa.c