rege_dfa.c source code [PostgreSQL/src/backend/regex/rege_dfa.c]

1	/*
2	* DFA routines
3	* This file is #included by regexec.c.
4	*
5	* Copyright (c) 1998, 1999 Henry Spencer. All rights reserved.
6	*
7	* Development of this software was funded, in part, by Cray Research Inc.,
8	* UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics
9	* Corporation, none of whom are responsible for the results. The author
10	* thanks all of them.
11	*
12	* Redistribution and use in source and binary forms -- with or without
13	* modification -- are permitted for any purpose, provided that
14	* redistributions in source form retain this entire copyright notice and
15	* indicate the origin and nature of any modifications.
16	*
17	* I'd appreciate being given credit for this package in the documentation
18	* of software which uses it, but that is not a requirement.
19	*
20	* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
21	* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
22	* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23	* HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
26	* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27	* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28	* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
29	* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30	*
31	* src/backend/regex/rege_dfa.c
32	*
33	*/
34
35	/*
36	* longest - longest-preferred matching engine
37	*
38	* On success, returns match endpoint address. Returns NULL on no match.
39	* Internal errors also return NULL, with v->err set.
40	*/
41	static chr *
42	longest(struct vars *v,
43	struct dfa *d,
44	chr start, /* where the match should start /
45	chr stop, /* match must end at or before here /
46	int hitstopp) /* record whether hit v->stop, if non-NULL /
47	{
48	chr *cp;
49	chr *realstop = (stop == v->stop) ? stop : stop + `1`;
50	color co;
51	struct sset *css;
52	struct sset *ss;
53	chr *post;
54	int i;
55	struct colormap *cm = d->cm;
56
57	/ prevent "uninitialized variable" warnings /
58	if (hitstopp != NULL)
59	*hitstopp = `0`;
60
61	/ initialize /
62	css = initialize(v, d, start);
63	if (css == NULL)
64	return NULL;
65	cp = start;
66
67	/ startup /
68	FDEBUG(("+++ startup +++\n"));
69	if (cp == v->start)
70	{
71	co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? `0` : `1`];
72	FDEBUG(("color %ld\n", (long) co));
73	}
74	else
75	{
76	co = GETCOLOR(cm, *(cp - `1`));
77	FDEBUG(("char %c, color %ld\n", (char) (cp - `1`), (long*) co));
78	}
79	css = miss(v, d, css, co, cp, start);
80	if (css == NULL)
81	return NULL;
82	css->lastseen = cp;
83
84	/*
85	* This is the main text-scanning loop. It seems worth having two copies
86	* to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
87	* builds, when you're not actively tracing.
88	*/
89	#ifdef REG_DEBUG
90	if (v->eflags & REG_FTRACE)
91	{
92	while (cp < realstop)
93	{
94	FDEBUG(("+++ at c%d +++\n", (int) (css - d->ssets)));
95	co = GETCOLOR(cm, *cp);
96	FDEBUG(("char %c, color %ld\n", (char) cp, (long*) co));
97	ss = css->outs[co];
98	if (ss == NULL)
99	{
100	ss = miss(v, d, css, co, cp + `1`, start);
101	if (ss == NULL)
102	break; / NOTE BREAK OUT /
103	}
104	cp++;
105	ss->lastseen = cp;
106	css = ss;
107	}
108	}
109	else
110	#endif
111	{
112	while (cp < realstop)
113	{
114	co = GETCOLOR(cm, *cp);
115	ss = css->outs[co];
116	if (ss == NULL)
117	{
118	ss = miss(v, d, css, co, cp + `1`, start);
119	if (ss == NULL)
120	break; / NOTE BREAK OUT /
121	}
122	cp++;
123	ss->lastseen = cp;
124	css = ss;
125	}
126	}
127
128	if (ISERR())
129	return NULL;
130
131	/ shutdown /
132	FDEBUG(("+++ shutdown at c%d +++\n", (int) (css - d->ssets)));
133	if (cp == v->stop && stop == v->stop)
134	{
135	if (hitstopp != NULL)
136	*hitstopp = `1`;
137	co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? `0` : `1`];
138	FDEBUG(("color %ld\n", (long) co));
139	ss = miss(v, d, css, co, cp, start);
140	if (ISERR())
141	return NULL;
142	/ special case: match ended at eol? /
143	if (ss != NULL && (ss->flags & POSTSTATE))
144	return cp;
145	else if (ss != NULL)
146	ss->lastseen = cp; / to be tidy /
147	}
148
149	/ find last match, if any /
150	post = d->lastpost;
151	for (ss = d->ssets, i = d->nssused; i > `0`; ss++, i--)
152	if ((ss->flags & POSTSTATE) && post != ss->lastseen &&
153	(post == NULL \|\| post < ss->lastseen))
154	post = ss->lastseen;
155	if (post != NULL) / found one /
156	return post - `1`;
157
158	return NULL;
159	}
160
161	/*
162	* shortest - shortest-preferred matching engine
163	*
164	* On success, returns match endpoint address. Returns NULL on no match.
165	* Internal errors also return NULL, with v->err set.
166	*/
167	static chr *
168	shortest(struct vars *v,
169	struct dfa *d,
170	chr start, /* where the match should start /
171	chr min, /* match must end at or after here /
172	chr max, /* match must end at or before here /
173	chr *coldp, /* store coldstart pointer here, if non-NULL /
174	int hitstopp) /* record whether hit v->stop, if non-NULL /
175	{
176	chr *cp;
177	chr *realmin = (min == v->stop) ? min : min + `1`;
178	chr *realmax = (max == v->stop) ? max : max + `1`;
179	color co;
180	struct sset *css;
181	struct sset *ss;
182	struct colormap *cm = d->cm;
183
184	/ prevent "uninitialized variable" warnings /
185	if (coldp != NULL)
186	*coldp = NULL;
187	if (hitstopp != NULL)
188	*hitstopp = `0`;
189
190	/ initialize /
191	css = initialize(v, d, start);
192	if (css == NULL)
193	return NULL;
194	cp = start;
195
196	/ startup /
197	FDEBUG(("--- startup ---\n"));
198	if (cp == v->start)
199	{
200	co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? `0` : `1`];
201	FDEBUG(("color %ld\n", (long) co));
202	}
203	else
204	{
205	co = GETCOLOR(cm, *(cp - `1`));
206	FDEBUG(("char %c, color %ld\n", (char) (cp - `1`), (long*) co));
207	}
208	css = miss(v, d, css, co, cp, start);
209	if (css == NULL)
210	return NULL;
211	css->lastseen = cp;
212	ss = css;
213
214	/*
215	* This is the main text-scanning loop. It seems worth having two copies
216	* to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
217	* builds, when you're not actively tracing.
218	*/
219	#ifdef REG_DEBUG
220	if (v->eflags & REG_FTRACE)
221	{
222	while (cp < realmax)
223	{
224	FDEBUG(("--- at c%d ---\n", (int) (css - d->ssets)));
225	co = GETCOLOR(cm, *cp);
226	FDEBUG(("char %c, color %ld\n", (char) cp, (long*) co));
227	ss = css->outs[co];
228	if (ss == NULL)
229	{
230	ss = miss(v, d, css, co, cp + `1`, start);
231	if (ss == NULL)
232	break; / NOTE BREAK OUT /
233	}
234	cp++;
235	ss->lastseen = cp;
236	css = ss;
237	if ((ss->flags & POSTSTATE) && cp >= realmin)
238	break; / NOTE BREAK OUT /
239	}
240	}
241	else
242	#endif
243	{
244	while (cp < realmax)
245	{
246	co = GETCOLOR(cm, *cp);
247	ss = css->outs[co];
248	if (ss == NULL)
249	{
250	ss = miss(v, d, css, co, cp + `1`, start);
251	if (ss == NULL)
252	break; / NOTE BREAK OUT /
253	}
254	cp++;
255	ss->lastseen = cp;
256	css = ss;
257	if ((ss->flags & POSTSTATE) && cp >= realmin)
258	break; / NOTE BREAK OUT /
259	}
260	}
261
262	if (ss == NULL)
263	return NULL;
264
265	if (coldp != NULL) / report last no-progress state set, if any /
266	*coldp = lastcold(v, d);
267
268	if ((ss->flags & POSTSTATE) && cp > min)
269	{
270	assert(cp >= realmin);
271	cp--;
272	}
273	else if (cp == v->stop && max == v->stop)
274	{
275	co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? `0` : `1`];
276	FDEBUG(("color %ld\n", (long) co));
277	ss = miss(v, d, css, co, cp, start);
278	/ match might have ended at eol /
279	if ((ss == NULL \|\| !(ss->flags & POSTSTATE)) && hitstopp != NULL)
280	*hitstopp = `1`;
281	}
282
283	if (ss == NULL \|\| !(ss->flags & POSTSTATE))
284	return NULL;
285
286	return cp;
287	}
288
289	/*
290	* matchuntil - incremental matching engine
291	*
292	* This is meant for use with a search-style NFA (that is, the pattern is
293	* known to act as though it had a leading .*). We determine whether a
294	* match exists starting at v->start and ending at probe. Multiple calls
295	* require only O(N) time not O(N^2) so long as the probe values are
296	* nondecreasing. lastcss and lastcp must be initialized to NULL before
297	* starting a series of calls.
298	*
299	* Returns 1 if a match exists, 0 if not.
300	* Internal errors also return 0, with v->err set.
301	*/
302	static int
303	matchuntil(struct vars *v,
304	struct dfa *d,
305	chr probe, /* we want to know if a match ends here /
306	struct sset *lastcss, /* state storage across calls /
307	chr *lastcp) /* state storage across calls /
308	{
309	chr cp = lastcp;
310	color co;
311	struct sset css = lastcss;
312	struct sset *ss;
313	struct colormap *cm = d->cm;
314
315	/ initialize and startup, or restart, if necessary /
316	if (cp == NULL \|\| cp > probe)
317	{
318	cp = v->start;
319	css = initialize(v, d, cp);
320	if (css == NULL)
321	return `0`;
322
323	FDEBUG((">>> startup >>>\n"));
324	co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? `0` : `1`];
325	FDEBUG(("color %ld\n", (long) co));
326
327	css = miss(v, d, css, co, cp, v->start);
328	if (css == NULL)
329	return `0`;
330	css->lastseen = cp;
331	}
332	else if (css == NULL)
333	{
334	/ we previously found that no match is possible beyond lastcp /*
335	return `0`;
336	}
337	ss = css;
338
339	/*
340	* This is the main text-scanning loop. It seems worth having two copies
341	* to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
342	* builds, when you're not actively tracing.
343	*/
344	#ifdef REG_DEBUG
345	if (v->eflags & REG_FTRACE)
346	{
347	while (cp < probe)
348	{
349	FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
350	co = GETCOLOR(cm, *cp);
351	FDEBUG(("char %c, color %ld\n", (char) cp, (long*) co));
352	ss = css->outs[co];
353	if (ss == NULL)
354	{
355	ss = miss(v, d, css, co, cp + `1`, v->start);
356	if (ss == NULL)
357	break; / NOTE BREAK OUT /
358	}
359	cp++;
360	ss->lastseen = cp;
361	css = ss;
362	}
363	}
364	else
365	#endif
366	{
367	while (cp < probe)
368	{
369	co = GETCOLOR(cm, *cp);
370	ss = css->outs[co];
371	if (ss == NULL)
372	{
373	ss = miss(v, d, css, co, cp + `1`, v->start);
374	if (ss == NULL)
375	break; / NOTE BREAK OUT /
376	}
377	cp++;
378	ss->lastseen = cp;
379	css = ss;
380	}
381	}
382
383	*lastcss = ss;
384	*lastcp = cp;
385
386	if (ss == NULL)
387	return `0`; / impossible match, or internal error /
388
389	/ We need to process one more chr, or the EOS symbol, to check match /
390	if (cp < v->stop)
391	{
392	FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
393	co = GETCOLOR(cm, *cp);
394	FDEBUG(("char %c, color %ld\n", (char) cp, (long*) co));
395	ss = css->outs[co];
396	if (ss == NULL)
397	ss = miss(v, d, css, co, cp + `1`, v->start);
398	}
399	else
400	{
401	assert(cp == v->stop);
402	co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? `0` : `1`];
403	FDEBUG(("color %ld\n", (long) co));
404	ss = miss(v, d, css, co, cp, v->start);
405	}
406
407	if (ss == NULL \|\| !(ss->flags & POSTSTATE))
408	return `0`;
409
410	return `1`;
411	}
412
413	/*
414	* lastcold - determine last point at which no progress had been made
415	*/
416	static chr * / endpoint, or NULL /
417	lastcold(struct vars *v,
418	struct dfa *d)
419	{
420	struct sset *ss;
421	chr *nopr;
422	int i;
423
424	nopr = d->lastnopr;
425	if (nopr == NULL)
426	nopr = v->start;
427	for (ss = d->ssets, i = d->nssused; i > `0`; ss++, i--)
428	if ((ss->flags & NOPROGRESS) && nopr < ss->lastseen)
429	nopr = ss->lastseen;
430	return nopr;
431	}
432
433	/*
434	* newdfa - set up a fresh DFA
435	*/
436	static struct dfa *
437	newdfa(struct vars *v,
438	struct cnfa *cnfa,
439	struct colormap *cm,
440	struct smalldfa sml) /* preallocated space, may be NULL /
441	{
442	struct dfa *d;
443	size_t nss = cnfa->nstates * `2`;
444	int wordsper = (cnfa->nstates + UBITS - `1`) / UBITS;
445	struct smalldfa *smallwas = sml;
446
447	assert(cnfa != NULL && cnfa->nstates != `0`);
448
449	if (nss <= FEWSTATES && cnfa->ncolors <= FEWCOLORS)
450	{
451	assert(wordsper == `1`);
452	if (sml == NULL)
453	{
454	sml = (struct smalldfa ) MALLOC(sizeof(struct* smalldfa));
455	if (sml == NULL)
456	{
457	ERR(REG_ESPACE);
458	return NULL;
459	}
460	}
461	d = &sml->dfa;
462	d->ssets = sml->ssets;
463	d->statesarea = sml->statesarea;
464	d->work = &d->statesarea[nss];
465	d->outsarea = sml->outsarea;
466	d->incarea = sml->incarea;
467	d->cptsmalloced = `0`;
468	d->mallocarea = (smallwas == NULL) ? (char *) sml : NULL;
469	}
470	else
471	{
472	d = (struct dfa ) MALLOC(sizeof(struct* dfa));
473	if (d == NULL)
474	{
475	ERR(REG_ESPACE);
476	return NULL;
477	}
478	d->ssets = (struct sset ) MALLOC(nss sizeof(struct sset));
479	d->statesarea = (unsigned ) MALLOC((nss + WORK) wordsper *
480	sizeof(unsigned));
481	d->work = &d->statesarea[nss * wordsper];
482	d->outsarea = (struct sset *) MALLOC(nss cnfa->ncolors *
483	sizeof(struct sset *));
484	d->incarea = (struct arcp ) MALLOC(nss cnfa->ncolors *
485	sizeof(struct arcp));
486	d->cptsmalloced = `1`;
487	d->mallocarea = (char *) d;
488	if (d->ssets == NULL \|\| d->statesarea == NULL \|\|
489	d->outsarea == NULL \|\| d->incarea == NULL)
490	{
491	freedfa(d);
492	ERR(REG_ESPACE);
493	return NULL;
494	}
495	}
496
497	d->nssets = (v->eflags & REG_SMALL) ? `7` : nss;
498	d->nssused = `0`;
499	d->nstates = cnfa->nstates;
500	d->ncolors = cnfa->ncolors;
501	d->wordsper = wordsper;
502	d->cnfa = cnfa;
503	d->cm = cm;
504	d->lastpost = NULL;
505	d->lastnopr = NULL;
506	d->search = d->ssets;
507
508	/ initialization of sset fields is done as needed /
509
510	return d;
511	}
512
513	/*
514	* freedfa - free a DFA
515	*/
516	static void
517	freedfa(struct dfa *d)
518	{
519	if (d->cptsmalloced)
520	{
521	if (d->ssets != NULL)
522	FREE(d->ssets);
523	if (d->statesarea != NULL)
524	FREE(d->statesarea);
525	if (d->outsarea != NULL)
526	FREE(d->outsarea);
527	if (d->incarea != NULL)
528	FREE(d->incarea);
529	}
530
531	if (d->mallocarea != NULL)
532	FREE(d->mallocarea);
533	}
534
/*
 * hash - construct a hash code for a bitvector
 *
 * uv	the bitvector, as an array of unsigned words
 * n	number of words in uv
 *
 * Returns the XOR of the n words (0 when n <= 0).
 *
 * There are probably better ways, but they're more expensive.
 */
static unsigned
hash(unsigned *uv,
	 int n)
{
	int			i;
	unsigned	h;

	h = 0;
	for (i = 0; i < n; i++)
		h ^= uv[i];
	return h;
}
552
553	/*
554	* initialize - hand-craft a cache entry for startup, otherwise get ready
555	*/
556	static struct sset *
557	initialize(struct vars *v,
558	struct dfa *d,
559	chr *start)
560	{
561	struct sset *ss;
562	int i;
563
564	/ is previous one still there? /
565	if (d->nssused > `0` && (d->ssets[`0`].flags & STARTER))
566	ss = &d->ssets[`0`];
567	else
568	{ / no, must (re)build it /
569	ss = getvacant(v, d, start, start);
570	if (ss == NULL)
571	return NULL;
572	for (i = `0`; i < d->wordsper; i++)
573	ss->states[i] = `0`;
574	BSET(ss->states, d->cnfa->pre);
575	ss->hash = HASH(ss->states, d->wordsper);
576	assert(d->cnfa->pre != d->cnfa->post);
577	ss->flags = STARTER \| LOCKED \| NOPROGRESS;
578	/ lastseen dealt with below /
579	}
580
581	for (i = `0`; i < d->nssused; i++)
582	d->ssets[i].lastseen = NULL;
583	ss->lastseen = start; / maybe untrue, but harmless /
584	d->lastpost = NULL;
585	d->lastnopr = NULL;
586	return ss;
587	}
588
589	/*
590	* miss - handle a stateset cache miss
591	*
592	* css is the current stateset, co is the color of the current input character,
593	* cp points to the character after that (which is where we may need to test
594	* LACONs). start does not affect matching behavior but is needed for pickss'
595	* heuristics about which stateset cache entry to replace.
596	*
597	* Ordinarily, returns the address of the next stateset (the one that is
598	* valid after consuming the input character). Returns NULL if no valid
599	* NFA states remain, ie we have a certain match failure.
600	* Internal errors also return NULL, with v->err set.
601	*/
602	static struct sset *
603	miss(struct vars *v,
604	struct dfa *d,
605	struct sset *css,
606	color co,
607	chr cp, /* next chr /
608	chr start) /* where the attempt got started /
609	{
610	struct cnfa *cnfa = d->cnfa;
611	int i;
612	unsigned h;
613	struct carc *ca;
614	struct sset *p;
615	int ispost;
616	int noprogress;
617	int gotstate;
618	int dolacons;
619	int sawlacons;
620
621	/ for convenience, we can be called even if it might not be a miss /
622	if (css->outs[co] != NULL)
623	{
624	FDEBUG(("hit\n"));
625	return css->outs[co];
626	}
627	FDEBUG(("miss\n"));
628
629	/*
630	* Checking for operation cancel in the inner text search loop seems
631	* unduly expensive. As a compromise, check during cache misses.
632	*/
633	if (CANCEL_REQUESTED(v->re))
634	{
635	ERR(REG_CANCEL);
636	return NULL;
637	}
638
639	/*
640	* What set of states would we end up in after consuming the co character?
641	* We first consider PLAIN arcs that consume the character, and then look
642	* to see what LACON arcs could be traversed after consuming it.
643	*/
644	for (i = `0`; i < d->wordsper; i++)
645	d->work[i] = `0`; / build new stateset bitmap in d->work /
646	ispost = `0`;
647	noprogress = `1`;
648	gotstate = `0`;
649	for (i = `0`; i < d->nstates; i++)
650	if (ISBSET(css->states, i))
651	for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
652	if (ca->co == co)
653	{
654	BSET(d->work, ca->to);
655	gotstate = `1`;
656	if (ca->to == cnfa->post)
657	ispost = `1`;
658	if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
659	noprogress = `0`;
660	FDEBUG(("%d -> %d\n", i, ca->to));
661	}
662	if (!gotstate)
663	return NULL; / character cannot reach any new state /
664	dolacons = (cnfa->flags & HASLACONS);
665	sawlacons = `0`;
666	/ outer loop handles transitive closure of reachable-by-LACON states /
667	while (dolacons)
668	{
669	dolacons = `0`;
670	for (i = `0`; i < d->nstates; i++)
671	if (ISBSET(d->work, i))
672	for (ca = cnfa->states[i]; ca->co != COLORLESS; ca++)
673	{
674	if (ca->co < cnfa->ncolors)
675	continue; / not a LACON arc /
676	if (ISBSET(d->work, ca->to))
677	continue; / arc would be a no-op anyway /
678	sawlacons = `1`; / this LACON affects our result /
679	if (!lacon(v, cnfa, cp, ca->co))
680	{
681	if (ISERR())
682	return NULL;
683	continue; / LACON arc cannot be traversed /
684	}
685	if (ISERR())
686	return NULL;
687	BSET(d->work, ca->to);
688	dolacons = `1`;
689	if (ca->to == cnfa->post)
690	ispost = `1`;
691	if (!(cnfa->stflags[ca->to] & CNFA_NOPROGRESS))
692	noprogress = `0`;
693	FDEBUG(("%d :> %d\n", i, ca->to));
694	}
695	}
696	h = HASH(d->work, d->wordsper);
697
698	/ Is this stateset already in the cache? /
699	for (p = d->ssets, i = d->nssused; i > `0`; p++, i--)
700	if (HIT(h, d->work, p, d->wordsper))
701	{
702	FDEBUG(("cached c%d\n", (int) (p - d->ssets)));
703	break; / NOTE BREAK OUT /
704	}
705	if (i == `0`)
706	{ / nope, need a new cache entry /
707	p = getvacant(v, d, cp, start);
708	if (p == NULL)
709	return NULL;
710	assert(p != css);
711	for (i = `0`; i < d->wordsper; i++)
712	p->states[i] = d->work[i];
713	p->hash = h;
714	p->flags = (ispost) ? POSTSTATE : `0`;
715	if (noprogress)
716	p->flags \|= NOPROGRESS;
717	/ lastseen to be dealt with by caller /
718	}
719
720	/*
721	* Link new stateset to old, unless a LACON affected the result, in which
722	* case we don't create the link. That forces future transitions across
723	* this same arc (same prior stateset and character color) to come through
724	* miss() again, so that we can recheck the LACON(s), which might or might
725	* not pass since context will be different.
726	*/
727	if (!sawlacons)
728	{
729	FDEBUG(("c%d[%d]->c%d\n",
730	(int) (css - d->ssets), co, (int) (p - d->ssets)));
731	css->outs[co] = p;
732	css->inchain[co] = p->ins;
733	p->ins.ss = css;
734	p->ins.co = co;
735	}
736	return p;
737	}
738
739	/*
740	* lacon - lookaround-constraint checker for miss()
741	*/
742	static int / predicate: constraint satisfied? /
743	lacon(struct vars *v,
744	struct cnfa pcnfa, /* parent cnfa /
745	chr *cp,
746	color co) / "color" of the lookaround constraint /
747	{
748	int n;
749	struct subre *sub;
750	struct dfa *d;
751	chr *end;
752	int satisfied;
753
754	/ Since this is recursive, it could be driven to stack overflow /
755	if (STACK_TOO_DEEP(v->re))
756	{
757	ERR(REG_ETOOBIG);
758	return `0`;
759	}
760
761	n = co - pcnfa->ncolors;
762	assert(n > `0` && n < v->g->nlacons && v->g->lacons != NULL);
763	FDEBUG(("=== testing lacon %d\n", n));
764	sub = &v->g->lacons[n];
765	d = getladfa(v, n);
766	if (d == NULL)
767	return `0`;
768	if (LATYPE_IS_AHEAD(sub->subno))
769	{
770	/ used to use longest() here, but shortest() could be much cheaper /
771	end = shortest(v, d, cp, cp, v->stop,
772	(chr *) NULL, (int* *) NULL);
773	satisfied = LATYPE_IS_POS(sub->subno) ? (end != NULL) : (end == NULL);
774	}
775	else
776	{
777	/*
778	* To avoid doing O(N^2) work when repeatedly testing a lookbehind
779	* constraint in an N-character string, we use matchuntil() which can
780	* cache the DFA state across calls. We only need to restart if the
781	* probe point decreases, which is not common. The NFA we're using is
782	* a search NFA, so it doesn't mind scanning over stuff before the
783	* nominal match.
784	*/
785	satisfied = matchuntil(v, d, cp, &v->lblastcss[n], &v->lblastcp[n]);
786	if (!LATYPE_IS_POS(sub->subno))
787	satisfied = !satisfied;
788	}
789	FDEBUG(("=== lacon %d satisfied %d\n", n, satisfied));
790	return satisfied;
791	}
792
793	/*
794	* getvacant - get a vacant state set
795	*
796	* This routine clears out the inarcs and outarcs, but does not otherwise
797	* clear the innards of the state set -- that's up to the caller.
798	*/
799	static struct sset *
800	getvacant(struct vars *v,
801	struct dfa *d,
802	chr *cp,
803	chr *start)
804	{
805	int i;
806	struct sset *ss;
807	struct sset *p;
808	struct arcp ap;
809	color co;
810
811	ss = pickss(v, d, cp, start);
812	if (ss == NULL)
813	return NULL;
814	assert(!(ss->flags & LOCKED));
815
816	/ clear out its inarcs, including self-referential ones /
817	ap = ss->ins;
818	while ((p = ap.ss) != NULL)
819	{
820	co = ap.co;
821	FDEBUG(("zapping c%d's %ld outarc\n", (int) (p - d->ssets), (long) co));
822	p->outs[co] = NULL;
823	ap = p->inchain[co];
824	p->inchain[co].ss = NULL; / paranoia /
825	}
826	ss->ins.ss = NULL;
827
828	/ take it off the inarc chains of the ssets reached by its outarcs /
829	for (i = `0`; i < d->ncolors; i++)
830	{
831	p = ss->outs[i];
832	assert(p != ss); / not self-referential /
833	if (p == NULL)
834	continue; / NOTE CONTINUE /
835	FDEBUG(("del outarc %d from c%d's in chn\n", i, (int) (p - d->ssets)));
836	if (p->ins.ss == ss && p->ins.co == i)
837	p->ins = ss->inchain[i];
838	else
839	{
840	struct arcp lastap = {NULL, `0`};
841
842	assert(p->ins.ss != NULL);
843	for (ap = p->ins; ap.ss != NULL &&
844	!(ap.ss == ss && ap.co == i);
845	ap = ap.ss->inchain[ap.co])
846	lastap = ap;
847	assert(ap.ss != NULL);
848	lastap.ss->inchain[lastap.co] = ss->inchain[i];
849	}
850	ss->outs[i] = NULL;
851	ss->inchain[i].ss = NULL;
852	}
853
854	/ if ss was a success state, may need to remember location /
855	if ((ss->flags & POSTSTATE) && ss->lastseen != d->lastpost &&
856	(d->lastpost == NULL \|\| d->lastpost < ss->lastseen))
857	d->lastpost = ss->lastseen;
858
859	/ likewise for a no-progress state /
860	if ((ss->flags & NOPROGRESS) && ss->lastseen != d->lastnopr &&
861	(d->lastnopr == NULL \|\| d->lastnopr < ss->lastseen))
862	d->lastnopr = ss->lastseen;
863
864	return ss;
865	}
866
867	/*
868	* pickss - pick the next stateset to be used
869	*/
870	static struct sset *
871	pickss(struct vars *v,
872	struct dfa *d,
873	chr *cp,
874	chr *start)
875	{
876	int i;
877	struct sset *ss;
878	struct sset *end;
879	chr *ancient;
880
881	/ shortcut for cases where cache isn't full /
882	if (d->nssused < d->nssets)
883	{
884	i = d->nssused;
885	d->nssused++;
886	ss = &d->ssets[i];
887	FDEBUG(("new c%d\n", i));
888	/ set up innards /
889	ss->states = &d->statesarea[i * d->wordsper];
890	ss->flags = `0`;
891	ss->ins.ss = NULL;
892	ss->ins.co = WHITE; / give it some value /
893	ss->outs = &d->outsarea[i * d->ncolors];
894	ss->inchain = &d->incarea[i * d->ncolors];
895	for (i = `0`; i < d->ncolors; i++)
896	{
897	ss->outs[i] = NULL;
898	ss->inchain[i].ss = NULL;
899	}
900	return ss;
901	}
902
903	/ look for oldest, or old enough anyway /
904	if (cp - start > d->nssets * `2` / `3`) / oldest 33% are expendable /
905	ancient = cp - d->nssets * `2` / `3`;
906	else
907	ancient = start;
908	for (ss = d->search, end = &d->ssets[d->nssets]; ss < end; ss++)
909	if ((ss->lastseen == NULL \|\| ss->lastseen < ancient) &&
910	!(ss->flags & LOCKED))
911	{
912	d->search = ss + `1`;
913	FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
914	return ss;
915	}
916	for (ss = d->ssets, end = d->search; ss < end; ss++)
917	if ((ss->lastseen == NULL \|\| ss->lastseen < ancient) &&
918	!(ss->flags & LOCKED))
919	{
920	d->search = ss + `1`;
921	FDEBUG(("replacing c%d\n", (int) (ss - d->ssets)));
922	return ss;
923	}
924
925	/ nobody's old enough?!? -- something's really wrong /
926	FDEBUG(("cannot find victim to replace!\n"));
927	ERR(REG_ASSERT);
928	return NULL;
929	}
930

Browse the source code of PostgreSQL/src/backend/regex/rege_dfa.c