pcre_dfa_exec.c source code [POCO/Foundation/src/pcre_dfa_exec.c]

1	/*************************************************
2	* Perl-Compatible Regular Expressions *
3	*************************************************/
4
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language (but see
below for why this module is different).
8
9	Written by Philip Hazel
10	Copyright (c) 1997-2017 University of Cambridge
11
12	-----------------------------------------------------------------------------
13	Redistribution and use in source and binary forms, with or without
14	modification, are permitted provided that the following conditions are met:
15
16	* Redistributions of source code must retain the above copyright notice,
17	this list of conditions and the following disclaimer.
18
19	* Redistributions in binary form must reproduce the above copyright
20	notice, this list of conditions and the following disclaimer in the
21	documentation and/or other materials provided with the distribution.
22
23	* Neither the name of the University of Cambridge nor the names of its
24	contributors may be used to endorse or promote products derived from
25	this software without specific prior written permission.
26
27	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31	LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35	CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37	POSSIBILITY OF SUCH DAMAGE.
38	-----------------------------------------------------------------------------
39	*/
40
/* This module contains the external function pcre_dfa_exec(), which is an
alternative matching function that uses a sort of DFA algorithm (not a true
FSM). This is NOT Perl-compatible, but it has advantages in certain
applications. */
45
46
/* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
the performance of his patterns greatly. I could not use it as it stood, as it
was not thread safe, and made assumptions about pattern sizes. Also, it caused
test 7 to loop, and test 9 to crash with a segfault.
51
52	The issue is the check for duplicate states, which is done by a simple linear
53	search up the state list. (Grep for "duplicate" below to find the code.) For
54	many patterns, there will never be many states active at one time, so a simple
55	linear search is fine. In patterns that have many active states, it might be a
56	bottleneck. The suggested code used an indexing scheme to remember which states
57	had previously been used for each character, and avoided the linear search when
58	it knew there was no chance of a duplicate. This was implemented when adding
59	states to the state lists.
60
61	I wrote some thread-safe, not-limited code to try something similar at the time
62	of checking for duplicates (instead of when adding states), using index vectors
63	on the stack. It did give a 13% improvement with one specially constructed
64	pattern for certain subject strings, but on other strings and on many of the
65	simpler patterns in the test suite it did worse. The major problem, I think,
66	was the extra time to initialize the index. This had to be done for each call
67	of internal_dfa_exec(). (The supplied patch used a static vector, initialized
68	only once - I suspect this was the cause of the problems with the tests.)
69
Overall, I concluded that the gains in some cases did not outweigh the losses
in others, so I abandoned this code. */
72
73	#pragma warning( disable : 4244) // conversion from 'int' to 'unsigned short', possible loss of data
74	#pragma warning( disable : 4146) // unary minus operator applied to unsigned type, result still unsigned
75
76	#include "pcre_config.h"
77
78	#define NLBLOCK md /* Block containing newline information */
79	#define PSSTART start_subject /* Field containing processed string start */
80	#define PSEND end_subject /* Field containing processed string end */
81
82	#include "pcre_internal.h"
83
84
/* For use to indent debugging output. The debug printing in this file emits a
prefix of this string through a "%.*s" conversion whose precision grows with
the recursion level, so the string must contain a run of spaces rather than a
single one. */

#define SP "                   "
88
89
90	/*************************************************
91	* Code parameters and static tables *
92	*************************************************/
93
/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
into others, under special conditions. A gap of 20 between the blocks should be
enough. The resulting opcodes don't have to be less than 256 because they are
never stored, so we push them well clear of the normal opcodes. */

#define OP_PROP_EXTRA       300
#define OP_EXTUNI_EXTRA     320
#define OP_ANYNL_EXTRA      340
#define OP_HSPACE_EXTRA     360
#define OP_VSPACE_EXTRA     380
104
105
106	/ This table identifies those opcodes that are followed immediately by a*
107	character that is to be tested in some way. This makes it possible to
108	centralize the loading of these characters. In the case of Type etc, the*
109	"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
110	small value. Non-zero values in the table are the offsets from the opcode where
111	the character is to be found. NOTE* If the start of this table is*
112	modified, the three tables that follow must also be modified. /*
113
114	static const pcre_uint8 coptable[] = {
115	`0`, / End /
116	`0`, `0`, `0`, `0`, `0`, / \A, \G, \K, \B, \b /
117	`0`, `0`, `0`, `0`, `0`, `0`, / \D, \d, \S, \s, \W, \w /
118	`0`, `0`, `0`, / Any, AllAny, Anybyte /
119	`0`, `0`, / \P, \p /
120	`0`, `0`, `0`, `0`, `0`, / \R, \H, \h, \V, \v /
121	`0`, / \X /
122	`0`, `0`, `0`, `0`, `0`, `0`, / \Z, \z, $, $M, ^, ^M /
123	`1`, / Char /
124	`1`, / Chari /
125	`1`, / not /
126	`1`, / noti /
127	/ Positive single-char repeats /
128	`1`, `1`, `1`, `1`, `1`, `1`, / , ?, +, +?, ?, ?? /
129	`1`+IMM2_SIZE, `1`+IMM2_SIZE, / upto, minupto /
130	`1`+IMM2_SIZE, / exact /
131	`1`, `1`, `1`, `1`+IMM2_SIZE, / +, ++, ?+, upto+ /*
132	`1`, `1`, `1`, `1`, `1`, `1`, / I, ?I, +I, +?I, ?I, ??I /
133	`1`+IMM2_SIZE, `1`+IMM2_SIZE, / upto I, minupto I /
134	`1`+IMM2_SIZE, / exact I /
135	`1`, `1`, `1`, `1`+IMM2_SIZE, / +I, ++I, ?+I, upto+I /*
136	/ Negative single-char repeats - only for chars < 256 /
137	`1`, `1`, `1`, `1`, `1`, `1`, / NOT , ?, +, +?, ?, ?? /
138	`1`+IMM2_SIZE, `1`+IMM2_SIZE, / NOT upto, minupto /
139	`1`+IMM2_SIZE, / NOT exact /
140	`1`, `1`, `1`, `1`+IMM2_SIZE, / NOT +, ++, ?+, upto+ /*
141	`1`, `1`, `1`, `1`, `1`, `1`, / NOT I, ?I, +I, +?I, ?I, ??I /
142	`1`+IMM2_SIZE, `1`+IMM2_SIZE, / NOT upto I, minupto I /
143	`1`+IMM2_SIZE, / NOT exact I /
144	`1`, `1`, `1`, `1`+IMM2_SIZE, / NOT +I, ++I, ?+I, upto+I /*
145	/ Positive type repeats /
146	`1`, `1`, `1`, `1`, `1`, `1`, / Type , ?, +, +?, ?, ?? /
147	`1`+IMM2_SIZE, `1`+IMM2_SIZE, / Type upto, minupto /
148	`1`+IMM2_SIZE, / Type exact /
149	`1`, `1`, `1`, `1`+IMM2_SIZE, / Type +, ++, ?+, upto+ /*
150	/ Character class & ref repeats /
151	`0`, `0`, `0`, `0`, `0`, `0`, / , ?, +, +?, ?, ?? /
152	`0`, `0`, / CRRANGE, CRMINRANGE /
153	`0`, `0`, `0`, `0`, / Possessive +, ++, ?+, CRPOSRANGE /*
154	`0`, / CLASS /
155	`0`, / NCLASS /
156	`0`, / XCLASS - variable length /
157	`0`, / REF /
158	`0`, / REFI /
159	`0`, / DNREF /
160	`0`, / DNREFI /
161	`0`, / RECURSE /
162	`0`, / CALLOUT /
163	`0`, / Alt /
164	`0`, / Ket /
165	`0`, / KetRmax /
166	`0`, / KetRmin /
167	`0`, / KetRpos /
168	`0`, / Reverse /
169	`0`, / Assert /
170	`0`, / Assert not /
171	`0`, / Assert behind /
172	`0`, / Assert behind not /
173	`0`, `0`, / ONCE, ONCE_NC /
174	`0`, `0`, `0`, `0`, `0`, / BRA, BRAPOS, CBRA, CBRAPOS, COND /
175	`0`, `0`, `0`, `0`, `0`, / SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND /
176	`0`, `0`, / CREF, DNCREF /
177	`0`, `0`, / RREF, DNRREF /
178	`0`, / DEF /
179	`0`, `0`, `0`, / BRAZERO, BRAMINZERO, BRAPOSZERO /
180	`0`, `0`, `0`, / MARK, PRUNE, PRUNE_ARG /
181	`0`, `0`, `0`, `0`, / SKIP, SKIP_ARG, THEN, THEN_ARG /
182	`0`, `0`, `0`, `0`, / COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT /
183	`0`, `0` / CLOSE, SKIPZERO /
184	};
185
186	/ This table identifies those opcodes that inspect a character. It is used to*
187	remember the fact that a character could have been inspected when the end of
188	the subject is reached. NOTE* If the start of this table is modified, the*
189	two tables that follow must also be modified. /*
190
191	static const pcre_uint8 poptable[] = {
192	`0`, / End /
193	`0`, `0`, `0`, `1`, `1`, / \A, \G, \K, \B, \b /
194	`1`, `1`, `1`, `1`, `1`, `1`, / \D, \d, \S, \s, \W, \w /
195	`1`, `1`, `1`, / Any, AllAny, Anybyte /
196	`1`, `1`, / \P, \p /
197	`1`, `1`, `1`, `1`, `1`, / \R, \H, \h, \V, \v /
198	`1`, / \X /
199	`0`, `0`, `0`, `0`, `0`, `0`, / \Z, \z, $, $M, ^, ^M /
200	`1`, / Char /
201	`1`, / Chari /
202	`1`, / not /
203	`1`, / noti /
204	/ Positive single-char repeats /
205	`1`, `1`, `1`, `1`, `1`, `1`, / , ?, +, +?, ?, ?? /
206	`1`, `1`, `1`, / upto, minupto, exact /
207	`1`, `1`, `1`, `1`, / +, ++, ?+, upto+ /*
208	`1`, `1`, `1`, `1`, `1`, `1`, / I, ?I, +I, +?I, ?I, ??I /
209	`1`, `1`, `1`, / upto I, minupto I, exact I /
210	`1`, `1`, `1`, `1`, / +I, ++I, ?+I, upto+I /*
211	/ Negative single-char repeats - only for chars < 256 /
212	`1`, `1`, `1`, `1`, `1`, `1`, / NOT , ?, +, +?, ?, ?? /
213	`1`, `1`, `1`, / NOT upto, minupto, exact /
214	`1`, `1`, `1`, `1`, / NOT +, ++, ?+, upto+ /*
215	`1`, `1`, `1`, `1`, `1`, `1`, / NOT I, ?I, +I, +?I, ?I, ??I /
216	`1`, `1`, `1`, / NOT upto I, minupto I, exact I /
217	`1`, `1`, `1`, `1`, / NOT +I, ++I, ?+I, upto+I /*
218	/ Positive type repeats /
219	`1`, `1`, `1`, `1`, `1`, `1`, / Type , ?, +, +?, ?, ?? /
220	`1`, `1`, `1`, / Type upto, minupto, exact /
221	`1`, `1`, `1`, `1`, / Type +, ++, ?+, upto+ /*
222	/ Character class & ref repeats /
223	`1`, `1`, `1`, `1`, `1`, `1`, / , ?, +, +?, ?, ?? /
224	`1`, `1`, / CRRANGE, CRMINRANGE /
225	`1`, `1`, `1`, `1`, / Possessive +, ++, ?+, CRPOSRANGE /*
226	`1`, / CLASS /
227	`1`, / NCLASS /
228	`1`, / XCLASS - variable length /
229	`0`, / REF /
230	`0`, / REFI /
231	`0`, / DNREF /
232	`0`, / DNREFI /
233	`0`, / RECURSE /
234	`0`, / CALLOUT /
235	`0`, / Alt /
236	`0`, / Ket /
237	`0`, / KetRmax /
238	`0`, / KetRmin /
239	`0`, / KetRpos /
240	`0`, / Reverse /
241	`0`, / Assert /
242	`0`, / Assert not /
243	`0`, / Assert behind /
244	`0`, / Assert behind not /
245	`0`, `0`, / ONCE, ONCE_NC /
246	`0`, `0`, `0`, `0`, `0`, / BRA, BRAPOS, CBRA, CBRAPOS, COND /
247	`0`, `0`, `0`, `0`, `0`, / SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND /
248	`0`, `0`, / CREF, DNCREF /
249	`0`, `0`, / RREF, DNRREF /
250	`0`, / DEF /
251	`0`, `0`, `0`, / BRAZERO, BRAMINZERO, BRAPOSZERO /
252	`0`, `0`, `0`, / MARK, PRUNE, PRUNE_ARG /
253	`0`, `0`, `0`, `0`, / SKIP, SKIP_ARG, THEN, THEN_ARG /
254	`0`, `0`, `0`, `0`, / COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT /
255	`0`, `0` / CLOSE, SKIPZERO /
256	};
257
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
and \w */
260
261	static const pcre_uint8 toptable1[] = {
262	`0`, `0`, `0`, `0`, `0`, `0`,
263	ctype_digit, ctype_digit,
264	ctype_space, ctype_space,
265	ctype_word, ctype_word,
266	`0`, `0` / OP_ANY, OP_ALLANY /
267	};
268
269	static const pcre_uint8 toptable2[] = {
270	`0`, `0`, `0`, `0`, `0`, `0`,
271	ctype_digit, `0`,
272	ctype_space, `0`,
273	ctype_word, `0`,
274	`1`, `1` / OP_ANY, OP_ALLANY /
275	};
276
277
/* Structure for holding data about a particular state, which is in effect the
current data for an active path through the match tree. It must consist
entirely of ints because the working vector we are passed, and which we put
these structures in, is a vector of ints. */

typedef struct stateblock {
  int offset;                     /* Offset to opcode */
  int count;                      /* Count for repeats */
  int data;                       /* Some use extra data */
} stateblock;

/* Number of ints occupied by one stateblock. The expansion is fully
parenthesized so that it behaves as a single operand in any expression. */

#define INTS_PER_STATEBLOCK  ((int)(sizeof(stateblock)/sizeof(int)))
290
291
292	#ifdef PCRE_DEBUG
293	/*************************************************
294	* Print character string *
295	*************************************************/
296
297	/ Character string printing function for debugging.*
298
299	Arguments:
300	p points to string
301	length number of bytes
302	f where to print
303
304	Returns: nothing
305	*/
306
307	static void
308	pchars(const pcre_uchar p, int* length, FILE *f)
309	{
310	pcre_uint32 c;
311	while (length-- > `0`)
312	{
313	if (isprint(c = *(p++)))
314	fprintf(f, "%c", c);
315	else
316	fprintf(f, "\\x{%02x}", c);
317	}
318	}
319	#endif
320
321
322
323	/*************************************************
324	* Execute a Regular Expression - DFA engine *
325	*************************************************/
326
/* This internal function applies a compiled pattern to a subject string,
starting at a given point, using a DFA engine. This function is called from the
external one, possibly multiple times if the pattern is not anchored. The
function calls itself recursively for some kinds of subpattern.

Arguments:
  md                the match_data block with fixed information
  this_start_code   the opening bracket of this subexpression's code
  current_subject   where we currently are in the subject string
  start_offset      start offset in the subject string
  offsets           vector to contain the matching string offsets
  offsetcount       size of same
  workspace         vector of workspace
  wscount           size of same
  rlevel            function call recursion level

Returns:            > 0 => number of match offset pairs placed in offsets
                    = 0 => offsets overflowed; longest matches are present
                     -1 => failed to match
                   < -1 => some kind of unexpected problem

The following macros are used for adding states to the two state vectors (one
for the current character, one for the following character). */
350
/* Append a state (offset x, count y) to the active list. If the list is
already full, the enclosing function returns PCRE_ERROR_DFA_WSSIZE. The macro
ends in a bare "else return", so the caller supplies the trailing semicolon. */

#define ADD_ACTIVE(x,y) \
  if (active_count++ < wscount) \
    { \
    next_active_state->offset = (x); \
    next_active_state->count  = (y); \
    next_active_state++; \
    DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
    } \
  else return PCRE_ERROR_DFA_WSSIZE
360
/* As ADD_ACTIVE, but also stores z in the data field of the new state. If the
active list is full, the enclosing function returns PCRE_ERROR_DFA_WSSIZE. */

#define ADD_ACTIVE_DATA(x,y,z) \
  if (active_count++ < wscount) \
    { \
    next_active_state->offset = (x); \
    next_active_state->count  = (y); \
    next_active_state->data   = (z); \
    next_active_state++; \
    DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
    } \
  else return PCRE_ERROR_DFA_WSSIZE
371
/* Append a state (offset x, count y) to the new list, i.e. the states for the
following character. If the list is already full, the enclosing function
returns PCRE_ERROR_DFA_WSSIZE. */

#define ADD_NEW(x,y) \
  if (new_count++ < wscount) \
    { \
    next_new_state->offset = (x); \
    next_new_state->count  = (y); \
    next_new_state++; \
    DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
    } \
  else return PCRE_ERROR_DFA_WSSIZE
381
/* As ADD_NEW, but also stores z in the data field of the new state. The debug
trace additionally reports the source line of the invocation. If the new list
is full, the enclosing function returns PCRE_ERROR_DFA_WSSIZE. */

#define ADD_NEW_DATA(x,y,z) \
  if (new_count++ < wscount) \
    { \
    next_new_state->offset = (x); \
    next_new_state->count  = (y); \
    next_new_state->data   = (z); \
    next_new_state++; \
    DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
      (x), (y), (z), __LINE__)); \
    } \
  else return PCRE_ERROR_DFA_WSSIZE
393
/* And now, here is the code */
395
396	static int
397	internal_dfa_exec(
398	dfa_match_data *md,
399	const pcre_uchar *this_start_code,
400	const pcre_uchar *current_subject,
401	int start_offset,
402	int *offsets,
403	int offsetcount,
404	int *workspace,
405	int wscount,
406	int rlevel)
407	{
408	stateblock active_states, new_states, *temp_states;
409	stateblock next_active_state, next_new_state;
410
411	const pcre_uint8 ctypes, lcc, *fcc;
412	const pcre_uchar *ptr;
413	const pcre_uchar end_code, first_op;
414
415	dfa_recursion_info new_recursive;
416
417	int active_count, new_count, match_count;
418
419	/ Some fields in the md block are frequently referenced, so we load them into*
420	independent variables in the hope that this will perform better. /*
421
422	const pcre_uchar *start_subject = md->start_subject;
423	const pcre_uchar *end_subject = md->end_subject;
424	const pcre_uchar *start_code = md->start_code;
425
426	#ifdef SUPPORT_UTF
427	BOOL utf = (md->poptions & PCRE_UTF8) != `0`;
428	#else
429	BOOL utf = FALSE;
430	#endif
431
432	BOOL reset_could_continue = FALSE;
433
434	rlevel++;
435	offsetcount &= (-`2`);
436
437	wscount -= `2`;
438	wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * `2`))) /
439	(`2` * INTS_PER_STATEBLOCK);
440
441	DPRINTF(("\n%.*s---------------------\n"
442	"%.*sCall to internal_dfa_exec f=%d\n",
443	rlevel`2`-`2`, SP, rlevel`2`-`2`, SP, rlevel));
444
445	ctypes = md->tables + ctypes_offset;
446	lcc = md->tables + lcc_offset;
447	fcc = md->tables + fcc_offset;
448
449	match_count = PCRE_ERROR_NOMATCH; / A negative number /
450
451	active_states = (stateblock *)(workspace + `2`);
452	next_new_state = new_states = active_states + wscount;
453	new_count = `0`;
454
455	first_op = this_start_code + `1` + LINK_SIZE +
456	((this_start_code == OP_CBRA \|\| this_start_code == OP_SCBRA \|\|
457	this_start_code == OP_CBRAPOS \|\| this_start_code == OP_SCBRAPOS)
458	? IMM2_SIZE:`0`);
459
460	/ The first thing in any (sub) pattern is a bracket of some sort. Push all*
461	the alternative states onto the list, and find out where the end is. This
462	makes is possible to use this function recursively, when we want to stop at a
463	matching internal ket rather than at the end.
464
465	If the first opcode in the first alternative is OP_REVERSE, we are dealing with
466	a backward assertion. In that case, we have to find out the maximum amount to
467	move back, and set up each alternative appropriately. /*
468
469	if (*first_op == OP_REVERSE)
470	{
471	int max_back = `0`;
472	int gone_back;
473
474	end_code = this_start_code;
475	do
476	{
477	int back = GET(end_code, `2`+LINK_SIZE);
478	if (back > max_back) max_back = back;
479	end_code += GET(end_code, `1`);
480	}
481	while (*end_code == OP_ALT);
482
483	/ If we can't go back the amount required for the longest lookbehind*
484	pattern, go back as far as we can; some alternatives may still be viable. /*
485
486	#ifdef SUPPORT_UTF
487	/ In character mode we have to step back character by character /
488
489	if (utf)
490	{
491	for (gone_back = `0`; gone_back < max_back; gone_back++)
492	{
493	if (current_subject <= start_subject) break;
494	current_subject--;
495	ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
496	}
497	}
498	else
499	#endif
500
501	/ In byte-mode we can do this quickly. /
502
503	{
504	gone_back = (current_subject - max_back < start_subject)?
505	(int)(current_subject - start_subject) : max_back;
506	current_subject -= gone_back;
507	}
508
509	/ Save the earliest consulted character /
510
511	if (current_subject < md->start_used_ptr)
512	md->start_used_ptr = current_subject;
513
514	/ Now we can process the individual branches. /
515
516	end_code = this_start_code;
517	do
518	{
519	int back = GET(end_code, `2`+LINK_SIZE);
520	if (back <= gone_back)
521	{
522	int bstate = (int)(end_code - start_code + `2` + `2`*LINK_SIZE);
523	ADD_NEW_DATA(-bstate, `0`, gone_back - back);
524	}
525	end_code += GET(end_code, `1`);
526	}
527	while (*end_code == OP_ALT);
528	}
529
530	/ This is the code for a "normal" subpattern (not a backward assertion). The*
531	start of a whole pattern is always one of these. If we are at the top level,
532	we may be asked to restart matching from the same point that we reached for a
533	previous partial match. We still have to scan through the top-level branches to
534	find the end state. /*
535
536	else
537	{
538	end_code = this_start_code;
539
540	/ Restarting /
541
542	if (rlevel == `1` && (md->moptions & PCRE_DFA_RESTART) != `0`)
543	{
544	do { end_code += GET(end_code, `1`); } while (*end_code == OP_ALT);
545	new_count = workspace[`1`];
546	if (!workspace[`0`])
547	memcpy(new_states, active_states, new_count * sizeof(stateblock));
548	}
549
550	/ Not restarting /
551
552	else
553	{
554	int length = `1` + LINK_SIZE +
555	((this_start_code == OP_CBRA \|\| this_start_code == OP_SCBRA \|\|
556	this_start_code == OP_CBRAPOS \|\| this_start_code == OP_SCBRAPOS)
557	? IMM2_SIZE:`0`);
558	do
559	{
560	ADD_NEW((int)(end_code - start_code + length), `0`);
561	end_code += GET(end_code, `1`);
562	length = `1` + LINK_SIZE;
563	}
564	while (*end_code == OP_ALT);
565	}
566	}
567
568	workspace[`0`] = `0`; / Bit indicating which vector is current /
569
570	DPRINTF(("%.sEnd state = %d\n", rlevel`2`-`2`, SP, (int)(end_code - start_code)));
571
572	/ Loop for scanning the subject /
573
574	ptr = current_subject;
575	for (;;)
576	{
577	int i, j;
578	int clen, dlen;
579	pcre_uint32 c, d;
580	int forced_fail = `0`;
581	BOOL partial_newline = FALSE;
582	BOOL could_continue = reset_could_continue;
583	reset_could_continue = FALSE;
584
585	/ Make the new state list into the active state list and empty the*
586	new state list. /*
587
588	temp_states = active_states;
589	active_states = new_states;
590	new_states = temp_states;
591	active_count = new_count;
592	new_count = `0`;
593
594	workspace[`0`] ^= `1`; / Remember for the restarting feature /
595	workspace[`1`] = active_count;
596
597	#ifdef PCRE_DEBUG
598	printf("%.sNext character: rest of subject = \"", rlevel`2`-`2`, SP);
599	pchars(ptr, STRLEN_UC(ptr), stdout);
600	printf("\"\n");
601
602	printf("%.sActive states: ", rlevel`2`-`2`, SP);
603	for (i = `0`; i < active_count; i++)
604	printf("%d/%d ", active_states[i].offset, active_states[i].count);
605	printf("\n");
606	#endif
607
608	/ Set the pointers for adding new states /
609
610	next_active_state = active_states + active_count;
611	next_new_state = new_states;
612
613	/ Load the current character from the subject outside the loop, as many*
614	different states may want to look at it, and we assume that at least one
615	will. /*
616
617	if (ptr < end_subject)
618	{
619	clen = `1`; / Number of data items in the character /
620	#ifdef SUPPORT_UTF
621	GETCHARLENTEST(c, ptr, clen);
622	#else
623	c = *ptr;
624	#endif /* SUPPORT_UTF */
625	}
626	else
627	{
628	clen = `0`; / This indicates the end of the subject /
629	c = NOTACHAR; / This value should never actually be used /
630	}
631
632	/ Scan up the active states and act on each one. The result of an action*
633	may be to add more states to the currently active list (e.g. on hitting a
634	parenthesis) or it may be to put states on the new list, for considering
635	when we move the character pointer on. /*
636
637	for (i = `0`; i < active_count; i++)
638	{
639	stateblock *current_state = active_states + i;
640	BOOL caseless = FALSE;
641	const pcre_uchar *code;
642	int state_offset = current_state->offset;
643	int codevalue, rrc;
644	int count;
645
646	#ifdef PCRE_DEBUG
647	printf ("%.sProcessing state %d c=", rlevel`2`-`2`, SP, state_offset);
648	if (clen == `0`) printf("EOL\n");
649	else if (c > `32` && c < `127`) printf("'%c'\n", c);
650	else printf("0x%02x\n", c);
651	#endif
652
653	/ A negative offset is a special case meaning "hold off going to this*
654	(negated) state until the number of characters in the data field have
655	been skipped". If the could_continue flag was passed over from a previous
656	state, arrange for it to passed on. /*
657
658	if (state_offset < `0`)
659	{
660	if (current_state->data > `0`)
661	{
662	DPRINTF(("%.sSkipping this character\n", rlevel`2`-`2`, SP));
663	ADD_NEW_DATA(state_offset, current_state->count,
664	current_state->data - `1`);
665	if (could_continue) reset_could_continue = TRUE;
666	continue;
667	}
668	else
669	{
670	current_state->offset = state_offset = -state_offset;
671	}
672	}
673
674	/ Check for a duplicate state with the same count, and skip if found.*
675	See the note at the head of this module about the possibility of improving
676	performance here. /*
677
678	for (j = `0`; j < i; j++)
679	{
680	if (active_states[j].offset == state_offset &&
681	active_states[j].count == current_state->count)
682	{
683	DPRINTF(("%.sDuplicate state: skipped\n", rlevel`2`-`2`, SP));
684	goto NEXT_ACTIVE_STATE;
685	}
686	}
687
688	/ The state offset is the offset to the opcode /
689
690	code = start_code + state_offset;
691	codevalue = *code;
692
693	/ If this opcode inspects a character, but we are at the end of the*
694	subject, remember the fact for use when testing for a partial match. /*
695
696	if (clen == `0` && poptable[codevalue] != `0`)
697	could_continue = TRUE;
698
699	/ If this opcode is followed by an inline character, load it. It is*
700	tempting to test for the presence of a subject character here, but that
701	is wrong, because sometimes zero repetitions of the subject are
702	permitted.
703
704	We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
705	argument that is not a data character - but is always one byte long because
706	the values are small. We have to take special action to deal with \P, \p,
707	\H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
708	these ones to new opcodes. /*
709
710	if (coptable[codevalue] > `0`)
711	{
712	dlen = `1`;
713	#ifdef SUPPORT_UTF
714	if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
715	#endif /* SUPPORT_UTF */
716	d = code[coptable[codevalue]];
717	if (codevalue >= OP_TYPESTAR)
718	{
719	switch(d)
720	{
721	case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
722	case OP_NOTPROP:
723	case OP_PROP: codevalue += OP_PROP_EXTRA; break;
724	case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
725	case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
726	case OP_NOT_HSPACE:
727	case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
728	case OP_NOT_VSPACE:
729	case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
730	default: break;
731	}
732	}
733	}
734	else
735	{
736	dlen = `0`; / Not strictly necessary, but compilers moan /
737	d = NOTACHAR; / if these variables are not set. /
738	}
739
740
741	/ Now process the individual opcodes /
742
743	switch (codevalue)
744	{
745	/ ========================================================================== /
746	/ These cases are never obeyed. This is a fudge that causes a compile-*
747	time error if the vectors coptable or poptable, which are indexed by
748	opcode, are not the correct length. It seems to be the only way to do
749	such a check at compile time, as the sizeof() operator does not work
750	in the C preprocessor. /*
751
752	case OP_TABLE_LENGTH:
753	case OP_TABLE_LENGTH +
754	((sizeof(coptable) == OP_TABLE_LENGTH) &&
755	(sizeof(poptable) == OP_TABLE_LENGTH)):
756	break;
757
758	/ ========================================================================== /
759	/ Reached a closing bracket. If not at the end of the pattern, carry*
760	on with the next opcode. For repeating opcodes, also add the repeat
761	state. Note that KETRPOS will always be encountered at the end of the
762	subpattern, because the possessive subpattern repeats are always handled
763	using recursive calls. Thus, it never adds any new states.
764
765	At the end of the (sub)pattern, unless we have an empty string and
766	PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
767	start of the subject, save the match data, shifting up all previous
768	matches so we always have the longest first. /*
769
770	case OP_KET:
771	case OP_KETRMIN:
772	case OP_KETRMAX:
773	case OP_KETRPOS:
774	if (code != end_code)
775	{
776	ADD_ACTIVE(state_offset + `1` + LINK_SIZE, `0`);
777	if (codevalue != OP_KET)
778	{
779	ADD_ACTIVE(state_offset - GET(code, `1`), `0`);
780	}
781	}
782	else
783	{
784	if (ptr > current_subject \|\|
785	((md->moptions & PCRE_NOTEMPTY) == `0` &&
786	((md->moptions & PCRE_NOTEMPTY_ATSTART) == `0` \|\|
787	current_subject > start_subject + md->start_offset)))
788	{
789	if (match_count < `0`) match_count = (offsetcount >= `2`)? `1` : `0`;
790	else if (match_count > `0` && ++match_count * `2` > offsetcount)
791	match_count = `0`;
792	count = ((match_count == `0`)? offsetcount : match_count * `2`) - `2`;
793	if (count > `0`) memmove(offsets + `2`, offsets, count * sizeof(int));
794	if (offsetcount >= `2`)
795	{
796	offsets[`0`] = (int)(current_subject - start_subject);
797	offsets[`1`] = (int)(ptr - start_subject);
798	DPRINTF(("%.sSet matched string = \"%.s\"\n", rlevel*`2`-`2`, SP,
799	offsets[`1`] - offsets[`0`], (char *)current_subject));
800	}
801	if ((md->moptions & PCRE_DFA_SHORTEST) != `0`)
802	{
803	DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
804	"%.s---------------------\n\n", rlevel`2`-`2`, SP, rlevel,
805	match_count, rlevel*`2`-`2`, SP));
806	return match_count;
807	}
808	}
809	}
810	break;
811
812	/ ========================================================================== /
813	/ These opcodes add to the current list of states without looking*
814	at the current character. /*
815
816	/-----------------------------------------------------------------/
817	case OP_ALT:
818	do { code += GET(code, `1`); } while (*code == OP_ALT);
819	ADD_ACTIVE((int)(code - start_code), `0`);
820	break;
821
822	/-----------------------------------------------------------------/
823	case OP_BRA:
824	case OP_SBRA:
825	do
826	{
827	ADD_ACTIVE((int)(code - start_code + `1` + LINK_SIZE), `0`);
828	code += GET(code, `1`);
829	}
830	while (*code == OP_ALT);
831	break;
832
833	/-----------------------------------------------------------------/
834	case OP_CBRA:
835	case OP_SCBRA:
836	ADD_ACTIVE((int)(code - start_code + `1` + LINK_SIZE + IMM2_SIZE), `0`);
837	code += GET(code, `1`);
838	while (*code == OP_ALT)
839	{
840	ADD_ACTIVE((int)(code - start_code + `1` + LINK_SIZE), `0`);
841	code += GET(code, `1`);
842	}
843	break;
844
845	/-----------------------------------------------------------------/
846	case OP_BRAZERO:
847	case OP_BRAMINZERO:
848	ADD_ACTIVE(state_offset + `1`, `0`);
849	code += `1` + GET(code, `2`);
850	while (*code == OP_ALT) code += GET(code, `1`);
851	ADD_ACTIVE((int)(code - start_code + `1` + LINK_SIZE), `0`);
852	break;
853
854	/-----------------------------------------------------------------/
855	case OP_SKIPZERO:
856	code += `1` + GET(code, `2`);
857	while (*code == OP_ALT) code += GET(code, `1`);
858	ADD_ACTIVE((int)(code - start_code + `1` + LINK_SIZE), `0`);
859	break;
860
861	/-----------------------------------------------------------------/
862	case OP_CIRC:
863	if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == `0`)
864	{ ADD_ACTIVE(state_offset + `1`, `0`); }
865	break;
866
867	/-----------------------------------------------------------------/
868	case OP_CIRCM:
869	if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == `0`) \|\|
870	(ptr != end_subject && WAS_NEWLINE(ptr)))
871	{ ADD_ACTIVE(state_offset + `1`, `0`); }
872	break;
873
874	/-----------------------------------------------------------------/
875	case OP_EOD:
876	if (ptr >= end_subject)
877	{
878	if ((md->moptions & PCRE_PARTIAL_HARD) != `0`)
879	could_continue = TRUE;
880	else { ADD_ACTIVE(state_offset + `1`, `0`); }
881	}
882	break;
883
884	/-----------------------------------------------------------------/
885	case OP_SOD:
886	if (ptr == start_subject) { ADD_ACTIVE(state_offset + `1`, `0`); }
887	break;
888
889	/-----------------------------------------------------------------/
890	case OP_SOM:
891	if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + `1`, `0`); }
892	break;
893
894
895	/ ========================================================================== /
896	/* These opcodes inspect the next subject character, and sometimes
897	the previous one as well, but do not have an argument. The variable
898	clen contains the length of the current character and is zero if we are
899	at the end of the subject. */
900
901	/-----------------------------------------------------------------/
902	case OP_ANY:
903	if (clen > `0` && !IS_NEWLINE(ptr))
904	{
905	if (ptr + `1` >= md->end_subject &&
906	(md->moptions & (PCRE_PARTIAL_HARD)) != `0` &&
907	NLBLOCK->nltype == NLTYPE_FIXED &&
908	NLBLOCK->nllen == `2` &&
909	c == NLBLOCK->nl[`0`])
910	{
911	could_continue = partial_newline = TRUE;
912	}
913	else
914	{
915	ADD_NEW(state_offset + `1`, `0`);
916	}
917	}
918	break;
919
920	/-----------------------------------------------------------------/
921	case OP_ALLANY:
922	if (clen > `0`)
923	{ ADD_NEW(state_offset + `1`, `0`); }
924	break;
925
926	/-----------------------------------------------------------------/
927	case OP_EODN:
928	if (clen == `0` && (md->moptions & PCRE_PARTIAL_HARD) != `0`)
929	could_continue = TRUE;
930	else if (clen == `0` \|\| (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
931	{ ADD_ACTIVE(state_offset + `1`, `0`); }
932	break;
933
934	/-----------------------------------------------------------------/
935	case OP_DOLL:
936	if ((md->moptions & PCRE_NOTEOL) == `0`)
937	{
938	if (clen == `0` && (md->moptions & PCRE_PARTIAL_HARD) != `0`)
939	could_continue = TRUE;
940	else if (clen == `0` \|\|
941	((md->poptions & PCRE_DOLLAR_ENDONLY) == `0` && IS_NEWLINE(ptr) &&
942	(ptr == end_subject - md->nllen)
943	))
944	{ ADD_ACTIVE(state_offset + `1`, `0`); }
945	else if (ptr + `1` >= md->end_subject &&
946	(md->moptions & (PCRE_PARTIAL_HARD\|PCRE_PARTIAL_SOFT)) != `0` &&
947	NLBLOCK->nltype == NLTYPE_FIXED &&
948	NLBLOCK->nllen == `2` &&
949	c == NLBLOCK->nl[`0`])
950	{
951	if ((md->moptions & PCRE_PARTIAL_HARD) != `0`)
952	{
953	reset_could_continue = TRUE;
954	ADD_NEW_DATA(-(state_offset + `1`), `0`, `1`);
955	}
956	else could_continue = partial_newline = TRUE;
957	}
958	}
959	break;
960
961	/-----------------------------------------------------------------/
962	case OP_DOLLM:
963	if ((md->moptions & PCRE_NOTEOL) == `0`)
964	{
965	if (clen == `0` && (md->moptions & PCRE_PARTIAL_HARD) != `0`)
966	could_continue = TRUE;
967	else if (clen == `0` \|\|
968	((md->poptions & PCRE_DOLLAR_ENDONLY) == `0` && IS_NEWLINE(ptr)))
969	{ ADD_ACTIVE(state_offset + `1`, `0`); }
970	else if (ptr + `1` >= md->end_subject &&
971	(md->moptions & (PCRE_PARTIAL_HARD\|PCRE_PARTIAL_SOFT)) != `0` &&
972	NLBLOCK->nltype == NLTYPE_FIXED &&
973	NLBLOCK->nllen == `2` &&
974	c == NLBLOCK->nl[`0`])
975	{
976	if ((md->moptions & PCRE_PARTIAL_HARD) != `0`)
977	{
978	reset_could_continue = TRUE;
979	ADD_NEW_DATA(-(state_offset + `1`), `0`, `1`);
980	}
981	else could_continue = partial_newline = TRUE;
982	}
983	}
984	else if (IS_NEWLINE(ptr))
985	{ ADD_ACTIVE(state_offset + `1`, `0`); }
986	break;
987
988	/-----------------------------------------------------------------/
989
990	case OP_DIGIT:
991	case OP_WHITESPACE:
992	case OP_WORDCHAR:
993	if (clen > `0` && c < `256` &&
994	((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != `0`)
995	{ ADD_NEW(state_offset + `1`, `0`); }
996	break;
997
998	/-----------------------------------------------------------------/
999	case OP_NOT_DIGIT:
1000	case OP_NOT_WHITESPACE:
1001	case OP_NOT_WORDCHAR:
1002	if (clen > `0` && (c >= `256` \|\|
1003	((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != `0`))
1004	{ ADD_NEW(state_offset + `1`, `0`); }
1005	break;
1006
1007	/-----------------------------------------------------------------/
1008	case OP_WORD_BOUNDARY:
1009	case OP_NOT_WORD_BOUNDARY:
1010	{
1011	int left_word, right_word;
1012
1013	if (ptr > start_subject)
1014	{
1015	const pcre_uchar *temp = ptr - `1`;
1016	if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1017	#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1018	if (utf) { BACKCHAR(temp); }
1019	#endif
1020	GETCHARTEST(d, temp);
1021	#ifdef SUPPORT_UCP
1022	if ((md->poptions & PCRE_UCP) != `0`)
1023	{
1024	if (d == `'_'`) left_word = TRUE; else
1025	{
1026	int cat = UCD_CATEGORY(d);
1027	left_word = (cat == ucp_L \|\| cat == ucp_N);
1028	}
1029	}
1030	else
1031	#endif
1032	left_word = d < `256` && (ctypes[d] & ctype_word) != `0`;
1033	}
1034	else left_word = FALSE;
1035
1036	if (clen > `0`)
1037	{
1038	#ifdef SUPPORT_UCP
1039	if ((md->poptions & PCRE_UCP) != `0`)
1040	{
1041	if (c == `'_'`) right_word = TRUE; else
1042	{
1043	int cat = UCD_CATEGORY(c);
1044	right_word = (cat == ucp_L \|\| cat == ucp_N);
1045	}
1046	}
1047	else
1048	#endif
1049	right_word = c < `256` && (ctypes[c] & ctype_word) != `0`;
1050	}
1051	else right_word = FALSE;
1052
1053	if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
1054	{ ADD_ACTIVE(state_offset + `1`, `0`); }
1055	}
1056	break;
1057
1058
1059	/-----------------------------------------------------------------/
1060	/* Check the next character by Unicode property. We will get here only
1061	if the support is in the binary; otherwise a compile-time error occurs.
1062	*/
1063
1064	#ifdef SUPPORT_UCP
1065	case OP_PROP:
1066	case OP_NOTPROP:
1067	if (clen > `0`)
1068	{
1069	BOOL OK;
1070	const pcre_uint32 *cp;
1071	const ucd_record * prop = GET_UCD(c);
1072	switch(code[`1`])
1073	{
1074	case PT_ANY:
1075	OK = TRUE;
1076	break;
1077
1078	case PT_LAMP:
1079	OK = prop->chartype == ucp_Lu \|\| prop->chartype == ucp_Ll \|\|
1080	prop->chartype == ucp_Lt;
1081	break;
1082
1083	case PT_GC:
1084	OK = PRIV(ucp_gentype)[prop->chartype] == code[`2`];
1085	break;
1086
1087	case PT_PC:
1088	OK = prop->chartype == code[`2`];
1089	break;
1090
1091	case PT_SC:
1092	OK = prop->script == code[`2`];
1093	break;
1094
1095	/ These are specials for combination cases. /
1096
1097	case PT_ALNUM:
1098	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
1099	PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1100	break;
1101
1102	/* Perl space used to exclude VT, but from Perl 5.18 it is included,
1103	which means that Perl space and POSIX space are now identical. PCRE
1104	was changed at release 8.34. */
1105
1106	case PT_SPACE: / Perl space /
1107	case PT_PXSPACE: / POSIX space /
1108	switch(c)
1109	{
1110	HSPACE_CASES:
1111	VSPACE_CASES:
1112	OK = TRUE;
1113	break;
1114
1115	default:
1116	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1117	break;
1118	}
1119	break;
1120
1121	case PT_WORD:
1122	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
1123	PRIV(ucp_gentype)[prop->chartype] == ucp_N \|\|
1124	c == CHAR_UNDERSCORE;
1125	break;
1126
1127	case PT_CLIST:
1128	cp = PRIV(ucd_caseless_sets) + code[`2`];
1129	for (;;)
1130	{
1131	if (c < cp) { OK = FALSE; break*; }
1132	if (c == cp++) { OK = TRUE; break*; }
1133	}
1134	break;
1135
1136	case PT_UCNC:
1137	OK = c == CHAR_DOLLAR_SIGN \|\| c == CHAR_COMMERCIAL_AT \|\|
1138	c == CHAR_GRAVE_ACCENT \|\| (c >= `0xa0` && c <= `0xd7ff`) \|\|
1139	c >= `0xe000`;
1140	break;
1141
1142	/ Should never occur, but keep compilers from grumbling. /
1143
1144	default:
1145	OK = codevalue != OP_PROP;
1146	break;
1147	}
1148
1149	if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + `3`, `0`); }
1150	}
1151	break;
1152	#endif
1153
1154
1155
1156	/ ========================================================================== /
1157	/* These opcodes likewise inspect the subject character, but have an
1158	argument that is not a data character. It is one of these opcodes:
1159	OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_WHITESPACE,
1160	OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
1161
1162	case OP_TYPEPLUS:
1163	case OP_TYPEMINPLUS:
1164	case OP_TYPEPOSPLUS:
1165	count = current_state->count; / Already matched /
1166	if (count > `0`) { ADD_ACTIVE(state_offset + `2`, `0`); }
1167	if (clen > `0`)
1168	{
1169	if (d == OP_ANY && ptr + `1` >= md->end_subject &&
1170	(md->moptions & (PCRE_PARTIAL_HARD)) != `0` &&
1171	NLBLOCK->nltype == NLTYPE_FIXED &&
1172	NLBLOCK->nllen == `2` &&
1173	c == NLBLOCK->nl[`0`])
1174	{
1175	could_continue = partial_newline = TRUE;
1176	}
1177	else if ((c >= `256` && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) \|\|
1178	(c < `256` &&
1179	(d != OP_ANY \|\| !IS_NEWLINE(ptr)) &&
1180	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != `0`))
1181	{
1182	if (count > `0` && codevalue == OP_TYPEPOSPLUS)
1183	{
1184	active_count--; / Remove non-match possibility /
1185	next_active_state--;
1186	}
1187	count++;
1188	ADD_NEW(state_offset, count);
1189	}
1190	}
1191	break;
1192
1193	/-----------------------------------------------------------------/
1194	case OP_TYPEQUERY:
1195	case OP_TYPEMINQUERY:
1196	case OP_TYPEPOSQUERY:
1197	ADD_ACTIVE(state_offset + `2`, `0`);
1198	if (clen > `0`)
1199	{
1200	if (d == OP_ANY && ptr + `1` >= md->end_subject &&
1201	(md->moptions & (PCRE_PARTIAL_HARD)) != `0` &&
1202	NLBLOCK->nltype == NLTYPE_FIXED &&
1203	NLBLOCK->nllen == `2` &&
1204	c == NLBLOCK->nl[`0`])
1205	{
1206	could_continue = partial_newline = TRUE;
1207	}
1208	else if ((c >= `256` && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) \|\|
1209	(c < `256` &&
1210	(d != OP_ANY \|\| !IS_NEWLINE(ptr)) &&
1211	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != `0`))
1212	{
1213	if (codevalue == OP_TYPEPOSQUERY)
1214	{
1215	active_count--; / Remove non-match possibility /
1216	next_active_state--;
1217	}
1218	ADD_NEW(state_offset + `2`, `0`);
1219	}
1220	}
1221	break;
1222
1223	/-----------------------------------------------------------------/
1224	case OP_TYPESTAR:
1225	case OP_TYPEMINSTAR:
1226	case OP_TYPEPOSSTAR:
1227	ADD_ACTIVE(state_offset + `2`, `0`);
1228	if (clen > `0`)
1229	{
1230	if (d == OP_ANY && ptr + `1` >= md->end_subject &&
1231	(md->moptions & (PCRE_PARTIAL_HARD)) != `0` &&
1232	NLBLOCK->nltype == NLTYPE_FIXED &&
1233	NLBLOCK->nllen == `2` &&
1234	c == NLBLOCK->nl[`0`])
1235	{
1236	could_continue = partial_newline = TRUE;
1237	}
1238	else if ((c >= `256` && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) \|\|
1239	(c < `256` &&
1240	(d != OP_ANY \|\| !IS_NEWLINE(ptr)) &&
1241	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != `0`))
1242	{
1243	if (codevalue == OP_TYPEPOSSTAR)
1244	{
1245	active_count--; / Remove non-match possibility /
1246	next_active_state--;
1247	}
1248	ADD_NEW(state_offset, `0`);
1249	}
1250	}
1251	break;
1252
1253	/-----------------------------------------------------------------/
1254	case OP_TYPEEXACT:
1255	count = current_state->count; / Number already matched /
1256	if (clen > `0`)
1257	{
1258	if (d == OP_ANY && ptr + `1` >= md->end_subject &&
1259	(md->moptions & (PCRE_PARTIAL_HARD)) != `0` &&
1260	NLBLOCK->nltype == NLTYPE_FIXED &&
1261	NLBLOCK->nllen == `2` &&
1262	c == NLBLOCK->nl[`0`])
1263	{
1264	could_continue = partial_newline = TRUE;
1265	}
1266	else if ((c >= `256` && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) \|\|
1267	(c < `256` &&
1268	(d != OP_ANY \|\| !IS_NEWLINE(ptr)) &&
1269	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != `0`))
1270	{
1271	if (++count >= (int)GET2(code, `1`))
1272	{ ADD_NEW(state_offset + `1` + IMM2_SIZE + `1`, `0`); }
1273	else
1274	{ ADD_NEW(state_offset, count); }
1275	}
1276	}
1277	break;
1278
1279	/-----------------------------------------------------------------/
1280	case OP_TYPEUPTO:
1281	case OP_TYPEMINUPTO:
1282	case OP_TYPEPOSUPTO:
1283	ADD_ACTIVE(state_offset + `2` + IMM2_SIZE, `0`);
1284	count = current_state->count; / Number already matched /
1285	if (clen > `0`)
1286	{
1287	if (d == OP_ANY && ptr + `1` >= md->end_subject &&
1288	(md->moptions & (PCRE_PARTIAL_HARD)) != `0` &&
1289	NLBLOCK->nltype == NLTYPE_FIXED &&
1290	NLBLOCK->nllen == `2` &&
1291	c == NLBLOCK->nl[`0`])
1292	{
1293	could_continue = partial_newline = TRUE;
1294	}
1295	else if ((c >= `256` && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) \|\|
1296	(c < `256` &&
1297	(d != OP_ANY \|\| !IS_NEWLINE(ptr)) &&
1298	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != `0`))
1299	{
1300	if (codevalue == OP_TYPEPOSUPTO)
1301	{
1302	active_count--; / Remove non-match possibility /
1303	next_active_state--;
1304	}
1305	if (++count >= (int)GET2(code, `1`))
1306	{ ADD_NEW(state_offset + `2` + IMM2_SIZE, `0`); }
1307	else
1308	{ ADD_NEW(state_offset, count); }
1309	}
1310	}
1311	break;
1312
1313	/ ========================================================================== /
1314	/* These are virtual opcodes that are used when something like
1315	OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
1316	argument. It keeps the code above fast for the other cases. The argument
1317	is in the d variable. */
1318
1319	#ifdef SUPPORT_UCP
1320	case OP_PROP_EXTRA + OP_TYPEPLUS:
1321	case OP_PROP_EXTRA + OP_TYPEMINPLUS:
1322	case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
1323	count = current_state->count; / Already matched /
1324	if (count > `0`) { ADD_ACTIVE(state_offset + `4`, `0`); }
1325	if (clen > `0`)
1326	{
1327	BOOL OK;
1328	const pcre_uint32 *cp;
1329	const ucd_record * prop = GET_UCD(c);
1330	switch(code[`2`])
1331	{
1332	case PT_ANY:
1333	OK = TRUE;
1334	break;
1335
1336	case PT_LAMP:
1337	OK = prop->chartype == ucp_Lu \|\| prop->chartype == ucp_Ll \|\|
1338	prop->chartype == ucp_Lt;
1339	break;
1340
1341	case PT_GC:
1342	OK = PRIV(ucp_gentype)[prop->chartype] == code[`3`];
1343	break;
1344
1345	case PT_PC:
1346	OK = prop->chartype == code[`3`];
1347	break;
1348
1349	case PT_SC:
1350	OK = prop->script == code[`3`];
1351	break;
1352
1353	/ These are specials for combination cases. /
1354
1355	case PT_ALNUM:
1356	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
1357	PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1358	break;
1359
1360	/* Perl space used to exclude VT, but from Perl 5.18 it is included,
1361	which means that Perl space and POSIX space are now identical. PCRE
1362	was changed at release 8.34. */
1363
1364	case PT_SPACE: / Perl space /
1365	case PT_PXSPACE: / POSIX space /
1366	switch(c)
1367	{
1368	HSPACE_CASES:
1369	VSPACE_CASES:
1370	OK = TRUE;
1371	break;
1372
1373	default:
1374	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1375	break;
1376	}
1377	break;
1378
1379	case PT_WORD:
1380	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
1381	PRIV(ucp_gentype)[prop->chartype] == ucp_N \|\|
1382	c == CHAR_UNDERSCORE;
1383	break;
1384
1385	case PT_CLIST:
1386	cp = PRIV(ucd_caseless_sets) + code[`3`];
1387	for (;;)
1388	{
1389	if (c < cp) { OK = FALSE; break*; }
1390	if (c == cp++) { OK = TRUE; break*; }
1391	}
1392	break;
1393
1394	case PT_UCNC:
1395	OK = c == CHAR_DOLLAR_SIGN \|\| c == CHAR_COMMERCIAL_AT \|\|
1396	c == CHAR_GRAVE_ACCENT \|\| (c >= `0xa0` && c <= `0xd7ff`) \|\|
1397	c >= `0xe000`;
1398	break;
1399
1400	/ Should never occur, but keep compilers from grumbling. /
1401
1402	default:
1403	OK = codevalue != OP_PROP;
1404	break;
1405	}
1406
1407	if (OK == (d == OP_PROP))
1408	{
1409	if (count > `0` && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1410	{
1411	active_count--; / Remove non-match possibility /
1412	next_active_state--;
1413	}
1414	count++;
1415	ADD_NEW(state_offset, count);
1416	}
1417	}
1418	break;
1419
1420	/-----------------------------------------------------------------/
1421	case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1422	case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1423	case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1424	count = current_state->count; / Already matched /
1425	if (count > `0`) { ADD_ACTIVE(state_offset + `2`, `0`); }
1426	if (clen > `0`)
1427	{
1428	int lgb, rgb;
1429	const pcre_uchar *nptr = ptr + clen;
1430	int ncount = `0`;
1431	if (count > `0` && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1432	{
1433	active_count--; / Remove non-match possibility /
1434	next_active_state--;
1435	}
1436	lgb = UCD_GRAPHBREAK(c);
1437	while (nptr < end_subject)
1438	{
1439	dlen = `1`;
1440	if (!utf) d = nptr; else* { GETCHARLEN(d, nptr, dlen); }
1441	rgb = UCD_GRAPHBREAK(d);
1442	if ((PRIV(ucp_gbtable)[lgb] & (`1` << rgb)) == `0`) break;
1443	ncount++;
1444	lgb = rgb;
1445	nptr += dlen;
1446	}
1447	count++;
1448	ADD_NEW_DATA(-state_offset, count, ncount);
1449	}
1450	break;
1451	#endif
1452
1453	/-----------------------------------------------------------------/
1454	case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1455	case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1456	case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1457	count = current_state->count; / Already matched /
1458	if (count > `0`) { ADD_ACTIVE(state_offset + `2`, `0`); }
1459	if (clen > `0`)
1460	{
1461	int ncount = `0`;
1462	switch (c)
1463	{
1464	case CHAR_VT:
1465	case CHAR_FF:
1466	case CHAR_NEL:
1467	#ifndef EBCDIC
1468	case `0x2028`:
1469	case `0x2029`:
1470	#endif /* Not EBCDIC */
1471	if ((md->moptions & PCRE_BSR_ANYCRLF) != `0`) break;
1472	goto ANYNL01;
1473
1474	case CHAR_CR:
1475	if (ptr + `1` < end_subject && UCHAR21TEST(ptr + `1`) == CHAR_LF) ncount = `1`;
1476	/ Fall through /
1477
1478	ANYNL01:
1479	case CHAR_LF:
1480	if (count > `0` && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1481	{
1482	active_count--; / Remove non-match possibility /
1483	next_active_state--;
1484	}
1485	count++;
1486	ADD_NEW_DATA(-state_offset, count, ncount);
1487	break;
1488
1489	default:
1490	break;
1491	}
1492	}
1493	break;
1494
1495	/-----------------------------------------------------------------/
1496	case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1497	case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1498	case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1499	count = current_state->count; / Already matched /
1500	if (count > `0`) { ADD_ACTIVE(state_offset + `2`, `0`); }
1501	if (clen > `0`)
1502	{
1503	BOOL OK;
1504	switch (c)
1505	{
1506	VSPACE_CASES:
1507	OK = TRUE;
1508	break;
1509
1510	default:
1511	OK = FALSE;
1512	break;
1513	}
1514
1515	if (OK == (d == OP_VSPACE))
1516	{
1517	if (count > `0` && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1518	{
1519	active_count--; / Remove non-match possibility /
1520	next_active_state--;
1521	}
1522	count++;
1523	ADD_NEW_DATA(-state_offset, count, `0`);
1524	}
1525	}
1526	break;
1527
1528	/-----------------------------------------------------------------/
1529	case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1530	case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1531	case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1532	count = current_state->count; / Already matched /
1533	if (count > `0`) { ADD_ACTIVE(state_offset + `2`, `0`); }
1534	if (clen > `0`)
1535	{
1536	BOOL OK;
1537	switch (c)
1538	{
1539	HSPACE_CASES:
1540	OK = TRUE;
1541	break;
1542
1543	default:
1544	OK = FALSE;
1545	break;
1546	}
1547
1548	if (OK == (d == OP_HSPACE))
1549	{
1550	if (count > `0` && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1551	{
1552	active_count--; / Remove non-match possibility /
1553	next_active_state--;
1554	}
1555	count++;
1556	ADD_NEW_DATA(-state_offset, count, `0`);
1557	}
1558	}
1559	break;
1560
1561	/-----------------------------------------------------------------/
1562	#ifdef SUPPORT_UCP
1563	case OP_PROP_EXTRA + OP_TYPEQUERY:
1564	case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1565	case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1566	count = `4`;
1567	goto QS1;
1568
1569	case OP_PROP_EXTRA + OP_TYPESTAR:
1570	case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1571	case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1572	count = `0`;
1573
1574	QS1:
1575
1576	ADD_ACTIVE(state_offset + `4`, `0`);
1577	if (clen > `0`)
1578	{
1579	BOOL OK;
1580	const pcre_uint32 *cp;
1581	const ucd_record * prop = GET_UCD(c);
1582	switch(code[`2`])
1583	{
1584	case PT_ANY:
1585	OK = TRUE;
1586	break;
1587
1588	case PT_LAMP:
1589	OK = prop->chartype == ucp_Lu \|\| prop->chartype == ucp_Ll \|\|
1590	prop->chartype == ucp_Lt;
1591	break;
1592
1593	case PT_GC:
1594	OK = PRIV(ucp_gentype)[prop->chartype] == code[`3`];
1595	break;
1596
1597	case PT_PC:
1598	OK = prop->chartype == code[`3`];
1599	break;
1600
1601	case PT_SC:
1602	OK = prop->script == code[`3`];
1603	break;
1604
1605	/ These are specials for combination cases. /
1606
1607	case PT_ALNUM:
1608	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
1609	PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1610	break;
1611
1612	/* Perl space used to exclude VT, but from Perl 5.18 it is included,
1613	which means that Perl space and POSIX space are now identical. PCRE
1614	was changed at release 8.34. */
1615
1616	case PT_SPACE: / Perl space /
1617	case PT_PXSPACE: / POSIX space /
1618	switch(c)
1619	{
1620	HSPACE_CASES:
1621	VSPACE_CASES:
1622	OK = TRUE;
1623	break;
1624
1625	default:
1626	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1627	break;
1628	}
1629	break;
1630
1631	case PT_WORD:
1632	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
1633	PRIV(ucp_gentype)[prop->chartype] == ucp_N \|\|
1634	c == CHAR_UNDERSCORE;
1635	break;
1636
1637	case PT_CLIST:
1638	cp = PRIV(ucd_caseless_sets) + code[`3`];
1639	for (;;)
1640	{
1641	if (c < cp) { OK = FALSE; break*; }
1642	if (c == cp++) { OK = TRUE; break*; }
1643	}
1644	break;
1645
1646	case PT_UCNC:
1647	OK = c == CHAR_DOLLAR_SIGN \|\| c == CHAR_COMMERCIAL_AT \|\|
1648	c == CHAR_GRAVE_ACCENT \|\| (c >= `0xa0` && c <= `0xd7ff`) \|\|
1649	c >= `0xe000`;
1650	break;
1651
1652	/ Should never occur, but keep compilers from grumbling. /
1653
1654	default:
1655	OK = codevalue != OP_PROP;
1656	break;
1657	}
1658
1659	if (OK == (d == OP_PROP))
1660	{
1661	if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR \|\|
1662	codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1663	{
1664	active_count--; / Remove non-match possibility /
1665	next_active_state--;
1666	}
1667	ADD_NEW(state_offset + count, `0`);
1668	}
1669	}
1670	break;
1671
1672	/-----------------------------------------------------------------/
1673	case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1674	case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1675	case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1676	count = `2`;
1677	goto QS2;
1678
1679	case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1680	case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1681	case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1682	count = `0`;
1683
1684	QS2:
1685
1686	ADD_ACTIVE(state_offset + `2`, `0`);
1687	if (clen > `0`)
1688	{
1689	int lgb, rgb;
1690	const pcre_uchar *nptr = ptr + clen;
1691	int ncount = `0`;
1692	if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR \|\|
1693	codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1694	{
1695	active_count--; / Remove non-match possibility /
1696	next_active_state--;
1697	}
1698	lgb = UCD_GRAPHBREAK(c);
1699	while (nptr < end_subject)
1700	{
1701	dlen = `1`;
1702	if (!utf) d = nptr; else* { GETCHARLEN(d, nptr, dlen); }
1703	rgb = UCD_GRAPHBREAK(d);
1704	if ((PRIV(ucp_gbtable)[lgb] & (`1` << rgb)) == `0`) break;
1705	ncount++;
1706	lgb = rgb;
1707	nptr += dlen;
1708	}
1709	ADD_NEW_DATA(-(state_offset + count), `0`, ncount);
1710	}
1711	break;
1712	#endif
1713
1714	/-----------------------------------------------------------------/
1715	case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1716	case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1717	case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1718	count = `2`;
1719	goto QS3;
1720
1721	case OP_ANYNL_EXTRA + OP_TYPESTAR:
1722	case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1723	case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1724	count = `0`;
1725
1726	QS3:
1727	ADD_ACTIVE(state_offset + `2`, `0`);
1728	if (clen > `0`)
1729	{
1730	int ncount = `0`;
1731	switch (c)
1732	{
1733	case CHAR_VT:
1734	case CHAR_FF:
1735	case CHAR_NEL:
1736	#ifndef EBCDIC
1737	case `0x2028`:
1738	case `0x2029`:
1739	#endif /* Not EBCDIC */
1740	if ((md->moptions & PCRE_BSR_ANYCRLF) != `0`) break;
1741	goto ANYNL02;
1742
1743	case CHAR_CR:
1744	if (ptr + `1` < end_subject && UCHAR21TEST(ptr + `1`) == CHAR_LF) ncount = `1`;
1745	/ Fall through /
1746
1747	ANYNL02:
1748	case CHAR_LF:
1749	if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR \|\|
1750	codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1751	{
1752	active_count--; / Remove non-match possibility /
1753	next_active_state--;
1754	}
1755	ADD_NEW_DATA(-(state_offset + (int)count), `0`, ncount);
1756	break;
1757
1758	default:
1759	break;
1760	}
1761	}
1762	break;
1763
1764	/-----------------------------------------------------------------/
1765	case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1766	case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1767	case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1768	count = `2`;
1769	goto QS4;
1770
1771	case OP_VSPACE_EXTRA + OP_TYPESTAR:
1772	case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1773	case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1774	count = `0`;
1775
1776	QS4:
1777	ADD_ACTIVE(state_offset + `2`, `0`);
1778	if (clen > `0`)
1779	{
1780	BOOL OK;
1781	switch (c)
1782	{
1783	VSPACE_CASES:
1784	OK = TRUE;
1785	break;
1786
1787	default:
1788	OK = FALSE;
1789	break;
1790	}
1791	if (OK == (d == OP_VSPACE))
1792	{
1793	if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR \|\|
1794	codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1795	{
1796	active_count--; / Remove non-match possibility /
1797	next_active_state--;
1798	}
1799	ADD_NEW_DATA(-(state_offset + (int)count), `0`, `0`);
1800	}
1801	}
1802	break;
1803
1804	/-----------------------------------------------------------------/
1805	case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1806	case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1807	case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1808	count = `2`;
1809	goto QS5;
1810
1811	case OP_HSPACE_EXTRA + OP_TYPESTAR:
1812	case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1813	case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1814	count = `0`;
1815
1816	QS5:
1817	ADD_ACTIVE(state_offset + `2`, `0`);
1818	if (clen > `0`)
1819	{
1820	BOOL OK;
1821	switch (c)
1822	{
1823	HSPACE_CASES:
1824	OK = TRUE;
1825	break;
1826
1827	default:
1828	OK = FALSE;
1829	break;
1830	}
1831
1832	if (OK == (d == OP_HSPACE))
1833	{
1834	if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR \|\|
1835	codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1836	{
1837	active_count--; / Remove non-match possibility /
1838	next_active_state--;
1839	}
1840	ADD_NEW_DATA(-(state_offset + (int)count), `0`, `0`);
1841	}
1842	}
1843	break;
1844
1845	/-----------------------------------------------------------------/
1846	#ifdef SUPPORT_UCP
1847	case OP_PROP_EXTRA + OP_TYPEEXACT:
1848	case OP_PROP_EXTRA + OP_TYPEUPTO:
1849	case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1850	case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1851	if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1852	{ ADD_ACTIVE(state_offset + `1` + IMM2_SIZE + `3`, `0`); }
1853	count = current_state->count; / Number already matched /
1854	if (clen > `0`)
1855	{
1856	BOOL OK;
1857	const pcre_uint32 *cp;
1858	const ucd_record * prop = GET_UCD(c);
1859	switch(code[`1` + IMM2_SIZE + `1`])
1860	{
1861	case PT_ANY:
1862	OK = TRUE;
1863	break;
1864
1865	case PT_LAMP:
1866	OK = prop->chartype == ucp_Lu \|\| prop->chartype == ucp_Ll \|\|
1867	prop->chartype == ucp_Lt;
1868	break;
1869
1870	case PT_GC:
1871	OK = PRIV(ucp_gentype)[prop->chartype] == code[`1` + IMM2_SIZE + `2`];
1872	break;
1873
1874	case PT_PC:
1875	OK = prop->chartype == code[`1` + IMM2_SIZE + `2`];
1876	break;
1877
1878	case PT_SC:
1879	OK = prop->script == code[`1` + IMM2_SIZE + `2`];
1880	break;
1881
1882	/ These are specials for combination cases. /
1883
1884	case PT_ALNUM:
1885	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
1886	PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1887	break;
1888
1889	/* Perl space used to exclude VT, but from Perl 5.18 it is included,
1890	which means that Perl space and POSIX space are now identical. PCRE
1891	was changed at release 8.34. */
1892
1893	case PT_SPACE: / Perl space /
1894	case PT_PXSPACE: / POSIX space /
1895	switch(c)
1896	{
1897	HSPACE_CASES:
1898	VSPACE_CASES:
1899	OK = TRUE;
1900	break;
1901
1902	default:
1903	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1904	break;
1905	}
1906	break;
1907
1908	case PT_WORD:
1909	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
1910	PRIV(ucp_gentype)[prop->chartype] == ucp_N \|\|
1911	c == CHAR_UNDERSCORE;
1912	break;
1913
1914	case PT_CLIST:
1915	cp = PRIV(ucd_caseless_sets) + code[`1` + IMM2_SIZE + `2`];
1916	for (;;)
1917	{
1918	if (c < cp) { OK = FALSE; break*; }
1919	if (c == cp++) { OK = TRUE; break*; }
1920	}
1921	break;
1922
1923	case PT_UCNC:
1924	OK = c == CHAR_DOLLAR_SIGN \|\| c == CHAR_COMMERCIAL_AT \|\|
1925	c == CHAR_GRAVE_ACCENT \|\| (c >= `0xa0` && c <= `0xd7ff`) \|\|
1926	c >= `0xe000`;
1927	break;
1928
1929	/ Should never occur, but keep compilers from grumbling. /
1930
1931	default:
1932	OK = codevalue != OP_PROP;
1933	break;
1934	}
1935
1936	if (OK == (d == OP_PROP))
1937	{
1938	if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1939	{
1940	active_count--; / Remove non-match possibility /
1941	next_active_state--;
1942	}
1943	if (++count >= (int)GET2(code, `1`))
1944	{ ADD_NEW(state_offset + `1` + IMM2_SIZE + `3`, `0`); }
1945	else
1946	{ ADD_NEW(state_offset, count); }
1947	}
1948	}
1949	break;
1950
1951	/-----------------------------------------------------------------/
1952	case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1953	case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1954	case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1955	case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1956	if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1957	{ ADD_ACTIVE(state_offset + `2` + IMM2_SIZE, `0`); }
1958	count = current_state->count; / Number already matched /
1959	if (clen > `0`)
1960	{
1961	int lgb, rgb;
1962	const pcre_uchar *nptr = ptr + clen;
1963	int ncount = `0`;
1964	if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1965	{
1966	active_count--; / Remove non-match possibility /
1967	next_active_state--;
1968	}
1969	lgb = UCD_GRAPHBREAK(c);
1970	while (nptr < end_subject)
1971	{
1972	dlen = `1`;
1973	if (!utf) d = nptr; else* { GETCHARLEN(d, nptr, dlen); }
1974	rgb = UCD_GRAPHBREAK(d);
1975	if ((PRIV(ucp_gbtable)[lgb] & (`1` << rgb)) == `0`) break;
1976	ncount++;
1977	lgb = rgb;
1978	nptr += dlen;
1979	}
1980	if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != `0`)
1981	reset_could_continue = TRUE;
1982	if (++count >= (int)GET2(code, `1`))
1983	{ ADD_NEW_DATA(-(state_offset + `2` + IMM2_SIZE), `0`, ncount); }
1984	else
1985	{ ADD_NEW_DATA(-state_offset, count, ncount); }
1986	}
1987	break;
1988	#endif
1989
1990	/-----------------------------------------------------------------/
1991	case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1992	case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1993	case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1994	case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1995	if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1996	{ ADD_ACTIVE(state_offset + `2` + IMM2_SIZE, `0`); }
1997	count = current_state->count; / Number already matched /
1998	if (clen > `0`)
1999	{
2000	int ncount = `0`;
2001	switch (c)
2002	{
2003	case CHAR_VT:
2004	case CHAR_FF:
2005	case CHAR_NEL:
2006	#ifndef EBCDIC
2007	case `0x2028`:
2008	case `0x2029`:
2009	#endif /* Not EBCDIC */
2010	if ((md->moptions & PCRE_BSR_ANYCRLF) != `0`) break;
2011	goto ANYNL03;
2012
2013	case CHAR_CR:
2014	if (ptr + `1` < end_subject && UCHAR21TEST(ptr + `1`) == CHAR_LF) ncount = `1`;
2015	/ Fall through /
2016
2017	ANYNL03:
2018	case CHAR_LF:
2019	if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
2020	{
2021	active_count--; / Remove non-match possibility /
2022	next_active_state--;
2023	}
2024	if (++count >= (int)GET2(code, `1`))
2025	{ ADD_NEW_DATA(-(state_offset + `2` + IMM2_SIZE), `0`, ncount); }
2026	else
2027	{ ADD_NEW_DATA(-state_offset, count, ncount); }
2028	break;
2029
2030	default:
2031	break;
2032	}
2033	}
2034	break;
2035
2036	/-----------------------------------------------------------------/
2037	case OP_VSPACE_EXTRA + OP_TYPEEXACT:
2038	case OP_VSPACE_EXTRA + OP_TYPEUPTO:
2039	case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
2040	case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
2041	if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
2042	{ ADD_ACTIVE(state_offset + `2` + IMM2_SIZE, `0`); }
2043	count = current_state->count; / Number already matched /
2044	if (clen > `0`)
2045	{
2046	BOOL OK;
2047	switch (c)
2048	{
2049	VSPACE_CASES:
2050	OK = TRUE;
2051	break;
2052
2053	default:
2054	OK = FALSE;
2055	}
2056
2057	if (OK == (d == OP_VSPACE))
2058	{
2059	if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
2060	{
2061	active_count--; / Remove non-match possibility /
2062	next_active_state--;
2063	}
2064	if (++count >= (int)GET2(code, `1`))
2065	{ ADD_NEW_DATA(-(state_offset + `2` + IMM2_SIZE), `0`, `0`); }
2066	else
2067	{ ADD_NEW_DATA(-state_offset, count, `0`); }
2068	}
2069	}
2070	break;
2071
2072	/-----------------------------------------------------------------/
2073	case OP_HSPACE_EXTRA + OP_TYPEEXACT:
2074	case OP_HSPACE_EXTRA + OP_TYPEUPTO:
2075	case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
2076	case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
2077	if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
2078	{ ADD_ACTIVE(state_offset + `2` + IMM2_SIZE, `0`); }
2079	count = current_state->count; / Number already matched /
2080	if (clen > `0`)
2081	{
2082	BOOL OK;
2083	switch (c)
2084	{
2085	HSPACE_CASES:
2086	OK = TRUE;
2087	break;
2088
2089	default:
2090	OK = FALSE;
2091	break;
2092	}
2093
2094	if (OK == (d == OP_HSPACE))
2095	{
2096	if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
2097	{
2098	active_count--; / Remove non-match possibility /
2099	next_active_state--;
2100	}
2101	if (++count >= (int)GET2(code, `1`))
2102	{ ADD_NEW_DATA(-(state_offset + `2` + IMM2_SIZE), `0`, `0`); }
2103	else
2104	{ ADD_NEW_DATA(-state_offset, count, `0`); }
2105	}
2106	}
2107	break;
2108
2109	/ ========================================================================== /
2110	/ These opcodes are followed by a character that is usually compared*
2111	to the current subject character; it is loaded into d. We still get
2112	here even if there is no subject character, because in some cases zero
2113	repetitions are permitted. /*
2114
2115	/-----------------------------------------------------------------/
2116	case OP_CHAR:
2117	if (clen > `0` && c == d) { ADD_NEW(state_offset + dlen + `1`, `0`); }
2118	break;
2119
2120	/-----------------------------------------------------------------/
2121	case OP_CHARI:
2122	if (clen == `0`) break;
2123
2124	#ifdef SUPPORT_UTF
2125	if (utf)
2126	{
2127	if (c == d) { ADD_NEW(state_offset + dlen + `1`, `0`); } else
2128	{
2129	unsigned int othercase;
2130	if (c < `128`)
2131	othercase = fcc[c];
2132	else
2133	/ If we have Unicode property support, we can use it to test the*
2134	other case of the character. /*
2135	#ifdef SUPPORT_UCP
2136	othercase = UCD_OTHERCASE(c);
2137	#else
2138	othercase = NOTACHAR;
2139	#endif
2140
2141	if (d == othercase) { ADD_NEW(state_offset + dlen + `1`, `0`); }
2142	}
2143	}
2144	else
2145	#endif /* SUPPORT_UTF */
2146	/ Not UTF mode /
2147	{
2148	if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2149	{ ADD_NEW(state_offset + `2`, `0`); }
2150	}
2151	break;
2152
2153
2154	#ifdef SUPPORT_UCP
2155	/-----------------------------------------------------------------/
2156	/ This is a tricky one because it can match more than one character.*
2157	Find out how many characters to skip, and then set up a negative state
2158	to wait for them to pass before continuing. /*
2159
2160	case OP_EXTUNI:
2161	if (clen > `0`)
2162	{
2163	int lgb, rgb;
2164	const pcre_uchar *nptr = ptr + clen;
2165	int ncount = `0`;
2166	lgb = UCD_GRAPHBREAK(c);
2167	while (nptr < end_subject)
2168	{
2169	dlen = `1`;
2170	if (!utf) d = nptr; else* { GETCHARLEN(d, nptr, dlen); }
2171	rgb = UCD_GRAPHBREAK(d);
2172	if ((PRIV(ucp_gbtable)[lgb] & (`1` << rgb)) == `0`) break;
2173	ncount++;
2174	lgb = rgb;
2175	nptr += dlen;
2176	}
2177	if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != `0`)
2178	reset_could_continue = TRUE;
2179	ADD_NEW_DATA(-(state_offset + `1`), `0`, ncount);
2180	}
2181	break;
2182	#endif
2183
2184	/-----------------------------------------------------------------/
2185	/ This is a tricky like EXTUNI because it too can match more than one*
2186	character (when CR is followed by LF). In this case, set up a negative
2187	state to wait for one character to pass before continuing. /*
2188
2189	case OP_ANYNL:
2190	if (clen > `0`) switch(c)
2191	{
2192	case CHAR_VT:
2193	case CHAR_FF:
2194	case CHAR_NEL:
2195	#ifndef EBCDIC
2196	case `0x2028`:
2197	case `0x2029`:
2198	#endif /* Not EBCDIC */
2199	if ((md->moptions & PCRE_BSR_ANYCRLF) != `0`) break;
2200
2201	case CHAR_LF:
2202	ADD_NEW(state_offset + `1`, `0`);
2203	break;
2204
2205	case CHAR_CR:
2206	if (ptr + `1` >= end_subject)
2207	{
2208	ADD_NEW(state_offset + `1`, `0`);
2209	if ((md->moptions & PCRE_PARTIAL_HARD) != `0`)
2210	reset_could_continue = TRUE;
2211	}
2212	else if (UCHAR21TEST(ptr + `1`) == CHAR_LF)
2213	{
2214	ADD_NEW_DATA(-(state_offset + `1`), `0`, `1`);
2215	}
2216	else
2217	{
2218	ADD_NEW(state_offset + `1`, `0`);
2219	}
2220	break;
2221	}
2222	break;
2223
2224	/-----------------------------------------------------------------/
2225	case OP_NOT_VSPACE:
2226	if (clen > `0`) switch(c)
2227	{
2228	VSPACE_CASES:
2229	break;
2230
2231	default:
2232	ADD_NEW(state_offset + `1`, `0`);
2233	break;
2234	}
2235	break;
2236
2237	/-----------------------------------------------------------------/
2238	case OP_VSPACE:
2239	if (clen > `0`) switch(c)
2240	{
2241	VSPACE_CASES:
2242	ADD_NEW(state_offset + `1`, `0`);
2243	break;
2244
2245	default:
2246	break;
2247	}
2248	break;
2249
2250	/-----------------------------------------------------------------/
2251	case OP_NOT_HSPACE:
2252	if (clen > `0`) switch(c)
2253	{
2254	HSPACE_CASES:
2255	break;
2256
2257	default:
2258	ADD_NEW(state_offset + `1`, `0`);
2259	break;
2260	}
2261	break;
2262
2263	/-----------------------------------------------------------------/
2264	case OP_HSPACE:
2265	if (clen > `0`) switch(c)
2266	{
2267	HSPACE_CASES:
2268	ADD_NEW(state_offset + `1`, `0`);
2269	break;
2270
2271	default:
2272	break;
2273	}
2274	break;
2275
2276	/-----------------------------------------------------------------/
2277	/ Match a negated single character casefully. /
2278
2279	case OP_NOT:
2280	if (clen > `0` && c != d) { ADD_NEW(state_offset + dlen + `1`, `0`); }
2281	break;
2282
2283	/-----------------------------------------------------------------/
2284	/ Match a negated single character caselessly. /
2285
2286	case OP_NOTI:
2287	if (clen > `0`)
2288	{
2289	pcre_uint32 otherd;
2290	#ifdef SUPPORT_UTF
2291	if (utf && d >= `128`)
2292	{
2293	#ifdef SUPPORT_UCP
2294	otherd = UCD_OTHERCASE(d);
2295	#else
2296	otherd = d;
2297	#endif /* SUPPORT_UCP */
2298	}
2299	else
2300	#endif /* SUPPORT_UTF */
2301	otherd = TABLE_GET(d, fcc, d);
2302	if (c != d && c != otherd)
2303	{ ADD_NEW(state_offset + dlen + `1`, `0`); }
2304	}
2305	break;
2306
2307	/-----------------------------------------------------------------/
2308	case OP_PLUSI:
2309	case OP_MINPLUSI:
2310	case OP_POSPLUSI:
2311	case OP_NOTPLUSI:
2312	case OP_NOTMINPLUSI:
2313	case OP_NOTPOSPLUSI:
2314	caseless = TRUE;
2315	codevalue -= OP_STARI - OP_STAR;
2316
2317	/ Fall through /
2318	case OP_PLUS:
2319	case OP_MINPLUS:
2320	case OP_POSPLUS:
2321	case OP_NOTPLUS:
2322	case OP_NOTMINPLUS:
2323	case OP_NOTPOSPLUS:
2324	count = current_state->count; / Already matched /
2325	if (count > `0`) { ADD_ACTIVE(state_offset + dlen + `1`, `0`); }
2326	if (clen > `0`)
2327	{
2328	pcre_uint32 otherd = NOTACHAR;
2329	if (caseless)
2330	{
2331	#ifdef SUPPORT_UTF
2332	if (utf && d >= `128`)
2333	{
2334	#ifdef SUPPORT_UCP
2335	otherd = UCD_OTHERCASE(d);
2336	#endif /* SUPPORT_UCP */
2337	}
2338	else
2339	#endif /* SUPPORT_UTF */
2340	otherd = TABLE_GET(d, fcc, d);
2341	}
2342	if ((c == d \|\| c == otherd) == (codevalue < OP_NOTSTAR))
2343	{
2344	if (count > `0` &&
2345	(codevalue == OP_POSPLUS \|\| codevalue == OP_NOTPOSPLUS))
2346	{
2347	active_count--; / Remove non-match possibility /
2348	next_active_state--;
2349	}
2350	count++;
2351	ADD_NEW(state_offset, count);
2352	}
2353	}
2354	break;
2355
2356	/-----------------------------------------------------------------/
2357	case OP_QUERYI:
2358	case OP_MINQUERYI:
2359	case OP_POSQUERYI:
2360	case OP_NOTQUERYI:
2361	case OP_NOTMINQUERYI:
2362	case OP_NOTPOSQUERYI:
2363	caseless = TRUE;
2364	codevalue -= OP_STARI - OP_STAR;
2365	/ Fall through /
2366	case OP_QUERY:
2367	case OP_MINQUERY:
2368	case OP_POSQUERY:
2369	case OP_NOTQUERY:
2370	case OP_NOTMINQUERY:
2371	case OP_NOTPOSQUERY:
2372	ADD_ACTIVE(state_offset + dlen + `1`, `0`);
2373	if (clen > `0`)
2374	{
2375	pcre_uint32 otherd = NOTACHAR;
2376	if (caseless)
2377	{
2378	#ifdef SUPPORT_UTF
2379	if (utf && d >= `128`)
2380	{
2381	#ifdef SUPPORT_UCP
2382	otherd = UCD_OTHERCASE(d);
2383	#endif /* SUPPORT_UCP */
2384	}
2385	else
2386	#endif /* SUPPORT_UTF */
2387	otherd = TABLE_GET(d, fcc, d);
2388	}
2389	if ((c == d \|\| c == otherd) == (codevalue < OP_NOTSTAR))
2390	{
2391	if (codevalue == OP_POSQUERY \|\| codevalue == OP_NOTPOSQUERY)
2392	{
2393	active_count--; / Remove non-match possibility /
2394	next_active_state--;
2395	}
2396	ADD_NEW(state_offset + dlen + `1`, `0`);
2397	}
2398	}
2399	break;
2400
2401	/-----------------------------------------------------------------/
2402	case OP_STARI:
2403	case OP_MINSTARI:
2404	case OP_POSSTARI:
2405	case OP_NOTSTARI:
2406	case OP_NOTMINSTARI:
2407	case OP_NOTPOSSTARI:
2408	caseless = TRUE;
2409	codevalue -= OP_STARI - OP_STAR;
2410	/ Fall through /
2411	case OP_STAR:
2412	case OP_MINSTAR:
2413	case OP_POSSTAR:
2414	case OP_NOTSTAR:
2415	case OP_NOTMINSTAR:
2416	case OP_NOTPOSSTAR:
2417	ADD_ACTIVE(state_offset + dlen + `1`, `0`);
2418	if (clen > `0`)
2419	{
2420	pcre_uint32 otherd = NOTACHAR;
2421	if (caseless)
2422	{
2423	#ifdef SUPPORT_UTF
2424	if (utf && d >= `128`)
2425	{
2426	#ifdef SUPPORT_UCP
2427	otherd = UCD_OTHERCASE(d);
2428	#endif /* SUPPORT_UCP */
2429	}
2430	else
2431	#endif /* SUPPORT_UTF */
2432	otherd = TABLE_GET(d, fcc, d);
2433	}
2434	if ((c == d \|\| c == otherd) == (codevalue < OP_NOTSTAR))
2435	{
2436	if (codevalue == OP_POSSTAR \|\| codevalue == OP_NOTPOSSTAR)
2437	{
2438	active_count--; / Remove non-match possibility /
2439	next_active_state--;
2440	}
2441	ADD_NEW(state_offset, `0`);
2442	}
2443	}
2444	break;
2445
2446	/-----------------------------------------------------------------/
2447	case OP_EXACTI:
2448	case OP_NOTEXACTI:
2449	caseless = TRUE;
2450	codevalue -= OP_STARI - OP_STAR;
2451	/ Fall through /
2452	case OP_EXACT:
2453	case OP_NOTEXACT:
2454	count = current_state->count; / Number already matched /
2455	if (clen > `0`)
2456	{
2457	pcre_uint32 otherd = NOTACHAR;
2458	if (caseless)
2459	{
2460	#ifdef SUPPORT_UTF
2461	if (utf && d >= `128`)
2462	{
2463	#ifdef SUPPORT_UCP
2464	otherd = UCD_OTHERCASE(d);
2465	#endif /* SUPPORT_UCP */
2466	}
2467	else
2468	#endif /* SUPPORT_UTF */
2469	otherd = TABLE_GET(d, fcc, d);
2470	}
2471	if ((c == d \|\| c == otherd) == (codevalue < OP_NOTSTAR))
2472	{
2473	if (++count >= (int)GET2(code, `1`))
2474	{ ADD_NEW(state_offset + dlen + `1` + IMM2_SIZE, `0`); }
2475	else
2476	{ ADD_NEW(state_offset, count); }
2477	}
2478	}
2479	break;
2480
2481	/-----------------------------------------------------------------/
2482	case OP_UPTOI:
2483	case OP_MINUPTOI:
2484	case OP_POSUPTOI:
2485	case OP_NOTUPTOI:
2486	case OP_NOTMINUPTOI:
2487	case OP_NOTPOSUPTOI:
2488	caseless = TRUE;
2489	codevalue -= OP_STARI - OP_STAR;
2490	/ Fall through /
2491	case OP_UPTO:
2492	case OP_MINUPTO:
2493	case OP_POSUPTO:
2494	case OP_NOTUPTO:
2495	case OP_NOTMINUPTO:
2496	case OP_NOTPOSUPTO:
2497	ADD_ACTIVE(state_offset + dlen + `1` + IMM2_SIZE, `0`);
2498	count = current_state->count; / Number already matched /
2499	if (clen > `0`)
2500	{
2501	pcre_uint32 otherd = NOTACHAR;
2502	if (caseless)
2503	{
2504	#ifdef SUPPORT_UTF
2505	if (utf && d >= `128`)
2506	{
2507	#ifdef SUPPORT_UCP
2508	otherd = UCD_OTHERCASE(d);
2509	#endif /* SUPPORT_UCP */
2510	}
2511	else
2512	#endif /* SUPPORT_UTF */
2513	otherd = TABLE_GET(d, fcc, d);
2514	}
2515	if ((c == d \|\| c == otherd) == (codevalue < OP_NOTSTAR))
2516	{
2517	if (codevalue == OP_POSUPTO \|\| codevalue == OP_NOTPOSUPTO)
2518	{
2519	active_count--; / Remove non-match possibility /
2520	next_active_state--;
2521	}
2522	if (++count >= (int)GET2(code, `1`))
2523	{ ADD_NEW(state_offset + dlen + `1` + IMM2_SIZE, `0`); }
2524	else
2525	{ ADD_NEW(state_offset, count); }
2526	}
2527	}
2528	break;
2529
2530
2531	/ ========================================================================== /
2532	/ These are the class-handling opcodes /
2533
2534	case OP_CLASS:
2535	case OP_NCLASS:
2536	case OP_XCLASS:
2537	{
2538	BOOL isinclass = FALSE;
2539	int next_state_offset;
2540	const pcre_uchar *ecode;
2541
2542	/ For a simple class, there is always just a 32-byte table, and we*
2543	can set isinclass from it. /*
2544
2545	if (codevalue != OP_XCLASS)
2546	{
2547	ecode = code + `1` + (`32` / sizeof(pcre_uchar));
2548	if (clen > `0`)
2549	{
2550	isinclass = (c > `255`)? (codevalue == OP_NCLASS) :
2551	((((pcre_uint8 *)(code + `1`))[c/`8`] & (`1` << (c&`7`))) != `0`);
2552	}
2553	}
2554
2555	/ An extended class may have a table or a list of single characters,*
2556	ranges, or both, and it may be positive or negative. There's a
2557	function that sorts all this out. /*
2558
2559	else
2560	{
2561	ecode = code + GET(code, `1`);
2562	if (clen > `0`) isinclass = PRIV(xclass)(c, code + `1` + LINK_SIZE, utf);
2563	}
2564
2565	/ At this point, isinclass is set for all kinds of class, and ecode*
2566	points to the byte after the end of the class. If there is a
2567	quantifier, this is where it will be. /*
2568
2569	next_state_offset = (int)(ecode - start_code);
2570
2571	switch (*ecode)
2572	{
2573	case OP_CRSTAR:
2574	case OP_CRMINSTAR:
2575	case OP_CRPOSSTAR:
2576	ADD_ACTIVE(next_state_offset + `1`, `0`);
2577	if (isinclass)
2578	{
2579	if (*ecode == OP_CRPOSSTAR)
2580	{
2581	active_count--; / Remove non-match possibility /
2582	next_active_state--;
2583	}
2584	ADD_NEW(state_offset, `0`);
2585	}
2586	break;
2587
2588	case OP_CRPLUS:
2589	case OP_CRMINPLUS:
2590	case OP_CRPOSPLUS:
2591	count = current_state->count; / Already matched /
2592	if (count > `0`) { ADD_ACTIVE(next_state_offset + `1`, `0`); }
2593	if (isinclass)
2594	{
2595	if (count > `0` && *ecode == OP_CRPOSPLUS)
2596	{
2597	active_count--; / Remove non-match possibility /
2598	next_active_state--;
2599	}
2600	count++;
2601	ADD_NEW(state_offset, count);
2602	}
2603	break;
2604
2605	case OP_CRQUERY:
2606	case OP_CRMINQUERY:
2607	case OP_CRPOSQUERY:
2608	ADD_ACTIVE(next_state_offset + `1`, `0`);
2609	if (isinclass)
2610	{
2611	if (*ecode == OP_CRPOSQUERY)
2612	{
2613	active_count--; / Remove non-match possibility /
2614	next_active_state--;
2615	}
2616	ADD_NEW(next_state_offset + `1`, `0`);
2617	}
2618	break;
2619
2620	case OP_CRRANGE:
2621	case OP_CRMINRANGE:
2622	case OP_CRPOSRANGE:
2623	count = current_state->count; / Already matched /
2624	if (count >= (int)GET2(ecode, `1`))
2625	{ ADD_ACTIVE(next_state_offset + `1` + `2` * IMM2_SIZE, `0`); }
2626	if (isinclass)
2627	{
2628	int max = (int)GET2(ecode, `1` + IMM2_SIZE);
2629	if (ecode == OP_CRPOSRANGE && count >= (int*)GET2(ecode, `1`))
2630	{
2631	active_count--; / Remove non-match possibility /
2632	next_active_state--;
2633	}
2634	if (++count >= max && max != `0`) / Max 0 => no limit /
2635	{ ADD_NEW(next_state_offset + `1` + `2` * IMM2_SIZE, `0`); }
2636	else
2637	{ ADD_NEW(state_offset, count); }
2638	}
2639	break;
2640
2641	default:
2642	if (isinclass) { ADD_NEW(next_state_offset, `0`); }
2643	break;
2644	}
2645	}
2646	break;
2647
2648	/ ========================================================================== /
2649	/ These are the opcodes for fancy brackets of various kinds. We have*
2650	to use recursion in order to handle them. The "always failing" assertion
2651	(?!) is optimised to OP_FAIL when compiling, so we have to support that,
2652	though the other "backtracking verbs" are not supported. /*
2653
2654	case OP_FAIL:
2655	forced_fail++; / Count FAILs for multiple states /
2656	break;
2657
2658	case OP_ASSERT:
2659	case OP_ASSERT_NOT:
2660	case OP_ASSERTBACK:
2661	case OP_ASSERTBACK_NOT:
2662	{
2663	int rc;
2664	int local_offsets[`2`];
2665	int local_workspace[`1000`];
2666	const pcre_uchar *endasscode = code + GET(code, `1`);
2667
2668	while (*endasscode == OP_ALT) endasscode += GET(endasscode, `1`);
2669
2670	rc = internal_dfa_exec(
2671	md, / static match data /
2672	code, / this subexpression's code /
2673	ptr, / where we currently are /
2674	(int)(ptr - start_subject), / start offset /
2675	local_offsets, / offset vector /
2676	sizeof(local_offsets)/sizeof(int), / size of same /
2677	local_workspace, / workspace vector /
2678	sizeof(local_workspace)/sizeof(int), / size of same /
2679	rlevel); / function recursion level /
2680
2681	if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2682	if ((rc >= `0`) == (codevalue == OP_ASSERT \|\| codevalue == OP_ASSERTBACK))
2683	{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + `1` - start_code), `0`); }
2684	}
2685	break;
2686
2687	/-----------------------------------------------------------------/
2688	case OP_COND:
2689	case OP_SCOND:
2690	{
2691	int local_offsets[`1000`];
2692	int local_workspace[`1000`];
2693	int codelink = GET(code, `1`);
2694	int condcode;
2695
2696	/ Because of the way auto-callout works during compile, a callout item*
2697	is inserted between OP_COND and an assertion condition. This does not
2698	happen for the other conditions. /*
2699
2700	if (code[LINK_SIZE+`1`] == OP_CALLOUT)
2701	{
2702	rrc = `0`;
2703	if (PUBL(callout) != NULL)
2704	{
2705	PUBL(callout_block) cb;
2706	cb.version = `1`; / Version 1 of the callout block /
2707	cb.callout_number = code[LINK_SIZE+`2`];
2708	cb.offset_vector = offsets;
2709	#if defined COMPILE_PCRE8
2710	cb.subject = (PCRE_SPTR)start_subject;
2711	#elif defined COMPILE_PCRE16
2712	cb.subject = (PCRE_SPTR16)start_subject;
2713	#elif defined COMPILE_PCRE32
2714	cb.subject = (PCRE_SPTR32)start_subject;
2715	#endif
2716	cb.subject_length = (int)(end_subject - start_subject);
2717	cb.start_match = (int)(current_subject - start_subject);
2718	cb.current_position = (int)(ptr - start_subject);
2719	cb.pattern_position = GET(code, LINK_SIZE + `3`);
2720	cb.next_item_length = GET(code, `3` + `2`*LINK_SIZE);
2721	cb.capture_top = `1`;
2722	cb.capture_last = -`1`;
2723	cb.callout_data = md->callout_data;
2724	cb.mark = NULL; / No (MARK) support /*
2725	if ((rrc = (PUBL(callout))(&cb)) < `0`) return* rrc; / Abandon /
2726	}
2727	if (rrc > `0`) break; / Fail this thread /
2728	code += PRIV(OP_lengths)[OP_CALLOUT]; / Skip callout data /
2729	}
2730
2731	condcode = code[LINK_SIZE+`1`];
2732
2733	/ Back reference conditions and duplicate named recursion conditions*
2734	are not supported /*
2735
2736	if (condcode == OP_CREF \|\| condcode == OP_DNCREF \|\|
2737	condcode == OP_DNRREF)
2738	return PCRE_ERROR_DFA_UCOND;
2739
2740	/ The DEFINE condition is always false, and the assertion (?!) is*
2741	converted to OP_FAIL. /*
2742
2743	if (condcode == OP_DEF \|\| condcode == OP_FAIL)
2744	{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + `1`, `0`); }
2745
2746	/ The only supported version of OP_RREF is for the value RREF_ANY,*
2747	which means "test if in any recursion". We can't test for specifically
2748	recursed groups. /*
2749
2750	else if (condcode == OP_RREF)
2751	{
2752	int value = GET2(code, LINK_SIZE + `2`);
2753	if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2754	if (md->recursive != NULL)
2755	{ ADD_ACTIVE(state_offset + LINK_SIZE + `2` + IMM2_SIZE, `0`); }
2756	else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + `1`, `0`); }
2757	}
2758
2759	/ Otherwise, the condition is an assertion /
2760
2761	else
2762	{
2763	int rc;
2764	const pcre_uchar *asscode = code + LINK_SIZE + `1`;
2765	const pcre_uchar *endasscode = asscode + GET(asscode, `1`);
2766
2767	while (*endasscode == OP_ALT) endasscode += GET(endasscode, `1`);
2768
2769	rc = internal_dfa_exec(
2770	md, / fixed match data /
2771	asscode, / this subexpression's code /
2772	ptr, / where we currently are /
2773	(int)(ptr - start_subject), / start offset /
2774	local_offsets, / offset vector /
2775	sizeof(local_offsets)/sizeof(int), / size of same /
2776	local_workspace, / workspace vector /
2777	sizeof(local_workspace)/sizeof(int), / size of same /
2778	rlevel); / function recursion level /
2779
2780	if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2781	if ((rc >= `0`) ==
2782	(condcode == OP_ASSERT \|\| condcode == OP_ASSERTBACK))
2783	{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + `1` - start_code), `0`); }
2784	else
2785	{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + `1`, `0`); }
2786	}
2787	}
2788	break;
2789
2790	/-----------------------------------------------------------------/
2791	case OP_RECURSE:
2792	{
2793	dfa_recursion_info *ri;
2794	int local_offsets[`1000`];
2795	int local_workspace[`1000`];
2796	const pcre_uchar *callpat = start_code + GET(code, `1`);
2797	int recno = (callpat == md->start_code)? `0` :
2798	GET2(callpat, `1` + LINK_SIZE);
2799	int rc;
2800
2801	DPRINTF(("%.sStarting regex recursion\n", rlevel`2`-`2`, SP));
2802
2803	/ Check for repeating a recursion without advancing the subject*
2804	pointer. This should catch convoluted mutual recursions. (Some simple
2805	cases are caught at compile time.) /*
2806
2807	for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2808	if (recno == ri->group_num && ptr == ri->subject_position)
2809	return PCRE_ERROR_RECURSELOOP;
2810
2811	/ Remember this recursion and where we started it so as to*
2812	catch infinite loops. /*
2813
2814	new_recursive.group_num = recno;
2815	new_recursive.subject_position = ptr;
2816	new_recursive.prevrec = md->recursive;
2817	md->recursive = &new_recursive;
2818
2819	rc = internal_dfa_exec(
2820	md, / fixed match data /
2821	callpat, / this subexpression's code /
2822	ptr, / where we currently are /
2823	(int)(ptr - start_subject), / start offset /
2824	local_offsets, / offset vector /
2825	sizeof(local_offsets)/sizeof(int), / size of same /
2826	local_workspace, / workspace vector /
2827	sizeof(local_workspace)/sizeof(int), / size of same /
2828	rlevel); / function recursion level /
2829
2830	md->recursive = new_recursive.prevrec; / Done this recursion /
2831
2832	DPRINTF(("%.sReturn from regex recursion: rc=%d\n", rlevel`2`-`2`, SP,
2833	rc));
2834
2835	/ Ran out of internal offsets /
2836
2837	if (rc == `0`) return PCRE_ERROR_DFA_RECURSE;
2838
2839	/ For each successful matched substring, set up the next state with a*
2840	count of characters to skip before trying it. Note that the count is in
2841	characters, not bytes. /*
2842
2843	if (rc > `0`)
2844	{
2845	for (rc = rc*`2` - `2`; rc >= `0`; rc -= `2`)
2846	{
2847	int charcount = local_offsets[rc+`1`] - local_offsets[rc];
2848	#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2849	if (utf)
2850	{
2851	const pcre_uchar *p = start_subject + local_offsets[rc];
2852	const pcre_uchar *pp = start_subject + local_offsets[rc+`1`];
2853	while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2854	}
2855	#endif
2856	if (charcount > `0`)
2857	{
2858	ADD_NEW_DATA(-(state_offset + LINK_SIZE + `1`), `0`, (charcount - `1`));
2859	}
2860	else
2861	{
2862	ADD_ACTIVE(state_offset + LINK_SIZE + `1`, `0`);
2863	}
2864	}
2865	}
2866	else if (rc != PCRE_ERROR_NOMATCH) return rc;
2867	}
2868	break;
2869
2870	/-----------------------------------------------------------------/
2871	case OP_BRAPOS:
2872	case OP_SBRAPOS:
2873	case OP_CBRAPOS:
2874	case OP_SCBRAPOS:
2875	case OP_BRAPOSZERO:
2876	{
2877	int charcount, matched_count;
2878	const pcre_uchar *local_ptr = ptr;
2879	BOOL allow_zero;
2880
2881	if (codevalue == OP_BRAPOSZERO)
2882	{
2883	allow_zero = TRUE;
2884	codevalue = (++code); /* Codevalue will be one of above BRAs /
2885	}
2886	else allow_zero = FALSE;
2887
2888	/ Loop to match the subpattern as many times as possible as if it were*
2889	a complete pattern. /*
2890
2891	for (matched_count = `0`;; matched_count++)
2892	{
2893	int local_offsets[`2`];
2894	int local_workspace[`1000`];
2895
2896	int rc = internal_dfa_exec(
2897	md, / fixed match data /
2898	code, / this subexpression's code /
2899	local_ptr, / where we currently are /
2900	(int)(ptr - start_subject), / start offset /
2901	local_offsets, / offset vector /
2902	sizeof(local_offsets)/sizeof(int), / size of same /
2903	local_workspace, / workspace vector /
2904	sizeof(local_workspace)/sizeof(int), / size of same /
2905	rlevel); / function recursion level /
2906
2907	/ Failed to match /
2908
2909	if (rc < `0`)
2910	{
2911	if (rc != PCRE_ERROR_NOMATCH) return rc;
2912	break;
2913	}
2914
2915	/ Matched: break the loop if zero characters matched. /
2916
2917	charcount = local_offsets[`1`] - local_offsets[`0`];
2918	if (charcount == `0`) break;
2919	local_ptr += charcount; / Advance temporary position ptr /
2920	}
2921
2922	/ At this point we have matched the subpattern matched_count*
2923	times, and local_ptr is pointing to the character after the end of the
2924	last match. /*
2925
2926	if (matched_count > `0` \|\| allow_zero)
2927	{
2928	const pcre_uchar *end_subpattern = code;
2929	int next_state_offset;
2930
2931	do { end_subpattern += GET(end_subpattern, `1`); }
2932	while (*end_subpattern == OP_ALT);
2933	next_state_offset =
2934	(int)(end_subpattern - start_code + LINK_SIZE + `1`);
2935
2936	/ Optimization: if there are no more active states, and there*
2937	are no new states yet set up, then skip over the subject string
2938	right here, to save looping. Otherwise, set up the new state to swing
2939	into action when the end of the matched substring is reached. /*
2940
2941	if (i + `1` >= active_count && new_count == `0`)
2942	{
2943	ptr = local_ptr;
2944	clen = `0`;
2945	ADD_NEW(next_state_offset, `0`);
2946	}
2947	else
2948	{
2949	const pcre_uchar *p = ptr;
2950	const pcre_uchar *pp = local_ptr;
2951	charcount = (int)(pp - p);
2952	#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2953	if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2954	#endif
2955	ADD_NEW_DATA(-next_state_offset, `0`, (charcount - `1`));
2956	}
2957	}
2958	}
2959	break;
2960
2961	/-----------------------------------------------------------------/
2962	case OP_ONCE:
2963	case OP_ONCE_NC:
2964	{
2965	int local_offsets[`2`];
2966	int local_workspace[`1000`];
2967
2968	int rc = internal_dfa_exec(
2969	md, / fixed match data /
2970	code, / this subexpression's code /
2971	ptr, / where we currently are /
2972	(int)(ptr - start_subject), / start offset /
2973	local_offsets, / offset vector /
2974	sizeof(local_offsets)/sizeof(int), / size of same /
2975	local_workspace, / workspace vector /
2976	sizeof(local_workspace)/sizeof(int), / size of same /
2977	rlevel); / function recursion level /
2978
2979	if (rc >= `0`)
2980	{
2981	const pcre_uchar *end_subpattern = code;
2982	int charcount = local_offsets[`1`] - local_offsets[`0`];
2983	int next_state_offset, repeat_state_offset;
2984
2985	do { end_subpattern += GET(end_subpattern, `1`); }
2986	while (*end_subpattern == OP_ALT);
2987	next_state_offset =
2988	(int)(end_subpattern - start_code + LINK_SIZE + `1`);
2989
2990	/ If the end of this subpattern is KETRMAX or KETRMIN, we must*
2991	arrange for the repeat state also to be added to the relevant list.
2992	Calculate the offset, or set -1 for no repeat. /*
2993
2994	repeat_state_offset = (*end_subpattern == OP_KETRMAX \|\|
2995	*end_subpattern == OP_KETRMIN)?
2996	(int)(end_subpattern - start_code - GET(end_subpattern, `1`)) : -`1`;
2997
2998	/ If we have matched an empty string, add the next state at the*
2999	current character pointer. This is important so that the duplicate
3000	checking kicks in, which is what breaks infinite loops that match an
3001	empty string. /*
3002
3003	if (charcount == `0`)
3004	{
3005	ADD_ACTIVE(next_state_offset, `0`);
3006	}
3007
3008	/ Optimization: if there are no more active states, and there*
3009	are no new states yet set up, then skip over the subject string
3010	right here, to save looping. Otherwise, set up the new state to swing
3011	into action when the end of the matched substring is reached. /*
3012
3013	else if (i + `1` >= active_count && new_count == `0`)
3014	{
3015	ptr += charcount;
3016	clen = `0`;
3017	ADD_NEW(next_state_offset, `0`);
3018
3019	/ If we are adding a repeat state at the new character position,*
3020	we must fudge things so that it is the only current state.
3021	Otherwise, it might be a duplicate of one we processed before, and
3022	that would cause it to be skipped. /*
3023
3024	if (repeat_state_offset >= `0`)
3025	{
3026	next_active_state = active_states;
3027	active_count = `0`;
3028	i = -`1`;
3029	ADD_ACTIVE(repeat_state_offset, `0`);
3030	}
3031	}
3032	else
3033	{
3034	#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3035	if (utf)
3036	{
3037	const pcre_uchar *p = start_subject + local_offsets[`0`];
3038	const pcre_uchar *pp = start_subject + local_offsets[`1`];
3039	while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
3040	}
3041	#endif
3042	ADD_NEW_DATA(-next_state_offset, `0`, (charcount - `1`));
3043	if (repeat_state_offset >= `0`)
3044	{ ADD_NEW_DATA(-repeat_state_offset, `0`, (charcount - `1`)); }
3045	}
3046	}
3047	else if (rc != PCRE_ERROR_NOMATCH) return rc;
3048	}
3049	break;
3050
3051
3052	/ ========================================================================== /
3053	/ Handle callouts /
3054
3055	case OP_CALLOUT:
3056	rrc = `0`;
3057	if (PUBL(callout) != NULL)
3058	{
3059	PUBL(callout_block) cb;
3060	cb.version = `1`; / Version 1 of the callout block /
3061	cb.callout_number = code[`1`];
3062	cb.offset_vector = offsets;
3063	#if defined COMPILE_PCRE8
3064	cb.subject = (PCRE_SPTR)start_subject;
3065	#elif defined COMPILE_PCRE16
3066	cb.subject = (PCRE_SPTR16)start_subject;
3067	#elif defined COMPILE_PCRE32
3068	cb.subject = (PCRE_SPTR32)start_subject;
3069	#endif
3070	cb.subject_length = (int)(end_subject - start_subject);
3071	cb.start_match = (int)(current_subject - start_subject);
3072	cb.current_position = (int)(ptr - start_subject);
3073	cb.pattern_position = GET(code, `2`);
3074	cb.next_item_length = GET(code, `2` + LINK_SIZE);
3075	cb.capture_top = `1`;
3076	cb.capture_last = -`1`;
3077	cb.callout_data = md->callout_data;
3078	cb.mark = NULL; / No (MARK) support /*
3079	if ((rrc = (PUBL(callout))(&cb)) < `0`) return* rrc; / Abandon /
3080	}
3081	if (rrc == `0`)
3082	{ ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], `0`); }
3083	break;
3084
3085
3086	/ ========================================================================== /
3087	default: / Unsupported opcode /
3088	return PCRE_ERROR_DFA_UITEM;
3089	}
3090
3091	NEXT_ACTIVE_STATE: continue;
3092
3093	} / End of loop scanning active states /
3094
3095	/ We have finished the processing at the current subject character. If no*
3096	new states have been set for the next character, we have found all the
3097	matches that we are going to find. If we are at the top level and partial
3098	matching has been requested, check for appropriate conditions.
3099
3100	The "forced_ fail" variable counts the number of (F) encountered for the*
3101	character. If it is equal to the original active_count (saved in
3102	workspace[1]) it means that (F) was found on every active state. In this*
3103	case we don't want to give a partial match.
3104
3105	The "could_continue" variable is true if a state could have continued but
3106	for the fact that the end of the subject was reached. /*
3107
3108	if (new_count <= `0`)
3109	{
3110	if (rlevel == `1` && / Top level, and /
3111	could_continue && / Some could go on, and /
3112	forced_fail != workspace[`1`] && / Not all forced fail & /
3113	( / either... /
3114	(md->moptions & PCRE_PARTIAL_HARD) != `0` / Hard partial /
3115	\|\| / or... /
3116	((md->moptions & PCRE_PARTIAL_SOFT) != `0` && / Soft partial and /
3117	match_count < `0`) / no matches /
3118	) && / And... /
3119	(
3120	partial_newline \|\| / Either partial NL /
3121	( / or ... /
3122	ptr >= end_subject && / End of subject and /
3123	ptr > md->start_used_ptr) / Inspected non-empty string /
3124	)
3125	)
3126	match_count = PCRE_ERROR_PARTIAL;
3127	DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
3128	"%.s---------------------\n\n", rlevel`2`-`2`, SP, rlevel, match_count,
3129	rlevel*`2`-`2`, SP));
3130	break; / In effect, "return", but see the comment below /
3131	}
3132
3133	/ One or more states are active for the next character. /
3134
3135	ptr += clen; / Advance to next subject character /
3136	} / Loop to move along the subject string /
3137
3138	/ Control gets here from "break" a few lines above. We do it this way because*
3139	if we use "return" above, we have compiler trouble. Some compilers warn if
3140	there's nothing here because they think the function doesn't return a value. On
3141	the other hand, if we put a dummy statement here, some more clever compilers
3142	complain that it can't be reached. Sigh. /*
3143
3144	return match_count;
3145	}
3146
3147
3148
3149
/*************************************************
*  Execute a Regular Expression - DFA engine     *
*************************************************/

/* This external function applies a compiled re to a subject string using a DFA
engine. This function calls the internal function multiple times if the pattern
is not anchored.

Arguments:
  argument_re     points to the compiled expression
  extra_data      points to extra data or is NULL
  subject         points to the subject string
  length          length of subject string (may contain binary zeros)
  start_offset    where to start in the subject string
  options         option bits
  offsets         vector of match offsets
  offsetcount     size of same
  workspace       workspace vector
  wscount         size of same

Returns:          > 0 => number of match offset pairs placed in offsets
                  = 0 => offsets overflowed; longest matches are present
                   -1 => failed to match
                 < -1 => some kind of unexpected problem
*/
3175
3176	#if defined COMPILE_PCRE8
3177	PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3178	pcre_dfa_exec(const pcre argument_re, const* pcre_extra *extra_data,
3179	const char subject, int* length, int start_offset, int options, int *offsets,
3180	int offsetcount, int workspace, int* wscount)
3181	#elif defined COMPILE_PCRE16
3182	PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3183	pcre16_dfa_exec(const pcre16 argument_re, const* pcre16_extra *extra_data,
3184	PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3185	int offsetcount, int workspace, int* wscount)
3186	#elif defined COMPILE_PCRE32
3187	PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3188	pcre32_dfa_exec(const pcre32 argument_re, const* pcre32_extra *extra_data,
3189	PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
3190	int offsetcount, int workspace, int* wscount)
3191	#endif
3192	{
3193	REAL_PCRE re = (REAL_PCRE )argument_re;
3194	dfa_match_data match_block;
3195	dfa_match_data *md = &match_block;
3196	BOOL utf, anchored, startline, firstline;
3197	const pcre_uchar current_subject, end_subject;
3198	const pcre_study_data *study = NULL;
3199
3200	const pcre_uchar *req_char_ptr;
3201	const pcre_uint8 *start_bits = NULL;
3202	BOOL has_first_char = FALSE;
3203	BOOL has_req_char = FALSE;
3204	pcre_uchar first_char = `0`;
3205	pcre_uchar first_char2 = `0`;
3206	pcre_uchar req_char = `0`;
3207	pcre_uchar req_char2 = `0`;
3208	int newline;
3209
3210	/ Plausibility checks /
3211
3212	if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != `0`) return PCRE_ERROR_BADOPTION;
3213	if (re == NULL \|\| subject == NULL \|\| workspace == NULL \|\|
3214	(offsets == NULL && offsetcount > `0`)) return PCRE_ERROR_NULL;
3215	if (offsetcount < `0`) return PCRE_ERROR_BADCOUNT;
3216	if (wscount < `20`) return PCRE_ERROR_DFA_WSSIZE;
3217	if (length < `0`) return PCRE_ERROR_BADLENGTH;
3218	if (start_offset < `0` \|\| start_offset > length) return PCRE_ERROR_BADOFFSET;
3219
3220	/ Check that the first field in the block is the magic number. If it is not,*
3221	return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3222	REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3223	means that the pattern is likely compiled with different endianness. /*
3224
3225	if (re->magic_number != MAGIC_NUMBER)
3226	return re->magic_number == REVERSED_MAGIC_NUMBER?
3227	PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3228	if ((re->flags & PCRE_MODE) == `0`) return PCRE_ERROR_BADMODE;
3229
3230	/ If restarting after a partial match, do some sanity checks on the contents*
3231	of the workspace. /*
3232
3233	if ((options & PCRE_DFA_RESTART) != `0`)
3234	{
3235	if ((workspace[`0`] & (-`2`)) != `0` \|\| workspace[`1`] < `1` \|\|
3236	workspace[`1`] > (wscount - `2`)/INTS_PER_STATEBLOCK)
3237	return PCRE_ERROR_DFA_BADRESTART;
3238	}
3239
3240	/ Set up study, callout, and table data /
3241
3242	md->tables = re->tables;
3243	md->callout_data = NULL;
3244
3245	if (extra_data != NULL)
3246	{
3247	unsigned long int flags = extra_data->flags;
3248	if ((flags & PCRE_EXTRA_STUDY_DATA) != `0`)
3249	study = (const pcre_study_data *)extra_data->study_data;
3250	if ((flags & PCRE_EXTRA_MATCH_LIMIT) != `0`) return PCRE_ERROR_DFA_UMLIMIT;
3251	if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != `0`)
3252	return PCRE_ERROR_DFA_UMLIMIT;
3253	if ((flags & PCRE_EXTRA_CALLOUT_DATA) != `0`)
3254	md->callout_data = extra_data->callout_data;
3255	if ((flags & PCRE_EXTRA_TABLES) != `0`)
3256	md->tables = extra_data->tables;
3257	}
3258
3259	/ Set some local values /
3260
3261	current_subject = (const pcre_uchar *)subject + start_offset;
3262	end_subject = (const pcre_uchar *)subject + length;
3263	req_char_ptr = current_subject - `1`;
3264
3265	#ifdef SUPPORT_UTF
3266	/ PCRE_UTF(16\|32) have the same value as PCRE_UTF8. /
3267	utf = (re->options & PCRE_UTF8) != `0`;
3268	#else
3269	utf = FALSE;
3270	#endif
3271
3272	anchored = (options & (PCRE_ANCHORED\|PCRE_DFA_RESTART)) != `0` \|\|
3273	(re->options & PCRE_ANCHORED) != `0`;
3274
3275	/ The remaining fixed data for passing around. /
3276
3277	md->start_code = (const pcre_uchar *)argument_re +
3278	re->name_table_offset + re->name_count * re->name_entry_size;
3279	md->start_subject = (const pcre_uchar *)subject;
3280	md->end_subject = end_subject;
3281	md->start_offset = start_offset;
3282	md->moptions = options;
3283	md->poptions = re->options;
3284
3285	/ If the BSR option is not set at match time, copy what was set*
3286	at compile time. /*
3287
3288	if ((md->moptions & (PCRE_BSR_ANYCRLF\|PCRE_BSR_UNICODE)) == `0`)
3289	{
3290	if ((re->options & (PCRE_BSR_ANYCRLF\|PCRE_BSR_UNICODE)) != `0`)
3291	md->moptions \|= re->options & (PCRE_BSR_ANYCRLF\|PCRE_BSR_UNICODE);
3292	#ifdef BSR_ANYCRLF
3293	else md->moptions \|= PCRE_BSR_ANYCRLF;
3294	#endif
3295	}
3296
3297	/ Handle different types of newline. The three bits give eight cases. If*
3298	nothing is set at run time, whatever was used at compile time applies. /*
3299
3300	switch ((((options & PCRE_NEWLINE_BITS) == `0`)? re->options : (pcre_uint32)options) &
3301	PCRE_NEWLINE_BITS)
3302	{
3303	case `0`: newline = NEWLINE; break; / Compile-time default /
3304	case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
3305	case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
3306	case PCRE_NEWLINE_CR+
3307	PCRE_NEWLINE_LF: newline = (CHAR_CR << `8`) \| CHAR_NL; break;
3308	case PCRE_NEWLINE_ANY: newline = -`1`; break;
3309	case PCRE_NEWLINE_ANYCRLF: newline = -`2`; break;
3310	default: return PCRE_ERROR_BADNEWLINE;
3311	}
3312
3313	if (newline == -`2`)
3314	{
3315	md->nltype = NLTYPE_ANYCRLF;
3316	}
3317	else if (newline < `0`)
3318	{
3319	md->nltype = NLTYPE_ANY;
3320	}
3321	else
3322	{
3323	md->nltype = NLTYPE_FIXED;
3324	if (newline > `255`)
3325	{
3326	md->nllen = `2`;
3327	md->nl[`0`] = (newline >> `8`) & `255`;
3328	md->nl[`1`] = newline & `255`;
3329	}
3330	else
3331	{
3332	md->nllen = `1`;
3333	md->nl[`0`] = newline;
3334	}
3335	}
3336
3337	/ Check a UTF-8 string if required. Unfortunately there's no way of passing*
3338	back the character offset. /*
3339
3340	#ifdef SUPPORT_UTF
3341	if (utf && (options & PCRE_NO_UTF8_CHECK) == `0`)
3342	{
3343	int erroroffset;
3344	int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3345	if (errorcode != `0`)
3346	{
3347	if (offsetcount >= `2`)
3348	{
3349	offsets[`0`] = erroroffset;
3350	offsets[`1`] = errorcode;
3351	}
3352	#if defined COMPILE_PCRE8
3353	return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != `0`) ?
3354	PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3355	#elif defined COMPILE_PCRE16
3356	return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != `0`) ?
3357	PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
3358	#elif defined COMPILE_PCRE32
3359	return PCRE_ERROR_BADUTF32;
3360	#endif
3361	}
3362	#if defined COMPILE_PCRE8 \|\| defined COMPILE_PCRE16
3363	if (start_offset > `0` && start_offset < length &&
3364	NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3365	return PCRE_ERROR_BADUTF8_OFFSET;
3366	#endif
3367	}
3368	#endif
3369
3370	/ If the exec call supplied NULL for tables, use the inbuilt ones. This*
3371	is a feature that makes it possible to save compiled regex and re-use them
3372	in other programs later. /*
3373
3374	if (md->tables == NULL) md->tables = PRIV(default_tables);
3375
3376	/ The "must be at the start of a line" flags are used in a loop when finding*
3377	where to start. /*
3378
3379	startline = (re->flags & PCRE_STARTLINE) != `0`;
3380	firstline = (re->options & PCRE_FIRSTLINE) != `0`;
3381
3382	/ Set up the first character to match, if available. The first_byte value is*
3383	never set for an anchored regular expression, but the anchoring may be forced
3384	at run time, so we have to test for anchoring. The first char may be unset for
3385	an unanchored pattern, of course. If there's no first char and the pattern was
3386	studied, there may be a bitmap of possible first characters. /*
3387
3388	if (!anchored)
3389	{
3390	if ((re->flags & PCRE_FIRSTSET) != `0`)
3391	{
3392	has_first_char = TRUE;
3393	first_char = first_char2 = (pcre_uchar)(re->first_char);
3394	if ((re->flags & PCRE_FCH_CASELESS) != `0`)
3395	{
3396	first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3397	#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3398	if (utf && first_char > `127`)
3399	first_char2 = UCD_OTHERCASE(first_char);
3400	#endif
3401	}
3402	}
3403	else
3404	{
3405	if (!startline && study != NULL &&
3406	(study->flags & PCRE_STUDY_MAPPED) != `0`)
3407	start_bits = study->start_bits;
3408	}
3409	}
3410
3411	/ For anchored or unanchored matches, there may be a "last known required*
3412	character" set. /*
3413
3414	if ((re->flags & PCRE_REQCHSET) != `0`)
3415	{
3416	has_req_char = TRUE;
3417	req_char = req_char2 = (pcre_uchar)(re->req_char);
3418	if ((re->flags & PCRE_RCH_CASELESS) != `0`)
3419	{
3420	req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3421	#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3422	if (utf && req_char > `127`)
3423	req_char2 = UCD_OTHERCASE(req_char);
3424	#endif
3425	}
3426	}
3427
3428	/ Call the main matching function, looping for a non-anchored regex after a*
3429	failed match. If not restarting, perform certain optimizations at the start of
3430	a match. /*
3431
3432	for (;;)
3433	{
3434	int rc;
3435
3436	if ((options & PCRE_DFA_RESTART) == `0`)
3437	{
3438	const pcre_uchar *save_end_subject = end_subject;
3439
3440	/ If firstline is TRUE, the start of the match is constrained to the first*
3441	line of a multiline string. Implement this by temporarily adjusting
3442	end_subject so that we stop scanning at a newline. If the match fails at
3443	the newline, later code breaks this loop. /*
3444
3445	if (firstline)
3446	{
3447	PCRE_PUCHAR t = current_subject;
3448	#ifdef SUPPORT_UTF
3449	if (utf)
3450	{
3451	while (t < md->end_subject && !IS_NEWLINE(t))
3452	{
3453	t++;
3454	ACROSSCHAR(t < end_subject, *t, t++);
3455	}
3456	}
3457	else
3458	#endif
3459	while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3460	end_subject = t;
3461	}
3462
3463	/ There are some optimizations that avoid running the match if a known*
3464	starting point is not found. However, there is an option that disables
3465	these, for testing and for ensuring that all callouts do actually occur.
3466	The option can be set in the regex by (NO_START_OPT) or passed in*
3467	match-time options. /*
3468
3469	if (((options \| re->options) & PCRE_NO_START_OPTIMIZE) == `0`)
3470	{
3471	/ Advance to a known first pcre_uchar (i.e. data item) /
3472
3473	if (has_first_char)
3474	{
3475	if (first_char != first_char2)
3476	{
3477	pcre_uchar csc;
3478	while (current_subject < end_subject &&
3479	(csc = UCHAR21TEST(current_subject)) != first_char && csc != first_char2)
3480	current_subject++;
3481	}
3482	else
3483	while (current_subject < end_subject &&
3484	UCHAR21TEST(current_subject) != first_char)
3485	current_subject++;
3486	}
3487
3488	/ Or to just after a linebreak for a multiline match if possible /
3489
3490	else if (startline)
3491	{
3492	if (current_subject > md->start_subject + start_offset)
3493	{
3494	#ifdef SUPPORT_UTF
3495	if (utf)
3496	{
3497	while (current_subject < end_subject &&
3498	!WAS_NEWLINE(current_subject))
3499	{
3500	current_subject++;
3501	ACROSSCHAR(current_subject < end_subject, *current_subject,
3502	current_subject++);
3503	}
3504	}
3505	else
3506	#endif
3507	while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
3508	current_subject++;
3509
3510	/ If we have just passed a CR and the newline option is ANY or*
3511	ANYCRLF, and we are now at a LF, advance the match position by one
3512	more character. /*
3513
3514	if (UCHAR21TEST(current_subject - `1`) == CHAR_CR &&
3515	(md->nltype == NLTYPE_ANY \|\| md->nltype == NLTYPE_ANYCRLF) &&
3516	current_subject < end_subject &&
3517	UCHAR21TEST(current_subject) == CHAR_NL)
3518	current_subject++;
3519	}
3520	}
3521
3522	/ Advance to a non-unique first pcre_uchar after study /
3523
3524	else if (start_bits != NULL)
3525	{
3526	while (current_subject < end_subject)
3527	{
3528	register pcre_uint32 c = UCHAR21TEST(current_subject);
3529	#ifndef COMPILE_PCRE8
3530	if (c > `255`) c = `255`;
3531	#endif
3532	if ((start_bits[c/`8`] & (`1` << (c&`7`))) != `0`) break;
3533	current_subject++;
3534	}
3535	}
3536	}
3537
3538	/ Restore fudged end_subject /
3539
3540	end_subject = save_end_subject;
3541
3542	/ The following two optimizations are disabled for partial matching or if*
3543	disabling is explicitly requested (and of course, by the test above, this
3544	code is not obeyed when restarting after a partial match). /*
3545
3546	if (((options \| re->options) & PCRE_NO_START_OPTIMIZE) == `0` &&
3547	(options & (PCRE_PARTIAL_HARD\|PCRE_PARTIAL_SOFT)) == `0`)
3548	{
3549	/ If the pattern was studied, a minimum subject length may be set. This*
3550	is a lower bound; no actual string of that length may actually match the
3551	pattern. Although the value is, strictly, in characters, we treat it as
3552	in pcre_uchar units to avoid spending too much time in this optimization.
3553	*/
3554
3555	if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != `0` &&
3556	(pcre_uint32)(end_subject - current_subject) < study->minlength)
3557	return PCRE_ERROR_NOMATCH;
3558
3559	/ If req_char is set, we know that that pcre_uchar must appear in the*
3560	subject for the match to succeed. If the first pcre_uchar is set,
3561	req_char must be later in the subject; otherwise the test starts at the
3562	match point. This optimization can save a huge amount of work in patterns
3563	with nested unlimited repeats that aren't going to match. Writing
3564	separate code for cased/caseless versions makes it go faster, as does
3565	using an autoincrement and backing off on a match.
3566
3567	HOWEVER: when the subject string is very, very long, searching to its end
3568	can take a long time, and give bad performance on quite ordinary
3569	patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3570	string... so we don't do this when the string is sufficiently long. /*
3571
3572	if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3573	{
3574	register PCRE_PUCHAR p = current_subject + (has_first_char? `1`:`0`);
3575
3576	/ We don't need to repeat the search if we haven't yet reached the*
3577	place we found it at last time. /*
3578
3579	if (p > req_char_ptr)
3580	{
3581	if (req_char != req_char2)
3582	{
3583	while (p < end_subject)
3584	{
3585	register pcre_uint32 pp = UCHAR21INCTEST(p);
3586	if (pp == req_char \|\| pp == req_char2) { p--; break; }
3587	}
3588	}
3589	else
3590	{
3591	while (p < end_subject)
3592	{
3593	if (UCHAR21INCTEST(p) == req_char) { p--; break; }
3594	}
3595	}
3596
3597	/ If we can't find the required pcre_uchar, break the matching loop,*
3598	which will cause a return or PCRE_ERROR_NOMATCH. /*
3599
3600	if (p >= end_subject) break;
3601
3602	/ If we have found the required pcre_uchar, save the point where we*
3603	found it, so that we don't search again next time round the loop if
3604	the start hasn't passed this point yet. /*
3605
3606	req_char_ptr = p;
3607	}
3608	}
3609	}
3610	} / End of optimizations that are done when not restarting /
3611
3612	/ OK, now we can do the business /
3613
3614	md->start_used_ptr = current_subject;
3615	md->recursive = NULL;
3616
3617	rc = internal_dfa_exec(
3618	md, / fixed match data /
3619	md->start_code, / this subexpression's code /
3620	current_subject, / where we currently are /
3621	start_offset, / start offset in subject /
3622	offsets, / offset vector /
3623	offsetcount, / size of same /
3624	workspace, / workspace vector /
3625	wscount, / size of same /
3626	`0`); / function recurse level /
3627
3628	/ Anything other than "no match" means we are done, always; otherwise, carry*
3629	on only if not anchored. /*
3630
3631	if (rc != PCRE_ERROR_NOMATCH \|\| anchored)
3632	{
3633	if (rc == PCRE_ERROR_PARTIAL && offsetcount >= `2`)
3634	{
3635	offsets[`0`] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
3636	offsets[`1`] = (int)(end_subject - (PCRE_PUCHAR)subject);
3637	if (offsetcount > `2`)
3638	offsets[`2`] = (int)(current_subject - (PCRE_PUCHAR)subject);
3639	}
3640	return rc;
3641	}
3642
3643	/ Advance to the next subject character unless we are at the end of a line*
3644	and firstline is set. /*
3645
3646	if (firstline && IS_NEWLINE(current_subject)) break;
3647	current_subject++;
3648	#ifdef SUPPORT_UTF
3649	if (utf)
3650	{
3651	ACROSSCHAR(current_subject < end_subject, *current_subject,
3652	current_subject++);
3653	}
3654	#endif
3655	if (current_subject > end_subject) break;
3656
3657	/ If we have just passed a CR and we are now at a LF, and the pattern does*
3658	not contain any explicit matches for \r or \n, and the newline option is CRLF
3659	or ANY or ANYCRLF, advance the match position by one more character. /*
3660
3661	if (UCHAR21TEST(current_subject - `1`) == CHAR_CR &&
3662	current_subject < end_subject &&
3663	UCHAR21TEST(current_subject) == CHAR_NL &&
3664	(re->flags & PCRE_HASCRORLF) == `0` &&
3665	(md->nltype == NLTYPE_ANY \|\|
3666	md->nltype == NLTYPE_ANYCRLF \|\|
3667	md->nllen == `2`))
3668	current_subject++;
3669
3670	} / "Bumpalong" loop /
3671
3672	return PCRE_ERROR_NOMATCH;
3673	}
3674
/* End of pcre_dfa_exec.c */
3676

Browse the source code of POCO/Foundation/src/pcre_dfa_exec.c