pcre_exec.c source code [ClickHouse/contrib/poco/Foundation/src/pcre_exec.c]

1	/*************************************************
2	* Perl-Compatible Regular Expressions *
3	*************************************************/
4
5	/* PCRE is a library of functions to support regular expressions whose syntax
6	and semantics are as close as possible to those of the Perl 5 language.
7
8	Written by Philip Hazel
9	Copyright (c) 1997-2014 University of Cambridge
10
11	-----------------------------------------------------------------------------
12	Redistribution and use in source and binary forms, with or without
13	modification, are permitted provided that the following conditions are met:
14
15	* Redistributions of source code must retain the above copyright notice,
16	this list of conditions and the following disclaimer.
17
18	* Redistributions in binary form must reproduce the above copyright
19	notice, this list of conditions and the following disclaimer in the
20	documentation and/or other materials provided with the distribution.
21
22	* Neither the name of the University of Cambridge nor the names of its
23	contributors may be used to endorse or promote products derived from
24	this software without specific prior written permission.
25
26	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30	LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34	CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36	POSSIBILITY OF SUCH DAMAGE.
37	-----------------------------------------------------------------------------
38	*/
39
40	/* This module contains pcre_exec(), the externally visible function that does
41	pattern matching using an NFA algorithm, trying to mimic Perl as closely as
42	possible. There are also some static supporting functions. */
43
44	#pragma warning( disable : 4127) // conditional expression is constant
45	#pragma warning( disable : 4244) // conversion from 'int' to 'unsigned short', possible loss of data
46
47	#include "pcre_config.h"
48
49	#define NLBLOCK md /* Block containing newline information */
50	#define PSSTART start_subject /* Field containing processed string start */
51	#define PSEND end_subject /* Field containing processed string end */
52
53	#include "pcre_internal.h"
54
55	/* Undefine some potentially clashing cpp symbols */
56
57	#undef min
58	#undef max
59
/* The md->capture_last field uses the lower 16 bits for the last captured
substring (which can never be greater than 65535) and a bit in the top half
to mean "capture vector overflowed". This odd way of doing things was
implemented when it was realized that preserving and restoring the overflow bit
whenever the last capture number was saved/restored made for a neater
interface, and doing it this way saved on (a) another variable, which would
have increased the stack frame size (a big NO-NO in PCRE) and (b) another
separate set of save/restore instructions. The following defines are used in
implementing this. */

#define CAPLMASK    0x0000ffff    /* The bits holding the last capture number */
#define OVFLMASK    0xffff0000    /* The bits reserved for the overflow flag */
#define OVFLBIT     0x00010000    /* The bit that is set for overflow */
73
/* Values for setting in md->match_function_type to indicate two special types
of call to match(). We do it this way to save on using another stack variable,
as stack usage is to be discouraged. Both values are non-zero because zero is
what match() stores back into md->match_function_type once it has consumed the
request. */

#define MATCH_CONDASSERT     1  /* Called to check a condition assertion */
#define MATCH_CBEGROUP       2  /* Could-be-empty unlimited repeat group */
80
/* Non-error returns from the match() function. Error returns are externally
defined PCRE_ERROR_xxx codes, which are all negative. */

#define MATCH_MATCH        1
#define MATCH_NOMATCH      0
86
/* Special internal returns from the match() function. Make them sufficiently
negative to avoid the external error codes. */

#define MATCH_ACCEPT       (-999)
#define MATCH_KETRPOS      (-998)
#define MATCH_ONCE         (-997)

/* The next 5 must be kept together and in sequence so that a test that checks
for any one of them can use a range (MATCH_BACKTRACK_MIN..MATCH_BACKTRACK_MAX). */

#define MATCH_COMMIT       (-996)
#define MATCH_PRUNE        (-995)
#define MATCH_SKIP         (-994)
#define MATCH_SKIP_ARG     (-993)
#define MATCH_THEN         (-992)
#define MATCH_BACKTRACK_MAX MATCH_THEN
#define MATCH_BACKTRACK_MIN MATCH_COMMIT
102
/* Maximum number of ints of offset to save on the stack for recursive calls.
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
because the offset vector is always a multiple of 3 long. */

#define REC_STACK_SAVE_MAX 30
108
/* Min and max values for the common repeats; for the maxima, 0 => infinity.
The two tables are parallel: entry i of rep_min and entry i of rep_max describe
the same repeat kind. */

static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
113
#ifdef PCRE_DEBUG
/*************************************************
*        Debugging function to print chars       *
*************************************************/

/* Print a sequence of chars in printable format, stopping at the end of the
subject if requested. Characters for which isprint() is false are written as
\x{hh} escapes.

Arguments:
  p           points to characters
  length      number to print
  is_subject  TRUE if printing from within md->start_subject; the count is
                then clamped so that printing stops at md->end_subject
  md          pointer to matching data block; md->utf is read unconditionally,
                so this must be valid even when is_subject is FALSE

Returns:     nothing
*/

static void
pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
{
pcre_uint32 c;
BOOL utf = md->utf;   /* Not named below; presumably consumed by UCHAR21INCTEST */

if (is_subject && length > md->end_subject - p)
  length = (int)(md->end_subject - p);

while (length-- > 0)
  {
  if (isprint(c = UCHAR21INCTEST(p)))
    printf("%c", (char)c);
  else
    printf("\\x{%02x}", c);
  }
}
#endif
141
142
143
144	/*************************************************
145	* Match a back-reference *
146	*************************************************/
147
148	/ Normally, if a back reference hasn't been set, the length that is passed is*
149	negative, so the match always fails. However, in JavaScript compatibility mode,
150	the length passed is zero. Note that in caseless UTF-8 mode, the number of
151	subject bytes matched may be different to the number of reference bytes.
152
153	Arguments:
154	offset index into the offset vector
155	eptr pointer into the subject
156	length length of reference to be matched (number of bytes)
157	md points to match data block
158	caseless TRUE if caseless
159
160	Returns: >= 0 the number of subject bytes matched
161	-1 no match
162	-2 partial match; always given if at end subject
163	*/
164
165	static int
166	match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
167	BOOL caseless)
168	{
169	PCRE_PUCHAR eptr_start = eptr;
170	register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
171	#if defined SUPPORT_UTF && defined SUPPORT_UCP
172	BOOL utf = md->utf;
173	#endif
174
175	#ifdef PCRE_DEBUG
176	if (eptr >= md->end_subject)
177	printf("matching subject <null>");
178	else
179	{
180	printf("matching subject ");
181	pchars(eptr, length, TRUE, md);
182	}
183	printf(" against backref ");
184	pchars(p, length, FALSE, md);
185	printf("\n");
186	#endif
187
188	/ Always fail if reference not set (and not JavaScript compatible - in that*
189	case the length is passed as zero). /*
190
191	if (length < `0`) return -`1`;
192
193	/ Separate the caseless case for speed. In UTF-8 mode we can only do this*
194	properly if Unicode properties are supported. Otherwise, we can check only
195	ASCII characters. /*
196
197	if (caseless)
198	{
199	#if defined SUPPORT_UTF && defined SUPPORT_UCP
200	if (utf)
201	{
202	/ Match characters up to the end of the reference. NOTE: the number of*
203	data units matched may differ, because in UTF-8 there are some characters
204	whose upper and lower case versions code have different numbers of bytes.
205	For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
206	(3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
207	sequence of two of the latter. It is important, therefore, to check the
208	length along the reference, not along the subject (earlier code did this
209	wrong). /*
210
211	PCRE_PUCHAR endptr = p + length;
212	while (p < endptr)
213	{
214	pcre_uint32 c, d;
215	const ucd_record *ur;
216	if (eptr >= md->end_subject) return -`2`; / Partial match /
217	GETCHARINC(c, eptr);
218	GETCHARINC(d, p);
219	ur = GET_UCD(d);
220	if (c != d && c != d + ur->other_case)
221	{
222	const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
223	for (;;)
224	{
225	if (c < pp) return* -`1`;
226	if (c == pp++) break*;
227	}
228	}
229	}
230	}
231	else
232	#endif
233
234	/ The same code works when not in UTF-8 mode and in UTF-8 mode when there*
235	is no UCP support. /*
236	{
237	while (length-- > `0`)
238	{
239	pcre_uint32 cc, cp;
240	if (eptr >= md->end_subject) return -`2`; / Partial match /
241	cc = UCHAR21TEST(eptr);
242	cp = UCHAR21TEST(p);
243	if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -`1`;
244	p++;
245	eptr++;
246	}
247	}
248	}
249
250	/ In the caseful case, we can just compare the bytes, whether or not we*
251	are in UTF-8 mode. /*
252
253	else
254	{
255	while (length-- > `0`)
256	{
257	if (eptr >= md->end_subject) return -`2`; / Partial match /
258	if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -`1`;
259	}
260	}
261
262	return (int)(eptr - eptr_start);
263	}
264
265
266
267	/***************************************************************************
268	****************************************************************************
269	RECURSION IN THE match() FUNCTION
270
271	The match() function is highly recursive, though not every recursive call
272	increases the recursive depth. Nevertheless, some regular expressions can cause
273	it to recurse to a great depth. I was writing for Unix, so I just let it call
274	itself recursively. This uses the stack for saving everything that has to be
275	saved for a recursive call. On Unix, the stack can be large, and this works
276	fine.
277
278	It turns out that on some non-Unix-like systems there are problems with
279	programs that use a lot of stack. (This despite the fact that every last chip
280	has oodles of memory these days, and techniques for extending the stack have
281	been known for decades.) So....
282
283	There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
284	calls by keeping local variables that need to be preserved in blocks of memory
285	obtained from malloc() instead of on the stack. Macros are used to
286	achieve this so that the actual code doesn't look very different to what it
287	always used to.
288
289	The original heap-recursive code used longjmp(). However, it seems that this
290	can be very slow on some operating systems. Following a suggestion from Stan
291	Switzer, the use of longjmp() has been abolished, at the cost of having to
292	provide a unique number for each call to RMATCH. There is no way of generating
293	a sequence of numbers at compile time in C. I have given them names, to make
294	them stand out more clearly.
295
296	Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
297	FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
298	tests. Furthermore, not using longjmp() means that local dynamic variables
299	don't have indeterminate values; this has meant that the frame size can be
300	reduced because the result can be "passed back" by straight setting of the
301	variable instead of being passed in the frame.
302	****************************************************************************
303	***************************************************************************/
304
/* Numbers for RMATCH calls. Each RMATCH invocation passes a distinct one of
these as its "rw" argument. When this list is changed, the code at HEAP_RETURN
below must be updated in sync. */

enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
       RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
       RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
       RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
       RM61,  RM62, RM63, RM64, RM65, RM66, RM67 };
315
316	/* These versions of the macros use the stack, as normal. There are debugging
317	versions and production versions. Note that the "rw" argument of RMATCH isn't
318	actually used in this definition. */
319
320	#ifndef NO_RECURSE
321	#define REGISTER register
322
/* Stack-recursion forms of RMATCH and RRETURN. RMATCH calls match() with the
caller's current mstart and with rdepth+1, leaving the result in rrc; RRETURN
is a plain return. The PCRE_DEBUG forms additionally trace each call and each
return with the source line number. */

#ifdef PCRE_DEBUG
#define RMATCH(ra,rb,rc,rd,re,rw) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }
#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d\n", ra, __LINE__); \
  return ra; \
  }
#else
#define RMATCH(ra,rb,rc,rd,re,rw) \
  rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
#define RRETURN(ra) return ra
#endif
340
341	#else
342
343
/* These versions of the macros manage a private stack on the heap. Note that
the "rd" argument of RMATCH isn't actually used in this definition. It's the md
argument of match(), which never changes. */

#define REGISTER

/* RMATCH: simulate a recursive call of match(). The frame linked at
frame->Xnextframe is reused if there is one; otherwise a new frame is obtained
via PUBL(stack_malloc) and linked in, and failure to get one returns
PCRE_ERROR_NOMEMORY through RRETURN. The resume point "rw" is recorded in the
current frame, the call arguments are copied into the new frame, and control
jumps to HEAP_RECURSE. The label L_<rw> generated here is where execution
resumes once the simulated call has returned. */

#define RMATCH(ra,rb,rc,rd,re,rw)\
  {\
  heapframe *newframe = frame->Xnextframe;\
  if (newframe == NULL)\
    {\
    newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
    newframe->Xnextframe = NULL;\
    frame->Xnextframe = newframe;\
    }\
  frame->Xwhere = rw;\
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xeptrb = re;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }

/* RRETURN: simulate a return from match(). Step back to the previous frame;
if there is one, pass the result back in rrc and jump to HEAP_RETURN, otherwise
this is the outermost level and a real return is done. The frame being left is
not freed here; it stays linked from its predecessor's Xnextframe, which is
what lets RMATCH reuse it. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  frame = oldframe->Xprevframe;\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
386
387
388	/ Structure for remembering the local variables in a private frame /
389
390	typedef struct heapframe {
391	struct heapframe *Xprevframe;
392	struct heapframe *Xnextframe;
393
394	/ Function arguments that may change /
395
396	PCRE_PUCHAR Xeptr;
397	const pcre_uchar *Xecode;
398	PCRE_PUCHAR Xmstart;
399	int Xoffset_top;
400	eptrblock *Xeptrb;
401	unsigned int Xrdepth;
402
403	/ Function local variables /
404
405	PCRE_PUCHAR Xcallpat;
406	#ifdef SUPPORT_UTF
407	PCRE_PUCHAR Xcharptr;
408	#endif
409	PCRE_PUCHAR Xdata;
410	PCRE_PUCHAR Xnext;
411	PCRE_PUCHAR Xpp;
412	PCRE_PUCHAR Xprev;
413	PCRE_PUCHAR Xsaved_eptr;
414
415	recursion_info Xnew_recursive;
416
417	BOOL Xcur_is_word;
418	BOOL Xcondition;
419	BOOL Xprev_is_word;
420
421	#ifdef SUPPORT_UCP
422	int Xprop_type;
423	unsigned int Xprop_value;
424	int Xprop_fail_result;
425	int Xoclength;
426	pcre_uchar Xocchars[`6`];
427	#endif
428
429	int Xcodelink;
430	int Xctype;
431	unsigned int Xfc;
432	int Xfi;
433	int Xlength;
434	int Xmax;
435	int Xmin;
436	unsigned int Xnumber;
437	int Xoffset;
438	unsigned int Xop;
439	pcre_int32 Xsave_capture_last;
440	int Xsave_offset1, Xsave_offset2, Xsave_offset3;
441	int Xstacksave[REC_STACK_SAVE_MAX];
442
443	eptrblock Xnewptrb;
444
445	/ Where to jump back to /
446
447	int Xwhere;
448
449	} heapframe;
450
451	#endif
452
453
454	/***************************************************************************
455	***************************************************************************/
456
457
458
459	/*************************************************
460	* Match from current position *
461	*************************************************/
462
463	/* This function is called recursively in many circumstances. Whenever it
464	returns a negative (error) response, the outer incarnation must also return the
465	same response. */
466
/* These macros pack up tests that are used for partial matching, and which
appear several times in the code. We set the "hit end" flag if the pointer is
at the end of the subject and also past the start of the subject (i.e.
something has been matched). For hard partial matching (md->partial > 1), we
then return PCRE_ERROR_PARTIAL immediately. The second one is used when we
already know we are past the end of the subject, so it omits the end-of-subject
test. Both rely on eptr and md being in scope and return through RRETURN. */

#define CHECK_PARTIAL()\
  if (md->partial != 0 && eptr >= md->end_subject && \
      eptr > md->start_used_ptr) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }

#define SCHECK_PARTIAL()\
  if (md->partial != 0 && eptr > md->start_used_ptr) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }
488
489
490	/* Performance note: It might be tempting to extract commonly used fields from
491	the md structure (e.g. utf, end_subject) into individual variables to improve
492	performance. Tests using gcc on a SPARC disproved this; in the first case, it
493	made performance worse.
494
495	Arguments:
496	eptr pointer to current character in subject
497	ecode pointer to current position in compiled code
498	mstart pointer to the current match start position (can be modified
499	by encountering \K)
500	offset_top current top pointer
501	md pointer to "static" info for the match
502	eptrb pointer to chain of blocks containing eptr at start of
503	brackets - for testing for empty matches
504	rdepth the recursion depth
505
506	Returns: MATCH_MATCH if matched ) these values are >= 0
507	MATCH_NOMATCH if failed to match )
508	a negative MATCH_xxx value for PRUNE, SKIP, etc
509	a negative PCRE_ERROR_xxx value if aborted by an error condition
510	(e.g. stopped by repeated call or recursion limit)
511	*/
512
513	static int
514	match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
515	PCRE_PUCHAR mstart, int offset_top, match_data md, eptrblock eptrb,
516	unsigned int rdepth)
517	{
518	/ These variables do not need to be preserved over recursion in this function,*
519	so they can be ordinary variables in all cases. Mark some of them with
520	"register" because they are used a lot in loops. /*
521
522	register int rrc; / Returns from recursive calls /
523	register int i; / Used for loops not involving calls to RMATCH() /
524	register pcre_uint32 c; / Character values not kept over RMATCH() calls /
525	register BOOL utf; / Local copy of UTF flag for speed /
526
527	BOOL minimize, possessive; / Quantifier options /
528	BOOL caseless;
529	int condcode;
530
531	/ When recursion is not being used, all "local" variables that have to be*
532	preserved over calls to RMATCH() are part of a "frame". We set up the top-level
533	frame on the stack here; subsequent instantiations are obtained from the heap
534	whenever RMATCH() does a "recursion". See the macro definitions above. Putting
535	the top-level on the stack rather than malloc-ing them all gives a performance
536	boost in many cases where there is not much "recursion". /*
537
538	#ifdef NO_RECURSE
539	heapframe frame = (heapframe )md->match_frames_base;
540
541	/ Copy in the original argument variables /
542
543	frame->Xeptr = eptr;
544	frame->Xecode = ecode;
545	frame->Xmstart = mstart;
546	frame->Xoffset_top = offset_top;
547	frame->Xeptrb = eptrb;
548	frame->Xrdepth = rdepth;
549
550	/ This is where control jumps back to to effect "recursion" /
551
552	HEAP_RECURSE:
553
554	/ Macros make the argument variables come from the current frame /
555
556	#define eptr frame->Xeptr
557	#define ecode frame->Xecode
558	#define mstart frame->Xmstart
559	#define offset_top frame->Xoffset_top
560	#define eptrb frame->Xeptrb
561	#define rdepth frame->Xrdepth
562
563	/ Ditto for the local variables /
564
565	#ifdef SUPPORT_UTF
566	#define charptr frame->Xcharptr
567	#endif
568	#define callpat frame->Xcallpat
569	#define codelink frame->Xcodelink
570	#define data frame->Xdata
571	#define next frame->Xnext
572	#define pp frame->Xpp
573	#define prev frame->Xprev
574	#define saved_eptr frame->Xsaved_eptr
575
576	#define new_recursive frame->Xnew_recursive
577
578	#define cur_is_word frame->Xcur_is_word
579	#define condition frame->Xcondition
580	#define prev_is_word frame->Xprev_is_word
581
582	#ifdef SUPPORT_UCP
583	#define prop_type frame->Xprop_type
584	#define prop_value frame->Xprop_value
585	#define prop_fail_result frame->Xprop_fail_result
586	#define oclength frame->Xoclength
587	#define occhars frame->Xocchars
588	#endif
589
590	#define ctype frame->Xctype
591	#define fc frame->Xfc
592	#define fi frame->Xfi
593	#define length frame->Xlength
594	#define max frame->Xmax
595	#define min frame->Xmin
596	#define number frame->Xnumber
597	#define offset frame->Xoffset
598	#define op frame->Xop
599	#define save_capture_last frame->Xsave_capture_last
600	#define save_offset1 frame->Xsave_offset1
601	#define save_offset2 frame->Xsave_offset2
602	#define save_offset3 frame->Xsave_offset3
603	#define stacksave frame->Xstacksave
604
605	#define newptrb frame->Xnewptrb
606
607	/ When recursion is being used, local variables are allocated on the stack and*
608	get preserved during recursion in the normal way. In this environment, fi and
609	i, and fc and c, can be the same variables. /*
610
611	#else /* NO_RECURSE not defined */
612	#define fi i
613	#define fc c
614
615	/ Many of the following variables are used only in small blocks of the code.*
616	My normal style of coding would have declared them within each of those blocks.
617	However, in order to accommodate the version of this code that uses an external
618	"stack" implemented on the heap, it is easier to declare them all here, so the
619	declarations can be cut out in a block. The only declarations within blocks
620	below are for variables that do not have to be preserved over a recursive call
621	to RMATCH(). /*
622
623	#ifdef SUPPORT_UTF
624	const pcre_uchar *charptr;
625	#endif
626	const pcre_uchar *callpat;
627	const pcre_uchar *data;
628	const pcre_uchar *next;
629	PCRE_PUCHAR pp;
630	const pcre_uchar *prev;
631	PCRE_PUCHAR saved_eptr;
632
633	recursion_info new_recursive;
634
635	BOOL cur_is_word;
636	BOOL condition;
637	BOOL prev_is_word;
638
639	#ifdef SUPPORT_UCP
640	int prop_type;
641	unsigned int prop_value;
642	int prop_fail_result;
643	int oclength;
644	pcre_uchar occhars[`6`];
645	#endif
646
647	int codelink;
648	int ctype;
649	int length;
650	int max;
651	int min;
652	unsigned int number;
653	int offset;
654	unsigned int op;
655	pcre_int32 save_capture_last;
656	int save_offset1, save_offset2, save_offset3;
657	int stacksave[REC_STACK_SAVE_MAX];
658
659	eptrblock newptrb;
660
661	/ There is a special fudge for calling match() in a way that causes it to*
662	measure the size of its basic stack frame when the stack is being used for
663	recursion. The second argument (ecode) being NULL triggers this behaviour. It
664	cannot normally ever be NULL. The return is the negated value of the frame
665	size. /*
666
667	if (ecode == NULL)
668	{
669	if (rdepth == `0`)
670	return match((PCRE_PUCHAR)&rdepth, NULL, NULL, `0`, NULL, NULL, `1`);
671	else
672	{
673	int len = (char )&rdepth - (char* *)eptr;
674	return (len > `0`)? -len : len;
675	}
676	}
677	#endif /* NO_RECURSE */
678
679	/ To save space on the stack and in the heap frame, I have doubled up on some*
680	of the local variables that are used only in localised parts of the code, but
681	still need to be preserved over recursive calls of match(). These macros define
682	the alternative names that are used. /*
683
684	#define allow_zero cur_is_word
685	#define cbegroup condition
686	#define code_offset codelink
687	#define condassert condition
688	#define matched_once prev_is_word
689	#define foc number
690	#define save_mark data
691
692	/ These statements are here to stop the compiler complaining about unitialized*
693	variables. /*
694
695	#ifdef SUPPORT_UCP
696	prop_value = `0`;
697	prop_fail_result = `0`;
698	#endif
699
700
701	/ This label is used for tail recursion, which is used in a few cases even*
702	when NO_RECURSE is not defined, in order to reduce the amount of stack that is
703	used. Thanks to Ian Taylor for noticing this possibility and sending the
704	original patch. /*
705
706	TAIL_RECURSE:
707
708	/ OK, now we can get on with the real code of the function. Recursive calls*
709	are specified by the macro RMATCH and RRETURN is used to return. When
710	NO_RECURSE is not* defined, these just turn into a recursive call to match()*
711	and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
712	defined). However, RMATCH isn't like a function call because it's quite a
713	complicated macro. It has to be used in one particular way. This shouldn't,
714	however, impact performance when true recursion is being used. /*
715
716	#ifdef SUPPORT_UTF
717	utf = md->utf; / Local copy of the flag /
718	#else
719	utf = FALSE;
720	#endif
721
722	/ First check that we haven't called match() too many times, or that we*
723	haven't exceeded the recursive call limit. /*
724
725	if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
726	if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
727
728	/ At the start of a group with an unlimited repeat that may match an empty*
729	string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
730	done this way to save having to use another function argument, which would take
731	up space on the stack. See also MATCH_CONDASSERT below.
732
733	When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
734	such remembered pointers, to be checked when we hit the closing ket, in order
735	to break infinite loops that match no characters. When match() is called in
736	other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
737	NOT be used with tail recursion, because the memory block that is used is on
738	the stack, so a new one may be required for each match(). /*
739
740	if (md->match_function_type == MATCH_CBEGROUP)
741	{
742	newptrb.epb_saved_eptr = eptr;
743	newptrb.epb_prev = eptrb;
744	eptrb = &newptrb;
745	md->match_function_type = `0`;
746	}
747
748	/ Now start processing the opcodes. /
749
750	for (;;)
751	{
752	minimize = possessive = FALSE;
753	op = *ecode;
754
755	switch(op)
756	{
757	case OP_MARK:
758	md->nomatch_mark = ecode + `2`;
759	md->mark = NULL; / In case previously set by assertion /
760	RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[`1`], offset_top, md,
761	eptrb, RM55);
762	if ((rrc == MATCH_MATCH \|\| rrc == MATCH_ACCEPT) &&
763	md->mark == NULL) md->mark = ecode + `2`;
764
765	/ A return of MATCH_SKIP_ARG means that matching failed at SKIP with an*
766	argument, and we must check whether that argument matches this MARK's
767	argument. It is passed back in md->start_match_ptr (an overloading of that
768	variable). If it does match, we reset that variable to the current subject
769	position and return MATCH_SKIP. Otherwise, pass back the return code
770	unaltered. /*
771
772	else if (rrc == MATCH_SKIP_ARG &&
773	STRCMP_UC_UC_TEST(ecode + `2`, md->start_match_ptr) == `0`)
774	{
775	md->start_match_ptr = eptr;
776	RRETURN(MATCH_SKIP);
777	}
778	RRETURN(rrc);
779
780	case OP_FAIL:
781	RRETURN(MATCH_NOMATCH);
782
783	case OP_COMMIT:
784	RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
785	eptrb, RM52);
786	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
787	RRETURN(MATCH_COMMIT);
788
789	case OP_PRUNE:
790	RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
791	eptrb, RM51);
792	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
793	RRETURN(MATCH_PRUNE);
794
795	case OP_PRUNE_ARG:
796	md->nomatch_mark = ecode + `2`;
797	md->mark = NULL; / In case previously set by assertion /
798	RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[`1`], offset_top, md,
799	eptrb, RM56);
800	if ((rrc == MATCH_MATCH \|\| rrc == MATCH_ACCEPT) &&
801	md->mark == NULL) md->mark = ecode + `2`;
802	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
803	RRETURN(MATCH_PRUNE);
804
805	case OP_SKIP:
806	RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
807	eptrb, RM53);
808	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
809	md->start_match_ptr = eptr; / Pass back current position /
810	RRETURN(MATCH_SKIP);
811
812	/ Note that, for Perl compatibility, SKIP with an argument does NOT set*
813	nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
814	not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
815	that failed and any that precede it (either they also failed, or were not
816	triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
817	SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
818	set to the count of the one that failed. /*
819
820	case OP_SKIP_ARG:
821	md->skip_arg_count++;
822	if (md->skip_arg_count <= md->ignore_skip_arg)
823	{
824	ecode += PRIV(OP_lengths)[*ecode] + ecode[`1`];
825	break;
826	}
827	RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[`1`], offset_top, md,
828	eptrb, RM57);
829	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
830
831	/ Pass back the current skip name by overloading md->start_match_ptr and*
832	returning the special MATCH_SKIP_ARG return code. This will either be
833	caught by a matching MARK, or get to the top, where it causes a rematch
834	with md->ignore_skip_arg set to the value of md->skip_arg_count. /*
835
836	md->start_match_ptr = ecode + `2`;
837	RRETURN(MATCH_SKIP_ARG);
838
839	/ For THEN (and THEN_ARG) we pass back the address of the opcode, so that*
840	the branch in which it occurs can be determined. Overload the start of
841	match pointer to do this. /*
842
843	case OP_THEN:
844	RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
845	eptrb, RM54);
846	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
847	md->start_match_ptr = ecode;
848	RRETURN(MATCH_THEN);
849
850	case OP_THEN_ARG:
851	md->nomatch_mark = ecode + `2`;
852	md->mark = NULL; / In case previously set by assertion /
853	RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[`1`], offset_top,
854	md, eptrb, RM58);
855	if ((rrc == MATCH_MATCH \|\| rrc == MATCH_ACCEPT) &&
856	md->mark == NULL) md->mark = ecode + `2`;
857	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
858	md->start_match_ptr = ecode;
859	RRETURN(MATCH_THEN);
860
861	/ Handle an atomic group that does not contain any capturing parentheses.*
862	This can be handled like an assertion. Prior to 8.13, all atomic groups
863	were handled this way. In 8.13, the code was changed as below for ONCE, so
864	that backups pass through the group and thereby reset captured values.
865	However, this uses a lot more stack, so in 8.20, atomic groups that do not
866	contain any captures generate OP_ONCE_NC, which can be handled in the old,
867	less stack intensive way.
868
869	Check the alternative branches in turn - the matching won't pass the KET
870	for this kind of subpattern. If any one branch matches, we carry on as at
871	the end of a normal bracket, leaving the subject pointer, but resetting
872	the start-of-match value in case it was changed by \K. /*
873
874	case OP_ONCE_NC:
875	prev = ecode;
876	saved_eptr = eptr;
877	save_mark = md->mark;
878	do
879	{
880	RMATCH(eptr, ecode + `1` + LINK_SIZE, offset_top, md, eptrb, RM64);
881	if (rrc == MATCH_MATCH) / Note: _not_ MATCH_ACCEPT /
882	{
883	mstart = md->start_match_ptr;
884	break;
885	}
886	if (rrc == MATCH_THEN)
887	{
888	next = ecode + GET(ecode,`1`);
889	if (md->start_match_ptr < next &&
890	(ecode == OP_ALT \|\| next == OP_ALT))
891	rrc = MATCH_NOMATCH;
892	}
893
894	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
895	ecode += GET(ecode,`1`);
896	md->mark = save_mark;
897	}
898	while (*ecode == OP_ALT);
899
900	/* If we hit the end of the group (which could be repeated), fail. */
901
902	if (ecode != OP_ONCE_NC && ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
903
904	/ Continue as from after the group, updating the offsets high water*
905	mark, since extracts may have been taken. /*
906
907	do ecode += GET(ecode, `1`); while (*ecode == OP_ALT);
908
909	offset_top = md->end_offset_top;
910	eptr = md->end_match_ptr;
911
912	/ For a non-repeating ket, just continue at this level. This also*
913	happens for a repeating ket if no characters were matched in the group.
914	This is the forcible breaking of infinite loops as implemented in Perl
915	5.005. /*
916
917	if (*ecode == OP_KET \|\| eptr == saved_eptr)
918	{
919	ecode += `1`+LINK_SIZE;
920	break;
921	}
922
923	/ The repeating kets try the rest of the pattern or restart from the*
924	preceding bracket, in the appropriate order. The second "call" of match()
925	uses tail recursion, to avoid using another stack frame. /*
926
927	if (*ecode == OP_KETRMIN)
928	{
929	RMATCH(eptr, ecode + `1` + LINK_SIZE, offset_top, md, eptrb, RM65);
930	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
931	ecode = prev;
932	goto TAIL_RECURSE;
933	}
934	else / OP_KETRMAX /
935	{
936	RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
937	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
938	ecode += `1` + LINK_SIZE;
939	goto TAIL_RECURSE;
940	}
941	/ Control never gets here /
942
943	/ Handle a capturing bracket, other than those that are possessive with an*
944	unlimited repeat. If there is space in the offset vector, save the current
945	subject position in the working slot at the top of the vector. We mustn't
946	change the current values of the data slot, because they may be set from a
947	previous iteration of this group, and be referred to by a reference inside
948	the group. A failure to match might occur after the group has succeeded,
949	if something later on doesn't match. For this reason, we need to restore
950	the working value and also the values of the final offsets, in case they
951	were set by a previous iteration of the same bracket.
952
953	If there isn't enough space in the offset vector, treat this as if it were
954	a non-capturing bracket. Don't worry about setting the flag for the error
955	case here; that is handled in the code for KET. /*
956
957	case OP_CBRA:
958	case OP_SCBRA:
959	number = GET2(ecode, `1`+LINK_SIZE);
960	offset = number << `1`;
961
962	#ifdef PCRE_DEBUG
963	printf("start bracket %d\n", number);
964	printf("subject=");
965	pchars(eptr, `16`, TRUE, md);
966	printf("\n");
967	#endif
968
969	if (offset < md->offset_max)
970	{
971	save_offset1 = md->offset_vector[offset];
972	save_offset2 = md->offset_vector[offset+`1`];
973	save_offset3 = md->offset_vector[md->offset_end - number];
974	save_capture_last = md->capture_last;
975	save_mark = md->mark;
976
977	DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
978	md->offset_vector[md->offset_end - number] =
979	(int)(eptr - md->start_subject);
980
981	for (;;)
982	{
983	if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
984	RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
985	eptrb, RM1);
986	if (rrc == MATCH_ONCE) break; / Backing up through an atomic group /
987
988	/* If we backed up to a THEN, check whether it is within the current
989	branch by comparing the address of the THEN that is passed back with
990	the end of the branch. If it is within the current branch, and the
991	branch is one of two or more alternatives (it either starts or ends
992	with OP_ALT), we have reached the limit of THEN's action, so convert
993	the return code to NOMATCH, which will cause normal backtracking to
994	happen from now on. Otherwise, THEN is passed back to an outer
995	alternative. This implements Perl's treatment of parenthesized groups,
996	where a group not containing | does not affect the current alternative,
997	that is, (X) is NOT the same as (X|(*F)). */
998
999	if (rrc == MATCH_THEN)
1000	{
1001	next = ecode + GET(ecode,`1`);
1002	if (md->start_match_ptr < next &&
1003	(ecode == OP_ALT \|\| next == OP_ALT))
1004	rrc = MATCH_NOMATCH;
1005	}
1006
1007	/ Anything other than NOMATCH is passed back. /
1008
1009	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1010	md->capture_last = save_capture_last;
1011	ecode += GET(ecode, `1`);
1012	md->mark = save_mark;
1013	if (ecode != OP_ALT) break*;
1014	}
1015
1016	DPRINTF(("bracket %d failed\n", number));
1017	md->offset_vector[offset] = save_offset1;
1018	md->offset_vector[offset+`1`] = save_offset2;
1019	md->offset_vector[md->offset_end - number] = save_offset3;
1020
1021	/ At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. /
1022
1023	RRETURN(rrc);
1024	}
1025
1026	/ FALL THROUGH ... Insufficient room for saving captured contents. Treat*
1027	as a non-capturing bracket. /*
1028
1029	/ VVVVVVVVVVVVVVVVVVVVVVVVV /
1030	/ VVVVVVVVVVVVVVVVVVVVVVVVV /
1031
1032	DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1033
1034	/ VVVVVVVVVVVVVVVVVVVVVVVVV /
1035	/ VVVVVVVVVVVVVVVVVVVVVVVVV /
1036
1037	/* Non-capturing or atomic group, except for possessive with unlimited
1038	repeat and ONCE group with no captures. Loop for all the alternatives.
1039
1040	When we get to the final alternative within the brackets, we used to return
1041	the result of a recursive call to match() whatever happened so it was
1042	possible to reduce stack usage by turning this into a tail recursion,
1043	except in the case of a possibly empty group. However, now that there is
1044	the possibility of (*THEN) occurring in the final alternative, this
1045	optimization is no longer always possible.
1046
1047	We can optimize if we know there are no (*THEN)s in the pattern; at present
1048	this is the best that can be done.
1049
1050	MATCH_ONCE is returned when the end of an atomic group is successfully
1051	reached, but subsequent matching fails. It passes back up the tree (causing
1052	captured values to be reset) until the original atomic group level is
1053	reached. This is tested by comparing md->once_target with the start of the
1054	group. At this point, the return is converted into MATCH_NOMATCH so that
1055	previous backup points can be taken. */
1056
1057	case OP_ONCE:
1058	case OP_BRA:
1059	case OP_SBRA:
1060	DPRINTF(("start non-capturing bracket\n"));
1061
1062	for (;;)
1063	{
1064	if (op >= OP_SBRA \|\| op == OP_ONCE)
1065	md->match_function_type = MATCH_CBEGROUP;
1066
1067	/* If this is not a possibly empty group, and there are no (*THEN)s in
1068	the pattern, and this is the final alternative, optimize as described
1069	above. */
1070
1071	else if (!md->hasthen && ecode[GET(ecode, `1`)] != OP_ALT)
1072	{
1073	ecode += PRIV(OP_lengths)[*ecode];
1074	goto TAIL_RECURSE;
1075	}
1076
1077	/ In all other cases, we have to make another call to match(). /
1078
1079	save_mark = md->mark;
1080	save_capture_last = md->capture_last;
1081	RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1082	RM2);
1083
1084	/ See comment in the code for capturing groups above about handling*
1085	THEN. /*
1086
1087	if (rrc == MATCH_THEN)
1088	{
1089	next = ecode + GET(ecode,`1`);
1090	if (md->start_match_ptr < next &&
1091	(ecode == OP_ALT \|\| next == OP_ALT))
1092	rrc = MATCH_NOMATCH;
1093	}
1094
1095	if (rrc != MATCH_NOMATCH)
1096	{
1097	if (rrc == MATCH_ONCE)
1098	{
1099	const pcre_uchar *scode = ecode;
1100	if (scode != OP_ONCE) /* If not at start, find it /
1101	{
1102	while (*scode == OP_ALT) scode += GET(scode, `1`);
1103	scode -= GET(scode, `1`);
1104	}
1105	if (md->once_target == scode) rrc = MATCH_NOMATCH;
1106	}
1107	RRETURN(rrc);
1108	}
1109	ecode += GET(ecode, `1`);
1110	md->mark = save_mark;
1111	if (ecode != OP_ALT) break*;
1112	md->capture_last = save_capture_last;
1113	}
1114
1115	RRETURN(MATCH_NOMATCH);
1116
1117	/ Handle possessive capturing brackets with an unlimited repeat. We come*
1118	here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1119	handled similarly to the normal case above. However, the matching is
1120	different. The end of these brackets will always be OP_KETRPOS, which
1121	returns MATCH_KETRPOS without going further in the pattern. By this means
1122	we can handle the group by iteration rather than recursion, thereby
1123	reducing the amount of stack needed. /*
1124
1125	case OP_CBRAPOS:
1126	case OP_SCBRAPOS:
1127	allow_zero = FALSE;
1128
1129	POSSESSIVE_CAPTURE:
1130	number = GET2(ecode, `1`+LINK_SIZE);
1131	offset = number << `1`;
1132
1133	#ifdef PCRE_DEBUG
1134	printf("start possessive bracket %d\n", number);
1135	printf("subject=");
1136	pchars(eptr, `16`, TRUE, md);
1137	printf("\n");
1138	#endif
1139
1140	if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE;
1141
1142	matched_once = FALSE;
1143	code_offset = (int)(ecode - md->start_code);
1144
1145	save_offset1 = md->offset_vector[offset];
1146	save_offset2 = md->offset_vector[offset+`1`];
1147	save_offset3 = md->offset_vector[md->offset_end - number];
1148	save_capture_last = md->capture_last;
1149
1150	DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1151
1152	/ Each time round the loop, save the current subject position for use*
1153	when the group matches. For MATCH_MATCH, the group has matched, so we
1154	restart it with a new subject starting position, remembering that we had
1155	at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1156	usual. If we haven't matched any alternatives in any iteration, check to
1157	see if a previous iteration matched. If so, the group has matched;
1158	continue from afterwards. Otherwise it has failed; restore the previous
1159	capture values before returning NOMATCH. /*
1160
1161	for (;;)
1162	{
1163	md->offset_vector[md->offset_end - number] =
1164	(int)(eptr - md->start_subject);
1165	if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1166	RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1167	eptrb, RM63);
1168	if (rrc == MATCH_KETRPOS)
1169	{
1170	offset_top = md->end_offset_top;
1171	ecode = md->start_code + code_offset;
1172	save_capture_last = md->capture_last;
1173	matched_once = TRUE;
1174	mstart = md->start_match_ptr; / In case \K changed it /
1175	if (eptr == md->end_match_ptr) / Matched an empty string /
1176	{
1177	do ecode += GET(ecode, `1`); while (*ecode == OP_ALT);
1178	break;
1179	}
1180	eptr = md->end_match_ptr;
1181	continue;
1182	}
1183
1184	/ See comment in the code for capturing groups above about handling*
1185	THEN. /*
1186
1187	if (rrc == MATCH_THEN)
1188	{
1189	next = ecode + GET(ecode,`1`);
1190	if (md->start_match_ptr < next &&
1191	(ecode == OP_ALT \|\| next == OP_ALT))
1192	rrc = MATCH_NOMATCH;
1193	}
1194
1195	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1196	md->capture_last = save_capture_last;
1197	ecode += GET(ecode, `1`);
1198	if (ecode != OP_ALT) break*;
1199	}
1200
1201	if (!matched_once)
1202	{
1203	md->offset_vector[offset] = save_offset1;
1204	md->offset_vector[offset+`1`] = save_offset2;
1205	md->offset_vector[md->offset_end - number] = save_offset3;
1206	}
1207
1208	if (allow_zero \|\| matched_once)
1209	{
1210	ecode += `1` + LINK_SIZE;
1211	break;
1212	}
1213
1214	RRETURN(MATCH_NOMATCH);
1215
1216	/ Non-capturing possessive bracket with unlimited repeat. We come here*
1217	from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1218	without the capturing complication. It is written out separately for speed
1219	and cleanliness. /*
1220
1221	case OP_BRAPOS:
1222	case OP_SBRAPOS:
1223	allow_zero = FALSE;
1224
1225	POSSESSIVE_NON_CAPTURE:
1226	matched_once = FALSE;
1227	code_offset = (int)(ecode - md->start_code);
1228	save_capture_last = md->capture_last;
1229
1230	for (;;)
1231	{
1232	if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1233	RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1234	eptrb, RM48);
1235	if (rrc == MATCH_KETRPOS)
1236	{
1237	offset_top = md->end_offset_top;
1238	ecode = md->start_code + code_offset;
1239	matched_once = TRUE;
1240	mstart = md->start_match_ptr; / In case \K reset it /
1241	if (eptr == md->end_match_ptr) / Matched an empty string /
1242	{
1243	do ecode += GET(ecode, `1`); while (*ecode == OP_ALT);
1244	break;
1245	}
1246	eptr = md->end_match_ptr;
1247	continue;
1248	}
1249
1250	/ See comment in the code for capturing groups above about handling*
1251	THEN. /*
1252
1253	if (rrc == MATCH_THEN)
1254	{
1255	next = ecode + GET(ecode,`1`);
1256	if (md->start_match_ptr < next &&
1257	(ecode == OP_ALT \|\| next == OP_ALT))
1258	rrc = MATCH_NOMATCH;
1259	}
1260
1261	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1262	ecode += GET(ecode, `1`);
1263	if (ecode != OP_ALT) break*;
1264	md->capture_last = save_capture_last;
1265	}
1266
1267	if (matched_once \|\| allow_zero)
1268	{
1269	ecode += `1` + LINK_SIZE;
1270	break;
1271	}
1272	RRETURN(MATCH_NOMATCH);
1273
1274	/ Control never reaches here. /
1275
1276	/ Conditional group: compilation checked that there are no more than two*
1277	branches. If the condition is false, skipping the first branch takes us
1278	past the end of the item if there is only one branch, but that's exactly
1279	what we want. /*
1280
1281	case OP_COND:
1282	case OP_SCOND:
1283
1284	/ The variable codelink will be added to ecode when the condition is*
1285	false, to get to the second branch. Setting it to the offset to the ALT
1286	or KET, then incrementing ecode achieves this effect. We now have ecode
1287	pointing to the condition or callout. /*
1288
1289	codelink = GET(ecode, `1`); / Offset to the second branch /
1290	ecode += `1` + LINK_SIZE; / From this opcode /
1291
1292	/ Because of the way auto-callout works during compile, a callout item is*
1293	inserted between OP_COND and an assertion condition. /*
1294
1295	if (*ecode == OP_CALLOUT)
1296	{
1297	if (PUBL(callout) != NULL)
1298	{
1299	PUBL(callout_block) cb;
1300	cb.version = `2`; / Version 1 of the callout block /
1301	cb.callout_number = ecode[`1`];
1302	cb.offset_vector = md->offset_vector;
1303	#if defined COMPILE_PCRE8
1304	cb.subject = (PCRE_SPTR)md->start_subject;
1305	#elif defined COMPILE_PCRE16
1306	cb.subject = (PCRE_SPTR16)md->start_subject;
1307	#elif defined COMPILE_PCRE32
1308	cb.subject = (PCRE_SPTR32)md->start_subject;
1309	#endif
1310	cb.subject_length = (int)(md->end_subject - md->start_subject);
1311	cb.start_match = (int)(mstart - md->start_subject);
1312	cb.current_position = (int)(eptr - md->start_subject);
1313	cb.pattern_position = GET(ecode, `2`);
1314	cb.next_item_length = GET(ecode, `2` + LINK_SIZE);
1315	cb.capture_top = offset_top/`2`;
1316	cb.capture_last = md->capture_last & CAPLMASK;
1317	/ Internal change requires this for API compatibility. /
1318	if (cb.capture_last == `0`) cb.capture_last = -`1`;
1319	cb.callout_data = md->callout_data;
1320	cb.mark = md->nomatch_mark;
1321	if ((rrc = (*PUBL(callout))(&cb)) > `0`) RRETURN(MATCH_NOMATCH);
1322	if (rrc < `0`) RRETURN(rrc);
1323	}
1324
1325	/ Advance ecode past the callout, so it now points to the condition. We*
1326	must adjust codelink so that the value of ecode+codelink is unchanged. /*
1327
1328	ecode += PRIV(OP_lengths)[OP_CALLOUT];
1329	codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1330	}
1331
1332	/ Test the various possible conditions /
1333
1334	condition = FALSE;
1335	switch(condcode = *ecode)
1336	{
1337	case OP_RREF: / Numbered group recursion test /
1338	if (md->recursive != NULL) / Not recursing => FALSE /
1339	{
1340	unsigned int recno = GET2(ecode, `1`); / Recursion group number/
1341	condition = (recno == RREF_ANY \|\| recno == md->recursive->group_num);
1342	}
1343	break;
1344
1345	case OP_DNRREF: / Duplicate named group recursion test /
1346	if (md->recursive != NULL)
1347	{
1348	int count = GET2(ecode, `1` + IMM2_SIZE);
1349	pcre_uchar slot = md->name_table + GET2(ecode, `1`) md->name_entry_size;
1350	while (count-- > `0`)
1351	{
1352	unsigned int recno = GET2(slot, `0`);
1353	condition = recno == md->recursive->group_num;
1354	if (condition) break;
1355	slot += md->name_entry_size;
1356	}
1357	}
1358	break;
1359
1360	case OP_CREF: / Numbered group used test /
1361	offset = GET2(ecode, `1`) << `1`; / Doubled ref number /
1362	condition = offset < offset_top && md->offset_vector[offset] >= `0`;
1363	break;
1364
1365	case OP_DNCREF: / Duplicate named group used test /
1366	{
1367	int count = GET2(ecode, `1` + IMM2_SIZE);
1368	pcre_uchar slot = md->name_table + GET2(ecode, `1`) md->name_entry_size;
1369	while (count-- > `0`)
1370	{
1371	offset = GET2(slot, `0`) << `1`;
1372	condition = offset < offset_top && md->offset_vector[offset] >= `0`;
1373	if (condition) break;
1374	slot += md->name_entry_size;
1375	}
1376	}
1377	break;
1378
1379	case OP_DEF: / DEFINE - always false /
1380	case OP_FAIL: / From optimized (?!) condition /
1381	break;
1382
1383	/ The condition is an assertion. Call match() to evaluate it - setting*
1384	md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
1385	of an assertion. /*
1386
1387	default:
1388	md->match_function_type = MATCH_CONDASSERT;
1389	RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
1390	if (rrc == MATCH_MATCH)
1391	{
1392	if (md->end_offset_top > offset_top)
1393	offset_top = md->end_offset_top; / Captures may have happened /
1394	condition = TRUE;
1395
1396	/ Advance ecode past the assertion to the start of the first branch,*
1397	but adjust it so that the general choosing code below works. If the
1398	assertion has a quantifier that allows zero repeats we must skip over
1399	the BRAZERO. This is a lunatic thing to do, but somebody did! /*
1400
1401	if (*ecode == OP_BRAZERO) ecode++;
1402	ecode += GET(ecode, `1`);
1403	while (*ecode == OP_ALT) ecode += GET(ecode, `1`);
1404	ecode += `1` + LINK_SIZE - PRIV(OP_lengths)[condcode];
1405	}
1406
1407	/* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1408	assertion; it is therefore treated as NOMATCH. Any other return is an
1409	error. */
1410
1411	else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1412	{
1413	RRETURN(rrc); / Need braces because of following else /
1414	}
1415	break;
1416	}
1417
1418	/ Choose branch according to the condition /
1419
1420	ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
1421
1422	/* We are now at the branch that is to be obeyed. As there is only one, we
1423	can use tail recursion to avoid using another stack frame, except when
1424	there is unlimited repeat of a possibly empty group. In the latter case, a
1425	recursive call to match() is always required, unless the second alternative
1426	doesn't exist, in which case we can just plough on. Note that, for
1427	compatibility with Perl, the | in a conditional group is NOT treated as
1428	creating two alternatives. If a THEN is encountered in the branch, it
1429	propagates out to the enclosing alternative (unless nested in a deeper set
1430	of alternatives, of course). */
1431
1432	if (condition \|\| ecode[-(`1`+LINK_SIZE)] == OP_ALT)
1433	{
1434	if (op != OP_SCOND)
1435	{
1436	goto TAIL_RECURSE;
1437	}
1438
1439	md->match_function_type = MATCH_CBEGROUP;
1440	RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
1441	RRETURN(rrc);
1442	}
1443
1444	/ Condition false & no alternative; continue after the group. /
1445
1446	else
1447	{
1448	}
1449	break;
1450
1451
1452	/ Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,*
1453	to close any currently open capturing brackets. /*
1454
1455	case OP_CLOSE:
1456	number = GET2(ecode, `1`); / Must be less than 65536 /
1457	offset = number << `1`;
1458
1459	#ifdef PCRE_DEBUG
1460	printf("end bracket %d at *ACCEPT", number);
1461	printf("\n");
1462	#endif
1463
1464	md->capture_last = (md->capture_last & OVFLMASK) \| number;
1465	if (offset >= md->offset_max) md->capture_last \|= OVFLBIT; else
1466	{
1467	md->offset_vector[offset] =
1468	md->offset_vector[md->offset_end - number];
1469	md->offset_vector[offset+`1`] = (int)(eptr - md->start_subject);
1470
1471	/ If this group is at or above the current highwater mark, ensure that*
1472	any groups between the current high water mark and this group are marked
1473	unset and then update the high water mark. /*
1474
1475	if (offset >= offset_top)
1476	{
1477	register int *iptr = md->offset_vector + offset_top;
1478	register int *iend = md->offset_vector + offset;
1479	while (iptr < iend) *iptr++ = -`1`;
1480	offset_top = offset + `2`;
1481	}
1482	}
1483	ecode += `1` + IMM2_SIZE;
1484	break;
1485
1486
1487	/ End of the pattern, either real or forced. /
1488
1489	case OP_END:
1490	case OP_ACCEPT:
1491	case OP_ASSERT_ACCEPT:
1492
1493	/ If we have matched an empty string, fail if not in an assertion and not*
1494	in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1495	is set and we have matched at the start of the subject. In both cases,
1496	backtracking will then try other alternatives, if any. /*
1497
1498	if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1499	md->recursive == NULL &&
1500	(md->notempty \|\|
1501	(md->notempty_atstart &&
1502	mstart == md->start_subject + md->start_offset)))
1503	RRETURN(MATCH_NOMATCH);
1504
1505	/ Otherwise, we have a match. /
1506
1507	md->end_match_ptr = eptr; / Record where we ended /
1508	md->end_offset_top = offset_top; / and how many extracts were taken /
1509	md->start_match_ptr = mstart; / and the start (\K can modify) /
1510
1511	/ For some reason, the macros don't work properly if an expression is*
1512	given as the argument to RRETURN when the heap is in use. /*
1513
1514	rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1515	RRETURN(rrc);
1516
1517	/* Assertion brackets. Check the alternative branches in turn - the
1518	matching won't pass the KET for an assertion. If any one branch matches,
1519	the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1520	start of each branch to move the current point backwards, so the code at
1521	this level is identical to the lookahead case. When the assertion is part
1522	of a condition, we want to return immediately afterwards. The caller of
1523	this incarnation of the match() function will have set MATCH_CONDASSERT in
1524	md->match_function_type, and one of these opcodes will be the first opcode
1525	that is processed. We use a local variable that is preserved over calls to
1526	match() to remember this case. */
1527
1528	case OP_ASSERT:
1529	case OP_ASSERTBACK:
1530	save_mark = md->mark;
1531	if (md->match_function_type == MATCH_CONDASSERT)
1532	{
1533	condassert = TRUE;
1534	md->match_function_type = `0`;
1535	}
1536	else condassert = FALSE;
1537
1538	/ Loop for each branch /
1539
1540	do
1541	{
1542	RMATCH(eptr, ecode + `1` + LINK_SIZE, offset_top, md, NULL, RM4);
1543
1544	/ A match means that the assertion is true; break out of the loop*
1545	that matches its alternatives. /*
1546
1547	if (rrc == MATCH_MATCH \|\| rrc == MATCH_ACCEPT)
1548	{
1549	mstart = md->start_match_ptr; / In case \K reset it /
1550	break;
1551	}
1552
1553	/ If not matched, restore the previous mark setting. /
1554
1555	md->mark = save_mark;
1556
1557	/ See comment in the code for capturing groups above about handling*
1558	THEN. /*
1559
1560	if (rrc == MATCH_THEN)
1561	{
1562	next = ecode + GET(ecode,`1`);
1563	if (md->start_match_ptr < next &&
1564	(ecode == OP_ALT \|\| next == OP_ALT))
1565	rrc = MATCH_NOMATCH;
1566	}
1567
1568	/ Anything other than NOMATCH causes the entire assertion to fail,*
1569	passing back the return code. This includes COMMIT, SKIP, PRUNE and an
1570	uncaptured THEN, which means they take their normal effect. This
1571	consistent approach does not always have exactly the same effect as in
1572	Perl. /*
1573
1574	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1575	ecode += GET(ecode, `1`);
1576	}
1577	while (ecode == OP_ALT); /* Continue for next alternative /
1578
1579	/ If we have tried all the alternative branches, the assertion has*
1580	failed. If not, we broke out after a match. /*
1581
1582	if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1583
1584	/ If checking an assertion for a condition, return MATCH_MATCH. /
1585
1586	if (condassert) RRETURN(MATCH_MATCH);
1587
1588	/ Continue from after a successful assertion, updating the offsets high*
1589	water mark, since extracts may have been taken during the assertion. /*
1590
1591	do ecode += GET(ecode,`1`); while (*ecode == OP_ALT);
1592	ecode += `1` + LINK_SIZE;
1593	offset_top = md->end_offset_top;
1594	continue;
1595
1596	/ Negative assertion: all branches must fail to match for the assertion to*
1597	succeed. /*
1598
1599	case OP_ASSERT_NOT:
1600	case OP_ASSERTBACK_NOT:
1601	save_mark = md->mark;
1602	if (md->match_function_type == MATCH_CONDASSERT)
1603	{
1604	condassert = TRUE;
1605	md->match_function_type = `0`;
1606	}
1607	else condassert = FALSE;
1608
1609	/ Loop for each alternative branch. /
1610
1611	do
1612	{
1613	RMATCH(eptr, ecode + `1` + LINK_SIZE, offset_top, md, NULL, RM5);
1614	md->mark = save_mark; / Always restore the mark setting /
1615
1616	switch(rrc)
1617	{
1618	case MATCH_MATCH: / A successful match means /
1619	case MATCH_ACCEPT: / the assertion has failed. /
1620	RRETURN(MATCH_NOMATCH);
1621
1622	case MATCH_NOMATCH: / Carry on with next branch /
1623	break;
1624
1625	/ See comment in the code for capturing groups above about handling*
1626	THEN. /*
1627
1628	case MATCH_THEN:
1629	next = ecode + GET(ecode,`1`);
1630	if (md->start_match_ptr < next &&
1631	(ecode == OP_ALT \|\| next == OP_ALT))
1632	{
1633	rrc = MATCH_NOMATCH;
1634	break;
1635	}
1636	/ Otherwise fall through. /
1637
1638	/ COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole*
1639	assertion to fail to match, without considering any more alternatives.
1640	Failing to match means the assertion is true. This is a consistent
1641	approach, but does not always have the same effect as in Perl. /*
1642
1643	case MATCH_COMMIT:
1644	case MATCH_SKIP:
1645	case MATCH_SKIP_ARG:
1646	case MATCH_PRUNE:
1647	do ecode += GET(ecode,`1`); while (*ecode == OP_ALT);
1648	goto NEG_ASSERT_TRUE; / Break out of alternation loop /
1649
1650	/ Anything else is an error /
1651
1652	default:
1653	RRETURN(rrc);
1654	}
1655
1656	/ Continue with next branch /
1657
1658	ecode += GET(ecode,`1`);
1659	}
1660	while (*ecode == OP_ALT);
1661
1662	/ All branches in the assertion failed to match. /
1663
1664	NEG_ASSERT_TRUE:
1665	if (condassert) RRETURN(MATCH_MATCH); / Condition assertion /
1666	ecode += `1` + LINK_SIZE; / Continue with current branch /
1667	continue;
1668
1669	/ Move the subject pointer back. This occurs only at the start of*
1670	each branch of a lookbehind assertion. If we are too close to the start to
1671	move back, this match function fails. When working with UTF-8 we move
1672	back a number of characters, not bytes. /*
1673
1674	case OP_REVERSE:
1675	#ifdef SUPPORT_UTF
1676	if (utf)
1677	{
1678	i = GET(ecode, `1`);
1679	while (i-- > `0`)
1680	{
1681	eptr--;
1682	if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1683	BACKCHAR(eptr);
1684	}
1685	}
1686	else
1687	#endif
1688
1689	/ No UTF-8 support, or not in UTF-8 mode: count is byte count /
1690
1691	{
1692	eptr -= GET(ecode, `1`);
1693	if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1694	}
1695
1696	/ Save the earliest consulted character, then skip to next op code /
1697
1698	if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1699	ecode += `1` + LINK_SIZE;
1700	break;
1701
1702	/ The callout item calls an external function, if one is provided, passing*
1703	details of the match so far. This is mainly for debugging, though the
1704	function is able to force a failure. /*
1705
1706	case OP_CALLOUT:
1707	if (PUBL(callout) != NULL)
1708	{
1709	PUBL(callout_block) cb;
1710	cb.version = `2`; / Version 1 of the callout block /
1711	cb.callout_number = ecode[`1`];
1712	cb.offset_vector = md->offset_vector;
1713	#if defined COMPILE_PCRE8
1714	cb.subject = (PCRE_SPTR)md->start_subject;
1715	#elif defined COMPILE_PCRE16
1716	cb.subject = (PCRE_SPTR16)md->start_subject;
1717	#elif defined COMPILE_PCRE32
1718	cb.subject = (PCRE_SPTR32)md->start_subject;
1719	#endif
1720	cb.subject_length = (int)(md->end_subject - md->start_subject);
1721	cb.start_match = (int)(mstart - md->start_subject);
1722	cb.current_position = (int)(eptr - md->start_subject);
1723	cb.pattern_position = GET(ecode, `2`);
1724	cb.next_item_length = GET(ecode, `2` + LINK_SIZE);
1725	cb.capture_top = offset_top/`2`;
1726	cb.capture_last = md->capture_last & CAPLMASK;
1727	/ Internal change requires this for API compatibility. /
1728	if (cb.capture_last == `0`) cb.capture_last = -`1`;
1729	cb.callout_data = md->callout_data;
1730	cb.mark = md->nomatch_mark;
1731	if ((rrc = (*PUBL(callout))(&cb)) > `0`) RRETURN(MATCH_NOMATCH);
1732	if (rrc < `0`) RRETURN(rrc);
1733	}
1734	ecode += `2` + `2`*LINK_SIZE;
1735	break;
1736
1737	/ Recursion either matches the current regex, or some subexpression. The*
1738	offset data is the offset to the starting bracket from the start of the
1739	whole pattern. (This is so that it works from duplicated subpatterns.)
1740
1741	The state of the capturing groups is preserved over recursion, and
1742	re-instated afterwards. We don't know how many are started and not yet
1743	finished (offset_top records the completed total) so we just have to save
1744	all the potential data. There may be up to 65535 such values, which is too
1745	large to put on the stack, but using malloc for small numbers seems
1746	expensive. As a compromise, the stack is used when there are no more than
1747	REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1748
1749	There are also other values that have to be saved. We use a chained
1750	sequence of blocks that actually live on the stack. Thanks to Robin Houston
1751	for the original version of this logic. It has, however, been hacked around
1752	a lot, so he is not to blame for the current way it works. /*
1753
1754	case OP_RECURSE:
1755	{
1756	recursion_info *ri;
1757	unsigned int recno;
1758
1759	callpat = md->start_code + GET(ecode, `1`);
1760	recno = (callpat == md->start_code)? `0` :
1761	GET2(callpat, `1` + LINK_SIZE);
1762
1763	/ Check for repeating a recursion without advancing the subject pointer.*
1764	This should catch convoluted mutual recursions. (Some simple cases are
1765	caught at compile time.) /*
1766
1767	for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1768	if (recno == ri->group_num && eptr == ri->subject_position)
1769	RRETURN(PCRE_ERROR_RECURSELOOP);
1770
1771	/ Add to "recursing stack" /
1772
1773	new_recursive.group_num = recno;
1774	new_recursive.saved_capture_last = md->capture_last;
1775	new_recursive.subject_position = eptr;
1776	new_recursive.prevrec = md->recursive;
1777	md->recursive = &new_recursive;
1778
1779	/ Where to continue from afterwards /
1780
1781	ecode += `1` + LINK_SIZE;
1782
1783	/ Now save the offset data /
1784
1785	new_recursive.saved_max = md->offset_end;
1786	if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1787	new_recursive.offset_save = stacksave;
1788	else
1789	{
1790	new_recursive.offset_save =
1791	(int )(PUBL(malloc))(new_recursive.saved_max sizeof(int));
1792	if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1793	}
1794	memcpy(new_recursive.offset_save, md->offset_vector,
1795	new_recursive.saved_max * sizeof(int));
1796
1797	/* OK, now we can do the recursion. After processing each alternative,
1798	restore the offset data and the last captured value. If there were nested
1799	recursions, md->recursive might be changed, so reset it before looping.
1800	*/
1801
1802	DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1803	cbegroup = (*callpat >= OP_SBRA);
1804	do
1805	{
1806	if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1807	RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1808	md, eptrb, RM6);
1809	memcpy(md->offset_vector, new_recursive.offset_save,
1810	new_recursive.saved_max * sizeof(int));
1811	md->capture_last = new_recursive.saved_capture_last;
1812	md->recursive = new_recursive.prevrec;
1813	if (rrc == MATCH_MATCH \|\| rrc == MATCH_ACCEPT)
1814	{
1815	DPRINTF(("Recursion matched\n"));
1816	if (new_recursive.offset_save != stacksave)
1817	(PUBL(free))(new_recursive.offset_save);
1818
1819	/* Set where we got to in the subject, and reset the start in case
1820	it was changed by \K. This *is* propagated back out of a recursion,
1821	for Perl compatibility. */
1822
1823	eptr = md->end_match_ptr;
1824	mstart = md->start_match_ptr;
1825	goto RECURSION_MATCHED; / Exit loop; end processing /
1826	}
1827
1828	/ PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a*
1829	recursion; they cause a NOMATCH for the entire recursion. These codes
1830	are defined in a range that can be tested for. /*
1831
1832	if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
1833	{
1834	if (new_recursive.offset_save != stacksave)
1835	(PUBL(free))(new_recursive.offset_save);
1836	RRETURN(MATCH_NOMATCH);
1837	}
1838
1839	/ Any return code other than NOMATCH is an error. /
1840
1841	if (rrc != MATCH_NOMATCH)
1842	{
1843	DPRINTF(("Recursion gave error %d\n", rrc));
1844	if (new_recursive.offset_save != stacksave)
1845	(PUBL(free))(new_recursive.offset_save);
1846	RRETURN(rrc);
1847	}
1848
1849	md->recursive = &new_recursive;
1850	callpat += GET(callpat, `1`);
1851	}
1852	while (*callpat == OP_ALT);
1853
1854	DPRINTF(("Recursion didn't match\n"));
1855	md->recursive = new_recursive.prevrec;
1856	if (new_recursive.offset_save != stacksave)
1857	(PUBL(free))(new_recursive.offset_save);
1858	RRETURN(MATCH_NOMATCH);
1859	}
1860
1861	RECURSION_MATCHED:
1862	break;
1863
1864	/ An alternation is the end of a branch; scan along to find the end of the*
1865	bracketed group and go to there. /*
1866
1867	case OP_ALT:
1868	do ecode += GET(ecode,`1`); while (*ecode == OP_ALT);
1869	break;
1870
1871	/* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1872	indicating that it may occur zero times. It may repeat infinitely, or not
1873	at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1874	with fixed upper repeat limits are compiled as a number of copies, with the
1875	optional ones preceded by BRAZERO or BRAMINZERO. */
1876
1877	case OP_BRAZERO:
1878	next = ecode + `1`;
1879	RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1880	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1881	do next += GET(next, `1`); while (*next == OP_ALT);
1882	ecode = next + `1` + LINK_SIZE;
1883	break;
1884
1885	case OP_BRAMINZERO:
1886	next = ecode + `1`;
1887	do next += GET(next, `1`); while (*next == OP_ALT);
1888	RMATCH(eptr, next + `1`+LINK_SIZE, offset_top, md, eptrb, RM11);
1889	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1890	ecode++;
1891	break;
1892
1893	case OP_SKIPZERO:
1894	next = ecode+`1`;
1895	do next += GET(next,`1`); while (*next == OP_ALT);
1896	ecode = next + `1` + LINK_SIZE;
1897	break;
1898
1899	/ BRAPOSZERO occurs before a possessive bracket group. Don't do anything*
1900	here; just jump to the group, with allow_zero set TRUE. /*
1901
1902	case OP_BRAPOSZERO:
1903	op = *(++ecode);
1904	allow_zero = TRUE;
1905	if (op == OP_CBRAPOS \|\| op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
1906	goto POSSESSIVE_NON_CAPTURE;
1907
1908	/ End of a group, repeated or non-repeating. /
1909
1910	case OP_KET:
1911	case OP_KETRMIN:
1912	case OP_KETRMAX:
1913	case OP_KETRPOS:
1914	prev = ecode - GET(ecode, `1`);
1915
1916	/ If this was a group that remembered the subject start, in order to break*
1917	infinite repeats of empty string matches, retrieve the subject start from
1918	the chain. Otherwise, set it NULL. /*
1919
1920	if (prev >= OP_SBRA \|\| prev == OP_ONCE)
1921	{
1922	saved_eptr = eptrb->epb_saved_eptr; / Value at start of group /
1923	eptrb = eptrb->epb_prev; / Backup to previous group /
1924	}
1925	else saved_eptr = NULL;
1926
1927	/ If we are at the end of an assertion group or a non-capturing atomic*
1928	group, stop matching and return MATCH_MATCH, but record the current high
1929	water mark for use by positive assertions. We also need to record the match
1930	start in case it was changed by \K. /*
1931
1932	if ((prev >= OP_ASSERT && prev <= OP_ASSERTBACK_NOT) \|\|
1933	*prev == OP_ONCE_NC)
1934	{
1935	md->end_match_ptr = eptr; / For ONCE_NC /
1936	md->end_offset_top = offset_top;
1937	md->start_match_ptr = mstart;
1938	RRETURN(MATCH_MATCH); / Sets md->mark /
1939	}
1940
1941	/ For capturing groups we have to check the group number back at the start*
1942	and if necessary complete handling an extraction by setting the offsets and
1943	bumping the high water mark. Whole-pattern recursion is coded as a recurse
1944	into group 0, so it won't be picked up here. Instead, we catch it when the
1945	OP_END is reached. Other recursion is handled here. We just have to record
1946	the current subject position and start match pointer and give a MATCH
1947	return. /*
1948
1949	if (prev == OP_CBRA \|\| prev == OP_SCBRA \|\|
1950	prev == OP_CBRAPOS \|\| prev == OP_SCBRAPOS)
1951	{
1952	number = GET2(prev, `1`+LINK_SIZE);
1953	offset = number << `1`;
1954
1955	#ifdef PCRE_DEBUG
1956	printf("end bracket %d", number);
1957	printf("\n");
1958	#endif
1959
1960	/ Handle a recursively called group. /
1961
1962	if (md->recursive != NULL && md->recursive->group_num == number)
1963	{
1964	md->end_match_ptr = eptr;
1965	md->start_match_ptr = mstart;
1966	RRETURN(MATCH_MATCH);
1967	}
1968
1969	/ Deal with capturing /
1970
1971	md->capture_last = (md->capture_last & OVFLMASK) \| number;
1972	if (offset >= md->offset_max) md->capture_last \|= OVFLBIT; else
1973	{
1974	/ If offset is greater than offset_top, it means that we are*
1975	"skipping" a capturing group, and that group's offsets must be marked
1976	unset. In earlier versions of PCRE, all the offsets were unset at the
1977	start of matching, but this doesn't work because atomic groups and
1978	assertions can cause a value to be set that should later be unset.
1979	Example: matching /(?>(a))b\|(a)c/ against "ac". This sets group 1 as
1980	part of the atomic group, but this is not on the final matching path,
1981	so must be unset when 2 is set. (If there is no group 2, there is no
1982	problem, because offset_top will then be 2, indicating no capture.) /*
1983
1984	if (offset > offset_top)
1985	{
1986	register int *iptr = md->offset_vector + offset_top;
1987	register int *iend = md->offset_vector + offset;
1988	while (iptr < iend) *iptr++ = -`1`;
1989	}
1990
1991	/ Now make the extraction /
1992
1993	md->offset_vector[offset] =
1994	md->offset_vector[md->offset_end - number];
1995	md->offset_vector[offset+`1`] = (int)(eptr - md->start_subject);
1996	if (offset_top <= offset) offset_top = offset + `2`;
1997	}
1998	}
1999
2000	/ OP_KETRPOS is a possessive repeating ket. Remember the current position,*
2001	and return the MATCH_KETRPOS. This makes it possible to do the repeats one
2002	at a time from the outer level, thus saving stack. This must precede the
2003	empty string test - in this case that test is done at the outer level. /*
2004
2005	if (*ecode == OP_KETRPOS)
2006	{
2007	md->start_match_ptr = mstart; / In case \K reset it /
2008	md->end_match_ptr = eptr;
2009	md->end_offset_top = offset_top;
2010	RRETURN(MATCH_KETRPOS);
2011	}
2012
2013	/ For an ordinary non-repeating ket, just continue at this level. This*
2014	also happens for a repeating ket if no characters were matched in the
2015	group. This is the forcible breaking of infinite loops as implemented in
2016	Perl 5.005. For a non-repeating atomic group that includes captures,
2017	establish a backup point by processing the rest of the pattern at a lower
2018	level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
2019	original OP_ONCE level, thereby bypassing intermediate backup points, but
2020	resetting any captures that happened along the way. /*
2021
2022	if (*ecode == OP_KET \|\| eptr == saved_eptr)
2023	{
2024	if (*prev == OP_ONCE)
2025	{
2026	RMATCH(eptr, ecode + `1` + LINK_SIZE, offset_top, md, eptrb, RM12);
2027	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2028	md->once_target = prev; / Level at which to change to MATCH_NOMATCH /
2029	RRETURN(MATCH_ONCE);
2030	}
2031	ecode += `1` + LINK_SIZE; / Carry on at this level /
2032	break;
2033	}
2034
2035	/* The normal repeating kets try the rest of the pattern or restart from
2036	the preceding bracket, in the appropriate order. In the second case, we can
2037	use tail recursion to avoid using another stack frame, unless we have
2038	an atomic group or an unlimited repeat of a group that can match an empty
2039	string. */
2040
2041	if (*ecode == OP_KETRMIN)
2042	{
2043	RMATCH(eptr, ecode + `1` + LINK_SIZE, offset_top, md, eptrb, RM7);
2044	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2045	if (*prev == OP_ONCE)
2046	{
2047	RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2048	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2049	md->once_target = prev; / Level at which to change to MATCH_NOMATCH /
2050	RRETURN(MATCH_ONCE);
2051	}
2052	if (prev >= OP_SBRA) /* Could match an empty string /
2053	{
2054	RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2055	RRETURN(rrc);
2056	}
2057	ecode = prev;
2058	goto TAIL_RECURSE;
2059	}
2060	else / OP_KETRMAX /
2061	{
2062	RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2063	if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
2064	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2065	if (*prev == OP_ONCE)
2066	{
2067	RMATCH(eptr, ecode + `1` + LINK_SIZE, offset_top, md, eptrb, RM9);
2068	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2069	md->once_target = prev;
2070	RRETURN(MATCH_ONCE);
2071	}
2072	ecode += `1` + LINK_SIZE;
2073	goto TAIL_RECURSE;
2074	}
2075	/ Control never gets here /
2076
2077	/ Not multiline mode: start of subject assertion, unless notbol. /
2078
2079	case OP_CIRC:
2080	if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2081
2082	/ Start of subject assertion /
2083
2084	case OP_SOD:
2085	if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2086	ecode++;
2087	break;
2088
2089	/ Multiline mode: start of subject unless notbol, or after any newline. /
2090
2091	case OP_CIRCM:
2092	if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
2093	if (eptr != md->start_subject &&
2094	(eptr == md->end_subject \|\| !WAS_NEWLINE(eptr)))
2095	RRETURN(MATCH_NOMATCH);
2096	ecode++;
2097	break;
2098
2099	/ Start of match assertion /
2100
2101	case OP_SOM:
2102	if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2103	ecode++;
2104	break;
2105
2106	/ Reset the start of match point /
2107
2108	case OP_SET_SOM:
2109	mstart = eptr;
2110	ecode++;
2111	break;
2112
2113	/ Multiline mode: assert before any newline, or before end of subject*
2114	unless noteol is set. /*
2115
2116	case OP_DOLLM:
2117	if (eptr < md->end_subject)
2118	{
2119	if (!IS_NEWLINE(eptr))
2120	{
2121	if (md->partial != `0` &&
2122	eptr + `1` >= md->end_subject &&
2123	NLBLOCK->nltype == NLTYPE_FIXED &&
2124	NLBLOCK->nllen == `2` &&
2125	UCHAR21TEST(eptr) == NLBLOCK->nl[`0`])
2126	{
2127	md->hitend = TRUE;
2128	if (md->partial > `1`) RRETURN(PCRE_ERROR_PARTIAL);
2129	}
2130	RRETURN(MATCH_NOMATCH);
2131	}
2132	}
2133	else
2134	{
2135	if (md->noteol) RRETURN(MATCH_NOMATCH);
2136	SCHECK_PARTIAL();
2137	}
2138	ecode++;
2139	break;
2140
2141	/ Not multiline mode: assert before a terminating newline or before end of*
2142	subject unless noteol is set. /*
2143
2144	case OP_DOLL:
2145	if (md->noteol) RRETURN(MATCH_NOMATCH);
2146	if (!md->endonly) goto ASSERT_NL_OR_EOS;
2147
2148	/ ... else fall through for endonly /
2149
2150	/ End of subject assertion (\z) /
2151
2152	case OP_EOD:
2153	if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2154	SCHECK_PARTIAL();
2155	ecode++;
2156	break;
2157
2158	/ End of subject or ending \n assertion (\Z) /
2159
2160	case OP_EODN:
2161	ASSERT_NL_OR_EOS:
2162	if (eptr < md->end_subject &&
2163	(!IS_NEWLINE(eptr) \|\| eptr != md->end_subject - md->nllen))
2164	{
2165	if (md->partial != `0` &&
2166	eptr + `1` >= md->end_subject &&
2167	NLBLOCK->nltype == NLTYPE_FIXED &&
2168	NLBLOCK->nllen == `2` &&
2169	UCHAR21TEST(eptr) == NLBLOCK->nl[`0`])
2170	{
2171	md->hitend = TRUE;
2172	if (md->partial > `1`) RRETURN(PCRE_ERROR_PARTIAL);
2173	}
2174	RRETURN(MATCH_NOMATCH);
2175	}
2176
2177	/ Either at end of string or \n before end. /
2178
2179	SCHECK_PARTIAL();
2180	ecode++;
2181	break;
2182
2183	/ Word boundary assertions /
2184
2185	case OP_NOT_WORD_BOUNDARY:
2186	case OP_WORD_BOUNDARY:
2187	{
2188
2189	/ Find out if the previous and current characters are "word" characters.*
2190	It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2191	be "non-word" characters. Remember the earliest consulted character for
2192	partial matching. /*
2193
2194	#ifdef SUPPORT_UTF
2195	if (utf)
2196	{
2197	/ Get status of previous character /
2198
2199	if (eptr == md->start_subject) prev_is_word = FALSE; else
2200	{
2201	PCRE_PUCHAR lastptr = eptr - `1`;
2202	BACKCHAR(lastptr);
2203	if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2204	GETCHAR(c, lastptr);
2205	#ifdef SUPPORT_UCP
2206	if (md->use_ucp)
2207	{
2208	if (c == `'_'`) prev_is_word = TRUE; else
2209	{
2210	int cat = UCD_CATEGORY(c);
2211	prev_is_word = (cat == ucp_L \|\| cat == ucp_N);
2212	}
2213	}
2214	else
2215	#endif
2216	prev_is_word = c < `256` && (md->ctypes[c] & ctype_word) != `0`;
2217	}
2218
2219	/ Get status of next character /
2220
2221	if (eptr >= md->end_subject)
2222	{
2223	SCHECK_PARTIAL();
2224	cur_is_word = FALSE;
2225	}
2226	else
2227	{
2228	GETCHAR(c, eptr);
2229	#ifdef SUPPORT_UCP
2230	if (md->use_ucp)
2231	{
2232	if (c == `'_'`) cur_is_word = TRUE; else
2233	{
2234	int cat = UCD_CATEGORY(c);
2235	cur_is_word = (cat == ucp_L \|\| cat == ucp_N);
2236	}
2237	}
2238	else
2239	#endif
2240	cur_is_word = c < `256` && (md->ctypes[c] & ctype_word) != `0`;
2241	}
2242	}
2243	else
2244	#endif
2245
2246	/ Not in UTF-8 mode, but we may still have PCRE_UCP set, and for*
2247	consistency with the behaviour of \w we do use it in this case. /*
2248
2249	{
2250	/ Get status of previous character /
2251
2252	if (eptr == md->start_subject) prev_is_word = FALSE; else
2253	{
2254	if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - `1`;
2255	#ifdef SUPPORT_UCP
2256	if (md->use_ucp)
2257	{
2258	c = eptr[-`1`];
2259	if (c == `'_'`) prev_is_word = TRUE; else
2260	{
2261	int cat = UCD_CATEGORY(c);
2262	prev_is_word = (cat == ucp_L \|\| cat == ucp_N);
2263	}
2264	}
2265	else
2266	#endif
2267	prev_is_word = MAX_255(eptr[-`1`])
2268	&& ((md->ctypes[eptr[-`1`]] & ctype_word) != `0`);
2269	}
2270
2271	/ Get status of next character /
2272
2273	if (eptr >= md->end_subject)
2274	{
2275	SCHECK_PARTIAL();
2276	cur_is_word = FALSE;
2277	}
2278	else
2279	#ifdef SUPPORT_UCP
2280	if (md->use_ucp)
2281	{
2282	c = *eptr;
2283	if (c == `'_'`) cur_is_word = TRUE; else
2284	{
2285	int cat = UCD_CATEGORY(c);
2286	cur_is_word = (cat == ucp_L \|\| cat == ucp_N);
2287	}
2288	}
2289	else
2290	#endif
2291	cur_is_word = MAX_255(*eptr)
2292	&& ((md->ctypes[*eptr] & ctype_word) != `0`);
2293	}
2294
2295	/ Now see if the situation is what we want /
2296
2297	if ((*ecode++ == OP_WORD_BOUNDARY)?
2298	cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2299	RRETURN(MATCH_NOMATCH);
2300	}
2301	break;
2302
2303	/ Match any single character type except newline; have to take care with*
2304	CRLF newlines and partial matching. /*
2305
2306	case OP_ANY:
2307	if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2308	if (md->partial != `0` &&
2309	eptr + `1` >= md->end_subject &&
2310	NLBLOCK->nltype == NLTYPE_FIXED &&
2311	NLBLOCK->nllen == `2` &&
2312	UCHAR21TEST(eptr) == NLBLOCK->nl[`0`])
2313	{
2314	md->hitend = TRUE;
2315	if (md->partial > `1`) RRETURN(PCRE_ERROR_PARTIAL);
2316	}
2317
2318	/ Fall through /
2319
2320	/ Match any single character whatsoever. /
2321
2322	case OP_ALLANY:
2323	if (eptr >= md->end_subject) / DO NOT merge the eptr++ here; it must /
2324	{ / not be updated before SCHECK_PARTIAL. /
2325	SCHECK_PARTIAL();
2326	RRETURN(MATCH_NOMATCH);
2327	}
2328	eptr++;
2329	#ifdef SUPPORT_UTF
2330	if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2331	#endif
2332	ecode++;
2333	break;
2334
2335	/ Match a single byte, even in UTF-8 mode. This opcode really does match*
2336	any byte, even newline, independent of the setting of PCRE_DOTALL. /*
2337
2338	case OP_ANYBYTE:
2339	if (eptr >= md->end_subject) / DO NOT merge the eptr++ here; it must /
2340	{ / not be updated before SCHECK_PARTIAL. /
2341	SCHECK_PARTIAL();
2342	RRETURN(MATCH_NOMATCH);
2343	}
2344	eptr++;
2345	ecode++;
2346	break;
2347
2348	case OP_NOT_DIGIT:
2349	if (eptr >= md->end_subject)
2350	{
2351	SCHECK_PARTIAL();
2352	RRETURN(MATCH_NOMATCH);
2353	}
2354	GETCHARINCTEST(c, eptr);
2355	if (
2356	#if defined SUPPORT_UTF \|\| !(defined COMPILE_PCRE8)
2357	c < `256` &&
2358	#endif
2359	(md->ctypes[c] & ctype_digit) != `0`
2360	)
2361	RRETURN(MATCH_NOMATCH);
2362	ecode++;
2363	break;
2364
2365	case OP_DIGIT:
2366	if (eptr >= md->end_subject)
2367	{
2368	SCHECK_PARTIAL();
2369	RRETURN(MATCH_NOMATCH);
2370	}
2371	GETCHARINCTEST(c, eptr);
2372	if (
2373	#if defined SUPPORT_UTF \|\| !(defined COMPILE_PCRE8)
2374	c > `255` \|\|
2375	#endif
2376	(md->ctypes[c] & ctype_digit) == `0`
2377	)
2378	RRETURN(MATCH_NOMATCH);
2379	ecode++;
2380	break;
2381
2382	case OP_NOT_WHITESPACE:
2383	if (eptr >= md->end_subject)
2384	{
2385	SCHECK_PARTIAL();
2386	RRETURN(MATCH_NOMATCH);
2387	}
2388	GETCHARINCTEST(c, eptr);
2389	if (
2390	#if defined SUPPORT_UTF \|\| !(defined COMPILE_PCRE8)
2391	c < `256` &&
2392	#endif
2393	(md->ctypes[c] & ctype_space) != `0`
2394	)
2395	RRETURN(MATCH_NOMATCH);
2396	ecode++;
2397	break;
2398
2399	case OP_WHITESPACE:
2400	if (eptr >= md->end_subject)
2401	{
2402	SCHECK_PARTIAL();
2403	RRETURN(MATCH_NOMATCH);
2404	}
2405	GETCHARINCTEST(c, eptr);
2406	if (
2407	#if defined SUPPORT_UTF \|\| !(defined COMPILE_PCRE8)
2408	c > `255` \|\|
2409	#endif
2410	(md->ctypes[c] & ctype_space) == `0`
2411	)
2412	RRETURN(MATCH_NOMATCH);
2413	ecode++;
2414	break;
2415
2416	case OP_NOT_WORDCHAR:
2417	if (eptr >= md->end_subject)
2418	{
2419	SCHECK_PARTIAL();
2420	RRETURN(MATCH_NOMATCH);
2421	}
2422	GETCHARINCTEST(c, eptr);
2423	if (
2424	#if defined SUPPORT_UTF \|\| !(defined COMPILE_PCRE8)
2425	c < `256` &&
2426	#endif
2427	(md->ctypes[c] & ctype_word) != `0`
2428	)
2429	RRETURN(MATCH_NOMATCH);
2430	ecode++;
2431	break;
2432
2433	case OP_WORDCHAR:
2434	if (eptr >= md->end_subject)
2435	{
2436	SCHECK_PARTIAL();
2437	RRETURN(MATCH_NOMATCH);
2438	}
2439	GETCHARINCTEST(c, eptr);
2440	if (
2441	#if defined SUPPORT_UTF \|\| !(defined COMPILE_PCRE8)
2442	c > `255` \|\|
2443	#endif
2444	(md->ctypes[c] & ctype_word) == `0`
2445	)
2446	RRETURN(MATCH_NOMATCH);
2447	ecode++;
2448	break;
2449
2450	case OP_ANYNL:
2451	if (eptr >= md->end_subject)
2452	{
2453	SCHECK_PARTIAL();
2454	RRETURN(MATCH_NOMATCH);
2455	}
2456	GETCHARINCTEST(c, eptr);
2457	switch(c)
2458	{
2459	default: RRETURN(MATCH_NOMATCH);
2460
2461	case CHAR_CR:
2462	if (eptr >= md->end_subject)
2463	{
2464	SCHECK_PARTIAL();
2465	}
2466	else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
2467	break;
2468
2469	case CHAR_LF:
2470	break;
2471
2472	case CHAR_VT:
2473	case CHAR_FF:
2474	case CHAR_NEL:
2475	#ifndef EBCDIC
2476	case `0x2028`:
2477	case `0x2029`:
2478	#endif /* Not EBCDIC */
2479	if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2480	break;
2481	}
2482	ecode++;
2483	break;
2484
2485	case OP_NOT_HSPACE:
2486	if (eptr >= md->end_subject)
2487	{
2488	SCHECK_PARTIAL();
2489	RRETURN(MATCH_NOMATCH);
2490	}
2491	GETCHARINCTEST(c, eptr);
2492	switch(c)
2493	{
2494	HSPACE_CASES: RRETURN(MATCH_NOMATCH); / Byte and multibyte cases /
2495	default: break;
2496	}
2497	ecode++;
2498	break;
2499
2500	case OP_HSPACE:
2501	if (eptr >= md->end_subject)
2502	{
2503	SCHECK_PARTIAL();
2504	RRETURN(MATCH_NOMATCH);
2505	}
2506	GETCHARINCTEST(c, eptr);
2507	switch(c)
2508	{
2509	HSPACE_CASES: break; / Byte and multibyte cases /
2510	default: RRETURN(MATCH_NOMATCH);
2511	}
2512	ecode++;
2513	break;
2514
2515	case OP_NOT_VSPACE:
2516	if (eptr >= md->end_subject)
2517	{
2518	SCHECK_PARTIAL();
2519	RRETURN(MATCH_NOMATCH);
2520	}
2521	GETCHARINCTEST(c, eptr);
2522	switch(c)
2523	{
2524	VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2525	default: break;
2526	}
2527	ecode++;
2528	break;
2529
2530	case OP_VSPACE:
2531	if (eptr >= md->end_subject)
2532	{
2533	SCHECK_PARTIAL();
2534	RRETURN(MATCH_NOMATCH);
2535	}
2536	GETCHARINCTEST(c, eptr);
2537	switch(c)
2538	{
2539	VSPACE_CASES: break;
2540	default: RRETURN(MATCH_NOMATCH);
2541	}
2542	ecode++;
2543	break;
2544
2545	#ifdef SUPPORT_UCP
2546	/ Check the next character by Unicode property. We will get here only*
2547	if the support is in the binary; otherwise a compile-time error occurs. /*
2548
2549	case OP_PROP:
2550	case OP_NOTPROP:
2551	if (eptr >= md->end_subject)
2552	{
2553	SCHECK_PARTIAL();
2554	RRETURN(MATCH_NOMATCH);
2555	}
2556	GETCHARINCTEST(c, eptr);
2557	{
2558	const pcre_uint32 *cp;
2559	const ucd_record *prop = GET_UCD(c);
2560
2561	switch(ecode[`1`])
2562	{
2563	case PT_ANY:
2564	if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2565	break;
2566
2567	case PT_LAMP:
2568	if ((prop->chartype == ucp_Lu \|\|
2569	prop->chartype == ucp_Ll \|\|
2570	prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2571	RRETURN(MATCH_NOMATCH);
2572	break;
2573
2574	case PT_GC:
2575	if ((ecode[`2`] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2576	RRETURN(MATCH_NOMATCH);
2577	break;
2578
2579	case PT_PC:
2580	if ((ecode[`2`] != prop->chartype) == (op == OP_PROP))
2581	RRETURN(MATCH_NOMATCH);
2582	break;
2583
2584	case PT_SC:
2585	if ((ecode[`2`] != prop->script) == (op == OP_PROP))
2586	RRETURN(MATCH_NOMATCH);
2587	break;
2588
2589	/ These are specials /
2590
2591	case PT_ALNUM:
2592	if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
2593	PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2594	RRETURN(MATCH_NOMATCH);
2595	break;
2596
2597	/ Perl space used to exclude VT, but from Perl 5.18 it is included,*
2598	which means that Perl space and POSIX space are now identical. PCRE
2599	was changed at release 8.34. /*
2600
2601	case PT_SPACE: / Perl space /
2602	case PT_PXSPACE: / POSIX space /
2603	switch(c)
2604	{
2605	HSPACE_CASES:
2606	VSPACE_CASES:
2607	if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2608	break;
2609
2610	default:
2611	if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
2612	(op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
2613	break;
2614	}
2615	break;
2616
2617	case PT_WORD:
2618	if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
2619	PRIV(ucp_gentype)[prop->chartype] == ucp_N \|\|
2620	c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2621	RRETURN(MATCH_NOMATCH);
2622	break;
2623
2624	case PT_CLIST:
2625	cp = PRIV(ucd_caseless_sets) + ecode[`2`];
2626	for (;;)
2627	{
2628	if (c < *cp)
2629	{ if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2630	if (c == *cp++)
2631	{ if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2632	}
2633	break;
2634
2635	case PT_UCNC:
2636	if ((c == CHAR_DOLLAR_SIGN \|\| c == CHAR_COMMERCIAL_AT \|\|
2637	c == CHAR_GRAVE_ACCENT \|\| (c >= `0xa0` && c <= `0xd7ff`) \|\|
2638	c >= `0xe000`) == (op == OP_NOTPROP))
2639	RRETURN(MATCH_NOMATCH);
2640	break;
2641
2642	/ This should never occur /
2643
2644	default:
2645	RRETURN(PCRE_ERROR_INTERNAL);
2646	}
2647
2648	ecode += `3`;
2649	}
2650	break;
2651
2652	/ Match an extended Unicode sequence. We will get here only if the support*
2653	is in the binary; otherwise a compile-time error occurs. /*
2654
2655	case OP_EXTUNI:
2656	if (eptr >= md->end_subject)
2657	{
2658	SCHECK_PARTIAL();
2659	RRETURN(MATCH_NOMATCH);
2660	}
2661	else
2662	{
2663	int lgb, rgb;
2664	GETCHARINCTEST(c, eptr);
2665	lgb = UCD_GRAPHBREAK(c);
2666	while (eptr < md->end_subject)
2667	{
2668	int len = `1`;
2669	if (!utf) c = eptr; else* { GETCHARLEN(c, eptr, len); }
2670	rgb = UCD_GRAPHBREAK(c);
2671	if ((PRIV(ucp_gbtable)[lgb] & (`1` << rgb)) == `0`) break;
2672	lgb = rgb;
2673	eptr += len;
2674	}
2675	}
2676	CHECK_PARTIAL();
2677	ecode++;
2678	break;
2679	#endif /* SUPPORT_UCP */
2680
2681
2682	/ Match a back reference, possibly repeatedly. Look past the end of the*
2683	item to see if there is repeat information following. The code is similar
2684	to that for character classes, but repeated for efficiency. Then obey
2685	similar code to character type repeats - written out again for speed.
2686	However, if the referenced string is the empty string, always treat
2687	it as matched, any number of times (otherwise there could be infinite
2688	loops). If the reference is unset, there are two possibilities:
2689
2690	(a) In the default, Perl-compatible state, set the length negative;
2691	this ensures that every attempt at a match fails. We can't just fail
2692	here, because of the possibility of quantifiers with zero minima.
2693
2694	(b) If the JavaScript compatibility flag is set, set the length to zero
2695	so that the back reference matches an empty string.
2696
2697	Otherwise, set the length to the length of what was matched by the
2698	referenced subpattern.
2699
2700	The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
2701	or to a non-duplicated named group. For a duplicated named group, OP_DNREF
2702	and OP_DNREFI are used. In this case we must scan the list of groups to
2703	which the name refers, and use the first one that is set. /*
2704
2705	case OP_DNREF:
2706	case OP_DNREFI:
2707	caseless = op == OP_DNREFI;
2708	{
2709	int count = GET2(ecode, `1`+IMM2_SIZE);
2710	pcre_uchar slot = md->name_table + GET2(ecode, `1`) md->name_entry_size;
2711	ecode += `1` + `2`*IMM2_SIZE;
2712
2713	/ Setting the default length first and initializing 'offset' avoids*
2714	compiler warnings in the REF_REPEAT code. /*
2715
2716	length = (md->jscript_compat)? `0` : -`1`;
2717	offset = `0`;
2718
2719	while (count-- > `0`)
2720	{
2721	offset = GET2(slot, `0`) << `1`;
2722	if (offset < offset_top && md->offset_vector[offset] >= `0`)
2723	{
2724	length = md->offset_vector[offset+`1`] - md->offset_vector[offset];
2725	break;
2726	}
2727	slot += md->name_entry_size;
2728	}
2729	}
2730	goto REF_REPEAT;
2731
2732	case OP_REF:
2733	case OP_REFI:
2734	caseless = op == OP_REFI;
2735	offset = GET2(ecode, `1`) << `1`; / Doubled ref number /
2736	ecode += `1` + IMM2_SIZE;
2737	if (offset >= offset_top \|\| md->offset_vector[offset] < `0`)
2738	length = (md->jscript_compat)? `0` : -`1`;
2739	else
2740	length = md->offset_vector[offset+`1`] - md->offset_vector[offset];
2741
2742	/ Set up for repetition, or handle the non-repeated case /
2743
2744	REF_REPEAT:
2745	switch (*ecode)
2746	{
2747	case OP_CRSTAR:
2748	case OP_CRMINSTAR:
2749	case OP_CRPLUS:
2750	case OP_CRMINPLUS:
2751	case OP_CRQUERY:
2752	case OP_CRMINQUERY:
2753	c = *ecode++ - OP_CRSTAR;
2754	minimize = (c & `1`) != `0`;
2755	min = rep_min[c]; / Pick up values from tables; /
2756	max = rep_max[c]; / zero for max => infinity /
2757	if (max == `0`) max = INT_MAX;
2758	break;
2759
2760	case OP_CRRANGE:
2761	case OP_CRMINRANGE:
2762	minimize = (*ecode == OP_CRMINRANGE);
2763	min = GET2(ecode, `1`);
2764	max = GET2(ecode, `1` + IMM2_SIZE);
2765	if (max == `0`) max = INT_MAX;
2766	ecode += `1` + `2` * IMM2_SIZE;
2767	break;
2768
2769	default: / No repeat follows /
2770	if ((length = match_ref(offset, eptr, length, md, caseless)) < `0`)
2771	{
2772	if (length == -`2`) eptr = md->end_subject; / Partial match /
2773	CHECK_PARTIAL();
2774	RRETURN(MATCH_NOMATCH);
2775	}
2776	eptr += length;
2777	continue; / With the main loop /
2778	}
2779
2780	/* Handle repeated back references. If the length of the reference is
2781	zero, just continue with the main loop. If the length is negative, it
2782	means the reference is unset in non-JavaScript-compatible mode. If the
2783	minimum is zero, we can continue at the same level without recursion. For
2784	any other minimum, carrying on will result in NOMATCH. */
2785
2786	if (length == `0`) continue;
2787	if (length < `0` && min == `0`) continue;
2788
2789	/ First, ensure the minimum number of matches are present. We get back*
2790	the length of the reference string explicitly rather than passing the
2791	address of eptr, so that eptr can be a register variable. /*
2792
2793	for (i = `1`; i <= min; i++)
2794	{
2795	int slength;
2796	if ((slength = match_ref(offset, eptr, length, md, caseless)) < `0`)
2797	{
2798	if (slength == -`2`) eptr = md->end_subject; / Partial match /
2799	CHECK_PARTIAL();
2800	RRETURN(MATCH_NOMATCH);
2801	}
2802	eptr += slength;
2803	}
2804
2805	/ If min = max, continue at the same level without recursion.*
2806	They are not both allowed to be zero. /*
2807
2808	if (min == max) continue;
2809
2810	/ If minimizing, keep trying and advancing the pointer /
2811
2812	if (minimize)
2813	{
2814	for (fi = min;; fi++)
2815	{
2816	int slength;
2817	RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2818	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2819	if (fi >= max) RRETURN(MATCH_NOMATCH);
2820	if ((slength = match_ref(offset, eptr, length, md, caseless)) < `0`)
2821	{
2822	if (slength == -`2`) eptr = md->end_subject; / Partial match /
2823	CHECK_PARTIAL();
2824	RRETURN(MATCH_NOMATCH);
2825	}
2826	eptr += slength;
2827	}
2828	/ Control never gets here /
2829	}
2830
2831	/ If maximizing, find the longest string and work backwards /
2832
2833	else
2834	{
2835	pp = eptr;
2836	for (i = min; i < max; i++)
2837	{
2838	int slength;
2839	if ((slength = match_ref(offset, eptr, length, md, caseless)) < `0`)
2840	{
2841	/ Can't use CHECK_PARTIAL because we don't want to update eptr in*
2842	the soft partial matching case. /*
2843
2844	if (slength == -`2` && md->partial != `0` &&
2845	md->end_subject > md->start_used_ptr)
2846	{
2847	md->hitend = TRUE;
2848	if (md->partial > `1`) RRETURN(PCRE_ERROR_PARTIAL);
2849	}
2850	break;
2851	}
2852	eptr += slength;
2853	}
2854
2855	while (eptr >= pp)
2856	{
2857	RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2858	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2859	eptr -= length;
2860	}
2861	RRETURN(MATCH_NOMATCH);
2862	}
2863	/ Control never gets here /
2864
2865	/ Match a bit-mapped character class, possibly repeatedly. This op code is*
2866	used when all the characters in the class have values in the range 0-255,
2867	and either the matching is caseful, or the characters are in the range
2868	0-127 when UTF-8 processing is enabled. The only difference between
2869	OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2870	encountered.
2871
2872	First, look past the end of the item to see if there is repeat information
2873	following. Then obey similar code to character type repeats - written out
2874	again for speed. /*
2875
2876	case OP_NCLASS:
2877	case OP_CLASS:
2878	{
2879	/ The data variable is saved across frames, so the byte map needs to*
2880	be stored there. /*
2881	#define BYTE_MAP ((pcre_uint8 *)data)
2882	data = ecode + `1`; / Save for matching /
2883	ecode += `1` + (`32` / sizeof(pcre_uchar)); / Advance past the item /
2884
2885	switch (*ecode)
2886	{
2887	case OP_CRSTAR:
2888	case OP_CRMINSTAR:
2889	case OP_CRPLUS:
2890	case OP_CRMINPLUS:
2891	case OP_CRQUERY:
2892	case OP_CRMINQUERY:
2893	case OP_CRPOSSTAR:
2894	case OP_CRPOSPLUS:
2895	case OP_CRPOSQUERY:
2896	c = *ecode++ - OP_CRSTAR;
2897	if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & `1`) != `0`;
2898	else possessive = TRUE;
2899	min = rep_min[c]; / Pick up values from tables; /
2900	max = rep_max[c]; / zero for max => infinity /
2901	if (max == `0`) max = INT_MAX;
2902	break;
2903
2904	case OP_CRRANGE:
2905	case OP_CRMINRANGE:
2906	case OP_CRPOSRANGE:
2907	minimize = (*ecode == OP_CRMINRANGE);
2908	possessive = (*ecode == OP_CRPOSRANGE);
2909	min = GET2(ecode, `1`);
2910	max = GET2(ecode, `1` + IMM2_SIZE);
2911	if (max == `0`) max = INT_MAX;
2912	ecode += `1` + `2` * IMM2_SIZE;
2913	break;
2914
2915	default: / No repeat follows /
2916	min = max = `1`;
2917	break;
2918	}
2919
2920	/ First, ensure the minimum number of matches are present. /
2921
2922	#ifdef SUPPORT_UTF
2923	if (utf)
2924	{
2925	for (i = `1`; i <= min; i++)
2926	{
2927	if (eptr >= md->end_subject)
2928	{
2929	SCHECK_PARTIAL();
2930	RRETURN(MATCH_NOMATCH);
2931	}
2932	GETCHARINC(c, eptr);
2933	if (c > `255`)
2934	{
2935	if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2936	}
2937	else
2938	if ((BYTE_MAP[c/`8`] & (`1` << (c&`7`))) == `0`) RRETURN(MATCH_NOMATCH);
2939	}
2940	}
2941	else
2942	#endif
2943	/ Not UTF mode /
2944	{
2945	for (i = `1`; i <= min; i++)
2946	{
2947	if (eptr >= md->end_subject)
2948	{
2949	SCHECK_PARTIAL();
2950	RRETURN(MATCH_NOMATCH);
2951	}
2952	c = *eptr++;
2953	#ifndef COMPILE_PCRE8
2954	if (c > `255`)
2955	{
2956	if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2957	}
2958	else
2959	#endif
2960	if ((BYTE_MAP[c/`8`] & (`1` << (c&`7`))) == `0`) RRETURN(MATCH_NOMATCH);
2961	}
2962	}
2963
2964	/ If max == min we can continue with the main loop without the*
2965	need to recurse. /*
2966
2967	if (min == max) continue;
2968
2969	/ If minimizing, keep testing the rest of the expression and advancing*
2970	the pointer while it matches the class. /*
2971
2972	if (minimize)
2973	{
2974	#ifdef SUPPORT_UTF
2975	if (utf)
2976	{
2977	for (fi = min;; fi++)
2978	{
2979	RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2980	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2981	if (fi >= max) RRETURN(MATCH_NOMATCH);
2982	if (eptr >= md->end_subject)
2983	{
2984	SCHECK_PARTIAL();
2985	RRETURN(MATCH_NOMATCH);
2986	}
2987	GETCHARINC(c, eptr);
2988	if (c > `255`)
2989	{
2990	if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2991	}
2992	else
2993	if ((BYTE_MAP[c/`8`] & (`1` << (c&`7`))) == `0`) RRETURN(MATCH_NOMATCH);
2994	}
2995	}
2996	else
2997	#endif
2998	/ Not UTF mode /
2999	{
3000	for (fi = min;; fi++)
3001	{
3002	RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
3003	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3004	if (fi >= max) RRETURN(MATCH_NOMATCH);
3005	if (eptr >= md->end_subject)
3006	{
3007	SCHECK_PARTIAL();
3008	RRETURN(MATCH_NOMATCH);
3009	}
3010	c = *eptr++;
3011	#ifndef COMPILE_PCRE8
3012	if (c > `255`)
3013	{
3014	if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
3015	}
3016	else
3017	#endif
3018	if ((BYTE_MAP[c/`8`] & (`1` << (c&`7`))) == `0`) RRETURN(MATCH_NOMATCH);
3019	}
3020	}
3021	/ Control never gets here /
3022	}
3023
3024	/ If maximizing, find the longest possible run, then work backwards. /
3025
3026	else
3027	{
3028	pp = eptr;
3029
3030	#ifdef SUPPORT_UTF
3031	if (utf)
3032	{
3033	for (i = min; i < max; i++)
3034	{
3035	int len = `1`;
3036	if (eptr >= md->end_subject)
3037	{
3038	SCHECK_PARTIAL();
3039	break;
3040	}
3041	GETCHARLEN(c, eptr, len);
3042	if (c > `255`)
3043	{
3044	if (op == OP_CLASS) break;
3045	}
3046	else
3047	if ((BYTE_MAP[c/`8`] & (`1` << (c&`7`))) == `0`) break;
3048	eptr += len;
3049	}
3050
3051	if (possessive) continue; / No backtracking /
3052
3053	for (;;)
3054	{
3055	RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
3056	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3057	if (eptr-- == pp) break; / Stop if tried at original pos /
3058	BACKCHAR(eptr);
3059	}
3060	}
3061	else
3062	#endif
3063	/ Not UTF mode /
3064	{
3065	for (i = min; i < max; i++)
3066	{
3067	if (eptr >= md->end_subject)
3068	{
3069	SCHECK_PARTIAL();
3070	break;
3071	}
3072	c = *eptr;
3073	#ifndef COMPILE_PCRE8
3074	if (c > `255`)
3075	{
3076	if (op == OP_CLASS) break;
3077	}
3078	else
3079	#endif
3080	if ((BYTE_MAP[c/`8`] & (`1` << (c&`7`))) == `0`) break;
3081	eptr++;
3082	}
3083
3084	if (possessive) continue; / No backtracking /
3085
3086	while (eptr >= pp)
3087	{
3088	RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
3089	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3090	eptr--;
3091	}
3092	}
3093
3094	RRETURN(MATCH_NOMATCH);
3095	}
3096	#undef BYTE_MAP
3097	}
3098	/ Control never gets here /
3099
3100
3101	/ Match an extended character class. In the 8-bit library, this opcode is*
3102	encountered only when UTF-8 mode is supported. In the 16-bit and
3103	32-bit libraries, codepoints greater than 255 may be encountered even when
3104	UTF is not supported. /*
3105
3106	#if defined SUPPORT_UTF \|\| !defined COMPILE_PCRE8
3107	case OP_XCLASS:
3108	{
3109	data = ecode + `1` + LINK_SIZE; / Save for matching /
3110	ecode += GET(ecode, `1`); / Advance past the item /
3111
3112	switch (*ecode)
3113	{
3114	case OP_CRSTAR:
3115	case OP_CRMINSTAR:
3116	case OP_CRPLUS:
3117	case OP_CRMINPLUS:
3118	case OP_CRQUERY:
3119	case OP_CRMINQUERY:
3120	case OP_CRPOSSTAR:
3121	case OP_CRPOSPLUS:
3122	case OP_CRPOSQUERY:
3123	c = *ecode++ - OP_CRSTAR;
3124	if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & `1`) != `0`;
3125	else possessive = TRUE;
3126	min = rep_min[c]; / Pick up values from tables; /
3127	max = rep_max[c]; / zero for max => infinity /
3128	if (max == `0`) max = INT_MAX;
3129	break;
3130
3131	case OP_CRRANGE:
3132	case OP_CRMINRANGE:
3133	case OP_CRPOSRANGE:
3134	minimize = (*ecode == OP_CRMINRANGE);
3135	possessive = (*ecode == OP_CRPOSRANGE);
3136	min = GET2(ecode, `1`);
3137	max = GET2(ecode, `1` + IMM2_SIZE);
3138	if (max == `0`) max = INT_MAX;
3139	ecode += `1` + `2` * IMM2_SIZE;
3140	break;
3141
3142	default: / No repeat follows /
3143	min = max = `1`;
3144	break;
3145	}
3146
3147	/ First, ensure the minimum number of matches are present. /
3148
3149	for (i = `1`; i <= min; i++)
3150	{
3151	if (eptr >= md->end_subject)
3152	{
3153	SCHECK_PARTIAL();
3154	RRETURN(MATCH_NOMATCH);
3155	}
3156	GETCHARINCTEST(c, eptr);
3157	if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3158	}
3159
3160	/ If max == min we can continue with the main loop without the*
3161	need to recurse. /*
3162
3163	if (min == max) continue;
3164
3165	/ If minimizing, keep testing the rest of the expression and advancing*
3166	the pointer while it matches the class. /*
3167
3168	if (minimize)
3169	{
3170	for (fi = min;; fi++)
3171	{
3172	RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3173	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3174	if (fi >= max) RRETURN(MATCH_NOMATCH);
3175	if (eptr >= md->end_subject)
3176	{
3177	SCHECK_PARTIAL();
3178	RRETURN(MATCH_NOMATCH);
3179	}
3180	GETCHARINCTEST(c, eptr);
3181	if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3182	}
3183	/ Control never gets here /
3184	}
3185
3186	/ If maximizing, find the longest possible run, then work backwards. /
3187
3188	else
3189	{
3190	pp = eptr;
3191	for (i = min; i < max; i++)
3192	{
3193	int len = `1`;
3194	if (eptr >= md->end_subject)
3195	{
3196	SCHECK_PARTIAL();
3197	break;
3198	}
3199	#ifdef SUPPORT_UTF
3200	GETCHARLENTEST(c, eptr, len);
3201	#else
3202	c = *eptr;
3203	#endif
3204	if (!PRIV(xclass)(c, data, utf)) break;
3205	eptr += len;
3206	}
3207
3208	if (possessive) continue; / No backtracking /
3209
3210	for(;;)
3211	{
3212	RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3213	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3214	if (eptr-- == pp) break; / Stop if tried at original pos /
3215	#ifdef SUPPORT_UTF
3216	if (utf) BACKCHAR(eptr);
3217	#endif
3218	}
3219	RRETURN(MATCH_NOMATCH);
3220	}
3221
3222	/ Control never gets here /
3223	}
3224	#endif /* End of XCLASS */
3225
3226	/ Match a single character, casefully /
3227
3228	case OP_CHAR:
3229	#ifdef SUPPORT_UTF
3230	if (utf)
3231	{
3232	length = `1`;
3233	ecode++;
3234	GETCHARLEN(fc, ecode, length);
3235	if (length > md->end_subject - eptr)
3236	{
3237	CHECK_PARTIAL(); / Not SCHECK_PARTIAL() /
3238	RRETURN(MATCH_NOMATCH);
3239	}
3240	while (length-- > `0`) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
3241	}
3242	else
3243	#endif
3244	/ Not UTF mode /
3245	{
3246	if (md->end_subject - eptr < `1`)
3247	{
3248	SCHECK_PARTIAL(); / This one can use SCHECK_PARTIAL() /
3249	RRETURN(MATCH_NOMATCH);
3250	}
3251	if (ecode[`1`] != *eptr++) RRETURN(MATCH_NOMATCH);
3252	ecode += `2`;
3253	}
3254	break;
3255
3256	/ Match a single character, caselessly. If we are at the end of the*
3257	subject, give up immediately. /*
3258
3259	case OP_CHARI:
3260	if (eptr >= md->end_subject)
3261	{
3262	SCHECK_PARTIAL();
3263	RRETURN(MATCH_NOMATCH);
3264	}
3265
3266	#ifdef SUPPORT_UTF
3267	if (utf)
3268	{
3269	length = `1`;
3270	ecode++;
3271	GETCHARLEN(fc, ecode, length);
3272
3273	/ If the pattern character's value is < 128, we have only one byte, and*
3274	we know that its other case must also be one byte long, so we can use the
3275	fast lookup table. We know that there is at least one byte left in the
3276	subject. /*
3277
3278	if (fc < `128`)
3279	{
3280	pcre_uint32 cc = UCHAR21(eptr);
3281	if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3282	ecode++;
3283	eptr++;
3284	}
3285
3286	/ Otherwise we must pick up the subject character. Note that we cannot*
3287	use the value of "length" to check for sufficient bytes left, because the
3288	other case of the character may have more or fewer bytes. /*
3289
3290	else
3291	{
3292	pcre_uint32 dc;
3293	GETCHARINC(dc, eptr);
3294	ecode += length;
3295
3296	/ If we have Unicode property support, we can use it to test the other*
3297	case of the character, if there is one. /*
3298
3299	if (fc != dc)
3300	{
3301	#ifdef SUPPORT_UCP
3302	if (dc != UCD_OTHERCASE(fc))
3303	#endif
3304	RRETURN(MATCH_NOMATCH);
3305	}
3306	}
3307	}
3308	else
3309	#endif /* SUPPORT_UTF */
3310
3311	/ Not UTF mode /
3312	{
3313	if (TABLE_GET(ecode[`1`], md->lcc, ecode[`1`])
3314	!= TABLE_GET(eptr, md->lcc, eptr)) RRETURN(MATCH_NOMATCH);
3315	eptr++;
3316	ecode += `2`;
3317	}
3318	break;
3319
3320	/ Match a single character repeatedly. /
3321
3322	case OP_EXACT:
3323	case OP_EXACTI:
3324	min = max = GET2(ecode, `1`);
3325	ecode += `1` + IMM2_SIZE;
3326	goto REPEATCHAR;
3327
3328	case OP_POSUPTO:
3329	case OP_POSUPTOI:
3330	possessive = TRUE;
3331	/ Fall through /
3332
3333	case OP_UPTO:
3334	case OP_UPTOI:
3335	case OP_MINUPTO:
3336	case OP_MINUPTOI:
3337	min = `0`;
3338	max = GET2(ecode, `1`);
3339	minimize = ecode == OP_MINUPTO \|\| ecode == OP_MINUPTOI;
3340	ecode += `1` + IMM2_SIZE;
3341	goto REPEATCHAR;
3342
3343	case OP_POSSTAR:
3344	case OP_POSSTARI:
3345	possessive = TRUE;
3346	min = `0`;
3347	max = INT_MAX;
3348	ecode++;
3349	goto REPEATCHAR;
3350
3351	case OP_POSPLUS:
3352	case OP_POSPLUSI:
3353	possessive = TRUE;
3354	min = `1`;
3355	max = INT_MAX;
3356	ecode++;
3357	goto REPEATCHAR;
3358
3359	case OP_POSQUERY:
3360	case OP_POSQUERYI:
3361	possessive = TRUE;
3362	min = `0`;
3363	max = `1`;
3364	ecode++;
3365	goto REPEATCHAR;
3366
3367	case OP_STAR:
3368	case OP_STARI:
3369	case OP_MINSTAR:
3370	case OP_MINSTARI:
3371	case OP_PLUS:
3372	case OP_PLUSI:
3373	case OP_MINPLUS:
3374	case OP_MINPLUSI:
3375	case OP_QUERY:
3376	case OP_QUERYI:
3377	case OP_MINQUERY:
3378	case OP_MINQUERYI:
3379	c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3380	minimize = (c & `1`) != `0`;
3381	min = rep_min[c]; / Pick up values from tables; /
3382	max = rep_max[c]; / zero for max => infinity /
3383	if (max == `0`) max = INT_MAX;
3384
3385	/ Common code for all repeated single-character matches. We first check*
3386	for the minimum number of characters. If the minimum equals the maximum, we
3387	are done. Otherwise, if minimizing, check the rest of the pattern for a
3388	match; if there isn't one, advance up to the maximum, one character at a
3389	time.
3390
3391	If maximizing, advance up to the maximum number of matching characters,
3392	until eptr is past the end of the maximum run. If possessive, we are
3393	then done (no backing up). Otherwise, match at this position; anything
3394	other than no match is immediately returned. For nomatch, back up one
3395	character, unless we are matching \R and the last thing matched was
3396	\r\n, in which case, back up two bytes. When we reach the first optional
3397	character position, we can save stack by doing a tail recurse.
3398
3399	The various UTF/non-UTF and caseful/caseless cases are handled separately,
3400	for speed. /*
3401
3402	REPEATCHAR:
3403	#ifdef SUPPORT_UTF
3404	if (utf)
3405	{
3406	length = `1`;
3407	charptr = ecode;
3408	GETCHARLEN(fc, ecode, length);
3409	ecode += length;
3410
3411	/ Handle multibyte character matching specially here. There is*
3412	support for caseless matching if UCP support is present. /*
3413
3414	if (length > `1`)
3415	{
3416	#ifdef SUPPORT_UCP
3417	pcre_uint32 othercase;
3418	if (op >= OP_STARI && / Caseless /
3419	(othercase = UCD_OTHERCASE(fc)) != fc)
3420	oclength = PRIV(ord2utf)(othercase, occhars);
3421	else oclength = `0`;
3422	#endif /* SUPPORT_UCP */
3423
3424	for (i = `1`; i <= min; i++)
3425	{
3426	if (eptr <= md->end_subject - length &&
3427	memcmp(eptr, charptr, IN_UCHARS(length)) == `0`) eptr += length;
3428	#ifdef SUPPORT_UCP
3429	else if (oclength > `0` &&
3430	eptr <= md->end_subject - oclength &&
3431	memcmp(eptr, occhars, IN_UCHARS(oclength)) == `0`) eptr += oclength;
3432	#endif /* SUPPORT_UCP */
3433	else
3434	{
3435	CHECK_PARTIAL();
3436	RRETURN(MATCH_NOMATCH);
3437	}
3438	}
3439
3440	if (min == max) continue;
3441
3442	if (minimize)
3443	{
3444	for (fi = min;; fi++)
3445	{
3446	RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3447	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3448	if (fi >= max) RRETURN(MATCH_NOMATCH);
3449	if (eptr <= md->end_subject - length &&
3450	memcmp(eptr, charptr, IN_UCHARS(length)) == `0`) eptr += length;
3451	#ifdef SUPPORT_UCP
3452	else if (oclength > `0` &&
3453	eptr <= md->end_subject - oclength &&
3454	memcmp(eptr, occhars, IN_UCHARS(oclength)) == `0`) eptr += oclength;
3455	#endif /* SUPPORT_UCP */
3456	else
3457	{
3458	CHECK_PARTIAL();
3459	RRETURN(MATCH_NOMATCH);
3460	}
3461	}
3462	/ Control never gets here /
3463	}
3464
3465	else / Maximize /
3466	{
3467	pp = eptr;
3468	for (i = min; i < max; i++)
3469	{
3470	if (eptr <= md->end_subject - length &&
3471	memcmp(eptr, charptr, IN_UCHARS(length)) == `0`) eptr += length;
3472	#ifdef SUPPORT_UCP
3473	else if (oclength > `0` &&
3474	eptr <= md->end_subject - oclength &&
3475	memcmp(eptr, occhars, IN_UCHARS(oclength)) == `0`) eptr += oclength;
3476	#endif /* SUPPORT_UCP */
3477	else
3478	{
3479	CHECK_PARTIAL();
3480	break;
3481	}
3482	}
3483
3484	if (possessive) continue; / No backtracking /
3485	for(;;)
3486	{
3487	if (eptr <= pp) goto TAIL_RECURSE;
3488	RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3489	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3490	#ifdef SUPPORT_UCP
3491	eptr--;
3492	BACKCHAR(eptr);
3493	#else /* without SUPPORT_UCP */
3494	eptr -= length;
3495	#endif /* SUPPORT_UCP */
3496	}
3497	}
3498	/ Control never gets here /
3499	}
3500
3501	/ If the length of a UTF-8 character is 1, we fall through here, and*
3502	obey the code as for non-UTF-8 characters below, though in this case the
3503	value of fc will always be < 128. /*
3504	}
3505	else
3506	#endif /* SUPPORT_UTF */
3507	/ When not in UTF-8 mode, load a single-byte character. /
3508	fc = *ecode++;
3509
3510	/ The value of fc at this point is always one character, though we may*
3511	or may not be in UTF mode. The code is duplicated for the caseless and
3512	caseful cases, for speed, since matching characters is likely to be quite
3513	common. First, ensure the minimum number of matches are present. If min =
3514	max, continue at the same level without recursing. Otherwise, if
3515	minimizing, keep trying the rest of the expression and advancing one
3516	matching character if failing, up to the maximum. Alternatively, if
3517	maximizing, find the maximum number of characters and work backwards. /*
3518
3519	DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3520	max, (char *)eptr));
3521
3522	if (op >= OP_STARI) / Caseless /
3523	{
3524	#ifdef COMPILE_PCRE8
3525	/ fc must be < 128 if UTF is enabled. /
3526	foc = md->fcc[fc];
3527	#else
3528	#ifdef SUPPORT_UTF
3529	#ifdef SUPPORT_UCP
3530	if (utf && fc > `127`)
3531	foc = UCD_OTHERCASE(fc);
3532	#else
3533	if (utf && fc > `127`)
3534	foc = fc;
3535	#endif /* SUPPORT_UCP */
3536	else
3537	#endif /* SUPPORT_UTF */
3538	foc = TABLE_GET(fc, md->fcc, fc);
3539	#endif /* COMPILE_PCRE8 */
3540
3541	for (i = `1`; i <= min; i++)
3542	{
3543	pcre_uint32 cc; / Faster than pcre_uchar /
3544	if (eptr >= md->end_subject)
3545	{
3546	SCHECK_PARTIAL();
3547	RRETURN(MATCH_NOMATCH);
3548	}
3549	cc = UCHAR21TEST(eptr);
3550	if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3551	eptr++;
3552	}
3553	if (min == max) continue;
3554	if (minimize)
3555	{
3556	for (fi = min;; fi++)
3557	{
3558	pcre_uint32 cc; / Faster than pcre_uchar /
3559	RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3560	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3561	if (fi >= max) RRETURN(MATCH_NOMATCH);
3562	if (eptr >= md->end_subject)
3563	{
3564	SCHECK_PARTIAL();
3565	RRETURN(MATCH_NOMATCH);
3566	}
3567	cc = UCHAR21TEST(eptr);
3568	if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
3569	eptr++;
3570	}
3571	/ Control never gets here /
3572	}
3573	else / Maximize /
3574	{
3575	pp = eptr;
3576	for (i = min; i < max; i++)
3577	{
3578	pcre_uint32 cc; / Faster than pcre_uchar /
3579	if (eptr >= md->end_subject)
3580	{
3581	SCHECK_PARTIAL();
3582	break;
3583	}
3584	cc = UCHAR21TEST(eptr);
3585	if (fc != cc && foc != cc) break;
3586	eptr++;
3587	}
3588	if (possessive) continue; / No backtracking /
3589	for (;;)
3590	{
3591	if (eptr == pp) goto TAIL_RECURSE;
3592	RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3593	eptr--;
3594	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3595	}
3596	/ Control never gets here /
3597	}
3598	}
3599
3600	/ Caseful comparisons (includes all multi-byte characters) /
3601
3602	else
3603	{
3604	for (i = `1`; i <= min; i++)
3605	{
3606	if (eptr >= md->end_subject)
3607	{
3608	SCHECK_PARTIAL();
3609	RRETURN(MATCH_NOMATCH);
3610	}
3611	if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3612	}
3613
3614	if (min == max) continue;
3615
3616	if (minimize)
3617	{
3618	for (fi = min;; fi++)
3619	{
3620	RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3621	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3622	if (fi >= max) RRETURN(MATCH_NOMATCH);
3623	if (eptr >= md->end_subject)
3624	{
3625	SCHECK_PARTIAL();
3626	RRETURN(MATCH_NOMATCH);
3627	}
3628	if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3629	}
3630	/ Control never gets here /
3631	}
3632	else / Maximize /
3633	{
3634	pp = eptr;
3635	for (i = min; i < max; i++)
3636	{
3637	if (eptr >= md->end_subject)
3638	{
3639	SCHECK_PARTIAL();
3640	break;
3641	}
3642	if (fc != UCHAR21TEST(eptr)) break;
3643	eptr++;
3644	}
3645	if (possessive) continue; / No backtracking /
3646	for (;;)
3647	{
3648	if (eptr == pp) goto TAIL_RECURSE;
3649	RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3650	eptr--;
3651	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3652	}
3653	/ Control never gets here /
3654	}
3655	}
3656	/ Control never gets here /
3657
3658	/ Match a negated single one-byte character. The character we are*
3659	checking can be multibyte. /*
3660
3661	case OP_NOT:
3662	case OP_NOTI:
3663	if (eptr >= md->end_subject)
3664	{
3665	SCHECK_PARTIAL();
3666	RRETURN(MATCH_NOMATCH);
3667	}
3668	#ifdef SUPPORT_UTF
3669	if (utf)
3670	{
3671	register pcre_uint32 ch, och;
3672
3673	ecode++;
3674	GETCHARINC(ch, ecode);
3675	GETCHARINC(c, eptr);
3676
3677	if (op == OP_NOT)
3678	{
3679	if (ch == c) RRETURN(MATCH_NOMATCH);
3680	}
3681	else
3682	{
3683	#ifdef SUPPORT_UCP
3684	if (ch > `127`)
3685	och = UCD_OTHERCASE(ch);
3686	#else
3687	if (ch > `127`)
3688	och = ch;
3689	#endif /* SUPPORT_UCP */
3690	else
3691	och = TABLE_GET(ch, md->fcc, ch);
3692	if (ch == c \|\| och == c) RRETURN(MATCH_NOMATCH);
3693	}
3694	}
3695	else
3696	#endif
3697	{
3698	register pcre_uint32 ch = ecode[`1`];
3699	c = *eptr++;
3700	if (ch == c \|\| (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
3701	RRETURN(MATCH_NOMATCH);
3702	ecode += `2`;
3703	}
3704	break;
3705
3706	/ Match a negated single one-byte character repeatedly. This is almost a*
3707	repeat of the code for a repeated single character, but I haven't found a
3708	nice way of commoning these up that doesn't require a test of the
3709	positive/negative option for each character match. Maybe that wouldn't add
3710	very much to the time taken, but character matching is* what this is all*
3711	about... /*
3712
3713	case OP_NOTEXACT:
3714	case OP_NOTEXACTI:
3715	min = max = GET2(ecode, `1`);
3716	ecode += `1` + IMM2_SIZE;
3717	goto REPEATNOTCHAR;
3718
3719	case OP_NOTUPTO:
3720	case OP_NOTUPTOI:
3721	case OP_NOTMINUPTO:
3722	case OP_NOTMINUPTOI:
3723	min = `0`;
3724	max = GET2(ecode, `1`);
3725	minimize = ecode == OP_NOTMINUPTO \|\| ecode == OP_NOTMINUPTOI;
3726	ecode += `1` + IMM2_SIZE;
3727	goto REPEATNOTCHAR;
3728
3729	case OP_NOTPOSSTAR:
3730	case OP_NOTPOSSTARI:
3731	possessive = TRUE;
3732	min = `0`;
3733	max = INT_MAX;
3734	ecode++;
3735	goto REPEATNOTCHAR;
3736
3737	case OP_NOTPOSPLUS:
3738	case OP_NOTPOSPLUSI:
3739	possessive = TRUE;
3740	min = `1`;
3741	max = INT_MAX;
3742	ecode++;
3743	goto REPEATNOTCHAR;
3744
3745	case OP_NOTPOSQUERY:
3746	case OP_NOTPOSQUERYI:
3747	possessive = TRUE;
3748	min = `0`;
3749	max = `1`;
3750	ecode++;
3751	goto REPEATNOTCHAR;
3752
3753	case OP_NOTPOSUPTO:
3754	case OP_NOTPOSUPTOI:
3755	possessive = TRUE;
3756	min = `0`;
3757	max = GET2(ecode, `1`);
3758	ecode += `1` + IMM2_SIZE;
3759	goto REPEATNOTCHAR;
3760
3761	case OP_NOTSTAR:
3762	case OP_NOTSTARI:
3763	case OP_NOTMINSTAR:
3764	case OP_NOTMINSTARI:
3765	case OP_NOTPLUS:
3766	case OP_NOTPLUSI:
3767	case OP_NOTMINPLUS:
3768	case OP_NOTMINPLUSI:
3769	case OP_NOTQUERY:
3770	case OP_NOTQUERYI:
3771	case OP_NOTMINQUERY:
3772	case OP_NOTMINQUERYI:
3773	c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3774	minimize = (c & `1`) != `0`;
3775	min = rep_min[c]; / Pick up values from tables; /
3776	max = rep_max[c]; / zero for max => infinity /
3777	if (max == `0`) max = INT_MAX;
3778
3779	/ Common code for all repeated single-byte matches. /
3780
3781	REPEATNOTCHAR:
3782	GETCHARINCTEST(fc, ecode);
3783
3784	/ The code is duplicated for the caseless and caseful cases, for speed,*
3785	since matching characters is likely to be quite common. First, ensure the
3786	minimum number of matches are present. If min = max, continue at the same
3787	level without recursing. Otherwise, if minimizing, keep trying the rest of
3788	the expression and advancing one matching character if failing, up to the
3789	maximum. Alternatively, if maximizing, find the maximum number of
3790	characters and work backwards. /*
3791
3792	DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3793	max, (char *)eptr));
3794
3795	if (op >= OP_NOTSTARI) / Caseless /
3796	{
3797	#ifdef SUPPORT_UTF
3798	#ifdef SUPPORT_UCP
3799	if (utf && fc > `127`)
3800	foc = UCD_OTHERCASE(fc);
3801	#else
3802	if (utf && fc > `127`)
3803	foc = fc;
3804	#endif /* SUPPORT_UCP */
3805	else
3806	#endif /* SUPPORT_UTF */
3807	foc = TABLE_GET(fc, md->fcc, fc);
3808
3809	#ifdef SUPPORT_UTF
3810	if (utf)
3811	{
3812	register pcre_uint32 d;
3813	for (i = `1`; i <= min; i++)
3814	{
3815	if (eptr >= md->end_subject)
3816	{
3817	SCHECK_PARTIAL();
3818	RRETURN(MATCH_NOMATCH);
3819	}
3820	GETCHARINC(d, eptr);
3821	if (fc == d \|\| (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3822	}
3823	}
3824	else
3825	#endif /* SUPPORT_UTF */
3826	/ Not UTF mode /
3827	{
3828	for (i = `1`; i <= min; i++)
3829	{
3830	if (eptr >= md->end_subject)
3831	{
3832	SCHECK_PARTIAL();
3833	RRETURN(MATCH_NOMATCH);
3834	}
3835	if (fc == eptr \|\| foc == eptr) RRETURN(MATCH_NOMATCH);
3836	eptr++;
3837	}
3838	}
3839
3840	if (min == max) continue;
3841
3842	if (minimize)
3843	{
3844	#ifdef SUPPORT_UTF
3845	if (utf)
3846	{
3847	register pcre_uint32 d;
3848	for (fi = min;; fi++)
3849	{
3850	RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3851	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3852	if (fi >= max) RRETURN(MATCH_NOMATCH);
3853	if (eptr >= md->end_subject)
3854	{
3855	SCHECK_PARTIAL();
3856	RRETURN(MATCH_NOMATCH);
3857	}
3858	GETCHARINC(d, eptr);
3859	if (fc == d \|\| (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3860	}
3861	}
3862	else
3863	#endif /SUPPORT_UTF /
3864	/ Not UTF mode /
3865	{
3866	for (fi = min;; fi++)
3867	{
3868	RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3869	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3870	if (fi >= max) RRETURN(MATCH_NOMATCH);
3871	if (eptr >= md->end_subject)
3872	{
3873	SCHECK_PARTIAL();
3874	RRETURN(MATCH_NOMATCH);
3875	}
3876	if (fc == eptr \|\| foc == eptr) RRETURN(MATCH_NOMATCH);
3877	eptr++;
3878	}
3879	}
3880	/ Control never gets here /
3881	}
3882
3883	/ Maximize case /
3884
3885	else
3886	{
3887	pp = eptr;
3888
3889	#ifdef SUPPORT_UTF
3890	if (utf)
3891	{
3892	register pcre_uint32 d;
3893	for (i = min; i < max; i++)
3894	{
3895	int len = `1`;
3896	if (eptr >= md->end_subject)
3897	{
3898	SCHECK_PARTIAL();
3899	break;
3900	}
3901	GETCHARLEN(d, eptr, len);
3902	if (fc == d \|\| (unsigned int)foc == d) break;
3903	eptr += len;
3904	}
3905	if (possessive) continue; / No backtracking /
3906	for(;;)
3907	{
3908	if (eptr <= pp) goto TAIL_RECURSE;
3909	RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3910	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3911	eptr--;
3912	BACKCHAR(eptr);
3913	}
3914	}
3915	else
3916	#endif /* SUPPORT_UTF */
3917	/ Not UTF mode /
3918	{
3919	for (i = min; i < max; i++)
3920	{
3921	if (eptr >= md->end_subject)
3922	{
3923	SCHECK_PARTIAL();
3924	break;
3925	}
3926	if (fc == eptr \|\| foc == eptr) break;
3927	eptr++;
3928	}
3929	if (possessive) continue; / No backtracking /
3930	for (;;)
3931	{
3932	if (eptr == pp) goto TAIL_RECURSE;
3933	RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3934	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3935	eptr--;
3936	}
3937	}
3938	/ Control never gets here /
3939	}
3940	}
3941
3942	/ Caseful comparisons /
3943
3944	else
3945	{
3946	#ifdef SUPPORT_UTF
3947	if (utf)
3948	{
3949	register pcre_uint32 d;
3950	for (i = `1`; i <= min; i++)
3951	{
3952	if (eptr >= md->end_subject)
3953	{
3954	SCHECK_PARTIAL();
3955	RRETURN(MATCH_NOMATCH);
3956	}
3957	GETCHARINC(d, eptr);
3958	if (fc == d) RRETURN(MATCH_NOMATCH);
3959	}
3960	}
3961	else
3962	#endif
3963	/ Not UTF mode /
3964	{
3965	for (i = `1`; i <= min; i++)
3966	{
3967	if (eptr >= md->end_subject)
3968	{
3969	SCHECK_PARTIAL();
3970	RRETURN(MATCH_NOMATCH);
3971	}
3972	if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3973	}
3974	}
3975
3976	if (min == max) continue;
3977
3978	if (minimize)
3979	{
3980	#ifdef SUPPORT_UTF
3981	if (utf)
3982	{
3983	register pcre_uint32 d;
3984	for (fi = min;; fi++)
3985	{
3986	RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3987	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3988	if (fi >= max) RRETURN(MATCH_NOMATCH);
3989	if (eptr >= md->end_subject)
3990	{
3991	SCHECK_PARTIAL();
3992	RRETURN(MATCH_NOMATCH);
3993	}
3994	GETCHARINC(d, eptr);
3995	if (fc == d) RRETURN(MATCH_NOMATCH);
3996	}
3997	}
3998	else
3999	#endif
4000	/ Not UTF mode /
4001	{
4002	for (fi = min;; fi++)
4003	{
4004	RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
4005	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4006	if (fi >= max) RRETURN(MATCH_NOMATCH);
4007	if (eptr >= md->end_subject)
4008	{
4009	SCHECK_PARTIAL();
4010	RRETURN(MATCH_NOMATCH);
4011	}
4012	if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
4013	}
4014	}
4015	/ Control never gets here /
4016	}
4017
4018	/ Maximize case /
4019
4020	else
4021	{
4022	pp = eptr;
4023
4024	#ifdef SUPPORT_UTF
4025	if (utf)
4026	{
4027	register pcre_uint32 d;
4028	for (i = min; i < max; i++)
4029	{
4030	int len = `1`;
4031	if (eptr >= md->end_subject)
4032	{
4033	SCHECK_PARTIAL();
4034	break;
4035	}
4036	GETCHARLEN(d, eptr, len);
4037	if (fc == d) break;
4038	eptr += len;
4039	}
4040	if (possessive) continue; / No backtracking /
4041	for(;;)
4042	{
4043	if (eptr <= pp) goto TAIL_RECURSE;
4044	RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
4045	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4046	eptr--;
4047	BACKCHAR(eptr);
4048	}
4049	}
4050	else
4051	#endif
4052	/ Not UTF mode /
4053	{
4054	for (i = min; i < max; i++)
4055	{
4056	if (eptr >= md->end_subject)
4057	{
4058	SCHECK_PARTIAL();
4059	break;
4060	}
4061	if (fc == eptr) break*;
4062	eptr++;
4063	}
4064	if (possessive) continue; / No backtracking /
4065	for (;;)
4066	{
4067	if (eptr == pp) goto TAIL_RECURSE;
4068	RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
4069	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4070	eptr--;
4071	}
4072	}
4073	/ Control never gets here /
4074	}
4075	}
4076	/ Control never gets here /
4077
4078	/ Match a single character type repeatedly; several different opcodes*
4079	share code. This is very similar to the code for single characters, but we
4080	repeat it in the interests of efficiency. /*
4081
4082	case OP_TYPEEXACT:
4083	min = max = GET2(ecode, `1`);
4084	minimize = TRUE;
4085	ecode += `1` + IMM2_SIZE;
4086	goto REPEATTYPE;
4087
4088	case OP_TYPEUPTO:
4089	case OP_TYPEMINUPTO:
4090	min = `0`;
4091	max = GET2(ecode, `1`);
4092	minimize = *ecode == OP_TYPEMINUPTO;
4093	ecode += `1` + IMM2_SIZE;
4094	goto REPEATTYPE;
4095
4096	case OP_TYPEPOSSTAR:
4097	possessive = TRUE;
4098	min = `0`;
4099	max = INT_MAX;
4100	ecode++;
4101	goto REPEATTYPE;
4102
4103	case OP_TYPEPOSPLUS:
4104	possessive = TRUE;
4105	min = `1`;
4106	max = INT_MAX;
4107	ecode++;
4108	goto REPEATTYPE;
4109
4110	case OP_TYPEPOSQUERY:
4111	possessive = TRUE;
4112	min = `0`;
4113	max = `1`;
4114	ecode++;
4115	goto REPEATTYPE;
4116
4117	case OP_TYPEPOSUPTO:
4118	possessive = TRUE;
4119	min = `0`;
4120	max = GET2(ecode, `1`);
4121	ecode += `1` + IMM2_SIZE;
4122	goto REPEATTYPE;
4123
4124	case OP_TYPESTAR:
4125	case OP_TYPEMINSTAR:
4126	case OP_TYPEPLUS:
4127	case OP_TYPEMINPLUS:
4128	case OP_TYPEQUERY:
4129	case OP_TYPEMINQUERY:
4130	c = *ecode++ - OP_TYPESTAR;
4131	minimize = (c & `1`) != `0`;
4132	min = rep_min[c]; / Pick up values from tables; /
4133	max = rep_max[c]; / zero for max => infinity /
4134	if (max == `0`) max = INT_MAX;
4135
4136	/ Common code for all repeated single character type matches. Note that*
4137	in UTF-8 mode, '.' matches a character of any length, but for the other
4138	character types, the valid characters are all one-byte long. /*
4139
4140	REPEATTYPE:
4141	ctype = ecode++; /* Code for the character type /
4142
4143	#ifdef SUPPORT_UCP
4144	if (ctype == OP_PROP \|\| ctype == OP_NOTPROP)
4145	{
4146	prop_fail_result = ctype == OP_NOTPROP;
4147	prop_type = *ecode++;
4148	prop_value = *ecode++;
4149	}
4150	else prop_type = -`1`;
4151	#endif
4152
4153	/ First, ensure the minimum number of matches are present. Use inline*
4154	code for maximizing the speed, and do the type test once at the start
4155	(i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4156	is tidier. Also separate the UCP code, which can be the same for both UTF-8
4157	and single-bytes. /*
4158
4159	if (min > `0`)
4160	{
4161	#ifdef SUPPORT_UCP
4162	if (prop_type >= `0`)
4163	{
4164	switch(prop_type)
4165	{
4166	case PT_ANY:
4167	if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4168	for (i = `1`; i <= min; i++)
4169	{
4170	if (eptr >= md->end_subject)
4171	{
4172	SCHECK_PARTIAL();
4173	RRETURN(MATCH_NOMATCH);
4174	}
4175	GETCHARINCTEST(c, eptr);
4176	}
4177	break;
4178
4179	case PT_LAMP:
4180	for (i = `1`; i <= min; i++)
4181	{
4182	int chartype;
4183	if (eptr >= md->end_subject)
4184	{
4185	SCHECK_PARTIAL();
4186	RRETURN(MATCH_NOMATCH);
4187	}
4188	GETCHARINCTEST(c, eptr);
4189	chartype = UCD_CHARTYPE(c);
4190	if ((chartype == ucp_Lu \|\|
4191	chartype == ucp_Ll \|\|
4192	chartype == ucp_Lt) == prop_fail_result)
4193	RRETURN(MATCH_NOMATCH);
4194	}
4195	break;
4196
4197	case PT_GC:
4198	for (i = `1`; i <= min; i++)
4199	{
4200	if (eptr >= md->end_subject)
4201	{
4202	SCHECK_PARTIAL();
4203	RRETURN(MATCH_NOMATCH);
4204	}
4205	GETCHARINCTEST(c, eptr);
4206	if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4207	RRETURN(MATCH_NOMATCH);
4208	}
4209	break;
4210
4211	case PT_PC:
4212	for (i = `1`; i <= min; i++)
4213	{
4214	if (eptr >= md->end_subject)
4215	{
4216	SCHECK_PARTIAL();
4217	RRETURN(MATCH_NOMATCH);
4218	}
4219	GETCHARINCTEST(c, eptr);
4220	if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4221	RRETURN(MATCH_NOMATCH);
4222	}
4223	break;
4224
4225	case PT_SC:
4226	for (i = `1`; i <= min; i++)
4227	{
4228	if (eptr >= md->end_subject)
4229	{
4230	SCHECK_PARTIAL();
4231	RRETURN(MATCH_NOMATCH);
4232	}
4233	GETCHARINCTEST(c, eptr);
4234	if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4235	RRETURN(MATCH_NOMATCH);
4236	}
4237	break;
4238
4239	case PT_ALNUM:
4240	for (i = `1`; i <= min; i++)
4241	{
4242	int category;
4243	if (eptr >= md->end_subject)
4244	{
4245	SCHECK_PARTIAL();
4246	RRETURN(MATCH_NOMATCH);
4247	}
4248	GETCHARINCTEST(c, eptr);
4249	category = UCD_CATEGORY(c);
4250	if ((category == ucp_L \|\| category == ucp_N) == prop_fail_result)
4251	RRETURN(MATCH_NOMATCH);
4252	}
4253	break;
4254
4255	/* Perl space used to exclude VT, but from Perl 5.18 it is included,
4256	which means that Perl space and POSIX space are now identical. PCRE
4257	was changed at release 8.34. */
4258
4259	case PT_SPACE: / Perl space /
4260	case PT_PXSPACE: / POSIX space /
4261	for (i = `1`; i <= min; i++)
4262	{
4263	if (eptr >= md->end_subject)
4264	{
4265	SCHECK_PARTIAL();
4266	RRETURN(MATCH_NOMATCH);
4267	}
4268	GETCHARINCTEST(c, eptr);
4269	switch(c)
4270	{
4271	HSPACE_CASES:
4272	VSPACE_CASES:
4273	if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4274	break;
4275
4276	default:
4277	if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
4278	RRETURN(MATCH_NOMATCH);
4279	break;
4280	}
4281	}
4282	break;
4283
4284	case PT_WORD:
4285	for (i = `1`; i <= min; i++)
4286	{
4287	int category;
4288	if (eptr >= md->end_subject)
4289	{
4290	SCHECK_PARTIAL();
4291	RRETURN(MATCH_NOMATCH);
4292	}
4293	GETCHARINCTEST(c, eptr);
4294	category = UCD_CATEGORY(c);
4295	if ((category == ucp_L \|\| category == ucp_N \|\| c == CHAR_UNDERSCORE)
4296	== prop_fail_result)
4297	RRETURN(MATCH_NOMATCH);
4298	}
4299	break;
4300
4301	case PT_CLIST:
4302	for (i = `1`; i <= min; i++)
4303	{
4304	const pcre_uint32 *cp;
4305	if (eptr >= md->end_subject)
4306	{
4307	SCHECK_PARTIAL();
4308	RRETURN(MATCH_NOMATCH);
4309	}
4310	GETCHARINCTEST(c, eptr);
4311	cp = PRIV(ucd_caseless_sets) + prop_value;
4312	for (;;)
4313	{
4314	if (c < *cp)
4315	{ if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4316	if (c == *cp++)
4317	{ if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4318	}
4319	}
4320	break;
4321
4322	case PT_UCNC:
4323	for (i = `1`; i <= min; i++)
4324	{
4325	if (eptr >= md->end_subject)
4326	{
4327	SCHECK_PARTIAL();
4328	RRETURN(MATCH_NOMATCH);
4329	}
4330	GETCHARINCTEST(c, eptr);
4331	if ((c == CHAR_DOLLAR_SIGN \|\| c == CHAR_COMMERCIAL_AT \|\|
4332	c == CHAR_GRAVE_ACCENT \|\| (c >= `0xa0` && c <= `0xd7ff`) \|\|
4333	c >= `0xe000`) == prop_fail_result)
4334	RRETURN(MATCH_NOMATCH);
4335	}
4336	break;
4337
4338	/* This should not occur */
4339
4340	default:
4341	RRETURN(PCRE_ERROR_INTERNAL);
4342	}
4343	}
4344
4345	/* Match extended Unicode sequences. We will get here only if the
4346	support is in the binary; otherwise a compile-time error occurs. */
4347
4348	else if (ctype == OP_EXTUNI)
4349	{
4350	for (i = `1`; i <= min; i++)
4351	{
4352	if (eptr >= md->end_subject)
4353	{
4354	SCHECK_PARTIAL();
4355	RRETURN(MATCH_NOMATCH);
4356	}
4357	else
4358	{
4359	int lgb, rgb;
4360	GETCHARINCTEST(c, eptr);
4361	lgb = UCD_GRAPHBREAK(c);
4362	while (eptr < md->end_subject)
4363	{
4364	int len = `1`;
4365	if (!utf) c = eptr; else* { GETCHARLEN(c, eptr, len); }
4366	rgb = UCD_GRAPHBREAK(c);
4367	if ((PRIV(ucp_gbtable)[lgb] & (`1` << rgb)) == `0`) break;
4368	lgb = rgb;
4369	eptr += len;
4370	}
4371	}
4372	CHECK_PARTIAL();
4373	}
4374	}
4375
4376	else
4377	#endif /* SUPPORT_UCP */
4378
4379	/* Handle all other cases when the coding is UTF-8 */
4380
4381	#ifdef SUPPORT_UTF
4382	if (utf) switch(ctype)
4383	{
4384	case OP_ANY:
4385	for (i = `1`; i <= min; i++)
4386	{
4387	if (eptr >= md->end_subject)
4388	{
4389	SCHECK_PARTIAL();
4390	RRETURN(MATCH_NOMATCH);
4391	}
4392	if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4393	if (md->partial != `0` &&
4394	eptr + `1` >= md->end_subject &&
4395	NLBLOCK->nltype == NLTYPE_FIXED &&
4396	NLBLOCK->nllen == `2` &&
4397	UCHAR21(eptr) == NLBLOCK->nl[`0`])
4398	{
4399	md->hitend = TRUE;
4400	if (md->partial > `1`) RRETURN(PCRE_ERROR_PARTIAL);
4401	}
4402	eptr++;
4403	ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4404	}
4405	break;
4406
4407	case OP_ALLANY:
4408	for (i = `1`; i <= min; i++)
4409	{
4410	if (eptr >= md->end_subject)
4411	{
4412	SCHECK_PARTIAL();
4413	RRETURN(MATCH_NOMATCH);
4414	}
4415	eptr++;
4416	ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4417	}
4418	break;
4419
4420	case OP_ANYBYTE:
4421	if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4422	eptr += min;
4423	break;
4424
4425	case OP_ANYNL:
4426	for (i = `1`; i <= min; i++)
4427	{
4428	if (eptr >= md->end_subject)
4429	{
4430	SCHECK_PARTIAL();
4431	RRETURN(MATCH_NOMATCH);
4432	}
4433	GETCHARINC(c, eptr);
4434	switch(c)
4435	{
4436	default: RRETURN(MATCH_NOMATCH);
4437
4438	case CHAR_CR:
4439	if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
4440	break;
4441
4442	case CHAR_LF:
4443	break;
4444
4445	case CHAR_VT:
4446	case CHAR_FF:
4447	case CHAR_NEL:
4448	#ifndef EBCDIC
4449	case `0x2028`:
4450	case `0x2029`:
4451	#endif /* Not EBCDIC */
4452	if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4453	break;
4454	}
4455	}
4456	break;
4457
4458	case OP_NOT_HSPACE:
4459	for (i = `1`; i <= min; i++)
4460	{
4461	if (eptr >= md->end_subject)
4462	{
4463	SCHECK_PARTIAL();
4464	RRETURN(MATCH_NOMATCH);
4465	}
4466	GETCHARINC(c, eptr);
4467	switch(c)
4468	{
4469	HSPACE_CASES: RRETURN(MATCH_NOMATCH); / Byte and multibyte cases /
4470	default: break;
4471	}
4472	}
4473	break;
4474
4475	case OP_HSPACE:
4476	for (i = `1`; i <= min; i++)
4477	{
4478	if (eptr >= md->end_subject)
4479	{
4480	SCHECK_PARTIAL();
4481	RRETURN(MATCH_NOMATCH);
4482	}
4483	GETCHARINC(c, eptr);
4484	switch(c)
4485	{
4486	HSPACE_CASES: break; / Byte and multibyte cases /
4487	default: RRETURN(MATCH_NOMATCH);
4488	}
4489	}
4490	break;
4491
4492	case OP_NOT_VSPACE:
4493	for (i = `1`; i <= min; i++)
4494	{
4495	if (eptr >= md->end_subject)
4496	{
4497	SCHECK_PARTIAL();
4498	RRETURN(MATCH_NOMATCH);
4499	}
4500	GETCHARINC(c, eptr);
4501	switch(c)
4502	{
4503	VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4504	default: break;
4505	}
4506	}
4507	break;
4508
4509	case OP_VSPACE:
4510	for (i = `1`; i <= min; i++)
4511	{
4512	if (eptr >= md->end_subject)
4513	{
4514	SCHECK_PARTIAL();
4515	RRETURN(MATCH_NOMATCH);
4516	}
4517	GETCHARINC(c, eptr);
4518	switch(c)
4519	{
4520	VSPACE_CASES: break;
4521	default: RRETURN(MATCH_NOMATCH);
4522	}
4523	}
4524	break;
4525
4526	case OP_NOT_DIGIT:
4527	for (i = `1`; i <= min; i++)
4528	{
4529	if (eptr >= md->end_subject)
4530	{
4531	SCHECK_PARTIAL();
4532	RRETURN(MATCH_NOMATCH);
4533	}
4534	GETCHARINC(c, eptr);
4535	if (c < `128` && (md->ctypes[c] & ctype_digit) != `0`)
4536	RRETURN(MATCH_NOMATCH);
4537	}
4538	break;
4539
4540	case OP_DIGIT:
4541	for (i = `1`; i <= min; i++)
4542	{
4543	pcre_uint32 cc;
4544	if (eptr >= md->end_subject)
4545	{
4546	SCHECK_PARTIAL();
4547	RRETURN(MATCH_NOMATCH);
4548	}
4549	cc = UCHAR21(eptr);
4550	if (cc >= `128` \|\| (md->ctypes[cc] & ctype_digit) == `0`)
4551	RRETURN(MATCH_NOMATCH);
4552	eptr++;
4553	/ No need to skip more bytes - we know it's a 1-byte character /
4554	}
4555	break;
4556
4557	case OP_NOT_WHITESPACE:
4558	for (i = `1`; i <= min; i++)
4559	{
4560	pcre_uint32 cc;
4561	if (eptr >= md->end_subject)
4562	{
4563	SCHECK_PARTIAL();
4564	RRETURN(MATCH_NOMATCH);
4565	}
4566	cc = UCHAR21(eptr);
4567	if (cc < `128` && (md->ctypes[cc] & ctype_space) != `0`)
4568	RRETURN(MATCH_NOMATCH);
4569	eptr++;
4570	ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4571	}
4572	break;
4573
4574	case OP_WHITESPACE:
4575	for (i = `1`; i <= min; i++)
4576	{
4577	pcre_uint32 cc;
4578	if (eptr >= md->end_subject)
4579	{
4580	SCHECK_PARTIAL();
4581	RRETURN(MATCH_NOMATCH);
4582	}
4583	cc = UCHAR21(eptr);
4584	if (cc >= `128` \|\| (md->ctypes[cc] & ctype_space) == `0`)
4585	RRETURN(MATCH_NOMATCH);
4586	eptr++;
4587	/ No need to skip more bytes - we know it's a 1-byte character /
4588	}
4589	break;
4590
4591	case OP_NOT_WORDCHAR:
4592	for (i = `1`; i <= min; i++)
4593	{
4594	pcre_uint32 cc;
4595	if (eptr >= md->end_subject)
4596	{
4597	SCHECK_PARTIAL();
4598	RRETURN(MATCH_NOMATCH);
4599	}
4600	cc = UCHAR21(eptr);
4601	if (cc < `128` && (md->ctypes[cc] & ctype_word) != `0`)
4602	RRETURN(MATCH_NOMATCH);
4603	eptr++;
4604	ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4605	}
4606	break;
4607
4608	case OP_WORDCHAR:
4609	for (i = `1`; i <= min; i++)
4610	{
4611	pcre_uint32 cc;
4612	if (eptr >= md->end_subject)
4613	{
4614	SCHECK_PARTIAL();
4615	RRETURN(MATCH_NOMATCH);
4616	}
4617	cc = UCHAR21(eptr);
4618	if (cc >= `128` \|\| (md->ctypes[cc] & ctype_word) == `0`)
4619	RRETURN(MATCH_NOMATCH);
4620	eptr++;
4621	/ No need to skip more bytes - we know it's a 1-byte character /
4622	}
4623	break;
4624
4625	default:
4626	RRETURN(PCRE_ERROR_INTERNAL);
4627	} / End switch(ctype) /
4628
4629	else
4630	#endif /* SUPPORT_UTF */
4631
4632	/* Code for the non-UTF-8 case for minimum matching of operators other
4633	than OP_PROP and OP_NOTPROP. */
4634
4635	switch(ctype)
4636	{
4637	case OP_ANY:
4638	for (i = `1`; i <= min; i++)
4639	{
4640	if (eptr >= md->end_subject)
4641	{
4642	SCHECK_PARTIAL();
4643	RRETURN(MATCH_NOMATCH);
4644	}
4645	if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4646	if (md->partial != `0` &&
4647	eptr + `1` >= md->end_subject &&
4648	NLBLOCK->nltype == NLTYPE_FIXED &&
4649	NLBLOCK->nllen == `2` &&
4650	*eptr == NLBLOCK->nl[`0`])
4651	{
4652	md->hitend = TRUE;
4653	if (md->partial > `1`) RRETURN(PCRE_ERROR_PARTIAL);
4654	}
4655	eptr++;
4656	}
4657	break;
4658
4659	case OP_ALLANY:
4660	if (eptr > md->end_subject - min)
4661	{
4662	SCHECK_PARTIAL();
4663	RRETURN(MATCH_NOMATCH);
4664	}
4665	eptr += min;
4666	break;
4667
4668	case OP_ANYBYTE:
4669	if (eptr > md->end_subject - min)
4670	{
4671	SCHECK_PARTIAL();
4672	RRETURN(MATCH_NOMATCH);
4673	}
4674	eptr += min;
4675	break;
4676
4677	case OP_ANYNL:
4678	for (i = `1`; i <= min; i++)
4679	{
4680	if (eptr >= md->end_subject)
4681	{
4682	SCHECK_PARTIAL();
4683	RRETURN(MATCH_NOMATCH);
4684	}
4685	switch(*eptr++)
4686	{
4687	default: RRETURN(MATCH_NOMATCH);
4688
4689	case CHAR_CR:
4690	if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
4691	break;
4692
4693	case CHAR_LF:
4694	break;
4695
4696	case CHAR_VT:
4697	case CHAR_FF:
4698	case CHAR_NEL:
4699	#if defined COMPILE_PCRE16 \|\| defined COMPILE_PCRE32
4700	case `0x2028`:
4701	case `0x2029`:
4702	#endif
4703	if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4704	break;
4705	}
4706	}
4707	break;
4708
4709	case OP_NOT_HSPACE:
4710	for (i = `1`; i <= min; i++)
4711	{
4712	if (eptr >= md->end_subject)
4713	{
4714	SCHECK_PARTIAL();
4715	RRETURN(MATCH_NOMATCH);
4716	}
4717	switch(*eptr++)
4718	{
4719	default: break;
4720	HSPACE_BYTE_CASES:
4721	#if defined COMPILE_PCRE16 \|\| defined COMPILE_PCRE32
4722	HSPACE_MULTIBYTE_CASES:
4723	#endif
4724	RRETURN(MATCH_NOMATCH);
4725	}
4726	}
4727	break;
4728
4729	case OP_HSPACE:
4730	for (i = `1`; i <= min; i++)
4731	{
4732	if (eptr >= md->end_subject)
4733	{
4734	SCHECK_PARTIAL();
4735	RRETURN(MATCH_NOMATCH);
4736	}
4737	switch(*eptr++)
4738	{
4739	default: RRETURN(MATCH_NOMATCH);
4740	HSPACE_BYTE_CASES:
4741	#if defined COMPILE_PCRE16 \|\| defined COMPILE_PCRE32
4742	HSPACE_MULTIBYTE_CASES:
4743	#endif
4744	break;
4745	}
4746	}
4747	break;
4748
4749	case OP_NOT_VSPACE:
4750	for (i = `1`; i <= min; i++)
4751	{
4752	if (eptr >= md->end_subject)
4753	{
4754	SCHECK_PARTIAL();
4755	RRETURN(MATCH_NOMATCH);
4756	}
4757	switch(*eptr++)
4758	{
4759	VSPACE_BYTE_CASES:
4760	#if defined COMPILE_PCRE16 \|\| defined COMPILE_PCRE32
4761	VSPACE_MULTIBYTE_CASES:
4762	#endif
4763	RRETURN(MATCH_NOMATCH);
4764	default: break;
4765	}
4766	}
4767	break;
4768
4769	case OP_VSPACE:
4770	for (i = `1`; i <= min; i++)
4771	{
4772	if (eptr >= md->end_subject)
4773	{
4774	SCHECK_PARTIAL();
4775	RRETURN(MATCH_NOMATCH);
4776	}
4777	switch(*eptr++)
4778	{
4779	default: RRETURN(MATCH_NOMATCH);
4780	VSPACE_BYTE_CASES:
4781	#if defined COMPILE_PCRE16 \|\| defined COMPILE_PCRE32
4782	VSPACE_MULTIBYTE_CASES:
4783	#endif
4784	break;
4785	}
4786	}
4787	break;
4788
4789	case OP_NOT_DIGIT:
4790	for (i = `1`; i <= min; i++)
4791	{
4792	if (eptr >= md->end_subject)
4793	{
4794	SCHECK_PARTIAL();
4795	RRETURN(MATCH_NOMATCH);
4796	}
4797	if (MAX_255(eptr) && (md->ctypes[eptr] & ctype_digit) != `0`)
4798	RRETURN(MATCH_NOMATCH);
4799	eptr++;
4800	}
4801	break;
4802
4803	case OP_DIGIT:
4804	for (i = `1`; i <= min; i++)
4805	{
4806	if (eptr >= md->end_subject)
4807	{
4808	SCHECK_PARTIAL();
4809	RRETURN(MATCH_NOMATCH);
4810	}
4811	if (!MAX_255(eptr) \|\| (md->ctypes[eptr] & ctype_digit) == `0`)
4812	RRETURN(MATCH_NOMATCH);
4813	eptr++;
4814	}
4815	break;
4816
4817	case OP_NOT_WHITESPACE:
4818	for (i = `1`; i <= min; i++)
4819	{
4820	if (eptr >= md->end_subject)
4821	{
4822	SCHECK_PARTIAL();
4823	RRETURN(MATCH_NOMATCH);
4824	}
4825	if (MAX_255(eptr) && (md->ctypes[eptr] & ctype_space) != `0`)
4826	RRETURN(MATCH_NOMATCH);
4827	eptr++;
4828	}
4829	break;
4830
4831	case OP_WHITESPACE:
4832	for (i = `1`; i <= min; i++)
4833	{
4834	if (eptr >= md->end_subject)
4835	{
4836	SCHECK_PARTIAL();
4837	RRETURN(MATCH_NOMATCH);
4838	}
4839	if (!MAX_255(eptr) \|\| (md->ctypes[eptr] & ctype_space) == `0`)
4840	RRETURN(MATCH_NOMATCH);
4841	eptr++;
4842	}
4843	break;
4844
4845	case OP_NOT_WORDCHAR:
4846	for (i = `1`; i <= min; i++)
4847	{
4848	if (eptr >= md->end_subject)
4849	{
4850	SCHECK_PARTIAL();
4851	RRETURN(MATCH_NOMATCH);
4852	}
4853	if (MAX_255(eptr) && (md->ctypes[eptr] & ctype_word) != `0`)
4854	RRETURN(MATCH_NOMATCH);
4855	eptr++;
4856	}
4857	break;
4858
4859	case OP_WORDCHAR:
4860	for (i = `1`; i <= min; i++)
4861	{
4862	if (eptr >= md->end_subject)
4863	{
4864	SCHECK_PARTIAL();
4865	RRETURN(MATCH_NOMATCH);
4866	}
4867	if (!MAX_255(eptr) \|\| (md->ctypes[eptr] & ctype_word) == `0`)
4868	RRETURN(MATCH_NOMATCH);
4869	eptr++;
4870	}
4871	break;
4872
4873	default:
4874	RRETURN(PCRE_ERROR_INTERNAL);
4875	}
4876	}
4877
4878	/* If min = max, continue at the same level without recursing */
4879
4880	if (min == max) continue;
4881
4882	/* If minimizing, we have to test the rest of the pattern before each
4883	subsequent match. Again, separate the UTF-8 case for speed, and also
4884	separate the UCP cases. */
4885
4886	if (minimize)
4887	{
4888	#ifdef SUPPORT_UCP
4889	if (prop_type >= `0`)
4890	{
4891	switch(prop_type)
4892	{
4893	case PT_ANY:
4894	for (fi = min;; fi++)
4895	{
4896	RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4897	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4898	if (fi >= max) RRETURN(MATCH_NOMATCH);
4899	if (eptr >= md->end_subject)
4900	{
4901	SCHECK_PARTIAL();
4902	RRETURN(MATCH_NOMATCH);
4903	}
4904	GETCHARINCTEST(c, eptr);
4905	if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4906	}
4907	/ Control never gets here /
4908
4909	case PT_LAMP:
4910	for (fi = min;; fi++)
4911	{
4912	int chartype;
4913	RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4914	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4915	if (fi >= max) RRETURN(MATCH_NOMATCH);
4916	if (eptr >= md->end_subject)
4917	{
4918	SCHECK_PARTIAL();
4919	RRETURN(MATCH_NOMATCH);
4920	}
4921	GETCHARINCTEST(c, eptr);
4922	chartype = UCD_CHARTYPE(c);
4923	if ((chartype == ucp_Lu \|\|
4924	chartype == ucp_Ll \|\|
4925	chartype == ucp_Lt) == prop_fail_result)
4926	RRETURN(MATCH_NOMATCH);
4927	}
4928	/ Control never gets here /
4929
4930	case PT_GC:
4931	for (fi = min;; fi++)
4932	{
4933	RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4934	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4935	if (fi >= max) RRETURN(MATCH_NOMATCH);
4936	if (eptr >= md->end_subject)
4937	{
4938	SCHECK_PARTIAL();
4939	RRETURN(MATCH_NOMATCH);
4940	}
4941	GETCHARINCTEST(c, eptr);
4942	if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4943	RRETURN(MATCH_NOMATCH);
4944	}
4945	/ Control never gets here /
4946
4947	case PT_PC:
4948	for (fi = min;; fi++)
4949	{
4950	RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4951	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4952	if (fi >= max) RRETURN(MATCH_NOMATCH);
4953	if (eptr >= md->end_subject)
4954	{
4955	SCHECK_PARTIAL();
4956	RRETURN(MATCH_NOMATCH);
4957	}
4958	GETCHARINCTEST(c, eptr);
4959	if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4960	RRETURN(MATCH_NOMATCH);
4961	}
4962	/ Control never gets here /
4963
4964	case PT_SC:
4965	for (fi = min;; fi++)
4966	{
4967	RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4968	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4969	if (fi >= max) RRETURN(MATCH_NOMATCH);
4970	if (eptr >= md->end_subject)
4971	{
4972	SCHECK_PARTIAL();
4973	RRETURN(MATCH_NOMATCH);
4974	}
4975	GETCHARINCTEST(c, eptr);
4976	if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4977	RRETURN(MATCH_NOMATCH);
4978	}
4979	/ Control never gets here /
4980
4981	case PT_ALNUM:
4982	for (fi = min;; fi++)
4983	{
4984	int category;
4985	RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4986	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4987	if (fi >= max) RRETURN(MATCH_NOMATCH);
4988	if (eptr >= md->end_subject)
4989	{
4990	SCHECK_PARTIAL();
4991	RRETURN(MATCH_NOMATCH);
4992	}
4993	GETCHARINCTEST(c, eptr);
4994	category = UCD_CATEGORY(c);
4995	if ((category == ucp_L \|\| category == ucp_N) == prop_fail_result)
4996	RRETURN(MATCH_NOMATCH);
4997	}
4998	/ Control never gets here /
4999
5000	/* Perl space used to exclude VT, but from Perl 5.18 it is included,
5001	which means that Perl space and POSIX space are now identical. PCRE
5002	was changed at release 8.34. */
5003
5004	case PT_SPACE: / Perl space /
5005	case PT_PXSPACE: / POSIX space /
5006	for (fi = min;; fi++)
5007	{
5008	RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
5009	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5010	if (fi >= max) RRETURN(MATCH_NOMATCH);
5011	if (eptr >= md->end_subject)
5012	{
5013	SCHECK_PARTIAL();
5014	RRETURN(MATCH_NOMATCH);
5015	}
5016	GETCHARINCTEST(c, eptr);
5017	switch(c)
5018	{
5019	HSPACE_CASES:
5020	VSPACE_CASES:
5021	if (prop_fail_result) RRETURN(MATCH_NOMATCH);
5022	break;
5023
5024	default:
5025	if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5026	RRETURN(MATCH_NOMATCH);
5027	break;
5028	}
5029	}
5030	/ Control never gets here /
5031
5032	case PT_WORD:
5033	for (fi = min;; fi++)
5034	{
5035	int category;
5036	RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
5037	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5038	if (fi >= max) RRETURN(MATCH_NOMATCH);
5039	if (eptr >= md->end_subject)
5040	{
5041	SCHECK_PARTIAL();
5042	RRETURN(MATCH_NOMATCH);
5043	}
5044	GETCHARINCTEST(c, eptr);
5045	category = UCD_CATEGORY(c);
5046	if ((category == ucp_L \|\|
5047	category == ucp_N \|\|
5048	c == CHAR_UNDERSCORE)
5049	== prop_fail_result)
5050	RRETURN(MATCH_NOMATCH);
5051	}
5052	/ Control never gets here /
5053
5054	case PT_CLIST:
5055	for (fi = min;; fi++)
5056	{
5057	const pcre_uint32 *cp;
5058	RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
5059	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5060	if (fi >= max) RRETURN(MATCH_NOMATCH);
5061	if (eptr >= md->end_subject)
5062	{
5063	SCHECK_PARTIAL();
5064	RRETURN(MATCH_NOMATCH);
5065	}
5066	GETCHARINCTEST(c, eptr);
5067	cp = PRIV(ucd_caseless_sets) + prop_value;
5068	for (;;)
5069	{
5070	if (c < *cp)
5071	{ if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
5072	if (c == *cp++)
5073	{ if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
5074	}
5075	}
5076	/ Control never gets here /
5077
5078	case PT_UCNC:
5079	for (fi = min;; fi++)
5080	{
5081	RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
5082	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5083	if (fi >= max) RRETURN(MATCH_NOMATCH);
5084	if (eptr >= md->end_subject)
5085	{
5086	SCHECK_PARTIAL();
5087	RRETURN(MATCH_NOMATCH);
5088	}
5089	GETCHARINCTEST(c, eptr);
5090	if ((c == CHAR_DOLLAR_SIGN \|\| c == CHAR_COMMERCIAL_AT \|\|
5091	c == CHAR_GRAVE_ACCENT \|\| (c >= `0xa0` && c <= `0xd7ff`) \|\|
5092	c >= `0xe000`) == prop_fail_result)
5093	RRETURN(MATCH_NOMATCH);
5094	}
5095	/ Control never gets here /
5096
5097	/* This should never occur */
5098	default:
5099	RRETURN(PCRE_ERROR_INTERNAL);
5100	}
5101	}
5102
5103	/* Match extended Unicode sequences. We will get here only if the
5104	support is in the binary; otherwise a compile-time error occurs. */
5105
5106	else if (ctype == OP_EXTUNI)
5107	{
5108	for (fi = min;; fi++)
5109	{
5110	RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
5111	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5112	if (fi >= max) RRETURN(MATCH_NOMATCH);
5113	if (eptr >= md->end_subject)
5114	{
5115	SCHECK_PARTIAL();
5116	RRETURN(MATCH_NOMATCH);
5117	}
5118	else
5119	{
5120	int lgb, rgb;
5121	GETCHARINCTEST(c, eptr);
5122	lgb = UCD_GRAPHBREAK(c);
5123	while (eptr < md->end_subject)
5124	{
5125	int len = `1`;
5126	if (!utf) c = eptr; else* { GETCHARLEN(c, eptr, len); }
5127	rgb = UCD_GRAPHBREAK(c);
5128	if ((PRIV(ucp_gbtable)[lgb] & (`1` << rgb)) == `0`) break;
5129	lgb = rgb;
5130	eptr += len;
5131	}
5132	}
5133	CHECK_PARTIAL();
5134	}
5135	}
5136	else
5137	#endif /* SUPPORT_UCP */
5138
5139	#ifdef SUPPORT_UTF
5140	if (utf)
5141	{
5142	for (fi = min;; fi++)
5143	{
5144	RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
5145	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5146	if (fi >= max) RRETURN(MATCH_NOMATCH);
5147	if (eptr >= md->end_subject)
5148	{
5149	SCHECK_PARTIAL();
5150	RRETURN(MATCH_NOMATCH);
5151	}
5152	if (ctype == OP_ANY && IS_NEWLINE(eptr))
5153	RRETURN(MATCH_NOMATCH);
5154	GETCHARINC(c, eptr);
5155	switch(ctype)
5156	{
5157	case OP_ANY: / This is the non-NL case /
5158	if (md->partial != `0` && / Take care with CRLF partial /
5159	eptr >= md->end_subject &&
5160	NLBLOCK->nltype == NLTYPE_FIXED &&
5161	NLBLOCK->nllen == `2` &&
5162	c == NLBLOCK->nl[`0`])
5163	{
5164	md->hitend = TRUE;
5165	if (md->partial > `1`) RRETURN(PCRE_ERROR_PARTIAL);
5166	}
5167	break;
5168
5169	case OP_ALLANY:
5170	case OP_ANYBYTE:
5171	break;
5172
5173	case OP_ANYNL:
5174	switch(c)
5175	{
5176	default: RRETURN(MATCH_NOMATCH);
5177	case CHAR_CR:
5178	if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
5179	break;
5180
5181	case CHAR_LF:
5182	break;
5183
5184	case CHAR_VT:
5185	case CHAR_FF:
5186	case CHAR_NEL:
5187	#ifndef EBCDIC
5188	case `0x2028`:
5189	case `0x2029`:
5190	#endif /* Not EBCDIC */
5191	if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5192	break;
5193	}
5194	break;
5195
5196	case OP_NOT_HSPACE:
5197	switch(c)
5198	{
5199	HSPACE_CASES: RRETURN(MATCH_NOMATCH);
5200	default: break;
5201	}
5202	break;
5203
5204	case OP_HSPACE:
5205	switch(c)
5206	{
5207	HSPACE_CASES: break;
5208	default: RRETURN(MATCH_NOMATCH);
5209	}
5210	break;
5211
5212	case OP_NOT_VSPACE:
5213	switch(c)
5214	{
5215	VSPACE_CASES: RRETURN(MATCH_NOMATCH);
5216	default: break;
5217	}
5218	break;
5219
5220	case OP_VSPACE:
5221	switch(c)
5222	{
5223	VSPACE_CASES: break;
5224	default: RRETURN(MATCH_NOMATCH);
5225	}
5226	break;
5227
5228	case OP_NOT_DIGIT:
5229	if (c < `256` && (md->ctypes[c] & ctype_digit) != `0`)
5230	RRETURN(MATCH_NOMATCH);
5231	break;
5232
5233	case OP_DIGIT:
5234	if (c >= `256` \|\| (md->ctypes[c] & ctype_digit) == `0`)
5235	RRETURN(MATCH_NOMATCH);
5236	break;
5237
5238	case OP_NOT_WHITESPACE:
5239	if (c < `256` && (md->ctypes[c] & ctype_space) != `0`)
5240	RRETURN(MATCH_NOMATCH);
5241	break;
5242
5243	case OP_WHITESPACE:
5244	if (c >= `256` \|\| (md->ctypes[c] & ctype_space) == `0`)
5245	RRETURN(MATCH_NOMATCH);
5246	break;
5247
5248	case OP_NOT_WORDCHAR:
5249	if (c < `256` && (md->ctypes[c] & ctype_word) != `0`)
5250	RRETURN(MATCH_NOMATCH);
5251	break;
5252
5253	case OP_WORDCHAR:
5254	if (c >= `256` \|\| (md->ctypes[c] & ctype_word) == `0`)
5255	RRETURN(MATCH_NOMATCH);
5256	break;
5257
5258	default:
5259	RRETURN(PCRE_ERROR_INTERNAL);
5260	}
5261	}
5262	}
5263	else
5264	#endif
5265	/* Not UTF mode */
5266	{
5267	for (fi = min;; fi++)
5268	{
5269	RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5270	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5271	if (fi >= max) RRETURN(MATCH_NOMATCH);
5272	if (eptr >= md->end_subject)
5273	{
5274	SCHECK_PARTIAL();
5275	RRETURN(MATCH_NOMATCH);
5276	}
5277	if (ctype == OP_ANY && IS_NEWLINE(eptr))
5278	RRETURN(MATCH_NOMATCH);
5279	c = *eptr++;
5280	switch(ctype)
5281	{
5282	case OP_ANY: / This is the non-NL case /
5283	if (md->partial != `0` && / Take care with CRLF partial /
5284	eptr >= md->end_subject &&
5285	NLBLOCK->nltype == NLTYPE_FIXED &&
5286	NLBLOCK->nllen == `2` &&
5287	c == NLBLOCK->nl[`0`])
5288	{
5289	md->hitend = TRUE;
5290	if (md->partial > `1`) RRETURN(PCRE_ERROR_PARTIAL);
5291	}
5292	break;
5293
5294	case OP_ALLANY:
5295	case OP_ANYBYTE:
5296	break;
5297
5298	case OP_ANYNL:
5299	switch(c)
5300	{
5301	default: RRETURN(MATCH_NOMATCH);
5302	case CHAR_CR:
5303	if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
5304	break;
5305
5306	case CHAR_LF:
5307	break;
5308
5309	case CHAR_VT:
5310	case CHAR_FF:
5311	case CHAR_NEL:
5312	#if defined COMPILE_PCRE16 \|\| defined COMPILE_PCRE32
5313	case `0x2028`:
5314	case `0x2029`:
5315	#endif
5316	if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5317	break;
5318	}
5319	break;
5320
5321	case OP_NOT_HSPACE:
5322	switch(c)
5323	{
5324	default: break;
5325	HSPACE_BYTE_CASES:
5326	#if defined COMPILE_PCRE16 \|\| defined COMPILE_PCRE32
5327	HSPACE_MULTIBYTE_CASES:
5328	#endif
5329	RRETURN(MATCH_NOMATCH);
5330	}
5331	break;
5332
5333	case OP_HSPACE:
5334	switch(c)
5335	{
5336	default: RRETURN(MATCH_NOMATCH);
5337	HSPACE_BYTE_CASES:
5338	#if defined COMPILE_PCRE16 \|\| defined COMPILE_PCRE32
5339	HSPACE_MULTIBYTE_CASES:
5340	#endif
5341	break;
5342	}
5343	break;
5344
5345	case OP_NOT_VSPACE:
5346	switch(c)
5347	{
5348	default: break;
5349	VSPACE_BYTE_CASES:
5350	#if defined COMPILE_PCRE16 \|\| defined COMPILE_PCRE32
5351	VSPACE_MULTIBYTE_CASES:
5352	#endif
5353	RRETURN(MATCH_NOMATCH);
5354	}
5355	break;
5356
5357	case OP_VSPACE:
5358	switch(c)
5359	{
5360	default: RRETURN(MATCH_NOMATCH);
5361	VSPACE_BYTE_CASES:
5362	#if defined COMPILE_PCRE16 \|\| defined COMPILE_PCRE32
5363	VSPACE_MULTIBYTE_CASES:
5364	#endif
5365	break;
5366	}
5367	break;
5368
5369	case OP_NOT_DIGIT:
5370	if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != `0`) RRETURN(MATCH_NOMATCH);
5371	break;
5372
5373	case OP_DIGIT:
5374	if (!MAX_255(c) \|\| (md->ctypes[c] & ctype_digit) == `0`) RRETURN(MATCH_NOMATCH);
5375	break;
5376
5377	case OP_NOT_WHITESPACE:
5378	if (MAX_255(c) && (md->ctypes[c] & ctype_space) != `0`) RRETURN(MATCH_NOMATCH);
5379	break;
5380
5381	case OP_WHITESPACE:
5382	if (!MAX_255(c) \|\| (md->ctypes[c] & ctype_space) == `0`) RRETURN(MATCH_NOMATCH);
5383	break;
5384
5385	case OP_NOT_WORDCHAR:
5386	if (MAX_255(c) && (md->ctypes[c] & ctype_word) != `0`) RRETURN(MATCH_NOMATCH);
5387	break;
5388
5389	case OP_WORDCHAR:
5390	if (!MAX_255(c) \|\| (md->ctypes[c] & ctype_word) == `0`) RRETURN(MATCH_NOMATCH);
5391	break;
5392
5393	default:
5394	RRETURN(PCRE_ERROR_INTERNAL);
5395	}
5396	}
5397	}
5398	/ Control never gets here /
5399	}
5400
5401	/* If maximizing, it is worth using inline code for speed, doing the type
5402	test once at the start (i.e. keep it out of the loop). Again, keep the
5403	UTF-8 and UCP stuff separate. */
5404
5405	else
5406	{
5407	pp = eptr; / Remember where we started /
5408
5409	#ifdef SUPPORT_UCP
5410	if (prop_type >= `0`)
5411	{
5412	switch(prop_type)
5413	{
5414	case PT_ANY:
5415	for (i = min; i < max; i++)
5416	{
5417	int len = `1`;
5418	if (eptr >= md->end_subject)
5419	{
5420	SCHECK_PARTIAL();
5421	break;
5422	}
5423	GETCHARLENTEST(c, eptr, len);
5424	if (prop_fail_result) break;
5425	eptr+= len;
5426	}
5427	break;
5428
5429	case PT_LAMP:
5430	for (i = min; i < max; i++)
5431	{
5432	int chartype;
5433	int len = `1`;
5434	if (eptr >= md->end_subject)
5435	{
5436	SCHECK_PARTIAL();
5437	break;
5438	}
5439	GETCHARLENTEST(c, eptr, len);
5440	chartype = UCD_CHARTYPE(c);
5441	if ((chartype == ucp_Lu \|\|
5442	chartype == ucp_Ll \|\|
5443	chartype == ucp_Lt) == prop_fail_result)
5444	break;
5445	eptr+= len;
5446	}
5447	break;
5448
5449	case PT_GC:
5450	for (i = min; i < max; i++)
5451	{
5452	int len = `1`;
5453	if (eptr >= md->end_subject)
5454	{
5455	SCHECK_PARTIAL();
5456	break;
5457	}
5458	GETCHARLENTEST(c, eptr, len);
5459	if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5460	eptr+= len;
5461	}
5462	break;
5463
5464	case PT_PC:
5465	for (i = min; i < max; i++)
5466	{
5467	int len = `1`;
5468	if (eptr >= md->end_subject)
5469	{
5470	SCHECK_PARTIAL();
5471	break;
5472	}
5473	GETCHARLENTEST(c, eptr, len);
5474	if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5475	eptr+= len;
5476	}
5477	break;
5478
5479	case PT_SC:
5480	for (i = min; i < max; i++)
5481	{
5482	int len = `1`;
5483	if (eptr >= md->end_subject)
5484	{
5485	SCHECK_PARTIAL();
5486	break;
5487	}
5488	GETCHARLENTEST(c, eptr, len);
5489	if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5490	eptr+= len;
5491	}
5492	break;
5493
5494	case PT_ALNUM:
5495	for (i = min; i < max; i++)
5496	{
5497	int category;
5498	int len = `1`;
5499	if (eptr >= md->end_subject)
5500	{
5501	SCHECK_PARTIAL();
5502	break;
5503	}
5504	GETCHARLENTEST(c, eptr, len);
5505	category = UCD_CATEGORY(c);
5506	if ((category == ucp_L \|\| category == ucp_N) == prop_fail_result)
5507	break;
5508	eptr+= len;
5509	}
5510	break;
5511
5512	/* Perl space used to exclude VT, but from Perl 5.18 it is included,
5513	which means that Perl space and POSIX space are now identical. PCRE
5514	was changed at release 8.34. */
5515
5516	case PT_SPACE: / Perl space /
5517	case PT_PXSPACE: / POSIX space /
5518	for (i = min; i < max; i++)
5519	{
5520	int len = `1`;
5521	if (eptr >= md->end_subject)
5522	{
5523	SCHECK_PARTIAL();
5524	break;
5525	}
5526	GETCHARLENTEST(c, eptr, len);
5527	switch(c)
5528	{
5529	HSPACE_CASES:
5530	VSPACE_CASES:
5531	if (prop_fail_result) goto ENDLOOP99; / Break the loop /
5532	break;
5533
5534	default:
5535	if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5536	goto ENDLOOP99; / Break the loop /
5537	break;
5538	}
5539	eptr+= len;
5540	}
5541	ENDLOOP99:
5542	break;
5543
5544	case PT_WORD:
5545	for (i = min; i < max; i++)
5546	{
5547	int category;
5548	int len = `1`;
5549	if (eptr >= md->end_subject)
5550	{
5551	SCHECK_PARTIAL();
5552	break;
5553	}
5554	GETCHARLENTEST(c, eptr, len);
5555	category = UCD_CATEGORY(c);
5556	if ((category == ucp_L \|\| category == ucp_N \|\|
5557	c == CHAR_UNDERSCORE) == prop_fail_result)
5558	break;
5559	eptr+= len;
5560	}
5561	break;
5562
5563	case PT_CLIST:
5564	for (i = min; i < max; i++)
5565	{
5566	const pcre_uint32 *cp;
5567	int len = `1`;
5568	if (eptr >= md->end_subject)
5569	{
5570	SCHECK_PARTIAL();
5571	break;
5572	}
5573	GETCHARLENTEST(c, eptr, len);
5574	cp = PRIV(ucd_caseless_sets) + prop_value;
5575	for (;;)
5576	{
5577	if (c < *cp)
5578	{ if (prop_fail_result) break; else goto GOT_MAX; }
5579	if (c == *cp++)
5580	{ if (prop_fail_result) goto GOT_MAX; else break; }
5581	}
5582	eptr += len;
5583	}
5584	GOT_MAX:
5585	break;
5586
5587	case PT_UCNC:
5588	for (i = min; i < max; i++)
5589	{
5590	int len = `1`;
5591	if (eptr >= md->end_subject)
5592	{
5593	SCHECK_PARTIAL();
5594	break;
5595	}
5596	GETCHARLENTEST(c, eptr, len);
5597	if ((c == CHAR_DOLLAR_SIGN \|\| c == CHAR_COMMERCIAL_AT \|\|
5598	c == CHAR_GRAVE_ACCENT \|\| (c >= `0xa0` && c <= `0xd7ff`) \|\|
5599	c >= `0xe000`) == prop_fail_result)
5600	break;
5601	eptr += len;
5602	}
5603	break;
5604
5605	default:
5606	RRETURN(PCRE_ERROR_INTERNAL);
5607	}
5608
5609	/* eptr is now past the end of the maximum run */
5610
5611	if (possessive) continue; / No backtracking /
5612	for(;;)
5613	{
5614	if (eptr <= pp) goto TAIL_RECURSE;
5615	RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5616	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5617	eptr--;
5618	if (utf) BACKCHAR(eptr);
5619	}
5620	}
5621
5622	/ Match extended Unicode grapheme clusters. We will get here only if the*
5623	support is in the binary; otherwise a compile-time error occurs. /*
5624
5625	else if (ctype == OP_EXTUNI)
5626	{
5627	for (i = min; i < max; i++)
5628	{
5629	if (eptr >= md->end_subject)
5630	{
5631	SCHECK_PARTIAL();
5632	break;
5633	}
5634	else
5635	{
5636	int lgb, rgb;
5637	GETCHARINCTEST(c, eptr);
5638	lgb = UCD_GRAPHBREAK(c);
5639	while (eptr < md->end_subject)
5640	{
5641	int len = `1`;
5642	if (!utf) c = eptr; else* { GETCHARLEN(c, eptr, len); }
5643	rgb = UCD_GRAPHBREAK(c);
5644	if ((PRIV(ucp_gbtable)[lgb] & (`1` << rgb)) == `0`) break;
5645	lgb = rgb;
5646	eptr += len;
5647	}
5648	}
5649	CHECK_PARTIAL();
5650	}
5651
5652	/ eptr is now past the end of the maximum run /
5653
5654	if (possessive) continue; / No backtracking /
5655
5656	/ We use <= pp rather than == pp to detect the start of the run while*
5657	backtracking because the use of \C in UTF mode can cause BACKCHAR to
5658	move back past pp. This is just palliative; the use of \C in UTF mode
5659	is fraught with danger. /*
5660
5661	for(;;)
5662	{
5663	int lgb, rgb;
5664	PCRE_PUCHAR fptr;
5665
5666	if (eptr <= pp) goto TAIL_RECURSE; / At start of char run /
5667	RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5668	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5669
5670	/ Backtracking over an extended grapheme cluster involves inspecting*
5671	the previous two characters (if present) to see if a break is
5672	permitted between them. /*
5673
5674	eptr--;
5675	if (!utf) c = eptr; else*
5676	{
5677	BACKCHAR(eptr);
5678	GETCHAR(c, eptr);
5679	}
5680	rgb = UCD_GRAPHBREAK(c);
5681
5682	for (;;)
5683	{
5684	if (eptr <= pp) goto TAIL_RECURSE; / At start of char run /
5685	fptr = eptr - `1`;
5686	if (!utf) c = fptr; else*
5687	{
5688	BACKCHAR(fptr);
5689	GETCHAR(c, fptr);
5690	}
5691	lgb = UCD_GRAPHBREAK(c);
5692	if ((PRIV(ucp_gbtable)[lgb] & (`1` << rgb)) == `0`) break;
5693	eptr = fptr;
5694	rgb = lgb;
5695	}
5696	}
5697	}
5698
5699	else
5700	#endif /* SUPPORT_UCP */
5701
5702	#ifdef SUPPORT_UTF
5703	if (utf)
5704	{
5705	switch(ctype)
5706	{
5707	case OP_ANY:
5708	for (i = min; i < max; i++)
5709	{
5710	if (eptr >= md->end_subject)
5711	{
5712	SCHECK_PARTIAL();
5713	break;
5714	}
5715	if (IS_NEWLINE(eptr)) break;
5716	if (md->partial != `0` && / Take care with CRLF partial /
5717	eptr + `1` >= md->end_subject &&
5718	NLBLOCK->nltype == NLTYPE_FIXED &&
5719	NLBLOCK->nllen == `2` &&
5720	UCHAR21(eptr) == NLBLOCK->nl[`0`])
5721	{
5722	md->hitend = TRUE;
5723	if (md->partial > `1`) RRETURN(PCRE_ERROR_PARTIAL);
5724	}
5725	eptr++;
5726	ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5727	}
5728	break;
5729
5730	case OP_ALLANY:
5731	if (max < INT_MAX)
5732	{
5733	for (i = min; i < max; i++)
5734	{
5735	if (eptr >= md->end_subject)
5736	{
5737	SCHECK_PARTIAL();
5738	break;
5739	}
5740	eptr++;
5741	ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5742	}
5743	}
5744	else
5745	{
5746	eptr = md->end_subject; / Unlimited UTF-8 repeat /
5747	SCHECK_PARTIAL();
5748	}
5749	break;
5750
5751	/ The byte case is the same as non-UTF8 /
5752
5753	case OP_ANYBYTE:
5754	c = max - min;
5755	if (c > (unsigned int)(md->end_subject - eptr))
5756	{
5757	eptr = md->end_subject;
5758	SCHECK_PARTIAL();
5759	}
5760	else eptr += c;
5761	break;
5762
5763	case OP_ANYNL:
5764	for (i = min; i < max; i++)
5765	{
5766	int len = `1`;
5767	if (eptr >= md->end_subject)
5768	{
5769	SCHECK_PARTIAL();
5770	break;
5771	}
5772	GETCHARLEN(c, eptr, len);
5773	if (c == CHAR_CR)
5774	{
5775	if (++eptr >= md->end_subject) break;
5776	if (UCHAR21(eptr) == CHAR_LF) eptr++;
5777	}
5778	else
5779	{
5780	if (c != CHAR_LF &&
5781	(md->bsr_anycrlf \|\|
5782	(c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5783	#ifndef EBCDIC
5784	&& c != `0x2028` && c != `0x2029`
5785	#endif /* Not EBCDIC */
5786	)))
5787	break;
5788	eptr += len;
5789	}
5790	}
5791	break;
5792
5793	case OP_NOT_HSPACE:
5794	case OP_HSPACE:
5795	for (i = min; i < max; i++)
5796	{
5797	BOOL gotspace;
5798	int len = `1`;
5799	if (eptr >= md->end_subject)
5800	{
5801	SCHECK_PARTIAL();
5802	break;
5803	}
5804	GETCHARLEN(c, eptr, len);
5805	switch(c)
5806	{
5807	HSPACE_CASES: gotspace = TRUE; break;
5808	default: gotspace = FALSE; break;
5809	}
5810	if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5811	eptr += len;
5812	}
5813	break;
5814
5815	case OP_NOT_VSPACE:
5816	case OP_VSPACE:
5817	for (i = min; i < max; i++)
5818	{
5819	BOOL gotspace;
5820	int len = `1`;
5821	if (eptr >= md->end_subject)
5822	{
5823	SCHECK_PARTIAL();
5824	break;
5825	}
5826	GETCHARLEN(c, eptr, len);
5827	switch(c)
5828	{
5829	VSPACE_CASES: gotspace = TRUE; break;
5830	default: gotspace = FALSE; break;
5831	}
5832	if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5833	eptr += len;
5834	}
5835	break;
5836
5837	case OP_NOT_DIGIT:
5838	for (i = min; i < max; i++)
5839	{
5840	int len = `1`;
5841	if (eptr >= md->end_subject)
5842	{
5843	SCHECK_PARTIAL();
5844	break;
5845	}
5846	GETCHARLEN(c, eptr, len);
5847	if (c < `256` && (md->ctypes[c] & ctype_digit) != `0`) break;
5848	eptr+= len;
5849	}
5850	break;
5851
5852	case OP_DIGIT:
5853	for (i = min; i < max; i++)
5854	{
5855	int len = `1`;
5856	if (eptr >= md->end_subject)
5857	{
5858	SCHECK_PARTIAL();
5859	break;
5860	}
5861	GETCHARLEN(c, eptr, len);
5862	if (c >= `256` \|\|(md->ctypes[c] & ctype_digit) == `0`) break;
5863	eptr+= len;
5864	}
5865	break;
5866
5867	case OP_NOT_WHITESPACE:
5868	for (i = min; i < max; i++)
5869	{
5870	int len = `1`;
5871	if (eptr >= md->end_subject)
5872	{
5873	SCHECK_PARTIAL();
5874	break;
5875	}
5876	GETCHARLEN(c, eptr, len);
5877	if (c < `256` && (md->ctypes[c] & ctype_space) != `0`) break;
5878	eptr+= len;
5879	}
5880	break;
5881
5882	case OP_WHITESPACE:
5883	for (i = min; i < max; i++)
5884	{
5885	int len = `1`;
5886	if (eptr >= md->end_subject)
5887	{
5888	SCHECK_PARTIAL();
5889	break;
5890	}
5891	GETCHARLEN(c, eptr, len);
5892	if (c >= `256` \|\|(md->ctypes[c] & ctype_space) == `0`) break;
5893	eptr+= len;
5894	}
5895	break;
5896
5897	case OP_NOT_WORDCHAR:
5898	for (i = min; i < max; i++)
5899	{
5900	int len = `1`;
5901	if (eptr >= md->end_subject)
5902	{
5903	SCHECK_PARTIAL();
5904	break;
5905	}
5906	GETCHARLEN(c, eptr, len);
5907	if (c < `256` && (md->ctypes[c] & ctype_word) != `0`) break;
5908	eptr+= len;
5909	}
5910	break;
5911
5912	case OP_WORDCHAR:
5913	for (i = min; i < max; i++)
5914	{
5915	int len = `1`;
5916	if (eptr >= md->end_subject)
5917	{
5918	SCHECK_PARTIAL();
5919	break;
5920	}
5921	GETCHARLEN(c, eptr, len);
5922	if (c >= `256` \|\| (md->ctypes[c] & ctype_word) == `0`) break;
5923	eptr+= len;
5924	}
5925	break;
5926
5927	default:
5928	RRETURN(PCRE_ERROR_INTERNAL);
5929	}
5930
5931	if (possessive) continue; / No backtracking /
5932	for(;;)
5933	{
5934	if (eptr <= pp) goto TAIL_RECURSE;
5935	RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5936	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5937	eptr--;
5938	BACKCHAR(eptr);
5939	if (ctype == OP_ANYNL && eptr > pp && UCHAR21(eptr) == CHAR_NL &&
5940	UCHAR21(eptr - `1`) == CHAR_CR) eptr--;
5941	}
5942	}
5943	else
5944	#endif /* SUPPORT_UTF */
5945	/ Not UTF mode /
5946	{
5947	switch(ctype)
5948	{
5949	case OP_ANY:
5950	for (i = min; i < max; i++)
5951	{
5952	if (eptr >= md->end_subject)
5953	{
5954	SCHECK_PARTIAL();
5955	break;
5956	}
5957	if (IS_NEWLINE(eptr)) break;
5958	if (md->partial != `0` && / Take care with CRLF partial /
5959	eptr + `1` >= md->end_subject &&
5960	NLBLOCK->nltype == NLTYPE_FIXED &&
5961	NLBLOCK->nllen == `2` &&
5962	*eptr == NLBLOCK->nl[`0`])
5963	{
5964	md->hitend = TRUE;
5965	if (md->partial > `1`) RRETURN(PCRE_ERROR_PARTIAL);
5966	}
5967	eptr++;
5968	}
5969	break;
5970
5971	case OP_ALLANY:
5972	case OP_ANYBYTE:
5973	c = max - min;
5974	if (c > (unsigned int)(md->end_subject - eptr))
5975	{
5976	eptr = md->end_subject;
5977	SCHECK_PARTIAL();
5978	}
5979	else eptr += c;
5980	break;
5981
5982	case OP_ANYNL:
5983	for (i = min; i < max; i++)
5984	{
5985	if (eptr >= md->end_subject)
5986	{
5987	SCHECK_PARTIAL();
5988	break;
5989	}
5990	c = *eptr;
5991	if (c == CHAR_CR)
5992	{
5993	if (++eptr >= md->end_subject) break;
5994	if (*eptr == CHAR_LF) eptr++;
5995	}
5996	else
5997	{
5998	if (c != CHAR_LF && (md->bsr_anycrlf \|\|
5999	(c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
6000	#if defined COMPILE_PCRE16 \|\| defined COMPILE_PCRE32
6001	&& c != `0x2028` && c != `0x2029`
6002	#endif
6003	))) break;
6004	eptr++;
6005	}
6006	}
6007	break;
6008
6009	case OP_NOT_HSPACE:
6010	for (i = min; i < max; i++)
6011	{
6012	if (eptr >= md->end_subject)
6013	{
6014	SCHECK_PARTIAL();
6015	break;
6016	}
6017	switch(*eptr)
6018	{
6019	default: eptr++; break;
6020	HSPACE_BYTE_CASES:
6021	#if defined COMPILE_PCRE16 \|\| defined COMPILE_PCRE32
6022	HSPACE_MULTIBYTE_CASES:
6023	#endif
6024	goto ENDLOOP00;
6025	}
6026	}
6027	ENDLOOP00:
6028	break;
6029
6030	case OP_HSPACE:
6031	for (i = min; i < max; i++)
6032	{
6033	if (eptr >= md->end_subject)
6034	{
6035	SCHECK_PARTIAL();
6036	break;
6037	}
6038	switch(*eptr)
6039	{
6040	default: goto ENDLOOP01;
6041	HSPACE_BYTE_CASES:
6042	#if defined COMPILE_PCRE16 \|\| defined COMPILE_PCRE32
6043	HSPACE_MULTIBYTE_CASES:
6044	#endif
6045	eptr++; break;
6046	}
6047	}
6048	ENDLOOP01:
6049	break;
6050
6051	case OP_NOT_VSPACE:
6052	for (i = min; i < max; i++)
6053	{
6054	if (eptr >= md->end_subject)
6055	{
6056	SCHECK_PARTIAL();
6057	break;
6058	}
6059	switch(*eptr)
6060	{
6061	default: eptr++; break;
6062	VSPACE_BYTE_CASES:
6063	#if defined COMPILE_PCRE16 \|\| defined COMPILE_PCRE32
6064	VSPACE_MULTIBYTE_CASES:
6065	#endif
6066	goto ENDLOOP02;
6067	}
6068	}
6069	ENDLOOP02:
6070	break;
6071
6072	case OP_VSPACE:
6073	for (i = min; i < max; i++)
6074	{
6075	if (eptr >= md->end_subject)
6076	{
6077	SCHECK_PARTIAL();
6078	break;
6079	}
6080	switch(*eptr)
6081	{
6082	default: goto ENDLOOP03;
6083	VSPACE_BYTE_CASES:
6084	#if defined COMPILE_PCRE16 \|\| defined COMPILE_PCRE32
6085	VSPACE_MULTIBYTE_CASES:
6086	#endif
6087	eptr++; break;
6088	}
6089	}
6090	ENDLOOP03:
6091	break;
6092
6093	case OP_NOT_DIGIT:
6094	for (i = min; i < max; i++)
6095	{
6096	if (eptr >= md->end_subject)
6097	{
6098	SCHECK_PARTIAL();
6099	break;
6100	}
6101	if (MAX_255(eptr) && (md->ctypes[eptr] & ctype_digit) != `0`) break;
6102	eptr++;
6103	}
6104	break;
6105
6106	case OP_DIGIT:
6107	for (i = min; i < max; i++)
6108	{
6109	if (eptr >= md->end_subject)
6110	{
6111	SCHECK_PARTIAL();
6112	break;
6113	}
6114	if (!MAX_255(eptr) \|\| (md->ctypes[eptr] & ctype_digit) == `0`) break;
6115	eptr++;
6116	}
6117	break;
6118
6119	case OP_NOT_WHITESPACE:
6120	for (i = min; i < max; i++)
6121	{
6122	if (eptr >= md->end_subject)
6123	{
6124	SCHECK_PARTIAL();
6125	break;
6126	}
6127	if (MAX_255(eptr) && (md->ctypes[eptr] & ctype_space) != `0`) break;
6128	eptr++;
6129	}
6130	break;
6131
6132	case OP_WHITESPACE:
6133	for (i = min; i < max; i++)
6134	{
6135	if (eptr >= md->end_subject)
6136	{
6137	SCHECK_PARTIAL();
6138	break;
6139	}
6140	if (!MAX_255(eptr) \|\| (md->ctypes[eptr] & ctype_space) == `0`) break;
6141	eptr++;
6142	}
6143	break;
6144
6145	case OP_NOT_WORDCHAR:
6146	for (i = min; i < max; i++)
6147	{
6148	if (eptr >= md->end_subject)
6149	{
6150	SCHECK_PARTIAL();
6151	break;
6152	}
6153	if (MAX_255(eptr) && (md->ctypes[eptr] & ctype_word) != `0`) break;
6154	eptr++;
6155	}
6156	break;
6157
6158	case OP_WORDCHAR:
6159	for (i = min; i < max; i++)
6160	{
6161	if (eptr >= md->end_subject)
6162	{
6163	SCHECK_PARTIAL();
6164	break;
6165	}
6166	if (!MAX_255(eptr) \|\| (md->ctypes[eptr] & ctype_word) == `0`) break;
6167	eptr++;
6168	}
6169	break;
6170
6171	default:
6172	RRETURN(PCRE_ERROR_INTERNAL);
6173	}
6174
6175	if (possessive) continue; / No backtracking /
6176	for (;;)
6177	{
6178	if (eptr == pp) goto TAIL_RECURSE;
6179	RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6180	if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6181	eptr--;
6182	if (ctype == OP_ANYNL && eptr > pp && *eptr == CHAR_LF &&
6183	eptr[-`1`] == CHAR_CR) eptr--;
6184	}
6185	}
6186
6187	/ Control never gets here /
6188	}
6189
6190	/ There's been some horrible disaster. Arrival here can only mean there is*
6191	something seriously wrong in the code above or the OP_xxx definitions. /*
6192
6193	default:
6194	DPRINTF(("Unknown opcode %d\n", *ecode));
6195	RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6196	}
6197
6198	/ Do not stick any code in here without much thought; it is assumed*
6199	that "continue" in the code above comes out to here to repeat the main
6200	loop. /*
6201
6202	} / End of main loop /
6203	/ Control never reaches here /
6204
6205
6206	/ When compiling to use the heap rather than the stack for recursive calls to*
6207	match(), the RRETURN() macro jumps here. The number that is saved in
6208	frame->Xwhere indicates which label we actually want to return to. /*
6209
6210	#ifdef NO_RECURSE
6211	#define LBL(val) case val: goto L_RM##val;
6212	HEAP_RETURN:
6213	switch (frame->Xwhere)
6214	{
6215	LBL( `1`) LBL( `2`) LBL( `3`) LBL( `4`) LBL( `5`) LBL( `6`) LBL( `7`) LBL( `8`)
6216	LBL( `9`) LBL(`10`) LBL(`11`) LBL(`12`) LBL(`13`) LBL(`14`) LBL(`15`) LBL(`17`)
6217	LBL(`19`) LBL(`24`) LBL(`25`) LBL(`26`) LBL(`27`) LBL(`29`) LBL(`31`) LBL(`33`)
6218	LBL(`35`) LBL(`43`) LBL(`47`) LBL(`48`) LBL(`49`) LBL(`50`) LBL(`51`) LBL(`52`)
6219	LBL(`53`) LBL(`54`) LBL(`55`) LBL(`56`) LBL(`57`) LBL(`58`) LBL(`63`) LBL(`64`)
6220	LBL(`65`) LBL(`66`)
6221	#if defined SUPPORT_UTF \|\| !defined COMPILE_PCRE8
6222	LBL(`20`) LBL(`21`)
6223	#endif
6224	#ifdef SUPPORT_UTF
6225	LBL(`16`) LBL(`18`)
6226	LBL(`22`) LBL(`23`) LBL(`28`) LBL(`30`)
6227	LBL(`32`) LBL(`34`) LBL(`42`) LBL(`46`)
6228	#ifdef SUPPORT_UCP
6229	LBL(`36`) LBL(`37`) LBL(`38`) LBL(`39`) LBL(`40`) LBL(`41`) LBL(`44`) LBL(`45`)
6230	LBL(`59`) LBL(`60`) LBL(`61`) LBL(`62`) LBL(`67`)
6231	#endif /* SUPPORT_UCP */
6232	#endif /* SUPPORT_UTF */
6233	default:
6234	DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6235	return PCRE_ERROR_INTERNAL;
6236	}
6237	#undef LBL
6238	#endif /* NO_RECURSE */
6239	}
6240
6241
/***************************************************************************
****************************************************************************
                   RECURSION IN THE match() FUNCTION

Undefine all the macros that were defined above to handle this. */
6247
/* fc and fi are macros in both build variants, so they are always removed. */

#undef fi
#undef fc

/* The remaining names are removed only when NO_RECURSE is defined. The order
of #undef directives has no effect, so they are simply listed alphabetically. */

#if defined NO_RECURSE

#undef callpat
#undef charptr
#undef condition
#undef ctype
#undef cur_is_word
#undef data
#undef ecode
#undef eptr
#undef eptrb
#undef flags
#undef length
#undef max
#undef min
#undef mstart
#undef new_recursive
#undef newptrb
#undef next
#undef number
#undef offset
#undef offset_top
#undef op
#undef pp
#undef prev
#undef prev_is_word
#undef save_capture_last
#undef save_offset1
#undef save_offset2
#undef save_offset3
#undef saved_eptr
#undef stacksave

#endif  /* NO_RECURSE */
6291
6292	/***************************************************************************
6293	***************************************************************************/
6294
6295
6296	#ifdef NO_RECURSE
6297	/*************************************************
6298	* Release allocated heap frames *
6299	*************************************************/
6300
/* This function releases all the allocated frames. The base frame is on the
machine stack, and so must not be freed.

Argument: the address of the base frame
Returns:  nothing
*/
6307
6308	static void
6309	release_match_heapframes (heapframe *frame_base)
6310	{
6311	heapframe *nextframe = frame_base->Xnextframe;
6312	while (nextframe != NULL)
6313	{
6314	heapframe *oldframe = nextframe;
6315	nextframe = nextframe->Xnextframe;
6316	(PUBL(stack_free))(oldframe);
6317	}
6318	}
6319	#endif
6320
6321
6322	/*************************************************
6323	* Execute a Regular Expression *
6324	*************************************************/
6325
/* This function applies a compiled re to a subject string and picks out
portions of the string if it matches. Two elements in the vector are set for
each substring: the offsets to the start and end of the substring.

Arguments:
  argument_re     points to the compiled expression
  extra_data      points to extra data or is NULL
  subject         points to the subject string
  length          length of subject string (may contain binary zeros)
  start_offset    where to start in the subject string
  options         option bits
  offsets         points to a vector of ints to be filled in with offsets
  offsetcount     the number of elements in the vector

Returns:          > 0 => success; value is the number of elements filled in
                  = 0 => success, but offsets is not big enough
                   -1 => failed to match
                 < -1 => some kind of unexpected problem
*/
6345
6346	#if defined COMPILE_PCRE8
6347	PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6348	pcre_exec(const pcre argument_re, const* pcre_extra *extra_data,
6349	PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6350	int offsetcount)
6351	#elif defined COMPILE_PCRE16
6352	PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6353	pcre16_exec(const pcre16 argument_re, const* pcre16_extra *extra_data,
6354	PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6355	int offsetcount)
6356	#elif defined COMPILE_PCRE32
6357	PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6358	pcre32_exec(const pcre32 argument_re, const* pcre32_extra *extra_data,
6359	PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
6360	int offsetcount)
6361	#endif
6362	{
6363	int rc, ocount, arg_offset_max;
6364	int newline;
6365	BOOL using_temporary_offsets = FALSE;
6366	BOOL anchored;
6367	BOOL startline;
6368	BOOL firstline;
6369	BOOL utf;
6370	BOOL has_first_char = FALSE;
6371	BOOL has_req_char = FALSE;
6372	pcre_uchar first_char = `0`;
6373	pcre_uchar first_char2 = `0`;
6374	pcre_uchar req_char = `0`;
6375	pcre_uchar req_char2 = `0`;
6376	match_data match_block;
6377	match_data *md = &match_block;
6378	const pcre_uint8 *tables;
6379	const pcre_uint8 *start_bits = NULL;
6380	PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6381	PCRE_PUCHAR end_subject;
6382	PCRE_PUCHAR start_partial = NULL;
6383	PCRE_PUCHAR match_partial = NULL;
6384	PCRE_PUCHAR req_char_ptr = start_match - `1`;
6385
6386	const pcre_study_data *study;
6387	const REAL_PCRE re = (const* REAL_PCRE *)argument_re;
6388
6389	#ifdef NO_RECURSE
6390	heapframe frame_zero;
6391	frame_zero.Xprevframe = NULL; / Marks the top level /
6392	frame_zero.Xnextframe = NULL; / None are allocated yet /
6393	md->match_frames_base = &frame_zero;
6394	#endif
6395
6396	/ Check for the special magic call that measures the size of the stack used*
6397	per recursive call of match(). Without the funny casting for sizeof, a Windows
6398	compiler gave this error: "unary minus operator applied to unsigned type,
6399	result still unsigned". Hopefully the cast fixes that. /*
6400
6401	if (re == NULL && extra_data == NULL && subject == NULL && length == -`999` &&
6402	start_offset == -`999`)
6403	#ifdef NO_RECURSE
6404	return -((int)sizeof(heapframe));
6405	#else
6406	return match(NULL, NULL, NULL, `0`, NULL, NULL, `0`);
6407	#endif
6408
6409	/ Plausibility checks /
6410
6411	if ((options & ~PUBLIC_EXEC_OPTIONS) != `0`) return PCRE_ERROR_BADOPTION;
6412	if (re == NULL \|\| subject == NULL \|\| (offsets == NULL && offsetcount > `0`))
6413	return PCRE_ERROR_NULL;
6414	if (offsetcount < `0`) return PCRE_ERROR_BADCOUNT;
6415	if (length < `0`) return PCRE_ERROR_BADLENGTH;
6416	if (start_offset < `0` \|\| start_offset > length) return PCRE_ERROR_BADOFFSET;
6417
6418	/ Check that the first field in the block is the magic number. If it is not,*
6419	return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6420	REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6421	means that the pattern is likely compiled with different endianness. /*
6422
6423	if (re->magic_number != MAGIC_NUMBER)
6424	return re->magic_number == REVERSED_MAGIC_NUMBER?
6425	PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6426	if ((re->flags & PCRE_MODE) == `0`) return PCRE_ERROR_BADMODE;
6427
6428	/ These two settings are used in the code for checking a UTF-8 string that*
6429	follows immediately afterwards. Other values in the md block are used only
6430	during "normal" pcre_exec() processing, not when the JIT support is in use,
6431	so they are set up later. /*
6432
6433	/ PCRE_UTF16 has the same value as PCRE_UTF8. /
6434	utf = md->utf = (re->options & PCRE_UTF8) != `0`;
6435	md->partial = ((options & PCRE_PARTIAL_HARD) != `0`)? `2` :
6436	((options & PCRE_PARTIAL_SOFT) != `0`)? `1` : `0`;
6437
6438	/ Check a UTF-8 string if required. Pass back the character offset and error*
6439	code for an invalid string if a results vector is available. /*
6440
6441	#ifdef SUPPORT_UTF
6442	if (utf && (options & PCRE_NO_UTF8_CHECK) == `0`)
6443	{
6444	int erroroffset;
6445	int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6446	if (errorcode != `0`)
6447	{
6448	if (offsetcount >= `2`)
6449	{
6450	offsets[`0`] = erroroffset;
6451	offsets[`1`] = errorcode;
6452	}
6453	#if defined COMPILE_PCRE8
6454	return (errorcode <= PCRE_UTF8_ERR5 && md->partial > `1`)?
6455	PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6456	#elif defined COMPILE_PCRE16
6457	return (errorcode <= PCRE_UTF16_ERR1 && md->partial > `1`)?
6458	PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6459	#elif defined COMPILE_PCRE32
6460	return PCRE_ERROR_BADUTF32;
6461	#endif
6462	}
6463	#if defined COMPILE_PCRE8 \|\| defined COMPILE_PCRE16
6464	/ Check that a start_offset points to the start of a UTF character. /
6465	if (start_offset > `0` && start_offset < length &&
6466	NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6467	return PCRE_ERROR_BADUTF8_OFFSET;
6468	#endif
6469	}
6470	#endif
6471
6472	/ If the pattern was successfully studied with JIT support, run the JIT*
6473	executable instead of the rest of this function. Most options must be set at
6474	compile time for the JIT code to be usable. Fallback to the normal code path if
6475	an unsupported flag is set. /*
6476
6477	#ifdef SUPPORT_JIT
6478	if (extra_data != NULL
6479	&& (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT \|
6480	PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6481	&& extra_data->executable_jit != NULL
6482	&& (options & ~PUBLIC_JIT_EXEC_OPTIONS) == `0`)
6483	{
6484	rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
6485	start_offset, options, offsets, offsetcount);
6486
6487	/ PCRE_ERROR_NULL means that the selected normal or partial matching*
6488	mode is not compiled. In this case we simply fallback to interpreter. /*
6489
6490	if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
6491	}
6492	#endif
6493
6494	/ Carry on with non-JIT matching. This information is for finding all the*
6495	numbers associated with a given name, for condition testing. /*
6496
6497	md->name_table = (pcre_uchar *)re + re->name_table_offset;
6498	md->name_count = re->name_count;
6499	md->name_entry_size = re->name_entry_size;
6500
6501	/ Fish out the optional data from the extra_data structure, first setting*
6502	the default values. /*
6503
6504	study = NULL;
6505	md->match_limit = MATCH_LIMIT;
6506	md->match_limit_recursion = MATCH_LIMIT_RECURSION;
6507	md->callout_data = NULL;
6508
6509	/ The table pointer is always in native byte order. /
6510
6511	tables = re->tables;
6512
6513	/ The two limit values override the defaults, whatever their value. /
6514
6515	if (extra_data != NULL)
6516	{
6517	unsigned long int flags = extra_data->flags;
6518	if ((flags & PCRE_EXTRA_STUDY_DATA) != `0`)
6519	study = (const pcre_study_data *)extra_data->study_data;
6520	if ((flags & PCRE_EXTRA_MATCH_LIMIT) != `0`)
6521	md->match_limit = extra_data->match_limit;
6522	if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != `0`)
6523	md->match_limit_recursion = extra_data->match_limit_recursion;
6524	if ((flags & PCRE_EXTRA_CALLOUT_DATA) != `0`)
6525	md->callout_data = extra_data->callout_data;
6526	if ((flags & PCRE_EXTRA_TABLES) != `0`) tables = extra_data->tables;
6527	}
6528
6529	/ Limits in the regex override only if they are smaller. /
6530
6531	if ((re->flags & PCRE_MLSET) != `0` && re->limit_match < md->match_limit)
6532	md->match_limit = re->limit_match;
6533
6534	if ((re->flags & PCRE_RLSET) != `0` &&
6535	re->limit_recursion < md->match_limit_recursion)
6536	md->match_limit_recursion = re->limit_recursion;
6537
6538	/ If the exec call supplied NULL for tables, use the inbuilt ones. This*
6539	is a feature that makes it possible to save compiled regex and re-use them
6540	in other programs later. /*
6541
6542	if (tables == NULL) tables = PRIV(default_tables);
6543
6544	/ Set up other data /
6545
6546	anchored = ((re->options \| options) & PCRE_ANCHORED) != `0`;
6547	startline = (re->flags & PCRE_STARTLINE) != `0`;
6548	firstline = (re->options & PCRE_FIRSTLINE) != `0`;
6549
6550	/ The code starts after the real_pcre block and the capture name table. /
6551
6552	md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6553	re->name_count * re->name_entry_size;
6554
6555	md->start_subject = (PCRE_PUCHAR)subject;
6556	md->start_offset = start_offset;
6557	md->end_subject = md->start_subject + length;
6558	end_subject = md->end_subject;
6559
6560	md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != `0`;
6561	md->use_ucp = (re->options & PCRE_UCP) != `0`;
6562	md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != `0`;
6563	md->ignore_skip_arg = `0`;
6564
6565	/ Some options are unpacked into BOOL variables in the hope that testing*
6566	them will be faster than individual option bits. /*
6567
6568	md->notbol = (options & PCRE_NOTBOL) != `0`;
6569	md->noteol = (options & PCRE_NOTEOL) != `0`;
6570	md->notempty = (options & PCRE_NOTEMPTY) != `0`;
6571	md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != `0`;
6572
6573	md->hitend = FALSE;
6574	md->mark = md->nomatch_mark = NULL; / In case never set /
6575
6576	md->recursive = NULL; / No recursion at top level /
6577	md->hasthen = (re->flags & PCRE_HASTHEN) != `0`;
6578
6579	md->lcc = tables + lcc_offset;
6580	md->fcc = tables + fcc_offset;
6581	md->ctypes = tables + ctypes_offset;
6582
6583	/ Handle different \R options. /
6584
6585	switch (options & (PCRE_BSR_ANYCRLF\|PCRE_BSR_UNICODE))
6586	{
6587	case `0`:
6588	if ((re->options & (PCRE_BSR_ANYCRLF\|PCRE_BSR_UNICODE)) != `0`)
6589	md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != `0`;
6590	else
6591	#ifdef BSR_ANYCRLF
6592	md->bsr_anycrlf = TRUE;
6593	#else
6594	md->bsr_anycrlf = FALSE;
6595	#endif
6596	break;
6597
6598	case PCRE_BSR_ANYCRLF:
6599	md->bsr_anycrlf = TRUE;
6600	break;
6601
6602	case PCRE_BSR_UNICODE:
6603	md->bsr_anycrlf = FALSE;
6604	break;
6605
6606	default: return PCRE_ERROR_BADNEWLINE;
6607	}
6608
6609	/ Handle different types of newline. The three bits give eight cases. If*
6610	nothing is set at run time, whatever was used at compile time applies. /*
6611
6612	switch ((((options & PCRE_NEWLINE_BITS) == `0`)? re->options :
6613	(pcre_uint32)options) & PCRE_NEWLINE_BITS)
6614	{
6615	case `0`: newline = NEWLINE; break; / Compile-time default /
6616	case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6617	case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6618	case PCRE_NEWLINE_CR+
6619	PCRE_NEWLINE_LF: newline = (CHAR_CR << `8`) \| CHAR_NL; break;
6620	case PCRE_NEWLINE_ANY: newline = -`1`; break;
6621	case PCRE_NEWLINE_ANYCRLF: newline = -`2`; break;
6622	default: return PCRE_ERROR_BADNEWLINE;
6623	}
6624
6625	if (newline == -`2`)
6626	{
6627	md->nltype = NLTYPE_ANYCRLF;
6628	}
6629	else if (newline < `0`)
6630	{
6631	md->nltype = NLTYPE_ANY;
6632	}
6633	else
6634	{
6635	md->nltype = NLTYPE_FIXED;
6636	if (newline > `255`)
6637	{
6638	md->nllen = `2`;
6639	md->nl[`0`] = (newline >> `8`) & `255`;
6640	md->nl[`1`] = newline & `255`;
6641	}
6642	else
6643	{
6644	md->nllen = `1`;
6645	md->nl[`0`] = newline;
6646	}
6647	}
6648
6649	/ Partial matching was originally supported only for a restricted set of*
6650	regexes; from release 8.00 there are no restrictions, but the bits are still
6651	defined (though never set). So there's no harm in leaving this code. /*
6652
6653	if (md->partial && (re->flags & PCRE_NOPARTIAL) != `0`)
6654	return PCRE_ERROR_BADPARTIAL;
6655
6656	/ If the expression has got more back references than the offsets supplied can*
6657	hold, we get a temporary chunk of working store to use during the matching.
6658	Otherwise, we can use the vector supplied, rounding down its size to a multiple
6659	of 3. /*
6660
6661	ocount = offsetcount - (offsetcount % `3`);
6662	arg_offset_max = (`2`*ocount)/`3`;
6663
6664	if (re->top_backref > `0` && re->top_backref >= ocount/`3`)
6665	{
6666	ocount = re->top_backref * `3` + `3`;
6667	md->offset_vector = (int )(PUBL(malloc))(ocount sizeof(int));
6668	if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6669	using_temporary_offsets = TRUE;
6670	DPRINTF(("Got memory to hold back references\n"));
6671	}
6672	else md->offset_vector = offsets;
6673	md->offset_end = ocount;
6674	md->offset_max = (`2`*ocount)/`3`;
6675	md->capture_last = `0`;
6676
6677	/ Reset the working variable associated with each extraction. These should*
6678	never be used unless previously set, but they get saved and restored, and so we
6679	initialize them to avoid reading uninitialized locations. Also, unset the
6680	offsets for the matched string. This is really just for tidiness with callouts,
6681	in case they inspect these fields. /*
6682
6683	if (md->offset_vector != NULL)
6684	{
6685	register int *iptr = md->offset_vector + ocount;
6686	register int *iend = iptr - re->top_bracket;
6687	if (iend < md->offset_vector + `2`) iend = md->offset_vector + `2`;
6688	while (--iptr >= iend) *iptr = -`1`;
6689	if (offsetcount > `0`) md->offset_vector[`0`] = -`1`;
6690	if (offsetcount > `1`) md->offset_vector[`1`] = -`1`;
6691	}
6692
6693	/ Set up the first character to match, if available. The first_char value is*
6694	never set for an anchored regular expression, but the anchoring may be forced
6695	at run time, so we have to test for anchoring. The first char may be unset for
6696	an unanchored pattern, of course. If there's no first char and the pattern was
6697	studied, there may be a bitmap of possible first characters. /*
6698
6699	if (!anchored)
6700	{
6701	if ((re->flags & PCRE_FIRSTSET) != `0`)
6702	{
6703	has_first_char = TRUE;
6704	first_char = first_char2 = (pcre_uchar)(re->first_char);
6705	if ((re->flags & PCRE_FCH_CASELESS) != `0`)
6706	{
6707	first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6708	#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6709	if (utf && first_char > `127`)
6710	first_char2 = UCD_OTHERCASE(first_char);
6711	#endif
6712	}
6713	}
6714	else
6715	if (!startline && study != NULL &&
6716	(study->flags & PCRE_STUDY_MAPPED) != `0`)
6717	start_bits = study->start_bits;
6718	}
6719
6720	/ For anchored or unanchored matches, there may be a "last known required*
6721	character" set. /*
6722
6723	if ((re->flags & PCRE_REQCHSET) != `0`)
6724	{
6725	has_req_char = TRUE;
6726	req_char = req_char2 = (pcre_uchar)(re->req_char);
6727	if ((re->flags & PCRE_RCH_CASELESS) != `0`)
6728	{
6729	req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6730	#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6731	if (utf && req_char > `127`)
6732	req_char2 = UCD_OTHERCASE(req_char);
6733	#endif
6734	}
6735	}
6736
6737
6738	/ ==========================================================================/
6739
6740	/ Loop for handling unanchored repeated matching attempts; for anchored regexs*
6741	the loop runs just once. /*
6742
6743	for(;;)
6744	{
6745	PCRE_PUCHAR save_end_subject = end_subject;
6746	PCRE_PUCHAR new_start_match;
6747
6748	/ If firstline is TRUE, the start of the match is constrained to the first*
6749	line of a multiline string. That is, the match must be before or at the first
6750	newline. Implement this by temporarily adjusting end_subject so that we stop
6751	scanning at a newline. If the match fails at the newline, later code breaks
6752	this loop. /*
6753
6754	if (firstline)
6755	{
6756	PCRE_PUCHAR t = start_match;
6757	#ifdef SUPPORT_UTF
6758	if (utf)
6759	{
6760	while (t < md->end_subject && !IS_NEWLINE(t))
6761	{
6762	t++;
6763	ACROSSCHAR(t < end_subject, *t, t++);
6764	}
6765	}
6766	else
6767	#endif
6768	while (t < md->end_subject && !IS_NEWLINE(t)) t++;
6769	end_subject = t;
6770	}
6771
6772	/* There are some optimizations that avoid running the match if a known
6773	starting point is not found, or if a known later character is not present.
6774	However, there is an option that disables these, for testing and for ensuring
6775	that all callouts do actually occur. The option can be set in the regex by
6776	(*NO_START_OPT) or passed in match-time options. */
6777
6778	if (((options \| re->options) & PCRE_NO_START_OPTIMIZE) == `0`)
6779	{
6780	/ Advance to a unique first char if there is one. /
6781
6782	if (has_first_char)
6783	{
6784	pcre_uchar smc;
6785
6786	if (first_char != first_char2)
6787	while (start_match < end_subject &&
6788	(smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2)
6789	start_match++;
6790	else
6791	while (start_match < end_subject && UCHAR21TEST(start_match) != first_char)
6792	start_match++;
6793	}
6794
6795	/ Or to just after a linebreak for a multiline match /
6796
6797	else if (startline)
6798	{
6799	if (start_match > md->start_subject + start_offset)
6800	{
6801	#ifdef SUPPORT_UTF
6802	if (utf)
6803	{
6804	while (start_match < end_subject && !WAS_NEWLINE(start_match))
6805	{
6806	start_match++;
6807	ACROSSCHAR(start_match < end_subject, *start_match,
6808	start_match++);
6809	}
6810	}
6811	else
6812	#endif
6813	while (start_match < end_subject && !WAS_NEWLINE(start_match))
6814	start_match++;
6815
6816	/ If we have just passed a CR and the newline option is ANY or ANYCRLF,*
6817	and we are now at a LF, advance the match position by one more character.
6818	*/
6819
6820	if (start_match[-`1`] == CHAR_CR &&
6821	(md->nltype == NLTYPE_ANY \|\| md->nltype == NLTYPE_ANYCRLF) &&
6822	start_match < end_subject &&
6823	UCHAR21TEST(start_match) == CHAR_NL)
6824	start_match++;
6825	}
6826	}
6827
6828	/ Or to a non-unique first byte after study /
6829
6830	else if (start_bits != NULL)
6831	{
6832	while (start_match < end_subject)
6833	{
6834	register pcre_uint32 c = UCHAR21TEST(start_match);
6835	#ifndef COMPILE_PCRE8
6836	if (c > `255`) c = `255`;
6837	#endif
6838	if ((start_bits[c/`8`] & (`1` << (c&`7`))) != `0`) break;
6839	start_match++;
6840	}
6841	}
6842	} / Starting optimizations /
6843
6844	/ Restore fudged end_subject /
6845
6846	end_subject = save_end_subject;
6847
6848	/ The following two optimizations are disabled for partial matching or if*
6849	disabling is explicitly requested. /*
6850
6851	if (((options \| re->options) & PCRE_NO_START_OPTIMIZE) == `0` && !md->partial)
6852	{
6853	/ If the pattern was studied, a minimum subject length may be set. This is*
6854	a lower bound; no actual string of that length may actually match the
6855	pattern. Although the value is, strictly, in characters, we treat it as
6856	bytes to avoid spending too much time in this optimization. /*
6857
6858	if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != `0` &&
6859	(pcre_uint32)(end_subject - start_match) < study->minlength)
6860	{
6861	rc = MATCH_NOMATCH;
6862	break;
6863	}
6864
6865	/ If req_char is set, we know that that character must appear in the*
6866	subject for the match to succeed. If the first character is set, req_char
6867	must be later in the subject; otherwise the test starts at the match point.
6868	This optimization can save a huge amount of backtracking in patterns with
6869	nested unlimited repeats that aren't going to match. Writing separate code
6870	for cased/caseless versions makes it go faster, as does using an
6871	autoincrement and backing off on a match.
6872
6873	HOWEVER: when the subject string is very, very long, searching to its end
6874	can take a long time, and give bad performance on quite ordinary patterns.
6875	This showed up when somebody was matching something like /^\d+C/ on a
6876	32-megabyte string... so we don't do this when the string is sufficiently
6877	long. /*
6878
6879	if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
6880	{
6881	register PCRE_PUCHAR p = start_match + (has_first_char? `1`:`0`);
6882
6883	/ We don't need to repeat the search if we haven't yet reached the*
6884	place we found it at last time. /*
6885
6886	if (p > req_char_ptr)
6887	{
6888	if (req_char != req_char2)
6889	{
6890	while (p < end_subject)
6891	{
6892	register pcre_uint32 pp = UCHAR21INCTEST(p);
6893	if (pp == req_char \|\| pp == req_char2) { p--; break; }
6894	}
6895	}
6896	else
6897	{
6898	while (p < end_subject)
6899	{
6900	if (UCHAR21INCTEST(p) == req_char) { p--; break; }
6901	}
6902	}
6903
6904	/ If we can't find the required character, break the matching loop,*
6905	forcing a match failure. /*
6906
6907	if (p >= end_subject)
6908	{
6909	rc = MATCH_NOMATCH;
6910	break;
6911	}
6912
6913	/ If we have found the required character, save the point where we*
6914	found it, so that we don't search again next time round the loop if
6915	the start hasn't passed this character yet. /*
6916
6917	req_char_ptr = p;
6918	}
6919	}
6920	}
6921
6922	#ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */
6923	printf(">>>> Match against: ");
6924	pchars(start_match, end_subject - start_match, TRUE, md);
6925	printf("\n");
6926	#endif
6927
6928	/ OK, we can now run the match. If "hitend" is set afterwards, remember the*
6929	first starting point for which a partial match was found. /*
6930
6931	md->start_match_ptr = start_match;
6932	md->start_used_ptr = start_match;
6933	md->match_call_count = `0`;
6934	md->match_function_type = `0`;
6935	md->end_offset_top = `0`;
6936	md->skip_arg_count = `0`;
6937	rc = match(start_match, md->start_code, start_match, `2`, md, NULL, `0`);
6938	if (md->hitend && start_partial == NULL)
6939	{
6940	start_partial = md->start_used_ptr;
6941	match_partial = start_match;
6942	}
6943
6944	switch(rc)
6945	{
6946	/* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6947	the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6948	entirely. The only way we can do that is to re-do the match at the same
6949	point, with a flag to force SKIP with an argument to be ignored. Just
6950	treating this case as NOMATCH does not work because it does not check other
6951	alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6952
6953	case MATCH_SKIP_ARG:
6954	new_start_match = start_match;
6955	md->ignore_skip_arg = md->skip_arg_count;
6956	break;
6957
6958	/* SKIP passes back the next starting point explicitly, but if it is no
6959	greater than the match we have just done, treat it as NOMATCH. */
6960
6961	case MATCH_SKIP:
6962	if (md->start_match_ptr > start_match)
6963	{
6964	new_start_match = md->start_match_ptr;
6965	break;
6966	}
6967	/ Fall through /
6968
6969	/* NOMATCH and PRUNE advance by one character. THEN at this level acts
6970	exactly like PRUNE. Unset ignore SKIP-with-argument. */
6971
6972	case MATCH_NOMATCH:
6973	case MATCH_PRUNE:
6974	case MATCH_THEN:
6975	md->ignore_skip_arg = `0`;
6976	new_start_match = start_match + `1`;
6977	#ifdef SUPPORT_UTF
6978	if (utf)
6979	ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6980	new_start_match++);
6981	#endif
6982	break;
6983
6984	/ COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. /
6985
6986	case MATCH_COMMIT:
6987	rc = MATCH_NOMATCH;
6988	goto ENDLOOP;
6989
6990	/ Any other return is either a match, or some kind of error. /
6991
6992	default:
6993	goto ENDLOOP;
6994	}
6995
6996	/ Control reaches here for the various types of "no match at this point"*
6997	result. Reset the code to MATCH_NOMATCH for subsequent checking. /*
6998
6999	rc = MATCH_NOMATCH;
7000
7001	/ If PCRE_FIRSTLINE is set, the match must happen before or at the first*
7002	newline in the subject (though it may continue over the newline). Therefore,
7003	if we have just failed to match, starting at a newline, do not continue. /*
7004
7005	if (firstline && IS_NEWLINE(start_match)) break;
7006
7007	/ Advance to new matching position /
7008
7009	start_match = new_start_match;
7010
7011	/ Break the loop if the pattern is anchored or if we have passed the end of*
7012	the subject. /*
7013
7014	if (anchored \|\| start_match > end_subject) break;
7015
7016	/* If we have just passed a CR and we are now at a LF, and the pattern does
7017	not contain any explicit matches for \r or \n, and the newline option is CRLF
7018	or ANY or ANYCRLF, advance the match position by one more character. In
7019	normal matching start_match will always be greater than the first position at
7020	this stage, but a failed *SKIP can cause a return at the same point, which is
7021	why the first test exists. */
7022
7023	if (start_match > (PCRE_PUCHAR)subject + start_offset &&
7024	start_match[-`1`] == CHAR_CR &&
7025	start_match < end_subject &&
7026	*start_match == CHAR_NL &&
7027	(re->flags & PCRE_HASCRORLF) == `0` &&
7028	(md->nltype == NLTYPE_ANY \|\|
7029	md->nltype == NLTYPE_ANYCRLF \|\|
7030	md->nllen == `2`))
7031	start_match++;
7032
7033	md->mark = NULL; / Reset for start of next match attempt /
7034	} / End of for(;;) "bumpalong" loop /
7035
7036	/ ==========================================================================/
7037
7038	/* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
7039	conditions is true:
7040
7041	(1) The pattern is anchored or the match was failed by (*COMMIT);
7042
7043	(2) We are past the end of the subject;
7044
7045	(3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
7046	this option requests that a match occur at or before the first newline in
7047	the subject.
7048
7049	When we have a match and the offset vector is big enough to deal with any
7050	backreferences, captured substring offsets will already be set up. In the case
7051	where we had to get some local store to hold offsets for backreference
7052	processing, copy those that we can. In this case there need not be overflow if
7053	certain parts of the pattern were not used, even though there are more
7054	capturing parentheses than vector slots. */
7055
7056	ENDLOOP:
7057
7058	if (rc == MATCH_MATCH \|\| rc == MATCH_ACCEPT)
7059	{
7060	if (using_temporary_offsets)
7061	{
7062	if (arg_offset_max >= `4`)
7063	{
7064	memcpy(offsets + `2`, md->offset_vector + `2`,
7065	(arg_offset_max - `2`) * sizeof(int));
7066	DPRINTF(("Copied offsets from temporary memory\n"));
7067	}
7068	if (md->end_offset_top > arg_offset_max) md->capture_last \|= OVFLBIT;
7069	DPRINTF(("Freeing temporary memory\n"));
7070	(PUBL(free))(md->offset_vector);
7071	}
7072
7073	/ Set the return code to the number of captured strings, or 0 if there were*
7074	too many to fit into the vector. /*
7075
7076	rc = ((md->capture_last & OVFLBIT) != `0` &&
7077	md->end_offset_top >= arg_offset_max)?
7078	`0` : md->end_offset_top/`2`;
7079
7080	/* If there is space in the offset vector, set any unused pairs at the end of
7081	the pattern to -1 for backwards compatibility. It is documented that this
7082	happens. In earlier versions, the whole set of potential capturing offsets
7083	was set to -1 each time round the loop, but this is handled differently now.
7084	"Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
7085	those at the end that need unsetting here. We can't just unset them all at
7086	the start of the whole thing because they may get set in one branch that is
7087	not the final matching branch. */
7088
7089	if (md->end_offset_top/`2` <= re->top_bracket && offsets != NULL)
7090	{
7091	register int iptr, iend;
7092	int resetcount = `2` + re->top_bracket * `2`;
7093	if (resetcount > offsetcount) resetcount = offsetcount;
7094	iptr = offsets + md->end_offset_top;
7095	iend = offsets + resetcount;
7096	while (iptr < iend) *iptr++ = -`1`;
7097	}
7098
7099	/ If there is space, set up the whole thing as substring 0. The value of*
7100	md->start_match_ptr might be modified if \K was encountered on the success
7101	matching path. /*
7102
7103	if (offsetcount < `2`) rc = `0`; else
7104	{
7105	offsets[`0`] = (int)(md->start_match_ptr - md->start_subject);
7106	offsets[`1`] = (int)(md->end_match_ptr - md->start_subject);
7107	}
7108
7109	/ Return MARK data if requested /
7110
7111	if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != `0`)
7112	(extra_data->mark) = (pcre_uchar )md->mark;
7113	DPRINTF((">>>> returning %d\n", rc));
7114	#ifdef NO_RECURSE
7115	release_match_heapframes(&frame_zero);
7116	#endif
7117	return rc;
7118	}
7119
7120	/ Control gets here if there has been an error, or if the overall match*
7121	attempt has failed at all permitted starting positions. /*
7122
7123	if (using_temporary_offsets)
7124	{
7125	DPRINTF(("Freeing temporary memory\n"));
7126	(PUBL(free))(md->offset_vector);
7127	}
7128
7129	/ For anything other than nomatch or partial match, just return the code. /
7130
7131	if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
7132	{
7133	DPRINTF((">>>> error: returning %d\n", rc));
7134	#ifdef NO_RECURSE
7135	release_match_heapframes(&frame_zero);
7136	#endif
7137	return rc;
7138	}
7139
7140	/ Handle partial matches - disable any mark data /
7141
7142	if (match_partial != NULL)
7143	{
7144	DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
7145	md->mark = NULL;
7146	if (offsetcount > `1`)
7147	{
7148	offsets[`0`] = (int)(start_partial - (PCRE_PUCHAR)subject);
7149	offsets[`1`] = (int)(end_subject - (PCRE_PUCHAR)subject);
7150	if (offsetcount > `2`)
7151	offsets[`2`] = (int)(match_partial - (PCRE_PUCHAR)subject);
7152	}
7153	rc = PCRE_ERROR_PARTIAL;
7154	}
7155
7156	/ This is the classic nomatch case /
7157
7158	else
7159	{
7160	DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
7161	rc = PCRE_ERROR_NOMATCH;
7162	}
7163
7164	/ Return the MARK data if it has been requested. /
7165
7166	if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != `0`)
7167	(extra_data->mark) = (pcre_uchar )md->nomatch_mark;
7168	#ifdef NO_RECURSE
7169	release_match_heapframes(&frame_zero);
7170	#endif
7171	return rc;
7172	}
7173
7174	/ End of pcre_exec.c /
7175

Browse the source code of ClickHouse/contrib/poco/Foundation/src/pcre_exec.c