pcre2_dfa_match.c source code [Qt/src/3rdparty/pcre2/src/pcre2_dfa_match.c]

1	/*************************************************
2	* Perl-Compatible Regular Expressions *
3	*************************************************/
4
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.
7
8	Written by Philip Hazel
9	Original API code Copyright (c) 1997-2012 University of Cambridge
10	New API code Copyright (c) 2016-2020 University of Cambridge
11
12	-----------------------------------------------------------------------------
13	Redistribution and use in source and binary forms, with or without
14	modification, are permitted provided that the following conditions are met:
15
16	* Redistributions of source code must retain the above copyright notice,
17	this list of conditions and the following disclaimer.
18
19	* Redistributions in binary form must reproduce the above copyright
20	notice, this list of conditions and the following disclaimer in the
21	documentation and/or other materials provided with the distribution.
22
23	* Neither the name of the University of Cambridge nor the names of its
24	contributors may be used to endorse or promote products derived from
25	this software without specific prior written permission.
26
27	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31	LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35	CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37	POSSIBILITY OF SUCH DAMAGE.
38	-----------------------------------------------------------------------------
39	*/
40
41
/* This module contains the external function pcre2_dfa_match(), which is an
alternative matching function that uses a sort of DFA algorithm (not a true
FSM). This is NOT Perl-compatible, but it has advantages in certain
applications. */
46
47
/* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
the performance of his patterns greatly. I could not use it as it stood, as it
was not thread safe, and made assumptions about pattern sizes. Also, it caused
test 7 to loop, and test 9 to crash with a segfault.

The issue is the check for duplicate states, which is done by a simple linear
search up the state list. (Grep for "duplicate" below to find the code.) For
many patterns, there will never be many states active at one time, so a simple
linear search is fine. In patterns that have many active states, it might be a
bottleneck. The suggested code used an indexing scheme to remember which states
had previously been used for each character, and avoided the linear search when
it knew there was no chance of a duplicate. This was implemented when adding
states to the state lists.

I wrote some thread-safe, not-limited code to try something similar at the time
of checking for duplicates (instead of when adding states), using index vectors
on the stack. It did give a 13% improvement with one specially constructed
pattern for certain subject strings, but on other strings and on many of the
simpler patterns in the test suite it did worse. The major problem, I think,
was the extra time to initialize the index. This had to be done for each call
of internal_dfa_match(). (The supplied patch used a static vector, initialized
only once - I suspect this was the cause of the problems with the tests.)

Overall, I concluded that the gains in some cases did not outweigh the losses
in others, so I abandoned this code. */
73
74
75	#ifdef HAVE_CONFIG_H
76	#include "config.h"
77	#endif
78
79	#define NLBLOCK mb /* Block containing newline information */
80	#define PSSTART start_subject /* Field containing processed string start */
81	#define PSEND end_subject /* Field containing processed string end */
82
83	#include "pcre2_internal.h"
84
85	#define PUBLIC_DFA_MATCH_OPTIONS \
86	(PCRE2_ANCHORED\|PCRE2_ENDANCHORED\|PCRE2_NOTBOL\|PCRE2_NOTEOL\|PCRE2_NOTEMPTY\| \
87	PCRE2_NOTEMPTY_ATSTART\|PCRE2_NO_UTF_CHECK\|PCRE2_PARTIAL_HARD\| \
88	PCRE2_PARTIAL_SOFT\|PCRE2_DFA_SHORTEST\|PCRE2_DFA_RESTART\| \
89	PCRE2_COPY_MATCHED_SUBJECT)
90
91
92	/*************************************************
93	* Code parameters and static tables *
94	*************************************************/
95
/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
into others, under special conditions. A gap of 20 between the blocks should be
enough. The resulting opcodes don't have to be less than 256 because they are
never stored, so we push them well clear of the normal opcodes. */

#define OP_PROP_EXTRA       300
#define OP_EXTUNI_EXTRA     320
#define OP_ANYNL_EXTRA      340
#define OP_HSPACE_EXTRA     360
#define OP_VSPACE_EXTRA     380
106
107
108	/ This table identifies those opcodes that are followed immediately by a*
109	character that is to be tested in some way. This makes it possible to
110	centralize the loading of these characters. In the case of Type etc, the*
111	"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
112	small value. Non-zero values in the table are the offsets from the opcode where
113	the character is to be found. NOTE* If the start of this table is*
114	modified, the three tables that follow must also be modified. /*
115
116	static const uint8_t coptable[] = {
117	`0`, / End /
118	`0`, `0`, `0`, `0`, `0`, / \A, \G, \K, \B, \b /
119	`0`, `0`, `0`, `0`, `0`, `0`, / \D, \d, \S, \s, \W, \w /
120	`0`, `0`, `0`, / Any, AllAny, Anybyte /
121	`0`, `0`, / \P, \p /
122	`0`, `0`, `0`, `0`, `0`, / \R, \H, \h, \V, \v /
123	`0`, / \X /
124	`0`, `0`, `0`, `0`, `0`, `0`, / \Z, \z, $, $M, ^, ^M /
125	`1`, / Char /
126	`1`, / Chari /
127	`1`, / not /
128	`1`, / noti /
129	/ Positive single-char repeats /
130	`1`, `1`, `1`, `1`, `1`, `1`, / , ?, +, +?, ?, ?? /
131	`1`+IMM2_SIZE, `1`+IMM2_SIZE, / upto, minupto /
132	`1`+IMM2_SIZE, / exact /
133	`1`, `1`, `1`, `1`+IMM2_SIZE, / +, ++, ?+, upto+ /*
134	`1`, `1`, `1`, `1`, `1`, `1`, / I, ?I, +I, +?I, ?I, ??I /
135	`1`+IMM2_SIZE, `1`+IMM2_SIZE, / upto I, minupto I /
136	`1`+IMM2_SIZE, / exact I /
137	`1`, `1`, `1`, `1`+IMM2_SIZE, / +I, ++I, ?+I, upto+I /*
138	/ Negative single-char repeats - only for chars < 256 /
139	`1`, `1`, `1`, `1`, `1`, `1`, / NOT , ?, +, +?, ?, ?? /
140	`1`+IMM2_SIZE, `1`+IMM2_SIZE, / NOT upto, minupto /
141	`1`+IMM2_SIZE, / NOT exact /
142	`1`, `1`, `1`, `1`+IMM2_SIZE, / NOT +, ++, ?+, upto+ /*
143	`1`, `1`, `1`, `1`, `1`, `1`, / NOT I, ?I, +I, +?I, ?I, ??I /
144	`1`+IMM2_SIZE, `1`+IMM2_SIZE, / NOT upto I, minupto I /
145	`1`+IMM2_SIZE, / NOT exact I /
146	`1`, `1`, `1`, `1`+IMM2_SIZE, / NOT +I, ++I, ?+I, upto+I /*
147	/ Positive type repeats /
148	`1`, `1`, `1`, `1`, `1`, `1`, / Type , ?, +, +?, ?, ?? /
149	`1`+IMM2_SIZE, `1`+IMM2_SIZE, / Type upto, minupto /
150	`1`+IMM2_SIZE, / Type exact /
151	`1`, `1`, `1`, `1`+IMM2_SIZE, / Type +, ++, ?+, upto+ /*
152	/ Character class & ref repeats /
153	`0`, `0`, `0`, `0`, `0`, `0`, / , ?, +, +?, ?, ?? /
154	`0`, `0`, / CRRANGE, CRMINRANGE /
155	`0`, `0`, `0`, `0`, / Possessive +, ++, ?+, CRPOSRANGE /*
156	`0`, / CLASS /
157	`0`, / NCLASS /
158	`0`, / XCLASS - variable length /
159	`0`, / REF /
160	`0`, / REFI /
161	`0`, / DNREF /
162	`0`, / DNREFI /
163	`0`, / RECURSE /
164	`0`, / CALLOUT /
165	`0`, / CALLOUT_STR /
166	`0`, / Alt /
167	`0`, / Ket /
168	`0`, / KetRmax /
169	`0`, / KetRmin /
170	`0`, / KetRpos /
171	`0`, / Reverse /
172	`0`, / Assert /
173	`0`, / Assert not /
174	`0`, / Assert behind /
175	`0`, / Assert behind not /
176	`0`, / NA assert /
177	`0`, / NA assert behind /
178	`0`, / ONCE /
179	`0`, / SCRIPT_RUN /
180	`0`, `0`, `0`, `0`, `0`, / BRA, BRAPOS, CBRA, CBRAPOS, COND /
181	`0`, `0`, `0`, `0`, `0`, / SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND /
182	`0`, `0`, / CREF, DNCREF /
183	`0`, `0`, / RREF, DNRREF /
184	`0`, `0`, / FALSE, TRUE /
185	`0`, `0`, `0`, / BRAZERO, BRAMINZERO, BRAPOSZERO /
186	`0`, `0`, `0`, / MARK, PRUNE, PRUNE_ARG /
187	`0`, `0`, `0`, `0`, / SKIP, SKIP_ARG, THEN, THEN_ARG /
188	`0`, `0`, / COMMIT, COMMIT_ARG /
189	`0`, `0`, `0`, / FAIL, ACCEPT, ASSERT_ACCEPT /
190	`0`, `0`, `0` / CLOSE, SKIPZERO, DEFINE /
191	};
192
/* This table identifies those opcodes that inspect a character. It is used to
remember the fact that a character could have been inspected when the end of
the subject is reached. ***NOTE*** If the start of this table is modified, the
two tables that follow must also be modified. */

static const uint8_t poptable[] = {
  0,                             /* End                                    */
  0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
  1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
  1, 1, 1,                       /* Any, AllAny, Anybyte                   */
  1, 1,                          /* \P, \p                                 */
  1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
  1,                             /* \X                                     */
  0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
  1,                             /* Char                                   */
  1,                             /* Chari                                  */
  1,                             /* not                                    */
  1,                             /* noti                                   */
  /* Positive single-char repeats                                          */
  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
  1, 1, 1,                       /* upto, minupto, exact                   */
  1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
  1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
  1, 1, 1,                       /* upto I, minupto I, exact I             */
  1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I                  */
  /* Negative single-char repeats - only for chars < 256                   */
  1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
  1, 1, 1,                       /* NOT upto, minupto, exact               */
  1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
  1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
  1, 1, 1,                       /* NOT upto I, minupto I, exact I         */
  1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I              */
  /* Positive type repeats                                                 */
  1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
  1, 1, 1,                       /* Type upto, minupto, exact              */
  1, 1, 1, 1,                    /* Type *+, ++, ?+, upto+                 */
  /* Character class & ref repeats                                         */
  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
  1, 1,                          /* CRRANGE, CRMINRANGE                    */
  1, 1, 1, 1,                    /* Possessive *+, ++, ?+, CRPOSRANGE      */
  1,                             /* CLASS                                  */
  1,                             /* NCLASS                                 */
  1,                             /* XCLASS - variable length               */
  0,                             /* REF                                    */
  0,                             /* REFI                                   */
  0,                             /* DNREF                                  */
  0,                             /* DNREFI                                 */
  0,                             /* RECURSE                                */
  0,                             /* CALLOUT                                */
  0,                             /* CALLOUT_STR                            */
  0,                             /* Alt                                    */
  0,                             /* Ket                                    */
  0,                             /* KetRmax                                */
  0,                             /* KetRmin                                */
  0,                             /* KetRpos                                */
  0,                             /* Reverse                                */
  0,                             /* Assert                                 */
  0,                             /* Assert not                             */
  0,                             /* Assert behind                          */
  0,                             /* Assert behind not                      */
  0,                             /* NA assert                              */
  0,                             /* NA assert behind                       */
  0,                             /* ONCE                                   */
  0,                             /* SCRIPT_RUN                             */
  0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
  0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
  0, 0,                          /* CREF, DNCREF                           */
  0, 0,                          /* RREF, DNRREF                           */
  0, 0,                          /* FALSE, TRUE                            */
  0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
  0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
  0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
  0, 0,                          /* COMMIT, COMMIT_ARG                     */
  0, 0, 0,                       /* FAIL, ACCEPT, ASSERT_ACCEPT            */
  0, 0, 0                        /* CLOSE, SKIPZERO, DEFINE                */
};
269
270	/ These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,*
271	and \w /*
272
273	static const uint8_t toptable1[] = {
274	`0`, `0`, `0`, `0`, `0`, `0`,
275	ctype_digit, ctype_digit,
276	ctype_space, ctype_space,
277	ctype_word, ctype_word,
278	`0`, `0` / OP_ANY, OP_ALLANY /
279	};
280
281	static const uint8_t toptable2[] = {
282	`0`, `0`, `0`, `0`, `0`, `0`,
283	ctype_digit, `0`,
284	ctype_space, `0`,
285	ctype_word, `0`,
286	`1`, `1` / OP_ANY, OP_ALLANY /
287	};
288
289
/* Structure for holding data about a particular state, which is in effect the
current data for an active path through the match tree. It must consist
entirely of ints because the working vector we are passed, and which we put
these structures in, is a vector of ints. */

typedef struct stateblock {
  int offset;                     /* Offset to opcode (-ve has meaning) */
  int count;                      /* Count for repeats */
  int data;                       /* Some use extra data */
} stateblock;

/* Number of ints occupied by one stateblock. The expansion is fully
parenthesized so that it can be used safely inside larger expressions. */

#define INTS_PER_STATEBLOCK  ((int)(sizeof(stateblock)/sizeof(int)))
302
303
/* Before version 10.32 the recursive calls of internal_dfa_match() were passed
local working space and output vectors that were created on the stack. This has
caused issues for some patterns, especially in small-stack environments such as
Windows. A new scheme is now in use which sets up a vector on the stack, but if
this is too small, heap memory is used, up to the heap_limit. The main
parameters are all numbers of ints because the workspace is a vector of ints.

The size of the starting stack vector, DFA_START_RWS_SIZE, is in bytes, and is
defined in pcre2_internal.h so as to be available to pcre2test when it is
finding the minimum heap requirement for a match. */

#define OVEC_UNIT  (sizeof(PCRE2_SIZE)/sizeof(int))

#define RWS_BASE_SIZE   (DFA_START_RWS_SIZE/sizeof(int))  /* Stack vector */
#define RWS_RSIZE       1000                              /* Work size for recursion */
#define RWS_OVEC_RSIZE  (1000*OVEC_UNIT)                  /* Ovector for recursion */
#define RWS_OVEC_OSIZE  (2*OVEC_UNIT)                     /* Ovector in other cases */
321
/* This structure is at the start of each workspace block. The blocks form a
singly linked chain through the next field. */

typedef struct RWS_anchor {
  struct RWS_anchor *next;        /* Following block in the chain, or NULL */
  uint32_t size;                  /* Number of ints */
  uint32_t free;                  /* Number of ints */
} RWS_anchor;

/* Size of the anchor itself, expressed in ints like the size and free
fields. */

#define RWS_ANCHOR_SIZE (sizeof(RWS_anchor)/sizeof(int))
331
332
333
334	/*************************************************
335	* Process a callout *
336	*************************************************/
337
338	/ This function is called to perform a callout.*
339
340	Arguments:
341	code current code pointer
342	offsets points to current capture offsets
343	current_subject start of current subject match
344	ptr current position in subject
345	mb the match block
346	extracode extra code offset when called from condition
347	lengthptr where to return the callout length
348
349	Returns: the return from the callout
350	*/
351
352	static int
353	do_callout(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject,
354	PCRE2_SPTR ptr, dfa_match_block *mb, PCRE2_SIZE extracode,
355	PCRE2_SIZE *lengthptr)
356	{
357	pcre2_callout_block *cb = mb->cb;
358
359	*lengthptr = (code[extracode] == OP_CALLOUT)?
360	(PCRE2_SIZE)PRIV(OP_lengths)[OP_CALLOUT] :
361	(PCRE2_SIZE)GET(code, `1` + `2`*LINK_SIZE + extracode);
362
363	if (mb->callout == NULL) return `0`; / No callout provided /
364
365	/ Fixed fields in the callout block are set once and for all at the start of*
366	matching. /*
367
368	cb->offset_vector = offsets;
369	cb->start_match = (PCRE2_SIZE)(current_subject - mb->start_subject);
370	cb->current_position = (PCRE2_SIZE)(ptr - mb->start_subject);
371	cb->pattern_position = GET(code, `1` + extracode);
372	cb->next_item_length = GET(code, `1` + LINK_SIZE + extracode);
373
374	if (code[extracode] == OP_CALLOUT)
375	{
376	cb->callout_number = code[`1` + `2`*LINK_SIZE + extracode];
377	cb->callout_string_offset = `0`;
378	cb->callout_string = NULL;
379	cb->callout_string_length = `0`;
380	}
381	else
382	{
383	cb->callout_number = `0`;
384	cb->callout_string_offset = GET(code, `1` + `3`*LINK_SIZE + extracode);
385	cb->callout_string = code + (`1` + `4`*LINK_SIZE + extracode) + `1`;
386	cb->callout_string_length = lengthptr - (`1` + `4`LINK_SIZE) - `2`;
387	}
388
389	return (mb->callout)(cb, mb->callout_data);
390	}
391
392
393
394	/*************************************************
395	* Expand local workspace memory *
396	*************************************************/
397
398	/ This function is called when internal_dfa_match() is about to be called*
399	recursively and there is insufficient working space left in the current
400	workspace block. If there's an existing next block, use it; otherwise get a new
401	block unless the heap limit is reached.
402
403	Arguments:
404	rwsptr pointer to block pointer (updated)
405	ovecsize space needed for an ovector
406	mb the match block
407
408	Returns: 0 rwsptr has been updated
409	!0 an error code
410	*/
411
412	static int
413	more_workspace(RWS_anchor *rwsptr, unsigned* int ovecsize, dfa_match_block *mb)
414	{
415	RWS_anchor rws = rwsptr;
416	RWS_anchor *new;
417
418	if (rws->next != NULL)
419	{
420	new = rws->next;
421	}
422
423	/ Sizes in the RWS_anchor blocks are in units of sizeof(int), but*
424	mb->heap_limit and mb->heap_used are in kibibytes. Play carefully, to avoid
425	overflow. /*
426
427	else
428	{
429	uint32_t newsize = (rws->size >= UINT32_MAX/`2`)? UINT32_MAX/`2` : rws->size * `2`;
430	uint32_t newsizeK = newsize/(`1024`/sizeof(int));
431
432	if (newsizeK + mb->heap_used > mb->heap_limit)
433	newsizeK = (uint32_t)(mb->heap_limit - mb->heap_used);
434	newsize = newsizeK(`1024`/sizeof(int*));
435
436	if (newsize < RWS_RSIZE + ovecsize + RWS_ANCHOR_SIZE)
437	return PCRE2_ERROR_HEAPLIMIT;
438	new = mb->memctl.malloc(newsize*sizeof(int), mb->memctl.memory_data);
439	if (new == NULL) return PCRE2_ERROR_NOMEMORY;
440	mb->heap_used += newsizeK;
441	new->next = NULL;
442	new->size = newsize;
443	rws->next = new;
444	}
445
446	new->free = new->size - RWS_ANCHOR_SIZE;
447	*rwsptr = new;
448	return `0`;
449	}
450
451
452
453	/*************************************************
454	* Match a Regular Expression - DFA engine *
455	*************************************************/
456
/* This internal function applies a compiled pattern to a subject string,
starting at a given point, using a DFA engine. This function is called from the
external one, possibly multiple times if the pattern is not anchored. The
function calls itself recursively for some kinds of subpattern.

Arguments:
  mb                the match_data block with fixed information
  this_start_code   the opening bracket of this subexpression's code
  current_subject   where we currently are in the subject string
  start_offset      start offset in the subject string
  offsets           vector to contain the matching string offsets
  offsetcount       size of same
  workspace         vector of workspace
  wscount           size of same
  rlevel            function call recursion level

Returns:            > 0 => number of match offset pairs placed in offsets
                    = 0 => offsets overflowed; longest matches are present
                     -1 => failed to match
                   < -1 => some kind of unexpected problem

The following macros are used for adding states to the two state vectors (one
for the current character, one for the following character). */
480
/* State-adding macros. Each one relies on variables of the enclosing function
(active_count/next_active_state or new_count/next_new_state, plus wscount) and
makes that function return PCRE2_ERROR_DFA_WSSIZE when the relevant list
already holds wscount states. The counter is incremented even on the failing
path. Each macro is wrapped in do { } while (0) so that it behaves as a single
statement; every use must be followed by a semicolon. */

/* Append a state with opcode offset x and repeat count y to the active list
(the list for the current character). The data field is left untouched. */

#define ADD_ACTIVE(x,y) \
  do \
    { \
    if (active_count++ < wscount) \
      { \
      next_active_state->offset = (x); \
      next_active_state->count  = (y); \
      next_active_state++; \
      } \
    else return PCRE2_ERROR_DFA_WSSIZE; \
    } \
  while (0)

/* As ADD_ACTIVE, but also sets the data field to z. */

#define ADD_ACTIVE_DATA(x,y,z) \
  do \
    { \
    if (active_count++ < wscount) \
      { \
      next_active_state->offset = (x); \
      next_active_state->count  = (y); \
      next_active_state->data   = (z); \
      next_active_state++; \
      } \
    else return PCRE2_ERROR_DFA_WSSIZE; \
    } \
  while (0)

/* Append a state with opcode offset x and repeat count y to the new list (the
list for the following character). The data field is left untouched. */

#define ADD_NEW(x,y) \
  do \
    { \
    if (new_count++ < wscount) \
      { \
      next_new_state->offset = (x); \
      next_new_state->count  = (y); \
      next_new_state++; \
      } \
    else return PCRE2_ERROR_DFA_WSSIZE; \
    } \
  while (0)

/* As ADD_NEW, but also sets the data field to z. */

#define ADD_NEW_DATA(x,y,z) \
  do \
    { \
    if (new_count++ < wscount) \
      { \
      next_new_state->offset = (x); \
      next_new_state->count  = (y); \
      next_new_state->data   = (z); \
      next_new_state++; \
      } \
    else return PCRE2_ERROR_DFA_WSSIZE; \
    } \
  while (0)
518
/* And now, here is the code */
520
521	static int
522	internal_dfa_match(
523	dfa_match_block *mb,
524	PCRE2_SPTR this_start_code,
525	PCRE2_SPTR current_subject,
526	PCRE2_SIZE start_offset,
527	PCRE2_SIZE *offsets,
528	uint32_t offsetcount,
529	int *workspace,
530	int wscount,
531	uint32_t rlevel,
532	int *RWS)
533	{
534	stateblock active_states, new_states, *temp_states;
535	stateblock next_active_state, next_new_state;
536	const uint8_t ctypes, lcc, *fcc;
537	PCRE2_SPTR ptr;
538	PCRE2_SPTR end_code;
539	dfa_recursion_info new_recursive;
540	int active_count, new_count, match_count;
541
542	/ Some fields in the mb block are frequently referenced, so we load them into*
543	independent variables in the hope that this will perform better. /*
544
545	PCRE2_SPTR start_subject = mb->start_subject;
546	PCRE2_SPTR end_subject = mb->end_subject;
547	PCRE2_SPTR start_code = mb->start_code;
548
549	#ifdef SUPPORT_UNICODE
550	BOOL utf = (mb->poptions & PCRE2_UTF) != `0`;
551	BOOL utf_or_ucp = utf \|\| (mb->poptions & PCRE2_UCP) != `0`;
552	#else
553	BOOL utf = FALSE;
554	#endif
555
556	BOOL reset_could_continue = FALSE;
557
558	if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
559	if (rlevel++ > mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
560	offsetcount &= (uint32_t)(-`2`); / Round down /
561
562	wscount -= `2`;
563	wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * `2`))) /
564	(`2` * INTS_PER_STATEBLOCK);
565
566	ctypes = mb->tables + ctypes_offset;
567	lcc = mb->tables + lcc_offset;
568	fcc = mb->tables + fcc_offset;
569
570	match_count = PCRE2_ERROR_NOMATCH; / A negative number /
571
572	active_states = (stateblock *)(workspace + `2`);
573	next_new_state = new_states = active_states + wscount;
574	new_count = `0`;
575
576	/ The first thing in any (sub) pattern is a bracket of some sort. Push all*
577	the alternative states onto the list, and find out where the end is. This
578	makes is possible to use this function recursively, when we want to stop at a
579	matching internal ket rather than at the end.
580
581	If we are dealing with a backward assertion we have to find out the maximum
582	amount to move back, and set up each alternative appropriately. /*
583
584	if (this_start_code == OP_ASSERTBACK \|\| this_start_code == OP_ASSERTBACK_NOT)
585	{
586	size_t max_back = `0`;
587	size_t gone_back;
588
589	end_code = this_start_code;
590	do
591	{
592	size_t back = (size_t)GET(end_code, `2`+LINK_SIZE);
593	if (back > max_back) max_back = back;
594	end_code += GET(end_code, `1`);
595	}
596	while (*end_code == OP_ALT);
597
598	/ If we can't go back the amount required for the longest lookbehind*
599	pattern, go back as far as we can; some alternatives may still be viable. /*
600
601	#ifdef SUPPORT_UNICODE
602	/ In character mode we have to step back character by character /
603
604	if (utf)
605	{
606	for (gone_back = `0`; gone_back < max_back; gone_back++)
607	{
608	if (current_subject <= start_subject) break;
609	current_subject--;
610	ACROSSCHAR(current_subject > start_subject, current_subject,
611	current_subject--);
612	}
613	}
614	else
615	#endif
616
617	/ In byte-mode we can do this quickly. /
618
619	{
620	size_t current_offset = (size_t)(current_subject - start_subject);
621	gone_back = (current_offset < max_back)? current_offset : max_back;
622	current_subject -= gone_back;
623	}
624
625	/ Save the earliest consulted character /
626
627	if (current_subject < mb->start_used_ptr)
628	mb->start_used_ptr = current_subject;
629
630	/ Now we can process the individual branches. There will be an OP_REVERSE at*
631	the start of each branch, except when the length of the branch is zero. /*
632
633	end_code = this_start_code;
634	do
635	{
636	uint32_t revlen = (end_code[`1`+LINK_SIZE] == OP_REVERSE)? `1` + LINK_SIZE : `0`;
637	size_t back = (revlen == `0`)? `0` : (size_t)GET(end_code, `2`+LINK_SIZE);
638	if (back <= gone_back)
639	{
640	int bstate = (int)(end_code - start_code + `1` + LINK_SIZE + revlen);
641	ADD_NEW_DATA(-bstate, `0`, (int)(gone_back - back));
642	}
643	end_code += GET(end_code, `1`);
644	}
645	while (*end_code == OP_ALT);
646	}
647
648	/ This is the code for a "normal" subpattern (not a backward assertion). The*
649	start of a whole pattern is always one of these. If we are at the top level,
650	we may be asked to restart matching from the same point that we reached for a
651	previous partial match. We still have to scan through the top-level branches to
652	find the end state. /*
653
654	else
655	{
656	end_code = this_start_code;
657
658	/ Restarting /
659
660	if (rlevel == `1` && (mb->moptions & PCRE2_DFA_RESTART) != `0`)
661	{
662	do { end_code += GET(end_code, `1`); } while (*end_code == OP_ALT);
663	new_count = workspace[`1`];
664	if (!workspace[`0`])
665	memcpy(new_states, active_states, (size_t)new_count * sizeof(stateblock));
666	}
667
668	/ Not restarting /
669
670	else
671	{
672	int length = `1` + LINK_SIZE +
673	((this_start_code == OP_CBRA \|\| this_start_code == OP_SCBRA \|\|
674	this_start_code == OP_CBRAPOS \|\| this_start_code == OP_SCBRAPOS)
675	? IMM2_SIZE:`0`);
676	do
677	{
678	ADD_NEW((int)(end_code - start_code + length), `0`);
679	end_code += GET(end_code, `1`);
680	length = `1` + LINK_SIZE;
681	}
682	while (*end_code == OP_ALT);
683	}
684	}
685
686	workspace[`0`] = `0`; / Bit indicating which vector is current /
687
688	/ Loop for scanning the subject /
689
690	ptr = current_subject;
691	for (;;)
692	{
693	int i, j;
694	int clen, dlen;
695	uint32_t c, d;
696	int forced_fail = `0`;
697	BOOL partial_newline = FALSE;
698	BOOL could_continue = reset_could_continue;
699	reset_could_continue = FALSE;
700
701	if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
702
703	/ Make the new state list into the active state list and empty the*
704	new state list. /*
705
706	temp_states = active_states;
707	active_states = new_states;
708	new_states = temp_states;
709	active_count = new_count;
710	new_count = `0`;
711
712	workspace[`0`] ^= `1`; / Remember for the restarting feature /
713	workspace[`1`] = active_count;
714
715	/ Set the pointers for adding new states /
716
717	next_active_state = active_states + active_count;
718	next_new_state = new_states;
719
720	/ Load the current character from the subject outside the loop, as many*
721	different states may want to look at it, and we assume that at least one
722	will. /*
723
724	if (ptr < end_subject)
725	{
726	clen = `1`; / Number of data items in the character /
727	#ifdef SUPPORT_UNICODE
728	GETCHARLENTEST(c, ptr, clen);
729	#else
730	c = *ptr;
731	#endif /* SUPPORT_UNICODE */
732	}
733	else
734	{
735	clen = `0`; / This indicates the end of the subject /
736	c = NOTACHAR; / This value should never actually be used /
737	}
738
739	/ Scan up the active states and act on each one. The result of an action*
740	may be to add more states to the currently active list (e.g. on hitting a
741	parenthesis) or it may be to put states on the new list, for considering
742	when we move the character pointer on. /*
743
744	for (i = `0`; i < active_count; i++)
745	{
746	stateblock *current_state = active_states + i;
747	BOOL caseless = FALSE;
748	PCRE2_SPTR code;
749	uint32_t codevalue;
750	int state_offset = current_state->offset;
751	int rrc;
752	int count;
753
754	/ A negative offset is a special case meaning "hold off going to this*
755	(negated) state until the number of characters in the data field have
756	been skipped". If the could_continue flag was passed over from a previous
757	state, arrange for it to passed on. /*
758
759	if (state_offset < `0`)
760	{
761	if (current_state->data > `0`)
762	{
763	ADD_NEW_DATA(state_offset, current_state->count,
764	current_state->data - `1`);
765	if (could_continue) reset_could_continue = TRUE;
766	continue;
767	}
768	else
769	{
770	current_state->offset = state_offset = -state_offset;
771	}
772	}
773
774	/ Check for a duplicate state with the same count, and skip if found.*
775	See the note at the head of this module about the possibility of improving
776	performance here. /*
777
778	for (j = `0`; j < i; j++)
779	{
780	if (active_states[j].offset == state_offset &&
781	active_states[j].count == current_state->count)
782	goto NEXT_ACTIVE_STATE;
783	}
784
785	/ The state offset is the offset to the opcode /
786
787	code = start_code + state_offset;
788	codevalue = *code;
789
790	/ If this opcode inspects a character, but we are at the end of the*
791	subject, remember the fact for use when testing for a partial match. /*
792
793	if (clen == `0` && poptable[codevalue] != `0`)
794	could_continue = TRUE;
795
796	/ If this opcode is followed by an inline character, load it. It is*
797	tempting to test for the presence of a subject character here, but that
798	is wrong, because sometimes zero repetitions of the subject are
799	permitted.
800
801	We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
802	argument that is not a data character - but is always one byte long because
803	the values are small. We have to take special action to deal with \P, \p,
804	\H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
805	these ones to new opcodes. /*
806
807	if (coptable[codevalue] > `0`)
808	{
809	dlen = `1`;
810	#ifdef SUPPORT_UNICODE
811	if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
812	#endif /* SUPPORT_UNICODE */
813	d = code[coptable[codevalue]];
814	if (codevalue >= OP_TYPESTAR)
815	{
816	switch(d)
817	{
818	case OP_ANYBYTE: return PCRE2_ERROR_DFA_UITEM;
819	case OP_NOTPROP:
820	case OP_PROP: codevalue += OP_PROP_EXTRA; break;
821	case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
822	case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
823	case OP_NOT_HSPACE:
824	case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
825	case OP_NOT_VSPACE:
826	case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
827	default: break;
828	}
829	}
830	}
831	else
832	{
833	dlen = `0`; / Not strictly necessary, but compilers moan /
834	d = NOTACHAR; / if these variables are not set. /
835	}
836
837
838	/ Now process the individual opcodes /
839
840	switch (codevalue)
841	{
842	/ ========================================================================== /
843	/ These cases are never obeyed. This is a fudge that causes a compile-*
844	time error if the vectors coptable or poptable, which are indexed by
845	opcode, are not the correct length. It seems to be the only way to do
846	such a check at compile time, as the sizeof() operator does not work
847	in the C preprocessor. /*
848
849	case OP_TABLE_LENGTH:
850	case OP_TABLE_LENGTH +
851	((sizeof(coptable) == OP_TABLE_LENGTH) &&
852	(sizeof(poptable) == OP_TABLE_LENGTH)):
853	return `0`;
854
855	/ ========================================================================== /
856	/ Reached a closing bracket. If not at the end of the pattern, carry*
857	on with the next opcode. For repeating opcodes, also add the repeat
858	state. Note that KETRPOS will always be encountered at the end of the
859	subpattern, because the possessive subpattern repeats are always handled
860	using recursive calls. Thus, it never adds any new states.
861
862	At the end of the (sub)pattern, unless we have an empty string and
863	PCRE2_NOTEMPTY is set, or PCRE2_NOTEMPTY_ATSTART is set and we are at the
864	start of the subject, save the match data, shifting up all previous
865	matches so we always have the longest first. /*
866
867	case OP_KET:
868	case OP_KETRMIN:
869	case OP_KETRMAX:
870	case OP_KETRPOS:
871	if (code != end_code)
872	{
873	ADD_ACTIVE(state_offset + `1` + LINK_SIZE, `0`);
874	if (codevalue != OP_KET)
875	{
876	ADD_ACTIVE(state_offset - (int)GET(code, `1`), `0`);
877	}
878	}
879	else
880	{
881	if (ptr > current_subject \|\|
882	((mb->moptions & PCRE2_NOTEMPTY) == `0` &&
883	((mb->moptions & PCRE2_NOTEMPTY_ATSTART) == `0` \|\|
884	current_subject > start_subject + mb->start_offset)))
885	{
886	if (match_count < `0`) match_count = (offsetcount >= `2`)? `1` : `0`;
887	else if (match_count > `0` && ++match_count * `2` > (int)offsetcount)
888	match_count = `0`;
889	count = ((match_count == `0`)? (int)offsetcount : match_count * `2`) - `2`;
890	if (count > `0`) (void)memmove(offsets + `2`, offsets,
891	(size_t)count * sizeof(PCRE2_SIZE));
892	if (offsetcount >= `2`)
893	{
894	offsets[`0`] = (PCRE2_SIZE)(current_subject - start_subject);
895	offsets[`1`] = (PCRE2_SIZE)(ptr - start_subject);
896	}
897	if ((mb->moptions & PCRE2_DFA_SHORTEST) != `0`) return match_count;
898	}
899	}
900	break;
901
902	/ ========================================================================== /
903	/* These opcodes add to the current list of states without looking
904	at the current character. */
905
906	/-----------------------------------------------------------------/
907	case OP_ALT:
908	do { code += GET(code, `1`); } while (*code == OP_ALT);
909	ADD_ACTIVE((int)(code - start_code), `0`);
910	break;
911
912	/-----------------------------------------------------------------/
913	case OP_BRA:
914	case OP_SBRA:
915	do
916	{
917	ADD_ACTIVE((int)(code - start_code + `1` + LINK_SIZE), `0`);
918	code += GET(code, `1`);
919	}
920	while (*code == OP_ALT);
921	break;
922
923	/-----------------------------------------------------------------/
924	case OP_CBRA:
925	case OP_SCBRA:
926	ADD_ACTIVE((int)(code - start_code + `1` + LINK_SIZE + IMM2_SIZE), `0`);
927	code += GET(code, `1`);
928	while (*code == OP_ALT)
929	{
930	ADD_ACTIVE((int)(code - start_code + `1` + LINK_SIZE), `0`);
931	code += GET(code, `1`);
932	}
933	break;
934
935	/-----------------------------------------------------------------/
936	case OP_BRAZERO:
937	case OP_BRAMINZERO:
938	ADD_ACTIVE(state_offset + `1`, `0`);
939	code += `1` + GET(code, `2`);
940	while (*code == OP_ALT) code += GET(code, `1`);
941	ADD_ACTIVE((int)(code - start_code + `1` + LINK_SIZE), `0`);
942	break;
943
944	/-----------------------------------------------------------------/
945	case OP_SKIPZERO:
946	code += `1` + GET(code, `2`);
947	while (*code == OP_ALT) code += GET(code, `1`);
948	ADD_ACTIVE((int)(code - start_code + `1` + LINK_SIZE), `0`);
949	break;
950
951	/-----------------------------------------------------------------/
952	case OP_CIRC:
953	if (ptr == start_subject && (mb->moptions & PCRE2_NOTBOL) == `0`)
954	{ ADD_ACTIVE(state_offset + `1`, `0`); }
955	break;
956
957	/-----------------------------------------------------------------/
958	case OP_CIRCM:
959	if ((ptr == start_subject && (mb->moptions & PCRE2_NOTBOL) == `0`) \|\|
960	((ptr != end_subject \|\| (mb->poptions & PCRE2_ALT_CIRCUMFLEX) != `0` )
961	&& WAS_NEWLINE(ptr)))
962	{ ADD_ACTIVE(state_offset + `1`, `0`); }
963	break;
964
965	/-----------------------------------------------------------------/
966	case OP_EOD:
967	if (ptr >= end_subject)
968	{
969	if ((mb->moptions & PCRE2_PARTIAL_HARD) != `0`)
970	return PCRE2_ERROR_PARTIAL;
971	else { ADD_ACTIVE(state_offset + `1`, `0`); }
972	}
973	break;
974
975	/-----------------------------------------------------------------/
976	case OP_SOD:
977	if (ptr == start_subject) { ADD_ACTIVE(state_offset + `1`, `0`); }
978	break;
979
980	/-----------------------------------------------------------------/
981	case OP_SOM:
982	if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + `1`, `0`); }
983	break;
984
985
986	/ ========================================================================== /
987	/* These opcodes inspect the next subject character, and sometimes
988	the previous one as well, but do not have an argument. The variable
989	clen contains the length of the current character and is zero if we are
990	at the end of the subject. */
991
992	/-----------------------------------------------------------------/
993	case OP_ANY:
994	if (clen > `0` && !IS_NEWLINE(ptr))
995	{
996	if (ptr + `1` >= mb->end_subject &&
997	(mb->moptions & (PCRE2_PARTIAL_HARD)) != `0` &&
998	NLBLOCK->nltype == NLTYPE_FIXED &&
999	NLBLOCK->nllen == `2` &&
1000	c == NLBLOCK->nl[`0`])
1001	{
1002	could_continue = partial_newline = TRUE;
1003	}
1004	else
1005	{
1006	ADD_NEW(state_offset + `1`, `0`);
1007	}
1008	}
1009	break;
1010
1011	/-----------------------------------------------------------------/
1012	case OP_ALLANY:
1013	if (clen > `0`)
1014	{ ADD_NEW(state_offset + `1`, `0`); }
1015	break;
1016
1017	/-----------------------------------------------------------------/
1018	case OP_EODN:
1019	if (clen == `0` \|\| (IS_NEWLINE(ptr) && ptr == end_subject - mb->nllen))
1020	{
1021	if ((mb->moptions & PCRE2_PARTIAL_HARD) != `0`)
1022	return PCRE2_ERROR_PARTIAL;
1023	ADD_ACTIVE(state_offset + `1`, `0`);
1024	}
1025	break;
1026
1027	/-----------------------------------------------------------------/
1028	case OP_DOLL:
1029	if ((mb->moptions & PCRE2_NOTEOL) == `0`)
1030	{
1031	if (clen == `0` && (mb->moptions & PCRE2_PARTIAL_HARD) != `0`)
1032	could_continue = TRUE;
1033	else if (clen == `0` \|\|
1034	((mb->poptions & PCRE2_DOLLAR_ENDONLY) == `0` && IS_NEWLINE(ptr) &&
1035	(ptr == end_subject - mb->nllen)
1036	))
1037	{ ADD_ACTIVE(state_offset + `1`, `0`); }
1038	else if (ptr + `1` >= mb->end_subject &&
1039	(mb->moptions & (PCRE2_PARTIAL_HARD\|PCRE2_PARTIAL_SOFT)) != `0` &&
1040	NLBLOCK->nltype == NLTYPE_FIXED &&
1041	NLBLOCK->nllen == `2` &&
1042	c == NLBLOCK->nl[`0`])
1043	{
1044	if ((mb->moptions & PCRE2_PARTIAL_HARD) != `0`)
1045	{
1046	reset_could_continue = TRUE;
1047	ADD_NEW_DATA(-(state_offset + `1`), `0`, `1`);
1048	}
1049	else could_continue = partial_newline = TRUE;
1050	}
1051	}
1052	break;
1053
1054	/-----------------------------------------------------------------/
1055	case OP_DOLLM:
1056	if ((mb->moptions & PCRE2_NOTEOL) == `0`)
1057	{
1058	if (clen == `0` && (mb->moptions & PCRE2_PARTIAL_HARD) != `0`)
1059	could_continue = TRUE;
1060	else if (clen == `0` \|\|
1061	((mb->poptions & PCRE2_DOLLAR_ENDONLY) == `0` && IS_NEWLINE(ptr)))
1062	{ ADD_ACTIVE(state_offset + `1`, `0`); }
1063	else if (ptr + `1` >= mb->end_subject &&
1064	(mb->moptions & (PCRE2_PARTIAL_HARD\|PCRE2_PARTIAL_SOFT)) != `0` &&
1065	NLBLOCK->nltype == NLTYPE_FIXED &&
1066	NLBLOCK->nllen == `2` &&
1067	c == NLBLOCK->nl[`0`])
1068	{
1069	if ((mb->moptions & PCRE2_PARTIAL_HARD) != `0`)
1070	{
1071	reset_could_continue = TRUE;
1072	ADD_NEW_DATA(-(state_offset + `1`), `0`, `1`);
1073	}
1074	else could_continue = partial_newline = TRUE;
1075	}
1076	}
1077	else if (IS_NEWLINE(ptr))
1078	{ ADD_ACTIVE(state_offset + `1`, `0`); }
1079	break;
1080
1081	/-----------------------------------------------------------------/
1082
1083	case OP_DIGIT:
1084	case OP_WHITESPACE:
1085	case OP_WORDCHAR:
1086	if (clen > `0` && c < `256` &&
1087	((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != `0`)
1088	{ ADD_NEW(state_offset + `1`, `0`); }
1089	break;
1090
1091	/-----------------------------------------------------------------/
1092	case OP_NOT_DIGIT:
1093	case OP_NOT_WHITESPACE:
1094	case OP_NOT_WORDCHAR:
1095	if (clen > `0` && (c >= `256` \|\|
1096	((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != `0`))
1097	{ ADD_NEW(state_offset + `1`, `0`); }
1098	break;
1099
1100	/-----------------------------------------------------------------/
1101	case OP_WORD_BOUNDARY:
1102	case OP_NOT_WORD_BOUNDARY:
1103	{
1104	int left_word, right_word;
1105
1106	if (ptr > start_subject)
1107	{
1108	PCRE2_SPTR temp = ptr - `1`;
1109	if (temp < mb->start_used_ptr) mb->start_used_ptr = temp;
1110	#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1111	if (utf) { BACKCHAR(temp); }
1112	#endif
1113	GETCHARTEST(d, temp);
1114	#ifdef SUPPORT_UNICODE
1115	if ((mb->poptions & PCRE2_UCP) != `0`)
1116	{
1117	if (d == `'_'`) left_word = TRUE; else
1118	{
1119	uint32_t cat = UCD_CATEGORY(d);
1120	left_word = (cat == ucp_L \|\| cat == ucp_N);
1121	}
1122	}
1123	else
1124	#endif
1125	left_word = d < `256` && (ctypes[d] & ctype_word) != `0`;
1126	}
1127	else left_word = FALSE;
1128
1129	if (clen > `0`)
1130	{
1131	if (ptr >= mb->last_used_ptr)
1132	{
1133	PCRE2_SPTR temp = ptr + `1`;
1134	#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1135	if (utf) { FORWARDCHARTEST(temp, mb->end_subject); }
1136	#endif
1137	mb->last_used_ptr = temp;
1138	}
1139	#ifdef SUPPORT_UNICODE
1140	if ((mb->poptions & PCRE2_UCP) != `0`)
1141	{
1142	if (c == `'_'`) right_word = TRUE; else
1143	{
1144	uint32_t cat = UCD_CATEGORY(c);
1145	right_word = (cat == ucp_L \|\| cat == ucp_N);
1146	}
1147	}
1148	else
1149	#endif
1150	right_word = c < `256` && (ctypes[c] & ctype_word) != `0`;
1151	}
1152	else right_word = FALSE;
1153
1154	if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
1155	{ ADD_ACTIVE(state_offset + `1`, `0`); }
1156	}
1157	break;
1158
1159
1160	/-----------------------------------------------------------------/
1161	/* Check the next character by Unicode property. We will get here only
1162	if the support is in the binary; otherwise a compile-time error occurs.
1163	*/
1164
1165	#ifdef SUPPORT_UNICODE
1166	case OP_PROP:
1167	case OP_NOTPROP:
1168	if (clen > `0`)
1169	{
1170	BOOL OK;
1171	const uint32_t *cp;
1172	const ucd_record * prop = GET_UCD(c);
1173	switch(code[`1`])
1174	{
1175	case PT_ANY:
1176	OK = TRUE;
1177	break;
1178
1179	case PT_LAMP:
1180	OK = prop->chartype == ucp_Lu \|\| prop->chartype == ucp_Ll \|\|
1181	prop->chartype == ucp_Lt;
1182	break;
1183
1184	case PT_GC:
1185	OK = PRIV(ucp_gentype)[prop->chartype] == code[`2`];
1186	break;
1187
1188	case PT_PC:
1189	OK = prop->chartype == code[`2`];
1190	break;
1191
1192	case PT_SC:
1193	OK = prop->script == code[`2`];
1194	break;
1195
1196	/ These are specials for combination cases. /
1197
1198	case PT_ALNUM:
1199	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
1200	PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1201	break;
1202
1203	/* Perl space used to exclude VT, but from Perl 5.18 it is included,
1204	which means that Perl space and POSIX space are now identical. PCRE
1205	was changed at release 8.34. */
1206
1207	case PT_SPACE: / Perl space /
1208	case PT_PXSPACE: / POSIX space /
1209	switch(c)
1210	{
1211	HSPACE_CASES:
1212	VSPACE_CASES:
1213	OK = TRUE;
1214	break;
1215
1216	default:
1217	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1218	break;
1219	}
1220	break;
1221
1222	case PT_WORD:
1223	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
1224	PRIV(ucp_gentype)[prop->chartype] == ucp_N \|\|
1225	c == CHAR_UNDERSCORE;
1226	break;
1227
1228	case PT_CLIST:
1229	cp = PRIV(ucd_caseless_sets) + code[`2`];
1230	for (;;)
1231	{
1232	if (c < cp) { OK = FALSE; break*; }
1233	if (c == cp++) { OK = TRUE; break*; }
1234	}
1235	break;
1236
1237	case PT_UCNC:
1238	OK = c == CHAR_DOLLAR_SIGN \|\| c == CHAR_COMMERCIAL_AT \|\|
1239	c == CHAR_GRAVE_ACCENT \|\| (c >= `0xa0` && c <= `0xd7ff`) \|\|
1240	c >= `0xe000`;
1241	break;
1242
1243	/ Should never occur, but keep compilers from grumbling. /
1244
1245	default:
1246	OK = codevalue != OP_PROP;
1247	break;
1248	}
1249
1250	if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + `3`, `0`); }
1251	}
1252	break;
1253	#endif
1254
1255
1256
1257	/ ========================================================================== /
1258	/* These opcodes likewise inspect the subject character, but have an
1259	argument that is not a data character. It is one of these opcodes:
1260	OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_WHITESPACE,
1261	OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
1262
1263	case OP_TYPEPLUS:
1264	case OP_TYPEMINPLUS:
1265	case OP_TYPEPOSPLUS:
1266	count = current_state->count; / Already matched /
1267	if (count > `0`) { ADD_ACTIVE(state_offset + `2`, `0`); }
1268	if (clen > `0`)
1269	{
1270	if (d == OP_ANY && ptr + `1` >= mb->end_subject &&
1271	(mb->moptions & (PCRE2_PARTIAL_HARD)) != `0` &&
1272	NLBLOCK->nltype == NLTYPE_FIXED &&
1273	NLBLOCK->nllen == `2` &&
1274	c == NLBLOCK->nl[`0`])
1275	{
1276	could_continue = partial_newline = TRUE;
1277	}
1278	else if ((c >= `256` && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) \|\|
1279	(c < `256` &&
1280	(d != OP_ANY \|\| !IS_NEWLINE(ptr)) &&
1281	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != `0`))
1282	{
1283	if (count > `0` && codevalue == OP_TYPEPOSPLUS)
1284	{
1285	active_count--; / Remove non-match possibility /
1286	next_active_state--;
1287	}
1288	count++;
1289	ADD_NEW(state_offset, count);
1290	}
1291	}
1292	break;
1293
1294	/-----------------------------------------------------------------/
1295	case OP_TYPEQUERY:
1296	case OP_TYPEMINQUERY:
1297	case OP_TYPEPOSQUERY:
1298	ADD_ACTIVE(state_offset + `2`, `0`);
1299	if (clen > `0`)
1300	{
1301	if (d == OP_ANY && ptr + `1` >= mb->end_subject &&
1302	(mb->moptions & (PCRE2_PARTIAL_HARD)) != `0` &&
1303	NLBLOCK->nltype == NLTYPE_FIXED &&
1304	NLBLOCK->nllen == `2` &&
1305	c == NLBLOCK->nl[`0`])
1306	{
1307	could_continue = partial_newline = TRUE;
1308	}
1309	else if ((c >= `256` && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) \|\|
1310	(c < `256` &&
1311	(d != OP_ANY \|\| !IS_NEWLINE(ptr)) &&
1312	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != `0`))
1313	{
1314	if (codevalue == OP_TYPEPOSQUERY)
1315	{
1316	active_count--; / Remove non-match possibility /
1317	next_active_state--;
1318	}
1319	ADD_NEW(state_offset + `2`, `0`);
1320	}
1321	}
1322	break;
1323
1324	/-----------------------------------------------------------------/
1325	case OP_TYPESTAR:
1326	case OP_TYPEMINSTAR:
1327	case OP_TYPEPOSSTAR:
1328	ADD_ACTIVE(state_offset + `2`, `0`);
1329	if (clen > `0`)
1330	{
1331	if (d == OP_ANY && ptr + `1` >= mb->end_subject &&
1332	(mb->moptions & (PCRE2_PARTIAL_HARD)) != `0` &&
1333	NLBLOCK->nltype == NLTYPE_FIXED &&
1334	NLBLOCK->nllen == `2` &&
1335	c == NLBLOCK->nl[`0`])
1336	{
1337	could_continue = partial_newline = TRUE;
1338	}
1339	else if ((c >= `256` && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) \|\|
1340	(c < `256` &&
1341	(d != OP_ANY \|\| !IS_NEWLINE(ptr)) &&
1342	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != `0`))
1343	{
1344	if (codevalue == OP_TYPEPOSSTAR)
1345	{
1346	active_count--; / Remove non-match possibility /
1347	next_active_state--;
1348	}
1349	ADD_NEW(state_offset, `0`);
1350	}
1351	}
1352	break;
1353
1354	/-----------------------------------------------------------------/
1355	case OP_TYPEEXACT:
1356	count = current_state->count; / Number already matched /
1357	if (clen > `0`)
1358	{
1359	if (d == OP_ANY && ptr + `1` >= mb->end_subject &&
1360	(mb->moptions & (PCRE2_PARTIAL_HARD)) != `0` &&
1361	NLBLOCK->nltype == NLTYPE_FIXED &&
1362	NLBLOCK->nllen == `2` &&
1363	c == NLBLOCK->nl[`0`])
1364	{
1365	could_continue = partial_newline = TRUE;
1366	}
1367	else if ((c >= `256` && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) \|\|
1368	(c < `256` &&
1369	(d != OP_ANY \|\| !IS_NEWLINE(ptr)) &&
1370	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != `0`))
1371	{
1372	if (++count >= (int)GET2(code, `1`))
1373	{ ADD_NEW(state_offset + `1` + IMM2_SIZE + `1`, `0`); }
1374	else
1375	{ ADD_NEW(state_offset, count); }
1376	}
1377	}
1378	break;
1379
1380	/-----------------------------------------------------------------/
1381	case OP_TYPEUPTO:
1382	case OP_TYPEMINUPTO:
1383	case OP_TYPEPOSUPTO:
1384	ADD_ACTIVE(state_offset + `2` + IMM2_SIZE, `0`);
1385	count = current_state->count; / Number already matched /
1386	if (clen > `0`)
1387	{
1388	if (d == OP_ANY && ptr + `1` >= mb->end_subject &&
1389	(mb->moptions & (PCRE2_PARTIAL_HARD)) != `0` &&
1390	NLBLOCK->nltype == NLTYPE_FIXED &&
1391	NLBLOCK->nllen == `2` &&
1392	c == NLBLOCK->nl[`0`])
1393	{
1394	could_continue = partial_newline = TRUE;
1395	}
1396	else if ((c >= `256` && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) \|\|
1397	(c < `256` &&
1398	(d != OP_ANY \|\| !IS_NEWLINE(ptr)) &&
1399	((ctypes[c] & toptable1[d]) ^ toptable2[d]) != `0`))
1400	{
1401	if (codevalue == OP_TYPEPOSUPTO)
1402	{
1403	active_count--; / Remove non-match possibility /
1404	next_active_state--;
1405	}
1406	if (++count >= (int)GET2(code, `1`))
1407	{ ADD_NEW(state_offset + `2` + IMM2_SIZE, `0`); }
1408	else
1409	{ ADD_NEW(state_offset, count); }
1410	}
1411	}
1412	break;
1413
1414	/ ========================================================================== /
1415	/* These are virtual opcodes that are used when something like
1416	OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, OP_EXTUNI, or a horizontal
1417	or vertical space type (OP_HSPACE, OP_VSPACE) as its argument. It keeps the
1418	code above fast for the other cases. The argument is in the d variable. */
1419
1420	#ifdef SUPPORT_UNICODE
1421	case OP_PROP_EXTRA + OP_TYPEPLUS:
1422	case OP_PROP_EXTRA + OP_TYPEMINPLUS:
1423	case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
1424	count = current_state->count; / Already matched /
1425	if (count > `0`) { ADD_ACTIVE(state_offset + `4`, `0`); }
1426	if (clen > `0`)
1427	{
1428	BOOL OK;
1429	const uint32_t *cp;
1430	const ucd_record * prop = GET_UCD(c);
1431	switch(code[`2`])
1432	{
1433	case PT_ANY:
1434	OK = TRUE;
1435	break;
1436
1437	case PT_LAMP:
1438	OK = prop->chartype == ucp_Lu \|\| prop->chartype == ucp_Ll \|\|
1439	prop->chartype == ucp_Lt;
1440	break;
1441
1442	case PT_GC:
1443	OK = PRIV(ucp_gentype)[prop->chartype] == code[`3`];
1444	break;
1445
1446	case PT_PC:
1447	OK = prop->chartype == code[`3`];
1448	break;
1449
1450	case PT_SC:
1451	OK = prop->script == code[`3`];
1452	break;
1453
1454	/ These are specials for combination cases. /
1455
1456	case PT_ALNUM:
1457	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
1458	PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1459	break;
1460
1461	/* Perl space used to exclude VT, but from Perl 5.18 it is included,
1462	which means that Perl space and POSIX space are now identical. PCRE
1463	was changed at release 8.34. */
1464
1465	case PT_SPACE: / Perl space /
1466	case PT_PXSPACE: / POSIX space /
1467	switch(c)
1468	{
1469	HSPACE_CASES:
1470	VSPACE_CASES:
1471	OK = TRUE;
1472	break;
1473
1474	default:
1475	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1476	break;
1477	}
1478	break;
1479
1480	case PT_WORD:
1481	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
1482	PRIV(ucp_gentype)[prop->chartype] == ucp_N \|\|
1483	c == CHAR_UNDERSCORE;
1484	break;
1485
1486	case PT_CLIST:
1487	cp = PRIV(ucd_caseless_sets) + code[`3`];
1488	for (;;)
1489	{
1490	if (c < cp) { OK = FALSE; break*; }
1491	if (c == cp++) { OK = TRUE; break*; }
1492	}
1493	break;
1494
1495	case PT_UCNC:
1496	OK = c == CHAR_DOLLAR_SIGN \|\| c == CHAR_COMMERCIAL_AT \|\|
1497	c == CHAR_GRAVE_ACCENT \|\| (c >= `0xa0` && c <= `0xd7ff`) \|\|
1498	c >= `0xe000`;
1499	break;
1500
1501	/ Should never occur, but keep compilers from grumbling. /
1502
1503	default:
1504	OK = codevalue != OP_PROP;
1505	break;
1506	}
1507
1508	if (OK == (d == OP_PROP))
1509	{
1510	if (count > `0` && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1511	{
1512	active_count--; / Remove non-match possibility /
1513	next_active_state--;
1514	}
1515	count++;
1516	ADD_NEW(state_offset, count);
1517	}
1518	}
1519	break;
1520
1521	/-----------------------------------------------------------------/
1522	case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1523	case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1524	case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1525	count = current_state->count; / Already matched /
1526	if (count > `0`) { ADD_ACTIVE(state_offset + `2`, `0`); }
1527	if (clen > `0`)
1528	{
1529	int ncount = `0`;
1530	if (count > `0` && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1531	{
1532	active_count--; / Remove non-match possibility /
1533	next_active_state--;
1534	}
1535	(void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
1536	&ncount);
1537	count++;
1538	ADD_NEW_DATA(-state_offset, count, ncount);
1539	}
1540	break;
1541	#endif
1542
1543	/-----------------------------------------------------------------/
1544	case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1545	case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1546	case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1547	count = current_state->count; / Already matched /
1548	if (count > `0`) { ADD_ACTIVE(state_offset + `2`, `0`); }
1549	if (clen > `0`)
1550	{
1551	int ncount = `0`;
1552	switch (c)
1553	{
1554	case CHAR_VT:
1555	case CHAR_FF:
1556	case CHAR_NEL:
1557	#ifndef EBCDIC
1558	case `0x2028`:
1559	case `0x2029`:
1560	#endif /* Not EBCDIC */
1561	if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
1562	goto ANYNL01;
1563
1564	case CHAR_CR:
1565	if (ptr + `1` < end_subject && UCHAR21TEST(ptr + `1`) == CHAR_LF) ncount = `1`;
1566	/ Fall through /
1567
1568	ANYNL01:
1569	case CHAR_LF:
1570	if (count > `0` && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1571	{
1572	active_count--; / Remove non-match possibility /
1573	next_active_state--;
1574	}
1575	count++;
1576	ADD_NEW_DATA(-state_offset, count, ncount);
1577	break;
1578
1579	default:
1580	break;
1581	}
1582	}
1583	break;
1584
1585	/-----------------------------------------------------------------/
1586	case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1587	case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1588	case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1589	count = current_state->count; / Already matched /
1590	if (count > `0`) { ADD_ACTIVE(state_offset + `2`, `0`); }
1591	if (clen > `0`)
1592	{
1593	BOOL OK;
1594	switch (c)
1595	{
1596	VSPACE_CASES:
1597	OK = TRUE;
1598	break;
1599
1600	default:
1601	OK = FALSE;
1602	break;
1603	}
1604
1605	if (OK == (d == OP_VSPACE))
1606	{
1607	if (count > `0` && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1608	{
1609	active_count--; / Remove non-match possibility /
1610	next_active_state--;
1611	}
1612	count++;
1613	ADD_NEW_DATA(-state_offset, count, `0`);
1614	}
1615	}
1616	break;
1617
1618	/-----------------------------------------------------------------/
1619	case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1620	case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1621	case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1622	count = current_state->count; / Already matched /
1623	if (count > `0`) { ADD_ACTIVE(state_offset + `2`, `0`); }
1624	if (clen > `0`)
1625	{
1626	BOOL OK;
1627	switch (c)
1628	{
1629	HSPACE_CASES:
1630	OK = TRUE;
1631	break;
1632
1633	default:
1634	OK = FALSE;
1635	break;
1636	}
1637
1638	if (OK == (d == OP_HSPACE))
1639	{
1640	if (count > `0` && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1641	{
1642	active_count--; / Remove non-match possibility /
1643	next_active_state--;
1644	}
1645	count++;
1646	ADD_NEW_DATA(-state_offset, count, `0`);
1647	}
1648	}
1649	break;
1650
1651	/-----------------------------------------------------------------/
1652	#ifdef SUPPORT_UNICODE
1653	case OP_PROP_EXTRA + OP_TYPEQUERY:
1654	case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1655	case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1656	count = `4`;
1657	goto QS1;
1658
1659	case OP_PROP_EXTRA + OP_TYPESTAR:
1660	case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1661	case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1662	count = `0`;
1663
1664	QS1:
1665
1666	ADD_ACTIVE(state_offset + `4`, `0`);
1667	if (clen > `0`)
1668	{
1669	BOOL OK;
1670	const uint32_t *cp;
1671	const ucd_record * prop = GET_UCD(c);
1672	switch(code[`2`])
1673	{
1674	case PT_ANY:
1675	OK = TRUE;
1676	break;
1677
1678	case PT_LAMP:
1679	OK = prop->chartype == ucp_Lu \|\| prop->chartype == ucp_Ll \|\|
1680	prop->chartype == ucp_Lt;
1681	break;
1682
1683	case PT_GC:
1684	OK = PRIV(ucp_gentype)[prop->chartype] == code[`3`];
1685	break;
1686
1687	case PT_PC:
1688	OK = prop->chartype == code[`3`];
1689	break;
1690
1691	case PT_SC:
1692	OK = prop->script == code[`3`];
1693	break;
1694
1695	/ These are specials for combination cases. /
1696
1697	case PT_ALNUM:
1698	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
1699	PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1700	break;
1701
1702	/* Perl space used to exclude VT, but from Perl 5.18 it is included,
1703	which means that Perl space and POSIX space are now identical. PCRE
1704	was changed at release 8.34. */
1705
1706	case PT_SPACE: / Perl space /
1707	case PT_PXSPACE: / POSIX space /
1708	switch(c)
1709	{
1710	HSPACE_CASES:
1711	VSPACE_CASES:
1712	OK = TRUE;
1713	break;
1714
1715	default:
1716	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1717	break;
1718	}
1719	break;
1720
1721	case PT_WORD:
1722	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
1723	PRIV(ucp_gentype)[prop->chartype] == ucp_N \|\|
1724	c == CHAR_UNDERSCORE;
1725	break;
1726
1727	case PT_CLIST:
1728	cp = PRIV(ucd_caseless_sets) + code[`3`];
1729	for (;;)
1730	{
1731	if (c < cp) { OK = FALSE; break*; }
1732	if (c == cp++) { OK = TRUE; break*; }
1733	}
1734	break;
1735
1736	case PT_UCNC:
1737	OK = c == CHAR_DOLLAR_SIGN \|\| c == CHAR_COMMERCIAL_AT \|\|
1738	c == CHAR_GRAVE_ACCENT \|\| (c >= `0xa0` && c <= `0xd7ff`) \|\|
1739	c >= `0xe000`;
1740	break;
1741
1742	/ Should never occur, but keep compilers from grumbling. /
1743
1744	default:
1745	OK = codevalue != OP_PROP;
1746	break;
1747	}
1748
1749	if (OK == (d == OP_PROP))
1750	{
1751	if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR \|\|
1752	codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1753	{
1754	active_count--; / Remove non-match possibility /
1755	next_active_state--;
1756	}
1757	ADD_NEW(state_offset + count, `0`);
1758	}
1759	}
1760	break;
1761
1762	/-----------------------------------------------------------------/
1763	case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1764	case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1765	case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1766	count = `2`;
1767	goto QS2;
1768
1769	case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1770	case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1771	case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1772	count = `0`;
1773
1774	QS2:
1775
1776	ADD_ACTIVE(state_offset + `2`, `0`);
1777	if (clen > `0`)
1778	{
1779	int ncount = `0`;
1780	if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR \|\|
1781	codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1782	{
1783	active_count--; / Remove non-match possibility /
1784	next_active_state--;
1785	}
1786	(void)PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
1787	&ncount);
1788	ADD_NEW_DATA(-(state_offset + count), `0`, ncount);
1789	}
1790	break;
1791	#endif
1792
1793	/-----------------------------------------------------------------/
1794	case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1795	case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1796	case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1797	count = `2`;
1798	goto QS3;
1799
1800	case OP_ANYNL_EXTRA + OP_TYPESTAR:
1801	case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1802	case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1803	count = `0`;
1804
1805	QS3:
1806	ADD_ACTIVE(state_offset + `2`, `0`);
1807	if (clen > `0`)
1808	{
1809	int ncount = `0`;
1810	switch (c)
1811	{
1812	case CHAR_VT:
1813	case CHAR_FF:
1814	case CHAR_NEL:
1815	#ifndef EBCDIC
1816	case `0x2028`:
1817	case `0x2029`:
1818	#endif /* Not EBCDIC */
1819	if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
1820	goto ANYNL02;
1821
1822	case CHAR_CR:
1823	if (ptr + `1` < end_subject && UCHAR21TEST(ptr + `1`) == CHAR_LF) ncount = `1`;
1824	/ Fall through /
1825
1826	ANYNL02:
1827	case CHAR_LF:
1828	if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR \|\|
1829	codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1830	{
1831	active_count--; / Remove non-match possibility /
1832	next_active_state--;
1833	}
1834	ADD_NEW_DATA(-(state_offset + (int)count), `0`, ncount);
1835	break;
1836
1837	default:
1838	break;
1839	}
1840	}
1841	break;
1842
1843	/-----------------------------------------------------------------/
1844	case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1845	case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1846	case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1847	count = `2`;
1848	goto QS4;
1849
1850	case OP_VSPACE_EXTRA + OP_TYPESTAR:
1851	case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1852	case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1853	count = `0`;
1854
1855	QS4:
1856	ADD_ACTIVE(state_offset + `2`, `0`);
1857	if (clen > `0`)
1858	{
1859	BOOL OK;
1860	switch (c)
1861	{
1862	VSPACE_CASES:
1863	OK = TRUE;
1864	break;
1865
1866	default:
1867	OK = FALSE;
1868	break;
1869	}
1870	if (OK == (d == OP_VSPACE))
1871	{
1872	if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR \|\|
1873	codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1874	{
1875	active_count--; / Remove non-match possibility /
1876	next_active_state--;
1877	}
1878	ADD_NEW_DATA(-(state_offset + (int)count), `0`, `0`);
1879	}
1880	}
1881	break;
1882
1883	/-----------------------------------------------------------------/
1884	case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1885	case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1886	case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1887	count = `2`;
1888	goto QS5;
1889
1890	case OP_HSPACE_EXTRA + OP_TYPESTAR:
1891	case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1892	case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1893	count = `0`;
1894
1895	QS5:
1896	ADD_ACTIVE(state_offset + `2`, `0`);
1897	if (clen > `0`)
1898	{
1899	BOOL OK;
1900	switch (c)
1901	{
1902	HSPACE_CASES:
1903	OK = TRUE;
1904	break;
1905
1906	default:
1907	OK = FALSE;
1908	break;
1909	}
1910
1911	if (OK == (d == OP_HSPACE))
1912	{
1913	if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR \|\|
1914	codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1915	{
1916	active_count--; / Remove non-match possibility /
1917	next_active_state--;
1918	}
1919	ADD_NEW_DATA(-(state_offset + (int)count), `0`, `0`);
1920	}
1921	}
1922	break;
1923
1924	/-----------------------------------------------------------------/
1925	#ifdef SUPPORT_UNICODE
1926	case OP_PROP_EXTRA + OP_TYPEEXACT:
1927	case OP_PROP_EXTRA + OP_TYPEUPTO:
1928	case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1929	case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1930	if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1931	{ ADD_ACTIVE(state_offset + `1` + IMM2_SIZE + `3`, `0`); }
1932	count = current_state->count; / Number already matched /
1933	if (clen > `0`)
1934	{
1935	BOOL OK;
1936	const uint32_t *cp;
1937	const ucd_record * prop = GET_UCD(c);
1938	switch(code[`1` + IMM2_SIZE + `1`])
1939	{
1940	case PT_ANY:
1941	OK = TRUE;
1942	break;
1943
1944	case PT_LAMP:
1945	OK = prop->chartype == ucp_Lu \|\| prop->chartype == ucp_Ll \|\|
1946	prop->chartype == ucp_Lt;
1947	break;
1948
1949	case PT_GC:
1950	OK = PRIV(ucp_gentype)[prop->chartype] == code[`1` + IMM2_SIZE + `2`];
1951	break;
1952
1953	case PT_PC:
1954	OK = prop->chartype == code[`1` + IMM2_SIZE + `2`];
1955	break;
1956
1957	case PT_SC:
1958	OK = prop->script == code[`1` + IMM2_SIZE + `2`];
1959	break;
1960
1961	/ These are specials for combination cases. /
1962
1963	case PT_ALNUM:
1964	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
1965	PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1966	break;
1967
1968	/* Perl space used to exclude VT, but from Perl 5.18 it is included,
1969	which means that Perl space and POSIX space are now identical. PCRE
1970	was changed at release 8.34. */
1971
1972	case PT_SPACE: / Perl space /
1973	case PT_PXSPACE: / POSIX space /
1974	switch(c)
1975	{
1976	HSPACE_CASES:
1977	VSPACE_CASES:
1978	OK = TRUE;
1979	break;
1980
1981	default:
1982	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1983	break;
1984	}
1985	break;
1986
1987	case PT_WORD:
1988	OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
1989	PRIV(ucp_gentype)[prop->chartype] == ucp_N \|\|
1990	c == CHAR_UNDERSCORE;
1991	break;
1992
1993	case PT_CLIST:
1994	cp = PRIV(ucd_caseless_sets) + code[`1` + IMM2_SIZE + `2`];
1995	for (;;)
1996	{
1997	if (c < cp) { OK = FALSE; break*; }
1998	if (c == cp++) { OK = TRUE; break*; }
1999	}
2000	break;
2001
2002	case PT_UCNC:
2003	OK = c == CHAR_DOLLAR_SIGN \|\| c == CHAR_COMMERCIAL_AT \|\|
2004	c == CHAR_GRAVE_ACCENT \|\| (c >= `0xa0` && c <= `0xd7ff`) \|\|
2005	c >= `0xe000`;
2006	break;
2007
2008	/ Should never occur, but keep compilers from grumbling. /
2009
2010	default:
2011	OK = codevalue != OP_PROP;
2012	break;
2013	}
2014
2015	if (OK == (d == OP_PROP))
2016	{
2017	if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
2018	{
2019	active_count--; / Remove non-match possibility /
2020	next_active_state--;
2021	}
2022	if (++count >= (int)GET2(code, `1`))
2023	{ ADD_NEW(state_offset + `1` + IMM2_SIZE + `3`, `0`); }
2024	else
2025	{ ADD_NEW(state_offset, count); }
2026	}
2027	}
2028	break;
2029
2030	/-----------------------------------------------------------------/
2031	case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
2032	case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
2033	case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
2034	case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
2035	if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
2036	{ ADD_ACTIVE(state_offset + `2` + IMM2_SIZE, `0`); }
2037	count = current_state->count; / Number already matched /
2038	if (clen > `0`)
2039	{
2040	PCRE2_SPTR nptr;
2041	int ncount = `0`;
2042	if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
2043	{
2044	active_count--; / Remove non-match possibility /
2045	next_active_state--;
2046	}
2047	nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject, end_subject, utf,
2048	&ncount);
2049	if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != `0`)
2050	reset_could_continue = TRUE;
2051	if (++count >= (int)GET2(code, `1`))
2052	{ ADD_NEW_DATA(-(state_offset + `2` + IMM2_SIZE), `0`, ncount); }
2053	else
2054	{ ADD_NEW_DATA(-state_offset, count, ncount); }
2055	}
2056	break;
2057	#endif
2058
2059	/-----------------------------------------------------------------/
2060	case OP_ANYNL_EXTRA + OP_TYPEEXACT:
2061	case OP_ANYNL_EXTRA + OP_TYPEUPTO:
2062	case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
2063	case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
2064	if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
2065	{ ADD_ACTIVE(state_offset + `2` + IMM2_SIZE, `0`); }
2066	count = current_state->count; / Number already matched /
2067	if (clen > `0`)
2068	{
2069	int ncount = `0`;
2070	switch (c)
2071	{
2072	case CHAR_VT:
2073	case CHAR_FF:
2074	case CHAR_NEL:
2075	#ifndef EBCDIC
2076	case `0x2028`:
2077	case `0x2029`:
2078	#endif /* Not EBCDIC */
2079	if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
2080	goto ANYNL03;
2081
2082	case CHAR_CR:
2083	if (ptr + `1` < end_subject && UCHAR21TEST(ptr + `1`) == CHAR_LF) ncount = `1`;
2084	/ Fall through /
2085
2086	ANYNL03:
2087	case CHAR_LF:
2088	if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
2089	{
2090	active_count--; / Remove non-match possibility /
2091	next_active_state--;
2092	}
2093	if (++count >= (int)GET2(code, `1`))
2094	{ ADD_NEW_DATA(-(state_offset + `2` + IMM2_SIZE), `0`, ncount); }
2095	else
2096	{ ADD_NEW_DATA(-state_offset, count, ncount); }
2097	break;
2098
2099	default:
2100	break;
2101	}
2102	}
2103	break;
2104
2105	/-----------------------------------------------------------------/
2106	case OP_VSPACE_EXTRA + OP_TYPEEXACT:
2107	case OP_VSPACE_EXTRA + OP_TYPEUPTO:
2108	case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
2109	case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
2110	if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
2111	{ ADD_ACTIVE(state_offset + `2` + IMM2_SIZE, `0`); }
2112	count = current_state->count; / Number already matched /
2113	if (clen > `0`)
2114	{
2115	BOOL OK;
2116	switch (c)
2117	{
2118	VSPACE_CASES:
2119	OK = TRUE;
2120	break;
2121
2122	default:
2123	OK = FALSE;
2124	}
2125
2126	if (OK == (d == OP_VSPACE))
2127	{
2128	if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
2129	{
2130	active_count--; / Remove non-match possibility /
2131	next_active_state--;
2132	}
2133	if (++count >= (int)GET2(code, `1`))
2134	{ ADD_NEW_DATA(-(state_offset + `2` + IMM2_SIZE), `0`, `0`); }
2135	else
2136	{ ADD_NEW_DATA(-state_offset, count, `0`); }
2137	}
2138	}
2139	break;
2140
2141	/-----------------------------------------------------------------/
2142	case OP_HSPACE_EXTRA + OP_TYPEEXACT:
2143	case OP_HSPACE_EXTRA + OP_TYPEUPTO:
2144	case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
2145	case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
2146	if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
2147	{ ADD_ACTIVE(state_offset + `2` + IMM2_SIZE, `0`); }
2148	count = current_state->count; / Number already matched /
2149	if (clen > `0`)
2150	{
2151	BOOL OK;
2152	switch (c)
2153	{
2154	HSPACE_CASES:
2155	OK = TRUE;
2156	break;
2157
2158	default:
2159	OK = FALSE;
2160	break;
2161	}
2162
2163	if (OK == (d == OP_HSPACE))
2164	{
2165	if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
2166	{
2167	active_count--; / Remove non-match possibility /
2168	next_active_state--;
2169	}
2170	if (++count >= (int)GET2(code, `1`))
2171	{ ADD_NEW_DATA(-(state_offset + `2` + IMM2_SIZE), `0`, `0`); }
2172	else
2173	{ ADD_NEW_DATA(-state_offset, count, `0`); }
2174	}
2175	}
2176	break;
2177
2178	/ ========================================================================== /
2179	/* These opcodes are followed by a character that is usually compared
2180	to the current subject character; it is loaded into d. We still get
2181	here even if there is no subject character, because in some cases zero
2182	repetitions are permitted. */
2183
2184	/-----------------------------------------------------------------/
2185	case OP_CHAR:
2186	if (clen > `0` && c == d) { ADD_NEW(state_offset + dlen + `1`, `0`); }
2187	break;
2188
2189	/-----------------------------------------------------------------/
2190	case OP_CHARI:
2191	if (clen == `0`) break;
2192
2193	#ifdef SUPPORT_UNICODE
2194	if (utf_or_ucp)
2195	{
2196	if (c == d) { ADD_NEW(state_offset + dlen + `1`, `0`); } else
2197	{
2198	unsigned int othercase;
2199	if (c < `128`)
2200	othercase = fcc[c];
2201	else
2202	othercase = UCD_OTHERCASE(c);
2203	if (d == othercase) { ADD_NEW(state_offset + dlen + `1`, `0`); }
2204	}
2205	}
2206	else
2207	#endif /* SUPPORT_UNICODE */
2208	/ Not UTF or UCP mode /
2209	{
2210	if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2211	{ ADD_NEW(state_offset + `2`, `0`); }
2212	}
2213	break;
2214
2215
2216	#ifdef SUPPORT_UNICODE
2217	/-----------------------------------------------------------------/
2218	/* This is a tricky one because it can match more than one character.
2219	Find out how many characters to skip, and then set up a negative state
2220	to wait for them to pass before continuing. */
2221
2222	case OP_EXTUNI:
2223	if (clen > `0`)
2224	{
2225	int ncount = `0`;
2226	PCRE2_SPTR nptr = PRIV(extuni)(c, ptr + clen, mb->start_subject,
2227	end_subject, utf, &ncount);
2228	if (nptr >= end_subject && (mb->moptions & PCRE2_PARTIAL_HARD) != `0`)
2229	reset_could_continue = TRUE;
2230	ADD_NEW_DATA(-(state_offset + `1`), `0`, ncount);
2231	}
2232	break;
2233	#endif
2234
2235	/-----------------------------------------------------------------/
2236	/* This is a tricky one, like EXTUNI, because it too can match more than one
2237	character (when CR is followed by LF). In this case, set up a negative
2238	state to wait for one character to pass before continuing. */
2239
2240	case OP_ANYNL:
2241	if (clen > `0`) switch(c)
2242	{
2243	case CHAR_VT:
2244	case CHAR_FF:
2245	case CHAR_NEL:
2246	#ifndef EBCDIC
2247	case `0x2028`:
2248	case `0x2029`:
2249	#endif /* Not EBCDIC */
2250	if (mb->bsr_convention == PCRE2_BSR_ANYCRLF) break;
2251	/ Fall through /
2252
2253	case CHAR_LF:
2254	ADD_NEW(state_offset + `1`, `0`);
2255	break;
2256
2257	case CHAR_CR:
2258	if (ptr + `1` >= end_subject)
2259	{
2260	ADD_NEW(state_offset + `1`, `0`);
2261	if ((mb->moptions & PCRE2_PARTIAL_HARD) != `0`)
2262	reset_could_continue = TRUE;
2263	}
2264	else if (UCHAR21TEST(ptr + `1`) == CHAR_LF)
2265	{
2266	ADD_NEW_DATA(-(state_offset + `1`), `0`, `1`);
2267	}
2268	else
2269	{
2270	ADD_NEW(state_offset + `1`, `0`);
2271	}
2272	break;
2273	}
2274	break;
2275
2276	/-----------------------------------------------------------------/
2277	case OP_NOT_VSPACE:
2278	if (clen > `0`) switch(c)
2279	{
2280	VSPACE_CASES:
2281	break;
2282
2283	default:
2284	ADD_NEW(state_offset + `1`, `0`);
2285	break;
2286	}
2287	break;
2288
2289	/-----------------------------------------------------------------/
2290	case OP_VSPACE:
2291	if (clen > `0`) switch(c)
2292	{
2293	VSPACE_CASES:
2294	ADD_NEW(state_offset + `1`, `0`);
2295	break;
2296
2297	default:
2298	break;
2299	}
2300	break;
2301
2302	/-----------------------------------------------------------------/
2303	case OP_NOT_HSPACE:
2304	if (clen > `0`) switch(c)
2305	{
2306	HSPACE_CASES:
2307	break;
2308
2309	default:
2310	ADD_NEW(state_offset + `1`, `0`);
2311	break;
2312	}
2313	break;
2314
2315	/-----------------------------------------------------------------/
2316	case OP_HSPACE:
2317	if (clen > `0`) switch(c)
2318	{
2319	HSPACE_CASES:
2320	ADD_NEW(state_offset + `1`, `0`);
2321	break;
2322
2323	default:
2324	break;
2325	}
2326	break;
2327
2328	/-----------------------------------------------------------------/
2329	/ Match a negated single character casefully. /
2330
2331	case OP_NOT:
2332	if (clen > `0` && c != d) { ADD_NEW(state_offset + dlen + `1`, `0`); }
2333	break;
2334
2335	/-----------------------------------------------------------------/
2336	/ Match a negated single character caselessly. /
2337
2338	case OP_NOTI:
2339	if (clen > `0`)
2340	{
2341	uint32_t otherd;
2342	#ifdef SUPPORT_UNICODE
2343	if (utf_or_ucp && d >= `128`)
2344	otherd = UCD_OTHERCASE(d);
2345	else
2346	#endif /* SUPPORT_UNICODE */
2347	otherd = TABLE_GET(d, fcc, d);
2348	if (c != d && c != otherd)
2349	{ ADD_NEW(state_offset + dlen + `1`, `0`); }
2350	}
2351	break;
2352
2353	/-----------------------------------------------------------------/
2354	case OP_PLUSI:
2355	case OP_MINPLUSI:
2356	case OP_POSPLUSI:
2357	case OP_NOTPLUSI:
2358	case OP_NOTMINPLUSI:
2359	case OP_NOTPOSPLUSI:
2360	caseless = TRUE;
2361	codevalue -= OP_STARI - OP_STAR;
2362
2363	/ Fall through /
2364	case OP_PLUS:
2365	case OP_MINPLUS:
2366	case OP_POSPLUS:
2367	case OP_NOTPLUS:
2368	case OP_NOTMINPLUS:
2369	case OP_NOTPOSPLUS:
2370	count = current_state->count; / Already matched /
2371	if (count > `0`) { ADD_ACTIVE(state_offset + dlen + `1`, `0`); }
2372	if (clen > `0`)
2373	{
2374	uint32_t otherd = NOTACHAR;
2375	if (caseless)
2376	{
2377	#ifdef SUPPORT_UNICODE
2378	if (utf_or_ucp && d >= `128`)
2379	otherd = UCD_OTHERCASE(d);
2380	else
2381	#endif /* SUPPORT_UNICODE */
2382	otherd = TABLE_GET(d, fcc, d);
2383	}
2384	if ((c == d \|\| c == otherd) == (codevalue < OP_NOTSTAR))
2385	{
2386	if (count > `0` &&
2387	(codevalue == OP_POSPLUS \|\| codevalue == OP_NOTPOSPLUS))
2388	{
2389	active_count--; / Remove non-match possibility /
2390	next_active_state--;
2391	}
2392	count++;
2393	ADD_NEW(state_offset, count);
2394	}
2395	}
2396	break;
2397
2398	/-----------------------------------------------------------------/
2399	case OP_QUERYI:
2400	case OP_MINQUERYI:
2401	case OP_POSQUERYI:
2402	case OP_NOTQUERYI:
2403	case OP_NOTMINQUERYI:
2404	case OP_NOTPOSQUERYI:
2405	caseless = TRUE;
2406	codevalue -= OP_STARI - OP_STAR;
2407	/ Fall through /
2408	case OP_QUERY:
2409	case OP_MINQUERY:
2410	case OP_POSQUERY:
2411	case OP_NOTQUERY:
2412	case OP_NOTMINQUERY:
2413	case OP_NOTPOSQUERY:
2414	ADD_ACTIVE(state_offset + dlen + `1`, `0`);
2415	if (clen > `0`)
2416	{
2417	uint32_t otherd = NOTACHAR;
2418	if (caseless)
2419	{
2420	#ifdef SUPPORT_UNICODE
2421	if (utf_or_ucp && d >= `128`)
2422	otherd = UCD_OTHERCASE(d);
2423	else
2424	#endif /* SUPPORT_UNICODE */
2425	otherd = TABLE_GET(d, fcc, d);
2426	}
2427	if ((c == d \|\| c == otherd) == (codevalue < OP_NOTSTAR))
2428	{
2429	if (codevalue == OP_POSQUERY \|\| codevalue == OP_NOTPOSQUERY)
2430	{
2431	active_count--; / Remove non-match possibility /
2432	next_active_state--;
2433	}
2434	ADD_NEW(state_offset + dlen + `1`, `0`);
2435	}
2436	}
2437	break;
2438
2439	/-----------------------------------------------------------------/
2440	case OP_STARI:
2441	case OP_MINSTARI:
2442	case OP_POSSTARI:
2443	case OP_NOTSTARI:
2444	case OP_NOTMINSTARI:
2445	case OP_NOTPOSSTARI:
2446	caseless = TRUE;
2447	codevalue -= OP_STARI - OP_STAR;
2448	/ Fall through /
2449	case OP_STAR:
2450	case OP_MINSTAR:
2451	case OP_POSSTAR:
2452	case OP_NOTSTAR:
2453	case OP_NOTMINSTAR:
2454	case OP_NOTPOSSTAR:
2455	ADD_ACTIVE(state_offset + dlen + `1`, `0`);
2456	if (clen > `0`)
2457	{
2458	uint32_t otherd = NOTACHAR;
2459	if (caseless)
2460	{
2461	#ifdef SUPPORT_UNICODE
2462	if (utf_or_ucp && d >= `128`)
2463	otherd = UCD_OTHERCASE(d);
2464	else
2465	#endif /* SUPPORT_UNICODE */
2466	otherd = TABLE_GET(d, fcc, d);
2467	}
2468	if ((c == d \|\| c == otherd) == (codevalue < OP_NOTSTAR))
2469	{
2470	if (codevalue == OP_POSSTAR \|\| codevalue == OP_NOTPOSSTAR)
2471	{
2472	active_count--; / Remove non-match possibility /
2473	next_active_state--;
2474	}
2475	ADD_NEW(state_offset, `0`);
2476	}
2477	}
2478	break;
2479
2480	/-----------------------------------------------------------------/
2481	case OP_EXACTI:
2482	case OP_NOTEXACTI:
2483	caseless = TRUE;
2484	codevalue -= OP_STARI - OP_STAR;
2485	/ Fall through /
2486	case OP_EXACT:
2487	case OP_NOTEXACT:
2488	count = current_state->count; / Number already matched /
2489	if (clen > `0`)
2490	{
2491	uint32_t otherd = NOTACHAR;
2492	if (caseless)
2493	{
2494	#ifdef SUPPORT_UNICODE
2495	if (utf_or_ucp && d >= `128`)
2496	otherd = UCD_OTHERCASE(d);
2497	else
2498	#endif /* SUPPORT_UNICODE */
2499	otherd = TABLE_GET(d, fcc, d);
2500	}
2501	if ((c == d \|\| c == otherd) == (codevalue < OP_NOTSTAR))
2502	{
2503	if (++count >= (int)GET2(code, `1`))
2504	{ ADD_NEW(state_offset + dlen + `1` + IMM2_SIZE, `0`); }
2505	else
2506	{ ADD_NEW(state_offset, count); }
2507	}
2508	}
2509	break;
2510
2511	/-----------------------------------------------------------------/
2512	case OP_UPTOI:
2513	case OP_MINUPTOI:
2514	case OP_POSUPTOI:
2515	case OP_NOTUPTOI:
2516	case OP_NOTMINUPTOI:
2517	case OP_NOTPOSUPTOI:
2518	caseless = TRUE;
2519	codevalue -= OP_STARI - OP_STAR;
2520	/ Fall through /
2521	case OP_UPTO:
2522	case OP_MINUPTO:
2523	case OP_POSUPTO:
2524	case OP_NOTUPTO:
2525	case OP_NOTMINUPTO:
2526	case OP_NOTPOSUPTO:
2527	ADD_ACTIVE(state_offset + dlen + `1` + IMM2_SIZE, `0`);
2528	count = current_state->count; / Number already matched /
2529	if (clen > `0`)
2530	{
2531	uint32_t otherd = NOTACHAR;
2532	if (caseless)
2533	{
2534	#ifdef SUPPORT_UNICODE
2535	if (utf_or_ucp && d >= `128`)
2536	otherd = UCD_OTHERCASE(d);
2537	else
2538	#endif /* SUPPORT_UNICODE */
2539	otherd = TABLE_GET(d, fcc, d);
2540	}
2541	if ((c == d \|\| c == otherd) == (codevalue < OP_NOTSTAR))
2542	{
2543	if (codevalue == OP_POSUPTO \|\| codevalue == OP_NOTPOSUPTO)
2544	{
2545	active_count--; / Remove non-match possibility /
2546	next_active_state--;
2547	}
2548	if (++count >= (int)GET2(code, `1`))
2549	{ ADD_NEW(state_offset + dlen + `1` + IMM2_SIZE, `0`); }
2550	else
2551	{ ADD_NEW(state_offset, count); }
2552	}
2553	}
2554	break;
2555
2556
2557	/ ========================================================================== /
2558	/ These are the class-handling opcodes /
2559
2560	case OP_CLASS:
2561	case OP_NCLASS:
2562	case OP_XCLASS:
2563	{
2564	BOOL isinclass = FALSE;
2565	int next_state_offset;
2566	PCRE2_SPTR ecode;
2567
2568	/ For a simple class, there is always just a 32-byte table, and we*
2569	can set isinclass from it. /*
2570
2571	if (codevalue != OP_XCLASS)
2572	{
2573	ecode = code + `1` + (`32` / sizeof(PCRE2_UCHAR));
2574	if (clen > `0`)
2575	{
2576	isinclass = (c > `255`)? (codevalue == OP_NCLASS) :
2577	((((uint8_t *)(code + `1`))[c/`8`] & (`1u` << (c&`7`))) != `0`);
2578	}
2579	}
2580
2581	/ An extended class may have a table or a list of single characters,*
2582	ranges, or both, and it may be positive or negative. There's a
2583	function that sorts all this out. /*
2584
2585	else
2586	{
2587	ecode = code + GET(code, `1`);
2588	if (clen > `0`) isinclass = PRIV(xclass)(c, code + `1` + LINK_SIZE, utf);
2589	}
2590
2591	/* At this point, isinclass is set for all kinds of class, and ecode
2592	points to the code unit after the end of the class. If there is a
2593	quantifier, this is where it will be. */
2594
2595	next_state_offset = (int)(ecode - start_code);
2596
2597	switch (*ecode)
2598	{
2599	case OP_CRSTAR:
2600	case OP_CRMINSTAR:
2601	case OP_CRPOSSTAR:
2602	ADD_ACTIVE(next_state_offset + `1`, `0`);
2603	if (isinclass)
2604	{
2605	if (*ecode == OP_CRPOSSTAR)
2606	{
2607	active_count--; / Remove non-match possibility /
2608	next_active_state--;
2609	}
2610	ADD_NEW(state_offset, `0`);
2611	}
2612	break;
2613
2614	case OP_CRPLUS:
2615	case OP_CRMINPLUS:
2616	case OP_CRPOSPLUS:
2617	count = current_state->count; / Already matched /
2618	if (count > `0`) { ADD_ACTIVE(next_state_offset + `1`, `0`); }
2619	if (isinclass)
2620	{
2621	if (count > `0` && *ecode == OP_CRPOSPLUS)
2622	{
2623	active_count--; / Remove non-match possibility /
2624	next_active_state--;
2625	}
2626	count++;
2627	ADD_NEW(state_offset, count);
2628	}
2629	break;
2630
2631	case OP_CRQUERY:
2632	case OP_CRMINQUERY:
2633	case OP_CRPOSQUERY:
2634	ADD_ACTIVE(next_state_offset + `1`, `0`);
2635	if (isinclass)
2636	{
2637	if (*ecode == OP_CRPOSQUERY)
2638	{
2639	active_count--; / Remove non-match possibility /
2640	next_active_state--;
2641	}
2642	ADD_NEW(next_state_offset + `1`, `0`);
2643	}
2644	break;
2645
2646	case OP_CRRANGE:
2647	case OP_CRMINRANGE:
2648	case OP_CRPOSRANGE:
2649	count = current_state->count; / Already matched /
2650	if (count >= (int)GET2(ecode, `1`))
2651	{ ADD_ACTIVE(next_state_offset + `1` + `2` * IMM2_SIZE, `0`); }
2652	if (isinclass)
2653	{
2654	int max = (int)GET2(ecode, `1` + IMM2_SIZE);
2655
2656	if (ecode == OP_CRPOSRANGE && count >= (int*)GET2(ecode, `1`))
2657	{
2658	active_count--; / Remove non-match possibility /
2659	next_active_state--;
2660	}
2661
2662	if (++count >= max && max != `0`) / Max 0 => no limit /
2663	{ ADD_NEW(next_state_offset + `1` + `2` * IMM2_SIZE, `0`); }
2664	else
2665	{ ADD_NEW(state_offset, count); }
2666	}
2667	break;
2668
2669	default:
2670	if (isinclass) { ADD_NEW(next_state_offset, `0`); }
2671	break;
2672	}
2673	}
2674	break;
2675
2676	/ ========================================================================== /
2677	/* These are the opcodes for fancy brackets of various kinds. We have
2678	to use recursion in order to handle them. The "always failing" assertion
2679	(?!) is optimised to OP_FAIL when compiling, so we have to support that,
2680	though the other "backtracking verbs" are not supported. */
2681
2682	case OP_FAIL:
2683	forced_fail++; / Count FAILs for multiple states /
2684	break;
2685
2686	case OP_ASSERT:
2687	case OP_ASSERT_NOT:
2688	case OP_ASSERTBACK:
2689	case OP_ASSERTBACK_NOT:
2690	{
2691	int rc;
2692	int *local_workspace;
2693	PCRE2_SIZE *local_offsets;
2694	PCRE2_SPTR endasscode = code + GET(code, `1`);
2695	RWS_anchor rws = (RWS_anchor )RWS;
2696
2697	if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
2698	{
2699	rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
2700	if (rc != `0`) return rc;
2701	RWS = (int *)rws;
2702	}
2703
2704	local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
2705	local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
2706	rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
2707
2708	while (*endasscode == OP_ALT) endasscode += GET(endasscode, `1`);
2709
2710	rc = internal_dfa_match(
2711	mb, / static match data /
2712	code, / this subexpression's code /
2713	ptr, / where we currently are /
2714	(PCRE2_SIZE)(ptr - start_subject), / start offset /
2715	local_offsets, / offset vector /
2716	RWS_OVEC_OSIZE/OVEC_UNIT, / size of same /
2717	local_workspace, / workspace vector /
2718	RWS_RSIZE, / size of same /
2719	rlevel, / function recursion level /
2720	RWS); / recursion workspace /
2721
2722	rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
2723
2724	if (rc < `0` && rc != PCRE2_ERROR_NOMATCH) return rc;
2725	if ((rc >= `0`) == (codevalue == OP_ASSERT \|\| codevalue == OP_ASSERTBACK))
2726	{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + `1` - start_code), `0`); }
2727	}
2728	break;
2729
2730	/-----------------------------------------------------------------/
2731	case OP_COND:
2732	case OP_SCOND:
2733	{
2734	int codelink = (int)GET(code, `1`);
2735	PCRE2_UCHAR condcode;
2736
2737	/* Because of the way auto-callout works during compile, a callout item
2738	is inserted between OP_COND and an assertion condition. This does not
2739	happen for the other conditions. */
2740
2741	if (code[LINK_SIZE + `1`] == OP_CALLOUT
2742	\|\| code[LINK_SIZE + `1`] == OP_CALLOUT_STR)
2743	{
2744	PCRE2_SIZE callout_length;
2745	rrc = do_callout(code, offsets, current_subject, ptr, mb,
2746	`1` + LINK_SIZE, &callout_length);
2747	if (rrc < `0`) return rrc; / Abandon /
2748	if (rrc > `0`) break; / Fail this thread /
2749	code += callout_length; / Skip callout data /
2750	}
2751
2752	condcode = code[LINK_SIZE+`1`];
2753
2754	/ Back reference conditions and duplicate named recursion conditions*
2755	are not supported /*
2756
2757	if (condcode == OP_CREF \|\| condcode == OP_DNCREF \|\|
2758	condcode == OP_DNRREF)
2759	return PCRE2_ERROR_DFA_UCOND;
2760
2761	/ The DEFINE condition is always false, and the assertion (?!) is*
2762	converted to OP_FAIL. /*
2763
2764	if (condcode == OP_FALSE \|\| condcode == OP_FAIL)
2765	{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + `1`, `0`); }
2766
2767	/ There is also an always-true condition /
2768
2769	else if (condcode == OP_TRUE)
2770	{ ADD_ACTIVE(state_offset + LINK_SIZE + `2`, `0`); }
2771
2772	/ The only supported version of OP_RREF is for the value RREF_ANY,*
2773	which means "test if in any recursion". We can't test for specifically
2774	recursed groups. /*
2775
2776	else if (condcode == OP_RREF)
2777	{
2778	unsigned int value = GET2(code, LINK_SIZE + `2`);
2779	if (value != RREF_ANY) return PCRE2_ERROR_DFA_UCOND;
2780	if (mb->recursive != NULL)
2781	{ ADD_ACTIVE(state_offset + LINK_SIZE + `2` + IMM2_SIZE, `0`); }
2782	else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + `1`, `0`); }
2783	}
2784
2785	/ Otherwise, the condition is an assertion /
2786
2787	else
2788	{
2789	int rc;
2790	int *local_workspace;
2791	PCRE2_SIZE *local_offsets;
2792	PCRE2_SPTR asscode = code + LINK_SIZE + `1`;
2793	PCRE2_SPTR endasscode = asscode + GET(asscode, `1`);
2794	RWS_anchor rws = (RWS_anchor )RWS;
2795
2796	if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
2797	{
2798	rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
2799	if (rc != `0`) return rc;
2800	RWS = (int *)rws;
2801	}
2802
2803	local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
2804	local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
2805	rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
2806
2807	while (*endasscode == OP_ALT) endasscode += GET(endasscode, `1`);
2808
2809	rc = internal_dfa_match(
2810	mb, / fixed match data /
2811	asscode, / this subexpression's code /
2812	ptr, / where we currently are /
2813	(PCRE2_SIZE)(ptr - start_subject), / start offset /
2814	local_offsets, / offset vector /
2815	RWS_OVEC_OSIZE/OVEC_UNIT, / size of same /
2816	local_workspace, / workspace vector /
2817	RWS_RSIZE, / size of same /
2818	rlevel, / function recursion level /
2819	RWS); / recursion workspace /
2820
2821	rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
2822
2823	if (rc < `0` && rc != PCRE2_ERROR_NOMATCH) return rc;
2824	if ((rc >= `0`) ==
2825	(condcode == OP_ASSERT \|\| condcode == OP_ASSERTBACK))
2826	{ ADD_ACTIVE((int)(endasscode + LINK_SIZE + `1` - start_code), `0`); }
2827	else
2828	{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + `1`, `0`); }
2829	}
2830	}
2831	break;
2832
2833	/-----------------------------------------------------------------/
2834	case OP_RECURSE:
2835	{
2836	int rc;
2837	int *local_workspace;
2838	PCRE2_SIZE *local_offsets;
2839	RWS_anchor rws = (RWS_anchor )RWS;
2840	dfa_recursion_info *ri;
2841	PCRE2_SPTR callpat = start_code + GET(code, `1`);
2842	uint32_t recno = (callpat == mb->start_code)? `0` :
2843	GET2(callpat, `1` + LINK_SIZE);
2844
2845	if (rws->free < RWS_RSIZE + RWS_OVEC_RSIZE)
2846	{
2847	rc = more_workspace(&rws, RWS_OVEC_RSIZE, mb);
2848	if (rc != `0`) return rc;
2849	RWS = (int *)rws;
2850	}
2851
2852	local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
2853	local_workspace = ((int *)local_offsets) + RWS_OVEC_RSIZE;
2854	rws->free -= RWS_RSIZE + RWS_OVEC_RSIZE;
2855
2856	/* Check for repeating a recursion without advancing the subject
2857	pointer. This should catch convoluted mutual recursions. (Some simple
2858	cases are caught at compile time.) */
2859
2860	for (ri = mb->recursive; ri != NULL; ri = ri->prevrec)
2861	if (recno == ri->group_num && ptr == ri->subject_position)
2862	return PCRE2_ERROR_RECURSELOOP;
2863
2864	/ Remember this recursion and where we started it so as to*
2865	catch infinite loops. /*
2866
2867	new_recursive.group_num = recno;
2868	new_recursive.subject_position = ptr;
2869	new_recursive.prevrec = mb->recursive;
2870	mb->recursive = &new_recursive;
2871
2872	rc = internal_dfa_match(
2873	mb, / fixed match data /
2874	callpat, / this subexpression's code /
2875	ptr, / where we currently are /
2876	(PCRE2_SIZE)(ptr - start_subject), / start offset /
2877	local_offsets, / offset vector /
2878	RWS_OVEC_RSIZE/OVEC_UNIT, / size of same /
2879	local_workspace, / workspace vector /
2880	RWS_RSIZE, / size of same /
2881	rlevel, / function recursion level /
2882	RWS); / recursion workspace /
2883
2884	rws->free += RWS_RSIZE + RWS_OVEC_RSIZE;
2885	mb->recursive = new_recursive.prevrec; / Done this recursion /
2886
2887	/ Ran out of internal offsets /
2888
2889	if (rc == `0`) return PCRE2_ERROR_DFA_RECURSE;
2890
2891	/* For each successfully matched substring, set up the next state with a
2892	count of characters to skip before trying it. Note that the count is in
2893	characters, not bytes. */
2894
2895	if (rc > `0`)
2896	{
2897	for (rc = rc*`2` - `2`; rc >= `0`; rc -= `2`)
2898	{
2899	PCRE2_SIZE charcount = local_offsets[rc+`1`] - local_offsets[rc];
2900	#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
2901	if (utf)
2902	{
2903	PCRE2_SPTR p = start_subject + local_offsets[rc];
2904	PCRE2_SPTR pp = start_subject + local_offsets[rc+`1`];
2905	while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
2906	}
2907	#endif
2908	if (charcount > `0`)
2909	{
2910	ADD_NEW_DATA(-(state_offset + LINK_SIZE + `1`), `0`,
2911	(int)(charcount - `1`));
2912	}
2913	else
2914	{
2915	ADD_ACTIVE(state_offset + LINK_SIZE + `1`, `0`);
2916	}
2917	}
2918	}
2919	else if (rc != PCRE2_ERROR_NOMATCH) return rc;
2920	}
2921	break;
2922
2923	/-----------------------------------------------------------------/
2924	case OP_BRAPOS:
2925	case OP_SBRAPOS:
2926	case OP_CBRAPOS:
2927	case OP_SCBRAPOS:
2928	case OP_BRAPOSZERO:
2929	{
2930	int rc;
2931	int *local_workspace;
2932	PCRE2_SIZE *local_offsets;
2933	PCRE2_SIZE charcount, matched_count;
2934	PCRE2_SPTR local_ptr = ptr;
2935	RWS_anchor rws = (RWS_anchor )RWS;
2936	BOOL allow_zero;
2937
2938	if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
2939	{
2940	rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
2941	if (rc != `0`) return rc;
2942	RWS = (int *)rws;
2943	}
2944
2945	local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
2946	local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
2947	rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
2948
2949	if (codevalue == OP_BRAPOSZERO)
2950	{
2951	allow_zero = TRUE;
2952	codevalue = (++code); /* Codevalue will be one of above BRAs /
2953	}
2954	else allow_zero = FALSE;
2955
2956	/ Loop to match the subpattern as many times as possible as if it were*
2957	a complete pattern. /*
2958
2959	for (matched_count = `0`;; matched_count++)
2960	{
2961	rc = internal_dfa_match(
2962	mb, / fixed match data /
2963	code, / this subexpression's code /
2964	local_ptr, / where we currently are /
2965	(PCRE2_SIZE)(ptr - start_subject), / start offset /
2966	local_offsets, / offset vector /
2967	RWS_OVEC_OSIZE/OVEC_UNIT, / size of same /
2968	local_workspace, / workspace vector /
2969	RWS_RSIZE, / size of same /
2970	rlevel, / function recursion level /
2971	RWS); / recursion workspace /
2972
2973	/ Failed to match /
2974
2975	if (rc < `0`)
2976	{
2977	if (rc != PCRE2_ERROR_NOMATCH) return rc;
2978	break;
2979	}
2980
2981	/ Matched: break the loop if zero characters matched. /
2982
2983	charcount = local_offsets[`1`] - local_offsets[`0`];
2984	if (charcount == `0`) break;
2985	local_ptr += charcount; / Advance temporary position ptr /
2986	}
2987
2988	rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
2989
2990	/ At this point we have matched the subpattern matched_count*
2991	times, and local_ptr is pointing to the character after the end of the
2992	last match. /*
2993
2994	if (matched_count > `0` \|\| allow_zero)
2995	{
2996	PCRE2_SPTR end_subpattern = code;
2997	int next_state_offset;
2998
2999	do { end_subpattern += GET(end_subpattern, `1`); }
3000	while (*end_subpattern == OP_ALT);
3001	next_state_offset =
3002	(int)(end_subpattern - start_code + LINK_SIZE + `1`);
3003
3004	/ Optimization: if there are no more active states, and there*
3005	are no new states yet set up, then skip over the subject string
3006	right here, to save looping. Otherwise, set up the new state to swing
3007	into action when the end of the matched substring is reached. /*
3008
3009	if (i + `1` >= active_count && new_count == `0`)
3010	{
3011	ptr = local_ptr;
3012	clen = `0`;
3013	ADD_NEW(next_state_offset, `0`);
3014	}
3015	else
3016	{
3017	PCRE2_SPTR p = ptr;
3018	PCRE2_SPTR pp = local_ptr;
3019	charcount = (PCRE2_SIZE)(pp - p);
3020	#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3021	if (utf) while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
3022	#endif
3023	ADD_NEW_DATA(-next_state_offset, `0`, (int)(charcount - `1`));
3024	}
3025	}
3026	}
3027	break;
3028
3029	/-----------------------------------------------------------------/
3030	case OP_ONCE:
3031	{
3032	int rc;
3033	int *local_workspace;
3034	PCRE2_SIZE *local_offsets;
3035	RWS_anchor rws = (RWS_anchor )RWS;
3036
3037	if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
3038	{
3039	rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
3040	if (rc != `0`) return rc;
3041	RWS = (int *)rws;
3042	}
3043
3044	local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
3045	local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
3046	rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
3047
3048	rc = internal_dfa_match(
3049	mb, / fixed match data /
3050	code, / this subexpression's code /
3051	ptr, / where we currently are /
3052	(PCRE2_SIZE)(ptr - start_subject), / start offset /
3053	local_offsets, / offset vector /
3054	RWS_OVEC_OSIZE/OVEC_UNIT, / size of same /
3055	local_workspace, / workspace vector /
3056	RWS_RSIZE, / size of same /
3057	rlevel, / function recursion level /
3058	RWS); / recursion workspace /
3059
3060	rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
3061
3062	if (rc >= `0`)
3063	{
3064	PCRE2_SPTR end_subpattern = code;
3065	PCRE2_SIZE charcount = local_offsets[`1`] - local_offsets[`0`];
3066	int next_state_offset, repeat_state_offset;
3067
3068	do { end_subpattern += GET(end_subpattern, `1`); }
3069	while (*end_subpattern == OP_ALT);
3070	next_state_offset =
3071	(int)(end_subpattern - start_code + LINK_SIZE + `1`);
3072
3073	/ If the end of this subpattern is KETRMAX or KETRMIN, we must*
3074	arrange for the repeat state also to be added to the relevant list.
3075	Calculate the offset, or set -1 for no repeat. /*
3076
3077	repeat_state_offset = (*end_subpattern == OP_KETRMAX \|\|
3078	*end_subpattern == OP_KETRMIN)?
3079	(int)(end_subpattern - start_code - GET(end_subpattern, `1`)) : -`1`;
3080
3081	/* If we have matched an empty string, add the next state at the
3082	current character pointer. This is important so that the duplicate
3083	checking kicks in, which is what breaks infinite loops that match an
3084	empty string. */
3085
3086	if (charcount == `0`)
3087	{
3088	ADD_ACTIVE(next_state_offset, `0`);
3089	}
3090
3091	/ Optimization: if there are no more active states, and there*
3092	are no new states yet set up, then skip over the subject string
3093	right here, to save looping. Otherwise, set up the new state to swing
3094	into action when the end of the matched substring is reached. /*
3095
3096	else if (i + `1` >= active_count && new_count == `0`)
3097	{
3098	ptr += charcount;
3099	clen = `0`;
3100	ADD_NEW(next_state_offset, `0`);
3101
3102	/ If we are adding a repeat state at the new character position,*
3103	we must fudge things so that it is the only current state.
3104	Otherwise, it might be a duplicate of one we processed before, and
3105	that would cause it to be skipped. /*
3106
3107	if (repeat_state_offset >= `0`)
3108	{
3109	next_active_state = active_states;
3110	active_count = `0`;
3111	i = -`1`;
3112	ADD_ACTIVE(repeat_state_offset, `0`);
3113	}
3114	}
3115	else
3116	{
3117	#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3118	if (utf)
3119	{
3120	PCRE2_SPTR p = start_subject + local_offsets[`0`];
3121	PCRE2_SPTR pp = start_subject + local_offsets[`1`];
3122	while (p < pp) if (NOT_FIRSTCU(*p++)) charcount--;
3123	}
3124	#endif
3125	ADD_NEW_DATA(-next_state_offset, `0`, (int)(charcount - `1`));
3126	if (repeat_state_offset >= `0`)
3127	{ ADD_NEW_DATA(-repeat_state_offset, `0`, (int)(charcount - `1`)); }
3128	}
3129	}
3130	else if (rc != PCRE2_ERROR_NOMATCH) return rc;
3131	}
3132	break;
3133
3134
3135	/ ========================================================================== /
3136	/ Handle callouts /
3137
3138	case OP_CALLOUT:
3139	case OP_CALLOUT_STR:
3140	{
3141	PCRE2_SIZE callout_length;
3142	rrc = do_callout(code, offsets, current_subject, ptr, mb, `0`,
3143	&callout_length);
3144	if (rrc < `0`) return rrc; / Abandon /
3145	if (rrc == `0`)
3146	{ ADD_ACTIVE(state_offset + (int)callout_length, `0`); }
3147	}
3148	break;
3149
3150
3151	/ ========================================================================== /
3152	default: / Unsupported opcode /
3153	return PCRE2_ERROR_DFA_UITEM;
3154	}
3155
3156	NEXT_ACTIVE_STATE: continue;
3157
3158	} / End of loop scanning active states /
3159
3160	/ We have finished the processing at the current subject character. If no*
3161	new states have been set for the next character, we have found all the
3162	matches that we are going to find. If partial matching has been requested,
3163	check for appropriate conditions.
3164
3165	The "forced_ fail" variable counts the number of (F) encountered for the*
3166	character. If it is equal to the original active_count (saved in
3167	workspace[1]) it means that (F) was found on every active state. In this*
3168	case we don't want to give a partial match.
3169
3170	The "could_continue" variable is true if a state could have continued but
3171	for the fact that the end of the subject was reached. /*
3172
3173	if (new_count <= `0`)
3174	{
3175	if (could_continue && / Some could go on, and /
3176	forced_fail != workspace[`1`] && / Not all forced fail & /
3177	( / either... /
3178	(mb->moptions & PCRE2_PARTIAL_HARD) != `0` / Hard partial /
3179	\|\| / or... /
3180	((mb->moptions & PCRE2_PARTIAL_SOFT) != `0` && / Soft partial and /
3181	match_count < `0`) / no matches /
3182	) && / And... /
3183	(
3184	partial_newline \|\| / Either partial NL /
3185	( / or ... /
3186	ptr >= end_subject && / End of subject and /
3187	( / either /
3188	ptr > mb->start_used_ptr \|\| / Inspected non-empty string /
3189	mb->allowemptypartial / or pattern has lookbehind /
3190	) / or could match empty /
3191	)
3192	))
3193	match_count = PCRE2_ERROR_PARTIAL;
3194	break; / Exit from loop along the subject string /
3195	}
3196
3197	/ One or more states are active for the next character. /
3198
3199	ptr += clen; / Advance to next subject character /
3200	} / Loop to move along the subject string /
3201
3202	/ Control gets here from "break" a few lines above. If we have a match and*
3203	PCRE2_ENDANCHORED is set, the match fails. /*
3204
3205	if (match_count >= `0` &&
3206	((mb->moptions \| mb->poptions) & PCRE2_ENDANCHORED) != `0` &&
3207	ptr < end_subject)
3208	match_count = PCRE2_ERROR_NOMATCH;
3209
3210	return match_count;
3211	}
3212
3213
3214
3215	/*************************************************
3216	* Match a pattern using the DFA algorithm *
3217	*************************************************/
3218
3219	/ This function matches a compiled pattern to a subject string, using the*
3220	alternate matching algorithm that finds all matches at once.
3221
3222	Arguments:
3223	code points to the compiled pattern
3224	subject subject string
3225	length length of subject string
3226	startoffset where to start matching in the subject
3227	options option bits
3228	match_data points to a match data structure
3229	gcontext points to a match context
3230	workspace pointer to workspace
3231	wscount size of workspace
3232
3233	Returns: > 0 => number of match offset pairs placed in offsets
3234	= 0 => offsets overflowed; longest matches are present
3235	-1 => failed to match
3236	< -1 => some kind of unexpected problem
3237	*/
3238
3239	PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
3240	pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
3241	PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
3242	pcre2_match_context mcontext, int* *workspace, PCRE2_SIZE wscount)
3243	{
3244	int rc;
3245	int was_zero_terminated = `0`;
3246
3247	const pcre2_real_code re = (const* pcre2_real_code *)code;
3248
3249	PCRE2_SPTR start_match;
3250	PCRE2_SPTR end_subject;
3251	PCRE2_SPTR bumpalong_limit;
3252	PCRE2_SPTR req_cu_ptr;
3253
3254	BOOL utf, anchored, startline, firstline;
3255	BOOL has_first_cu = FALSE;
3256	BOOL has_req_cu = FALSE;
3257
3258	#if PCRE2_CODE_UNIT_WIDTH == 8
3259	BOOL memchr_not_found_first_cu = FALSE;
3260	BOOL memchr_not_found_first_cu2 = FALSE;
3261	#endif
3262
3263	PCRE2_UCHAR first_cu = `0`;
3264	PCRE2_UCHAR first_cu2 = `0`;
3265	PCRE2_UCHAR req_cu = `0`;
3266	PCRE2_UCHAR req_cu2 = `0`;
3267
3268	const uint8_t *start_bits = NULL;
3269
3270	/ We need to have mb pointing to a match block, because the IS_NEWLINE macro*
3271	is used below, and it expects NLBLOCK to be defined as a pointer. /*
3272
3273	pcre2_callout_block cb;
3274	dfa_match_block actual_match_block;
3275	dfa_match_block *mb = &actual_match_block;
3276
3277	/ Set up a starting block of memory for use during recursive calls to*
3278	internal_dfa_match(). By putting this on the stack, it minimizes resource use
3279	in the case when it is not needed. If this is too small, more memory is
3280	obtained from the heap. At the start of each block is an anchor structure./*
3281
3282	int base_recursion_workspace[RWS_BASE_SIZE];
3283	RWS_anchor rws = (RWS_anchor )base_recursion_workspace;
3284	rws->next = NULL;
3285	rws->size = RWS_BASE_SIZE;
3286	rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE;
3287
3288	/ A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated*
3289	subject string. /*
3290
3291	if (length == PCRE2_ZERO_TERMINATED)
3292	{
3293	length = PRIV(strlen)(subject);
3294	was_zero_terminated = `1`;
3295	}
3296
3297	/ Plausibility checks /
3298
3299	if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != `0`) return PCRE2_ERROR_BADOPTION;
3300	if (re == NULL \|\| subject == NULL \|\| workspace == NULL \|\| match_data == NULL)
3301	return PCRE2_ERROR_NULL;
3302	if (wscount < `20`) return PCRE2_ERROR_DFA_WSSIZE;
3303	if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
3304
3305	/ Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same*
3306	time. /*
3307
3308	if ((options & (PCRE2_PARTIAL_HARD\|PCRE2_PARTIAL_SOFT)) != `0` &&
3309	((re->overall_options \| options) & PCRE2_ENDANCHORED) != `0`)
3310	return PCRE2_ERROR_BADOPTION;
3311
3312	/ Invalid UTF support is not available for DFA matching. /
3313
3314	if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != `0`)
3315	return PCRE2_ERROR_DFA_UINVALID_UTF;
3316
3317	/ Check that the first field in the block is the magic number. If it is not,*
3318	return with PCRE2_ERROR_BADMAGIC. /*
3319
3320	if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
3321
3322	/ Check the code unit width. /
3323
3324	if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/`8`)
3325	return PCRE2_ERROR_BADMODE;
3326
3327	/ PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the*
3328	options variable for this function. Users of PCRE2 who are not calling the
3329	function directly would like to have a way of setting these flags, in the same
3330	way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
3331	constructions like (NO_AUTOPOSSESS). To enable this, (NOTEMPTY) and
3332	(NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be*
3333	transferred to the options for this function. The bits are guaranteed to be
3334	adjacent, but do not have the same values. This bit of Boolean trickery assumes
3335	that the match-time bits are not more significant than the flag bits. If by
3336	accident this is not the case, a compile-time division by zero error will
3337	occur. /*
3338
3339	#define FF (PCRE2_NOTEMPTY_SET\|PCRE2_NE_ATST_SET)
3340	#define OO (PCRE2_NOTEMPTY\|PCRE2_NOTEMPTY_ATSTART)
3341	options \|= (re->flags & FF) / ((FF & (~FF+`1`)) / (OO & (~OO+`1`)));
3342	#undef FF
3343	#undef OO
3344
3345	/ If restarting after a partial match, do some sanity checks on the contents*
3346	of the workspace. /*
3347
3348	if ((options & PCRE2_DFA_RESTART) != `0`)
3349	{
3350	if ((workspace[`0`] & (-`2`)) != `0` \|\| workspace[`1`] < `1` \|\|
3351	workspace[`1`] > (int)((wscount - `2`)/INTS_PER_STATEBLOCK))
3352	return PCRE2_ERROR_DFA_BADRESTART;
3353	}
3354
3355	/ Set some local values /
3356
3357	utf = (re->overall_options & PCRE2_UTF) != `0`;
3358	start_match = subject + start_offset;
3359	end_subject = subject + length;
3360	req_cu_ptr = start_match - `1`;
3361	anchored = (options & (PCRE2_ANCHORED\|PCRE2_DFA_RESTART)) != `0` \|\|
3362	(re->overall_options & PCRE2_ANCHORED) != `0`;
3363
3364	/ The "must be at the start of a line" flags are used in a loop when finding*
3365	where to start. /*
3366
3367	startline = (re->flags & PCRE2_STARTLINE) != `0`;
3368	firstline = (re->overall_options & PCRE2_FIRSTLINE) != `0`;
3369	bumpalong_limit = end_subject;
3370
3371	/ Initialize and set up the fixed fields in the callout block, with a pointer*
3372	in the match block. /*
3373
3374	mb->cb = &cb;
3375	cb.version = `2`;
3376	cb.subject = subject;
3377	cb.subject_length = (PCRE2_SIZE)(end_subject - subject);
3378	cb.callout_flags = `0`;
3379	cb.capture_top = `1`; / No capture support /
3380	cb.capture_last = `0`;
3381	cb.mark = NULL; / No (MARK) support /*
3382
3383	/ Get data from the match context, if present, and fill in the remaining*
3384	fields in the match block. It is an error to set an offset limit without
3385	setting the flag at compile time. /*
3386
3387	if (mcontext == NULL)
3388	{
3389	mb->callout = NULL;
3390	mb->memctl = re->memctl;
3391	mb->match_limit = PRIV(default_match_context).match_limit;
3392	mb->match_limit_depth = PRIV(default_match_context).depth_limit;
3393	mb->heap_limit = PRIV(default_match_context).heap_limit;
3394	}
3395	else
3396	{
3397	if (mcontext->offset_limit != PCRE2_UNSET)
3398	{
3399	if ((re->overall_options & PCRE2_USE_OFFSET_LIMIT) == `0`)
3400	return PCRE2_ERROR_BADOFFSETLIMIT;
3401	bumpalong_limit = subject + mcontext->offset_limit;
3402	}
3403	mb->callout = mcontext->callout;
3404	mb->callout_data = mcontext->callout_data;
3405	mb->memctl = mcontext->memctl;
3406	mb->match_limit = mcontext->match_limit;
3407	mb->match_limit_depth = mcontext->depth_limit;
3408	mb->heap_limit = mcontext->heap_limit;
3409	}
3410
3411	if (mb->match_limit > re->limit_match)
3412	mb->match_limit = re->limit_match;
3413
3414	if (mb->match_limit_depth > re->limit_depth)
3415	mb->match_limit_depth = re->limit_depth;
3416
3417	if (mb->heap_limit > re->limit_heap)
3418	mb->heap_limit = re->limit_heap;
3419
3420	mb->start_code = (PCRE2_UCHAR )((uint8_t )re + sizeof(pcre2_real_code)) +
3421	re->name_count * re->name_entry_size;
3422	mb->tables = re->tables;
3423	mb->start_subject = subject;
3424	mb->end_subject = end_subject;
3425	mb->start_offset = start_offset;
3426	mb->allowemptypartial = (re->max_lookbehind > `0`) \|\|
3427	(re->flags & PCRE2_MATCH_EMPTY) != `0`;
3428	mb->moptions = options;
3429	mb->poptions = re->overall_options;
3430	mb->match_call_count = `0`;
3431	mb->heap_used = `0`;
3432
3433	/ Process the \R and newline settings. /
3434
3435	mb->bsr_convention = re->bsr_convention;
3436	mb->nltype = NLTYPE_FIXED;
3437	switch(re->newline_convention)
3438	{
3439	case PCRE2_NEWLINE_CR:
3440	mb->nllen = `1`;
3441	mb->nl[`0`] = CHAR_CR;
3442	break;
3443
3444	case PCRE2_NEWLINE_LF:
3445	mb->nllen = `1`;
3446	mb->nl[`0`] = CHAR_NL;
3447	break;
3448
3449	case PCRE2_NEWLINE_NUL:
3450	mb->nllen = `1`;
3451	mb->nl[`0`] = CHAR_NUL;
3452	break;
3453
3454	case PCRE2_NEWLINE_CRLF:
3455	mb->nllen = `2`;
3456	mb->nl[`0`] = CHAR_CR;
3457	mb->nl[`1`] = CHAR_NL;
3458	break;
3459
3460	case PCRE2_NEWLINE_ANY:
3461	mb->nltype = NLTYPE_ANY;
3462	break;
3463
3464	case PCRE2_NEWLINE_ANYCRLF:
3465	mb->nltype = NLTYPE_ANYCRLF;
3466	break;
3467
3468	default: return PCRE2_ERROR_INTERNAL;
3469	}
3470
3471	/ Check a UTF string for validity if required. For 8-bit and 16-bit strings,*
3472	we must also check that a starting offset does not point into the middle of a
3473	multiunit character. We check only the portion of the subject that is going to
3474	be inspected during matching - from the offset minus the maximum back reference
3475	to the given length. This saves time when a small part of a large subject is
3476	being matched by the use of a starting offset. Note that the maximum lookbehind
3477	is a number of characters, not code units. /*
3478
3479	#ifdef SUPPORT_UNICODE
3480	if (utf && (options & PCRE2_NO_UTF_CHECK) == `0`)
3481	{
3482	PCRE2_SPTR check_subject = start_match; / start_match includes offset /
3483
3484	if (start_offset > `0`)
3485	{
3486	#if PCRE2_CODE_UNIT_WIDTH != 32
3487	unsigned int i;
3488	if (start_match < end_subject && NOT_FIRSTCU(*start_match))
3489	return PCRE2_ERROR_BADUTFOFFSET;
3490	for (i = re->max_lookbehind; i > `0` && check_subject > subject; i--)
3491	{
3492	check_subject--;
3493	while (check_subject > subject &&
3494	#if PCRE2_CODE_UNIT_WIDTH == 8
3495	(*check_subject & `0xc0`) == `0x80`)
3496	#else /* 16-bit */
3497	(*check_subject & `0xfc00`) == `0xdc00`)
3498	#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
3499	check_subject--;
3500	}
3501	#else /* In the 32-bit library, one code unit equals one character. */
3502	check_subject -= re->max_lookbehind;
3503	if (check_subject < subject) check_subject = subject;
3504	#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
3505	}
3506
3507	/ Validate the relevant portion of the subject. After an error, adjust the*
3508	offset to be an absolute offset in the whole string. /*
3509
3510	match_data->rc = PRIV(valid_utf)(check_subject,
3511	length - (PCRE2_SIZE)(check_subject - subject), &(match_data->startchar));
3512	if (match_data->rc != `0`)
3513	{
3514	match_data->startchar += (PCRE2_SIZE)(check_subject - subject);
3515	return match_data->rc;
3516	}
3517	}
3518	#endif /* SUPPORT_UNICODE */
3519
3520	/ Set up the first code unit to match, if available. If there's no first code*
3521	unit there may be a bitmap of possible first characters. /*
3522
3523	if ((re->flags & PCRE2_FIRSTSET) != `0`)
3524	{
3525	has_first_cu = TRUE;
3526	first_cu = first_cu2 = (PCRE2_UCHAR)(re->first_codeunit);
3527	if ((re->flags & PCRE2_FIRSTCASELESS) != `0`)
3528	{
3529	first_cu2 = TABLE_GET(first_cu, mb->tables + fcc_offset, first_cu);
3530	#ifdef SUPPORT_UNICODE
3531	#if PCRE2_CODE_UNIT_WIDTH == 8
3532	if (first_cu > `127` && !utf && (re->overall_options & PCRE2_UCP) != `0`)
3533	first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
3534	#else
3535	if (first_cu > `127` && (utf \|\| (re->overall_options & PCRE2_UCP) != `0`))
3536	first_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(first_cu);
3537	#endif
3538	#endif /* SUPPORT_UNICODE */
3539	}
3540	}
3541	else
3542	if (!startline && (re->flags & PCRE2_FIRSTMAPSET) != `0`)
3543	start_bits = re->start_bitmap;
3544
3545	/ There may be a "last known required code unit" set. /
3546
3547	if ((re->flags & PCRE2_LASTSET) != `0`)
3548	{
3549	has_req_cu = TRUE;
3550	req_cu = req_cu2 = (PCRE2_UCHAR)(re->last_codeunit);
3551	if ((re->flags & PCRE2_LASTCASELESS) != `0`)
3552	{
3553	req_cu2 = TABLE_GET(req_cu, mb->tables + fcc_offset, req_cu);
3554	#ifdef SUPPORT_UNICODE
3555	#if PCRE2_CODE_UNIT_WIDTH == 8
3556	if (req_cu > `127` && !utf && (re->overall_options & PCRE2_UCP) != `0`)
3557	req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
3558	#else
3559	if (req_cu > `127` && (utf \|\| (re->overall_options & PCRE2_UCP) != `0`))
3560	req_cu2 = (PCRE2_UCHAR)UCD_OTHERCASE(req_cu);
3561	#endif
3562	#endif /* SUPPORT_UNICODE */
3563	}
3564	}
3565
3566	/ If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,*
3567	free the memory that was obtained. /*
3568
3569	if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != `0`)
3570	{
3571	match_data->memctl.free((void *)match_data->subject,
3572	match_data->memctl.memory_data);
3573	match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
3574	}
3575
3576	/ Fill in fields that are always returned in the match data. /
3577
3578	match_data->code = re;
3579	match_data->subject = NULL; / Default for no match /
3580	match_data->mark = NULL;
3581	match_data->matchedby = PCRE2_MATCHEDBY_DFA_INTERPRETER;
3582
3583	/ Call the main matching function, looping for a non-anchored regex after a*
3584	failed match. If not restarting, perform certain optimizations at the start of
3585	a match. /*
3586
3587	for (;;)
3588	{
3589	/ ----------------- Start of match optimizations ---------------- /
3590
3591	/ There are some optimizations that avoid running the match if a known*
3592	starting point is not found, or if a known later code unit is not present.
3593	However, there is an option (settable at compile time) that disables
3594	these, for testing and for ensuring that all callouts do actually occur.
3595	The optimizations must also be avoided when restarting a DFA match. /*
3596
3597	if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == `0` &&
3598	(options & PCRE2_DFA_RESTART) == `0`)
3599	{
3600	/ If firstline is TRUE, the start of the match is constrained to the first*
3601	line of a multiline string. That is, the match must be before or at the
3602	first newline following the start of matching. Temporarily adjust
3603	end_subject so that we stop the optimization scans for a first code unit
3604	immediately after the first character of a newline (the first code unit can
3605	legitimately be a newline). If the match fails at the newline, later code
3606	breaks this loop. /*
3607
3608	if (firstline)
3609	{
3610	PCRE2_SPTR t = start_match;
3611	#ifdef SUPPORT_UNICODE
3612	if (utf)
3613	{
3614	while (t < end_subject && !IS_NEWLINE(t))
3615	{
3616	t++;
3617	ACROSSCHAR(t < end_subject, t, t++);
3618	}
3619	}
3620	else
3621	#endif
3622	while (t < end_subject && !IS_NEWLINE(t)) t++;
3623	end_subject = t;
3624	}
3625
3626	/ Anchored: check the first code unit if one is recorded. This may seem*
3627	pointless but it can help in detecting a no match case without scanning for
3628	the required code unit. /*
3629
3630	if (anchored)
3631	{
3632	if (has_first_cu \|\| start_bits != NULL)
3633	{
3634	BOOL ok = start_match < end_subject;
3635	if (ok)
3636	{
3637	PCRE2_UCHAR c = UCHAR21TEST(start_match);
3638	ok = has_first_cu && (c == first_cu \|\| c == first_cu2);
3639	if (!ok && start_bits != NULL)
3640	{
3641	#if PCRE2_CODE_UNIT_WIDTH != 8
3642	if (c > `255`) c = `255`;
3643	#endif
3644	ok = (start_bits[c/`8`] & (`1u` << (c&`7`))) != `0`;
3645	}
3646	}
3647	if (!ok) break;
3648	}
3649	}
3650
3651	/ Not anchored. Advance to a unique first code unit if there is one. In*
3652	8-bit mode, the use of memchr() gives a big speed up, even though we have
3653	to call it twice in caseless mode, in order to find the earliest occurrence
3654	of the character in either of its cases. If a call to memchr() that
3655	searches the rest of the subject fails to find one case, remember that in
3656	order not to keep on repeating the search. This can make a huge difference
3657	when the strings are very long and only one case is present. /*
3658
3659	else
3660	{
3661	if (has_first_cu)
3662	{
3663	if (first_cu != first_cu2) / Caseless /
3664	{
3665	#if PCRE2_CODE_UNIT_WIDTH != 8
3666	PCRE2_UCHAR smc;
3667	while (start_match < end_subject &&
3668	(smc = UCHAR21TEST(start_match)) != first_cu &&
3669	smc != first_cu2)
3670	start_match++;
3671
3672	#else /* 8-bit code units */
3673	PCRE2_SPTR pp1 = NULL;
3674	PCRE2_SPTR pp2 = NULL;
3675	PCRE2_SIZE cu2size = end_subject - start_match;
3676
3677	if (!memchr_not_found_first_cu)
3678	{
3679	pp1 = memchr(start_match, first_cu, end_subject - start_match);
3680	if (pp1 == NULL) memchr_not_found_first_cu = TRUE;
3681	else cu2size = pp1 - start_match;
3682	}
3683
3684	/ If pp1 is not NULL, we have arranged to search only as far as pp1,*
3685	to see if the other case is earlier, so we can set "not found" only
3686	when both searches have returned NULL. /*
3687
3688	if (!memchr_not_found_first_cu2)
3689	{
3690	pp2 = memchr(start_match, first_cu2, cu2size);
3691	memchr_not_found_first_cu2 = (pp2 == NULL && pp1 == NULL);
3692	}
3693
3694	if (pp1 == NULL)
3695	start_match = (pp2 == NULL)? end_subject : pp2;
3696	else
3697	start_match = (pp2 == NULL \|\| pp1 < pp2)? pp1 : pp2;
3698	#endif
3699	}
3700
3701	/ The caseful case /
3702
3703	else
3704	{
3705	#if PCRE2_CODE_UNIT_WIDTH != 8
3706	while (start_match < end_subject && UCHAR21TEST(start_match) !=
3707	first_cu)
3708	start_match++;
3709	#else /* 8-bit code units */
3710	start_match = memchr(start_match, first_cu, end_subject - start_match);
3711	if (start_match == NULL) start_match = end_subject;
3712	#endif
3713	}
3714
3715	/ If we can't find the required code unit, having reached the true end*
3716	of the subject, break the bumpalong loop, to force a match failure,
3717	except when doing partial matching, when we let the next cycle run at
3718	the end of the subject. To see why, consider the pattern /(?<=abc)def/,
3719	which partially matches "abc", even though the string does not contain
3720	the starting character "d". If we have not reached the true end of the
3721	subject (PCRE2_FIRSTLINE caused end_subject to be temporarily modified)
3722	we also let the cycle run, because the matching string is legitimately
3723	allowed to start with the first code unit of a newline. /*
3724
3725	if ((mb->moptions & (PCRE2_PARTIAL_HARD\|PCRE2_PARTIAL_SOFT)) == `0` &&
3726	start_match >= mb->end_subject)
3727	break;
3728	}
3729
3730	/ If there's no first code unit, advance to just after a linebreak for a*
3731	multiline match if required. /*
3732
3733	else if (startline)
3734	{
3735	if (start_match > mb->start_subject + start_offset)
3736	{
3737	#ifdef SUPPORT_UNICODE
3738	if (utf)
3739	{
3740	while (start_match < end_subject && !WAS_NEWLINE(start_match))
3741	{
3742	start_match++;
3743	ACROSSCHAR(start_match < end_subject, start_match, start_match++);
3744	}
3745	}
3746	else
3747	#endif
3748	while (start_match < end_subject && !WAS_NEWLINE(start_match))
3749	start_match++;
3750
3751	/ If we have just passed a CR and the newline option is ANY or*
3752	ANYCRLF, and we are now at a LF, advance the match position by one
3753	more code unit. /*
3754
3755	if (start_match[-`1`] == CHAR_CR &&
3756	(mb->nltype == NLTYPE_ANY \|\| mb->nltype == NLTYPE_ANYCRLF) &&
3757	start_match < end_subject &&
3758	UCHAR21TEST(start_match) == CHAR_NL)
3759	start_match++;
3760	}
3761	}
3762
3763	/ If there's no first code unit or a requirement for a multiline line*
3764	start, advance to a non-unique first code unit if any have been
3765	identified. The bitmap contains only 256 bits. When code units are 16 or
3766	32 bits wide, all code units greater than 254 set the 255 bit. /*
3767
3768	else if (start_bits != NULL)
3769	{
3770	while (start_match < end_subject)
3771	{
3772	uint32_t c = UCHAR21TEST(start_match);
3773	#if PCRE2_CODE_UNIT_WIDTH != 8
3774	if (c > `255`) c = `255`;
3775	#endif
3776	if ((start_bits[c/`8`] & (`1u` << (c&`7`))) != `0`) break;
3777	start_match++;
3778	}
3779
3780	/ See comment above in first_cu checking about the next line. /
3781
3782	if ((mb->moptions & (PCRE2_PARTIAL_HARD\|PCRE2_PARTIAL_SOFT)) == `0` &&
3783	start_match >= mb->end_subject)
3784	break;
3785	}
3786	} / End of first code unit handling /
3787
3788	/ Restore fudged end_subject /
3789
3790	end_subject = mb->end_subject;
3791
3792	/ The following two optimizations are disabled for partial matching. /
3793
3794	if ((mb->moptions & (PCRE2_PARTIAL_HARD\|PCRE2_PARTIAL_SOFT)) == `0`)
3795	{
3796	PCRE2_SPTR p;
3797
3798	/ The minimum matching length is a lower bound; no actual string of that*
3799	length may actually match the pattern. Although the value is, strictly,
3800	in characters, we treat it as code units to avoid spending too much time
3801	in this optimization. /*
3802
3803	if (end_subject - start_match < re->minlength) goto NOMATCH_EXIT;
3804
3805	/ If req_cu is set, we know that that code unit must appear in the*
3806	subject for the match to succeed. If the first code unit is set, req_cu
3807	must be later in the subject; otherwise the test starts at the match
3808	point. This optimization can save a huge amount of backtracking in
3809	patterns with nested unlimited repeats that aren't going to match.
3810	Writing separate code for cased/caseless versions makes it go faster, as
3811	does using an autoincrement and backing off on a match. As in the case of
3812	the first code unit, using memchr() in the 8-bit library gives a big
3813	speed up. Unlike the first_cu check above, we do not need to call
3814	memchr() twice in the caseless case because we only need to check for the
3815	presence of the character in either case, not find the first occurrence.
3816
3817	The search can be skipped if the code unit was found later than the
3818	current starting point in a previous iteration of the bumpalong loop.
3819
3820	HOWEVER: when the subject string is very, very long, searching to its end
3821	can take a long time, and give bad performance on quite ordinary
3822	patterns. This showed up when somebody was matching something like
3823	/^\d+C/ on a 32-megabyte string... so we don't do this when the string is
3824	sufficiently long, but it's worth searching a lot more for unanchored
3825	patterns. /*
3826
3827	p = start_match + (has_first_cu? `1`:`0`);
3828	if (has_req_cu && p > req_cu_ptr)
3829	{
3830	PCRE2_SIZE check_length = end_subject - start_match;
3831
3832	if (check_length < REQ_CU_MAX \|\|
3833	(!anchored && check_length < REQ_CU_MAX * `1000`))
3834	{
3835	if (req_cu != req_cu2) / Caseless /
3836	{
3837	#if PCRE2_CODE_UNIT_WIDTH != 8
3838	while (p < end_subject)
3839	{
3840	uint32_t pp = UCHAR21INCTEST(p);
3841	if (pp == req_cu \|\| pp == req_cu2) { p--; break; }
3842	}
3843	#else /* 8-bit code units */
3844	PCRE2_SPTR pp = p;
3845	p = memchr(pp, req_cu, end_subject - pp);
3846	if (p == NULL)
3847	{
3848	p = memchr(pp, req_cu2, end_subject - pp);
3849	if (p == NULL) p = end_subject;
3850	}
3851	#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
3852	}
3853
3854	/ The caseful case /
3855
3856	else
3857	{
3858	#if PCRE2_CODE_UNIT_WIDTH != 8
3859	while (p < end_subject)
3860	{
3861	if (UCHAR21INCTEST(p) == req_cu) { p--; break; }
3862	}
3863
3864	#else /* 8-bit code units */
3865	p = memchr(p, req_cu, end_subject - p);
3866	if (p == NULL) p = end_subject;
3867	#endif
3868	}
3869
3870	/ If we can't find the required code unit, break the matching loop,*
3871	forcing a match failure. /*
3872
3873	if (p >= end_subject) break;
3874
3875	/ If we have found the required code unit, save the point where we*
3876	found it, so that we don't search again next time round the loop if
3877	the start hasn't passed this code unit yet. /*
3878
3879	req_cu_ptr = p;
3880	}
3881	}
3882	}
3883	}
3884
3885	/ ------------ End of start of match optimizations ------------ /
3886
3887	/ Give no match if we have passed the bumpalong limit. /
3888
3889	if (start_match > bumpalong_limit) break;
3890
3891	/ OK, now we can do the business /
3892
3893	mb->start_used_ptr = start_match;
3894	mb->last_used_ptr = start_match;
3895	mb->recursive = NULL;
3896
3897	rc = internal_dfa_match(
3898	mb, / fixed match data /
3899	mb->start_code, / this subexpression's code /
3900	start_match, / where we currently are /
3901	start_offset, / start offset in subject /
3902	match_data->ovector, / offset vector /
3903	(uint32_t)match_data->oveccount * `2`, / actual size of same /
3904	workspace, / workspace vector /
3905	(int)wscount, / size of same /
3906	`0`, / function recurse level /
3907	base_recursion_workspace); / initial workspace for recursion /
3908
3909	/ Anything other than "no match" means we are done, always; otherwise, carry*
3910	on only if not anchored. /*
3911
3912	if (rc != PCRE2_ERROR_NOMATCH \|\| anchored)
3913	{
3914	if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > `0`)
3915	{
3916	match_data->ovector[`0`] = (PCRE2_SIZE)(start_match - subject);
3917	match_data->ovector[`1`] = (PCRE2_SIZE)(end_subject - subject);
3918	}
3919	match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject);
3920	match_data->rightchar = (PCRE2_SIZE)( mb->last_used_ptr - subject);
3921	match_data->startchar = (PCRE2_SIZE)(start_match - subject);
3922	match_data->rc = rc;
3923
3924	if (rc >= `0` &&(options & PCRE2_COPY_MATCHED_SUBJECT) != `0`)
3925	{
3926	length = CU2BYTES(length + was_zero_terminated);
3927	match_data->subject = match_data->memctl.malloc(length,
3928	match_data->memctl.memory_data);
3929	if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
3930	memcpy((void *)match_data->subject, subject, length);
3931	match_data->flags \|= PCRE2_MD_COPIED_SUBJECT;
3932	}
3933	else
3934	{
3935	if (rc >= `0` \|\| rc == PCRE2_ERROR_PARTIAL) match_data->subject = subject;
3936	}
3937	goto EXIT;
3938	}
3939
3940	/ Advance to the next subject character unless we are at the end of a line*
3941	and firstline is set. /*
3942
3943	if (firstline && IS_NEWLINE(start_match)) break;
3944	start_match++;
3945	#ifdef SUPPORT_UNICODE
3946	if (utf)
3947	{
3948	ACROSSCHAR(start_match < end_subject, start_match, start_match++);
3949	}
3950	#endif
3951	if (start_match > end_subject) break;
3952
3953	/ If we have just passed a CR and we are now at a LF, and the pattern does*
3954	not contain any explicit matches for \r or \n, and the newline option is CRLF
3955	or ANY or ANYCRLF, advance the match position by one more character. /*
3956
3957	if (UCHAR21TEST(start_match - `1`) == CHAR_CR &&
3958	start_match < end_subject &&
3959	UCHAR21TEST(start_match) == CHAR_NL &&
3960	(re->flags & PCRE2_HASCRORLF) == `0` &&
3961	(mb->nltype == NLTYPE_ANY \|\|
3962	mb->nltype == NLTYPE_ANYCRLF \|\|
3963	mb->nllen == `2`))
3964	start_match++;
3965
3966	} / "Bumpalong" loop /
3967
3968	NOMATCH_EXIT:
3969	rc = PCRE2_ERROR_NOMATCH;
3970
3971	EXIT:
3972	while (rws->next != NULL)
3973	{
3974	RWS_anchor *next = rws->next;
3975	rws->next = next->next;
3976	mb->memctl.free(next, mb->memctl.memory_data);
3977	}
3978
3979	return rc;
3980	}
3981
/* End of pcre2_dfa_match.c */
3983

Browse the source code of Qt/src/3rdparty/pcre2/src/pcre2_dfa_match.c