pcre2_auto_possess.c source code [Godot/thirdparty/pcre2/src/pcre2_auto_possess.c]

1	/*************************************************
2	* Perl-Compatible Regular Expressions *
3	*************************************************/
4
5	/ PCRE is a library of functions to support regular expressions whose syntax*
6	and semantics are as close as possible to those of the Perl 5 language.
7
8	Written by Philip Hazel
9	Original API code Copyright (c) 1997-2012 University of Cambridge
10	New API code Copyright (c) 2016-2022 University of Cambridge
11
12	-----------------------------------------------------------------------------
13	Redistribution and use in source and binary forms, with or without
14	modification, are permitted provided that the following conditions are met:
15
16	* Redistributions of source code must retain the above copyright notice,
17	this list of conditions and the following disclaimer.
18
19	* Redistributions in binary form must reproduce the above copyright
20	notice, this list of conditions and the following disclaimer in the
21	documentation and/or other materials provided with the distribution.
22
23	* Neither the name of the University of Cambridge nor the names of its
24	contributors may be used to endorse or promote products derived from
25	this software without specific prior written permission.
26
27	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31	LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35	CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37	POSSIBILITY OF SUCH DAMAGE.
38	-----------------------------------------------------------------------------
39	*/
40
/* This module contains functions that scan a compiled pattern and change
repeats into possessive repeats where possible. */
43
44
45	#ifdef HAVE_CONFIG_H
46	#include "config.h"
47	#endif
48
49
50	#include "pcre2_internal.h"
51
52
53	/*************************************************
54	* Tables for auto-possessification *
55	*************************************************/
56
57	/ This table is used to check whether auto-possessification is possible*
58	between adjacent character-type opcodes. The left-hand (repeated) opcode is
59	used to select the row, and the right-hand opcode is use to select the column.
60	A value of 1 means that auto-possessification is OK. For example, the second
61	value in the first row means that \D+\d can be turned into \D++\d.
62
63	The Unicode property types (\P and \p) have to be present to fill out the table
64	because of what their opcode values are, but the table values should always be
65	zero because property types are handled separately in the code. The last four
66	columns apply to items that cannot be repeated, so there is no need to have
67	rows for them. Note that OP_DIGIT etc. are generated only when PCRE_UCP is
68	not set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
69
70	#define APTROWS (LAST_AUTOTAB_LEFT_OP - FIRST_AUTOTAB_OP + 1)
71	#define APTCOLS (LAST_AUTOTAB_RIGHT_OP - FIRST_AUTOTAB_OP + 1)
72
73	static const uint8_t autoposstab[APTROWS][APTCOLS] = {
74	/ \D \d \S \s \W \w . .+ \C \P \p \R \H \h \V \v \X \Z \z $ $M /
75	{ `0`, `1`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `0` }, / \D /
76	{ `1`, `0`, `0`, `1`, `1`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `1`, `1`, `1` }, / \d /
77	{ `0`, `0`, `0`, `1`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `1`, `1`, `1` }, / \S /
78	{ `0`, `1`, `1`, `0`, `0`, `1`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `0` }, / \s /
79	{ `0`, `1`, `0`, `0`, `0`, `1`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `0` }, / \W /
80	{ `0`, `0`, `0`, `1`, `1`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `1`, `0`, `1`, `0`, `1`, `1`, `1`, `1` }, / \w /
81	{ `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `0` }, / . /
82	{ `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `0` }, / .+ /
83	{ `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `0` }, / \C /
84	{ `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` }, / \P /
85	{ `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` }, / \p /
86	{ `0`, `1`, `0`, `1`, `0`, `1`, `1`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `0`, `0`, `0`, `1`, `0`, `0` }, / \R /
87	{ `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `0`, `0`, `0`, `1`, `0`, `0` }, / \H /
88	{ `0`, `1`, `1`, `0`, `0`, `1`, `0`, `0`, `0`, `0`, `0`, `1`, `1`, `0`, `0`, `1`, `0`, `0`, `1`, `0`, `0` }, / \h /
89	{ `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `0`, `0`, `1`, `0`, `0`, `1`, `0`, `0` }, / \V /
90	{ `0`, `1`, `1`, `0`, `0`, `1`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `1`, `0`, `0`, `0`, `1`, `0`, `0` }, / \v /
91	{ `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `1`, `0`, `0` } / \X /
92	};
93
94	#ifdef SUPPORT_UNICODE
95	/ This table is used to check whether auto-possessification is possible*
96	between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP). The
97	left-hand (repeated) opcode is used to select the row, and the right-hand
98	opcode is used to select the column. The values are as follows:
99
100	0 Always return FALSE (never auto-possessify)
101	1 Character groups are distinct (possessify if both are OP_PROP)
102	2 Check character categories in the same group (general or particular)
103	3 TRUE if the two opcodes are not the same (PROP vs NOTPROP)
104
105	4 Check left general category vs right particular category
106	5 Check right general category vs left particular category
107
108	6 Left alphanum vs right general category
109	7 Left space vs right general category
110	8 Left word vs right general category
111
112	9 Right alphanum vs left general category
113	10 Right space vs left general category
114	11 Right word vs left general category
115
116	12 Left alphanum vs right particular category
117	13 Left space vs right particular category
118	14 Left word vs right particular category
119
120	15 Right alphanum vs left particular category
121	16 Right space vs left particular category
122	17 Right word vs left particular category
123	*/
124
125	static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = {
126	/ ANY LAMP GC PC SC SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL /
127	{ `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` }, / PT_ANY /
128	{ `0`, `3`, `0`, `0`, `0`, `0`, `3`, `1`, `1`, `0`, `0`, `0`, `0`, `0` }, / PT_LAMP /
129	{ `0`, `0`, `2`, `4`, `0`, `0`, `9`, `10`, `10`, `11`, `0`, `0`, `0`, `0` }, / PT_GC /
130	{ `0`, `0`, `5`, `2`, `0`, `0`, `15`, `16`, `16`, `17`, `0`, `0`, `0`, `0` }, / PT_PC /
131	{ `0`, `0`, `0`, `0`, `2`, `2`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` }, / PT_SC /
132	{ `0`, `0`, `0`, `0`, `2`, `2`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` }, / PT_SCX /
133	{ `0`, `3`, `6`, `12`, `0`, `0`, `3`, `1`, `1`, `0`, `0`, `0`, `0`, `0` }, / PT_ALNUM /
134	{ `0`, `1`, `7`, `13`, `0`, `0`, `1`, `3`, `3`, `1`, `0`, `0`, `0`, `0` }, / PT_SPACE /
135	{ `0`, `1`, `7`, `13`, `0`, `0`, `1`, `3`, `3`, `1`, `0`, `0`, `0`, `0` }, / PT_PXSPACE /
136	{ `0`, `0`, `8`, `14`, `0`, `0`, `0`, `1`, `1`, `3`, `0`, `0`, `0`, `0` }, / PT_WORD /
137	{ `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` }, / PT_CLIST /
138	{ `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `3`, `0`, `0` }, / PT_UCNC /
139	{ `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` }, / PT_BIDICL /
140	{ `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` } / PT_BOOL /
141	};
142
/* This table is used to check whether auto-possessification is possible
between adjacent Unicode property opcodes (OP_PROP and OP_NOTPROP) when one
specifies a general category and the other specifies a particular category. The
row is selected by the general category and the column by the particular
category. The value is 1 if the particular category is not part of the general
category. Consequently every column holds exactly one 0: the row of the general
category that the particular category belongs to. */

static const uint8_t catposstab[7][30] = {
/* Cc Cf Cn Co Cs Ll Lm Lo Lt Lu Mc Me Mn Nd Nl No Pc Pd Pe Pf Pi Po Ps Sc Sk Sm So Zl Zp Zs */
  { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* C */
  { 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* L */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* M */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },  /* N */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },  /* P */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1 },  /* S */
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0 }   /* Z */
};
160
161	/ This table is used when checking ALNUM, (PX)SPACE, SPACE, and WORD against*
162	a general or particular category. The properties in each row are those
163	that apply to the character set in question. Duplication means that a little
164	unnecessary work is done when checking, but this keeps things much simpler
165	because they can all use the same code. For more details see the comment where
166	this table is used.
167
168	Note: SPACE and PXSPACE used to be different because Perl excluded VT from
169	"space", but from Perl 5.18 it's included, so both categories are treated the
170	same here. /*
171
172	static const uint8_t posspropstab[`3`][`4`] = {
173	{ ucp_L, ucp_N, ucp_N, ucp_Nl }, / ALNUM, 3rd and 4th values redundant /
174	{ ucp_Z, ucp_Z, ucp_C, ucp_Cc }, / SPACE and PXSPACE, 2nd value redundant /
175	{ ucp_L, ucp_N, ucp_P, ucp_Po } / WORD /
176	};
177	#endif /* SUPPORT_UNICODE */
178
179
180
181	#ifdef SUPPORT_UNICODE
182	/*************************************************
183	* Check a character and a property *
184	*************************************************/
185
186	/ This function is called by compare_opcodes() when a property item is*
187	adjacent to a fixed character.
188
189	Arguments:
190	c the character
191	ptype the property type
192	pdata the data for the type
193	negated TRUE if it's a negated property (\P or \p{^)
194
195	Returns: TRUE if auto-possessifying is OK
196	*/
197
198	static BOOL
199	check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata,
200	BOOL negated)
201	{
202	BOOL ok;
203	const uint32_t *p;
204	const ucd_record *prop = GET_UCD(c);
205
206	switch(ptype)
207	{
208	case PT_LAMP:
209	return (prop->chartype == ucp_Lu \|\|
210	prop->chartype == ucp_Ll \|\|
211	prop->chartype == ucp_Lt) == negated;
212
213	case PT_GC:
214	return (pdata == PRIV(ucp_gentype)[prop->chartype]) == negated;
215
216	case PT_PC:
217	return (pdata == prop->chartype) == negated;
218
219	case PT_SC:
220	return (pdata == prop->script) == negated;
221
222	case PT_SCX:
223	ok = (pdata == prop->script
224	\|\| MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != `0`);
225	return ok == negated;
226
227	/ These are specials /
228
229	case PT_ALNUM:
230	return (PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
231	PRIV(ucp_gentype)[prop->chartype] == ucp_N) == negated;
232
233	/ Perl space used to exclude VT, but from Perl 5.18 it is included, which*
234	means that Perl space and POSIX space are now identical. PCRE was changed
235	at release 8.34. /*
236
237	case PT_SPACE: / Perl space /
238	case PT_PXSPACE: / POSIX space /
239	switch(c)
240	{
241	HSPACE_CASES:
242	VSPACE_CASES:
243	return negated;
244
245	default:
246	return (PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == negated;
247	}
248	break; / Control never reaches here /
249
250	case PT_WORD:
251	return (PRIV(ucp_gentype)[prop->chartype] == ucp_L \|\|
252	PRIV(ucp_gentype)[prop->chartype] == ucp_N \|\|
253	c == CHAR_UNDERSCORE) == negated;
254
255	case PT_CLIST:
256	p = PRIV(ucd_caseless_sets) + prop->caseset;
257	for (;;)
258	{
259	if (c < p) return* !negated;
260	if (c == p++) return* negated;
261	}
262	break; / Control never reaches here /
263
264	/ Haven't yet thought these through. /
265
266	case PT_BIDICL:
267	return FALSE;
268
269	case PT_BOOL:
270	return FALSE;
271	}
272
273	return FALSE;
274	}
275	#endif /* SUPPORT_UNICODE */
276
277
278
279	/*************************************************
280	* Base opcode of repeated opcodes *
281	*************************************************/
282
283	/ Returns the base opcode for repeated single character type opcodes. If the*
284	opcode is not a repeated character type, it returns with the original value.
285
286	Arguments: c opcode
287	Returns: base opcode for the type
288	*/
289
290	static PCRE2_UCHAR
291	get_repeat_base(PCRE2_UCHAR c)
292	{
293	return (c > OP_TYPEPOSUPTO)? c :
294	(c >= OP_TYPESTAR)? OP_TYPESTAR :
295	(c >= OP_NOTSTARI)? OP_NOTSTARI :
296	(c >= OP_NOTSTAR)? OP_NOTSTAR :
297	(c >= OP_STARI)? OP_STARI :
298	OP_STAR;
299	}
300
301
302	/*************************************************
303	* Fill the character property list *
304	*************************************************/
305
306	/ Checks whether the code points to an opcode that can take part in auto-*
307	possessification, and if so, fills a list with its properties.
308
309	Arguments:
310	code points to start of expression
311	utf TRUE if in UTF mode
312	ucp TRUE if in UCP mode
313	fcc points to the case-flipping table
314	list points to output list
315	list[0] will be filled with the opcode
316	list[1] will be non-zero if this opcode
317	can match an empty character string
318	list[2..7] depends on the opcode
319
320	Returns: points to the start of the next opcode if code is accepted*
321	NULL if code is not accepted*
322	*/
323
324	static PCRE2_SPTR
325	get_chr_property_list(PCRE2_SPTR code, BOOL utf, BOOL ucp, const uint8_t *fcc,
326	uint32_t *list)
327	{
328	PCRE2_UCHAR c = *code;
329	PCRE2_UCHAR base;
330	PCRE2_SPTR end;
331	uint32_t chr;
332
333	#ifdef SUPPORT_UNICODE
334	uint32_t *clist_dest;
335	const uint32_t *clist_src;
336	#else
337	(void)utf; / Suppress "unused parameter" compiler warnings /
338	(void)ucp;
339	#endif
340
341	list[`0`] = c;
342	list[`1`] = FALSE;
343	code++;
344
345	if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
346	{
347	base = get_repeat_base(c);
348	c -= (base - OP_STAR);
349
350	if (c == OP_UPTO \|\| c == OP_MINUPTO \|\| c == OP_EXACT \|\| c == OP_POSUPTO)
351	code += IMM2_SIZE;
352
353	list[`1`] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
354	c != OP_POSPLUS);
355
356	switch(base)
357	{
358	case OP_STAR:
359	list[`0`] = OP_CHAR;
360	break;
361
362	case OP_STARI:
363	list[`0`] = OP_CHARI;
364	break;
365
366	case OP_NOTSTAR:
367	list[`0`] = OP_NOT;
368	break;
369
370	case OP_NOTSTARI:
371	list[`0`] = OP_NOTI;
372	break;
373
374	case OP_TYPESTAR:
375	list[`0`] = *code;
376	code++;
377	break;
378	}
379	c = list[`0`];
380	}
381
382	switch(c)
383	{
384	case OP_NOT_DIGIT:
385	case OP_DIGIT:
386	case OP_NOT_WHITESPACE:
387	case OP_WHITESPACE:
388	case OP_NOT_WORDCHAR:
389	case OP_WORDCHAR:
390	case OP_ANY:
391	case OP_ALLANY:
392	case OP_ANYNL:
393	case OP_NOT_HSPACE:
394	case OP_HSPACE:
395	case OP_NOT_VSPACE:
396	case OP_VSPACE:
397	case OP_EXTUNI:
398	case OP_EODN:
399	case OP_EOD:
400	case OP_DOLL:
401	case OP_DOLLM:
402	return code;
403
404	case OP_CHAR:
405	case OP_NOT:
406	GETCHARINCTEST(chr, code);
407	list[`2`] = chr;
408	list[`3`] = NOTACHAR;
409	return code;
410
411	case OP_CHARI:
412	case OP_NOTI:
413	list[`0`] = (c == OP_CHARI) ? OP_CHAR : OP_NOT;
414	GETCHARINCTEST(chr, code);
415	list[`2`] = chr;
416
417	#ifdef SUPPORT_UNICODE
418	if (chr < `128` \|\| (chr < `256` && !utf && !ucp))
419	list[`3`] = fcc[chr];
420	else
421	list[`3`] = UCD_OTHERCASE(chr);
422	#elif defined SUPPORT_WIDE_CHARS
423	list[`3`] = (chr < `256`) ? fcc[chr] : chr;
424	#else
425	list[`3`] = fcc[chr];
426	#endif
427
428	/ The othercase might be the same value. /
429
430	if (chr == list[`3`])
431	list[`3`] = NOTACHAR;
432	else
433	list[`4`] = NOTACHAR;
434	return code;
435
436	#ifdef SUPPORT_UNICODE
437	case OP_PROP:
438	case OP_NOTPROP:
439	if (code[`0`] != PT_CLIST)
440	{
441	list[`2`] = code[`0`];
442	list[`3`] = code[`1`];
443	return code + `2`;
444	}
445
446	/ Convert only if we have enough space. /
447
448	clist_src = PRIV(ucd_caseless_sets) + code[`1`];
449	clist_dest = list + `2`;
450	code += `2`;
451
452	do {
453	if (clist_dest >= list + `8`)
454	{
455	/ Early return if there is not enough space. This should never*
456	happen, since all clists are shorter than 5 character now. /*
457	list[`2`] = code[`0`];
458	list[`3`] = code[`1`];
459	return code;
460	}
461	clist_dest++ = clist_src;
462	}
463	while(*clist_src++ != NOTACHAR);
464
465	/ All characters are stored. The terminating NOTACHAR is copied from the*
466	clist itself. /*
467
468	list[`0`] = (c == OP_PROP) ? OP_CHAR : OP_NOT;
469	return code;
470	#endif
471
472	case OP_NCLASS:
473	case OP_CLASS:
474	#ifdef SUPPORT_WIDE_CHARS
475	case OP_XCLASS:
476	if (c == OP_XCLASS)
477	end = code + GET(code, `0`) - `1`;
478	else
479	#endif
480	end = code + `32` / sizeof(PCRE2_UCHAR);
481
482	switch(*end)
483	{
484	case OP_CRSTAR:
485	case OP_CRMINSTAR:
486	case OP_CRQUERY:
487	case OP_CRMINQUERY:
488	case OP_CRPOSSTAR:
489	case OP_CRPOSQUERY:
490	list[`1`] = TRUE;
491	end++;
492	break;
493
494	case OP_CRPLUS:
495	case OP_CRMINPLUS:
496	case OP_CRPOSPLUS:
497	end++;
498	break;
499
500	case OP_CRRANGE:
501	case OP_CRMINRANGE:
502	case OP_CRPOSRANGE:
503	list[`1`] = (GET2(end, `1`) == `0`);
504	end += `1` + `2` * IMM2_SIZE;
505	break;
506	}
507	list[`2`] = (uint32_t)(end - code);
508	return end;
509	}
510
511	return NULL; / Opcode not accepted /
512	}
513
514
515
/*************************************************
*    Scan further character sets for match       *
*************************************************/

/* Checks whether the base and the current opcode have a common character, in
which case the base cannot be possessified.

Arguments:
  code        points to the byte code
  utf         TRUE in UTF mode
  ucp         TRUE in UCP mode
  cb          compile data block
  base_list   the data list of the base opcode
  base_end    the end of the base opcode
  rec_limit   points to recursion depth counter

Returns:      TRUE if the auto-possessification is possible
*/
534
535	static BOOL
536	compare_opcodes(PCRE2_SPTR code, BOOL utf, BOOL ucp, const compile_block *cb,
537	const uint32_t base_list, PCRE2_SPTR base_end, int* *rec_limit)
538	{
539	PCRE2_UCHAR c;
540	uint32_t list[`8`];
541	const uint32_t *chr_ptr;
542	const uint32_t *ochr_ptr;
543	const uint32_t *list_ptr;
544	PCRE2_SPTR next_code;
545	#ifdef SUPPORT_WIDE_CHARS
546	PCRE2_SPTR xclass_flags;
547	#endif
548	const uint8_t *class_bitset;
549	const uint8_t set1, set2, *set_end;
550	uint32_t chr;
551	BOOL accepted, invert_bits;
552	BOOL entered_a_group = FALSE;
553
554	if (--(rec_limit) <= `0`) return* FALSE; / Recursion has gone too deep /
555
556	/ Note: the base_list[1] contains whether the current opcode has a greedy*
557	(represented by a non-zero value) quantifier. This is a different from
558	other character type lists, which store here that the character iterator
559	matches to an empty string (also represented by a non-zero value). /*
560
561	for(;;)
562	{
563	/ All operations move the code pointer forward.*
564	Therefore infinite recursions are not possible. /*
565
566	c = *code;
567
568	/ Skip over callouts /
569
570	if (c == OP_CALLOUT)
571	{
572	code += PRIV(OP_lengths)[c];
573	continue;
574	}
575
576	if (c == OP_CALLOUT_STR)
577	{
578	code += GET(code, `1` + `2`*LINK_SIZE);
579	continue;
580	}
581
582	/ At the end of a branch, skip to the end of the group. /
583
584	if (c == OP_ALT)
585	{
586	do code += GET(code, `1`); while (*code == OP_ALT);
587	c = *code;
588	}
589
590	/ Inspect the next opcode. /
591
592	switch(c)
593	{
594	/ We can always possessify a greedy iterator at the end of the pattern,*
595	which is reached after skipping over the final OP_KET. A non-greedy
596	iterator must never be possessified. /*
597
598	case OP_END:
599	return base_list[`1`] != `0`;
600
601	/ When an iterator is at the end of certain kinds of group we can inspect*
602	what follows the group by skipping over the closing ket. Note that this
603	does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
604	iteration is variable (could be another iteration or could be the next
605	item). As these two opcodes are not listed in the next switch, they will
606	end up as the next code to inspect, and return FALSE by virtue of being
607	unsupported. /*
608
609	case OP_KET:
610	case OP_KETRPOS:
611	/ The non-greedy case cannot be converted to a possessive form. /
612
613	if (base_list[`1`] == `0`) return FALSE;
614
615	/ If the bracket is capturing it might be referenced by an OP_RECURSE*
616	so its last iterator can never be possessified if the pattern contains
617	recursions. (This could be improved by keeping a list of group numbers that
618	are called by recursion.) /*
619
620	switch(*(code - GET(code, `1`)))
621	{
622	case OP_CBRA:
623	case OP_SCBRA:
624	case OP_CBRAPOS:
625	case OP_SCBRAPOS:
626	if (cb->had_recurse) return FALSE;
627	break;
628
629	/ A script run might have to backtrack if the iterated item can match*
630	characters from more than one script. So give up unless repeating an
631	explicit character. /*
632
633	case OP_SCRIPT_RUN:
634	if (base_list[`0`] != OP_CHAR && base_list[`0`] != OP_CHARI)
635	return FALSE;
636	break;
637
638	/ Atomic sub-patterns and assertions can always auto-possessify their*
639	last iterator. However, if the group was entered as a result of checking
640	a previous iterator, this is not possible. /*
641
642	case OP_ASSERT:
643	case OP_ASSERT_NOT:
644	case OP_ASSERTBACK:
645	case OP_ASSERTBACK_NOT:
646	case OP_ONCE:
647	return !entered_a_group;
648
649	/ Non-atomic assertions - don't possessify last iterator. This needs*
650	more thought. /*
651
652	case OP_ASSERT_NA:
653	case OP_ASSERTBACK_NA:
654	return FALSE;
655	}
656
657	/ Skip over the bracket and inspect what comes next. /
658
659	code += PRIV(OP_lengths)[c];
660	continue;
661
662	/ Handle cases where the next item is a group. /
663
664	case OP_ONCE:
665	case OP_BRA:
666	case OP_CBRA:
667	next_code = code + GET(code, `1`);
668	code += PRIV(OP_lengths)[c];
669
670	/ Check each branch. We have to recurse a level for all but the last*
671	branch. /*
672
673	while (*next_code == OP_ALT)
674	{
675	if (!compare_opcodes(code, utf, ucp, cb, base_list, base_end, rec_limit))
676	return FALSE;
677	code = next_code + `1` + LINK_SIZE;
678	next_code += GET(next_code, `1`);
679	}
680
681	entered_a_group = TRUE;
682	continue;
683
684	case OP_BRAZERO:
685	case OP_BRAMINZERO:
686
687	next_code = code + `1`;
688	if (next_code != OP_BRA && next_code != OP_CBRA &&
689	next_code != OP_ONCE) return* FALSE;
690
691	do next_code += GET(next_code, `1`); while (*next_code == OP_ALT);
692
693	/ The bracket content will be checked by the OP_BRA/OP_CBRA case above. /
694
695	next_code += `1` + LINK_SIZE;
696	if (!compare_opcodes(next_code, utf, ucp, cb, base_list, base_end,
697	rec_limit))
698	return FALSE;
699
700	code += PRIV(OP_lengths)[c];
701	continue;
702
703	/ The next opcode does not need special handling; fall through and use it*
704	to see if the base can be possessified. /*
705
706	default:
707	break;
708	}
709
710	/ We now have the next appropriate opcode to compare with the base. Check*
711	for a supported opcode, and load its properties. /*
712
713	code = get_chr_property_list(code, utf, ucp, cb->fcc, list);
714	if (code == NULL) return FALSE; / Unsupported /
715
716	/ If either opcode is a small character list, set pointers for comparing*
717	characters from that list with another list, or with a property. /*
718
719	if (base_list[`0`] == OP_CHAR)
720	{
721	chr_ptr = base_list + `2`;
722	list_ptr = list;
723	}
724	else if (list[`0`] == OP_CHAR)
725	{
726	chr_ptr = list + `2`;
727	list_ptr = base_list;
728	}
729
730	/ Character bitsets can also be compared to certain opcodes. /
731
732	else if (base_list[`0`] == OP_CLASS \|\| list[`0`] == OP_CLASS
733	#if PCRE2_CODE_UNIT_WIDTH == 8
734	/ In 8 bit, non-UTF mode, OP_CLASS and OP_NCLASS are the same. /
735	\|\| (!utf && (base_list[`0`] == OP_NCLASS \|\| list[`0`] == OP_NCLASS))
736	#endif
737	)
738	{
739	#if PCRE2_CODE_UNIT_WIDTH == 8
740	if (base_list[`0`] == OP_CLASS \|\| (!utf && base_list[`0`] == OP_NCLASS))
741	#else
742	if (base_list[`0`] == OP_CLASS)
743	#endif
744	{
745	set1 = (uint8_t *)(base_end - base_list[`2`]);
746	list_ptr = list;
747	}
748	else
749	{
750	set1 = (uint8_t *)(code - list[`2`]);
751	list_ptr = base_list;
752	}
753
754	invert_bits = FALSE;
755	switch(list_ptr[`0`])
756	{
757	case OP_CLASS:
758	case OP_NCLASS:
759	set2 = (uint8_t *)
760	((list_ptr == list ? code : base_end) - list_ptr[`2`]);
761	break;
762
763	#ifdef SUPPORT_WIDE_CHARS
764	case OP_XCLASS:
765	xclass_flags = (list_ptr == list ? code : base_end) - list_ptr[`2`] + LINK_SIZE;
766	if ((xclass_flags & XCL_HASPROP) != `0`) return* FALSE;
767	if ((*xclass_flags & XCL_MAP) == `0`)
768	{
769	/ No bits are set for characters < 256. /
770	if (list[`1`] == `0`) return (*xclass_flags & XCL_NOT) == `0`;
771	/ Might be an empty repeat. /
772	continue;
773	}
774	set2 = (uint8_t *)(xclass_flags + `1`);
775	break;
776	#endif
777
778	case OP_NOT_DIGIT:
779	invert_bits = TRUE;
780	/ Fall through /
781	case OP_DIGIT:
782	set2 = (uint8_t *)(cb->cbits + cbit_digit);
783	break;
784
785	case OP_NOT_WHITESPACE:
786	invert_bits = TRUE;
787	/ Fall through /
788	case OP_WHITESPACE:
789	set2 = (uint8_t *)(cb->cbits + cbit_space);
790	break;
791
792	case OP_NOT_WORDCHAR:
793	invert_bits = TRUE;
794	/ Fall through /
795	case OP_WORDCHAR:
796	set2 = (uint8_t *)(cb->cbits + cbit_word);
797	break;
798
799	default:
800	return FALSE;
801	}
802
803	/ Because the bit sets are unaligned bytes, we need to perform byte*
804	comparison here. /*
805
806	set_end = set1 + `32`;
807	if (invert_bits)
808	{
809	do
810	{
811	if ((set1++ & ~(set2++)) != `0`) return FALSE;
812	}
813	while (set1 < set_end);
814	}
815	else
816	{
817	do
818	{
819	if ((set1++ & set2++) != `0`) return FALSE;
820	}
821	while (set1 < set_end);
822	}
823
824	if (list[`1`] == `0`) return TRUE;
825	/ Might be an empty repeat. /
826	continue;
827	}
828
829	/ Some property combinations also acceptable. Unicode property opcodes are*
830	processed specially; the rest can be handled with a lookup table. /*
831
832	else
833	{
834	uint32_t leftop, rightop;
835
836	leftop = base_list[`0`];
837	rightop = list[`0`];
838
839	#ifdef SUPPORT_UNICODE
840	accepted = FALSE; / Always set in non-unicode case. /
841	if (leftop == OP_PROP \|\| leftop == OP_NOTPROP)
842	{
843	if (rightop == OP_EOD)
844	accepted = TRUE;
845	else if (rightop == OP_PROP \|\| rightop == OP_NOTPROP)
846	{
847	int n;
848	const uint8_t *p;
849	BOOL same = leftop == rightop;
850	BOOL lisprop = leftop == OP_PROP;
851	BOOL risprop = rightop == OP_PROP;
852	BOOL bothprop = lisprop && risprop;
853
854	/ There's a table that specifies how each combination is to be*
855	processed:
856	0 Always return FALSE (never auto-possessify)
857	1 Character groups are distinct (possessify if both are OP_PROP)
858	2 Check character categories in the same group (general or particular)
859	3 Return TRUE if the two opcodes are not the same
860	... see comments below
861	*/
862
863	n = propposstab[base_list[`2`]][list[`2`]];
864	switch(n)
865	{
866	case `0`: break;
867	case `1`: accepted = bothprop; break;
868	case `2`: accepted = (base_list[`3`] == list[`3`]) != same; break;
869	case `3`: accepted = !same; break;
870
871	case `4`: / Left general category, right particular category /
872	accepted = risprop && catposstab[base_list[`3`]][list[`3`]] == same;
873	break;
874
875	case `5`: / Right general category, left particular category /
876	accepted = lisprop && catposstab[list[`3`]][base_list[`3`]] == same;
877	break;
878
879	/ This code is logically tricky. Think hard before fiddling with it.*
880	The posspropstab table has four entries per row. Each row relates to
881	one of PCRE's special properties such as ALNUM or SPACE or WORD.
882	Only WORD actually needs all four entries, but using repeats for the
883	others means they can all use the same code below.
884
885	The first two entries in each row are Unicode general categories, and
886	apply always, because all the characters they include are part of the
887	PCRE character set. The third and fourth entries are a general and a
888	particular category, respectively, that include one or more relevant
889	characters. One or the other is used, depending on whether the check
890	is for a general or a particular category. However, in both cases the
891	category contains more characters than the specials that are defined
892	for the property being tested against. Therefore, it cannot be used
893	in a NOTPROP case.
894
895	Example: the row for WORD contains ucp_L, ucp_N, ucp_P, ucp_Po.
896	Underscore is covered by ucp_P or ucp_Po. /*
897
898	case `6`: / Left alphanum vs right general category /
899	case `7`: / Left space vs right general category /
900	case `8`: / Left word vs right general category /
901	p = posspropstab[n-`6`];
902	accepted = risprop && lisprop ==
903	(list[`3`] != p[`0`] &&
904	list[`3`] != p[`1`] &&
905	(list[`3`] != p[`2`] \|\| !lisprop));
906	break;
907
908	case `9`: / Right alphanum vs left general category /
909	case `10`: / Right space vs left general category /
910	case `11`: / Right word vs left general category /
911	p = posspropstab[n-`9`];
912	accepted = lisprop && risprop ==
913	(base_list[`3`] != p[`0`] &&
914	base_list[`3`] != p[`1`] &&
915	(base_list[`3`] != p[`2`] \|\| !risprop));
916	break;
917
918	case `12`: / Left alphanum vs right particular category /
919	case `13`: / Left space vs right particular category /
920	case `14`: / Left word vs right particular category /
921	p = posspropstab[n-`12`];
922	accepted = risprop && lisprop ==
923	(catposstab[p[`0`]][list[`3`]] &&
924	catposstab[p[`1`]][list[`3`]] &&
925	(list[`3`] != p[`3`] \|\| !lisprop));
926	break;
927
928	case `15`: / Right alphanum vs left particular category /
929	case `16`: / Right space vs left particular category /
930	case `17`: / Right word vs left particular category /
931	p = posspropstab[n-`15`];
932	accepted = lisprop && risprop ==
933	(catposstab[p[`0`]][base_list[`3`]] &&
934	catposstab[p[`1`]][base_list[`3`]] &&
935	(base_list[`3`] != p[`3`] \|\| !risprop));
936	break;
937	}
938	}
939	}
940
941	else
942	#endif /* SUPPORT_UNICODE */
943
944	accepted = leftop >= FIRST_AUTOTAB_OP && leftop <= LAST_AUTOTAB_LEFT_OP &&
945	rightop >= FIRST_AUTOTAB_OP && rightop <= LAST_AUTOTAB_RIGHT_OP &&
946	autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
947
948	if (!accepted) return FALSE;
949
950	if (list[`1`] == `0`) return TRUE;
951	/ Might be an empty repeat. /
952	continue;
953	}
954
955	/ Control reaches here only if one of the items is a small character list.*
956	All characters are checked against the other side. /*
957
958	do
959	{
960	chr = *chr_ptr;
961
962	switch(list_ptr[`0`])
963	{
964	case OP_CHAR:
965	ochr_ptr = list_ptr + `2`;
966	do
967	{
968	if (chr == ochr_ptr) return* FALSE;
969	ochr_ptr++;
970	}
971	while(*ochr_ptr != NOTACHAR);
972	break;
973
974	case OP_NOT:
975	ochr_ptr = list_ptr + `2`;
976	do
977	{
978	if (chr == *ochr_ptr)
979	break;
980	ochr_ptr++;
981	}
982	while(*ochr_ptr != NOTACHAR);
983	if (ochr_ptr == NOTACHAR) return* FALSE; / Not found /
984	break;
985
986	/* Note that OP_DIGIT etc. are generated only when PCRE2_UCP is not
987	set. When it is set, \d etc. are converted into OP_(NOT_)PROP codes. /*
988
989	case OP_DIGIT:
990	if (chr < `256` && (cb->ctypes[chr] & ctype_digit) != `0`) return FALSE;
991	break;
992
993	case OP_NOT_DIGIT:
994	if (chr > `255` \|\| (cb->ctypes[chr] & ctype_digit) == `0`) return FALSE;
995	break;
996
997	case OP_WHITESPACE:
998	if (chr < `256` && (cb->ctypes[chr] & ctype_space) != `0`) return FALSE;
999	break;
1000
1001	case OP_NOT_WHITESPACE:
1002	if (chr > `255` \|\| (cb->ctypes[chr] & ctype_space) == `0`) return FALSE;
1003	break;
1004
1005	case OP_WORDCHAR:
1006	if (chr < `255` && (cb->ctypes[chr] & ctype_word) != `0`) return FALSE;
1007	break;
1008
1009	case OP_NOT_WORDCHAR:
1010	if (chr > `255` \|\| (cb->ctypes[chr] & ctype_word) == `0`) return FALSE;
1011	break;
1012
1013	case OP_HSPACE:
1014	switch(chr)
1015	{
1016	HSPACE_CASES: return FALSE;
1017	default: break;
1018	}
1019	break;
1020
1021	case OP_NOT_HSPACE:
1022	switch(chr)
1023	{
1024	HSPACE_CASES: break;
1025	default: return FALSE;
1026	}
1027	break;
1028
1029	case OP_ANYNL:
1030	case OP_VSPACE:
1031	switch(chr)
1032	{
1033	VSPACE_CASES: return FALSE;
1034	default: break;
1035	}
1036	break;
1037
1038	case OP_NOT_VSPACE:
1039	switch(chr)
1040	{
1041	VSPACE_CASES: break;
1042	default: return FALSE;
1043	}
1044	break;
1045
1046	case OP_DOLL:
1047	case OP_EODN:
1048	switch (chr)
1049	{
1050	case CHAR_CR:
1051	case CHAR_LF:
1052	case CHAR_VT:
1053	case CHAR_FF:
1054	case CHAR_NEL:
1055	#ifndef EBCDIC
1056	case `0x2028`:
1057	case `0x2029`:
1058	#endif /* Not EBCDIC */
1059	return FALSE;
1060	}
1061	break;
1062
1063	case OP_EOD: / Can always possessify before \z /
1064	break;
1065
1066	#ifdef SUPPORT_UNICODE
1067	case OP_PROP:
1068	case OP_NOTPROP:
1069	if (!check_char_prop(chr, list_ptr[`2`], list_ptr[`3`],
1070	list_ptr[`0`] == OP_NOTPROP))
1071	return FALSE;
1072	break;
1073	#endif
1074
1075	case OP_NCLASS:
1076	if (chr > `255`) return FALSE;
1077	/ Fall through /
1078
1079	case OP_CLASS:
1080	if (chr > `255`) break;
1081	class_bitset = (uint8_t *)
1082	((list_ptr == list ? code : base_end) - list_ptr[`2`]);
1083	if ((class_bitset[chr >> `3`] & (`1u` << (chr & `7`))) != `0`) return FALSE;
1084	break;
1085
1086	#ifdef SUPPORT_WIDE_CHARS
1087	case OP_XCLASS:
1088	if (PRIV(xclass)(chr, (list_ptr == list ? code : base_end) -
1089	list_ptr[`2`] + LINK_SIZE, utf)) return FALSE;
1090	break;
1091	#endif
1092
1093	default:
1094	return FALSE;
1095	}
1096
1097	chr_ptr++;
1098	}
1099	while(*chr_ptr != NOTACHAR);
1100
1101	/ At least one character must be matched from this opcode. /
1102
1103	if (list[`1`] == `0`) return TRUE;
1104	}
1105
1106	/ Control never reaches here. There used to be a fail-save return FALSE; here,*
1107	but some compilers complain about an unreachable statement. /*
1108	}
1109
1110
1111
1112	/*************************************************
1113	* Scan compiled regex for auto-possession *
1114	*************************************************/
1115
/* Replaces single character iterations with their possessive alternatives
if appropriate. This function modifies the compiled opcode! Hitting a
non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches
overly complicated or large patterns. In these cases, the check just stops,
leaving the remainder of the pattern unpossessified.

Arguments:
  code        points to start of the byte code
  cb          compile data block

Returns:      0 for success
              -1 if a non-existent opcode is encountered
*/
1130
1131	int
1132	PRIV(auto_possessify)(PCRE2_UCHAR code, const* compile_block *cb)
1133	{
1134	PCRE2_UCHAR c;
1135	PCRE2_SPTR end;
1136	PCRE2_UCHAR *repeat_opcode;
1137	uint32_t list[`8`];
1138	int rec_limit = `1000`; / Was 10,000 but clang+ASAN uses a lot of stack. /
1139	BOOL utf = (cb->external_options & PCRE2_UTF) != `0`;
1140	BOOL ucp = (cb->external_options & PCRE2_UCP) != `0`;
1141
1142	for (;;)
1143	{
1144	c = *code;
1145
1146	if (c >= OP_TABLE_LENGTH) return -`1`; / Something gone wrong /
1147
1148	if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
1149	{
1150	c -= get_repeat_base(c) - OP_STAR;
1151	end = (c <= OP_MINUPTO) ?
1152	get_chr_property_list(code, utf, ucp, cb->fcc, list) : NULL;
1153	list[`1`] = c == OP_STAR \|\| c == OP_PLUS \|\| c == OP_QUERY \|\| c == OP_UPTO;
1154
1155	if (end != NULL && compare_opcodes(end, utf, ucp, cb, list, end,
1156	&rec_limit))
1157	{
1158	switch(c)
1159	{
1160	case OP_STAR:
1161	*code += OP_POSSTAR - OP_STAR;
1162	break;
1163
1164	case OP_MINSTAR:
1165	*code += OP_POSSTAR - OP_MINSTAR;
1166	break;
1167
1168	case OP_PLUS:
1169	*code += OP_POSPLUS - OP_PLUS;
1170	break;
1171
1172	case OP_MINPLUS:
1173	*code += OP_POSPLUS - OP_MINPLUS;
1174	break;
1175
1176	case OP_QUERY:
1177	*code += OP_POSQUERY - OP_QUERY;
1178	break;
1179
1180	case OP_MINQUERY:
1181	*code += OP_POSQUERY - OP_MINQUERY;
1182	break;
1183
1184	case OP_UPTO:
1185	*code += OP_POSUPTO - OP_UPTO;
1186	break;
1187
1188	case OP_MINUPTO:
1189	*code += OP_POSUPTO - OP_MINUPTO;
1190	break;
1191	}
1192	}
1193	c = *code;
1194	}
1195	else if (c == OP_CLASS \|\| c == OP_NCLASS \|\| c == OP_XCLASS)
1196	{
1197	#ifdef SUPPORT_WIDE_CHARS
1198	if (c == OP_XCLASS)
1199	repeat_opcode = code + GET(code, `1`);
1200	else
1201	#endif
1202	repeat_opcode = code + `1` + (`32` / sizeof(PCRE2_UCHAR));
1203
1204	c = *repeat_opcode;
1205	if (c >= OP_CRSTAR && c <= OP_CRMINRANGE)
1206	{
1207	/ The return from get_chr_property_list() will never be NULL when*
1208	*code (aka c) is one of the three class opcodes. However, gcc with
1209	-fanalyzer notes that a NULL return is possible, and grumbles. Hence we
1210	put in a check. /*
1211
1212	end = get_chr_property_list(code, utf, ucp, cb->fcc, list);
1213	list[`1`] = (c & `1`) == `0`;
1214
1215	if (end != NULL &&
1216	compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit))
1217	{
1218	switch (c)
1219	{
1220	case OP_CRSTAR:
1221	case OP_CRMINSTAR:
1222	*repeat_opcode = OP_CRPOSSTAR;
1223	break;
1224
1225	case OP_CRPLUS:
1226	case OP_CRMINPLUS:
1227	*repeat_opcode = OP_CRPOSPLUS;
1228	break;
1229
1230	case OP_CRQUERY:
1231	case OP_CRMINQUERY:
1232	*repeat_opcode = OP_CRPOSQUERY;
1233	break;
1234
1235	case OP_CRRANGE:
1236	case OP_CRMINRANGE:
1237	*repeat_opcode = OP_CRPOSRANGE;
1238	break;
1239	}
1240	}
1241	}
1242	c = *code;
1243	}
1244
1245	switch(c)
1246	{
1247	case OP_END:
1248	return `0`;
1249
1250	case OP_TYPESTAR:
1251	case OP_TYPEMINSTAR:
1252	case OP_TYPEPLUS:
1253	case OP_TYPEMINPLUS:
1254	case OP_TYPEQUERY:
1255	case OP_TYPEMINQUERY:
1256	case OP_TYPEPOSSTAR:
1257	case OP_TYPEPOSPLUS:
1258	case OP_TYPEPOSQUERY:
1259	if (code[`1`] == OP_PROP \|\| code[`1`] == OP_NOTPROP) code += `2`;
1260	break;
1261
1262	case OP_TYPEUPTO:
1263	case OP_TYPEMINUPTO:
1264	case OP_TYPEEXACT:
1265	case OP_TYPEPOSUPTO:
1266	if (code[`1` + IMM2_SIZE] == OP_PROP \|\| code[`1` + IMM2_SIZE] == OP_NOTPROP)
1267	code += `2`;
1268	break;
1269
1270	case OP_CALLOUT_STR:
1271	code += GET(code, `1` + `2`*LINK_SIZE);
1272	break;
1273
1274	#ifdef SUPPORT_WIDE_CHARS
1275	case OP_XCLASS:
1276	code += GET(code, `1`);
1277	break;
1278	#endif
1279
1280	case OP_MARK:
1281	case OP_COMMIT_ARG:
1282	case OP_PRUNE_ARG:
1283	case OP_SKIP_ARG:
1284	case OP_THEN_ARG:
1285	code += code[`1`];
1286	break;
1287	}
1288
1289	/ Add in the fixed length from the table /
1290
1291	code += PRIV(OP_lengths)[c];
1292
1293	/ In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be*
1294	followed by a multi-byte character. The length in the table is a minimum, so
1295	we have to arrange to skip the extra code units. /*
1296
1297	#ifdef MAYBE_UTF_MULTI
1298	if (utf) switch(c)
1299	{
1300	case OP_CHAR:
1301	case OP_CHARI:
1302	case OP_NOT:
1303	case OP_NOTI:
1304	case OP_STAR:
1305	case OP_MINSTAR:
1306	case OP_PLUS:
1307	case OP_MINPLUS:
1308	case OP_QUERY:
1309	case OP_MINQUERY:
1310	case OP_UPTO:
1311	case OP_MINUPTO:
1312	case OP_EXACT:
1313	case OP_POSSTAR:
1314	case OP_POSPLUS:
1315	case OP_POSQUERY:
1316	case OP_POSUPTO:
1317	case OP_STARI:
1318	case OP_MINSTARI:
1319	case OP_PLUSI:
1320	case OP_MINPLUSI:
1321	case OP_QUERYI:
1322	case OP_MINQUERYI:
1323	case OP_UPTOI:
1324	case OP_MINUPTOI:
1325	case OP_EXACTI:
1326	case OP_POSSTARI:
1327	case OP_POSPLUSI:
1328	case OP_POSQUERYI:
1329	case OP_POSUPTOI:
1330	case OP_NOTSTAR:
1331	case OP_NOTMINSTAR:
1332	case OP_NOTPLUS:
1333	case OP_NOTMINPLUS:
1334	case OP_NOTQUERY:
1335	case OP_NOTMINQUERY:
1336	case OP_NOTUPTO:
1337	case OP_NOTMINUPTO:
1338	case OP_NOTEXACT:
1339	case OP_NOTPOSSTAR:
1340	case OP_NOTPOSPLUS:
1341	case OP_NOTPOSQUERY:
1342	case OP_NOTPOSUPTO:
1343	case OP_NOTSTARI:
1344	case OP_NOTMINSTARI:
1345	case OP_NOTPLUSI:
1346	case OP_NOTMINPLUSI:
1347	case OP_NOTQUERYI:
1348	case OP_NOTMINQUERYI:
1349	case OP_NOTUPTOI:
1350	case OP_NOTMINUPTOI:
1351	case OP_NOTEXACTI:
1352	case OP_NOTPOSSTARI:
1353	case OP_NOTPOSPLUSI:
1354	case OP_NOTPOSQUERYI:
1355	case OP_NOTPOSUPTOI:
1356	if (HAS_EXTRALEN(code[-`1`])) code += GET_EXTRALEN(code[-`1`]);
1357	break;
1358	}
1359	#else
1360	(void)(utf); / Keep compiler happy by referencing function argument /
1361	#endif /* SUPPORT_WIDE_CHARS */
1362	}
1363	}
1364
/* End of pcre2_auto_possess.c */
1366

Browse the source code of Godot/thirdparty/pcre2/src/pcre2_auto_possess.c