pcre2_substitute.c source code [Godot/thirdparty/pcre2/src/pcre2_substitute.c]

1	/*************************************************
2	* Perl-Compatible Regular Expressions *
3	*************************************************/
4
5	/* PCRE is a library of functions to support regular expressions whose syntax
6	and semantics are as close as possible to those of the Perl 5 language.
7
8	Written by Philip Hazel
9	Original API code Copyright (c) 1997-2012 University of Cambridge
10	New API code Copyright (c) 2016-2022 University of Cambridge
11
12	-----------------------------------------------------------------------------
13	Redistribution and use in source and binary forms, with or without
14	modification, are permitted provided that the following conditions are met:
15
16	* Redistributions of source code must retain the above copyright notice,
17	this list of conditions and the following disclaimer.
18
19	* Redistributions in binary form must reproduce the above copyright
20	notice, this list of conditions and the following disclaimer in the
21	documentation and/or other materials provided with the distribution.
22
23	* Neither the name of the University of Cambridge nor the names of its
24	contributors may be used to endorse or promote products derived from
25	this software without specific prior written permission.
26
27	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28	AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29	IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30	ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31	LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32	CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33	SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34	INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35	CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36	ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37	POSSIBILITY OF SUCH DAMAGE.
38	-----------------------------------------------------------------------------
39	*/
40
41
42	#ifdef HAVE_CONFIG_H
43	#include "config.h"
44	#endif
45
46	#include "pcre2_internal.h"
47
/* Number of slots in the pointer stack used by pcre2_substitute() when it
reprocesses nested ${name:+...} / ${name:-...} texts. Each nesting level pushes
two entries (the resume pointer and the saved end pointer). */

#define PTR_STACK_SIZE 20

/* All the option bits that belong to pcre2_substitute() itself. They are
saved separately and stripped from the options before calling pcre2_match(). */

#define SUBSTITUTE_OPTIONS \
  (PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
   PCRE2_SUBSTITUTE_LITERAL|PCRE2_SUBSTITUTE_MATCHED| \
   PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_REPLACEMENT_ONLY| \
   PCRE2_SUBSTITUTE_UNKNOWN_UNSET|PCRE2_SUBSTITUTE_UNSET_EMPTY)
55
56
57
58	/*************************************************
59	* Find end of substitute text *
60	*************************************************/
61
62	/ In extended mode, we recognize ${name:+set text:unset text} and similar*
63	constructions. This requires the identification of unescaped : and }
64	characters. This function scans for such. It must deal with nested ${
65	constructions. The pointer to the text is updated, either to the required end
66	character, or to where an error was detected.
67
68	Arguments:
69	code points to the compiled expression (for options)
70	ptrptr points to the pointer to the start of the text (updated)
71	ptrend end of the whole string
72	last TRUE if the last expected string (only } recognized)
73
74	Returns: 0 on success
75	negative error code on failure
76	*/
77
78	static int
79	find_text_end(const pcre2_code code, PCRE2_SPTR ptrptr, PCRE2_SPTR ptrend,
80	BOOL last)
81	{
82	int rc = `0`;
83	uint32_t nestlevel = `0`;
84	BOOL literal = FALSE;
85	PCRE2_SPTR ptr = *ptrptr;
86
87	for (; ptr < ptrend; ptr++)
88	{
89	if (literal)
90	{
91	if (ptr[`0`] == CHAR_BACKSLASH && ptr < ptrend - `1` && ptr[`1`] == CHAR_E)
92	{
93	literal = FALSE;
94	ptr += `1`;
95	}
96	}
97
98	else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
99	{
100	if (nestlevel == `0`) goto EXIT;
101	nestlevel--;
102	}
103
104	else if (ptr == CHAR_COLON && !last && nestlevel == `0`) goto* EXIT;
105
106	else if (*ptr == CHAR_DOLLAR_SIGN)
107	{
108	if (ptr < ptrend - `1` && ptr[`1`] == CHAR_LEFT_CURLY_BRACKET)
109	{
110	nestlevel++;
111	ptr += `1`;
112	}
113	}
114
115	else if (*ptr == CHAR_BACKSLASH)
116	{
117	int erc;
118	int errorcode;
119	uint32_t ch;
120
121	if (ptr < ptrend - `1`) switch (ptr[`1`])
122	{
123	case CHAR_L:
124	case CHAR_l:
125	case CHAR_U:
126	case CHAR_u:
127	ptr += `1`;
128	continue;
129	}
130
131	ptr += `1`; / Must point after \ /
132	erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
133	code->overall_options, code->extra_options, FALSE, NULL);
134	ptr -= `1`; / Back to last code unit of escape /
135	if (errorcode != `0`)
136	{
137	rc = errorcode;
138	goto EXIT;
139	}
140
141	switch(erc)
142	{
143	case `0`: / Data character /
144	case ESC_E: / Isolated \E is ignored /
145	break;
146
147	case ESC_Q:
148	literal = TRUE;
149	break;
150
151	default:
152	rc = PCRE2_ERROR_BADREPESCAPE;
153	goto EXIT;
154	}
155	}
156	}
157
158	rc = PCRE2_ERROR_REPMISSINGBRACE; / Terminator not found /
159
160	EXIT:
161	*ptrptr = ptr;
162	return rc;
163	}
164
165
166
167	/*************************************************
168	* Match and substitute *
169	*************************************************/
170
171	/* This function applies a compiled re to a subject string and creates a new
172	string with substitutions. The first 7 arguments are the same as for
173	pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
174
175	Arguments:
176	code points to the compiled expression
177	subject points to the subject string
178	length length of subject string (may contain binary zeros)
179	start_offset where to start in the subject string
180	options option bits
181	match_data points to a match_data block, or is NULL
182	context points a PCRE2 context
183	replacement points to the replacement string
184	rlength length of replacement string
185	buffer where to put the substituted string
186	blength points to length of buffer; updated to length of string
187
188	Returns: >= 0 number of substitutions made
189	< 0 an error code
190	PCRE2_ERROR_BADREPLACEMENT means invalid use of $
191	*/
192
/* This macro checks for space in the buffer before copying into it. On
overflow, either give an error immediately, or keep on, accumulating the
length.

It relies on these names being in scope at the point of use: overflowed,
lengthleft, suboptions, extra_needed, buffer, buff_offset, and a NOROOM label.
Lengths are in code units; CU2BYTES converts to bytes for memcpy(). The
arguments are parenthesized so that any expression may be passed, and the body
is wrapped in do { } while (0) so that an invocation followed by a semicolon is
a single statement. */

#define CHECKMEMCPY(from,length) \
  do \
    { \
    if (!overflowed && lengthleft < (length)) \
      { \
      if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
      overflowed = TRUE; \
      extra_needed = (length) - lengthleft; \
      } \
    else if (overflowed) \
      { \
      extra_needed += (length); \
      } \
    else \
      { \
      memcpy(buffer + buff_offset, (from), CU2BYTES(length)); \
      buff_offset += (length); \
      lengthleft -= (length); \
      } \
    } \
  while (0)
216
217	/ Here's the function /
218
219	PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
220	pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
221	PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
222	pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
223	PCRE2_UCHAR buffer, PCRE2_SIZE blength)
224	{
225	int rc;
226	int subs;
227	int forcecase = `0`;
228	int forcecasereset = `0`;
229	uint32_t ovector_count;
230	uint32_t goptions = `0`;
231	uint32_t suboptions;
232	pcre2_match_data *internal_match_data = NULL;
233	BOOL escaped_literal = FALSE;
234	BOOL overflowed = FALSE;
235	BOOL use_existing_match;
236	BOOL replacement_only;
237	#ifdef SUPPORT_UNICODE
238	BOOL utf = (code->overall_options & PCRE2_UTF) != `0`;
239	BOOL ucp = (code->overall_options & PCRE2_UCP) != `0`;
240	#endif
241	PCRE2_UCHAR temp[`6`];
242	PCRE2_SPTR ptr;
243	PCRE2_SPTR repend;
244	PCRE2_SIZE extra_needed = `0`;
245	PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
246	PCRE2_SIZE *ovector;
247	PCRE2_SIZE ovecsave[`3`];
248	pcre2_substitute_callout_block scb;
249
250	/ General initialization /
251
252	buff_offset = `0`;
253	lengthleft = buff_length = *blength;
254	*blength = PCRE2_UNSET;
255	ovecsave[`0`] = ovecsave[`1`] = ovecsave[`2`] = PCRE2_UNSET;
256
257	/ Partial matching is not valid. This must come after setting blength to
258	PCRE2_UNSET, so as not to imply an offset in the replacement. /*
259
260	if ((options & (PCRE2_PARTIAL_HARD\|PCRE2_PARTIAL_SOFT)) != `0`)
261	return PCRE2_ERROR_BADOPTION;
262
263	/ Validate length and find the end of the replacement. A NULL replacement of*
264	zero length is interpreted as an empty string. /*
265
266	if (replacement == NULL)
267	{
268	if (rlength != `0`) return PCRE2_ERROR_NULL;
269	replacement = (PCRE2_SPTR)"";
270	}
271
272	if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
273	repend = replacement + rlength;
274
275	/ Check for using a match that has already happened. Note that the subject*
276	pointer in the match data may be NULL after a no-match. /*
277
278	use_existing_match = ((options & PCRE2_SUBSTITUTE_MATCHED) != `0`);
279	replacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != `0`);
280
281	/ If starting from an existing match, there must be an externally provided*
282	match data block. We create an internal match_data block in two cases: (a) an
283	external one is not supplied (and we are not starting from an existing match);
284	(b) an existing match is to be used for the first substitution. In the latter
285	case, we copy the existing match into the internal block, except for any cached
286	heap frame size and pointer. This ensures that no changes are made to the
287	external match data block. /*
288
289	if (match_data == NULL)
290	{
291	pcre2_general_context *gcontext;
292	if (use_existing_match) return PCRE2_ERROR_NULL;
293	gcontext = (mcontext == NULL)?
294	(pcre2_general_context *)code :
295	(pcre2_general_context *)mcontext;
296	match_data = internal_match_data =
297	pcre2_match_data_create_from_pattern(code, gcontext);
298	if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
299	}
300
301	else if (use_existing_match)
302	{
303	pcre2_general_context *gcontext = (mcontext == NULL)?
304	(pcre2_general_context *)code :
305	(pcre2_general_context *)mcontext;
306	int pairs = (code->top_bracket + `1` < match_data->oveccount)?
307	code->top_bracket + `1` : match_data->oveccount;
308	internal_match_data = pcre2_match_data_create(match_data->oveccount,
309	gcontext);
310	if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
311	memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector)
312	+ `2`pairssizeof(PCRE2_SIZE));
313	internal_match_data->heapframes = NULL;
314	internal_match_data->heapframes_size = `0`;
315	match_data = internal_match_data;
316	}
317
318	/ Remember ovector details /
319
320	ovector = pcre2_get_ovector_pointer(match_data);
321	ovector_count = pcre2_get_ovector_count(match_data);
322
323	/ Fixed things in the callout block /
324
325	scb.version = `0`;
326	scb.input = subject;
327	scb.output = (PCRE2_SPTR)buffer;
328	scb.ovector = ovector;
329
330	/ A NULL subject of zero length is treated as an empty string. /
331
332	if (subject == NULL)
333	{
334	if (length != `0`) return PCRE2_ERROR_NULL;
335	subject = (PCRE2_SPTR)"";
336	}
337
338	/ Find length of zero-terminated subject /
339
340	if (length == PCRE2_ZERO_TERMINATED)
341	length = subject? PRIV(strlen)(subject) : `0`;
342
343	/ Check UTF replacement string if necessary. /
344
345	#ifdef SUPPORT_UNICODE
346	if (utf && (options & PCRE2_NO_UTF_CHECK) == `0`)
347	{
348	rc = PRIV(valid_utf)(replacement, rlength, &(match_data->startchar));
349	if (rc != `0`)
350	{
351	match_data->leftchar = `0`;
352	goto EXIT;
353	}
354	}
355	#endif /* SUPPORT_UNICODE */
356
357	/ Save the substitute options and remove them from the match options. /
358
359	suboptions = options & SUBSTITUTE_OPTIONS;
360	options &= ~SUBSTITUTE_OPTIONS;
361
362	/ Error if the start match offset is greater than the length of the subject. /
363
364	if (start_offset > length)
365	{
366	match_data->leftchar = `0`;
367	rc = PCRE2_ERROR_BADOFFSET;
368	goto EXIT;
369	}
370
371	/ Copy up to the start offset, unless only the replacement is required. /
372
373	if (!replacement_only) CHECKMEMCPY(subject, start_offset);
374
375	/ Loop for global substituting. If PCRE2_SUBSTITUTE_MATCHED is set, the first*
376	match is taken from the match_data that was passed in. /*
377
378	subs = `0`;
379	do
380	{
381	PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
382	uint32_t ptrstackptr = `0`;
383
384	if (use_existing_match)
385	{
386	rc = match_data->rc;
387	use_existing_match = FALSE;
388	}
389	else rc = pcre2_match(code, subject, length, start_offset, options\|goptions,
390	match_data, mcontext);
391
392	#ifdef SUPPORT_UNICODE
393	if (utf) options \|= PCRE2_NO_UTF_CHECK; / Only need to check once /
394	#endif
395
396	/ Any error other than no match returns the error code. No match when not*
397	doing the special after-empty-match global rematch, or when at the end of the
398	subject, breaks the global loop. Otherwise, advance the starting point by one
399	character, copying it to the output, and try again. /*
400
401	if (rc < `0`)
402	{
403	PCRE2_SIZE save_start;
404
405	if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
406	if (goptions == `0` \|\| start_offset >= length) break;
407
408	/ Advance by one code point. Then, if CRLF is a valid newline sequence and*
409	we have advanced into the middle of it, advance one more code point. In
410	other words, do not start in the middle of CRLF, even if CR and LF on their
411	own are valid newlines. /*
412
413	save_start = start_offset++;
414	if (subject[start_offset-`1`] == CHAR_CR &&
415	code->newline_convention != PCRE2_NEWLINE_CR &&
416	code->newline_convention != PCRE2_NEWLINE_LF &&
417	start_offset < length &&
418	subject[start_offset] == CHAR_LF)
419	start_offset++;
420
421	/ Otherwise, in UTF mode, advance past any secondary code points. /
422
423	else if ((code->overall_options & PCRE2_UTF) != `0`)
424	{
425	#if PCRE2_CODE_UNIT_WIDTH == 8
426	while (start_offset < length && (subject[start_offset] & `0xc0`) == `0x80`)
427	start_offset++;
428	#elif PCRE2_CODE_UNIT_WIDTH == 16
429	while (start_offset < length &&
430	(subject[start_offset] & `0xfc00`) == `0xdc00`)
431	start_offset++;
432	#endif
433	}
434
435	/ Copy what we have advanced past (unless not required), reset the special*
436	global options, and continue to the next match. /*
437
438	fraglength = start_offset - save_start;
439	if (!replacement_only) CHECKMEMCPY(subject + save_start, fraglength);
440	goptions = `0`;
441	continue;
442	}
443
444	/ Handle a successful match. Matches that use \K to end before they start*
445	or start before the current point in the subject are not supported. /*
446
447	if (ovector[`1`] < ovector[`0`] \|\| ovector[`0`] < start_offset)
448	{
449	rc = PCRE2_ERROR_BADSUBSPATTERN;
450	goto EXIT;
451	}
452
453	/ Check for the same match as previous. This is legitimate after matching an*
454	empty string that starts after the initial match offset. We have tried again
455	at the match point in case the pattern is one like /(?<=\G.)/ which can never
456	match at its starting point, so running the match achieves the bumpalong. If
457	we do get the same (null) match at the original match point, it isn't such a
458	pattern, so we now do the empty string magic. In all other cases, a repeat
459	match should never occur. /*
460
461	if (ovecsave[`0`] == ovector[`0`] && ovecsave[`1`] == ovector[`1`])
462	{
463	if (ovector[`0`] == ovector[`1`] && ovecsave[`2`] != start_offset)
464	{
465	goptions = PCRE2_NOTEMPTY_ATSTART \| PCRE2_ANCHORED;
466	ovecsave[`2`] = start_offset;
467	continue; / Back to the top of the loop /
468	}
469	rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
470	goto EXIT;
471	}
472
473	/ Count substitutions with a paranoid check for integer overflow; surely no*
474	real call to this function would ever hit this! /*
475
476	if (subs == INT_MAX)
477	{
478	rc = PCRE2_ERROR_TOOMANYREPLACE;
479	goto EXIT;
480	}
481	subs++;
482
483	/ Copy the text leading up to the match (unless not required), and remember*
484	where the insert begins and how many ovector pairs are set. /*
485
486	if (rc == `0`) rc = ovector_count;
487	fraglength = ovector[`0`] - start_offset;
488	if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength);
489	scb.output_offsets[`0`] = buff_offset;
490	scb.oveccount = rc;
491
492	/ Process the replacement string. If the entire replacement is literal, just*
493	copy it with length check. /*
494
495	ptr = replacement;
496	if ((suboptions & PCRE2_SUBSTITUTE_LITERAL) != `0`)
497	{
498	CHECKMEMCPY(ptr, rlength);
499	}
500
501	/ Within a non-literal replacement, which must be scanned character by*
502	character, local literal mode can be set by \Q, but only in extended mode
503	when backslashes are being interpreted. In extended mode we must handle
504	nested substrings that are to be reprocessed. /*
505
506	else for (;;)
507	{
508	uint32_t ch;
509	unsigned int chlen;
510
511	/ If at the end of a nested substring, pop the stack. /
512
513	if (ptr >= repend)
514	{
515	if (ptrstackptr == `0`) break; / End of replacement string /
516	repend = ptrstack[--ptrstackptr];
517	ptr = ptrstack[--ptrstackptr];
518	continue;
519	}
520
521	/ Handle the next character /
522
523	if (escaped_literal)
524	{
525	if (ptr[`0`] == CHAR_BACKSLASH && ptr < repend - `1` && ptr[`1`] == CHAR_E)
526	{
527	escaped_literal = FALSE;
528	ptr += `2`;
529	continue;
530	}
531	goto LOADLITERAL;
532	}
533
534	/ Not in literal mode. /
535
536	if (*ptr == CHAR_DOLLAR_SIGN)
537	{
538	int group, n;
539	uint32_t special = `0`;
540	BOOL inparens;
541	BOOL star;
542	PCRE2_SIZE sublength;
543	PCRE2_SPTR text1_start = NULL;
544	PCRE2_SPTR text1_end = NULL;
545	PCRE2_SPTR text2_start = NULL;
546	PCRE2_SPTR text2_end = NULL;
547	PCRE2_UCHAR next;
548	PCRE2_UCHAR name[`33`];
549
550	if (++ptr >= repend) goto BAD;
551	if ((next = ptr) == CHAR_DOLLAR_SIGN) goto* LOADLITERAL;
552
553	group = -`1`;
554	n = `0`;
555	inparens = FALSE;
556	star = FALSE;
557
558	if (next == CHAR_LEFT_CURLY_BRACKET)
559	{
560	if (++ptr >= repend) goto BAD;
561	next = *ptr;
562	inparens = TRUE;
563	}
564
565	if (next == CHAR_ASTERISK)
566	{
567	if (++ptr >= repend) goto BAD;
568	next = *ptr;
569	star = TRUE;
570	}
571
572	if (!star && next >= CHAR_0 && next <= CHAR_9)
573	{
574	group = next - CHAR_0;
575	while (++ptr < repend)
576	{
577	next = *ptr;
578	if (next < CHAR_0 \|\| next > CHAR_9) break;
579	group = group * `10` + next - CHAR_0;
580
581	/ A check for a number greater than the hightest captured group*
582	is sufficient here; no need for a separate overflow check. If unknown
583	groups are to be treated as unset, just skip over any remaining
584	digits and carry on. /*
585
586	if (group > code->top_bracket)
587	{
588	if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != `0`)
589	{
590	while (++ptr < repend && ptr >= CHAR_0 && ptr <= CHAR_9);
591	break;
592	}
593	else
594	{
595	rc = PCRE2_ERROR_NOSUBSTRING;
596	goto PTREXIT;
597	}
598	}
599	}
600	}
601	else
602	{
603	const uint8_t *ctypes = code->tables + ctypes_offset;
604	while (MAX_255(next) && (ctypes[next] & ctype_word) != `0`)
605	{
606	name[n++] = next;
607	if (n > `32`) goto BAD;
608	if (++ptr >= repend) break;
609	next = *ptr;
610	}
611	if (n == `0`) goto BAD;
612	name[n] = `0`;
613	}
614
615	/ In extended mode we recognize ${name:+set text:unset text} and*
616	${name:-default text}. /*
617
618	if (inparens)
619	{
620	if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != `0` &&
621	!star && ptr < repend - `2` && next == CHAR_COLON)
622	{
623	special = *(++ptr);
624	if (special != CHAR_PLUS && special != CHAR_MINUS)
625	{
626	rc = PCRE2_ERROR_BADSUBSTITUTION;
627	goto PTREXIT;
628	}
629
630	text1_start = ++ptr;
631	rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
632	if (rc != `0`) goto PTREXIT;
633	text1_end = ptr;
634
635	if (special == CHAR_PLUS && *ptr == CHAR_COLON)
636	{
637	text2_start = ++ptr;
638	rc = find_text_end(code, &ptr, repend, TRUE);
639	if (rc != `0`) goto PTREXIT;
640	text2_end = ptr;
641	}
642	}
643
644	else
645	{
646	if (ptr >= repend \|\| *ptr != CHAR_RIGHT_CURLY_BRACKET)
647	{
648	rc = PCRE2_ERROR_REPMISSINGBRACE;
649	goto PTREXIT;
650	}
651	}
652
653	ptr++;
654	}
655
656	/ Have found a syntactically correct group number or name, or name.
657	Only MARK is currently recognized. /
658
659	if (star)
660	{
661	if (PRIV(strcmp_c8)(name, STRING_MARK) == `0`)
662	{
663	PCRE2_SPTR mark = pcre2_get_mark(match_data);
664	if (mark != NULL)
665	{
666	PCRE2_SPTR mark_start = mark;
667	while (*mark != `0`) mark++;
668	fraglength = mark - mark_start;
669	CHECKMEMCPY(mark_start, fraglength);
670	}
671	}
672	else goto BAD;
673	}
674
675	/ Substitute the contents of a group. We don't use substring_copy*
676	functions any more, in order to support case forcing. /*
677
678	else
679	{
680	PCRE2_SPTR subptr, subptrend;
681
682	/ Find a number for a named group. In case there are duplicate names,*
683	search for the first one that is set. If the name is not found when
684	PCRE2_SUBSTITUTE_UNKNOWN_EMPTY is set, set the group number to a
685	non-existent group. /*
686
687	if (group < `0`)
688	{
689	PCRE2_SPTR first, last, entry;
690	rc = pcre2_substring_nametable_scan(code, name, &first, &last);
691	if (rc == PCRE2_ERROR_NOSUBSTRING &&
692	(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != `0`)
693	{
694	group = code->top_bracket + `1`;
695	}
696	else
697	{
698	if (rc < `0`) goto PTREXIT;
699	for (entry = first; entry <= last; entry += rc)
700	{
701	uint32_t ng = GET2(entry, `0`);
702	if (ng < ovector_count)
703	{
704	if (group < `0`) group = ng; / First in ovector /
705	if (ovector[ng*`2`] != PCRE2_UNSET)
706	{
707	group = ng; / First that is set /
708	break;
709	}
710	}
711	}
712
713	/ If group is still negative, it means we did not find a group*
714	that is in the ovector. Just set the first group. /*
715
716	if (group < `0`) group = GET2(first, `0`);
717	}
718	}
719
720	/ We now have a group that is identified by number. Find the length of*
721	the captured string. If a group in a non-special substitution is unset
722	when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. /*
723
724	rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
725	if (rc < `0`)
726	{
727	if (rc == PCRE2_ERROR_NOSUBSTRING &&
728	(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != `0`)
729	{
730	rc = PCRE2_ERROR_UNSET;
731	}
732	if (rc != PCRE2_ERROR_UNSET) goto PTREXIT; / Non-unset errors /
733	if (special == `0`) / Plain substitution /
734	{
735	if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != `0`) continue;
736	goto PTREXIT; / Else error /
737	}
738	}
739
740	/ If special is '+' we have a 'set' and possibly an 'unset' text,*
741	both of which are reprocessed when used. If special is '-' we have a
742	default text for when the group is unset; it must be reprocessed. /*
743
744	if (special != `0`)
745	{
746	if (special == CHAR_MINUS)
747	{
748	if (rc == `0`) goto LITERAL_SUBSTITUTE;
749	text2_start = text1_start;
750	text2_end = text1_end;
751	}
752
753	if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
754	ptrstack[ptrstackptr++] = ptr;
755	ptrstack[ptrstackptr++] = repend;
756
757	if (rc == `0`)
758	{
759	ptr = text1_start;
760	repend = text1_end;
761	}
762	else
763	{
764	ptr = text2_start;
765	repend = text2_end;
766	}
767	continue;
768	}
769
770	/ Otherwise we have a literal substitution of a group's contents. /
771
772	LITERAL_SUBSTITUTE:
773	subptr = subject + ovector[group*`2`];
774	subptrend = subject + ovector[group*`2` + `1`];
775
776	/ Substitute a literal string, possibly forcing alphabetic case. /
777
778	while (subptr < subptrend)
779	{
780	GETCHARINCTEST(ch, subptr);
781	if (forcecase != `0`)
782	{
783	#ifdef SUPPORT_UNICODE
784	if (utf \|\| ucp)
785	{
786	uint32_t type = UCD_CHARTYPE(ch);
787	if (PRIV(ucp_gentype)[type] == ucp_L &&
788	type != ((forcecase > `0`)? ucp_Lu : ucp_Ll))
789	ch = UCD_OTHERCASE(ch);
790	}
791	else
792	#endif
793	{
794	if (((code->tables + cbits_offset +
795	((forcecase > `0`)? cbit_upper:cbit_lower)
796	)[ch/`8`] & (`1u` << (ch%`8`))) == `0`)
797	ch = (code->tables + fcc_offset)[ch];
798	}
799	forcecase = forcecasereset;
800	}
801
802	#ifdef SUPPORT_UNICODE
803	if (utf) chlen = PRIV(ord2utf)(ch, temp); else
804	#endif
805	{
806	temp[`0`] = ch;
807	chlen = `1`;
808	}
809	CHECKMEMCPY(temp, chlen);
810	}
811	}
812	}
813
814	/ Handle an escape sequence in extended mode. We can use check_escape()*
815	to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
816	the case-forcing escapes are not supported in pcre2_compile() so must be
817	recognized here. /*
818
819	else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != `0` &&
820	*ptr == CHAR_BACKSLASH)
821	{
822	int errorcode;
823
824	if (ptr < repend - `1`) switch (ptr[`1`])
825	{
826	case CHAR_L:
827	forcecase = forcecasereset = -`1`;
828	ptr += `2`;
829	continue;
830
831	case CHAR_l:
832	forcecase = -`1`;
833	forcecasereset = `0`;
834	ptr += `2`;
835	continue;
836
837	case CHAR_U:
838	forcecase = forcecasereset = `1`;
839	ptr += `2`;
840	continue;
841
842	case CHAR_u:
843	forcecase = `1`;
844	forcecasereset = `0`;
845	ptr += `2`;
846	continue;
847
848	default:
849	break;
850	}
851
852	ptr++; / Point after \ /
853	rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
854	code->overall_options, code->extra_options, FALSE, NULL);
855	if (errorcode != `0`) goto BADESCAPE;
856
857	switch(rc)
858	{
859	case ESC_E:
860	forcecase = forcecasereset = `0`;
861	continue;
862
863	case ESC_Q:
864	escaped_literal = TRUE;
865	continue;
866
867	case `0`: / Data character /
868	goto LITERAL;
869
870	default:
871	goto BADESCAPE;
872	}
873	}
874
875	/ Handle a literal code unit /
876
877	else
878	{
879	LOADLITERAL:
880	GETCHARINCTEST(ch, ptr); / Get character value, increment pointer /
881
882	LITERAL:
883	if (forcecase != `0`)
884	{
885	#ifdef SUPPORT_UNICODE
886	if (utf \|\| ucp)
887	{
888	uint32_t type = UCD_CHARTYPE(ch);
889	if (PRIV(ucp_gentype)[type] == ucp_L &&
890	type != ((forcecase > `0`)? ucp_Lu : ucp_Ll))
891	ch = UCD_OTHERCASE(ch);
892	}
893	else
894	#endif
895	{
896	if (((code->tables + cbits_offset +
897	((forcecase > `0`)? cbit_upper:cbit_lower)
898	)[ch/`8`] & (`1u` << (ch%`8`))) == `0`)
899	ch = (code->tables + fcc_offset)[ch];
900	}
901	forcecase = forcecasereset;
902	}
903
904	#ifdef SUPPORT_UNICODE
905	if (utf) chlen = PRIV(ord2utf)(ch, temp); else
906	#endif
907	{
908	temp[`0`] = ch;
909	chlen = `1`;
910	}
911	CHECKMEMCPY(temp, chlen);
912	} / End handling a literal code unit /
913	} / End of loop for scanning the replacement. /
914
915	/ The replacement has been copied to the output, or its size has been*
916	remembered. Do the callout if there is one and we have done an actual
917	replacement. /*
918
919	if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL)
920	{
921	scb.subscount = subs;
922	scb.output_offsets[`1`] = buff_offset;
923	rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
924
925	/ A non-zero return means cancel this substitution. Instead, copy the*
926	matched string fragment. /*
927
928	if (rc != `0`)
929	{
930	PCRE2_SIZE newlength = scb.output_offsets[`1`] - scb.output_offsets[`0`];
931	PCRE2_SIZE oldlength = ovector[`1`] - ovector[`0`];
932
933	buff_offset -= newlength;
934	lengthleft += newlength;
935	if (!replacement_only) CHECKMEMCPY(subject + ovector[`0`], oldlength);
936
937	/ A negative return means do not do any more. /
938
939	if (rc < `0`) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL);
940	}
941	}
942
943	/ Save the details of this match. See above for how this data is used. If we*
944	matched an empty string, do the magic for global matches. Update the start
945	offset to point to the rest of the subject string. If we re-used an existing
946	match for the first match, switch to the internal match data block. /*
947
948	ovecsave[`0`] = ovector[`0`];
949	ovecsave[`1`] = ovector[`1`];
950	ovecsave[`2`] = start_offset;
951
952	goptions = (ovector[`0`] != ovector[`1`] \|\| ovector[`0`] > start_offset)? `0` :
953	PCRE2_ANCHORED\|PCRE2_NOTEMPTY_ATSTART;
954	start_offset = ovector[`1`];
955	} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != `0`); / Repeat "do" loop /
956
957	/ Copy the rest of the subject unless not required, and terminate the output*
958	with a binary zero. /*
959
960	if (!replacement_only)
961	{
962	fraglength = length - start_offset;
963	CHECKMEMCPY(subject + start_offset, fraglength);
964	}
965
966	temp[`0`] = `0`;
967	CHECKMEMCPY(temp, `1`);
968
969	/ If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,*
970	and matching has carried on after a full buffer, in order to compute the length
971	needed. Otherwise, an overflow generates an immediate error return. /*
972
973	if (overflowed)
974	{
975	rc = PCRE2_ERROR_NOMEMORY;
976	*blength = buff_length + extra_needed;
977	}
978
979	/ After a successful execution, return the number of substitutions and set the*
980	length of buffer used, excluding the trailing zero. /*
981
982	else
983	{
984	rc = subs;
985	*blength = buff_offset - `1`;
986	}
987
988	EXIT:
989	if (internal_match_data != NULL) pcre2_match_data_free(internal_match_data);
990	else match_data->rc = rc;
991	return rc;
992
993	NOROOM:
994	rc = PCRE2_ERROR_NOMEMORY;
995	goto EXIT;
996
997	BAD:
998	rc = PCRE2_ERROR_BADREPLACEMENT;
999	goto PTREXIT;
1000
1001	BADESCAPE:
1002	rc = PCRE2_ERROR_BADREPESCAPE;
1003
1004	PTREXIT:
1005	*blength = (PCRE2_SIZE)(ptr - replacement);
1006	goto EXIT;
1007	}
1008
1009	/* End of pcre2_substitute.c */
1010

Browse the source code of Godot/thirdparty/pcre2/src/pcre2_substitute.c