pcre2_substitute.c source code [Qt/src/3rdparty/pcre2/src/pcre2_substitute.c]

/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2020 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/


42	#ifdef HAVE_CONFIG_H
43	#include "config.h"
44	#endif
45
46	#include "pcre2_internal.h"
47
/* Maximum number of entries on the pointer stack used by pcre2_substitute()
when it reprocesses nested replacement substrings. Each nesting level pushes
two pointers (the resume position and the saved end of the replacement). */

#define PTR_STACK_SIZE 20

/* All the option bits that are specific to pcre2_substitute(). They are
separated from the caller's options and removed before the remaining bits are
passed on to pcre2_match(). */

#define SUBSTITUTE_OPTIONS \
  (PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
   PCRE2_SUBSTITUTE_LITERAL|PCRE2_SUBSTITUTE_MATCHED| \
   PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_REPLACEMENT_ONLY| \
   PCRE2_SUBSTITUTE_UNKNOWN_UNSET|PCRE2_SUBSTITUTE_UNSET_EMPTY)



58	/*************************************************
59	* Find end of substitute text *
60	*************************************************/
61
62	/ In extended mode, we recognize ${name:+set text:unset text} and similar*
63	constructions. This requires the identification of unescaped : and }
64	characters. This function scans for such. It must deal with nested ${
65	constructions. The pointer to the text is updated, either to the required end
66	character, or to where an error was detected.
67
68	Arguments:
69	code points to the compiled expression (for options)
70	ptrptr points to the pointer to the start of the text (updated)
71	ptrend end of the whole string
72	last TRUE if the last expected string (only } recognized)
73
74	Returns: 0 on success
75	negative error code on failure
76	*/
77
78	static int
79	find_text_end(const pcre2_code code, PCRE2_SPTR ptrptr, PCRE2_SPTR ptrend,
80	BOOL last)
81	{
82	int rc = `0`;
83	uint32_t nestlevel = `0`;
84	BOOL literal = FALSE;
85	PCRE2_SPTR ptr = *ptrptr;
86
87	for (; ptr < ptrend; ptr++)
88	{
89	if (literal)
90	{
91	if (ptr[`0`] == CHAR_BACKSLASH && ptr < ptrend - `1` && ptr[`1`] == CHAR_E)
92	{
93	literal = FALSE;
94	ptr += `1`;
95	}
96	}
97
98	else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
99	{
100	if (nestlevel == `0`) goto EXIT;
101	nestlevel--;
102	}
103
104	else if (ptr == CHAR_COLON && !last && nestlevel == `0`) goto* EXIT;
105
106	else if (*ptr == CHAR_DOLLAR_SIGN)
107	{
108	if (ptr < ptrend - `1` && ptr[`1`] == CHAR_LEFT_CURLY_BRACKET)
109	{
110	nestlevel++;
111	ptr += `1`;
112	}
113	}
114
115	else if (*ptr == CHAR_BACKSLASH)
116	{
117	int erc;
118	int errorcode;
119	uint32_t ch;
120
121	if (ptr < ptrend - `1`) switch (ptr[`1`])
122	{
123	case CHAR_L:
124	case CHAR_l:
125	case CHAR_U:
126	case CHAR_u:
127	ptr += `1`;
128	continue;
129	}
130
131	ptr += `1`; / Must point after \ /
132	erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
133	code->overall_options, code->extra_options, FALSE, NULL);
134	ptr -= `1`; / Back to last code unit of escape /
135	if (errorcode != `0`)
136	{
137	rc = errorcode;
138	goto EXIT;
139	}
140
141	switch(erc)
142	{
143	case `0`: / Data character /
144	case ESC_E: / Isolated \E is ignored /
145	break;
146
147	case ESC_Q:
148	literal = TRUE;
149	break;
150
151	default:
152	rc = PCRE2_ERROR_BADREPESCAPE;
153	goto EXIT;
154	}
155	}
156	}
157
158	rc = PCRE2_ERROR_REPMISSINGBRACE; / Terminator not found /
159
160	EXIT:
161	*ptrptr = ptr;
162	return rc;
163	}
164
165
166
/*************************************************
*              Match and substitute              *
*************************************************/

/* This function applies a compiled re to a subject string and creates a new
string with substitutions. The first 7 arguments are the same as for
pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.

Arguments:
  code          points to the compiled expression
  subject       points to the subject string
  length        length of subject string (may contain binary zeros)
  start_offset  where to start in the subject string
  options       option bits
  match_data    points to a match_data block, or is NULL
  context       points to a PCRE2 context
  replacement   points to the replacement string
  rlength       length of replacement string
  buffer        where to put the substituted string
  blength       points to length of buffer; updated to length of string

Returns:        >= 0 number of substitutions made
                < 0 an error code
                PCRE2_ERROR_BADREPLACEMENT means invalid use of $
*/

/* This macro checks for space in the buffer before copying into it. On
overflow, either give an error immediately, or keep on, accumulating the
length.

It relies on names in the scope where it is expanded: the variables buffer,
buff_offset, lengthleft, extra_needed, overflowed and suboptions, and the label
NOROOM. Once overflowed is set nothing more is copied; only extra_needed is
increased. The length argument is evaluated more than once, so it must not have
side effects. */

#define CHECKMEMCPY(from,length) \
  { \
  if (!overflowed && lengthleft < (length)) \
    { \
    if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
    overflowed = TRUE; \
    extra_needed = (length) - lengthleft; \
    } \
  else if (overflowed) \
    { \
    extra_needed += (length); \
    } \
  else \
    {  \
    memcpy(buffer + buff_offset, (from), CU2BYTES(length)); \
    buff_offset += (length); \
    lengthleft -= (length); \
    } \
  }

217	/ Here's the function /
218
219	PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
220	pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
221	PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
222	pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
223	PCRE2_UCHAR buffer, PCRE2_SIZE blength)
224	{
225	int rc;
226	int subs;
227	int forcecase = `0`;
228	int forcecasereset = `0`;
229	uint32_t ovector_count;
230	uint32_t goptions = `0`;
231	uint32_t suboptions;
232	pcre2_match_data *internal_match_data = NULL;
233	BOOL escaped_literal = FALSE;
234	BOOL overflowed = FALSE;
235	BOOL use_existing_match;
236	BOOL replacement_only;
237	#ifdef SUPPORT_UNICODE
238	BOOL utf = (code->overall_options & PCRE2_UTF) != `0`;
239	BOOL ucp = (code->overall_options & PCRE2_UCP) != `0`;
240	#endif
241	PCRE2_UCHAR temp[`6`];
242	PCRE2_SPTR ptr;
243	PCRE2_SPTR repend;
244	PCRE2_SIZE extra_needed = `0`;
245	PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
246	PCRE2_SIZE *ovector;
247	PCRE2_SIZE ovecsave[`3`];
248	pcre2_substitute_callout_block scb;
249
250	/ General initialization /
251
252	buff_offset = `0`;
253	lengthleft = buff_length = *blength;
254	*blength = PCRE2_UNSET;
255	ovecsave[`0`] = ovecsave[`1`] = ovecsave[`2`] = PCRE2_UNSET;
256
257	/ Partial matching is not valid. This must come after setting blength to
258	PCRE2_UNSET, so as not to imply an offset in the replacement. /*
259
260	if ((options & (PCRE2_PARTIAL_HARD\|PCRE2_PARTIAL_SOFT)) != `0`)
261	return PCRE2_ERROR_BADOPTION;
262
263	/ Check for using a match that has already happened. Note that the subject*
264	pointer in the match data may be NULL after a no-match. /*
265
266	use_existing_match = ((options & PCRE2_SUBSTITUTE_MATCHED) != `0`);
267	replacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != `0`);
268
269	/ If starting from an existing match, there must be an externally provided*
270	match data block. We create an internal match_data block in two cases: (a) an
271	external one is not supplied (and we are not starting from an existing match);
272	(b) an existing match is to be used for the first substitution. In the latter
273	case, we copy the existing match into the internal block. This ensures that no
274	changes are made to the existing match data block. /*
275
276	if (match_data == NULL)
277	{
278	pcre2_general_context *gcontext;
279	if (use_existing_match) return PCRE2_ERROR_NULL;
280	gcontext = (mcontext == NULL)?
281	(pcre2_general_context *)code :
282	(pcre2_general_context *)mcontext;
283	match_data = internal_match_data =
284	pcre2_match_data_create_from_pattern(code, gcontext);
285	if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
286	}
287
288	else if (use_existing_match)
289	{
290	pcre2_general_context *gcontext = (mcontext == NULL)?
291	(pcre2_general_context *)code :
292	(pcre2_general_context *)mcontext;
293	int pairs = (code->top_bracket + `1` < match_data->oveccount)?
294	code->top_bracket + `1` : match_data->oveccount;
295	internal_match_data = pcre2_match_data_create(match_data->oveccount,
296	gcontext);
297	if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
298	memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector)
299	+ `2`pairssizeof(PCRE2_SIZE));
300	match_data = internal_match_data;
301	}
302
303	/ Remember ovector details /
304
305	ovector = pcre2_get_ovector_pointer(match_data);
306	ovector_count = pcre2_get_ovector_count(match_data);
307
308	/ Fixed things in the callout block /
309
310	scb.version = `0`;
311	scb.input = subject;
312	scb.output = (PCRE2_SPTR)buffer;
313	scb.ovector = ovector;
314
315	/ Find lengths of zero-terminated strings and the end of the replacement. /
316
317	if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
318	if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
319	repend = replacement + rlength;
320
321	/ Check UTF replacement string if necessary. /
322
323	#ifdef SUPPORT_UNICODE
324	if (utf && (options & PCRE2_NO_UTF_CHECK) == `0`)
325	{
326	rc = PRIV(valid_utf)(replacement, rlength, &(match_data->startchar));
327	if (rc != `0`)
328	{
329	match_data->leftchar = `0`;
330	goto EXIT;
331	}
332	}
333	#endif /* SUPPORT_UNICODE */
334
335	/ Save the substitute options and remove them from the match options. /
336
337	suboptions = options & SUBSTITUTE_OPTIONS;
338	options &= ~SUBSTITUTE_OPTIONS;
339
340	/ Error if the start match offset is greater than the length of the subject. /
341
342	if (start_offset > length)
343	{
344	match_data->leftchar = `0`;
345	rc = PCRE2_ERROR_BADOFFSET;
346	goto EXIT;
347	}
348
349	/ Copy up to the start offset, unless only the replacement is required. /
350
351	if (!replacement_only) CHECKMEMCPY(subject, start_offset);
352
353	/ Loop for global substituting. If PCRE2_SUBSTITUTE_MATCHED is set, the first*
354	match is taken from the match_data that was passed in. /*
355
356	subs = `0`;
357	do
358	{
359	PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
360	uint32_t ptrstackptr = `0`;
361
362	if (use_existing_match)
363	{
364	rc = match_data->rc;
365	use_existing_match = FALSE;
366	}
367	else rc = pcre2_match(code, subject, length, start_offset, options\|goptions,
368	match_data, mcontext);
369
370	#ifdef SUPPORT_UNICODE
371	if (utf) options \|= PCRE2_NO_UTF_CHECK; / Only need to check once /
372	#endif
373
374	/ Any error other than no match returns the error code. No match when not*
375	doing the special after-empty-match global rematch, or when at the end of the
376	subject, breaks the global loop. Otherwise, advance the starting point by one
377	character, copying it to the output, and try again. /*
378
379	if (rc < `0`)
380	{
381	PCRE2_SIZE save_start;
382
383	if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
384	if (goptions == `0` \|\| start_offset >= length) break;
385
386	/ Advance by one code point. Then, if CRLF is a valid newline sequence and*
387	we have advanced into the middle of it, advance one more code point. In
388	other words, do not start in the middle of CRLF, even if CR and LF on their
389	own are valid newlines. /*
390
391	save_start = start_offset++;
392	if (subject[start_offset-`1`] == CHAR_CR &&
393	code->newline_convention != PCRE2_NEWLINE_CR &&
394	code->newline_convention != PCRE2_NEWLINE_LF &&
395	start_offset < length &&
396	subject[start_offset] == CHAR_LF)
397	start_offset++;
398
399	/ Otherwise, in UTF mode, advance past any secondary code points. /
400
401	else if ((code->overall_options & PCRE2_UTF) != `0`)
402	{
403	#if PCRE2_CODE_UNIT_WIDTH == 8
404	while (start_offset < length && (subject[start_offset] & `0xc0`) == `0x80`)
405	start_offset++;
406	#elif PCRE2_CODE_UNIT_WIDTH == 16
407	while (start_offset < length &&
408	(subject[start_offset] & `0xfc00`) == `0xdc00`)
409	start_offset++;
410	#endif
411	}
412
413	/ Copy what we have advanced past (unless not required), reset the special*
414	global options, and continue to the next match. /*
415
416	fraglength = start_offset - save_start;
417	if (!replacement_only) CHECKMEMCPY(subject + save_start, fraglength);
418	goptions = `0`;
419	continue;
420	}
421
422	/ Handle a successful match. Matches that use \K to end before they start*
423	or start before the current point in the subject are not supported. /*
424
425	if (ovector[`1`] < ovector[`0`] \|\| ovector[`0`] < start_offset)
426	{
427	rc = PCRE2_ERROR_BADSUBSPATTERN;
428	goto EXIT;
429	}
430
431	/ Check for the same match as previous. This is legitimate after matching an*
432	empty string that starts after the initial match offset. We have tried again
433	at the match point in case the pattern is one like /(?<=\G.)/ which can never
434	match at its starting point, so running the match achieves the bumpalong. If
435	we do get the same (null) match at the original match point, it isn't such a
436	pattern, so we now do the empty string magic. In all other cases, a repeat
437	match should never occur. /*
438
439	if (ovecsave[`0`] == ovector[`0`] && ovecsave[`1`] == ovector[`1`])
440	{
441	if (ovector[`0`] == ovector[`1`] && ovecsave[`2`] != start_offset)
442	{
443	goptions = PCRE2_NOTEMPTY_ATSTART \| PCRE2_ANCHORED;
444	ovecsave[`2`] = start_offset;
445	continue; / Back to the top of the loop /
446	}
447	rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
448	goto EXIT;
449	}
450
451	/ Count substitutions with a paranoid check for integer overflow; surely no*
452	real call to this function would ever hit this! /*
453
454	if (subs == INT_MAX)
455	{
456	rc = PCRE2_ERROR_TOOMANYREPLACE;
457	goto EXIT;
458	}
459	subs++;
460
461	/ Copy the text leading up to the match (unless not required), and remember*
462	where the insert begins and how many ovector pairs are set. /*
463
464	if (rc == `0`) rc = ovector_count;
465	fraglength = ovector[`0`] - start_offset;
466	if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength);
467	scb.output_offsets[`0`] = buff_offset;
468	scb.oveccount = rc;
469
470	/ Process the replacement string. If the entire replacement is literal, just*
471	copy it with length check. /*
472
473	ptr = replacement;
474	if ((suboptions & PCRE2_SUBSTITUTE_LITERAL) != `0`)
475	{
476	CHECKMEMCPY(ptr, rlength);
477	}
478
479	/ Within a non-literal replacement, which must be scanned character by*
480	character, local literal mode can be set by \Q, but only in extended mode
481	when backslashes are being interpreted. In extended mode we must handle
482	nested substrings that are to be reprocessed. /*
483
484	else for (;;)
485	{
486	uint32_t ch;
487	unsigned int chlen;
488
489	/ If at the end of a nested substring, pop the stack. /
490
491	if (ptr >= repend)
492	{
493	if (ptrstackptr == `0`) break; / End of replacement string /
494	repend = ptrstack[--ptrstackptr];
495	ptr = ptrstack[--ptrstackptr];
496	continue;
497	}
498
499	/ Handle the next character /
500
501	if (escaped_literal)
502	{
503	if (ptr[`0`] == CHAR_BACKSLASH && ptr < repend - `1` && ptr[`1`] == CHAR_E)
504	{
505	escaped_literal = FALSE;
506	ptr += `2`;
507	continue;
508	}
509	goto LOADLITERAL;
510	}
511
512	/ Not in literal mode. /
513
514	if (*ptr == CHAR_DOLLAR_SIGN)
515	{
516	int group, n;
517	uint32_t special = `0`;
518	BOOL inparens;
519	BOOL star;
520	PCRE2_SIZE sublength;
521	PCRE2_SPTR text1_start = NULL;
522	PCRE2_SPTR text1_end = NULL;
523	PCRE2_SPTR text2_start = NULL;
524	PCRE2_SPTR text2_end = NULL;
525	PCRE2_UCHAR next;
526	PCRE2_UCHAR name[`33`];
527
528	if (++ptr >= repend) goto BAD;
529	if ((next = ptr) == CHAR_DOLLAR_SIGN) goto* LOADLITERAL;
530
531	group = -`1`;
532	n = `0`;
533	inparens = FALSE;
534	star = FALSE;
535
536	if (next == CHAR_LEFT_CURLY_BRACKET)
537	{
538	if (++ptr >= repend) goto BAD;
539	next = *ptr;
540	inparens = TRUE;
541	}
542
543	if (next == CHAR_ASTERISK)
544	{
545	if (++ptr >= repend) goto BAD;
546	next = *ptr;
547	star = TRUE;
548	}
549
550	if (!star && next >= CHAR_0 && next <= CHAR_9)
551	{
552	group = next - CHAR_0;
553	while (++ptr < repend)
554	{
555	next = *ptr;
556	if (next < CHAR_0 \|\| next > CHAR_9) break;
557	group = group * `10` + next - CHAR_0;
558
559	/ A check for a number greater than the hightest captured group*
560	is sufficient here; no need for a separate overflow check. If unknown
561	groups are to be treated as unset, just skip over any remaining
562	digits and carry on. /*
563
564	if (group > code->top_bracket)
565	{
566	if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != `0`)
567	{
568	while (++ptr < repend && ptr >= CHAR_0 && ptr <= CHAR_9);
569	break;
570	}
571	else
572	{
573	rc = PCRE2_ERROR_NOSUBSTRING;
574	goto PTREXIT;
575	}
576	}
577	}
578	}
579	else
580	{
581	const uint8_t *ctypes = code->tables + ctypes_offset;
582	while (MAX_255(next) && (ctypes[next] & ctype_word) != `0`)
583	{
584	name[n++] = next;
585	if (n > `32`) goto BAD;
586	if (++ptr >= repend) break;
587	next = *ptr;
588	}
589	if (n == `0`) goto BAD;
590	name[n] = `0`;
591	}
592
593	/ In extended mode we recognize ${name:+set text:unset text} and*
594	${name:-default text}. /*
595
596	if (inparens)
597	{
598	if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != `0` &&
599	!star && ptr < repend - `2` && next == CHAR_COLON)
600	{
601	special = *(++ptr);
602	if (special != CHAR_PLUS && special != CHAR_MINUS)
603	{
604	rc = PCRE2_ERROR_BADSUBSTITUTION;
605	goto PTREXIT;
606	}
607
608	text1_start = ++ptr;
609	rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
610	if (rc != `0`) goto PTREXIT;
611	text1_end = ptr;
612
613	if (special == CHAR_PLUS && *ptr == CHAR_COLON)
614	{
615	text2_start = ++ptr;
616	rc = find_text_end(code, &ptr, repend, TRUE);
617	if (rc != `0`) goto PTREXIT;
618	text2_end = ptr;
619	}
620	}
621
622	else
623	{
624	if (ptr >= repend \|\| *ptr != CHAR_RIGHT_CURLY_BRACKET)
625	{
626	rc = PCRE2_ERROR_REPMISSINGBRACE;
627	goto PTREXIT;
628	}
629	}
630
631	ptr++;
632	}
633
634	/ Have found a syntactically correct group number or name, or name.
635	Only MARK is currently recognized. /
636
637	if (star)
638	{
639	if (PRIV(strcmp_c8)(name, STRING_MARK) == `0`)
640	{
641	PCRE2_SPTR mark = pcre2_get_mark(match_data);
642	if (mark != NULL)
643	{
644	PCRE2_SPTR mark_start = mark;
645	while (*mark != `0`) mark++;
646	fraglength = mark - mark_start;
647	CHECKMEMCPY(mark_start, fraglength);
648	}
649	}
650	else goto BAD;
651	}
652
653	/ Substitute the contents of a group. We don't use substring_copy*
654	functions any more, in order to support case forcing. /*
655
656	else
657	{
658	PCRE2_SPTR subptr, subptrend;
659
660	/ Find a number for a named group. In case there are duplicate names,*
661	search for the first one that is set. If the name is not found when
662	PCRE2_SUBSTITUTE_UNKNOWN_EMPTY is set, set the group number to a
663	non-existent group. /*
664
665	if (group < `0`)
666	{
667	PCRE2_SPTR first, last, entry;
668	rc = pcre2_substring_nametable_scan(code, name, &first, &last);
669	if (rc == PCRE2_ERROR_NOSUBSTRING &&
670	(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != `0`)
671	{
672	group = code->top_bracket + `1`;
673	}
674	else
675	{
676	if (rc < `0`) goto PTREXIT;
677	for (entry = first; entry <= last; entry += rc)
678	{
679	uint32_t ng = GET2(entry, `0`);
680	if (ng < ovector_count)
681	{
682	if (group < `0`) group = ng; / First in ovector /
683	if (ovector[ng*`2`] != PCRE2_UNSET)
684	{
685	group = ng; / First that is set /
686	break;
687	}
688	}
689	}
690
691	/ If group is still negative, it means we did not find a group*
692	that is in the ovector. Just set the first group. /*
693
694	if (group < `0`) group = GET2(first, `0`);
695	}
696	}
697
698	/ We now have a group that is identified by number. Find the length of*
699	the captured string. If a group in a non-special substitution is unset
700	when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. /*
701
702	rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
703	if (rc < `0`)
704	{
705	if (rc == PCRE2_ERROR_NOSUBSTRING &&
706	(suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != `0`)
707	{
708	rc = PCRE2_ERROR_UNSET;
709	}
710	if (rc != PCRE2_ERROR_UNSET) goto PTREXIT; / Non-unset errors /
711	if (special == `0`) / Plain substitution /
712	{
713	if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != `0`) continue;
714	goto PTREXIT; / Else error /
715	}
716	}
717
718	/ If special is '+' we have a 'set' and possibly an 'unset' text,*
719	both of which are reprocessed when used. If special is '-' we have a
720	default text for when the group is unset; it must be reprocessed. /*
721
722	if (special != `0`)
723	{
724	if (special == CHAR_MINUS)
725	{
726	if (rc == `0`) goto LITERAL_SUBSTITUTE;
727	text2_start = text1_start;
728	text2_end = text1_end;
729	}
730
731	if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
732	ptrstack[ptrstackptr++] = ptr;
733	ptrstack[ptrstackptr++] = repend;
734
735	if (rc == `0`)
736	{
737	ptr = text1_start;
738	repend = text1_end;
739	}
740	else
741	{
742	ptr = text2_start;
743	repend = text2_end;
744	}
745	continue;
746	}
747
748	/ Otherwise we have a literal substitution of a group's contents. /
749
750	LITERAL_SUBSTITUTE:
751	subptr = subject + ovector[group*`2`];
752	subptrend = subject + ovector[group*`2` + `1`];
753
754	/ Substitute a literal string, possibly forcing alphabetic case. /
755
756	while (subptr < subptrend)
757	{
758	GETCHARINCTEST(ch, subptr);
759	if (forcecase != `0`)
760	{
761	#ifdef SUPPORT_UNICODE
762	if (utf \|\| ucp)
763	{
764	uint32_t type = UCD_CHARTYPE(ch);
765	if (PRIV(ucp_gentype)[type] == ucp_L &&
766	type != ((forcecase > `0`)? ucp_Lu : ucp_Ll))
767	ch = UCD_OTHERCASE(ch);
768	}
769	else
770	#endif
771	{
772	if (((code->tables + cbits_offset +
773	((forcecase > `0`)? cbit_upper:cbit_lower)
774	)[ch/`8`] & (`1u` << (ch%`8`))) == `0`)
775	ch = (code->tables + fcc_offset)[ch];
776	}
777	forcecase = forcecasereset;
778	}
779
780	#ifdef SUPPORT_UNICODE
781	if (utf) chlen = PRIV(ord2utf)(ch, temp); else
782	#endif
783	{
784	temp[`0`] = ch;
785	chlen = `1`;
786	}
787	CHECKMEMCPY(temp, chlen);
788	}
789	}
790	}
791
792	/ Handle an escape sequence in extended mode. We can use check_escape()*
793	to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
794	the case-forcing escapes are not supported in pcre2_compile() so must be
795	recognized here. /*
796
797	else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != `0` &&
798	*ptr == CHAR_BACKSLASH)
799	{
800	int errorcode;
801
802	if (ptr < repend - `1`) switch (ptr[`1`])
803	{
804	case CHAR_L:
805	forcecase = forcecasereset = -`1`;
806	ptr += `2`;
807	continue;
808
809	case CHAR_l:
810	forcecase = -`1`;
811	forcecasereset = `0`;
812	ptr += `2`;
813	continue;
814
815	case CHAR_U:
816	forcecase = forcecasereset = `1`;
817	ptr += `2`;
818	continue;
819
820	case CHAR_u:
821	forcecase = `1`;
822	forcecasereset = `0`;
823	ptr += `2`;
824	continue;
825
826	default:
827	break;
828	}
829
830	ptr++; / Point after \ /
831	rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
832	code->overall_options, code->extra_options, FALSE, NULL);
833	if (errorcode != `0`) goto BADESCAPE;
834
835	switch(rc)
836	{
837	case ESC_E:
838	forcecase = forcecasereset = `0`;
839	continue;
840
841	case ESC_Q:
842	escaped_literal = TRUE;
843	continue;
844
845	case `0`: / Data character /
846	goto LITERAL;
847
848	default:
849	goto BADESCAPE;
850	}
851	}
852
853	/ Handle a literal code unit /
854
855	else
856	{
857	LOADLITERAL:
858	GETCHARINCTEST(ch, ptr); / Get character value, increment pointer /
859
860	LITERAL:
861	if (forcecase != `0`)
862	{
863	#ifdef SUPPORT_UNICODE
864	if (utf \|\| ucp)
865	{
866	uint32_t type = UCD_CHARTYPE(ch);
867	if (PRIV(ucp_gentype)[type] == ucp_L &&
868	type != ((forcecase > `0`)? ucp_Lu : ucp_Ll))
869	ch = UCD_OTHERCASE(ch);
870	}
871	else
872	#endif
873	{
874	if (((code->tables + cbits_offset +
875	((forcecase > `0`)? cbit_upper:cbit_lower)
876	)[ch/`8`] & (`1u` << (ch%`8`))) == `0`)
877	ch = (code->tables + fcc_offset)[ch];
878	}
879	forcecase = forcecasereset;
880	}
881
882	#ifdef SUPPORT_UNICODE
883	if (utf) chlen = PRIV(ord2utf)(ch, temp); else
884	#endif
885	{
886	temp[`0`] = ch;
887	chlen = `1`;
888	}
889	CHECKMEMCPY(temp, chlen);
890	} / End handling a literal code unit /
891	} / End of loop for scanning the replacement. /
892
893	/ The replacement has been copied to the output, or its size has been*
894	remembered. Do the callout if there is one and we have done an actual
895	replacement. /*
896
897	if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL)
898	{
899	scb.subscount = subs;
900	scb.output_offsets[`1`] = buff_offset;
901	rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
902
903	/ A non-zero return means cancel this substitution. Instead, copy the*
904	matched string fragment. /*
905
906	if (rc != `0`)
907	{
908	PCRE2_SIZE newlength = scb.output_offsets[`1`] - scb.output_offsets[`0`];
909	PCRE2_SIZE oldlength = ovector[`1`] - ovector[`0`];
910
911	buff_offset -= newlength;
912	lengthleft += newlength;
913	if (!replacement_only) CHECKMEMCPY(subject + ovector[`0`], oldlength);
914
915	/ A negative return means do not do any more. /
916
917	if (rc < `0`) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL);
918	}
919	}
920
921	/ Save the details of this match. See above for how this data is used. If we*
922	matched an empty string, do the magic for global matches. Update the start
923	offset to point to the rest of the subject string. If we re-used an existing
924	match for the first match, switch to the internal match data block. /*
925
926	ovecsave[`0`] = ovector[`0`];
927	ovecsave[`1`] = ovector[`1`];
928	ovecsave[`2`] = start_offset;
929
930	goptions = (ovector[`0`] != ovector[`1`] \|\| ovector[`0`] > start_offset)? `0` :
931	PCRE2_ANCHORED\|PCRE2_NOTEMPTY_ATSTART;
932	start_offset = ovector[`1`];
933	} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != `0`); / Repeat "do" loop /
934
935	/ Copy the rest of the subject unless not required, and terminate the output*
936	with a binary zero. /*
937
938	if (!replacement_only)
939	{
940	fraglength = length - start_offset;
941	CHECKMEMCPY(subject + start_offset, fraglength);
942	}
943
944	temp[`0`] = `0`;
945	CHECKMEMCPY(temp, `1`);
946
947	/ If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,*
948	and matching has carried on after a full buffer, in order to compute the length
949	needed. Otherwise, an overflow generates an immediate error return. /*
950
951	if (overflowed)
952	{
953	rc = PCRE2_ERROR_NOMEMORY;
954	*blength = buff_length + extra_needed;
955	}
956
957	/ After a successful execution, return the number of substitutions and set the*
958	length of buffer used, excluding the trailing zero. /*
959
960	else
961	{
962	rc = subs;
963	*blength = buff_offset - `1`;
964	}
965
966	EXIT:
967	if (internal_match_data != NULL) pcre2_match_data_free(internal_match_data);
968	else match_data->rc = rc;
969	return rc;
970
971	NOROOM:
972	rc = PCRE2_ERROR_NOMEMORY;
973	goto EXIT;
974
975	BAD:
976	rc = PCRE2_ERROR_BADREPLACEMENT;
977	goto PTREXIT;
978
979	BADESCAPE:
980	rc = PCRE2_ERROR_BADREPESCAPE;
981
982	PTREXIT:
983	*blength = (PCRE2_SIZE)(ptr - replacement);
984	goto EXIT;
985	}
986
/* End of pcre2_substitute.c */

Browse the source code of Qt/src/3rdparty/pcre2/src/pcre2_substitute.c