1/****************************************************************************
2**
3** Copyright (C) 2020 Giuseppe D'Angelo <dangelog@gmail.com>.
4** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
5** Copyright (C) 2016 The Qt Company Ltd.
6** Contact: https://www.qt.io/licensing/
7**
8** This file is part of the QtCore module of the Qt Toolkit.
9**
10** $QT_BEGIN_LICENSE:LGPL$
11** Commercial License Usage
12** Licensees holding valid commercial Qt licenses may use this file in
13** accordance with the commercial license agreement provided with the
14** Software or, alternatively, in accordance with the terms contained in
15** a written agreement between you and The Qt Company. For licensing terms
16** and conditions see https://www.qt.io/terms-conditions. For further
17** information use the contact form at https://www.qt.io/contact-us.
18**
19** GNU Lesser General Public License Usage
20** Alternatively, this file may be used under the terms of the GNU Lesser
21** General Public License version 3 as published by the Free Software
22** Foundation and appearing in the file LICENSE.LGPL3 included in the
23** packaging of this file. Please review the following information to
24** ensure the GNU Lesser General Public License version 3 requirements
25** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
26**
27** GNU General Public License Usage
28** Alternatively, this file may be used under the terms of the GNU
29** General Public License version 2.0 or (at your option) the GNU General
30** Public license version 3 or any later version approved by the KDE Free
31** Qt Foundation. The licenses are as published by the Free Software
32** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
33** included in the packaging of this file. Please review the following
34** information to ensure the GNU General Public License requirements will
35** be met: https://www.gnu.org/licenses/gpl-2.0.html and
36** https://www.gnu.org/licenses/gpl-3.0.html.
37**
38** $QT_END_LICENSE$
39**
40****************************************************************************/
41
42#include "qregularexpression.h"
43
44#include <QtCore/qcoreapplication.h>
45#include <QtCore/qhashfunctions.h>
46#include <QtCore/qlist.h>
47#include <QtCore/qmutex.h>
48#include <QtCore/qstringlist.h>
49#include <QtCore/qdebug.h>
50#include <QtCore/qthreadstorage.h>
51#include <QtCore/qglobal.h>
52#include <QtCore/qatomic.h>
53#include <QtCore/qdatastream.h>
54
55#define PCRE2_CODE_UNIT_WIDTH 16
56
57#include <pcre2.h>
58
59QT_BEGIN_NAMESPACE
60
61/*!
62 \class QRegularExpression
63 \inmodule QtCore
64 \reentrant
65
66 \brief The QRegularExpression class provides pattern matching using regular
67 expressions.
68
69 \since 5.0
70
71 \ingroup tools
72 \ingroup shared
73
74 \keyword regular expression
75
76 Regular expressions, or \e{regexps}, are a very powerful tool to handle
77 strings and texts. This is useful in many contexts, e.g.,
78
79 \table
80 \row \li Validation
81 \li A regexp can test whether a substring meets some criteria,
82 e.g. is an integer or contains no whitespace.
83 \row \li Searching
84 \li A regexp provides more powerful pattern matching than
85 simple substring matching, e.g., match one of the words
86 \e{mail}, \e{letter} or \e{correspondence}, but none of the
87 words \e{email}, \e{mailman}, \e{mailer}, \e{letterbox}, etc.
88 \row \li Search and Replace
89 \li A regexp can replace all occurrences of a substring with a
90 different substring, e.g., replace all occurrences of \e{&}
91 with \e{\&amp;} except where the \e{&} is already followed by
92 an \e{amp;}.
93 \row \li String Splitting
94 \li A regexp can be used to identify where a string should be
95 split apart, e.g. splitting tab-delimited strings.
96 \endtable
97
98 This document is by no means a complete reference to pattern matching using
99 regular expressions, and the following parts will require the reader to
100 have some basic knowledge about Perl-like regular expressions and their
101 pattern syntax.
102
103 Good references about regular expressions include:
104
105 \list
106 \li \e {Mastering Regular Expressions} (Third Edition) by Jeffrey E. F.
107 Friedl, ISBN 0-596-52812-4;
108 \li the \l{http://pcre.org/pcre.txt} {pcrepattern(3)} man page, describing
109 the pattern syntax supported by PCRE (the reference implementation of
110 Perl-compatible regular expressions);
111 \li the \l{http://perldoc.perl.org/perlre.html} {Perl's regular expression
112 documentation} and the \l{http://perldoc.perl.org/perlretut.html} {Perl's
113 regular expression tutorial}.
114 \endlist
115
116 \tableofcontents
117
118 \section1 Introduction
119
120 QRegularExpression implements Perl-compatible regular expressions. It fully
121 supports Unicode. For an overview of the regular expression syntax
122 supported by QRegularExpression, please refer to the aforementioned
123 pcrepattern(3) man page. A regular expression is made up of two things: a
124 \b{pattern string} and a set of \b{pattern options} that change the
125 meaning of the pattern string.
126
127 You can set the pattern string by passing a string to the QRegularExpression
128 constructor:
129
130 \snippet code/src_corelib_text_qregularexpression.cpp 0
131
132 This sets the pattern string to \c{a pattern}. You can also use the
133 setPattern() function to set a pattern on an existing QRegularExpression
134 object:
135
136 \snippet code/src_corelib_text_qregularexpression.cpp 1
137
138 Note that due to C++ literal strings rules, you must escape all backslashes
139 inside the pattern string with another backslash:
140
141 \snippet code/src_corelib_text_qregularexpression.cpp 2
142
143 The pattern() function returns the pattern that is currently set for a
144 QRegularExpression object:
145
146 \snippet code/src_corelib_text_qregularexpression.cpp 3
147
148 \section1 Pattern Options
149
150 The meaning of the pattern string can be modified by setting one or more
151 \e{pattern options}. For instance, it is possible to set a pattern to match
152 case insensitively by setting the QRegularExpression::CaseInsensitiveOption.
153
154 You can set the options by passing them to the QRegularExpression
155 constructor, as in:
156
157 \snippet code/src_corelib_text_qregularexpression.cpp 4
158
 Alternatively, you can use the setPatternOptions() function on an existing
 QRegularExpression object:
161
162 \snippet code/src_corelib_text_qregularexpression.cpp 5
163
164 It is possible to get the pattern options currently set on a
165 QRegularExpression object by using the patternOptions() function:
166
167 \snippet code/src_corelib_text_qregularexpression.cpp 6
168
169 Please refer to the QRegularExpression::PatternOption enum documentation for
170 more information about each pattern option.
171
172 \section1 Match Type and Match Options
173
174 The last two arguments of the match() and the globalMatch() functions set
175 the match type and the match options. The match type is a value of the
176 QRegularExpression::MatchType enum; the "traditional" matching algorithm is
177 chosen by using the NormalMatch match type (the default). It is also
178 possible to enable partial matching of the regular expression against a
179 subject string: see the \l{partial matching} section for more details.
180
181 The match options are a set of one or more QRegularExpression::MatchOption
182 values. They change the way a specific match of a regular expression
183 against a subject string is done. Please refer to the
184 QRegularExpression::MatchOption enum documentation for more details.
185
186 \target normal matching
187 \section1 Normal Matching
188
189 In order to perform a match you can simply invoke the match() function
190 passing a string to match against. We refer to this string as the
191 \e{subject string}. The result of the match() function is a
192 QRegularExpressionMatch object that can be used to inspect the results of
193 the match. For instance:
194
195 \snippet code/src_corelib_text_qregularexpression.cpp 7
196
197 If a match is successful, the (implicit) capturing group number 0 can be
198 used to retrieve the substring matched by the entire pattern (see also the
199 section about \l{extracting captured substrings}):
200
201 \snippet code/src_corelib_text_qregularexpression.cpp 8
202
203 It's also possible to start a match at an arbitrary offset inside the
204 subject string by passing the offset as an argument of the
205 match() function. In the following example \c{"12 abc"}
206 is not matched because the match is started at offset 1:
207
208 \snippet code/src_corelib_text_qregularexpression.cpp 9
209
210 \target extracting captured substrings
211 \section2 Extracting captured substrings
212
 The QRegularExpressionMatch object also contains information about the
214 substrings captured by the capturing groups in the pattern string. The
215 \l{QRegularExpressionMatch::}{captured()} function will return the string
216 captured by the n-th capturing group:
217
218 \snippet code/src_corelib_text_qregularexpression.cpp 10
219
220 Capturing groups in the pattern are numbered starting from 1, and the
221 implicit capturing group 0 is used to capture the substring that matched
222 the entire pattern.
223
224 It's also possible to retrieve the starting and the ending offsets (inside
225 the subject string) of each captured substring, by using the
226 \l{QRegularExpressionMatch::}{capturedStart()} and the
227 \l{QRegularExpressionMatch::}{capturedEnd()} functions:
228
229 \snippet code/src_corelib_text_qregularexpression.cpp 11
230
231 All of these functions have an overload taking a QString as a parameter
232 in order to extract \e{named} captured substrings. For instance:
233
234 \snippet code/src_corelib_text_qregularexpression.cpp 12
235
236 \target global matching
237 \section1 Global Matching
238
239 \e{Global matching} is useful to find all the occurrences of a given
240 regular expression inside a subject string. Suppose that we want to extract
241 all the words from a given string, where a word is a substring matching
242 the pattern \c{\w+}.
243
244 QRegularExpression::globalMatch returns a QRegularExpressionMatchIterator,
245 which is a Java-like forward iterator that can be used to iterate over the
246 results. For instance:
247
248 \snippet code/src_corelib_text_qregularexpression.cpp 13
249
250 Since it's a Java-like iterator, the QRegularExpressionMatchIterator will
251 point immediately before the first result. Every result is returned as a
252 QRegularExpressionMatch object. The
253 \l{QRegularExpressionMatchIterator::}{hasNext()} function will return true
254 if there's at least one more result, and
255 \l{QRegularExpressionMatchIterator::}{next()} will return the next result
256 and advance the iterator. Continuing from the previous example:
257
258 \snippet code/src_corelib_text_qregularexpression.cpp 14
259
260 You can also use \l{QRegularExpressionMatchIterator::}{peekNext()} to get
261 the next result without advancing the iterator.
262
263 It is also possible to simply use the result of
264 QRegularExpression::globalMatch in a range-based for loop, for instance
265 like this:
266
267 \snippet code/src_corelib_text_qregularexpression.cpp 34
268
269 It is possible to pass a starting offset and one or more match options to
270 the globalMatch() function, exactly like normal matching with match().
271
272 \target partial matching
273 \section1 Partial Matching
274
275 A \e{partial match} is obtained when the end of the subject string is
276 reached, but more characters are needed to successfully complete the match.
277 Note that a partial match is usually much more inefficient than a normal
278 match because many optimizations of the matching algorithm cannot be
279 employed.
280
281 A partial match must be explicitly requested by specifying a match type of
282 PartialPreferCompleteMatch or PartialPreferFirstMatch when calling
283 QRegularExpression::match or QRegularExpression::globalMatch. If a partial
284 match is found, then calling the \l{QRegularExpressionMatch::}{hasMatch()}
285 function on the QRegularExpressionMatch object returned by match() will
286 return \c{false}, but \l{QRegularExpressionMatch::}{hasPartialMatch()} will return
287 \c{true}.
288
289 When a partial match is found, no captured substrings are returned, and the
290 (implicit) capturing group 0 corresponding to the whole match captures the
291 partially matched substring of the subject string.
292
293 Note that asking for a partial match can still lead to a complete match, if
294 one is found; in this case, \l{QRegularExpressionMatch::}{hasMatch()} will
295 return \c{true} and \l{QRegularExpressionMatch::}{hasPartialMatch()}
296 \c{false}. It never happens that a QRegularExpressionMatch reports both a
297 partial and a complete match.
298
299 Partial matching is mainly useful in two scenarios: validating user input
300 in real time and incremental/multi-segment matching.
301
302 \target validating user input
303 \section2 Validating user input
304
305 Suppose that we would like the user to input a date in a specific
306 format, for instance "MMM dd, yyyy". We can check the input validity with
307 a pattern like:
308
309 \c{^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \d\d?, \d\d\d\d$}
310
311 (This pattern doesn't catch invalid days, but let's keep it for the
312 example's purposes).
313
314 We would like to validate the input with this regular expression \e{while}
315 the user is typing it, so that we can report an error in the input as soon
316 as it is committed (for instance, the user typed the wrong key). In order
317 to do so we must distinguish three cases:
318
319 \list
320 \li the input cannot possibly match the regular expression;
321 \li the input does match the regular expression;
322 \li the input does not match the regular expression right now,
 but it will if more characters are added to it.
324 \endlist
325
326 Note that these three cases represent exactly the possible states of a
327 QValidator (see the QValidator::State enum).
328
329 In particular, in the last case we want the regular expression engine to
330 report a partial match: we are successfully matching the pattern against
331 the subject string but the matching cannot continue because the end of the
332 subject is encountered. Notice, however, that the matching algorithm should
333 continue and try all possibilities, and in case a complete (non-partial)
334 match is found, then this one should be reported, and the input string
335 accepted as fully valid.
336
337 This behavior is implemented by the PartialPreferCompleteMatch match type.
338 For instance:
339
340 \snippet code/src_corelib_text_qregularexpression.cpp 15
341
342 If matching the same regular expression against the subject string leads to
343 a complete match, it is reported as usual:
344
345 \snippet code/src_corelib_text_qregularexpression.cpp 16
346
347 Another example with a different pattern, showing the behavior of
348 preferring a complete match over a partial one:
349
350 \snippet code/src_corelib_text_qregularexpression.cpp 17
351
352 In this case, the subpattern \c{abc\\w+X} partially matches the subject
353 string; however, the subpattern \c{def} matches the subject string
354 completely, and therefore a complete match is reported.
355
356 If multiple partial matches are found when matching (but no complete
357 match), then the QRegularExpressionMatch object will report the first one
358 that is found. For instance:
359
360 \snippet code/src_corelib_text_qregularexpression.cpp 18
361
362 \section2 Incremental/multi-segment matching
363
364 Incremental matching is another use case of partial matching. Suppose that
365 we want to find the occurrences of a regular expression inside a large text
366 (that is, substrings matching the regular expression). In order to do so we
 would like to "feed" the large text to the regular expression engine in
368 smaller chunks. The obvious problem is what happens if the substring that
369 matches the regular expression spans across two or more chunks.
370
371 In this case, the regular expression engine should report a partial match,
372 so that we can match again adding new data and (eventually) get a complete
373 match. This implies that the regular expression engine may assume that
374 there are other characters \e{beyond the end} of the subject string. This
375 is not to be taken literally -- the engine will never try to access
376 any character after the last one in the subject.
377
378 QRegularExpression implements this behavior when using the
379 PartialPreferFirstMatch match type. This match type reports a partial match
380 as soon as it is found, and other match alternatives are not tried
381 (even if they could lead to a complete match). For instance:
382
383 \snippet code/src_corelib_text_qregularexpression.cpp 19
384
385 This happens because when matching the first branch of the alternation
386 operator a partial match is found, and therefore matching stops, without
387 trying the second branch. Another example:
388
389 \snippet code/src_corelib_text_qregularexpression.cpp 20
390
391 This shows what could seem a counterintuitive behavior of quantifiers:
392 since \c{?} is greedy, then the engine tries first to continue the match
393 after having matched \c{"abc"}; but then the matching reaches the end of the
394 subject string, and therefore a partial match is reported. This is
395 even more surprising in the following example:
396
397 \snippet code/src_corelib_text_qregularexpression.cpp 21
398
399 It's easy to understand this behavior if we remember that the engine
400 expects the subject string to be only a substring of the whole text we're
 looking for a match in (that is, as we said before, the engine
402 assumes that there are other characters beyond the end of the subject
403 string).
404
405 Since the \c{*} quantifier is greedy, then reporting a complete match could
406 be an error, because after the current subject \c{"abc"} there may be other
407 occurrences of \c{"abc"}. For instance, the complete text could have been
408 "abcabcX", and therefore the \e{right} match to report (in the complete
409 text) would have been \c{"abcabc"}; by matching only against the leading
410 \c{"abc"} we instead get a partial match.
411
412 \section1 Error Handling
413
414 It is possible for a QRegularExpression object to be invalid because of
415 syntax errors in the pattern string. The isValid() function will return
416 true if the regular expression is valid, or false otherwise:
417
418 \snippet code/src_corelib_text_qregularexpression.cpp 22
419
420 You can get more information about the specific error by calling the
421 errorString() function; moreover, the patternErrorOffset() function
 will return the offset inside the pattern string at which the error occurred:
423
424 \snippet code/src_corelib_text_qregularexpression.cpp 23
425
426 If a match is attempted with an invalid QRegularExpression, then the
427 returned QRegularExpressionMatch object will be invalid as well (that is,
428 its \l{QRegularExpressionMatch::}{isValid()} function will return false).
429 The same applies for attempting a global match.
430
431 \section1 Unsupported Perl-compatible Regular Expressions Features
432
433 QRegularExpression does not support all the features available in
434 Perl-compatible regular expressions. The most notable one is the fact that
435 duplicated names for capturing groups are not supported, and using them can
436 lead to undefined behavior.
437
438 This may change in a future version of Qt.
439
440 \section1 Debugging Code that Uses QRegularExpression
441
442 QRegularExpression internally uses a just in time compiler (JIT) to
443 optimize the execution of the matching algorithm. The JIT makes extensive
444 usage of self-modifying code, which can lead debugging tools such as
445 Valgrind to crash. You must enable all checks for self-modifying code if
446 you want to debug programs using QRegularExpression (for instance, Valgrind's
447 \c{--smc-check} command line option). The downside of enabling such checks
448 is that your program will run considerably slower.
449
450 To avoid that, the JIT is disabled by default if you compile Qt in debug
451 mode. It is possible to override the default and enable or disable the JIT
 usage (in both debug and release mode) by setting the
453 \c{QT_ENABLE_REGEXP_JIT} environment variable to a non-zero or zero value
454 respectively.
455
456 \sa QRegularExpressionMatch, QRegularExpressionMatchIterator
457*/
458
459/*!
460 \class QRegularExpressionMatch
461 \inmodule QtCore
462 \reentrant
463
 \brief The QRegularExpressionMatch class provides the results of matching
465 a QRegularExpression against a string.
466
467 \since 5.0
468
469 \ingroup tools
470 \ingroup shared
471
472 \keyword regular expression match
473
474 A QRegularExpressionMatch object can be obtained by calling the
475 QRegularExpression::match() function, or as a single result of a global
476 match from a QRegularExpressionMatchIterator.
477
478 The success or the failure of a match attempt can be inspected by calling
479 the hasMatch() function. QRegularExpressionMatch also reports a successful
480 partial match through the hasPartialMatch() function.
481
482 In addition, QRegularExpressionMatch returns the substrings captured by the
483 capturing groups in the pattern string. The implicit capturing group with
484 index 0 captures the result of the whole match. The captured() function
485 returns each substring captured, either by the capturing group's index or
486 by its name:
487
488 \snippet code/src_corelib_text_qregularexpression.cpp 29
489
490 For each captured substring it is possible to query its starting and ending
491 offsets in the subject string by calling the capturedStart() and the
492 capturedEnd() function, respectively. The length of each captured
493 substring is available using the capturedLength() function.
494
495 The convenience function capturedTexts() will return \e{all} the captured
496 substrings at once (including the substring matched by the entire pattern)
497 in the order they have been captured by capturing groups; that is,
498 \c{captured(i) == capturedTexts().at(i)}.
499
500 You can retrieve the QRegularExpression object the subject string was
501 matched against by calling the regularExpression() function; the
502 match type and the match options are available as well by calling
503 the matchType() and the matchOptions() respectively.
504
505 Please refer to the QRegularExpression documentation for more information
506 about the Qt regular expression classes.
507
508 \sa QRegularExpression
509*/
510
511/*!
512 \class QRegularExpressionMatchIterator
513 \inmodule QtCore
514 \reentrant
515
516 \brief The QRegularExpressionMatchIterator class provides an iterator on
517 the results of a global match of a QRegularExpression object against a string.
518
519 \since 5.0
520
521 \ingroup tools
522 \ingroup shared
523
524 \keyword regular expression iterator
525
526 A QRegularExpressionMatchIterator object is a forward only Java-like
527 iterator; it can be obtained by calling the
528 QRegularExpression::globalMatch() function. A new
529 QRegularExpressionMatchIterator will be positioned before the first result.
530 You can then call the hasNext() function to check if there are more
531 results available; if so, the next() function will return the next
532 result and advance the iterator.
533
534 Each result is a QRegularExpressionMatch object holding all the information
535 for that result (including captured substrings).
536
537 For instance:
538
539 \snippet code/src_corelib_text_qregularexpression.cpp 30
540
541 Moreover, QRegularExpressionMatchIterator offers a peekNext() function
542 to get the next result \e{without} advancing the iterator.
543
544 Starting with Qt 6.0, it is also possible to simply use the result of
545 QRegularExpression::globalMatch in a range-based for loop, for instance
546 like this:
547
548 \snippet code/src_corelib_text_qregularexpression.cpp 34
549
550 You can retrieve the QRegularExpression object the subject string was
551 matched against by calling the regularExpression() function; the
552 match type and the match options are available as well by calling
553 the matchType() and the matchOptions() respectively.
554
555 Please refer to the QRegularExpression documentation for more information
556 about the Qt regular expression classes.
557
558 \sa QRegularExpression, QRegularExpressionMatch
559*/
560
561
562/*!
563 \enum QRegularExpression::PatternOption
564
565 The PatternOption enum defines modifiers to the way the pattern string
566 should be interpreted, and therefore the way the pattern matches against a
567 subject string.
568
569 \value NoPatternOption
570 No pattern options are set.
571
572 \value CaseInsensitiveOption
573 The pattern should match against the subject string in a case
 insensitive way. This option corresponds to the \c{/i} modifier in Perl
575 regular expressions.
576
577 \value DotMatchesEverythingOption
578 The dot metacharacter (\c{.}) in the pattern string is allowed to match
579 any character in the subject string, including newlines (normally, the
580 dot does not match newlines). This option corresponds to the \c{/s}
581 modifier in Perl regular expressions.
582
583 \value MultilineOption
584 The caret (\c{^}) and the dollar (\c{$}) metacharacters in the pattern
585 string are allowed to match, respectively, immediately after and
586 immediately before any newline in the subject string, as well as at the
587 very beginning and at the very end of the subject string. This option
588 corresponds to the \c{/m} modifier in Perl regular expressions.
589
590 \value ExtendedPatternSyntaxOption
591 Any whitespace in the pattern string which is not escaped and outside a
592 character class is ignored. Moreover, an unescaped sharp (\b{#})
593 outside a character class causes all the following characters, until
594 the first newline (included), to be ignored. This can be used to
595 increase the readability of a pattern string as well as put comments
596 inside regular expressions; this is particularly useful if the pattern
597 string is loaded from a file or written by the user, because in C++
598 code it is always possible to use the rules for string literals to put
599 comments outside the pattern string. This option corresponds to the \c{/x}
600 modifier in Perl regular expressions.
601
602 \value InvertedGreedinessOption
603 The greediness of the quantifiers is inverted: \c{*}, \c{+}, \c{?},
604 \c{{m,n}}, etc. become lazy, while their lazy versions (\c{*?},
605 \c{+?}, \c{??}, \c{{m,n}?}, etc.) become greedy. There is no equivalent
606 for this option in Perl regular expressions.
607
608 \value DontCaptureOption
609 The non-named capturing groups do not capture substrings; named
610 capturing groups still work as intended, as well as the implicit
611 capturing group number 0 corresponding to the entire match. There is no
612 equivalent for this option in Perl regular expressions.
613
614 \value UseUnicodePropertiesOption
615 The meaning of the \c{\w}, \c{\d}, etc., character classes, as well as
616 the meaning of their counterparts (\c{\W}, \c{\D}, etc.), is changed
617 from matching ASCII characters only to matching any character with the
618 corresponding Unicode property. For instance, \c{\d} is changed to
619 match any character with the Unicode Nd (decimal digit) property;
620 \c{\w} to match any character with either the Unicode L (letter) or N
621 (digit) property, plus underscore, and so on. This option corresponds
622 to the \c{/u} modifier in Perl regular expressions.
623*/
624
625/*!
626 \enum QRegularExpression::MatchType
627
628 The MatchType enum defines the type of the match that should be attempted
629 against the subject string.
630
631 \value NormalMatch
632 A normal match is done.
633
634 \value PartialPreferCompleteMatch
635 The pattern string is matched partially against the subject string. If
636 a partial match is found, then it is recorded, and other matching
637 alternatives are tried as usual. If a complete match is then found,
638 then it's preferred to the partial match; in this case only the
639 complete match is reported. If instead no complete match is found (but
640 only the partial one), then the partial one is reported.
641
642 \value PartialPreferFirstMatch
643 The pattern string is matched partially against the subject string. If
644 a partial match is found, then matching stops and the partial match is
645 reported. In this case, other matching alternatives (potentially
646 leading to a complete match) are not tried. Moreover, this match type
 assumes that the subject string is only a substring of a larger text, and
648 that (in this text) there are other characters beyond the end of the
649 subject string. This can lead to surprising results; see the discussion
650 in the \l{partial matching} section for more details.
651
652 \value NoMatch
653 No matching is done. This value is returned as the match type by a
654 default constructed QRegularExpressionMatch or
655 QRegularExpressionMatchIterator. Using this match type is not very
656 useful for the user, as no matching ever happens. This enum value
657 has been introduced in Qt 5.1.
658*/
659
660/*!
661 \enum QRegularExpression::MatchOption
662
663 \value NoMatchOption
664 No match options are set.
665
666 \value AnchoredMatchOption
667 Use AnchorAtOffsetMatchOption instead.
668
669 \value AnchorAtOffsetMatchOption
670 The match is constrained to start exactly at the offset passed to
671 match() in order to be successful, even if the pattern string does not
672 contain any metacharacter that anchors the match at that point.
673 Note that passing this option does not anchor the end of the match
674 to the end of the subject; if you want to fully anchor a regular
675 expression, use anchoredPattern().
676 This enum value has been introduced in Qt 6.0.
677
678 \value DontCheckSubjectStringMatchOption
679 The subject string is not checked for UTF-16 validity before
680 attempting a match. Use this option with extreme caution, as
681 attempting to match an invalid string may crash the program and/or
682 constitute a security issue. This enum value has been introduced in
683 Qt 5.4.
684*/
685
686/*!
687 \internal
688*/
689static int convertToPcreOptions(QRegularExpression::PatternOptions patternOptions)
690{
691 int options = 0;
692
693 if (patternOptions & QRegularExpression::CaseInsensitiveOption)
694 options |= PCRE2_CASELESS;
695 if (patternOptions & QRegularExpression::DotMatchesEverythingOption)
696 options |= PCRE2_DOTALL;
697 if (patternOptions & QRegularExpression::MultilineOption)
698 options |= PCRE2_MULTILINE;
699 if (patternOptions & QRegularExpression::ExtendedPatternSyntaxOption)
700 options |= PCRE2_EXTENDED;
701 if (patternOptions & QRegularExpression::InvertedGreedinessOption)
702 options |= PCRE2_UNGREEDY;
703 if (patternOptions & QRegularExpression::DontCaptureOption)
704 options |= PCRE2_NO_AUTO_CAPTURE;
705 if (patternOptions & QRegularExpression::UseUnicodePropertiesOption)
706 options |= PCRE2_UCP;
707
708 return options;
709}
710
711/*!
712 \internal
713*/
714static int convertToPcreOptions(QRegularExpression::MatchOptions matchOptions)
715{
716 int options = 0;
717
718 if (matchOptions & QRegularExpression::AnchorAtOffsetMatchOption)
719 options |= PCRE2_ANCHORED;
720 if (matchOptions & QRegularExpression::DontCheckSubjectStringMatchOption)
721 options |= PCRE2_NO_UTF_CHECK;
722
723 return options;
724}
725
726struct QRegularExpressionPrivate : QSharedData
727{
728 QRegularExpressionPrivate();
729 ~QRegularExpressionPrivate();
730 QRegularExpressionPrivate(const QRegularExpressionPrivate &other);
731
732 void cleanCompiledPattern();
733 void compilePattern();
734 void getPatternInfo();
735 void optimizePattern();
736
737 enum CheckSubjectStringOption {
738 CheckSubjectString,
739 DontCheckSubjectString
740 };
741
742 void doMatch(QRegularExpressionMatchPrivate *priv,
743 qsizetype offset,
744 CheckSubjectStringOption checkSubjectStringOption = CheckSubjectString,
745 const QRegularExpressionMatchPrivate *previous = nullptr) const;
746
747 int captureIndexForName(QStringView name) const;
748
749 // sizeof(QSharedData) == 4, so start our members with an enum
750 QRegularExpression::PatternOptions patternOptions;
751 QString pattern;
752
753 // *All* of the following members are managed while holding this mutex,
754 // except for isDirty which is set to true by QRegularExpression setters
755 // (right after a detach happened).
756 mutable QMutex mutex;
757
758 // The PCRE code pointer is reference-counted by the QRegularExpressionPrivate
759 // objects themselves; when the private is copied (i.e. a detach happened)
760 // it is set to nullptr
761 pcre2_code_16 *compiledPattern;
762 int errorCode;
763 qsizetype errorOffset;
764 int capturingCount;
765 bool usingCrLfNewlines;
766 bool isDirty;
767};
768
769struct QRegularExpressionMatchPrivate : QSharedData
770{
771 QRegularExpressionMatchPrivate(const QRegularExpression &re,
772 const QString &subjectStorage,
773 QStringView subject,
774 QRegularExpression::MatchType matchType,
775 QRegularExpression::MatchOptions matchOptions);
776
777 QRegularExpressionMatch nextMatch() const;
778
779 const QRegularExpression regularExpression;
780
781 // subject is what we match upon. If we've been asked to match over
782 // a QString, then subjectStorage is a copy of that string
783 // (so that it's kept alive by us)
784 const QString subjectStorage;
785 const QStringView subject;
786
787 const QRegularExpression::MatchType matchType;
788 const QRegularExpression::MatchOptions matchOptions;
789
790 // the capturedOffsets vector contains pairs of (start, end) positions
791 // for each captured substring
792 QList<qsizetype> capturedOffsets;
793
794 int capturedCount = 0;
795
796 bool hasMatch = false;
797 bool hasPartialMatch = false;
798 bool isValid = false;
799};
800
801struct QRegularExpressionMatchIteratorPrivate : QSharedData
802{
803 QRegularExpressionMatchIteratorPrivate(const QRegularExpression &re,
804 QRegularExpression::MatchType matchType,
805 QRegularExpression::MatchOptions matchOptions,
806 const QRegularExpressionMatch &next);
807
808 bool hasNext() const;
809 QRegularExpressionMatch next;
810 const QRegularExpression regularExpression;
811 const QRegularExpression::MatchType matchType;
812 const QRegularExpression::MatchOptions matchOptions;
813};
814
815/*!
816 \internal
817*/
818QRegularExpression::QRegularExpression(QRegularExpressionPrivate &dd)
819 : d(&dd)
820{
821}
822
823/*!
824 \internal
825*/
826QRegularExpressionPrivate::QRegularExpressionPrivate()
827 : QSharedData(),
828 patternOptions(),
829 pattern(),
830 mutex(),
831 compiledPattern(nullptr),
832 errorCode(0),
833 errorOffset(-1),
834 capturingCount(0),
835 usingCrLfNewlines(false),
836 isDirty(true)
837{
838}
839
840/*!
841 \internal
842*/
843QRegularExpressionPrivate::~QRegularExpressionPrivate()
844{
845 cleanCompiledPattern();
846}
847
848/*!
849 \internal
850
851 Copies the private, which means copying only the pattern and the pattern
852 options. The compiledPattern pointer is NOT copied (we
853 do not own it any more), and in general all the members set when
854 compiling a pattern are set to default values. isDirty is set back to true
855 so that the pattern has to be recompiled again.
856*/
857QRegularExpressionPrivate::QRegularExpressionPrivate(const QRegularExpressionPrivate &other)
858 : QSharedData(other),
859 patternOptions(other.patternOptions),
860 pattern(other.pattern),
861 mutex(),
862 compiledPattern(nullptr),
863 errorCode(0),
864 errorOffset(-1),
865 capturingCount(0),
866 usingCrLfNewlines(false),
867 isDirty(true)
868{
869}
870
871/*!
872 \internal
873*/
874void QRegularExpressionPrivate::cleanCompiledPattern()
875{
876 pcre2_code_free_16(compiledPattern);
877 compiledPattern = nullptr;
878 errorCode = 0;
879 errorOffset = -1;
880 capturingCount = 0;
881 usingCrLfNewlines = false;
882}
883
884/*!
885 \internal
886*/
887void QRegularExpressionPrivate::compilePattern()
888{
889 const QMutexLocker lock(&mutex);
890
891 if (!isDirty)
892 return;
893
894 isDirty = false;
895 cleanCompiledPattern();
896
897 int options = convertToPcreOptions(patternOptions);
898 options |= PCRE2_UTF;
899
900 PCRE2_SIZE patternErrorOffset;
901 compiledPattern = pcre2_compile_16(reinterpret_cast<PCRE2_SPTR16>(pattern.utf16()),
902 pattern.length(),
903 options,
904 &errorCode,
905 &patternErrorOffset,
906 nullptr);
907
908 if (!compiledPattern) {
909 errorOffset = qsizetype(patternErrorOffset);
910 return;
911 } else {
912 // ignore whatever PCRE2 wrote into errorCode -- leave it to 0 to mean "no error"
913 errorCode = 0;
914 }
915
916 optimizePattern();
917 getPatternInfo();
918}
919
920/*!
921 \internal
922*/
923void QRegularExpressionPrivate::getPatternInfo()
924{
925 Q_ASSERT(compiledPattern);
926
927 pcre2_pattern_info_16(compiledPattern, PCRE2_INFO_CAPTURECOUNT, &capturingCount);
928
929 // detect the settings for the newline
930 unsigned int patternNewlineSetting;
931 if (pcre2_pattern_info_16(compiledPattern, PCRE2_INFO_NEWLINE, &patternNewlineSetting) != 0) {
932 // no option was specified in the regexp, grab PCRE build defaults
933 pcre2_config_16(PCRE2_CONFIG_NEWLINE, &patternNewlineSetting);
934 }
935
936 usingCrLfNewlines = (patternNewlineSetting == PCRE2_NEWLINE_CRLF) ||
937 (patternNewlineSetting == PCRE2_NEWLINE_ANY) ||
938 (patternNewlineSetting == PCRE2_NEWLINE_ANYCRLF);
939
940 unsigned int hasJOptionChanged;
941 pcre2_pattern_info_16(compiledPattern, PCRE2_INFO_JCHANGED, &hasJOptionChanged);
942 if (Q_UNLIKELY(hasJOptionChanged)) {
943 qWarning("QRegularExpressionPrivate::getPatternInfo(): the pattern '%ls'\n is using the (?J) option; duplicate capturing group names are not supported by Qt",
944 qUtf16Printable(pattern));
945 }
946}
947
948
949/*
950 Simple "smartpointer" wrapper around a pcre2_jit_stack_16, to be used with
951 QThreadStorage.
952*/
953class QPcreJitStackPointer
954{
955 Q_DISABLE_COPY(QPcreJitStackPointer)
956
957public:
958 /*!
959 \internal
960 */
961 QPcreJitStackPointer()
962 {
963 // The default JIT stack size in PCRE is 32K,
964 // we allocate from 32K up to 512K.
965 stack = pcre2_jit_stack_create_16(32 * 1024, 512 * 1024, nullptr);
966 }
967 /*!
968 \internal
969 */
970 ~QPcreJitStackPointer()
971 {
972 if (stack)
973 pcre2_jit_stack_free_16(stack);
974 }
975
976 pcre2_jit_stack_16 *stack;
977};
978
979Q_GLOBAL_STATIC(QThreadStorage<QPcreJitStackPointer *>, jitStacks)
980
981/*!
982 \internal
983*/
984static pcre2_jit_stack_16 *qtPcreCallback(void *)
985{
986 if (jitStacks()->hasLocalData())
987 return jitStacks()->localData()->stack;
988
989 return nullptr;
990}
991
992/*!
993 \internal
994*/
995static bool isJitEnabled()
996{
997 QByteArray jitEnvironment = qgetenv("QT_ENABLE_REGEXP_JIT");
998 if (!jitEnvironment.isEmpty()) {
999 bool ok;
1000 int enableJit = jitEnvironment.toInt(&ok);
1001 return ok ? (enableJit != 0) : true;
1002 }
1003
1004#ifdef QT_DEBUG
1005 return false;
1006#else
1007 return true;
1008#endif
1009}
1010
1011/*!
1012 \internal
1013
1014 The purpose of the function is to call pcre2_jit_compile_16, which
1015 JIT-compiles the pattern.
1016
1017 It gets called when a pattern is recompiled by us (in compilePattern()),
1018 under mutex protection.
1019*/
1020void QRegularExpressionPrivate::optimizePattern()
1021{
1022 Q_ASSERT(compiledPattern);
1023
1024 static const bool enableJit = isJitEnabled();
1025
1026 if (!enableJit)
1027 return;
1028
1029 pcre2_jit_compile_16(compiledPattern, PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
1030}
1031
1032/*!
1033 \internal
1034
1035 Returns the capturing group number for the given name. Duplicated names for
1036 capturing groups are not supported.
1037*/
1038int QRegularExpressionPrivate::captureIndexForName(QStringView name) const
1039{
1040 Q_ASSERT(!name.isEmpty());
1041
1042 if (!compiledPattern)
1043 return -1;
1044
1045 // See the other usages of pcre2_pattern_info_16 for more details about this
1046 PCRE2_SPTR16 *namedCapturingTable;
1047 unsigned int namedCapturingTableEntryCount;
1048 unsigned int namedCapturingTableEntrySize;
1049
1050 pcre2_pattern_info_16(compiledPattern, PCRE2_INFO_NAMETABLE, &namedCapturingTable);
1051 pcre2_pattern_info_16(compiledPattern, PCRE2_INFO_NAMECOUNT, &namedCapturingTableEntryCount);
1052 pcre2_pattern_info_16(compiledPattern, PCRE2_INFO_NAMEENTRYSIZE, &namedCapturingTableEntrySize);
1053
1054 for (unsigned int i = 0; i < namedCapturingTableEntryCount; ++i) {
1055 const auto currentNamedCapturingTableRow =
1056 reinterpret_cast<const char16_t *>(namedCapturingTable) + namedCapturingTableEntrySize * i;
1057
1058 if (name == (currentNamedCapturingTableRow + 1)) {
1059 const int index = *currentNamedCapturingTableRow;
1060 return index;
1061 }
1062 }
1063
1064 return -1;
1065}
1066
1067/*!
1068 \internal
1069
1070 This is a simple wrapper for pcre2_match_16 for handling the case in which the
1071 JIT runs out of memory. In that case, we allocate a thread-local JIT stack
1072 and re-run pcre2_match_16.
1073*/
1074static int safe_pcre2_match_16(const pcre2_code_16 *code,
1075 PCRE2_SPTR16 subject, qsizetype length,
1076 qsizetype startOffset, int options,
1077 pcre2_match_data_16 *matchData,
1078 pcre2_match_context_16 *matchContext)
1079{
1080 int result = pcre2_match_16(code, subject, length,
1081 startOffset, options, matchData, matchContext);
1082
1083 if (result == PCRE2_ERROR_JIT_STACKLIMIT && !jitStacks()->hasLocalData()) {
1084 QPcreJitStackPointer *p = new QPcreJitStackPointer;
1085 jitStacks()->setLocalData(p);
1086
1087 result = pcre2_match_16(code, subject, length,
1088 startOffset, options, matchData, matchContext);
1089 }
1090
1091 return result;
1092}
1093
1094/*!
1095 \internal
1096
1097 Performs a match on the subject string view held by \a priv. The
1098 match will be of type priv->matchType and using the options
1099 priv->matchOptions; the matching \a offset is relative the
1100 substring, and if negative, it's taken as an offset from the end of
1101 the substring.
1102
1103 It also advances a match if a previous result is given as \a
1104 previous. The subject string goes a Unicode validity check if
1105 \a checkSubjectString is CheckSubjectString and the match options don't
1106 include DontCheckSubjectStringMatchOption (PCRE doesn't like illegal
1107 UTF-16 sequences).
1108
1109 \a priv is modified to hold the results of the match.
1110
1111 Advancing a match is a tricky algorithm. If the previous match matched a
1112 non-empty string, we just do an ordinary match at the offset position.
1113
1114 If the previous match matched an empty string, then an anchored, non-empty
1115 match is attempted at the offset position. If that succeeds, then we got
1116 the next match and we can return it. Otherwise, we advance by 1 position
1117 (which can be one or two code units in UTF-16!) and reattempt a "normal"
1118 match. We also have the problem of detecting the current newline format: if
1119 the new advanced offset is pointing to the beginning of a CRLF sequence, we
1120 must advance over it.
1121*/
1122void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv,
1123 qsizetype offset,
1124 CheckSubjectStringOption checkSubjectStringOption,
1125 const QRegularExpressionMatchPrivate *previous) const
1126{
1127 Q_ASSERT(priv);
1128 Q_ASSUME(priv != previous);
1129
1130 const qsizetype subjectLength = priv->subject.size();
1131
1132 if (offset < 0)
1133 offset += subjectLength;
1134
1135 if (offset < 0 || offset > subjectLength)
1136 return;
1137
1138 if (Q_UNLIKELY(!compiledPattern)) {
1139 qWarning("QRegularExpressionPrivate::doMatch(): called on an invalid QRegularExpression object");
1140 return;
1141 }
1142
1143 // skip doing the actual matching if NoMatch type was requested
1144 if (priv->matchType == QRegularExpression::NoMatch) {
1145 priv->isValid = true;
1146 return;
1147 }
1148
1149 int pcreOptions = convertToPcreOptions(priv->matchOptions);
1150
1151 if (priv->matchType == QRegularExpression::PartialPreferCompleteMatch)
1152 pcreOptions |= PCRE2_PARTIAL_SOFT;
1153 else if (priv->matchType == QRegularExpression::PartialPreferFirstMatch)
1154 pcreOptions |= PCRE2_PARTIAL_HARD;
1155
1156 if (checkSubjectStringOption == DontCheckSubjectString)
1157 pcreOptions |= PCRE2_NO_UTF_CHECK;
1158
1159 bool previousMatchWasEmpty = false;
1160 if (previous && previous->hasMatch &&
1161 (previous->capturedOffsets.at(0) == previous->capturedOffsets.at(1))) {
1162 previousMatchWasEmpty = true;
1163 }
1164
1165 pcre2_match_context_16 *matchContext = pcre2_match_context_create_16(nullptr);
1166 pcre2_jit_stack_assign_16(matchContext, &qtPcreCallback, nullptr);
1167 pcre2_match_data_16 *matchData = pcre2_match_data_create_from_pattern_16(compiledPattern, nullptr);
1168
1169 const char16_t * const subjectUtf16 = priv->subject.utf16();
1170
1171 int result;
1172
1173 if (!previousMatchWasEmpty) {
1174 result = safe_pcre2_match_16(compiledPattern,
1175 reinterpret_cast<PCRE2_SPTR16>(subjectUtf16), subjectLength,
1176 offset, pcreOptions,
1177 matchData, matchContext);
1178 } else {
1179 result = safe_pcre2_match_16(compiledPattern,
1180 reinterpret_cast<PCRE2_SPTR16>(subjectUtf16), subjectLength,
1181 offset, pcreOptions | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED,
1182 matchData, matchContext);
1183
1184 if (result == PCRE2_ERROR_NOMATCH) {
1185 ++offset;
1186
1187 if (usingCrLfNewlines
1188 && offset < subjectLength
1189 && subjectUtf16[offset - 1] == QLatin1Char('\r')
1190 && subjectUtf16[offset] == QLatin1Char('\n')) {
1191 ++offset;
1192 } else if (offset < subjectLength
1193 && QChar::isLowSurrogate(subjectUtf16[offset])) {
1194 ++offset;
1195 }
1196
1197 result = safe_pcre2_match_16(compiledPattern,
1198 reinterpret_cast<PCRE2_SPTR16>(subjectUtf16), subjectLength,
1199 offset, pcreOptions,
1200 matchData, matchContext);
1201 }
1202 }
1203
1204#ifdef QREGULAREXPRESSION_DEBUG
1205 qDebug() << "Matching" << pattern << "against" << subject
1206 << "offset" << offset
1207 << priv->matchType << priv->matchOptions << previousMatchWasEmpty
1208 << "result" << result;
1209#endif
1210
1211 // result == 0 means not enough space in captureOffsets; should never happen
1212 Q_ASSERT(result != 0);
1213
1214 if (result > 0) {
1215 // full match
1216 priv->isValid = true;
1217 priv->hasMatch = true;
1218 priv->capturedCount = result;
1219 priv->capturedOffsets.resize(result * 2);
1220 } else {
1221 // no match, partial match or error
1222 priv->hasPartialMatch = (result == PCRE2_ERROR_PARTIAL);
1223 priv->isValid = (result == PCRE2_ERROR_NOMATCH || result == PCRE2_ERROR_PARTIAL);
1224
1225 if (result == PCRE2_ERROR_PARTIAL) {
1226 // partial match:
1227 // leave the start and end capture offsets (i.e. cap(0))
1228 priv->capturedCount = 1;
1229 priv->capturedOffsets.resize(2);
1230 } else {
1231 // no match or error
1232 priv->capturedCount = 0;
1233 priv->capturedOffsets.clear();
1234 }
1235 }
1236
1237 // copy the captured substrings offsets, if any
1238 if (priv->capturedCount) {
1239 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(matchData);
1240 qsizetype *const capturedOffsets = priv->capturedOffsets.data();
1241
1242 for (int i = 0; i < priv->capturedCount * 2; ++i)
1243 capturedOffsets[i] = qsizetype(ovector[i]);
1244
1245 // For partial matches, PCRE2 and PCRE1 differ in behavior when lookbehinds
1246 // are involved. PCRE2 reports the real begin of the match and the maximum
1247 // used lookbehind as distinct information; PCRE1 instead automatically
1248 // adjusted ovector[0] to include the maximum lookbehind.
1249 //
1250 // For instance, given the pattern "\bstring\b", and the subject "a str":
1251 // * PCRE1 reports partial, capturing " str"
1252 // * PCRE2 reports partial, capturing "str" with a lookbehind of 1
1253 //
1254 // To keep behavior, emulate PCRE1 here.
1255 // (Eventually, we could expose the lookbehind info in a future patch.)
1256 if (result == PCRE2_ERROR_PARTIAL) {
1257 unsigned int maximumLookBehind;
1258 pcre2_pattern_info_16(compiledPattern, PCRE2_INFO_MAXLOOKBEHIND, &maximumLookBehind);
1259 capturedOffsets[0] -= maximumLookBehind;
1260 }
1261 }
1262
1263 pcre2_match_data_free_16(matchData);
1264 pcre2_match_context_free_16(matchContext);
1265}
1266
1267/*!
1268 \internal
1269*/
1270QRegularExpressionMatchPrivate::QRegularExpressionMatchPrivate(const QRegularExpression &re,
1271 const QString &subjectStorage,
1272 QStringView subject,
1273 QRegularExpression::MatchType matchType,
1274 QRegularExpression::MatchOptions matchOptions)
1275 : regularExpression(re),
1276 subjectStorage(subjectStorage),
1277 subject(subject),
1278 matchType(matchType),
1279 matchOptions(matchOptions)
1280{
1281}
1282
1283/*!
1284 \internal
1285*/
1286QRegularExpressionMatch QRegularExpressionMatchPrivate::nextMatch() const
1287{
1288 Q_ASSERT(isValid);
1289 Q_ASSERT(hasMatch || hasPartialMatch);
1290
1291 auto nextPrivate = new QRegularExpressionMatchPrivate(regularExpression,
1292 subjectStorage,
1293 subject,
1294 matchType,
1295 matchOptions);
1296
1297 // Note the DontCheckSubjectString passed for the check of the subject string:
1298 // if we're advancing a match on the same subject,
1299 // then that subject was already checked at least once (when this object
1300 // was created, or when the object that created this one was created, etc.)
1301 regularExpression.d->doMatch(nextPrivate,
1302 capturedOffsets.at(1),
1303 QRegularExpressionPrivate::DontCheckSubjectString,
1304 this);
1305 return QRegularExpressionMatch(*nextPrivate);
1306}
1307
1308/*!
1309 \internal
1310*/
1311QRegularExpressionMatchIteratorPrivate::QRegularExpressionMatchIteratorPrivate(const QRegularExpression &re,
1312 QRegularExpression::MatchType matchType,
1313 QRegularExpression::MatchOptions matchOptions,
1314 const QRegularExpressionMatch &next)
1315 : next(next),
1316 regularExpression(re),
1317 matchType(matchType), matchOptions(matchOptions)
1318{
1319}
1320
1321/*!
1322 \internal
1323*/
1324bool QRegularExpressionMatchIteratorPrivate::hasNext() const
1325{
1326 return next.isValid() && (next.hasMatch() || next.hasPartialMatch());
1327}
1328
1329// PUBLIC API
1330
1331/*!
1332 Constructs a QRegularExpression object with an empty pattern and no pattern
1333 options.
1334
1335 \sa setPattern(), setPatternOptions()
1336*/
1337QRegularExpression::QRegularExpression()
1338 : d(new QRegularExpressionPrivate)
1339{
1340}
1341
1342/*!
1343 Constructs a QRegularExpression object using the given \a pattern as
1344 pattern and the \a options as the pattern options.
1345
1346 \sa setPattern(), setPatternOptions()
1347*/
1348QRegularExpression::QRegularExpression(const QString &pattern, PatternOptions options)
1349 : d(new QRegularExpressionPrivate)
1350{
1351 d->pattern = pattern;
1352 d->patternOptions = options;
1353}
1354
1355/*!
1356 Constructs a QRegularExpression object as a copy of \a re.
1357
1358 \sa operator=()
1359*/
1360QRegularExpression::QRegularExpression(const QRegularExpression &re)
1361 : d(re.d)
1362{
1363}
1364
1365/*!
1366 Destroys the QRegularExpression object.
1367*/
1368QRegularExpression::~QRegularExpression()
1369{
1370}
1371
1372/*!
1373 Assigns the regular expression \a re to this object, and returns a reference
1374 to the copy. Both the pattern and the pattern options are copied.
1375*/
1376QRegularExpression &QRegularExpression::operator=(const QRegularExpression &re)
1377{
1378 d = re.d;
1379 return *this;
1380}
1381
1382/*!
1383 \fn void QRegularExpression::swap(QRegularExpression &other)
1384
1385 Swaps the regular expression \a other with this regular expression. This
1386 operation is very fast and never fails.
1387*/
1388
1389/*!
1390 Returns the pattern string of the regular expression.
1391
1392 \sa setPattern(), patternOptions()
1393*/
1394QString QRegularExpression::pattern() const
1395{
1396 return d->pattern;
1397}
1398
1399/*!
1400 Sets the pattern string of the regular expression to \a pattern. The
1401 pattern options are left unchanged.
1402
1403 \sa pattern(), setPatternOptions()
1404*/
1405void QRegularExpression::setPattern(const QString &pattern)
1406{
1407 d.detach();
1408 d->isDirty = true;
1409 d->pattern = pattern;
1410}
1411
1412/*!
1413 Returns the pattern options for the regular expression.
1414
1415 \sa setPatternOptions(), pattern()
1416*/
1417QRegularExpression::PatternOptions QRegularExpression::patternOptions() const
1418{
1419 return d->patternOptions;
1420}
1421
1422/*!
1423 Sets the given \a options as the pattern options of the regular expression.
1424 The pattern string is left unchanged.
1425
1426 \sa patternOptions(), setPattern()
1427*/
1428void QRegularExpression::setPatternOptions(PatternOptions options)
1429{
1430 d.detach();
1431 d->isDirty = true;
1432 d->patternOptions = options;
1433}
1434
1435/*!
1436 Returns the number of capturing groups inside the pattern string,
1437 or -1 if the regular expression is not valid.
1438
1439 \note The implicit capturing group 0 is \e{not} included in the returned number.
1440
1441 \sa isValid()
1442*/
1443int QRegularExpression::captureCount() const
1444{
1445 if (!isValid()) // will compile the pattern
1446 return -1;
1447 return d->capturingCount;
1448}
1449
1450/*!
1451 \since 5.1
1452
1453 Returns a list of captureCount() + 1 elements, containing the names of the
1454 named capturing groups in the pattern string. The list is sorted such that
1455 the element of the list at position \c{i} is the name of the \c{i}-th
1456 capturing group, if it has a name, or an empty string if that capturing
1457 group is unnamed.
1458
1459 For instance, given the regular expression
1460
1461 \snippet code/src_corelib_text_qregularexpression.cpp 32
1462
1463 namedCaptureGroups() will return the following list:
1464
1465 \snippet code/src_corelib_text_qregularexpression.cpp 33
1466
1467 which corresponds to the fact that the capturing group #0 (corresponding to
1468 the whole match) has no name, the capturing group #1 has name "day", the
1469 capturing group #2 has name "month", etc.
1470
1471 If the regular expression is not valid, returns an empty list.
1472
1473 \sa isValid(), QRegularExpressionMatch::captured(), QString::isEmpty()
1474*/
1475QStringList QRegularExpression::namedCaptureGroups() const
1476{
1477 if (!isValid()) // isValid() will compile the pattern
1478 return QStringList();
1479
1480 // namedCapturingTable will point to a table of
1481 // namedCapturingTableEntryCount entries, each one of which
1482 // contains one ushort followed by the name, NUL terminated.
1483 // The ushort is the numerical index of the name in the pattern.
1484 // The length of each entry is namedCapturingTableEntrySize.
1485 PCRE2_SPTR16 *namedCapturingTable;
1486 unsigned int namedCapturingTableEntryCount;
1487 unsigned int namedCapturingTableEntrySize;
1488
1489 pcre2_pattern_info_16(d->compiledPattern, PCRE2_INFO_NAMETABLE, &namedCapturingTable);
1490 pcre2_pattern_info_16(d->compiledPattern, PCRE2_INFO_NAMECOUNT, &namedCapturingTableEntryCount);
1491 pcre2_pattern_info_16(d->compiledPattern, PCRE2_INFO_NAMEENTRYSIZE, &namedCapturingTableEntrySize);
1492
1493 // The +1 is for the implicit group #0
1494 QStringList result(d->capturingCount + 1);
1495
1496 for (unsigned int i = 0; i < namedCapturingTableEntryCount; ++i) {
1497 const auto currentNamedCapturingTableRow =
1498 reinterpret_cast<const char16_t *>(namedCapturingTable) + namedCapturingTableEntrySize * i;
1499
1500 const int index = *currentNamedCapturingTableRow;
1501 result[index] = QString::fromUtf16(currentNamedCapturingTableRow + 1);
1502 }
1503
1504 return result;
1505}
1506
1507/*!
1508 Returns \c true if the regular expression is a valid regular expression (that
1509 is, it contains no syntax errors, etc.), or false otherwise. Use
1510 errorString() to obtain a textual description of the error.
1511
1512 \sa errorString(), patternErrorOffset()
1513*/
1514bool QRegularExpression::isValid() const
1515{
1516 d.data()->compilePattern();
1517 return d->compiledPattern;
1518}
1519
1520/*!
1521 Returns a textual description of the error found when checking the validity
1522 of the regular expression, or "no error" if no error was found.
1523
1524 \sa isValid(), patternErrorOffset()
1525*/
1526QString QRegularExpression::errorString() const
1527{
1528 d.data()->compilePattern();
1529 if (d->errorCode) {
1530 QString errorString;
1531 int errorStringLength;
1532 do {
1533 errorString.resize(errorString.length() + 64);
1534 errorStringLength = pcre2_get_error_message_16(d->errorCode,
1535 reinterpret_cast<ushort *>(errorString.data()),
1536 errorString.length());
1537 } while (errorStringLength < 0);
1538 errorString.resize(errorStringLength);
1539
1540#ifdef QT_NO_TRANSLATION
1541 return errorString;
1542#else
1543 return QCoreApplication::translate("QRegularExpression", std::move(errorString).toLatin1().constData());
1544#endif
1545 }
1546#ifdef QT_NO_TRANSLATION
1547 return QLatin1String("no error");
1548#else
1549 return QCoreApplication::translate("QRegularExpression", "no error");
1550#endif
1551}
1552
1553/*!
1554 Returns the offset, inside the pattern string, at which an error was found
1555 when checking the validity of the regular expression. If no error was
1556 found, then -1 is returned.
1557
1558 \sa pattern(), isValid(), errorString()
1559*/
1560qsizetype QRegularExpression::patternErrorOffset() const
1561{
1562 d.data()->compilePattern();
1563 return d->errorOffset;
1564}
1565
1566/*!
1567 Attempts to match the regular expression against the given \a subject
1568 string, starting at the position \a offset inside the subject, using a
1569 match of type \a matchType and honoring the given \a matchOptions.
1570
1571 The returned QRegularExpressionMatch object contains the results of the
1572 match.
1573
1574 \sa QRegularExpressionMatch, {normal matching}
1575*/
1576QRegularExpressionMatch QRegularExpression::match(const QString &subject,
1577 qsizetype offset,
1578 MatchType matchType,
1579 MatchOptions matchOptions) const
1580{
1581 d.data()->compilePattern();
1582 auto priv = new QRegularExpressionMatchPrivate(*this,
1583 subject,
1584 qToStringViewIgnoringNull(subject),
1585 matchType,
1586 matchOptions);
1587 d->doMatch(priv, offset);
1588 return QRegularExpressionMatch(*priv);
1589}
1590
1591/*!
1592 \since 6.0
1593 \overload
1594
1595 Attempts to match the regular expression against the given \a subjectView
1596 string view, starting at the position \a offset inside the subject, using a
1597 match of type \a matchType and honoring the given \a matchOptions.
1598
1599 The returned QRegularExpressionMatch object contains the results of the
1600 match.
1601
1602 \note The data referenced by \a subjectView must remain valid as long
1603 as there are QRegularExpressionMatch objects using it.
1604
1605 \sa QRegularExpressionMatch, {normal matching}
1606*/
1607QRegularExpressionMatch QRegularExpression::match(QStringView subjectView,
1608 qsizetype offset,
1609 MatchType matchType,
1610 MatchOptions matchOptions) const
1611{
1612 d.data()->compilePattern();
1613 auto priv = new QRegularExpressionMatchPrivate(*this,
1614 QString(),
1615 subjectView,
1616 matchType,
1617 matchOptions);
1618 d->doMatch(priv, offset);
1619 return QRegularExpressionMatch(*priv);
1620}
1621
1622/*!
1623 Attempts to perform a global match of the regular expression against the
1624 given \a subject string, starting at the position \a offset inside the
1625 subject, using a match of type \a matchType and honoring the given \a
1626 matchOptions.
1627
1628 The returned QRegularExpressionMatchIterator is positioned before the
1629 first match result (if any).
1630
1631 \sa QRegularExpressionMatchIterator, {global matching}
1632*/
1633QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &subject,
1634 qsizetype offset,
1635 MatchType matchType,
1636 MatchOptions matchOptions) const
1637{
1638 QRegularExpressionMatchIteratorPrivate *priv =
1639 new QRegularExpressionMatchIteratorPrivate(*this,
1640 matchType,
1641 matchOptions,
1642 match(subject, offset, matchType, matchOptions));
1643
1644 return QRegularExpressionMatchIterator(*priv);
1645}
1646
1647/*!
1648 \since 6.0
1649 \overload
1650
1651 Attempts to perform a global match of the regular expression against the
1652 given \a subjectView string view, starting at the position \a offset inside the
1653 subject, using a match of type \a matchType and honoring the given \a
1654 matchOptions.
1655
1656 The returned QRegularExpressionMatchIterator is positioned before the
1657 first match result (if any).
1658
1659 \note The data referenced by \a subjectView must remain valid as
1660 long as there are QRegularExpressionMatchIterator or
1661 QRegularExpressionMatch objects using it.
1662
1663 \sa QRegularExpressionMatchIterator, {global matching}
1664*/
1665QRegularExpressionMatchIterator QRegularExpression::globalMatch(QStringView subjectView,
1666 qsizetype offset,
1667 MatchType matchType,
1668 MatchOptions matchOptions) const
1669{
1670 QRegularExpressionMatchIteratorPrivate *priv =
1671 new QRegularExpressionMatchIteratorPrivate(*this,
1672 matchType,
1673 matchOptions,
1674 match(subjectView, offset, matchType, matchOptions));
1675
1676 return QRegularExpressionMatchIterator(*priv);
1677}
1678
1679/*!
1680 \since 5.4
1681
1682 Compiles the pattern immediately, including JIT compiling it (if
1683 the JIT is enabled) for optimization.
1684
1685 \sa isValid(), {Debugging Code that Uses QRegularExpression}
1686*/
1687void QRegularExpression::optimize() const
1688{
1689 d.data()->compilePattern();
1690}
1691
1692/*!
1693 Returns \c true if the regular expression is equal to \a re, or false
1694 otherwise. Two QRegularExpression objects are equal if they have
1695 the same pattern string and the same pattern options.
1696
1697 \sa operator!=()
1698*/
1699bool QRegularExpression::operator==(const QRegularExpression &re) const
1700{
1701 return (d == re.d) ||
1702 (d->pattern == re.d->pattern && d->patternOptions == re.d->patternOptions);
1703}
1704
/*!
    \fn QRegularExpression & QRegularExpression::operator=(QRegularExpression && re)

    Move-assigns the regular expression \a re to this object, and returns a
    reference to this object. Both the pattern and the pattern options are
    taken from \a re.
*/
1711
1712/*!
1713 \fn bool QRegularExpression::operator!=(const QRegularExpression &re) const
1714
1715 Returns \c true if the regular expression is different from \a re, or
1716 false otherwise.
1717
1718 \sa operator==()
1719*/
1720
1721/*!
1722 \since 5.6
1723 \relates QRegularExpression
1724
1725 Returns the hash value for \a key, using
1726 \a seed to seed the calculation.
1727*/
1728size_t qHash(const QRegularExpression &key, size_t seed) noexcept
1729{
1730 return qHashMulti(seed, key.d->pattern, key.d->patternOptions);
1731}
1732
1733#if QT_STRINGVIEW_LEVEL < 2
1734/*!
1735 \fn QString QRegularExpression::escape(const QString &str)
1736 \overload
1737*/
1738#endif // QT_STRINGVIEW_LEVEL < 2
1739
1740/*!
1741 \since 5.15
1742
1743 Escapes all characters of \a str so that they no longer have any special
1744 meaning when used as a regular expression pattern string, and returns
1745 the escaped string. For instance:
1746
1747 \snippet code/src_corelib_text_qregularexpression.cpp 26
1748
1749 This is very convenient in order to build patterns from arbitrary strings:
1750
1751 \snippet code/src_corelib_text_qregularexpression.cpp 27
1752
1753 \note This function implements Perl's quotemeta algorithm and escapes with
1754 a backslash all characters in \a str, except for the characters in the
1755 \c{[A-Z]}, \c{[a-z]} and \c{[0-9]} ranges, as well as the underscore
1756 (\c{_}) character. The only difference with Perl is that a literal NUL
1757 inside \a str is escaped with the sequence \c{"\\0"} (backslash +
1758 \c{'0'}), instead of \c{"\\\0"} (backslash + \c{NUL}).
1759*/
1760QString QRegularExpression::escape(QStringView str)
1761{
1762 QString result;
1763 const qsizetype count = str.size();
1764 result.reserve(count * 2);
1765
1766 // everything but [a-zA-Z0-9_] gets escaped,
1767 // cf. perldoc -f quotemeta
1768 for (qsizetype i = 0; i < count; ++i) {
1769 const QChar current = str.at(i);
1770
1771 if (current == QChar::Null) {
1772 // unlike Perl, a literal NUL must be escaped with
1773 // "\\0" (backslash + 0) and not "\\\0" (backslash + NUL),
1774 // because pcre16_compile uses a NUL-terminated string
1775 result.append(QLatin1Char('\\'));
1776 result.append(QLatin1Char('0'));
1777 } else if ( (current < QLatin1Char('a') || current > QLatin1Char('z')) &&
1778 (current < QLatin1Char('A') || current > QLatin1Char('Z')) &&
1779 (current < QLatin1Char('0') || current > QLatin1Char('9')) &&
1780 current != QLatin1Char('_') )
1781 {
1782 result.append(QLatin1Char('\\'));
1783 result.append(current);
1784 if (current.isHighSurrogate() && i < (count - 1))
1785 result.append(str.at(++i));
1786 } else {
1787 result.append(current);
1788 }
1789 }
1790
1791 result.squeeze();
1792 return result;
1793}
1794
1795#if QT_STRINGVIEW_LEVEL < 2
1796/*!
1797 \since 5.12
1798 \fn QString QRegularExpression::wildcardToRegularExpression(const QString &pattern, WildcardConversionOptions options)
1799 \overload
1800*/
1801#endif // QT_STRINGVIEW_LEVEL < 2
1802
1803/*!
1804 \since 6.0
1805 \enum QRegularExpression::WildcardConversionOption
1806
1807 The WildcardConversionOption enum defines modifiers to the way a wildcard glob
1808 pattern gets converted to a regular expression pattern.
1809
1810 \value DefaultWildcardConversion
1811 No conversion options are set.
1812
1813 \value UnanchoredWildcardConversion
1814 The conversion will not anchor the pattern. This allows for partial string matches of
1815 wildcard expressions.
1816*/
1817
1818/*!
1819 \since 5.15
1820
1821 Returns a regular expression representation of the given glob \a pattern.
1822 The transformation is targeting file path globbing, which means in particular
1823 that path separators receive special treatment. This implies that it is not
1824 just a basic translation from "*" to ".*".
1825
1826 \snippet code/src_corelib_text_qregularexpression.cpp 31
1827
    By default, the returned regular expression is fully anchored. In other
    words, there is no need to call anchoredPattern() again on the
    result. To get a regular expression that is not anchored, pass
    UnanchoredWildcardConversion as the conversion \a options.
1832
1833 This implementation follows closely the definition
1834 of wildcard for glob patterns:
1835 \table
1836 \row \li \b{c}
1837 \li Any character represents itself apart from those mentioned
1838 below. Thus \b{c} matches the character \e c.
1839 \row \li \b{?}
1840 \li Matches any single character. It is the same as
1841 \b{.} in full regexps.
1842 \row \li \b{*}
1843 \li Matches zero or more of any characters. It is the
1844 same as \b{.*} in full regexps.
1845 \row \li \b{[abc]}
1846 \li Matches one character given in the bracket.
1847 \row \li \b{[a-c]}
1848 \li Matches one character from the range given in the bracket.
1849 \row \li \b{[!abc]}
1850 \li Matches one character that is not given in the bracket. It is the
1851 same as \b{[^abc]} in full regexp.
1852 \row \li \b{[!a-c]}
1853 \li Matches one character that is not from the range given in the
1854 bracket. It is the same as \b{[^a-c]} in full regexp.
1855 \endtable
1856
1857 \note The backslash (\\) character is \e not an escape char in this context.
1858 In order to match one of the special characters, place it in square brackets
1859 (for example, \c{[?]}).
1860
1861 More information about the implementation can be found in:
1862 \list
1863 \li \l {https://en.wikipedia.org/wiki/Glob_(programming)} {The Wikipedia Glob article}
1864 \li \c {man 7 glob}
1865 \endlist
1866
1867 \sa escape()
1868*/
1869QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, WildcardConversionOptions options)
1870{
1871 const qsizetype wclen = pattern.size();
1872 QString rx;
1873 rx.reserve(wclen + wclen / 16);
1874 qsizetype i = 0;
1875 const QChar *wc = pattern.data();
1876
1877#ifdef Q_OS_WIN
1878 const QLatin1Char nativePathSeparator('\\');
1879 const QLatin1String starEscape("[^/\\\\]*");
1880 const QLatin1String questionMarkEscape("[^/\\\\]");
1881#else
1882 const QLatin1Char nativePathSeparator('/');
1883 const QLatin1String starEscape("[^/]*");
1884 const QLatin1String questionMarkEscape("[^/]");
1885#endif
1886
1887 while (i < wclen) {
1888 const QChar c = wc[i++];
1889 switch (c.unicode()) {
1890 case '*':
1891 rx += starEscape;
1892 break;
1893 case '?':
1894 rx += questionMarkEscape;
1895 break;
1896 case '\\':
1897#ifdef Q_OS_WIN
1898 case '/':
1899 rx += QLatin1String("[/\\\\]");
1900 break;
1901#endif
1902 case '$':
1903 case '(':
1904 case ')':
1905 case '+':
1906 case '.':
1907 case '^':
1908 case '{':
1909 case '|':
1910 case '}':
1911 rx += QLatin1Char('\\');
1912 rx += c;
1913 break;
1914 case '[':
1915 rx += c;
1916 // Support for the [!abc] or [!a-c] syntax
1917 if (i < wclen) {
1918 if (wc[i] == QLatin1Char('!')) {
1919 rx += QLatin1Char('^');
1920 ++i;
1921 }
1922
1923 if (i < wclen && wc[i] == QLatin1Char(']'))
1924 rx += wc[i++];
1925
1926 while (i < wclen && wc[i] != QLatin1Char(']')) {
1927 // The '/' appearing in a character class invalidates the
1928 // regular expression parsing. It also concerns '\\' on
1929 // Windows OS types.
1930 if (wc[i] == QLatin1Char('/') || wc[i] == nativePathSeparator)
1931 return rx;
1932 if (wc[i] == QLatin1Char('\\'))
1933 rx += QLatin1Char('\\');
1934 rx += wc[i++];
1935 }
1936 }
1937 break;
1938 default:
1939 rx += c;
1940 break;
1941 }
1942 }
1943
1944 if (!(options & UnanchoredWildcardConversion))
1945 rx = anchoredPattern(rx);
1946
1947 return rx;
1948}
1949
1950/*!
1951 \since 6.0
1952 Returns a regular expression of the glob pattern \a pattern. The regular expression
1953 will be case sensitive if \a cs is \l{Qt::CaseSensitive}, and converted according to
1954 \a options.
1955
1956 Equivalent to
1957 \code
1958 auto reOptions = cs == Qt::CaseSensitive ? QRegularExpression::NoPatternOption :
1959 QRegularExpression::CaseInsensitiveOption;
    return QRegularExpression(wildcardToRegularExpression(pattern, options), reOptions);
1961 \endcode
1962*/
1963QRegularExpression QRegularExpression::fromWildcard(QStringView pattern, Qt::CaseSensitivity cs,
1964 WildcardConversionOptions options)
1965{
1966 auto reOptions = cs == Qt::CaseSensitive ? QRegularExpression::NoPatternOption :
1967 QRegularExpression::CaseInsensitiveOption;
1968 return QRegularExpression(wildcardToRegularExpression(pattern, options), reOptions);
1969}
1970
1971#if QT_STRINGVIEW_LEVEL < 2
1972/*!
1973 \fn QRegularExpression::anchoredPattern(const QString &expression)
1974 \since 5.12
1975 \overload
1976*/
1977#endif // QT_STRINGVIEW_LEVEL < 2
1978
1979/*!
1980 \since 5.15
1981
1982 Returns the \a expression wrapped between the \c{\A} and \c{\z} anchors to
1983 be used for exact matching.
1984*/
1985QString QRegularExpression::anchoredPattern(QStringView expression)
1986{
1987 return QString()
1988 + QLatin1String("\\A(?:")
1989 + expression
1990 + QLatin1String(")\\z");
1991}
1992
1993/*!
1994 \since 5.1
1995
1996 Constructs a valid, empty QRegularExpressionMatch object. The regular
1997 expression is set to a default-constructed one; the match type to
1998 QRegularExpression::NoMatch and the match options to
1999 QRegularExpression::NoMatchOption.
2000
2001 The object will report no match through the hasMatch() and the
2002 hasPartialMatch() member functions.
2003*/
2004QRegularExpressionMatch::QRegularExpressionMatch()
2005 : d(new QRegularExpressionMatchPrivate(QRegularExpression(),
2006 QString(),
2007 QStringView(),
2008 QRegularExpression::NoMatch,
2009 QRegularExpression::NoMatchOption))
2010{
2011 d->isValid = true;
2012}
2013
2014/*!
2015 Destroys the match result.
2016*/
2017QRegularExpressionMatch::~QRegularExpressionMatch()
2018{
2019}
2020
2021/*!
2022 Constructs a match result by copying the result of the given \a match.
2023
2024 \sa operator=()
2025*/
2026QRegularExpressionMatch::QRegularExpressionMatch(const QRegularExpressionMatch &match)
2027 : d(match.d)
2028{
2029}
2030
2031/*!
2032 Assigns the match result \a match to this object, and returns a reference
2033 to the copy.
2034*/
2035QRegularExpressionMatch &QRegularExpressionMatch::operator=(const QRegularExpressionMatch &match)
2036{
2037 d = match.d;
2038 return *this;
2039}
2040
2041/*!
2042 \fn QRegularExpressionMatch &QRegularExpressionMatch::operator=(QRegularExpressionMatch &&match)
2043
2044 Move-assigns the match result \a match to this object, and returns a reference
2045 to the copy.
2046*/
2047
2048/*!
2049 \fn void QRegularExpressionMatch::swap(QRegularExpressionMatch &other)
2050
2051 Swaps the match result \a other with this match result. This
2052 operation is very fast and never fails.
2053*/
2054
2055/*!
2056 \internal
2057*/
2058QRegularExpressionMatch::QRegularExpressionMatch(QRegularExpressionMatchPrivate &dd)
2059 : d(&dd)
2060{
2061}
2062
2063/*!
2064 Returns the QRegularExpression object whose match() function returned this
2065 object.
2066
2067 \sa QRegularExpression::match(), matchType(), matchOptions()
2068*/
2069QRegularExpression QRegularExpressionMatch::regularExpression() const
2070{
2071 return d->regularExpression;
2072}
2073
2074
2075/*!
2076 Returns the match type that was used to get this QRegularExpressionMatch
2077 object, that is, the match type that was passed to
2078 QRegularExpression::match() or QRegularExpression::globalMatch().
2079
2080 \sa QRegularExpression::match(), regularExpression(), matchOptions()
2081*/
2082QRegularExpression::MatchType QRegularExpressionMatch::matchType() const
2083{
2084 return d->matchType;
2085}
2086
2087/*!
2088 Returns the match options that were used to get this
2089 QRegularExpressionMatch object, that is, the match options that were passed
2090 to QRegularExpression::match() or QRegularExpression::globalMatch().
2091
2092 \sa QRegularExpression::match(), regularExpression(), matchType()
2093*/
2094QRegularExpression::MatchOptions QRegularExpressionMatch::matchOptions() const
2095{
2096 return d->matchOptions;
2097}
2098
2099/*!
2100 Returns the index of the last capturing group that captured something,
2101 including the implicit capturing group 0. This can be used to extract all
2102 the substrings that were captured:
2103
2104 \snippet code/src_corelib_text_qregularexpression.cpp 28
2105
2106 Note that some of the capturing groups with an index less than
2107 lastCapturedIndex() could have not matched, and therefore captured nothing.
2108
2109 If the regular expression did not match, this function returns -1.
2110
2111 \sa captured(), capturedStart(), capturedEnd(), capturedLength()
2112*/
2113int QRegularExpressionMatch::lastCapturedIndex() const
2114{
2115 return d->capturedCount - 1;
2116}
2117
2118/*!
2119 Returns the substring captured by the \a nth capturing group.
2120
2121 If the \a nth capturing group did not capture a string, or if there is no
2122 such capturing group, returns a null QString.
2123
2124 \note The implicit capturing group number 0 captures the substring matched
2125 by the entire pattern.
2126
2127 \sa capturedView(), lastCapturedIndex(), capturedStart(), capturedEnd(),
2128 capturedLength(), QString::isNull()
2129*/
2130QString QRegularExpressionMatch::captured(int nth) const
2131{
2132 return capturedView(nth).toString();
2133}
2134
2135/*!
2136 \since 5.10
2137
2138 Returns a view of the substring captured by the \a nth capturing group.
2139
2140 If the \a nth capturing group did not capture a string, or if there is no
2141 such capturing group, returns a null QStringView.
2142
2143 \note The implicit capturing group number 0 captures the substring matched
2144 by the entire pattern.
2145
2146 \sa captured(), lastCapturedIndex(), capturedStart(), capturedEnd(),
2147 capturedLength(), QStringView::isNull()
2148*/
2149QStringView QRegularExpressionMatch::capturedView(int nth) const
2150{
2151 if (nth < 0 || nth > lastCapturedIndex())
2152 return QStringView();
2153
2154 qsizetype start = capturedStart(nth);
2155
2156 if (start == -1) // didn't capture
2157 return QStringView();
2158
2159 return d->subject.mid(start, capturedLength(nth));
2160}
2161
2162#if QT_STRINGVIEW_LEVEL < 2
2163/*! \fn QString QRegularExpressionMatch::captured(const QString &name) const
2164
2165 Returns the substring captured by the capturing group named \a name.
2166
2167 If the named capturing group \a name did not capture a string, or if
2168 there is no capturing group named \a name, returns a null QString.
2169
2170 \sa capturedView(), capturedStart(), capturedEnd(), capturedLength(),
2171 QString::isNull()
2172*/
2173#endif // QT_STRINGVIEW_LEVEL < 2
2174
2175/*!
2176 \since 5.10
2177
2178 Returns the substring captured by the capturing group named \a name.
2179
2180 If the named capturing group \a name did not capture a string, or if
2181 there is no capturing group named \a name, returns a null QString.
2182
2183 \sa capturedView(), capturedStart(), capturedEnd(), capturedLength(),
2184 QString::isNull()
2185*/
2186QString QRegularExpressionMatch::captured(QStringView name) const
2187{
2188 if (name.isEmpty()) {
2189 qWarning("QRegularExpressionMatch::captured: empty capturing group name passed");
2190 return QString();
2191 }
2192
2193 return capturedView(name).toString();
2194}
2195
2196/*!
2197 \since 5.10
2198
2199 Returns a view of the string captured by the capturing group named \a
2200 name.
2201
2202 If the named capturing group \a name did not capture a string, or if
2203 there is no capturing group named \a name, returns a null QStringView.
2204
2205 \sa captured(), capturedStart(), capturedEnd(), capturedLength(),
2206 QStringView::isNull()
2207*/
2208QStringView QRegularExpressionMatch::capturedView(QStringView name) const
2209{
2210 if (name.isEmpty()) {
2211 qWarning("QRegularExpressionMatch::capturedView: empty capturing group name passed");
2212 return QStringView();
2213 }
2214 int nth = d->regularExpression.d->captureIndexForName(name);
2215 if (nth == -1)
2216 return QStringView();
2217 return capturedView(nth);
2218}
2219
2220/*!
2221 Returns a list of all strings captured by capturing groups, in the order
2222 the groups themselves appear in the pattern string. The list includes the
2223 implicit capturing group number 0, capturing the substring matched by the
2224 entire pattern.
2225*/
2226QStringList QRegularExpressionMatch::capturedTexts() const
2227{
2228 QStringList texts;
2229 texts.reserve(d->capturedCount);
2230 for (int i = 0; i < d->capturedCount; ++i)
2231 texts << captured(i);
2232 return texts;
2233}
2234
2235/*!
2236 Returns the offset inside the subject string corresponding to the
2237 starting position of the substring captured by the \a nth capturing group.
2238 If the \a nth capturing group did not capture a string or doesn't exist,
2239 returns -1.
2240
2241 \sa capturedEnd(), capturedLength(), captured()
2242*/
2243qsizetype QRegularExpressionMatch::capturedStart(int nth) const
2244{
2245 if (nth < 0 || nth > lastCapturedIndex())
2246 return -1;
2247
2248 return d->capturedOffsets.at(nth * 2);
2249}
2250
2251/*!
2252 Returns the length of the substring captured by the \a nth capturing group.
2253
2254 \note This function returns 0 if the \a nth capturing group did not capture
2255 a string or doesn't exist.
2256
2257 \sa capturedStart(), capturedEnd(), captured()
2258*/
2259qsizetype QRegularExpressionMatch::capturedLength(int nth) const
2260{
2261 // bound checking performed by these two functions
2262 return capturedEnd(nth) - capturedStart(nth);
2263}
2264
2265/*!
2266 Returns the offset inside the subject string immediately after the ending
2267 position of the substring captured by the \a nth capturing group. If the \a
2268 nth capturing group did not capture a string or doesn't exist, returns -1.
2269
2270 \sa capturedStart(), capturedLength(), captured()
2271*/
2272qsizetype QRegularExpressionMatch::capturedEnd(int nth) const
2273{
2274 if (nth < 0 || nth > lastCapturedIndex())
2275 return -1;
2276
2277 return d->capturedOffsets.at(nth * 2 + 1);
2278}
2279
2280#if QT_STRINGVIEW_LEVEL < 2
2281/*! \fn qsizetype QRegularExpressionMatch::capturedStart(const QString &name) const
2282
2283 Returns the offset inside the subject string corresponding to the starting
2284 position of the substring captured by the capturing group named \a name.
2285 If the capturing group named \a name did not capture a string or doesn't
2286 exist, returns -1.
2287
2288 \sa capturedEnd(), capturedLength(), captured()
2289*/
2290
2291/*! \fn qsizetype QRegularExpressionMatch::capturedLength(const QString &name) const
2292
2293 Returns the length of the substring captured by the capturing group named
2294 \a name.
2295
2296 \note This function returns 0 if the capturing group named \a name did not
2297 capture a string or doesn't exist.
2298
2299 \sa capturedStart(), capturedEnd(), captured()
2300*/
2301
2302/*! \fn qsizetype QRegularExpressionMatch::capturedEnd(const QString &name) const
2303
2304 Returns the offset inside the subject string immediately after the ending
2305 position of the substring captured by the capturing group named \a name. If
2306 the capturing group named \a name did not capture a string or doesn't
2307 exist, returns -1.
2308
2309 \sa capturedStart(), capturedLength(), captured()
2310*/
2311#endif // QT_STRINGVIEW_LEVEL < 2
2312
2313/*!
2314 \since 5.10
2315
2316 Returns the offset inside the subject string corresponding to the starting
2317 position of the substring captured by the capturing group named \a name.
2318 If the capturing group named \a name did not capture a string or doesn't
2319 exist, returns -1.
2320
2321 \sa capturedEnd(), capturedLength(), captured()
2322*/
2323qsizetype QRegularExpressionMatch::capturedStart(QStringView name) const
2324{
2325 if (name.isEmpty()) {
2326 qWarning("QRegularExpressionMatch::capturedStart: empty capturing group name passed");
2327 return -1;
2328 }
2329 int nth = d->regularExpression.d->captureIndexForName(name);
2330 if (nth == -1)
2331 return -1;
2332 return capturedStart(nth);
2333}
2334
2335/*!
2336 \since 5.10
2337
2338 Returns the length of the substring captured by the capturing group named
2339 \a name.
2340
2341 \note This function returns 0 if the capturing group named \a name did not
2342 capture a string or doesn't exist.
2343
2344 \sa capturedStart(), capturedEnd(), captured()
2345*/
2346qsizetype QRegularExpressionMatch::capturedLength(QStringView name) const
2347{
2348 if (name.isEmpty()) {
2349 qWarning("QRegularExpressionMatch::capturedLength: empty capturing group name passed");
2350 return 0;
2351 }
2352 int nth = d->regularExpression.d->captureIndexForName(name);
2353 if (nth == -1)
2354 return 0;
2355 return capturedLength(nth);
2356}
2357
2358/*!
2359 \since 5.10
2360
2361 Returns the offset inside the subject string immediately after the ending
2362 position of the substring captured by the capturing group named \a name. If
2363 the capturing group named \a name did not capture a string or doesn't
2364 exist, returns -1.
2365
2366 \sa capturedStart(), capturedLength(), captured()
2367*/
2368qsizetype QRegularExpressionMatch::capturedEnd(QStringView name) const
2369{
2370 if (name.isEmpty()) {
2371 qWarning("QRegularExpressionMatch::capturedEnd: empty capturing group name passed");
2372 return -1;
2373 }
2374 int nth = d->regularExpression.d->captureIndexForName(name);
2375 if (nth == -1)
2376 return -1;
2377 return capturedEnd(nth);
2378}
2379
2380/*!
2381 Returns \c true if the regular expression matched against the subject string,
2382 or false otherwise.
2383
2384 \sa QRegularExpression::match(), hasPartialMatch()
2385*/
2386bool QRegularExpressionMatch::hasMatch() const
2387{
2388 return d->hasMatch;
2389}
2390
2391/*!
2392 Returns \c true if the regular expression partially matched against the
2393 subject string, or false otherwise.
2394
    \note Only a match that explicitly used one of the partial match types
    can yield a partial match. Still, if such a match succeeds totally, this
    function will return false, while hasMatch() will return true.
2398
2399 \sa QRegularExpression::match(), QRegularExpression::MatchType, hasMatch()
2400*/
2401bool QRegularExpressionMatch::hasPartialMatch() const
2402{
2403 return d->hasPartialMatch;
2404}
2405
2406/*!
2407 Returns \c true if the match object was obtained as a result from the
2408 QRegularExpression::match() function invoked on a valid QRegularExpression
2409 object; returns \c false if the QRegularExpression was invalid.
2410
2411 \sa QRegularExpression::match(), QRegularExpression::isValid()
2412*/
2413bool QRegularExpressionMatch::isValid() const
2414{
2415 return d->isValid;
2416}
2417
2418/*!
2419 \internal
2420*/
2421QRegularExpressionMatchIterator::QRegularExpressionMatchIterator(QRegularExpressionMatchIteratorPrivate &dd)
2422 : d(&dd)
2423{
2424}
2425
2426/*!
2427 \since 5.1
2428
2429 Constructs an empty, valid QRegularExpressionMatchIterator object. The
2430 regular expression is set to a default-constructed one; the match type to
2431 QRegularExpression::NoMatch and the match options to
2432 QRegularExpression::NoMatchOption.
2433
2434 Invoking the hasNext() member function on the constructed object will
2435 return false, as the iterator is not iterating on a valid sequence of
2436 matches.
2437*/
2438QRegularExpressionMatchIterator::QRegularExpressionMatchIterator()
2439 : d(new QRegularExpressionMatchIteratorPrivate(QRegularExpression(),
2440 QRegularExpression::NoMatch,
2441 QRegularExpression::NoMatchOption,
2442 QRegularExpressionMatch()))
2443{
2444}
2445
2446/*!
2447 Destroys the QRegularExpressionMatchIterator object.
2448*/
2449QRegularExpressionMatchIterator::~QRegularExpressionMatchIterator()
2450{
2451}
2452
2453/*!
2454 Constructs a QRegularExpressionMatchIterator object as a copy of \a
2455 iterator.
2456
2457 \sa operator=()
2458*/
2459QRegularExpressionMatchIterator::QRegularExpressionMatchIterator(const QRegularExpressionMatchIterator &iterator)
2460 : d(iterator.d)
2461{
2462}
2463
2464/*!
2465 Assigns the iterator \a iterator to this object, and returns a reference to
2466 the copy.
2467*/
2468QRegularExpressionMatchIterator &QRegularExpressionMatchIterator::operator=(const QRegularExpressionMatchIterator &iterator)
2469{
2470 d = iterator.d;
2471 return *this;
2472}
2473
2474/*!
2475 \fn QRegularExpressionMatchIterator &QRegularExpressionMatchIterator::operator=(QRegularExpressionMatchIterator &&iterator)
2476
2477 Move-assigns the \a iterator to this object.
2478*/
2479
2480/*!
2481 \fn void QRegularExpressionMatchIterator::swap(QRegularExpressionMatchIterator &other)
2482
2483 Swaps the iterator \a other with this iterator object. This operation is
2484 very fast and never fails.
2485*/
2486
2487/*!
2488 Returns \c true if the iterator object was obtained as a result from the
2489 QRegularExpression::globalMatch() function invoked on a valid
2490 QRegularExpression object; returns \c false if the QRegularExpression was
2491 invalid.
2492
2493 \sa QRegularExpression::globalMatch(), QRegularExpression::isValid()
2494*/
2495bool QRegularExpressionMatchIterator::isValid() const
2496{
2497 return d->next.isValid();
2498}
2499
2500/*!
2501 Returns \c true if there is at least one match result ahead of the iterator;
2502 otherwise it returns \c false.
2503
2504 \sa next()
2505*/
2506bool QRegularExpressionMatchIterator::hasNext() const
2507{
2508 return d->hasNext();
2509}
2510
2511/*!
2512 Returns the next match result without moving the iterator.
2513
2514 \note Calling this function when the iterator is at the end of the result
2515 set leads to undefined results.
2516*/
2517QRegularExpressionMatch QRegularExpressionMatchIterator::peekNext() const
2518{
2519 if (!hasNext())
2520 qWarning("QRegularExpressionMatchIterator::peekNext() called on an iterator already at end");
2521
2522 return d->next;
2523}
2524
2525/*!
2526 Returns the next match result and advances the iterator by one position.
2527
2528 \note Calling this function when the iterator is at the end of the result
2529 set leads to undefined results.
2530*/
2531QRegularExpressionMatch QRegularExpressionMatchIterator::next()
2532{
2533 if (!hasNext()) {
2534 qWarning("QRegularExpressionMatchIterator::next() called on an iterator already at end");
2535 return d.constData()->next;
2536 }
2537
2538 d.detach();
2539 return qExchange(d->next, d->next.d.constData()->nextMatch());
2540}
2541
2542/*!
2543 Returns the QRegularExpression object whose globalMatch() function returned
2544 this object.
2545
2546 \sa QRegularExpression::globalMatch(), matchType(), matchOptions()
2547*/
2548QRegularExpression QRegularExpressionMatchIterator::regularExpression() const
2549{
2550 return d->regularExpression;
2551}
2552
2553/*!
2554 Returns the match type that was used to get this
2555 QRegularExpressionMatchIterator object, that is, the match type that was
2556 passed to QRegularExpression::globalMatch().
2557
2558 \sa QRegularExpression::globalMatch(), regularExpression(), matchOptions()
2559*/
2560QRegularExpression::MatchType QRegularExpressionMatchIterator::matchType() const
2561{
2562 return d->matchType;
2563}
2564
2565/*!
2566 Returns the match options that were used to get this
2567 QRegularExpressionMatchIterator object, that is, the match options that
2568 were passed to QRegularExpression::globalMatch().
2569
2570 \sa QRegularExpression::globalMatch(), regularExpression(), matchType()
2571*/
2572QRegularExpression::MatchOptions QRegularExpressionMatchIterator::matchOptions() const
2573{
2574 return d->matchOptions;
2575}
2576
2577/*!
2578 \internal
2579*/
2580QtPrivate::QRegularExpressionMatchIteratorRangeBasedForIterator begin(const QRegularExpressionMatchIterator &iterator)
2581{
2582 return QtPrivate::QRegularExpressionMatchIteratorRangeBasedForIterator(iterator);
2583}
2584
2585/*!
2586 \fn QtPrivate::QRegularExpressionMatchIteratorRangeBasedForIteratorSentinel end(const QRegularExpressionMatchIterator &)
2587 \internal
2588*/
2589
2590#ifndef QT_NO_DATASTREAM
2591/*!
2592 \relates QRegularExpression
2593
2594 Writes the regular expression \a re to stream \a out.
2595
2596 \sa {Serializing Qt Data Types}
2597*/
2598QDataStream &operator<<(QDataStream &out, const QRegularExpression &re)
2599{
2600 out << re.pattern() << quint32(re.patternOptions());
2601 return out;
2602}
2603
2604/*!
2605 \relates QRegularExpression
2606
2607 Reads a regular expression from stream \a in into \a re.
2608
2609 \sa {Serializing Qt Data Types}
2610*/
2611QDataStream &operator>>(QDataStream &in, QRegularExpression &re)
2612{
2613 QString pattern;
2614 quint32 patternOptions;
2615 in >> pattern >> patternOptions;
2616 re.setPattern(pattern);
2617 re.setPatternOptions(QRegularExpression::PatternOptions(patternOptions));
2618 return in;
2619}
2620#endif
2621
2622#ifndef QT_NO_DEBUG_STREAM
2623/*!
2624 \relates QRegularExpression
2625
2626 Writes the regular expression \a re into the debug object \a debug for
2627 debugging purposes.
2628
2629 \sa {Debugging Techniques}
2630*/
2631QDebug operator<<(QDebug debug, const QRegularExpression &re)
2632{
2633 QDebugStateSaver saver(debug);
2634 debug.nospace() << "QRegularExpression(" << re.pattern() << ", " << re.patternOptions() << ')';
2635 return debug;
2636}
2637
2638/*!
2639 \relates QRegularExpression
2640
2641 Writes the pattern options \a patternOptions into the debug object \a debug
2642 for debugging purposes.
2643
2644 \sa {Debugging Techniques}
2645*/
2646QDebug operator<<(QDebug debug, QRegularExpression::PatternOptions patternOptions)
2647{
2648 QDebugStateSaver saver(debug);
2649 QByteArray flags;
2650
2651 if (patternOptions == QRegularExpression::NoPatternOption) {
2652 flags = "NoPatternOption";
2653 } else {
2654 flags.reserve(200); // worst case...
2655 if (patternOptions & QRegularExpression::CaseInsensitiveOption)
2656 flags.append("CaseInsensitiveOption|");
2657 if (patternOptions & QRegularExpression::DotMatchesEverythingOption)
2658 flags.append("DotMatchesEverythingOption|");
2659 if (patternOptions & QRegularExpression::MultilineOption)
2660 flags.append("MultilineOption|");
2661 if (patternOptions & QRegularExpression::ExtendedPatternSyntaxOption)
2662 flags.append("ExtendedPatternSyntaxOption|");
2663 if (patternOptions & QRegularExpression::InvertedGreedinessOption)
2664 flags.append("InvertedGreedinessOption|");
2665 if (patternOptions & QRegularExpression::DontCaptureOption)
2666 flags.append("DontCaptureOption|");
2667 if (patternOptions & QRegularExpression::UseUnicodePropertiesOption)
2668 flags.append("UseUnicodePropertiesOption|");
2669 flags.chop(1);
2670 }
2671
2672 debug.nospace() << "QRegularExpression::PatternOptions(" << flags << ')';
2673
2674 return debug;
2675}
2676/*!
2677 \relates QRegularExpressionMatch
2678
2679 Writes the match object \a match into the debug object \a debug for
2680 debugging purposes.
2681
2682 \sa {Debugging Techniques}
2683*/
2684QDebug operator<<(QDebug debug, const QRegularExpressionMatch &match)
2685{
2686 QDebugStateSaver saver(debug);
2687 debug.nospace() << "QRegularExpressionMatch(";
2688
2689 if (!match.isValid()) {
2690 debug << "Invalid)";
2691 return debug;
2692 }
2693
2694 debug << "Valid";
2695
2696 if (match.hasMatch()) {
2697 debug << ", has match: ";
2698 for (int i = 0; i <= match.lastCapturedIndex(); ++i) {
2699 debug << i
2700 << ":(" << match.capturedStart(i) << ", " << match.capturedEnd(i)
2701 << ", " << match.captured(i) << ')';
2702 if (i < match.lastCapturedIndex())
2703 debug << ", ";
2704 }
2705 } else if (match.hasPartialMatch()) {
2706 debug << ", has partial match: ("
2707 << match.capturedStart(0) << ", "
2708 << match.capturedEnd(0) << ", "
2709 << match.captured(0) << ')';
2710 } else {
2711 debug << ", no match";
2712 }
2713
2714 debug << ')';
2715
2716 return debug;
2717}
2718#endif
2719
2720// fool lupdate: make it extract those strings for translation, but don't put them
2721// inside Qt -- they're already inside libpcre (cf. man 3 pcreapi, pcre_compile.c).
2722#if 0
2723
2724/* PCRE is a library of functions to support regular expressions whose syntax
2725and semantics are as close as possible to those of the Perl 5 language.
2726
2727 Written by Philip Hazel
2728 Original API code Copyright (c) 1997-2012 University of Cambridge
2729 New API code Copyright (c) 2015 University of Cambridge
2730
2731-----------------------------------------------------------------------------
2732Redistribution and use in source and binary forms, with or without
2733modification, are permitted provided that the following conditions are met:
2734
2735 * Redistributions of source code must retain the above copyright notice,
2736 this list of conditions and the following disclaimer.
2737
2738 * Redistributions in binary form must reproduce the above copyright
2739 notice, this list of conditions and the following disclaimer in the
2740 documentation and/or other materials provided with the distribution.
2741
2742 * Neither the name of the University of Cambridge nor the names of its
2743 contributors may be used to endorse or promote products derived from
2744 this software without specific prior written permission.
2745
2746THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
2747AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2748IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2749ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
2750LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2751CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2752SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2753INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2754CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2755ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2756POSSIBILITY OF SUCH DAMAGE.
2757-----------------------------------------------------------------------------
2758*/
2759
// Translation-extraction table: every entry wraps one message text in
// QT_TRANSLATE_NOOP with the "QRegularExpression" context. The array sits
// inside the surrounding "#if 0" region, so it is never compiled; according
// to the note in front of that "#if 0", it is here only so that lupdate picks
// these strings up.
//
// Despite the name, the list is not limited to compile errors: from the
// second "no error" entry onwards the messages concern match results, UTF
// validation, DFA matching, JIT and substitution.
//
// A few entries are spelled as several adjacent string literals
// (e.g. "UTF-" "16" " mode").
// NOTE(review): presumably a leftover of macro expansion in the upstream
// sources -- confirm before merging them.
//
// NOTE(review): the texts presumably have to stay identical to the messages
// produced by the PCRE2 library at runtime for a translation to be found --
// confirm before editing any string here.
2760static const char *pcreCompileErrorCodes[] =
2761{
2762 QT_TRANSLATE_NOOP("QRegularExpression", "no error"),
2763 QT_TRANSLATE_NOOP("QRegularExpression", "\\ at end of pattern"),
2764 QT_TRANSLATE_NOOP("QRegularExpression", "\\c at end of pattern"),
2765 QT_TRANSLATE_NOOP("QRegularExpression", "unrecognized character follows \\"),
2766 QT_TRANSLATE_NOOP("QRegularExpression", "numbers out of order in {} quantifier"),
2767 QT_TRANSLATE_NOOP("QRegularExpression", "number too big in {} quantifier"),
2768 QT_TRANSLATE_NOOP("QRegularExpression", "missing terminating ] for character class"),
2769 QT_TRANSLATE_NOOP("QRegularExpression", "escape sequence is invalid in character class"),
2770 QT_TRANSLATE_NOOP("QRegularExpression", "range out of order in character class"),
2771 QT_TRANSLATE_NOOP("QRegularExpression", "quantifier does not follow a repeatable item"),
2772 QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unexpected repeat"),
2773 QT_TRANSLATE_NOOP("QRegularExpression", "unrecognized character after (? or (?-"),
2774 QT_TRANSLATE_NOOP("QRegularExpression", "POSIX named classes are supported only within a class"),
2775 QT_TRANSLATE_NOOP("QRegularExpression", "POSIX collating elements are not supported"),
2776 QT_TRANSLATE_NOOP("QRegularExpression", "missing closing parenthesis"),
2777 QT_TRANSLATE_NOOP("QRegularExpression", "reference to non-existent subpattern"),
2778 QT_TRANSLATE_NOOP("QRegularExpression", "pattern passed as NULL"),
2779 QT_TRANSLATE_NOOP("QRegularExpression", "unrecognised compile-time option bit(s)"),
2780 QT_TRANSLATE_NOOP("QRegularExpression", "missing ) after (?# comment"),
2781 QT_TRANSLATE_NOOP("QRegularExpression", "parentheses are too deeply nested"),
2782 QT_TRANSLATE_NOOP("QRegularExpression", "regular expression is too large"),
2783 QT_TRANSLATE_NOOP("QRegularExpression", "failed to allocate heap memory"),
2784 QT_TRANSLATE_NOOP("QRegularExpression", "unmatched closing parenthesis"),
2785 QT_TRANSLATE_NOOP("QRegularExpression", "internal error: code overflow"),
2786 QT_TRANSLATE_NOOP("QRegularExpression", "missing closing parenthesis for condition"),
2787 QT_TRANSLATE_NOOP("QRegularExpression", "lookbehind assertion is not fixed length"),
2788 QT_TRANSLATE_NOOP("QRegularExpression", "a relative value of zero is not allowed"),
2789 QT_TRANSLATE_NOOP("QRegularExpression", "conditional subpattern contains more than two branches"),
2790 QT_TRANSLATE_NOOP("QRegularExpression", "assertion expected after (?( or (?(?C)"),
2791 QT_TRANSLATE_NOOP("QRegularExpression", "digit expected after (?+ or (?-"),
2792 QT_TRANSLATE_NOOP("QRegularExpression", "unknown POSIX class name"),
2793 QT_TRANSLATE_NOOP("QRegularExpression", "internal error in pcre2_study(): should not occur"),
2794 QT_TRANSLATE_NOOP("QRegularExpression", "this version of PCRE2 does not have Unicode support"),
2795 QT_TRANSLATE_NOOP("QRegularExpression", "parentheses are too deeply nested (stack check)"),
2796 QT_TRANSLATE_NOOP("QRegularExpression", "character code point value in \\x{} or \\o{} is too large"),
2797 QT_TRANSLATE_NOOP("QRegularExpression", "lookbehind is too complicated"),
2798 QT_TRANSLATE_NOOP("QRegularExpression", "\\C is not allowed in a lookbehind assertion in UTF-" "16" " mode"),
2799 QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u"),
2800 QT_TRANSLATE_NOOP("QRegularExpression", "number after (?C is greater than 255"),
2801 QT_TRANSLATE_NOOP("QRegularExpression", "closing parenthesis for (?C expected"),
2802 QT_TRANSLATE_NOOP("QRegularExpression", "invalid escape sequence in (*VERB) name"),
2803 QT_TRANSLATE_NOOP("QRegularExpression", "unrecognized character after (?P"),
2804 QT_TRANSLATE_NOOP("QRegularExpression", "syntax error in subpattern name (missing terminator?)"),
2805 QT_TRANSLATE_NOOP("QRegularExpression", "two named subpatterns have the same name (PCRE2_DUPNAMES not set)"),
2806 QT_TRANSLATE_NOOP("QRegularExpression", "subpattern name must start with a non-digit"),
2807 QT_TRANSLATE_NOOP("QRegularExpression", "this version of PCRE2 does not have support for \\P, \\p, or \\X"),
2808 QT_TRANSLATE_NOOP("QRegularExpression", "malformed \\P or \\p sequence"),
2809 QT_TRANSLATE_NOOP("QRegularExpression", "unknown property name after \\P or \\p"),
2810 QT_TRANSLATE_NOOP("QRegularExpression", "subpattern name is too long (maximum " "32" " code units)"),
2811 QT_TRANSLATE_NOOP("QRegularExpression", "too many named subpatterns (maximum " "10000" ")"),
2812 QT_TRANSLATE_NOOP("QRegularExpression", "invalid range in character class"),
2813 QT_TRANSLATE_NOOP("QRegularExpression", "octal value is greater than \\377 in 8-bit non-UTF-8 mode"),
2814 QT_TRANSLATE_NOOP("QRegularExpression", "internal error: overran compiling workspace"),
2815 QT_TRANSLATE_NOOP("QRegularExpression", "internal error: previously-checked referenced subpattern not found"),
2816 QT_TRANSLATE_NOOP("QRegularExpression", "DEFINE subpattern contains more than one branch"),
2817 QT_TRANSLATE_NOOP("QRegularExpression", "missing opening brace after \\o"),
2818 QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unknown newline setting"),
2819 QT_TRANSLATE_NOOP("QRegularExpression", "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number"),
2820 QT_TRANSLATE_NOOP("QRegularExpression", "(?R (recursive pattern call) must be followed by a closing parenthesis"),
2821 QT_TRANSLATE_NOOP("QRegularExpression", "obsolete error (should not occur)"),
2822 QT_TRANSLATE_NOOP("QRegularExpression", "(*VERB) not recognized or malformed"),
2823 QT_TRANSLATE_NOOP("QRegularExpression", "subpattern number is too big"),
2824 QT_TRANSLATE_NOOP("QRegularExpression", "subpattern name expected"),
2825 QT_TRANSLATE_NOOP("QRegularExpression", "internal error: parsed pattern overflow"),
2826 QT_TRANSLATE_NOOP("QRegularExpression", "non-octal character in \\o{} (closing brace missing?)"),
2827 QT_TRANSLATE_NOOP("QRegularExpression", "different names for subpatterns of the same number are not allowed"),
2828 QT_TRANSLATE_NOOP("QRegularExpression", "(*MARK) must have an argument"),
2829 QT_TRANSLATE_NOOP("QRegularExpression", "non-hex character in \\x{} (closing brace missing?)"),
2830 QT_TRANSLATE_NOOP("QRegularExpression", "\\c must be followed by a printable ASCII character"),
2831 QT_TRANSLATE_NOOP("QRegularExpression", "\\c must be followed by a letter or one of [\\]^_?"),
2832 QT_TRANSLATE_NOOP("QRegularExpression", "\\k is not followed by a braced, angle-bracketed, or quoted name"),
2833 QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unknown meta code in check_lookbehinds()"),
2834 QT_TRANSLATE_NOOP("QRegularExpression", "\\N is not supported in a class"),
2835 QT_TRANSLATE_NOOP("QRegularExpression", "callout string is too long"),
2836 QT_TRANSLATE_NOOP("QRegularExpression", "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)"),
2837 QT_TRANSLATE_NOOP("QRegularExpression", "using UTF is disabled by the application"),
2838 QT_TRANSLATE_NOOP("QRegularExpression", "using UCP is disabled by the application"),
2839 QT_TRANSLATE_NOOP("QRegularExpression", "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)"),
2840 QT_TRANSLATE_NOOP("QRegularExpression", "character code point value in \\u.... sequence is too large"),
2841 QT_TRANSLATE_NOOP("QRegularExpression", "digits missing in \\x{} or \\o{} or \\N{U+}"),
2842 QT_TRANSLATE_NOOP("QRegularExpression", "syntax error or number too big in (?(VERSION condition"),
2843 QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unknown opcode in auto_possessify()"),
2844 QT_TRANSLATE_NOOP("QRegularExpression", "missing terminating delimiter for callout with string argument"),
2845 QT_TRANSLATE_NOOP("QRegularExpression", "unrecognized string delimiter follows (?C"),
2846 QT_TRANSLATE_NOOP("QRegularExpression", "using \\C is disabled by the application"),
2847 QT_TRANSLATE_NOOP("QRegularExpression", "(?| and/or (?J: or (?x: parentheses are too deeply nested"),
2848 QT_TRANSLATE_NOOP("QRegularExpression", "using \\C is disabled in this PCRE2 library"),
2849 QT_TRANSLATE_NOOP("QRegularExpression", "regular expression is too complicated"),
2850 QT_TRANSLATE_NOOP("QRegularExpression", "lookbehind assertion is too long"),
2851 QT_TRANSLATE_NOOP("QRegularExpression", "pattern string is longer than the limit set by the application"),
2852 QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unknown code in parsed pattern"),
2853 QT_TRANSLATE_NOOP("QRegularExpression", "internal error: bad code value in parsed_skip()"),
2854 QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode"),
2855 QT_TRANSLATE_NOOP("QRegularExpression", "invalid option bits with PCRE2_LITERAL"),
2856 QT_TRANSLATE_NOOP("QRegularExpression", "\\N{U+dddd} is supported only in Unicode (UTF) mode"),
2857 QT_TRANSLATE_NOOP("QRegularExpression", "invalid hyphen in option setting"),
2858 QT_TRANSLATE_NOOP("QRegularExpression", "(*alpha_assertion) not recognized"),
2859 QT_TRANSLATE_NOOP("QRegularExpression", "script runs require Unicode support, which this version of PCRE2 does not have"),
2860 QT_TRANSLATE_NOOP("QRegularExpression", "too many capturing groups (maximum 65535)"),
2861 QT_TRANSLATE_NOOP("QRegularExpression", "atomic assertion expected after (?( or (?(?C)"),
    // The list starts over at "no error" here: the entries from this point on
    // are not pattern-compilation errors (match results, UTF validity,
    // DFA/JIT/substitution failures, ...).
2862 QT_TRANSLATE_NOOP("QRegularExpression", "no error"),
2863 QT_TRANSLATE_NOOP("QRegularExpression", "no match"),
2864 QT_TRANSLATE_NOOP("QRegularExpression", "partial match"),
2865 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: 1 byte missing at end"),
2866 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: 2 bytes missing at end"),
2867 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: 3 bytes missing at end"),
2868 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: 4 bytes missing at end"),
2869 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: 5 bytes missing at end"),
2870 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: byte 2 top bits not 0x80"),
2871 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: byte 3 top bits not 0x80"),
2872 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: byte 4 top bits not 0x80"),
2873 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: byte 5 top bits not 0x80"),
2874 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: byte 6 top bits not 0x80"),
2875 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: 5-byte character is not allowed (RFC 3629)"),
2876 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: 6-byte character is not allowed (RFC 3629)"),
2877 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: code points greater than 0x10ffff are not defined"),
2878 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: code points 0xd800-0xdfff are not defined"),
2879 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: overlong 2-byte sequence"),
2880 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: overlong 3-byte sequence"),
2881 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: overlong 4-byte sequence"),
2882 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: overlong 5-byte sequence"),
2883 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: overlong 6-byte sequence"),
2884 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: isolated byte with 0x80 bit set"),
2885 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-8 error: illegal byte (0xfe or 0xff)"),
2886 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-16 error: missing low surrogate at end"),
2887 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-16 error: invalid low surrogate"),
2888 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-16 error: isolated low surrogate"),
2889 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-32 error: code points 0xd800-0xdfff are not defined"),
2890 QT_TRANSLATE_NOOP("QRegularExpression", "UTF-32 error: code points greater than 0x10ffff are not defined"),
2891 QT_TRANSLATE_NOOP("QRegularExpression", "bad data value"),
2892 QT_TRANSLATE_NOOP("QRegularExpression", "patterns do not all use the same character tables"),
2893 QT_TRANSLATE_NOOP("QRegularExpression", "magic number missing"),
2894 QT_TRANSLATE_NOOP("QRegularExpression", "pattern compiled in wrong mode: 8/16/32-bit error"),
2895 QT_TRANSLATE_NOOP("QRegularExpression", "bad offset value"),
2896 QT_TRANSLATE_NOOP("QRegularExpression", "bad option value"),
2897 QT_TRANSLATE_NOOP("QRegularExpression", "invalid replacement string"),
2898 QT_TRANSLATE_NOOP("QRegularExpression", "bad offset into UTF string"),
2899 QT_TRANSLATE_NOOP("QRegularExpression", "callout error code"),
2900 QT_TRANSLATE_NOOP("QRegularExpression", "invalid data in workspace for DFA restart"),
2901 QT_TRANSLATE_NOOP("QRegularExpression", "too much recursion for DFA matching"),
2902 QT_TRANSLATE_NOOP("QRegularExpression", "backreference condition or recursion test is not supported for DFA matching"),
2903 QT_TRANSLATE_NOOP("QRegularExpression", "function is not supported for DFA matching"),
2904 QT_TRANSLATE_NOOP("QRegularExpression", "pattern contains an item that is not supported for DFA matching"),
2905 QT_TRANSLATE_NOOP("QRegularExpression", "workspace size exceeded in DFA matching"),
2906 QT_TRANSLATE_NOOP("QRegularExpression", "internal error - pattern overwritten?"),
2907 QT_TRANSLATE_NOOP("QRegularExpression", "bad JIT option"),
2908 QT_TRANSLATE_NOOP("QRegularExpression", "JIT stack limit reached"),
2909 QT_TRANSLATE_NOOP("QRegularExpression", "match limit exceeded"),
2910 QT_TRANSLATE_NOOP("QRegularExpression", "no more memory"),
2911 QT_TRANSLATE_NOOP("QRegularExpression", "unknown substring"),
2912 QT_TRANSLATE_NOOP("QRegularExpression", "non-unique substring name"),
2913 QT_TRANSLATE_NOOP("QRegularExpression", "NULL argument passed"),
2914 QT_TRANSLATE_NOOP("QRegularExpression", "nested recursion at the same subject position"),
2915 QT_TRANSLATE_NOOP("QRegularExpression", "matching depth limit exceeded"),
2916 QT_TRANSLATE_NOOP("QRegularExpression", "requested value is not available"),
2917 QT_TRANSLATE_NOOP("QRegularExpression", "requested value is not set"),
2918 QT_TRANSLATE_NOOP("QRegularExpression", "offset limit set without PCRE2_USE_OFFSET_LIMIT"),
2919 QT_TRANSLATE_NOOP("QRegularExpression", "bad escape sequence in replacement string"),
2920 QT_TRANSLATE_NOOP("QRegularExpression", "expected closing curly bracket in replacement string"),
2921 QT_TRANSLATE_NOOP("QRegularExpression", "bad substitution in replacement string"),
2922 QT_TRANSLATE_NOOP("QRegularExpression", "match with end before start or start moved backwards is not supported"),
2923 QT_TRANSLATE_NOOP("QRegularExpression", "too many replacements (more than INT_MAX)"),
2924 QT_TRANSLATE_NOOP("QRegularExpression", "bad serialized data"),
2925 QT_TRANSLATE_NOOP("QRegularExpression", "heap limit exceeded"),
2926 QT_TRANSLATE_NOOP("QRegularExpression", "invalid syntax"),
2927 QT_TRANSLATE_NOOP("QRegularExpression", "internal error - duplicate substitution match"),
2928 QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching")
2929};
2930#endif // #if 0
2931
2932QT_END_NAMESPACE
2933