1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2020 Giuseppe D'Angelo <dangelog@gmail.com>. |
4 | ** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com> |
5 | ** Copyright (C) 2016 The Qt Company Ltd. |
6 | ** Contact: https://www.qt.io/licensing/ |
7 | ** |
8 | ** This file is part of the QtCore module of the Qt Toolkit. |
9 | ** |
10 | ** $QT_BEGIN_LICENSE:LGPL$ |
11 | ** Commercial License Usage |
12 | ** Licensees holding valid commercial Qt licenses may use this file in |
13 | ** accordance with the commercial license agreement provided with the |
14 | ** Software or, alternatively, in accordance with the terms contained in |
15 | ** a written agreement between you and The Qt Company. For licensing terms |
16 | ** and conditions see https://www.qt.io/terms-conditions. For further |
17 | ** information use the contact form at https://www.qt.io/contact-us. |
18 | ** |
19 | ** GNU Lesser General Public License Usage |
20 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
21 | ** General Public License version 3 as published by the Free Software |
22 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
23 | ** packaging of this file. Please review the following information to |
24 | ** ensure the GNU Lesser General Public License version 3 requirements |
25 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
26 | ** |
27 | ** GNU General Public License Usage |
28 | ** Alternatively, this file may be used under the terms of the GNU |
29 | ** General Public License version 2.0 or (at your option) the GNU General |
30 | ** Public license version 3 or any later version approved by the KDE Free |
31 | ** Qt Foundation. The licenses are as published by the Free Software |
32 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
33 | ** included in the packaging of this file. Please review the following |
34 | ** information to ensure the GNU General Public License requirements will |
35 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
36 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
37 | ** |
38 | ** $QT_END_LICENSE$ |
39 | ** |
40 | ****************************************************************************/ |
41 | |
42 | #include "qregularexpression.h" |
43 | |
44 | #include <QtCore/qcoreapplication.h> |
45 | #include <QtCore/qhashfunctions.h> |
46 | #include <QtCore/qlist.h> |
47 | #include <QtCore/qmutex.h> |
48 | #include <QtCore/qstringlist.h> |
49 | #include <QtCore/qdebug.h> |
50 | #include <QtCore/qthreadstorage.h> |
51 | #include <QtCore/qglobal.h> |
52 | #include <QtCore/qatomic.h> |
53 | #include <QtCore/qdatastream.h> |
54 | |
55 | #define PCRE2_CODE_UNIT_WIDTH 16 |
56 | |
57 | #include <pcre2.h> |
58 | |
59 | QT_BEGIN_NAMESPACE |
60 | |
61 | /*! |
62 | \class QRegularExpression |
63 | \inmodule QtCore |
64 | \reentrant |
65 | |
66 | \brief The QRegularExpression class provides pattern matching using regular |
67 | expressions. |
68 | |
69 | \since 5.0 |
70 | |
71 | \ingroup tools |
72 | \ingroup shared |
73 | |
74 | \keyword regular expression |
75 | |
76 | Regular expressions, or \e{regexps}, are a very powerful tool to handle |
77 | strings and texts. This is useful in many contexts, e.g., |
78 | |
79 | \table |
80 | \row \li Validation |
81 | \li A regexp can test whether a substring meets some criteria, |
82 | e.g. is an integer or contains no whitespace. |
83 | \row \li Searching |
84 | \li A regexp provides more powerful pattern matching than |
85 | simple substring matching, e.g., match one of the words |
86 | \e{mail}, \e{letter} or \e{correspondence}, but none of the |
87 | words \e{email}, \e{mailman}, \e{mailer}, \e{letterbox}, etc. |
88 | \row \li Search and Replace |
89 | \li A regexp can replace all occurrences of a substring with a |
90 | different substring, e.g., replace all occurrences of \e{&} |
91 | with \e{\&amp;} except where the \e{&} is already followed by |
92 | an \e{amp;}. |
93 | \row \li String Splitting |
94 | \li A regexp can be used to identify where a string should be |
95 | split apart, e.g. splitting tab-delimited strings. |
96 | \endtable |
97 | |
98 | This document is by no means a complete reference to pattern matching using |
99 | regular expressions, and the following parts will require the reader to |
100 | have some basic knowledge about Perl-like regular expressions and their |
101 | pattern syntax. |
102 | |
103 | Good references about regular expressions include: |
104 | |
105 | \list |
106 | \li \e {Mastering Regular Expressions} (Third Edition) by Jeffrey E. F. |
107 | Friedl, ISBN 0-596-52812-4; |
108 | \li the \l{http://pcre.org/pcre.txt} {pcrepattern(3)} man page, describing |
109 | the pattern syntax supported by PCRE (the reference implementation of |
110 | Perl-compatible regular expressions); |
111 | \li the \l{http://perldoc.perl.org/perlre.html} {Perl's regular expression |
112 | documentation} and the \l{http://perldoc.perl.org/perlretut.html} {Perl's |
113 | regular expression tutorial}. |
114 | \endlist |
115 | |
116 | \tableofcontents |
117 | |
118 | \section1 Introduction |
119 | |
120 | QRegularExpression implements Perl-compatible regular expressions. It fully |
121 | supports Unicode. For an overview of the regular expression syntax |
122 | supported by QRegularExpression, please refer to the aforementioned |
123 | pcrepattern(3) man page. A regular expression is made up of two things: a |
124 | \b{pattern string} and a set of \b{pattern options} that change the |
125 | meaning of the pattern string. |
126 | |
127 | You can set the pattern string by passing a string to the QRegularExpression |
128 | constructor: |
129 | |
130 | \snippet code/src_corelib_text_qregularexpression.cpp 0 |
131 | |
132 | This sets the pattern string to \c{a pattern}. You can also use the |
133 | setPattern() function to set a pattern on an existing QRegularExpression |
134 | object: |
135 | |
136 | \snippet code/src_corelib_text_qregularexpression.cpp 1 |
137 | |
138 | Note that due to C++ literal strings rules, you must escape all backslashes |
139 | inside the pattern string with another backslash: |
140 | |
141 | \snippet code/src_corelib_text_qregularexpression.cpp 2 |
142 | |
143 | The pattern() function returns the pattern that is currently set for a |
144 | QRegularExpression object: |
145 | |
146 | \snippet code/src_corelib_text_qregularexpression.cpp 3 |
147 | |
148 | \section1 Pattern Options |
149 | |
150 | The meaning of the pattern string can be modified by setting one or more |
151 | \e{pattern options}. For instance, it is possible to set a pattern to match |
152 | case insensitively by setting the QRegularExpression::CaseInsensitiveOption. |
153 | |
154 | You can set the options by passing them to the QRegularExpression |
155 | constructor, as in: |
156 | |
157 | \snippet code/src_corelib_text_qregularexpression.cpp 4 |
158 | |
159 | Alternatively, you can use the setPatternOptions() function on an existing |
160 | QRegularExpression object: |
161 | |
162 | \snippet code/src_corelib_text_qregularexpression.cpp 5 |
163 | |
164 | It is possible to get the pattern options currently set on a |
165 | QRegularExpression object by using the patternOptions() function: |
166 | |
167 | \snippet code/src_corelib_text_qregularexpression.cpp 6 |
168 | |
169 | Please refer to the QRegularExpression::PatternOption enum documentation for |
170 | more information about each pattern option. |
171 | |
172 | \section1 Match Type and Match Options |
173 | |
174 | The last two arguments of the match() and the globalMatch() functions set |
175 | the match type and the match options. The match type is a value of the |
176 | QRegularExpression::MatchType enum; the "traditional" matching algorithm is |
177 | chosen by using the NormalMatch match type (the default). It is also |
178 | possible to enable partial matching of the regular expression against a |
179 | subject string: see the \l{partial matching} section for more details. |
180 | |
181 | The match options are a set of one or more QRegularExpression::MatchOption |
182 | values. They change the way a specific match of a regular expression |
183 | against a subject string is done. Please refer to the |
184 | QRegularExpression::MatchOption enum documentation for more details. |
185 | |
186 | \target normal matching |
187 | \section1 Normal Matching |
188 | |
189 | In order to perform a match you can simply invoke the match() function |
190 | passing a string to match against. We refer to this string as the |
191 | \e{subject string}. The result of the match() function is a |
192 | QRegularExpressionMatch object that can be used to inspect the results of |
193 | the match. For instance: |
194 | |
195 | \snippet code/src_corelib_text_qregularexpression.cpp 7 |
196 | |
197 | If a match is successful, the (implicit) capturing group number 0 can be |
198 | used to retrieve the substring matched by the entire pattern (see also the |
199 | section about \l{extracting captured substrings}): |
200 | |
201 | \snippet code/src_corelib_text_qregularexpression.cpp 8 |
202 | |
203 | It's also possible to start a match at an arbitrary offset inside the |
204 | subject string by passing the offset as an argument of the |
205 | match() function. In the following example \c{"12 abc"} |
206 | is not matched because the match is started at offset 1: |
207 | |
208 | \snippet code/src_corelib_text_qregularexpression.cpp 9 |
209 | |
210 | \target extracting captured substrings |
211 | \section2 Extracting captured substrings |
212 | |
213 | The QRegularExpressionMatch object also contains information about the |
214 | substrings captured by the capturing groups in the pattern string. The |
215 | \l{QRegularExpressionMatch::}{captured()} function will return the string |
216 | captured by the n-th capturing group: |
217 | |
218 | \snippet code/src_corelib_text_qregularexpression.cpp 10 |
219 | |
220 | Capturing groups in the pattern are numbered starting from 1, and the |
221 | implicit capturing group 0 is used to capture the substring that matched |
222 | the entire pattern. |
223 | |
224 | It's also possible to retrieve the starting and the ending offsets (inside |
225 | the subject string) of each captured substring, by using the |
226 | \l{QRegularExpressionMatch::}{capturedStart()} and the |
227 | \l{QRegularExpressionMatch::}{capturedEnd()} functions: |
228 | |
229 | \snippet code/src_corelib_text_qregularexpression.cpp 11 |
230 | |
231 | All of these functions have an overload taking a QString as a parameter |
232 | in order to extract \e{named} captured substrings. For instance: |
233 | |
234 | \snippet code/src_corelib_text_qregularexpression.cpp 12 |
235 | |
236 | \target global matching |
237 | \section1 Global Matching |
238 | |
239 | \e{Global matching} is useful to find all the occurrences of a given |
240 | regular expression inside a subject string. Suppose that we want to extract |
241 | all the words from a given string, where a word is a substring matching |
242 | the pattern \c{\w+}. |
243 | |
244 | QRegularExpression::globalMatch returns a QRegularExpressionMatchIterator, |
245 | which is a Java-like forward iterator that can be used to iterate over the |
246 | results. For instance: |
247 | |
248 | \snippet code/src_corelib_text_qregularexpression.cpp 13 |
249 | |
250 | Since it's a Java-like iterator, the QRegularExpressionMatchIterator will |
251 | point immediately before the first result. Every result is returned as a |
252 | QRegularExpressionMatch object. The |
253 | \l{QRegularExpressionMatchIterator::}{hasNext()} function will return true |
254 | if there's at least one more result, and |
255 | \l{QRegularExpressionMatchIterator::}{next()} will return the next result |
256 | and advance the iterator. Continuing from the previous example: |
257 | |
258 | \snippet code/src_corelib_text_qregularexpression.cpp 14 |
259 | |
260 | You can also use \l{QRegularExpressionMatchIterator::}{peekNext()} to get |
261 | the next result without advancing the iterator. |
262 | |
263 | It is also possible to simply use the result of |
264 | QRegularExpression::globalMatch in a range-based for loop, for instance |
265 | like this: |
266 | |
267 | \snippet code/src_corelib_text_qregularexpression.cpp 34 |
268 | |
269 | It is possible to pass a starting offset and one or more match options to |
270 | the globalMatch() function, exactly like normal matching with match(). |
271 | |
272 | \target partial matching |
273 | \section1 Partial Matching |
274 | |
275 | A \e{partial match} is obtained when the end of the subject string is |
276 | reached, but more characters are needed to successfully complete the match. |
277 | Note that a partial match is usually much less efficient than a normal |
278 | match because many optimizations of the matching algorithm cannot be |
279 | employed. |
280 | |
281 | A partial match must be explicitly requested by specifying a match type of |
282 | PartialPreferCompleteMatch or PartialPreferFirstMatch when calling |
283 | QRegularExpression::match or QRegularExpression::globalMatch. If a partial |
284 | match is found, then calling the \l{QRegularExpressionMatch::}{hasMatch()} |
285 | function on the QRegularExpressionMatch object returned by match() will |
286 | return \c{false}, but \l{QRegularExpressionMatch::}{hasPartialMatch()} will return |
287 | \c{true}. |
288 | |
289 | When a partial match is found, no captured substrings are returned, and the |
290 | (implicit) capturing group 0 corresponding to the whole match captures the |
291 | partially matched substring of the subject string. |
292 | |
293 | Note that asking for a partial match can still lead to a complete match, if |
294 | one is found; in this case, \l{QRegularExpressionMatch::}{hasMatch()} will |
295 | return \c{true} and \l{QRegularExpressionMatch::}{hasPartialMatch()} |
296 | \c{false}. It never happens that a QRegularExpressionMatch reports both a |
297 | partial and a complete match. |
298 | |
299 | Partial matching is mainly useful in two scenarios: validating user input |
300 | in real time and incremental/multi-segment matching. |
301 | |
302 | \target validating user input |
303 | \section2 Validating user input |
304 | |
305 | Suppose that we would like the user to input a date in a specific |
306 | format, for instance "MMM dd, yyyy". We can check the input validity with |
307 | a pattern like: |
308 | |
309 | \c{^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \d\d?, \d\d\d\d$} |
310 | |
311 | (This pattern doesn't catch invalid days, but let's keep it for the |
312 | example's purposes). |
313 | |
314 | We would like to validate the input with this regular expression \e{while} |
315 | the user is typing it, so that we can report an error in the input as soon |
316 | as it is committed (for instance, the user typed the wrong key). In order |
317 | to do so we must distinguish three cases: |
318 | |
319 | \list |
320 | \li the input cannot possibly match the regular expression; |
321 | \li the input does match the regular expression; |
322 | \li the input does not match the regular expression right now, |
323 | but it will if more characters are added to it. |
324 | \endlist |
325 | |
326 | Note that these three cases represent exactly the possible states of a |
327 | QValidator (see the QValidator::State enum). |
328 | |
329 | In particular, in the last case we want the regular expression engine to |
330 | report a partial match: we are successfully matching the pattern against |
331 | the subject string but the matching cannot continue because the end of the |
332 | subject is encountered. Notice, however, that the matching algorithm should |
333 | continue and try all possibilities, and in case a complete (non-partial) |
334 | match is found, then this one should be reported, and the input string |
335 | accepted as fully valid. |
336 | |
337 | This behavior is implemented by the PartialPreferCompleteMatch match type. |
338 | For instance: |
339 | |
340 | \snippet code/src_corelib_text_qregularexpression.cpp 15 |
341 | |
342 | If matching the same regular expression against the subject string leads to |
343 | a complete match, it is reported as usual: |
344 | |
345 | \snippet code/src_corelib_text_qregularexpression.cpp 16 |
346 | |
347 | Another example with a different pattern, showing the behavior of |
348 | preferring a complete match over a partial one: |
349 | |
350 | \snippet code/src_corelib_text_qregularexpression.cpp 17 |
351 | |
352 | In this case, the subpattern \c{abc\\w+X} partially matches the subject |
353 | string; however, the subpattern \c{def} matches the subject string |
354 | completely, and therefore a complete match is reported. |
355 | |
356 | If multiple partial matches are found when matching (but no complete |
357 | match), then the QRegularExpressionMatch object will report the first one |
358 | that is found. For instance: |
359 | |
360 | \snippet code/src_corelib_text_qregularexpression.cpp 18 |
361 | |
362 | \section2 Incremental/multi-segment matching |
363 | |
364 | Incremental matching is another use case of partial matching. Suppose that |
365 | we want to find the occurrences of a regular expression inside a large text |
366 | (that is, substrings matching the regular expression). In order to do so we |
367 | would like to "feed" the large text to the regular expression engine in |
368 | smaller chunks. The obvious problem is what happens if the substring that |
369 | matches the regular expression spans across two or more chunks. |
370 | |
371 | In this case, the regular expression engine should report a partial match, |
372 | so that we can match again adding new data and (eventually) get a complete |
373 | match. This implies that the regular expression engine may assume that |
374 | there are other characters \e{beyond the end} of the subject string. This |
375 | is not to be taken literally -- the engine will never try to access |
376 | any character after the last one in the subject. |
377 | |
378 | QRegularExpression implements this behavior when using the |
379 | PartialPreferFirstMatch match type. This match type reports a partial match |
380 | as soon as it is found, and other match alternatives are not tried |
381 | (even if they could lead to a complete match). For instance: |
382 | |
383 | \snippet code/src_corelib_text_qregularexpression.cpp 19 |
384 | |
385 | This happens because when matching the first branch of the alternation |
386 | operator a partial match is found, and therefore matching stops, without |
387 | trying the second branch. Another example: |
388 | |
389 | \snippet code/src_corelib_text_qregularexpression.cpp 20 |
390 | |
391 | This shows what could seem a counterintuitive behavior of quantifiers: |
392 | since \c{?} is greedy, then the engine tries first to continue the match |
393 | after having matched \c{"abc"}; but then the matching reaches the end of the |
394 | subject string, and therefore a partial match is reported. This is |
395 | even more surprising in the following example: |
396 | |
397 | \snippet code/src_corelib_text_qregularexpression.cpp 21 |
398 | |
399 | It's easy to understand this behavior if we remember that the engine |
400 | expects the subject string to be only a substring of the whole text we're |
401 | looking for a match into (that is, as we said before, that the engine |
402 | assumes that there are other characters beyond the end of the subject |
403 | string). |
404 | |
405 | Since the \c{*} quantifier is greedy, then reporting a complete match could |
406 | be an error, because after the current subject \c{"abc"} there may be other |
407 | occurrences of \c{"abc"}. For instance, the complete text could have been |
408 | "abcabcX", and therefore the \e{right} match to report (in the complete |
409 | text) would have been \c{"abcabc"}; by matching only against the leading |
410 | \c{"abc"} we instead get a partial match. |
411 | |
412 | \section1 Error Handling |
413 | |
414 | It is possible for a QRegularExpression object to be invalid because of |
415 | syntax errors in the pattern string. The isValid() function will return |
416 | true if the regular expression is valid, or false otherwise: |
417 | |
418 | \snippet code/src_corelib_text_qregularexpression.cpp 22 |
419 | |
420 | You can get more information about the specific error by calling the |
421 | errorString() function; moreover, the patternErrorOffset() function |
422 | will return the offset inside the pattern string at which the error was found: |
423 | |
424 | \snippet code/src_corelib_text_qregularexpression.cpp 23 |
425 | |
426 | If a match is attempted with an invalid QRegularExpression, then the |
427 | returned QRegularExpressionMatch object will be invalid as well (that is, |
428 | its \l{QRegularExpressionMatch::}{isValid()} function will return false). |
429 | The same applies for attempting a global match. |
430 | |
431 | \section1 Unsupported Perl-compatible Regular Expressions Features |
432 | |
433 | QRegularExpression does not support all the features available in |
434 | Perl-compatible regular expressions. The most notable one is the fact that |
435 | duplicated names for capturing groups are not supported, and using them can |
436 | lead to undefined behavior. |
437 | |
438 | This may change in a future version of Qt. |
439 | |
440 | \section1 Debugging Code that Uses QRegularExpression |
441 | |
442 | QRegularExpression internally uses a just in time compiler (JIT) to |
443 | optimize the execution of the matching algorithm. The JIT makes extensive |
444 | usage of self-modifying code, which can lead debugging tools such as |
445 | Valgrind to crash. You must enable all checks for self-modifying code if |
446 | you want to debug programs using QRegularExpression (for instance, Valgrind's |
447 | \c{--smc-check} command line option). The downside of enabling such checks |
448 | is that your program will run considerably slower. |
449 | |
450 | To avoid that, the JIT is disabled by default if you compile Qt in debug |
451 | mode. It is possible to override the default and enable or disable the JIT |
452 | usage (in both debug and release mode) by setting the |
453 | \c{QT_ENABLE_REGEXP_JIT} environment variable to a non-zero or zero value |
454 | respectively. |
455 | |
456 | \sa QRegularExpressionMatch, QRegularExpressionMatchIterator |
457 | */ |
458 | |
459 | /*! |
460 | \class QRegularExpressionMatch |
461 | \inmodule QtCore |
462 | \reentrant |
463 | |
464 | \brief The QRegularExpressionMatch class provides the results of matching |
465 | a QRegularExpression against a string. |
466 | |
467 | \since 5.0 |
468 | |
469 | \ingroup tools |
470 | \ingroup shared |
471 | |
472 | \keyword regular expression match |
473 | |
474 | A QRegularExpressionMatch object can be obtained by calling the |
475 | QRegularExpression::match() function, or as a single result of a global |
476 | match from a QRegularExpressionMatchIterator. |
477 | |
478 | The success or the failure of a match attempt can be inspected by calling |
479 | the hasMatch() function. QRegularExpressionMatch also reports a successful |
480 | partial match through the hasPartialMatch() function. |
481 | |
482 | In addition, QRegularExpressionMatch returns the substrings captured by the |
483 | capturing groups in the pattern string. The implicit capturing group with |
484 | index 0 captures the result of the whole match. The captured() function |
485 | returns each substring captured, either by the capturing group's index or |
486 | by its name: |
487 | |
488 | \snippet code/src_corelib_text_qregularexpression.cpp 29 |
489 | |
490 | For each captured substring it is possible to query its starting and ending |
491 | offsets in the subject string by calling the capturedStart() and the |
492 | capturedEnd() function, respectively. The length of each captured |
493 | substring is available using the capturedLength() function. |
494 | |
495 | The convenience function capturedTexts() will return \e{all} the captured |
496 | substrings at once (including the substring matched by the entire pattern) |
497 | in the order they have been captured by capturing groups; that is, |
498 | \c{captured(i) == capturedTexts().at(i)}. |
499 | |
500 | You can retrieve the QRegularExpression object the subject string was |
501 | matched against by calling the regularExpression() function; the |
502 | match type and the match options are available as well by calling |
503 | the matchType() and the matchOptions() respectively. |
504 | |
505 | Please refer to the QRegularExpression documentation for more information |
506 | about the Qt regular expression classes. |
507 | |
508 | \sa QRegularExpression |
509 | */ |
510 | |
511 | /*! |
512 | \class QRegularExpressionMatchIterator |
513 | \inmodule QtCore |
514 | \reentrant |
515 | |
516 | \brief The QRegularExpressionMatchIterator class provides an iterator on |
517 | the results of a global match of a QRegularExpression object against a string. |
518 | |
519 | \since 5.0 |
520 | |
521 | \ingroup tools |
522 | \ingroup shared |
523 | |
524 | \keyword regular expression iterator |
525 | |
526 | A QRegularExpressionMatchIterator object is a forward only Java-like |
527 | iterator; it can be obtained by calling the |
528 | QRegularExpression::globalMatch() function. A new |
529 | QRegularExpressionMatchIterator will be positioned before the first result. |
530 | You can then call the hasNext() function to check if there are more |
531 | results available; if so, the next() function will return the next |
532 | result and advance the iterator. |
533 | |
534 | Each result is a QRegularExpressionMatch object holding all the information |
535 | for that result (including captured substrings). |
536 | |
537 | For instance: |
538 | |
539 | \snippet code/src_corelib_text_qregularexpression.cpp 30 |
540 | |
541 | Moreover, QRegularExpressionMatchIterator offers a peekNext() function |
542 | to get the next result \e{without} advancing the iterator. |
543 | |
544 | Starting with Qt 6.0, it is also possible to simply use the result of |
545 | QRegularExpression::globalMatch in a range-based for loop, for instance |
546 | like this: |
547 | |
548 | \snippet code/src_corelib_text_qregularexpression.cpp 34 |
549 | |
550 | You can retrieve the QRegularExpression object the subject string was |
551 | matched against by calling the regularExpression() function; the |
552 | match type and the match options are available as well by calling |
553 | the matchType() and the matchOptions() respectively. |
554 | |
555 | Please refer to the QRegularExpression documentation for more information |
556 | about the Qt regular expression classes. |
557 | |
558 | \sa QRegularExpression, QRegularExpressionMatch |
559 | */ |
560 | |
561 | |
562 | /*! |
563 | \enum QRegularExpression::PatternOption |
564 | |
565 | The PatternOption enum defines modifiers to the way the pattern string |
566 | should be interpreted, and therefore the way the pattern matches against a |
567 | subject string. |
568 | |
569 | \value NoPatternOption |
570 | No pattern options are set. |
571 | |
572 | \value CaseInsensitiveOption |
573 | The pattern should match against the subject string in a case |
574 | insensitive way. This option corresponds to the /i modifier in Perl |
575 | regular expressions. |
576 | |
577 | \value DotMatchesEverythingOption |
578 | The dot metacharacter (\c{.}) in the pattern string is allowed to match |
579 | any character in the subject string, including newlines (normally, the |
580 | dot does not match newlines). This option corresponds to the \c{/s} |
581 | modifier in Perl regular expressions. |
582 | |
583 | \value MultilineOption |
584 | The caret (\c{^}) and the dollar (\c{$}) metacharacters in the pattern |
585 | string are allowed to match, respectively, immediately after and |
586 | immediately before any newline in the subject string, as well as at the |
587 | very beginning and at the very end of the subject string. This option |
588 | corresponds to the \c{/m} modifier in Perl regular expressions. |
589 | |
590 | \value ExtendedPatternSyntaxOption |
591 | Any whitespace in the pattern string which is not escaped and outside a |
592 | character class is ignored. Moreover, an unescaped sharp (\b{#}) |
593 | outside a character class causes all the following characters, until |
594 | the first newline (included), to be ignored. This can be used to |
595 | increase the readability of a pattern string as well as put comments |
596 | inside regular expressions; this is particularly useful if the pattern |
597 | string is loaded from a file or written by the user, because in C++ |
598 | code it is always possible to use the rules for string literals to put |
599 | comments outside the pattern string. This option corresponds to the \c{/x} |
600 | modifier in Perl regular expressions. |
601 | |
602 | \value InvertedGreedinessOption |
603 | The greediness of the quantifiers is inverted: \c{*}, \c{+}, \c{?}, |
604 | \c{{m,n}}, etc. become lazy, while their lazy versions (\c{*?}, |
605 | \c{+?}, \c{??}, \c{{m,n}?}, etc.) become greedy. There is no equivalent |
606 | for this option in Perl regular expressions. |
607 | |
608 | \value DontCaptureOption |
609 | The non-named capturing groups do not capture substrings; named |
610 | capturing groups still work as intended, as well as the implicit |
611 | capturing group number 0 corresponding to the entire match. There is no |
612 | equivalent for this option in Perl regular expressions. |
613 | |
614 | \value UseUnicodePropertiesOption |
615 | The meaning of the \c{\w}, \c{\d}, etc., character classes, as well as |
616 | the meaning of their counterparts (\c{\W}, \c{\D}, etc.), is changed |
617 | from matching ASCII characters only to matching any character with the |
618 | corresponding Unicode property. For instance, \c{\d} is changed to |
619 | match any character with the Unicode Nd (decimal digit) property; |
620 | \c{\w} to match any character with either the Unicode L (letter) or N |
621 | (digit) property, plus underscore, and so on. This option corresponds |
622 | to the \c{/u} modifier in Perl regular expressions. |
623 | */ |
624 | |
625 | /*! |
626 | \enum QRegularExpression::MatchType |
627 | |
628 | The MatchType enum defines the type of the match that should be attempted |
629 | against the subject string. |
630 | |
631 | \value NormalMatch |
632 | A normal match is done. |
633 | |
634 | \value PartialPreferCompleteMatch |
635 | The pattern string is matched partially against the subject string. If |
636 | a partial match is found, then it is recorded, and other matching |
637 | alternatives are tried as usual. If a complete match is then found, |
638 | then it's preferred to the partial match; in this case only the |
639 | complete match is reported. If instead no complete match is found (but |
640 | only the partial one), then the partial one is reported. |
641 | |
642 | \value PartialPreferFirstMatch |
643 | The pattern string is matched partially against the subject string. If |
644 | a partial match is found, then matching stops and the partial match is |
645 | reported. In this case, other matching alternatives (potentially |
646 | leading to a complete match) are not tried. Moreover, this match type |
647 | assumes that the subject string is only a substring of a larger text, and |
648 | that (in this text) there are other characters beyond the end of the |
649 | subject string. This can lead to surprising results; see the discussion |
650 | in the \l{partial matching} section for more details. |
651 | |
652 | \value NoMatch |
653 | No matching is done. This value is returned as the match type by a |
654 | default constructed QRegularExpressionMatch or |
655 | QRegularExpressionMatchIterator. Using this match type is not very |
656 | useful for the user, as no matching ever happens. This enum value |
657 | has been introduced in Qt 5.1. |
658 | */ |
659 | |
660 | /*! |
661 | \enum QRegularExpression::MatchOption |
662 | |
663 | \value NoMatchOption |
664 | No match options are set. |
665 | |
666 | \value AnchoredMatchOption |
667 | Use AnchorAtOffsetMatchOption instead. |
668 | |
669 | \value AnchorAtOffsetMatchOption |
670 | The match is constrained to start exactly at the offset passed to |
671 | match() in order to be successful, even if the pattern string does not |
672 | contain any metacharacter that anchors the match at that point. |
673 | Note that passing this option does not anchor the end of the match |
674 | to the end of the subject; if you want to fully anchor a regular |
675 | expression, use anchoredPattern(). |
676 | This enum value has been introduced in Qt 6.0. |
677 | |
678 | \value DontCheckSubjectStringMatchOption |
679 | The subject string is not checked for UTF-16 validity before |
680 | attempting a match. Use this option with extreme caution, as |
681 | attempting to match an invalid string may crash the program and/or |
682 | constitute a security issue. This enum value has been introduced in |
683 | Qt 5.4. |
684 | */ |
685 | |
686 | /*! |
687 | \internal |
688 | */ |
689 | static int convertToPcreOptions(QRegularExpression::PatternOptions patternOptions) |
690 | { |
691 | int options = 0; |
692 | |
693 | if (patternOptions & QRegularExpression::CaseInsensitiveOption) |
694 | options |= PCRE2_CASELESS; |
695 | if (patternOptions & QRegularExpression::DotMatchesEverythingOption) |
696 | options |= PCRE2_DOTALL; |
697 | if (patternOptions & QRegularExpression::MultilineOption) |
698 | options |= PCRE2_MULTILINE; |
699 | if (patternOptions & QRegularExpression::ExtendedPatternSyntaxOption) |
700 | options |= PCRE2_EXTENDED; |
701 | if (patternOptions & QRegularExpression::InvertedGreedinessOption) |
702 | options |= PCRE2_UNGREEDY; |
703 | if (patternOptions & QRegularExpression::DontCaptureOption) |
704 | options |= PCRE2_NO_AUTO_CAPTURE; |
705 | if (patternOptions & QRegularExpression::UseUnicodePropertiesOption) |
706 | options |= PCRE2_UCP; |
707 | |
708 | return options; |
709 | } |
710 | |
711 | /*! |
712 | \internal |
713 | */ |
714 | static int convertToPcreOptions(QRegularExpression::MatchOptions matchOptions) |
715 | { |
716 | int options = 0; |
717 | |
718 | if (matchOptions & QRegularExpression::AnchorAtOffsetMatchOption) |
719 | options |= PCRE2_ANCHORED; |
720 | if (matchOptions & QRegularExpression::DontCheckSubjectStringMatchOption) |
721 | options |= PCRE2_NO_UTF_CHECK; |
722 | |
723 | return options; |
724 | } |
725 | |
// Shared private data of QRegularExpression: the pattern string, its options,
// and the PCRE2 compiled pattern together with the information extracted
// from it when the pattern is compiled.
struct QRegularExpressionPrivate : QSharedData
{
    QRegularExpressionPrivate();
    ~QRegularExpressionPrivate();
    // Copies only pattern and patternOptions; the copy has no compiled
    // pattern and is marked dirty.
    QRegularExpressionPrivate(const QRegularExpressionPrivate &other);

    // Frees compiledPattern and resets the members filled in by a compilation.
    void cleanCompiledPattern();
    // Takes the mutex and compiles the pattern, but only if isDirty is set.
    void compilePattern();
    // Reads capture count, newline convention and (?J) usage from compiledPattern.
    void getPatternInfo();
    // JIT-compiles compiledPattern when the JIT is enabled.
    void optimizePattern();

    // Lets doMatch() callers skip the subject validity check
    // (DontCheckSubjectString adds PCRE2_NO_UTF_CHECK to the match options).
    enum CheckSubjectStringOption {
        CheckSubjectString,
        DontCheckSubjectString
    };

    // Runs the match described by priv starting at offset and stores the
    // outcome in priv; previous, if given, is the match being advanced.
    void doMatch(QRegularExpressionMatchPrivate *priv,
                 qsizetype offset,
                 CheckSubjectStringOption checkSubjectStringOption = CheckSubjectString,
                 const QRegularExpressionMatchPrivate *previous = nullptr) const;

    // Returns the number of the capturing group called name, or -1.
    int captureIndexForName(QStringView name) const;

    // sizeof(QSharedData) == 4, so start our members with an enum
    QRegularExpression::PatternOptions patternOptions;
    QString pattern;

    // *All* of the following members are managed while holding this mutex,
    // except for isDirty which is set to true by QRegularExpression setters
    // (right after a detach happened).
    mutable QMutex mutex;

    // The PCRE code pointer is reference-counted by the QRegularExpressionPrivate
    // objects themselves; when the private is copied (i.e. a detach happened)
    // it is set to nullptr
    pcre2_code_16 *compiledPattern;
    // 0 means "no error"; otherwise the code reported by pcre2_compile_16
    int errorCode;
    // offset of the error inside the pattern, -1 when there is none
    qsizetype errorOffset;
    // number of capturing groups, as reported by PCRE2_INFO_CAPTURECOUNT
    int capturingCount;
    // true if the newline convention in use is CRLF, ANY or ANYCRLF
    bool usingCrLfNewlines;
    // true if the pattern must be (re)compiled before being used
    bool isDirty;
};
768 | |
// Shared private data of QRegularExpressionMatch: the inputs of one match
// attempt (regular expression, subject, match type and options) and the
// results written by QRegularExpressionPrivate::doMatch().
struct QRegularExpressionMatchPrivate : QSharedData
{
    QRegularExpressionMatchPrivate(const QRegularExpression &re,
                                   const QString &subjectStorage,
                                   QStringView subject,
                                   QRegularExpression::MatchType matchType,
                                   QRegularExpression::MatchOptions matchOptions);

    // Attempts the match following this one on the same subject, starting
    // at the end offset of this match.
    QRegularExpressionMatch nextMatch() const;

    const QRegularExpression regularExpression;

    // subject is what we match upon. If we've been asked to match over
    // a QString, then subjectStorage is a copy of that string
    // (so that it's kept alive by us)
    const QString subjectStorage;
    const QStringView subject;

    const QRegularExpression::MatchType matchType;
    const QRegularExpression::MatchOptions matchOptions;

    // the capturedOffsets vector contains pairs of (start, end) positions
    // for each captured substring
    QList<qsizetype> capturedOffsets;

    // number of (start, end) pairs stored in capturedOffsets
    int capturedCount = 0;

    // hasMatch: a complete match was found; hasPartialMatch: PCRE2 reported
    // a partial match; isValid: the attempt ended with a match, a partial
    // match or "no match" (as opposed to an error or no attempt at all)
    bool hasMatch = false;
    bool hasPartialMatch = false;
    bool isValid = false;
};
800 | |
// Shared private data of QRegularExpressionMatchIterator: the upcoming
// result plus the parameters the iteration was started with.
struct QRegularExpressionMatchIteratorPrivate : QSharedData
{
    QRegularExpressionMatchIteratorPrivate(const QRegularExpression &re,
                                           QRegularExpression::MatchType matchType,
                                           QRegularExpression::MatchOptions matchOptions,
                                           const QRegularExpressionMatch &next);

    // true if next is valid and holds a complete or a partial match
    bool hasNext() const;
    // the result that hasNext() inspects
    QRegularExpressionMatch next;
    const QRegularExpression regularExpression;
    const QRegularExpression::MatchType matchType;
    const QRegularExpression::MatchOptions matchOptions;
};
814 | |
/*!
    \internal

    Constructs a QRegularExpression whose d-pointer refers to the private
    object \a dd.
*/
QRegularExpression::QRegularExpression(QRegularExpressionPrivate &dd)
    : d(&dd)
{
}
822 | |
/*!
    \internal

    Constructs a private with an empty pattern, no pattern options and no
    compiled pattern. isDirty starts out as true, so that the first call to
    compilePattern() actually compiles.
*/
QRegularExpressionPrivate::QRegularExpressionPrivate()
    : QSharedData(),
      patternOptions(),
      pattern(),
      mutex(),
      compiledPattern(nullptr),
      errorCode(0),      // 0 == no error
      errorOffset(-1),   // -1 == no error position
      capturingCount(0),
      usingCrLfNewlines(false),
      isDirty(true)
{
}
839 | |
/*!
    \internal

    Destroys the private, freeing the compiled pattern (if any).
*/
QRegularExpressionPrivate::~QRegularExpressionPrivate()
{
    cleanCompiledPattern();
}
847 | |
/*!
    \internal

    Copies the private, which means copying only the pattern and the pattern
    options. The compiledPattern pointer is NOT copied (we
    do not own it any more), and in general all the members set when
    compiling a pattern are set to default values. isDirty is set back to true
    so that the pattern has to be recompiled again.
*/
QRegularExpressionPrivate::QRegularExpressionPrivate(const QRegularExpressionPrivate &other)
    : QSharedData(other),
      // the only two members actually taken from other
      patternOptions(other.patternOptions),
      pattern(other.pattern),
      // the copy gets its own, fresh mutex
      mutex(),
      // same "nothing compiled yet" state as a default constructed private
      compiledPattern(nullptr),
      errorCode(0),
      errorOffset(-1),
      capturingCount(0),
      usingCrLfNewlines(false),
      isDirty(true)
{
}
870 | |
871 | /*! |
872 | \internal |
873 | */ |
874 | void QRegularExpressionPrivate::cleanCompiledPattern() |
875 | { |
876 | pcre2_code_free_16(compiledPattern); |
877 | compiledPattern = nullptr; |
878 | errorCode = 0; |
879 | errorOffset = -1; |
880 | capturingCount = 0; |
881 | usingCrLfNewlines = false; |
882 | } |
883 | |
884 | /*! |
885 | \internal |
886 | */ |
887 | void QRegularExpressionPrivate::compilePattern() |
888 | { |
889 | const QMutexLocker lock(&mutex); |
890 | |
891 | if (!isDirty) |
892 | return; |
893 | |
894 | isDirty = false; |
895 | cleanCompiledPattern(); |
896 | |
897 | int options = convertToPcreOptions(patternOptions); |
898 | options |= PCRE2_UTF; |
899 | |
900 | PCRE2_SIZE patternErrorOffset; |
901 | compiledPattern = pcre2_compile_16(reinterpret_cast<PCRE2_SPTR16>(pattern.utf16()), |
902 | pattern.length(), |
903 | options, |
904 | &errorCode, |
905 | &patternErrorOffset, |
906 | nullptr); |
907 | |
908 | if (!compiledPattern) { |
909 | errorOffset = qsizetype(patternErrorOffset); |
910 | return; |
911 | } else { |
912 | // ignore whatever PCRE2 wrote into errorCode -- leave it to 0 to mean "no error" |
913 | errorCode = 0; |
914 | } |
915 | |
916 | optimizePattern(); |
917 | getPatternInfo(); |
918 | } |
919 | |
920 | /*! |
921 | \internal |
922 | */ |
923 | void QRegularExpressionPrivate::getPatternInfo() |
924 | { |
925 | Q_ASSERT(compiledPattern); |
926 | |
927 | pcre2_pattern_info_16(compiledPattern, PCRE2_INFO_CAPTURECOUNT, &capturingCount); |
928 | |
929 | // detect the settings for the newline |
930 | unsigned int patternNewlineSetting; |
931 | if (pcre2_pattern_info_16(compiledPattern, PCRE2_INFO_NEWLINE, &patternNewlineSetting) != 0) { |
932 | // no option was specified in the regexp, grab PCRE build defaults |
933 | pcre2_config_16(PCRE2_CONFIG_NEWLINE, &patternNewlineSetting); |
934 | } |
935 | |
936 | usingCrLfNewlines = (patternNewlineSetting == PCRE2_NEWLINE_CRLF) || |
937 | (patternNewlineSetting == PCRE2_NEWLINE_ANY) || |
938 | (patternNewlineSetting == PCRE2_NEWLINE_ANYCRLF); |
939 | |
940 | unsigned int hasJOptionChanged; |
941 | pcre2_pattern_info_16(compiledPattern, PCRE2_INFO_JCHANGED, &hasJOptionChanged); |
942 | if (Q_UNLIKELY(hasJOptionChanged)) { |
943 | qWarning("QRegularExpressionPrivate::getPatternInfo(): the pattern '%ls'\n is using the (?J) option; duplicate capturing group names are not supported by Qt" , |
944 | qUtf16Printable(pattern)); |
945 | } |
946 | } |
947 | |
948 | |
949 | /* |
950 | Simple "smartpointer" wrapper around a pcre2_jit_stack_16, to be used with |
951 | QThreadStorage. |
952 | */ |
953 | class QPcreJitStackPointer |
954 | { |
955 | Q_DISABLE_COPY(QPcreJitStackPointer) |
956 | |
957 | public: |
958 | /*! |
959 | \internal |
960 | */ |
961 | QPcreJitStackPointer() |
962 | { |
963 | // The default JIT stack size in PCRE is 32K, |
964 | // we allocate from 32K up to 512K. |
965 | stack = pcre2_jit_stack_create_16(32 * 1024, 512 * 1024, nullptr); |
966 | } |
967 | /*! |
968 | \internal |
969 | */ |
970 | ~QPcreJitStackPointer() |
971 | { |
972 | if (stack) |
973 | pcre2_jit_stack_free_16(stack); |
974 | } |
975 | |
976 | pcre2_jit_stack_16 *stack; |
977 | }; |
978 | |
// Per-thread storage for the JIT stacks; a thread's entry is set by
// safe_pcre2_match_16() and looked up by qtPcreCallback().
Q_GLOBAL_STATIC(QThreadStorage<QPcreJitStackPointer *>, jitStacks)
980 | |
981 | /*! |
982 | \internal |
983 | */ |
984 | static pcre2_jit_stack_16 *qtPcreCallback(void *) |
985 | { |
986 | if (jitStacks()->hasLocalData()) |
987 | return jitStacks()->localData()->stack; |
988 | |
989 | return nullptr; |
990 | } |
991 | |
992 | /*! |
993 | \internal |
994 | */ |
995 | static bool isJitEnabled() |
996 | { |
997 | QByteArray jitEnvironment = qgetenv("QT_ENABLE_REGEXP_JIT" ); |
998 | if (!jitEnvironment.isEmpty()) { |
999 | bool ok; |
1000 | int enableJit = jitEnvironment.toInt(&ok); |
1001 | return ok ? (enableJit != 0) : true; |
1002 | } |
1003 | |
1004 | #ifdef QT_DEBUG |
1005 | return false; |
1006 | #else |
1007 | return true; |
1008 | #endif |
1009 | } |
1010 | |
1011 | /*! |
1012 | \internal |
1013 | |
1014 | The purpose of the function is to call pcre2_jit_compile_16, which |
1015 | JIT-compiles the pattern. |
1016 | |
1017 | It gets called when a pattern is recompiled by us (in compilePattern()), |
1018 | under mutex protection. |
1019 | */ |
1020 | void QRegularExpressionPrivate::optimizePattern() |
1021 | { |
1022 | Q_ASSERT(compiledPattern); |
1023 | |
1024 | static const bool enableJit = isJitEnabled(); |
1025 | |
1026 | if (!enableJit) |
1027 | return; |
1028 | |
1029 | pcre2_jit_compile_16(compiledPattern, PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD); |
1030 | } |
1031 | |
1032 | /*! |
1033 | \internal |
1034 | |
1035 | Returns the capturing group number for the given name. Duplicated names for |
1036 | capturing groups are not supported. |
1037 | */ |
1038 | int QRegularExpressionPrivate::captureIndexForName(QStringView name) const |
1039 | { |
1040 | Q_ASSERT(!name.isEmpty()); |
1041 | |
1042 | if (!compiledPattern) |
1043 | return -1; |
1044 | |
1045 | // See the other usages of pcre2_pattern_info_16 for more details about this |
1046 | PCRE2_SPTR16 *namedCapturingTable; |
1047 | unsigned int namedCapturingTableEntryCount; |
1048 | unsigned int namedCapturingTableEntrySize; |
1049 | |
1050 | pcre2_pattern_info_16(compiledPattern, PCRE2_INFO_NAMETABLE, &namedCapturingTable); |
1051 | pcre2_pattern_info_16(compiledPattern, PCRE2_INFO_NAMECOUNT, &namedCapturingTableEntryCount); |
1052 | pcre2_pattern_info_16(compiledPattern, PCRE2_INFO_NAMEENTRYSIZE, &namedCapturingTableEntrySize); |
1053 | |
1054 | for (unsigned int i = 0; i < namedCapturingTableEntryCount; ++i) { |
1055 | const auto currentNamedCapturingTableRow = |
1056 | reinterpret_cast<const char16_t *>(namedCapturingTable) + namedCapturingTableEntrySize * i; |
1057 | |
1058 | if (name == (currentNamedCapturingTableRow + 1)) { |
1059 | const int index = *currentNamedCapturingTableRow; |
1060 | return index; |
1061 | } |
1062 | } |
1063 | |
1064 | return -1; |
1065 | } |
1066 | |
1067 | /*! |
1068 | \internal |
1069 | |
1070 | This is a simple wrapper for pcre2_match_16 for handling the case in which the |
1071 | JIT runs out of memory. In that case, we allocate a thread-local JIT stack |
1072 | and re-run pcre2_match_16. |
1073 | */ |
1074 | static int safe_pcre2_match_16(const pcre2_code_16 *code, |
1075 | PCRE2_SPTR16 subject, qsizetype length, |
1076 | qsizetype startOffset, int options, |
1077 | pcre2_match_data_16 *matchData, |
1078 | pcre2_match_context_16 *matchContext) |
1079 | { |
1080 | int result = pcre2_match_16(code, subject, length, |
1081 | startOffset, options, matchData, matchContext); |
1082 | |
1083 | if (result == PCRE2_ERROR_JIT_STACKLIMIT && !jitStacks()->hasLocalData()) { |
1084 | QPcreJitStackPointer *p = new QPcreJitStackPointer; |
1085 | jitStacks()->setLocalData(p); |
1086 | |
1087 | result = pcre2_match_16(code, subject, length, |
1088 | startOffset, options, matchData, matchContext); |
1089 | } |
1090 | |
1091 | return result; |
1092 | } |
1093 | |
1094 | /*! |
1095 | \internal |
1096 | |
1097 | Performs a match on the subject string view held by \a priv. The |
1098 | match will be of type priv->matchType and using the options |
1099 | priv->matchOptions; the matching \a offset is relative the |
1100 | substring, and if negative, it's taken as an offset from the end of |
1101 | the substring. |
1102 | |
1103 | It also advances a match if a previous result is given as \a |
1104 | previous. The subject string goes a Unicode validity check if |
1105 | \a checkSubjectString is CheckSubjectString and the match options don't |
1106 | include DontCheckSubjectStringMatchOption (PCRE doesn't like illegal |
1107 | UTF-16 sequences). |
1108 | |
1109 | \a priv is modified to hold the results of the match. |
1110 | |
1111 | Advancing a match is a tricky algorithm. If the previous match matched a |
1112 | non-empty string, we just do an ordinary match at the offset position. |
1113 | |
1114 | If the previous match matched an empty string, then an anchored, non-empty |
1115 | match is attempted at the offset position. If that succeeds, then we got |
1116 | the next match and we can return it. Otherwise, we advance by 1 position |
1117 | (which can be one or two code units in UTF-16!) and reattempt a "normal" |
1118 | match. We also have the problem of detecting the current newline format: if |
1119 | the new advanced offset is pointing to the beginning of a CRLF sequence, we |
1120 | must advance over it. |
1121 | */ |
1122 | void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv, |
1123 | qsizetype offset, |
1124 | CheckSubjectStringOption checkSubjectStringOption, |
1125 | const QRegularExpressionMatchPrivate *previous) const |
1126 | { |
1127 | Q_ASSERT(priv); |
1128 | Q_ASSUME(priv != previous); |
1129 | |
1130 | const qsizetype subjectLength = priv->subject.size(); |
1131 | |
1132 | if (offset < 0) |
1133 | offset += subjectLength; |
1134 | |
1135 | if (offset < 0 || offset > subjectLength) |
1136 | return; |
1137 | |
1138 | if (Q_UNLIKELY(!compiledPattern)) { |
1139 | qWarning("QRegularExpressionPrivate::doMatch(): called on an invalid QRegularExpression object" ); |
1140 | return; |
1141 | } |
1142 | |
1143 | // skip doing the actual matching if NoMatch type was requested |
1144 | if (priv->matchType == QRegularExpression::NoMatch) { |
1145 | priv->isValid = true; |
1146 | return; |
1147 | } |
1148 | |
1149 | int pcreOptions = convertToPcreOptions(priv->matchOptions); |
1150 | |
1151 | if (priv->matchType == QRegularExpression::PartialPreferCompleteMatch) |
1152 | pcreOptions |= PCRE2_PARTIAL_SOFT; |
1153 | else if (priv->matchType == QRegularExpression::PartialPreferFirstMatch) |
1154 | pcreOptions |= PCRE2_PARTIAL_HARD; |
1155 | |
1156 | if (checkSubjectStringOption == DontCheckSubjectString) |
1157 | pcreOptions |= PCRE2_NO_UTF_CHECK; |
1158 | |
1159 | bool previousMatchWasEmpty = false; |
1160 | if (previous && previous->hasMatch && |
1161 | (previous->capturedOffsets.at(0) == previous->capturedOffsets.at(1))) { |
1162 | previousMatchWasEmpty = true; |
1163 | } |
1164 | |
1165 | pcre2_match_context_16 *matchContext = pcre2_match_context_create_16(nullptr); |
1166 | pcre2_jit_stack_assign_16(matchContext, &qtPcreCallback, nullptr); |
1167 | pcre2_match_data_16 *matchData = pcre2_match_data_create_from_pattern_16(compiledPattern, nullptr); |
1168 | |
1169 | const char16_t * const subjectUtf16 = priv->subject.utf16(); |
1170 | |
1171 | int result; |
1172 | |
1173 | if (!previousMatchWasEmpty) { |
1174 | result = safe_pcre2_match_16(compiledPattern, |
1175 | reinterpret_cast<PCRE2_SPTR16>(subjectUtf16), subjectLength, |
1176 | offset, pcreOptions, |
1177 | matchData, matchContext); |
1178 | } else { |
1179 | result = safe_pcre2_match_16(compiledPattern, |
1180 | reinterpret_cast<PCRE2_SPTR16>(subjectUtf16), subjectLength, |
1181 | offset, pcreOptions | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, |
1182 | matchData, matchContext); |
1183 | |
1184 | if (result == PCRE2_ERROR_NOMATCH) { |
1185 | ++offset; |
1186 | |
1187 | if (usingCrLfNewlines |
1188 | && offset < subjectLength |
1189 | && subjectUtf16[offset - 1] == QLatin1Char('\r') |
1190 | && subjectUtf16[offset] == QLatin1Char('\n')) { |
1191 | ++offset; |
1192 | } else if (offset < subjectLength |
1193 | && QChar::isLowSurrogate(subjectUtf16[offset])) { |
1194 | ++offset; |
1195 | } |
1196 | |
1197 | result = safe_pcre2_match_16(compiledPattern, |
1198 | reinterpret_cast<PCRE2_SPTR16>(subjectUtf16), subjectLength, |
1199 | offset, pcreOptions, |
1200 | matchData, matchContext); |
1201 | } |
1202 | } |
1203 | |
1204 | #ifdef QREGULAREXPRESSION_DEBUG |
1205 | qDebug() << "Matching" << pattern << "against" << subject |
1206 | << "offset" << offset |
1207 | << priv->matchType << priv->matchOptions << previousMatchWasEmpty |
1208 | << "result" << result; |
1209 | #endif |
1210 | |
1211 | // result == 0 means not enough space in captureOffsets; should never happen |
1212 | Q_ASSERT(result != 0); |
1213 | |
1214 | if (result > 0) { |
1215 | // full match |
1216 | priv->isValid = true; |
1217 | priv->hasMatch = true; |
1218 | priv->capturedCount = result; |
1219 | priv->capturedOffsets.resize(result * 2); |
1220 | } else { |
1221 | // no match, partial match or error |
1222 | priv->hasPartialMatch = (result == PCRE2_ERROR_PARTIAL); |
1223 | priv->isValid = (result == PCRE2_ERROR_NOMATCH || result == PCRE2_ERROR_PARTIAL); |
1224 | |
1225 | if (result == PCRE2_ERROR_PARTIAL) { |
1226 | // partial match: |
1227 | // leave the start and end capture offsets (i.e. cap(0)) |
1228 | priv->capturedCount = 1; |
1229 | priv->capturedOffsets.resize(2); |
1230 | } else { |
1231 | // no match or error |
1232 | priv->capturedCount = 0; |
1233 | priv->capturedOffsets.clear(); |
1234 | } |
1235 | } |
1236 | |
1237 | // copy the captured substrings offsets, if any |
1238 | if (priv->capturedCount) { |
1239 | PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(matchData); |
1240 | qsizetype *const capturedOffsets = priv->capturedOffsets.data(); |
1241 | |
1242 | for (int i = 0; i < priv->capturedCount * 2; ++i) |
1243 | capturedOffsets[i] = qsizetype(ovector[i]); |
1244 | |
1245 | // For partial matches, PCRE2 and PCRE1 differ in behavior when lookbehinds |
1246 | // are involved. PCRE2 reports the real begin of the match and the maximum |
1247 | // used lookbehind as distinct information; PCRE1 instead automatically |
1248 | // adjusted ovector[0] to include the maximum lookbehind. |
1249 | // |
1250 | // For instance, given the pattern "\bstring\b", and the subject "a str": |
1251 | // * PCRE1 reports partial, capturing " str" |
1252 | // * PCRE2 reports partial, capturing "str" with a lookbehind of 1 |
1253 | // |
1254 | // To keep behavior, emulate PCRE1 here. |
1255 | // (Eventually, we could expose the lookbehind info in a future patch.) |
1256 | if (result == PCRE2_ERROR_PARTIAL) { |
1257 | unsigned int maximumLookBehind; |
1258 | pcre2_pattern_info_16(compiledPattern, PCRE2_INFO_MAXLOOKBEHIND, &maximumLookBehind); |
1259 | capturedOffsets[0] -= maximumLookBehind; |
1260 | } |
1261 | } |
1262 | |
1263 | pcre2_match_data_free_16(matchData); |
1264 | pcre2_match_context_free_16(matchContext); |
1265 | } |
1266 | |
/*!
    \internal

    Stores the parameters of a match attempt: the regular expression \a re,
    the \a subject view together with its \a subjectStorage, the
    \a matchType and the \a matchOptions. No matching is performed here; the
    result members keep their default values.
*/
QRegularExpressionMatchPrivate::QRegularExpressionMatchPrivate(const QRegularExpression &re,
                                                               const QString &subjectStorage,
                                                               QStringView subject,
                                                               QRegularExpression::MatchType matchType,
                                                               QRegularExpression::MatchOptions matchOptions)
    : regularExpression(re),
      subjectStorage(subjectStorage),
      subject(subject),
      matchType(matchType),
      matchOptions(matchOptions)
{
}
1282 | |
1283 | /*! |
1284 | \internal |
1285 | */ |
1286 | QRegularExpressionMatch QRegularExpressionMatchPrivate::nextMatch() const |
1287 | { |
1288 | Q_ASSERT(isValid); |
1289 | Q_ASSERT(hasMatch || hasPartialMatch); |
1290 | |
1291 | auto nextPrivate = new QRegularExpressionMatchPrivate(regularExpression, |
1292 | subjectStorage, |
1293 | subject, |
1294 | matchType, |
1295 | matchOptions); |
1296 | |
1297 | // Note the DontCheckSubjectString passed for the check of the subject string: |
1298 | // if we're advancing a match on the same subject, |
1299 | // then that subject was already checked at least once (when this object |
1300 | // was created, or when the object that created this one was created, etc.) |
1301 | regularExpression.d->doMatch(nextPrivate, |
1302 | capturedOffsets.at(1), |
1303 | QRegularExpressionPrivate::DontCheckSubjectString, |
1304 | this); |
1305 | return QRegularExpressionMatch(*nextPrivate); |
1306 | } |
1307 | |
/*!
    \internal

    Stores the upcoming result \a next together with the regular expression
    \a re, the \a matchType and the \a matchOptions of the iteration.
*/
QRegularExpressionMatchIteratorPrivate::QRegularExpressionMatchIteratorPrivate(const QRegularExpression &re,
                                                                               QRegularExpression::MatchType matchType,
                                                                               QRegularExpression::MatchOptions matchOptions,
                                                                               const QRegularExpressionMatch &next)
    : next(next),
      regularExpression(re),
      matchType(matchType), matchOptions(matchOptions)
{
}
1320 | |
1321 | /*! |
1322 | \internal |
1323 | */ |
1324 | bool QRegularExpressionMatchIteratorPrivate::hasNext() const |
1325 | { |
1326 | return next.isValid() && (next.hasMatch() || next.hasPartialMatch()); |
1327 | } |
1328 | |
1329 | // PUBLIC API |
1330 | |
1331 | /*! |
1332 | Constructs a QRegularExpression object with an empty pattern and no pattern |
1333 | options. |
1334 | |
1335 | \sa setPattern(), setPatternOptions() |
1336 | */ |
QRegularExpression::QRegularExpression()
    : d(new QRegularExpressionPrivate)
{
    // a default constructed private has an empty pattern and no options
}
1341 | |
1342 | /*! |
1343 | Constructs a QRegularExpression object using the given \a pattern as |
1344 | pattern and the \a options as the pattern options. |
1345 | |
1346 | \sa setPattern(), setPatternOptions() |
1347 | */ |
1348 | QRegularExpression::QRegularExpression(const QString &pattern, PatternOptions options) |
1349 | : d(new QRegularExpressionPrivate) |
1350 | { |
1351 | d->pattern = pattern; |
1352 | d->patternOptions = options; |
1353 | } |
1354 | |
1355 | /*! |
1356 | Constructs a QRegularExpression object as a copy of \a re. |
1357 | |
1358 | \sa operator=() |
1359 | */ |
QRegularExpression::QRegularExpression(const QRegularExpression &re)
    : d(re.d)
{
    // only the d-pointer is copied; the setters detach before modifying
}
1364 | |
1365 | /*! |
1366 | Destroys the QRegularExpression object. |
1367 | */ |
QRegularExpression::~QRegularExpression()
{
    // nothing to do here: the d-pointer takes care of the private
}
1371 | |
1372 | /*! |
1373 | Assigns the regular expression \a re to this object, and returns a reference |
1374 | to the copy. Both the pattern and the pattern options are copied. |
1375 | */ |
QRegularExpression &QRegularExpression::operator=(const QRegularExpression &re)
{
    // only the d-pointer is assigned; the setters detach before modifying
    d = re.d;
    return *this;
}
1381 | |
1382 | /*! |
1383 | \fn void QRegularExpression::swap(QRegularExpression &other) |
1384 | |
1385 | Swaps the regular expression \a other with this regular expression. This |
1386 | operation is very fast and never fails. |
1387 | */ |
1388 | |
1389 | /*! |
1390 | Returns the pattern string of the regular expression. |
1391 | |
1392 | \sa setPattern(), patternOptions() |
1393 | */ |
QString QRegularExpression::pattern() const
{
    // plain getter: does not trigger a compilation of the pattern
    return d->pattern;
}
1398 | |
1399 | /*! |
1400 | Sets the pattern string of the regular expression to \a pattern. The |
1401 | pattern options are left unchanged. |
1402 | |
1403 | \sa pattern(), setPatternOptions() |
1404 | */ |
1405 | void QRegularExpression::setPattern(const QString &pattern) |
1406 | { |
1407 | d.detach(); |
1408 | d->isDirty = true; |
1409 | d->pattern = pattern; |
1410 | } |
1411 | |
1412 | /*! |
1413 | Returns the pattern options for the regular expression. |
1414 | |
1415 | \sa setPatternOptions(), pattern() |
1416 | */ |
QRegularExpression::PatternOptions QRegularExpression::patternOptions() const
{
    // plain getter: does not trigger a compilation of the pattern
    return d->patternOptions;
}
1421 | |
1422 | /*! |
1423 | Sets the given \a options as the pattern options of the regular expression. |
1424 | The pattern string is left unchanged. |
1425 | |
1426 | \sa patternOptions(), setPattern() |
1427 | */ |
1428 | void QRegularExpression::setPatternOptions(PatternOptions options) |
1429 | { |
1430 | d.detach(); |
1431 | d->isDirty = true; |
1432 | d->patternOptions = options; |
1433 | } |
1434 | |
1435 | /*! |
1436 | Returns the number of capturing groups inside the pattern string, |
1437 | or -1 if the regular expression is not valid. |
1438 | |
1439 | \note The implicit capturing group 0 is \e{not} included in the returned number. |
1440 | |
1441 | \sa isValid() |
1442 | */ |
1443 | int QRegularExpression::captureCount() const |
1444 | { |
1445 | if (!isValid()) // will compile the pattern |
1446 | return -1; |
1447 | return d->capturingCount; |
1448 | } |
1449 | |
1450 | /*! |
1451 | \since 5.1 |
1452 | |
1453 | Returns a list of captureCount() + 1 elements, containing the names of the |
1454 | named capturing groups in the pattern string. The list is sorted such that |
1455 | the element of the list at position \c{i} is the name of the \c{i}-th |
1456 | capturing group, if it has a name, or an empty string if that capturing |
1457 | group is unnamed. |
1458 | |
1459 | For instance, given the regular expression |
1460 | |
1461 | \snippet code/src_corelib_text_qregularexpression.cpp 32 |
1462 | |
1463 | namedCaptureGroups() will return the following list: |
1464 | |
1465 | \snippet code/src_corelib_text_qregularexpression.cpp 33 |
1466 | |
1467 | which corresponds to the fact that the capturing group #0 (corresponding to |
1468 | the whole match) has no name, the capturing group #1 has name "day", the |
1469 | capturing group #2 has name "month", etc. |
1470 | |
1471 | If the regular expression is not valid, returns an empty list. |
1472 | |
1473 | \sa isValid(), QRegularExpressionMatch::captured(), QString::isEmpty() |
1474 | */ |
1475 | QStringList QRegularExpression::namedCaptureGroups() const |
1476 | { |
1477 | if (!isValid()) // isValid() will compile the pattern |
1478 | return QStringList(); |
1479 | |
1480 | // namedCapturingTable will point to a table of |
1481 | // namedCapturingTableEntryCount entries, each one of which |
1482 | // contains one ushort followed by the name, NUL terminated. |
1483 | // The ushort is the numerical index of the name in the pattern. |
1484 | // The length of each entry is namedCapturingTableEntrySize. |
1485 | PCRE2_SPTR16 *namedCapturingTable; |
1486 | unsigned int namedCapturingTableEntryCount; |
1487 | unsigned int namedCapturingTableEntrySize; |
1488 | |
1489 | pcre2_pattern_info_16(d->compiledPattern, PCRE2_INFO_NAMETABLE, &namedCapturingTable); |
1490 | pcre2_pattern_info_16(d->compiledPattern, PCRE2_INFO_NAMECOUNT, &namedCapturingTableEntryCount); |
1491 | pcre2_pattern_info_16(d->compiledPattern, PCRE2_INFO_NAMEENTRYSIZE, &namedCapturingTableEntrySize); |
1492 | |
1493 | // The +1 is for the implicit group #0 |
1494 | QStringList result(d->capturingCount + 1); |
1495 | |
1496 | for (unsigned int i = 0; i < namedCapturingTableEntryCount; ++i) { |
1497 | const auto currentNamedCapturingTableRow = |
1498 | reinterpret_cast<const char16_t *>(namedCapturingTable) + namedCapturingTableEntrySize * i; |
1499 | |
1500 | const int index = *currentNamedCapturingTableRow; |
1501 | result[index] = QString::fromUtf16(currentNamedCapturingTableRow + 1); |
1502 | } |
1503 | |
1504 | return result; |
1505 | } |
1506 | |
1507 | /*! |
1508 | Returns \c true if the regular expression is a valid regular expression (that |
1509 | is, it contains no syntax errors, etc.), or false otherwise. Use |
1510 | errorString() to obtain a textual description of the error. |
1511 | |
1512 | \sa errorString(), patternErrorOffset() |
1513 | */ |
1514 | bool QRegularExpression::isValid() const |
1515 | { |
1516 | d.data()->compilePattern(); |
1517 | return d->compiledPattern; |
1518 | } |
1519 | |
1520 | /*! |
1521 | Returns a textual description of the error found when checking the validity |
1522 | of the regular expression, or "no error" if no error was found. |
1523 | |
1524 | \sa isValid(), patternErrorOffset() |
1525 | */ |
1526 | QString QRegularExpression::errorString() const |
1527 | { |
1528 | d.data()->compilePattern(); |
1529 | if (d->errorCode) { |
1530 | QString errorString; |
1531 | int errorStringLength; |
1532 | do { |
1533 | errorString.resize(errorString.length() + 64); |
1534 | errorStringLength = pcre2_get_error_message_16(d->errorCode, |
1535 | reinterpret_cast<ushort *>(errorString.data()), |
1536 | errorString.length()); |
1537 | } while (errorStringLength < 0); |
1538 | errorString.resize(errorStringLength); |
1539 | |
1540 | #ifdef QT_NO_TRANSLATION |
1541 | return errorString; |
1542 | #else |
1543 | return QCoreApplication::translate("QRegularExpression" , std::move(errorString).toLatin1().constData()); |
1544 | #endif |
1545 | } |
1546 | #ifdef QT_NO_TRANSLATION |
1547 | return QLatin1String("no error" ); |
1548 | #else |
1549 | return QCoreApplication::translate("QRegularExpression" , "no error" ); |
1550 | #endif |
1551 | } |
1552 | |
1553 | /*! |
1554 | Returns the offset, inside the pattern string, at which an error was found |
1555 | when checking the validity of the regular expression. If no error was |
1556 | found, then -1 is returned. |
1557 | |
1558 | \sa pattern(), isValid(), errorString() |
1559 | */ |
1560 | qsizetype QRegularExpression::patternErrorOffset() const |
1561 | { |
1562 | d.data()->compilePattern(); |
1563 | return d->errorOffset; |
1564 | } |
1565 | |
1566 | /*! |
1567 | Attempts to match the regular expression against the given \a subject |
1568 | string, starting at the position \a offset inside the subject, using a |
1569 | match of type \a matchType and honoring the given \a matchOptions. |
1570 | |
1571 | The returned QRegularExpressionMatch object contains the results of the |
1572 | match. |
1573 | |
1574 | \sa QRegularExpressionMatch, {normal matching} |
1575 | */ |
1576 | QRegularExpressionMatch QRegularExpression::match(const QString &subject, |
1577 | qsizetype offset, |
1578 | MatchType matchType, |
1579 | MatchOptions matchOptions) const |
1580 | { |
1581 | d.data()->compilePattern(); |
1582 | auto priv = new QRegularExpressionMatchPrivate(*this, |
1583 | subject, |
1584 | qToStringViewIgnoringNull(subject), |
1585 | matchType, |
1586 | matchOptions); |
1587 | d->doMatch(priv, offset); |
1588 | return QRegularExpressionMatch(*priv); |
1589 | } |
1590 | |
1591 | /*! |
1592 | \since 6.0 |
1593 | \overload |
1594 | |
1595 | Attempts to match the regular expression against the given \a subjectView |
1596 | string view, starting at the position \a offset inside the subject, using a |
1597 | match of type \a matchType and honoring the given \a matchOptions. |
1598 | |
1599 | The returned QRegularExpressionMatch object contains the results of the |
1600 | match. |
1601 | |
1602 | \note The data referenced by \a subjectView must remain valid as long |
1603 | as there are QRegularExpressionMatch objects using it. |
1604 | |
1605 | \sa QRegularExpressionMatch, {normal matching} |
1606 | */ |
1607 | QRegularExpressionMatch QRegularExpression::match(QStringView subjectView, |
1608 | qsizetype offset, |
1609 | MatchType matchType, |
1610 | MatchOptions matchOptions) const |
1611 | { |
1612 | d.data()->compilePattern(); |
1613 | auto priv = new QRegularExpressionMatchPrivate(*this, |
1614 | QString(), |
1615 | subjectView, |
1616 | matchType, |
1617 | matchOptions); |
1618 | d->doMatch(priv, offset); |
1619 | return QRegularExpressionMatch(*priv); |
1620 | } |
1621 | |
1622 | /*! |
1623 | Attempts to perform a global match of the regular expression against the |
1624 | given \a subject string, starting at the position \a offset inside the |
1625 | subject, using a match of type \a matchType and honoring the given \a |
1626 | matchOptions. |
1627 | |
1628 | The returned QRegularExpressionMatchIterator is positioned before the |
1629 | first match result (if any). |
1630 | |
1631 | \sa QRegularExpressionMatchIterator, {global matching} |
1632 | */ |
1633 | QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &subject, |
1634 | qsizetype offset, |
1635 | MatchType matchType, |
1636 | MatchOptions matchOptions) const |
1637 | { |
1638 | QRegularExpressionMatchIteratorPrivate *priv = |
1639 | new QRegularExpressionMatchIteratorPrivate(*this, |
1640 | matchType, |
1641 | matchOptions, |
1642 | match(subject, offset, matchType, matchOptions)); |
1643 | |
1644 | return QRegularExpressionMatchIterator(*priv); |
1645 | } |
1646 | |
1647 | /*! |
1648 | \since 6.0 |
1649 | \overload |
1650 | |
1651 | Attempts to perform a global match of the regular expression against the |
1652 | given \a subjectView string view, starting at the position \a offset inside the |
1653 | subject, using a match of type \a matchType and honoring the given \a |
1654 | matchOptions. |
1655 | |
1656 | The returned QRegularExpressionMatchIterator is positioned before the |
1657 | first match result (if any). |
1658 | |
1659 | \note The data referenced by \a subjectView must remain valid as |
1660 | long as there are QRegularExpressionMatchIterator or |
1661 | QRegularExpressionMatch objects using it. |
1662 | |
1663 | \sa QRegularExpressionMatchIterator, {global matching} |
1664 | */ |
1665 | QRegularExpressionMatchIterator QRegularExpression::globalMatch(QStringView subjectView, |
1666 | qsizetype offset, |
1667 | MatchType matchType, |
1668 | MatchOptions matchOptions) const |
1669 | { |
1670 | QRegularExpressionMatchIteratorPrivate *priv = |
1671 | new QRegularExpressionMatchIteratorPrivate(*this, |
1672 | matchType, |
1673 | matchOptions, |
1674 | match(subjectView, offset, matchType, matchOptions)); |
1675 | |
1676 | return QRegularExpressionMatchIterator(*priv); |
1677 | } |
1678 | |
1679 | /*! |
1680 | \since 5.4 |
1681 | |
1682 | Compiles the pattern immediately, including JIT compiling it (if |
1683 | the JIT is enabled) for optimization. |
1684 | |
1685 | \sa isValid(), {Debugging Code that Uses QRegularExpression} |
1686 | */ |
1687 | void QRegularExpression::optimize() const |
1688 | { |
1689 | d.data()->compilePattern(); |
1690 | } |
1691 | |
1692 | /*! |
1693 | Returns \c true if the regular expression is equal to \a re, or false |
1694 | otherwise. Two QRegularExpression objects are equal if they have |
1695 | the same pattern string and the same pattern options. |
1696 | |
1697 | \sa operator!=() |
1698 | */ |
1699 | bool QRegularExpression::operator==(const QRegularExpression &re) const |
1700 | { |
1701 | return (d == re.d) || |
1702 | (d->pattern == re.d->pattern && d->patternOptions == re.d->patternOptions); |
1703 | } |
1704 | |
1705 | /*! |
1706 | \fn QRegularExpression & QRegularExpression::operator=(QRegularExpression && re) |
1707 | |
1708 | Move-assigns the regular expression \a re to this object, and returns a reference |
1709 | to the copy. Both the pattern and the pattern options are copied. |
1710 | */ |
1711 | |
1712 | /*! |
1713 | \fn bool QRegularExpression::operator!=(const QRegularExpression &re) const |
1714 | |
1715 | Returns \c true if the regular expression is different from \a re, or |
1716 | false otherwise. |
1717 | |
1718 | \sa operator==() |
1719 | */ |
1720 | |
1721 | /*! |
1722 | \since 5.6 |
1723 | \relates QRegularExpression |
1724 | |
1725 | Returns the hash value for \a key, using |
1726 | \a seed to seed the calculation. |
1727 | */ |
1728 | size_t qHash(const QRegularExpression &key, size_t seed) noexcept |
1729 | { |
1730 | return qHashMulti(seed, key.d->pattern, key.d->patternOptions); |
1731 | } |
1732 | |
1733 | #if QT_STRINGVIEW_LEVEL < 2 |
1734 | /*! |
1735 | \fn QString QRegularExpression::escape(const QString &str) |
1736 | \overload |
1737 | */ |
1738 | #endif // QT_STRINGVIEW_LEVEL < 2 |
1739 | |
1740 | /*! |
1741 | \since 5.15 |
1742 | |
1743 | Escapes all characters of \a str so that they no longer have any special |
1744 | meaning when used as a regular expression pattern string, and returns |
1745 | the escaped string. For instance: |
1746 | |
1747 | \snippet code/src_corelib_text_qregularexpression.cpp 26 |
1748 | |
1749 | This is very convenient in order to build patterns from arbitrary strings: |
1750 | |
1751 | \snippet code/src_corelib_text_qregularexpression.cpp 27 |
1752 | |
1753 | \note This function implements Perl's quotemeta algorithm and escapes with |
1754 | a backslash all characters in \a str, except for the characters in the |
1755 | \c{[A-Z]}, \c{[a-z]} and \c{[0-9]} ranges, as well as the underscore |
1756 | (\c{_}) character. The only difference with Perl is that a literal NUL |
1757 | inside \a str is escaped with the sequence \c{"\\0"} (backslash + |
1758 | \c{'0'}), instead of \c{"\\\0"} (backslash + \c{NUL}). |
1759 | */ |
1760 | QString QRegularExpression::escape(QStringView str) |
1761 | { |
1762 | QString result; |
1763 | const qsizetype count = str.size(); |
1764 | result.reserve(count * 2); |
1765 | |
1766 | // everything but [a-zA-Z0-9_] gets escaped, |
1767 | // cf. perldoc -f quotemeta |
1768 | for (qsizetype i = 0; i < count; ++i) { |
1769 | const QChar current = str.at(i); |
1770 | |
1771 | if (current == QChar::Null) { |
1772 | // unlike Perl, a literal NUL must be escaped with |
1773 | // "\\0" (backslash + 0) and not "\\\0" (backslash + NUL), |
1774 | // because pcre16_compile uses a NUL-terminated string |
1775 | result.append(QLatin1Char('\\')); |
1776 | result.append(QLatin1Char('0')); |
1777 | } else if ( (current < QLatin1Char('a') || current > QLatin1Char('z')) && |
1778 | (current < QLatin1Char('A') || current > QLatin1Char('Z')) && |
1779 | (current < QLatin1Char('0') || current > QLatin1Char('9')) && |
1780 | current != QLatin1Char('_') ) |
1781 | { |
1782 | result.append(QLatin1Char('\\')); |
1783 | result.append(current); |
1784 | if (current.isHighSurrogate() && i < (count - 1)) |
1785 | result.append(str.at(++i)); |
1786 | } else { |
1787 | result.append(current); |
1788 | } |
1789 | } |
1790 | |
1791 | result.squeeze(); |
1792 | return result; |
1793 | } |
1794 | |
1795 | #if QT_STRINGVIEW_LEVEL < 2 |
1796 | /*! |
1797 | \since 5.12 |
1798 | \fn QString QRegularExpression::wildcardToRegularExpression(const QString &pattern, WildcardConversionOptions options) |
1799 | \overload |
1800 | */ |
1801 | #endif // QT_STRINGVIEW_LEVEL < 2 |
1802 | |
1803 | /*! |
1804 | \since 6.0 |
1805 | \enum QRegularExpression::WildcardConversionOption |
1806 | |
1807 | The WildcardConversionOption enum defines modifiers to the way a wildcard glob |
1808 | pattern gets converted to a regular expression pattern. |
1809 | |
1810 | \value DefaultWildcardConversion |
1811 | No conversion options are set. |
1812 | |
1813 | \value UnanchoredWildcardConversion |
1814 | The conversion will not anchor the pattern. This allows for partial string matches of |
1815 | wildcard expressions. |
1816 | */ |
1817 | |
1818 | /*! |
1819 | \since 5.15 |
1820 | |
1821 | Returns a regular expression representation of the given glob \a pattern. |
1822 | The transformation is targeting file path globbing, which means in particular |
1823 | that path separators receive special treatment. This implies that it is not |
1824 | just a basic translation from "*" to ".*". |
1825 | |
1826 | \snippet code/src_corelib_text_qregularexpression.cpp 31 |
1827 | |
    By default, the returned regular expression is fully anchored. In other
    words, there is no need of calling anchoredPattern() again on the
    result. To get a regular expression that is not anchored, pass
    UnanchoredWildcardConversion as the conversion \a options.
1832 | |
1833 | This implementation follows closely the definition |
1834 | of wildcard for glob patterns: |
1835 | \table |
1836 | \row \li \b{c} |
1837 | \li Any character represents itself apart from those mentioned |
1838 | below. Thus \b{c} matches the character \e c. |
1839 | \row \li \b{?} |
1840 | \li Matches any single character. It is the same as |
1841 | \b{.} in full regexps. |
1842 | \row \li \b{*} |
1843 | \li Matches zero or more of any characters. It is the |
1844 | same as \b{.*} in full regexps. |
1845 | \row \li \b{[abc]} |
1846 | \li Matches one character given in the bracket. |
1847 | \row \li \b{[a-c]} |
1848 | \li Matches one character from the range given in the bracket. |
1849 | \row \li \b{[!abc]} |
1850 | \li Matches one character that is not given in the bracket. It is the |
1851 | same as \b{[^abc]} in full regexp. |
1852 | \row \li \b{[!a-c]} |
1853 | \li Matches one character that is not from the range given in the |
1854 | bracket. It is the same as \b{[^a-c]} in full regexp. |
1855 | \endtable |
1856 | |
1857 | \note The backslash (\\) character is \e not an escape char in this context. |
1858 | In order to match one of the special characters, place it in square brackets |
1859 | (for example, \c{[?]}). |
1860 | |
1861 | More information about the implementation can be found in: |
1862 | \list |
1863 | \li \l {https://en.wikipedia.org/wiki/Glob_(programming)} {The Wikipedia Glob article} |
1864 | \li \c {man 7 glob} |
1865 | \endlist |
1866 | |
1867 | \sa escape() |
1868 | */ |
1869 | QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, WildcardConversionOptions options) |
1870 | { |
1871 | const qsizetype wclen = pattern.size(); |
1872 | QString rx; |
1873 | rx.reserve(wclen + wclen / 16); |
1874 | qsizetype i = 0; |
1875 | const QChar *wc = pattern.data(); |
1876 | |
1877 | #ifdef Q_OS_WIN |
1878 | const QLatin1Char nativePathSeparator('\\'); |
1879 | const QLatin1String starEscape("[^/\\\\]*" ); |
1880 | const QLatin1String questionMarkEscape("[^/\\\\]" ); |
1881 | #else |
1882 | const QLatin1Char nativePathSeparator('/'); |
1883 | const QLatin1String starEscape("[^/]*" ); |
1884 | const QLatin1String questionMarkEscape("[^/]" ); |
1885 | #endif |
1886 | |
1887 | while (i < wclen) { |
1888 | const QChar c = wc[i++]; |
1889 | switch (c.unicode()) { |
1890 | case '*': |
1891 | rx += starEscape; |
1892 | break; |
1893 | case '?': |
1894 | rx += questionMarkEscape; |
1895 | break; |
1896 | case '\\': |
1897 | #ifdef Q_OS_WIN |
1898 | case '/': |
1899 | rx += QLatin1String("[/\\\\]" ); |
1900 | break; |
1901 | #endif |
1902 | case '$': |
1903 | case '(': |
1904 | case ')': |
1905 | case '+': |
1906 | case '.': |
1907 | case '^': |
1908 | case '{': |
1909 | case '|': |
1910 | case '}': |
1911 | rx += QLatin1Char('\\'); |
1912 | rx += c; |
1913 | break; |
1914 | case '[': |
1915 | rx += c; |
1916 | // Support for the [!abc] or [!a-c] syntax |
1917 | if (i < wclen) { |
1918 | if (wc[i] == QLatin1Char('!')) { |
1919 | rx += QLatin1Char('^'); |
1920 | ++i; |
1921 | } |
1922 | |
1923 | if (i < wclen && wc[i] == QLatin1Char(']')) |
1924 | rx += wc[i++]; |
1925 | |
1926 | while (i < wclen && wc[i] != QLatin1Char(']')) { |
1927 | // The '/' appearing in a character class invalidates the |
1928 | // regular expression parsing. It also concerns '\\' on |
1929 | // Windows OS types. |
1930 | if (wc[i] == QLatin1Char('/') || wc[i] == nativePathSeparator) |
1931 | return rx; |
1932 | if (wc[i] == QLatin1Char('\\')) |
1933 | rx += QLatin1Char('\\'); |
1934 | rx += wc[i++]; |
1935 | } |
1936 | } |
1937 | break; |
1938 | default: |
1939 | rx += c; |
1940 | break; |
1941 | } |
1942 | } |
1943 | |
1944 | if (!(options & UnanchoredWildcardConversion)) |
1945 | rx = anchoredPattern(rx); |
1946 | |
1947 | return rx; |
1948 | } |
1949 | |
1950 | /*! |
1951 | \since 6.0 |
1952 | Returns a regular expression of the glob pattern \a pattern. The regular expression |
1953 | will be case sensitive if \a cs is \l{Qt::CaseSensitive}, and converted according to |
1954 | \a options. |
1955 | |
1956 | Equivalent to |
1957 | \code |
1958 | auto reOptions = cs == Qt::CaseSensitive ? QRegularExpression::NoPatternOption : |
1959 | QRegularExpression::CaseInsensitiveOption; |
    return QRegularExpression(wildcardToRegularExpression(pattern, options), reOptions);
1961 | \endcode |
1962 | */ |
1963 | QRegularExpression QRegularExpression::fromWildcard(QStringView pattern, Qt::CaseSensitivity cs, |
1964 | WildcardConversionOptions options) |
1965 | { |
1966 | auto reOptions = cs == Qt::CaseSensitive ? QRegularExpression::NoPatternOption : |
1967 | QRegularExpression::CaseInsensitiveOption; |
1968 | return QRegularExpression(wildcardToRegularExpression(pattern, options), reOptions); |
1969 | } |
1970 | |
1971 | #if QT_STRINGVIEW_LEVEL < 2 |
1972 | /*! |
1973 | \fn QRegularExpression::anchoredPattern(const QString &expression) |
1974 | \since 5.12 |
1975 | \overload |
1976 | */ |
1977 | #endif // QT_STRINGVIEW_LEVEL < 2 |
1978 | |
1979 | /*! |
1980 | \since 5.15 |
1981 | |
1982 | Returns the \a expression wrapped between the \c{\A} and \c{\z} anchors to |
1983 | be used for exact matching. |
1984 | */ |
1985 | QString QRegularExpression::anchoredPattern(QStringView expression) |
1986 | { |
1987 | return QString() |
1988 | + QLatin1String("\\A(?:" ) |
1989 | + expression |
1990 | + QLatin1String(")\\z" ); |
1991 | } |
1992 | |
1993 | /*! |
1994 | \since 5.1 |
1995 | |
1996 | Constructs a valid, empty QRegularExpressionMatch object. The regular |
1997 | expression is set to a default-constructed one; the match type to |
1998 | QRegularExpression::NoMatch and the match options to |
1999 | QRegularExpression::NoMatchOption. |
2000 | |
2001 | The object will report no match through the hasMatch() and the |
2002 | hasPartialMatch() member functions. |
2003 | */ |
2004 | QRegularExpressionMatch::QRegularExpressionMatch() |
2005 | : d(new QRegularExpressionMatchPrivate(QRegularExpression(), |
2006 | QString(), |
2007 | QStringView(), |
2008 | QRegularExpression::NoMatch, |
2009 | QRegularExpression::NoMatchOption)) |
2010 | { |
2011 | d->isValid = true; |
2012 | } |
2013 | |
2014 | /*! |
2015 | Destroys the match result. |
2016 | */ |
2017 | QRegularExpressionMatch::~QRegularExpressionMatch() |
2018 | { |
2019 | } |
2020 | |
2021 | /*! |
2022 | Constructs a match result by copying the result of the given \a match. |
2023 | |
2024 | \sa operator=() |
2025 | */ |
2026 | QRegularExpressionMatch::QRegularExpressionMatch(const QRegularExpressionMatch &match) |
2027 | : d(match.d) |
2028 | { |
2029 | } |
2030 | |
2031 | /*! |
2032 | Assigns the match result \a match to this object, and returns a reference |
2033 | to the copy. |
2034 | */ |
2035 | QRegularExpressionMatch &QRegularExpressionMatch::operator=(const QRegularExpressionMatch &match) |
2036 | { |
2037 | d = match.d; |
2038 | return *this; |
2039 | } |
2040 | |
2041 | /*! |
2042 | \fn QRegularExpressionMatch &QRegularExpressionMatch::operator=(QRegularExpressionMatch &&match) |
2043 | |
2044 | Move-assigns the match result \a match to this object, and returns a reference |
2045 | to the copy. |
2046 | */ |
2047 | |
2048 | /*! |
2049 | \fn void QRegularExpressionMatch::swap(QRegularExpressionMatch &other) |
2050 | |
2051 | Swaps the match result \a other with this match result. This |
2052 | operation is very fast and never fails. |
2053 | */ |
2054 | |
2055 | /*! |
2056 | \internal |
2057 | */ |
2058 | QRegularExpressionMatch::QRegularExpressionMatch(QRegularExpressionMatchPrivate &dd) |
2059 | : d(&dd) |
2060 | { |
2061 | } |
2062 | |
2063 | /*! |
2064 | Returns the QRegularExpression object whose match() function returned this |
2065 | object. |
2066 | |
2067 | \sa QRegularExpression::match(), matchType(), matchOptions() |
2068 | */ |
2069 | QRegularExpression QRegularExpressionMatch::regularExpression() const |
2070 | { |
2071 | return d->regularExpression; |
2072 | } |
2073 | |
2074 | |
2075 | /*! |
2076 | Returns the match type that was used to get this QRegularExpressionMatch |
2077 | object, that is, the match type that was passed to |
2078 | QRegularExpression::match() or QRegularExpression::globalMatch(). |
2079 | |
2080 | \sa QRegularExpression::match(), regularExpression(), matchOptions() |
2081 | */ |
2082 | QRegularExpression::MatchType QRegularExpressionMatch::matchType() const |
2083 | { |
2084 | return d->matchType; |
2085 | } |
2086 | |
2087 | /*! |
2088 | Returns the match options that were used to get this |
2089 | QRegularExpressionMatch object, that is, the match options that were passed |
2090 | to QRegularExpression::match() or QRegularExpression::globalMatch(). |
2091 | |
2092 | \sa QRegularExpression::match(), regularExpression(), matchType() |
2093 | */ |
2094 | QRegularExpression::MatchOptions QRegularExpressionMatch::matchOptions() const |
2095 | { |
2096 | return d->matchOptions; |
2097 | } |
2098 | |
2099 | /*! |
2100 | Returns the index of the last capturing group that captured something, |
2101 | including the implicit capturing group 0. This can be used to extract all |
2102 | the substrings that were captured: |
2103 | |
2104 | \snippet code/src_corelib_text_qregularexpression.cpp 28 |
2105 | |
2106 | Note that some of the capturing groups with an index less than |
2107 | lastCapturedIndex() could have not matched, and therefore captured nothing. |
2108 | |
2109 | If the regular expression did not match, this function returns -1. |
2110 | |
2111 | \sa captured(), capturedStart(), capturedEnd(), capturedLength() |
2112 | */ |
2113 | int QRegularExpressionMatch::lastCapturedIndex() const |
2114 | { |
2115 | return d->capturedCount - 1; |
2116 | } |
2117 | |
2118 | /*! |
2119 | Returns the substring captured by the \a nth capturing group. |
2120 | |
2121 | If the \a nth capturing group did not capture a string, or if there is no |
2122 | such capturing group, returns a null QString. |
2123 | |
2124 | \note The implicit capturing group number 0 captures the substring matched |
2125 | by the entire pattern. |
2126 | |
2127 | \sa capturedView(), lastCapturedIndex(), capturedStart(), capturedEnd(), |
2128 | capturedLength(), QString::isNull() |
2129 | */ |
2130 | QString QRegularExpressionMatch::captured(int nth) const |
2131 | { |
2132 | return capturedView(nth).toString(); |
2133 | } |
2134 | |
2135 | /*! |
2136 | \since 5.10 |
2137 | |
2138 | Returns a view of the substring captured by the \a nth capturing group. |
2139 | |
2140 | If the \a nth capturing group did not capture a string, or if there is no |
2141 | such capturing group, returns a null QStringView. |
2142 | |
2143 | \note The implicit capturing group number 0 captures the substring matched |
2144 | by the entire pattern. |
2145 | |
2146 | \sa captured(), lastCapturedIndex(), capturedStart(), capturedEnd(), |
2147 | capturedLength(), QStringView::isNull() |
2148 | */ |
2149 | QStringView QRegularExpressionMatch::capturedView(int nth) const |
2150 | { |
2151 | if (nth < 0 || nth > lastCapturedIndex()) |
2152 | return QStringView(); |
2153 | |
2154 | qsizetype start = capturedStart(nth); |
2155 | |
2156 | if (start == -1) // didn't capture |
2157 | return QStringView(); |
2158 | |
2159 | return d->subject.mid(start, capturedLength(nth)); |
2160 | } |
2161 | |
2162 | #if QT_STRINGVIEW_LEVEL < 2 |
2163 | /*! \fn QString QRegularExpressionMatch::captured(const QString &name) const |
2164 | |
2165 | Returns the substring captured by the capturing group named \a name. |
2166 | |
2167 | If the named capturing group \a name did not capture a string, or if |
2168 | there is no capturing group named \a name, returns a null QString. |
2169 | |
2170 | \sa capturedView(), capturedStart(), capturedEnd(), capturedLength(), |
2171 | QString::isNull() |
2172 | */ |
2173 | #endif // QT_STRINGVIEW_LEVEL < 2 |
2174 | |
2175 | /*! |
2176 | \since 5.10 |
2177 | |
2178 | Returns the substring captured by the capturing group named \a name. |
2179 | |
2180 | If the named capturing group \a name did not capture a string, or if |
2181 | there is no capturing group named \a name, returns a null QString. |
2182 | |
2183 | \sa capturedView(), capturedStart(), capturedEnd(), capturedLength(), |
2184 | QString::isNull() |
2185 | */ |
2186 | QString QRegularExpressionMatch::captured(QStringView name) const |
2187 | { |
2188 | if (name.isEmpty()) { |
2189 | qWarning("QRegularExpressionMatch::captured: empty capturing group name passed" ); |
2190 | return QString(); |
2191 | } |
2192 | |
2193 | return capturedView(name).toString(); |
2194 | } |
2195 | |
2196 | /*! |
2197 | \since 5.10 |
2198 | |
2199 | Returns a view of the string captured by the capturing group named \a |
2200 | name. |
2201 | |
2202 | If the named capturing group \a name did not capture a string, or if |
2203 | there is no capturing group named \a name, returns a null QStringView. |
2204 | |
2205 | \sa captured(), capturedStart(), capturedEnd(), capturedLength(), |
2206 | QStringView::isNull() |
2207 | */ |
2208 | QStringView QRegularExpressionMatch::capturedView(QStringView name) const |
2209 | { |
2210 | if (name.isEmpty()) { |
2211 | qWarning("QRegularExpressionMatch::capturedView: empty capturing group name passed" ); |
2212 | return QStringView(); |
2213 | } |
2214 | int nth = d->regularExpression.d->captureIndexForName(name); |
2215 | if (nth == -1) |
2216 | return QStringView(); |
2217 | return capturedView(nth); |
2218 | } |
2219 | |
2220 | /*! |
2221 | Returns a list of all strings captured by capturing groups, in the order |
2222 | the groups themselves appear in the pattern string. The list includes the |
2223 | implicit capturing group number 0, capturing the substring matched by the |
2224 | entire pattern. |
2225 | */ |
2226 | QStringList QRegularExpressionMatch::capturedTexts() const |
2227 | { |
2228 | QStringList texts; |
2229 | texts.reserve(d->capturedCount); |
2230 | for (int i = 0; i < d->capturedCount; ++i) |
2231 | texts << captured(i); |
2232 | return texts; |
2233 | } |
2234 | |
2235 | /*! |
2236 | Returns the offset inside the subject string corresponding to the |
2237 | starting position of the substring captured by the \a nth capturing group. |
2238 | If the \a nth capturing group did not capture a string or doesn't exist, |
2239 | returns -1. |
2240 | |
2241 | \sa capturedEnd(), capturedLength(), captured() |
2242 | */ |
2243 | qsizetype QRegularExpressionMatch::capturedStart(int nth) const |
2244 | { |
2245 | if (nth < 0 || nth > lastCapturedIndex()) |
2246 | return -1; |
2247 | |
2248 | return d->capturedOffsets.at(nth * 2); |
2249 | } |
2250 | |
2251 | /*! |
2252 | Returns the length of the substring captured by the \a nth capturing group. |
2253 | |
2254 | \note This function returns 0 if the \a nth capturing group did not capture |
2255 | a string or doesn't exist. |
2256 | |
2257 | \sa capturedStart(), capturedEnd(), captured() |
2258 | */ |
2259 | qsizetype QRegularExpressionMatch::capturedLength(int nth) const |
2260 | { |
2261 | // bound checking performed by these two functions |
2262 | return capturedEnd(nth) - capturedStart(nth); |
2263 | } |
2264 | |
2265 | /*! |
2266 | Returns the offset inside the subject string immediately after the ending |
2267 | position of the substring captured by the \a nth capturing group. If the \a |
2268 | nth capturing group did not capture a string or doesn't exist, returns -1. |
2269 | |
2270 | \sa capturedStart(), capturedLength(), captured() |
2271 | */ |
2272 | qsizetype QRegularExpressionMatch::capturedEnd(int nth) const |
2273 | { |
2274 | if (nth < 0 || nth > lastCapturedIndex()) |
2275 | return -1; |
2276 | |
2277 | return d->capturedOffsets.at(nth * 2 + 1); |
2278 | } |
2279 | |
2280 | #if QT_STRINGVIEW_LEVEL < 2 |
2281 | /*! \fn qsizetype QRegularExpressionMatch::capturedStart(const QString &name) const |
2282 | |
2283 | Returns the offset inside the subject string corresponding to the starting |
2284 | position of the substring captured by the capturing group named \a name. |
2285 | If the capturing group named \a name did not capture a string or doesn't |
2286 | exist, returns -1. |
2287 | |
2288 | \sa capturedEnd(), capturedLength(), captured() |
2289 | */ |
2290 | |
2291 | /*! \fn qsizetype QRegularExpressionMatch::capturedLength(const QString &name) const |
2292 | |
2293 | Returns the length of the substring captured by the capturing group named |
2294 | \a name. |
2295 | |
2296 | \note This function returns 0 if the capturing group named \a name did not |
2297 | capture a string or doesn't exist. |
2298 | |
2299 | \sa capturedStart(), capturedEnd(), captured() |
2300 | */ |
2301 | |
2302 | /*! \fn qsizetype QRegularExpressionMatch::capturedEnd(const QString &name) const |
2303 | |
2304 | Returns the offset inside the subject string immediately after the ending |
2305 | position of the substring captured by the capturing group named \a name. If |
2306 | the capturing group named \a name did not capture a string or doesn't |
2307 | exist, returns -1. |
2308 | |
2309 | \sa capturedStart(), capturedLength(), captured() |
2310 | */ |
2311 | #endif // QT_STRINGVIEW_LEVEL < 2 |
2312 | |
2313 | /*! |
2314 | \since 5.10 |
2315 | |
2316 | Returns the offset inside the subject string corresponding to the starting |
2317 | position of the substring captured by the capturing group named \a name. |
2318 | If the capturing group named \a name did not capture a string or doesn't |
2319 | exist, returns -1. |
2320 | |
2321 | \sa capturedEnd(), capturedLength(), captured() |
2322 | */ |
2323 | qsizetype QRegularExpressionMatch::capturedStart(QStringView name) const |
2324 | { |
2325 | if (name.isEmpty()) { |
2326 | qWarning("QRegularExpressionMatch::capturedStart: empty capturing group name passed" ); |
2327 | return -1; |
2328 | } |
2329 | int nth = d->regularExpression.d->captureIndexForName(name); |
2330 | if (nth == -1) |
2331 | return -1; |
2332 | return capturedStart(nth); |
2333 | } |
2334 | |
2335 | /*! |
2336 | \since 5.10 |
2337 | |
2338 | Returns the length of the substring captured by the capturing group named |
2339 | \a name. |
2340 | |
2341 | \note This function returns 0 if the capturing group named \a name did not |
2342 | capture a string or doesn't exist. |
2343 | |
2344 | \sa capturedStart(), capturedEnd(), captured() |
2345 | */ |
2346 | qsizetype QRegularExpressionMatch::capturedLength(QStringView name) const |
2347 | { |
2348 | if (name.isEmpty()) { |
2349 | qWarning("QRegularExpressionMatch::capturedLength: empty capturing group name passed" ); |
2350 | return 0; |
2351 | } |
2352 | int nth = d->regularExpression.d->captureIndexForName(name); |
2353 | if (nth == -1) |
2354 | return 0; |
2355 | return capturedLength(nth); |
2356 | } |
2357 | |
2358 | /*! |
2359 | \since 5.10 |
2360 | |
2361 | Returns the offset inside the subject string immediately after the ending |
2362 | position of the substring captured by the capturing group named \a name. If |
2363 | the capturing group named \a name did not capture a string or doesn't |
2364 | exist, returns -1. |
2365 | |
2366 | \sa capturedStart(), capturedLength(), captured() |
2367 | */ |
2368 | qsizetype QRegularExpressionMatch::capturedEnd(QStringView name) const |
2369 | { |
2370 | if (name.isEmpty()) { |
2371 | qWarning("QRegularExpressionMatch::capturedEnd: empty capturing group name passed" ); |
2372 | return -1; |
2373 | } |
2374 | int nth = d->regularExpression.d->captureIndexForName(name); |
2375 | if (nth == -1) |
2376 | return -1; |
2377 | return capturedEnd(nth); |
2378 | } |
2379 | |
2380 | /*! |
2381 | Returns \c true if the regular expression matched against the subject string, |
2382 | or false otherwise. |
2383 | |
2384 | \sa QRegularExpression::match(), hasPartialMatch() |
2385 | */ |
2386 | bool QRegularExpressionMatch::hasMatch() const |
2387 | { |
2388 | return d->hasMatch; |
2389 | } |
2390 | |
2391 | /*! |
2392 | Returns \c true if the regular expression partially matched against the |
2393 | subject string, or false otherwise. |
2394 | |
    \note Only a match that explicitly used one of the partial match types
    can yield a partial match. Still, if such a match succeeds totally, this
    function will return false, while hasMatch() will return true.
2398 | |
2399 | \sa QRegularExpression::match(), QRegularExpression::MatchType, hasMatch() |
2400 | */ |
2401 | bool QRegularExpressionMatch::hasPartialMatch() const |
2402 | { |
2403 | return d->hasPartialMatch; |
2404 | } |
2405 | |
2406 | /*! |
2407 | Returns \c true if the match object was obtained as a result from the |
2408 | QRegularExpression::match() function invoked on a valid QRegularExpression |
2409 | object; returns \c false if the QRegularExpression was invalid. |
2410 | |
2411 | \sa QRegularExpression::match(), QRegularExpression::isValid() |
2412 | */ |
2413 | bool QRegularExpressionMatch::isValid() const |
2414 | { |
2415 | return d->isValid; |
2416 | } |
2417 | |
2418 | /*! |
2419 | \internal |
2420 | */ |
2421 | QRegularExpressionMatchIterator::QRegularExpressionMatchIterator(QRegularExpressionMatchIteratorPrivate &dd) |
2422 | : d(&dd) |
2423 | { |
2424 | } |
2425 | |
2426 | /*! |
2427 | \since 5.1 |
2428 | |
2429 | Constructs an empty, valid QRegularExpressionMatchIterator object. The |
2430 | regular expression is set to a default-constructed one; the match type to |
2431 | QRegularExpression::NoMatch and the match options to |
2432 | QRegularExpression::NoMatchOption. |
2433 | |
2434 | Invoking the hasNext() member function on the constructed object will |
2435 | return false, as the iterator is not iterating on a valid sequence of |
2436 | matches. |
2437 | */ |
2438 | QRegularExpressionMatchIterator::QRegularExpressionMatchIterator() |
2439 | : d(new QRegularExpressionMatchIteratorPrivate(QRegularExpression(), |
2440 | QRegularExpression::NoMatch, |
2441 | QRegularExpression::NoMatchOption, |
2442 | QRegularExpressionMatch())) |
2443 | { |
2444 | } |
2445 | |
2446 | /*! |
2447 | Destroys the QRegularExpressionMatchIterator object. |
2448 | */ |
2449 | QRegularExpressionMatchIterator::~QRegularExpressionMatchIterator() |
2450 | { |
2451 | } |
2452 | |
2453 | /*! |
2454 | Constructs a QRegularExpressionMatchIterator object as a copy of \a |
2455 | iterator. |
2456 | |
2457 | \sa operator=() |
2458 | */ |
2459 | QRegularExpressionMatchIterator::QRegularExpressionMatchIterator(const QRegularExpressionMatchIterator &iterator) |
2460 | : d(iterator.d) |
2461 | { |
2462 | } |
2463 | |
2464 | /*! |
2465 | Assigns the iterator \a iterator to this object, and returns a reference to |
2466 | the copy. |
2467 | */ |
2468 | QRegularExpressionMatchIterator &QRegularExpressionMatchIterator::operator=(const QRegularExpressionMatchIterator &iterator) |
2469 | { |
2470 | d = iterator.d; |
2471 | return *this; |
2472 | } |
2473 | |
2474 | /*! |
2475 | \fn QRegularExpressionMatchIterator &QRegularExpressionMatchIterator::operator=(QRegularExpressionMatchIterator &&iterator) |
2476 | |
2477 | Move-assigns the \a iterator to this object. |
2478 | */ |
2479 | |
2480 | /*! |
2481 | \fn void QRegularExpressionMatchIterator::swap(QRegularExpressionMatchIterator &other) |
2482 | |
2483 | Swaps the iterator \a other with this iterator object. This operation is |
2484 | very fast and never fails. |
2485 | */ |
2486 | |
2487 | /*! |
2488 | Returns \c true if the iterator object was obtained as a result from the |
2489 | QRegularExpression::globalMatch() function invoked on a valid |
2490 | QRegularExpression object; returns \c false if the QRegularExpression was |
2491 | invalid. |
2492 | |
2493 | \sa QRegularExpression::globalMatch(), QRegularExpression::isValid() |
2494 | */ |
2495 | bool QRegularExpressionMatchIterator::isValid() const |
2496 | { |
2497 | return d->next.isValid(); |
2498 | } |
2499 | |
2500 | /*! |
2501 | Returns \c true if there is at least one match result ahead of the iterator; |
2502 | otherwise it returns \c false. |
2503 | |
2504 | \sa next() |
2505 | */ |
2506 | bool QRegularExpressionMatchIterator::hasNext() const |
2507 | { |
2508 | return d->hasNext(); |
2509 | } |
2510 | |
2511 | /*! |
2512 | Returns the next match result without moving the iterator. |
2513 | |
2514 | \note Calling this function when the iterator is at the end of the result |
2515 | set leads to undefined results. |
2516 | */ |
2517 | QRegularExpressionMatch QRegularExpressionMatchIterator::peekNext() const |
2518 | { |
2519 | if (!hasNext()) |
2520 | qWarning("QRegularExpressionMatchIterator::peekNext() called on an iterator already at end" ); |
2521 | |
2522 | return d->next; |
2523 | } |
2524 | |
2525 | /*! |
2526 | Returns the next match result and advances the iterator by one position. |
2527 | |
2528 | \note Calling this function when the iterator is at the end of the result |
2529 | set leads to undefined results. |
2530 | */ |
2531 | QRegularExpressionMatch QRegularExpressionMatchIterator::next() |
2532 | { |
2533 | if (!hasNext()) { |
2534 | qWarning("QRegularExpressionMatchIterator::next() called on an iterator already at end" ); |
2535 | return d.constData()->next; |
2536 | } |
2537 | |
2538 | d.detach(); |
2539 | return qExchange(d->next, d->next.d.constData()->nextMatch()); |
2540 | } |
2541 | |
2542 | /*! |
2543 | Returns the QRegularExpression object whose globalMatch() function returned |
2544 | this object. |
2545 | |
2546 | \sa QRegularExpression::globalMatch(), matchType(), matchOptions() |
2547 | */ |
2548 | QRegularExpression QRegularExpressionMatchIterator::regularExpression() const |
2549 | { |
2550 | return d->regularExpression; |
2551 | } |
2552 | |
2553 | /*! |
2554 | Returns the match type that was used to get this |
2555 | QRegularExpressionMatchIterator object, that is, the match type that was |
2556 | passed to QRegularExpression::globalMatch(). |
2557 | |
2558 | \sa QRegularExpression::globalMatch(), regularExpression(), matchOptions() |
2559 | */ |
2560 | QRegularExpression::MatchType QRegularExpressionMatchIterator::matchType() const |
2561 | { |
2562 | return d->matchType; |
2563 | } |
2564 | |
2565 | /*! |
2566 | Returns the match options that were used to get this |
2567 | QRegularExpressionMatchIterator object, that is, the match options that |
2568 | were passed to QRegularExpression::globalMatch(). |
2569 | |
2570 | \sa QRegularExpression::globalMatch(), regularExpression(), matchType() |
2571 | */ |
2572 | QRegularExpression::MatchOptions QRegularExpressionMatchIterator::matchOptions() const |
2573 | { |
2574 | return d->matchOptions; |
2575 | } |
2576 | |
2577 | /*! |
2578 | \internal |
2579 | */ |
2580 | QtPrivate::QRegularExpressionMatchIteratorRangeBasedForIterator begin(const QRegularExpressionMatchIterator &iterator) |
2581 | { |
2582 | return QtPrivate::QRegularExpressionMatchIteratorRangeBasedForIterator(iterator); |
2583 | } |
2584 | |
2585 | /*! |
2586 | \fn QtPrivate::QRegularExpressionMatchIteratorRangeBasedForIteratorSentinel end(const QRegularExpressionMatchIterator &) |
2587 | \internal |
2588 | */ |
2589 | |
2590 | #ifndef QT_NO_DATASTREAM |
2591 | /*! |
2592 | \relates QRegularExpression |
2593 | |
2594 | Writes the regular expression \a re to stream \a out. |
2595 | |
2596 | \sa {Serializing Qt Data Types} |
2597 | */ |
2598 | QDataStream &operator<<(QDataStream &out, const QRegularExpression &re) |
2599 | { |
2600 | out << re.pattern() << quint32(re.patternOptions()); |
2601 | return out; |
2602 | } |
2603 | |
2604 | /*! |
2605 | \relates QRegularExpression |
2606 | |
2607 | Reads a regular expression from stream \a in into \a re. |
2608 | |
2609 | \sa {Serializing Qt Data Types} |
2610 | */ |
2611 | QDataStream &operator>>(QDataStream &in, QRegularExpression &re) |
2612 | { |
2613 | QString pattern; |
2614 | quint32 patternOptions; |
2615 | in >> pattern >> patternOptions; |
2616 | re.setPattern(pattern); |
2617 | re.setPatternOptions(QRegularExpression::PatternOptions(patternOptions)); |
2618 | return in; |
2619 | } |
2620 | #endif |
2621 | |
2622 | #ifndef QT_NO_DEBUG_STREAM |
2623 | /*! |
2624 | \relates QRegularExpression |
2625 | |
2626 | Writes the regular expression \a re into the debug object \a debug for |
2627 | debugging purposes. |
2628 | |
2629 | \sa {Debugging Techniques} |
2630 | */ |
2631 | QDebug operator<<(QDebug debug, const QRegularExpression &re) |
2632 | { |
2633 | QDebugStateSaver saver(debug); |
2634 | debug.nospace() << "QRegularExpression(" << re.pattern() << ", " << re.patternOptions() << ')'; |
2635 | return debug; |
2636 | } |
2637 | |
2638 | /*! |
2639 | \relates QRegularExpression |
2640 | |
2641 | Writes the pattern options \a patternOptions into the debug object \a debug |
2642 | for debugging purposes. |
2643 | |
2644 | \sa {Debugging Techniques} |
2645 | */ |
2646 | QDebug operator<<(QDebug debug, QRegularExpression::PatternOptions patternOptions) |
2647 | { |
2648 | QDebugStateSaver saver(debug); |
2649 | QByteArray flags; |
2650 | |
2651 | if (patternOptions == QRegularExpression::NoPatternOption) { |
2652 | flags = "NoPatternOption" ; |
2653 | } else { |
2654 | flags.reserve(200); // worst case... |
2655 | if (patternOptions & QRegularExpression::CaseInsensitiveOption) |
2656 | flags.append("CaseInsensitiveOption|" ); |
2657 | if (patternOptions & QRegularExpression::DotMatchesEverythingOption) |
2658 | flags.append("DotMatchesEverythingOption|" ); |
2659 | if (patternOptions & QRegularExpression::MultilineOption) |
2660 | flags.append("MultilineOption|" ); |
2661 | if (patternOptions & QRegularExpression::ExtendedPatternSyntaxOption) |
2662 | flags.append("ExtendedPatternSyntaxOption|" ); |
2663 | if (patternOptions & QRegularExpression::InvertedGreedinessOption) |
2664 | flags.append("InvertedGreedinessOption|" ); |
2665 | if (patternOptions & QRegularExpression::DontCaptureOption) |
2666 | flags.append("DontCaptureOption|" ); |
2667 | if (patternOptions & QRegularExpression::UseUnicodePropertiesOption) |
2668 | flags.append("UseUnicodePropertiesOption|" ); |
2669 | flags.chop(1); |
2670 | } |
2671 | |
2672 | debug.nospace() << "QRegularExpression::PatternOptions(" << flags << ')'; |
2673 | |
2674 | return debug; |
2675 | } |
2676 | /*! |
2677 | \relates QRegularExpressionMatch |
2678 | |
2679 | Writes the match object \a match into the debug object \a debug for |
2680 | debugging purposes. |
2681 | |
2682 | \sa {Debugging Techniques} |
2683 | */ |
2684 | QDebug operator<<(QDebug debug, const QRegularExpressionMatch &match) |
2685 | { |
2686 | QDebugStateSaver saver(debug); |
2687 | debug.nospace() << "QRegularExpressionMatch(" ; |
2688 | |
2689 | if (!match.isValid()) { |
2690 | debug << "Invalid)" ; |
2691 | return debug; |
2692 | } |
2693 | |
2694 | debug << "Valid" ; |
2695 | |
2696 | if (match.hasMatch()) { |
2697 | debug << ", has match: " ; |
2698 | for (int i = 0; i <= match.lastCapturedIndex(); ++i) { |
2699 | debug << i |
2700 | << ":(" << match.capturedStart(i) << ", " << match.capturedEnd(i) |
2701 | << ", " << match.captured(i) << ')'; |
2702 | if (i < match.lastCapturedIndex()) |
2703 | debug << ", " ; |
2704 | } |
2705 | } else if (match.hasPartialMatch()) { |
2706 | debug << ", has partial match: (" |
2707 | << match.capturedStart(0) << ", " |
2708 | << match.capturedEnd(0) << ", " |
2709 | << match.captured(0) << ')'; |
2710 | } else { |
2711 | debug << ", no match" ; |
2712 | } |
2713 | |
2714 | debug << ')'; |
2715 | |
2716 | return debug; |
2717 | } |
2718 | #endif |
2719 | |
2720 | // fool lupdate: make it extract those strings for translation, but don't put them |
2721 | // inside Qt -- they're already inside libpcre (cf. man 3 pcreapi, pcre_compile.c). |
2722 | #if 0 |
2723 | |
2724 | /* PCRE is a library of functions to support regular expressions whose syntax |
2725 | and semantics are as close as possible to those of the Perl 5 language. |
2726 | |
2727 | Written by Philip Hazel |
2728 | Original API code Copyright (c) 1997-2012 University of Cambridge |
2729 | New API code Copyright (c) 2015 University of Cambridge |
2730 | |
2731 | ----------------------------------------------------------------------------- |
2732 | Redistribution and use in source and binary forms, with or without |
2733 | modification, are permitted provided that the following conditions are met: |
2734 | |
2735 | * Redistributions of source code must retain the above copyright notice, |
2736 | this list of conditions and the following disclaimer. |
2737 | |
2738 | * Redistributions in binary form must reproduce the above copyright |
2739 | notice, this list of conditions and the following disclaimer in the |
2740 | documentation and/or other materials provided with the distribution. |
2741 | |
2742 | * Neither the name of the University of Cambridge nor the names of its |
2743 | contributors may be used to endorse or promote products derived from |
2744 | this software without specific prior written permission. |
2745 | |
2746 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
2747 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
2748 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
2749 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
2750 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
2751 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
2752 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
2753 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
2754 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
2755 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
2756 | POSSIBILITY OF SUCH DAMAGE. |
2757 | ----------------------------------------------------------------------------- |
2758 | */ |
2759 | |
/*
    Table of error message strings, each marked with QT_TRANSLATE_NOOP in the
    "QRegularExpression" translation context.

    This table sits inside an `#if 0` region, so the compiler never sees it.
    According to the comment that opens that region, it exists only so that
    lupdate extracts the strings for translation; the strings themselves are
    already provided by libpcre.

    The message texts must stay byte-for-byte identical to the ones the
    library emits, including the pieces spelled as adjacent string literals
    (for example "UTF-" "16" " mode").
*/
static const char *pcreCompileErrorCodes[] =
{
QT_TRANSLATE_NOOP("QRegularExpression" , "no error" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "\\ at end of pattern" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "\\c at end of pattern" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "unrecognized character follows \\" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "numbers out of order in {} quantifier" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "number too big in {} quantifier" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "missing terminating ] for character class" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "escape sequence is invalid in character class" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "range out of order in character class" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "quantifier does not follow a repeatable item" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "internal error: unexpected repeat" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "unrecognized character after (? or (?-" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "POSIX named classes are supported only within a class" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "POSIX collating elements are not supported" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "missing closing parenthesis" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "reference to non-existent subpattern" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "pattern passed as NULL" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "unrecognised compile-time option bit(s)" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "missing ) after (?# comment" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "parentheses are too deeply nested" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "regular expression is too large" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "failed to allocate heap memory" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "unmatched closing parenthesis" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "internal error: code overflow" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "missing closing parenthesis for condition" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "lookbehind assertion is not fixed length" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "a relative value of zero is not allowed" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "conditional subpattern contains more than two branches" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "assertion expected after (?( or (?(?C)" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "digit expected after (?+ or (?-" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "unknown POSIX class name" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "internal error in pcre2_study(): should not occur" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "this version of PCRE2 does not have Unicode support" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "parentheses are too deeply nested (stack check)" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "character code point value in \\x{} or \\o{} is too large" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "lookbehind is too complicated" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "\\C is not allowed in a lookbehind assertion in UTF-" "16" " mode" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "number after (?C is greater than 255" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "closing parenthesis for (?C expected" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "invalid escape sequence in (*VERB) name" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "unrecognized character after (?P" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "syntax error in subpattern name (missing terminator?)" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "two named subpatterns have the same name (PCRE2_DUPNAMES not set)" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "subpattern name must start with a non-digit" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "this version of PCRE2 does not have support for \\P, \\p, or \\X" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "malformed \\P or \\p sequence" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "unknown property name after \\P or \\p" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "subpattern name is too long (maximum " "32" " code units)" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "too many named subpatterns (maximum " "10000" ")" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "invalid range in character class" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "octal value is greater than \\377 in 8-bit non-UTF-8 mode" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "internal error: overran compiling workspace" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "internal error: previously-checked referenced subpattern not found" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "DEFINE subpattern contains more than one branch" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "missing opening brace after \\o" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "internal error: unknown newline setting" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "(?R (recursive pattern call) must be followed by a closing parenthesis" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "obsolete error (should not occur)" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "(*VERB) not recognized or malformed" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "subpattern number is too big" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "subpattern name expected" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "internal error: parsed pattern overflow" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "non-octal character in \\o{} (closing brace missing?)" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "different names for subpatterns of the same number are not allowed" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "(*MARK) must have an argument" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "non-hex character in \\x{} (closing brace missing?)" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "\\c must be followed by a printable ASCII character" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "\\c must be followed by a letter or one of [\\]^_?" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "\\k is not followed by a braced, angle-bracketed, or quoted name" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "internal error: unknown meta code in check_lookbehinds()" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "\\N is not supported in a class" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "callout string is too long" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "using UTF is disabled by the application" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "using UCP is disabled by the application" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "character code point value in \\u.... sequence is too large" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "digits missing in \\x{} or \\o{} or \\N{U+}" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "syntax error or number too big in (?(VERSION condition" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "internal error: unknown opcode in auto_possessify()" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "missing terminating delimiter for callout with string argument" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "unrecognized string delimiter follows (?C" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "using \\C is disabled by the application" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "(?| and/or (?J: or (?x: parentheses are too deeply nested" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "using \\C is disabled in this PCRE2 library" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "regular expression is too complicated" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "lookbehind assertion is too long" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "pattern string is longer than the limit set by the application" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "internal error: unknown code in parsed pattern" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "internal error: bad code value in parsed_skip()" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "invalid option bits with PCRE2_LITERAL" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "\\N{U+dddd} is supported only in Unicode (UTF) mode" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "invalid hyphen in option setting" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "(*alpha_assertion) not recognized" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "script runs require Unicode support, which this version of PCRE2 does not have" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "too many capturing groups (maximum 65535)" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "atomic assertion expected after (?( or (?(?C)" ),
// NOTE(review): the list restarts with a second "no error" here, and the
// entries that follow ("no match", "partial match", UTF errors, DFA/JIT
// errors, ...) look like match-time/runtime messages rather than pattern
// compile errors, despite the array's name -- confirm against the PCRE2
// sources.
QT_TRANSLATE_NOOP("QRegularExpression" , "no error" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "no match" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "partial match" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: 1 byte missing at end" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: 2 bytes missing at end" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: 3 bytes missing at end" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: 4 bytes missing at end" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: 5 bytes missing at end" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: byte 2 top bits not 0x80" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: byte 3 top bits not 0x80" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: byte 4 top bits not 0x80" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: byte 5 top bits not 0x80" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: byte 6 top bits not 0x80" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: 5-byte character is not allowed (RFC 3629)" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: 6-byte character is not allowed (RFC 3629)" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: code points greater than 0x10ffff are not defined" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: code points 0xd800-0xdfff are not defined" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: overlong 2-byte sequence" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: overlong 3-byte sequence" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: overlong 4-byte sequence" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: overlong 5-byte sequence" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: overlong 6-byte sequence" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: isolated byte with 0x80 bit set" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-8 error: illegal byte (0xfe or 0xff)" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-16 error: missing low surrogate at end" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-16 error: invalid low surrogate" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-16 error: isolated low surrogate" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-32 error: code points 0xd800-0xdfff are not defined" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "UTF-32 error: code points greater than 0x10ffff are not defined" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "bad data value" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "patterns do not all use the same character tables" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "magic number missing" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "pattern compiled in wrong mode: 8/16/32-bit error" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "bad offset value" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "bad option value" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "invalid replacement string" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "bad offset into UTF string" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "callout error code" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "invalid data in workspace for DFA restart" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "too much recursion for DFA matching" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "backreference condition or recursion test is not supported for DFA matching" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "function is not supported for DFA matching" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "pattern contains an item that is not supported for DFA matching" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "workspace size exceeded in DFA matching" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "internal error - pattern overwritten?" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "bad JIT option" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "JIT stack limit reached" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "match limit exceeded" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "no more memory" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "unknown substring" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "non-unique substring name" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "NULL argument passed" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "nested recursion at the same subject position" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "matching depth limit exceeded" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "requested value is not available" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "requested value is not set" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "offset limit set without PCRE2_USE_OFFSET_LIMIT" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "bad escape sequence in replacement string" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "expected closing curly bracket in replacement string" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "bad substitution in replacement string" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "match with end before start or start moved backwards is not supported" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "too many replacements (more than INT_MAX)" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "bad serialized data" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "heap limit exceeded" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "invalid syntax" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "internal error - duplicate substitution match" ),
QT_TRANSLATE_NOOP("QRegularExpression" , "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching" )
};
2930 | #endif // #if 0 |
2931 | |
2932 | QT_END_NAMESPACE |
2933 | |