1 | // Copyright 2003-2010 Google Inc. All Rights Reserved. |
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | #ifndef UTIL_PCRE_H_ |
6 | #define UTIL_PCRE_H_ |
7 | |
8 | // This is a variant of PCRE's pcrecpp.h, originally written at Google. |
9 | // The main changes are the addition of the HitLimit method and |
10 | // compilation as PCRE in namespace re2. |
11 | |
12 | // C++ interface to the pcre regular-expression library. PCRE supports |
13 | // Perl-style regular expressions (with extensions like \d, \w, \s, |
14 | // ...). |
15 | // |
16 | // ----------------------------------------------------------------------- |
17 | // REGEXP SYNTAX: |
18 | // |
19 | // This module uses the pcre library and hence supports its syntax |
20 | // for regular expressions: |
21 | // |
22 | // http://www.google.com/search?q=pcre |
23 | // |
24 | // The syntax is pretty similar to Perl's. For those not familiar |
25 | // with Perl's regular expressions, here are some examples of the most |
26 | // commonly used extensions: |
27 | // |
28 | // "hello (\\w+) world" -- \w matches a "word" character |
29 | // "version (\\d+)" -- \d matches a digit |
30 | // "hello\\s+world" -- \s matches any whitespace character |
31 | // "\\b(\\w+)\\b" -- \b matches empty string at a word boundary |
32 | // "(?i)hello" -- (?i) turns on case-insensitive matching |
33 | // "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible |
34 | // |
35 | // ----------------------------------------------------------------------- |
36 | // MATCHING INTERFACE: |
37 | // |
38 | // The "FullMatch" operation checks that supplied text matches a |
39 | // supplied pattern exactly. |
40 | // |
41 | // Example: successful match |
42 | // CHECK(PCRE::FullMatch("hello", "h.*o")); |
43 | // |
44 | // Example: unsuccessful match (requires full match): |
45 | // CHECK(!PCRE::FullMatch("hello", "e")); |
46 | // |
47 | // ----------------------------------------------------------------------- |
48 | // UTF-8 AND THE MATCHING INTERFACE: |
49 | // |
50 | // By default, pattern and text are plain text, one byte per character. |
51 | // The UTF8 flag, passed to the constructor, causes both pattern |
52 | // and string to be treated as UTF-8 text, still a byte stream but |
53 | // potentially multiple bytes per character. In practice, the text |
54 | // is likelier to be UTF-8 than the pattern, but the match returned |
55 | // may depend on the UTF8 flag, so always use it when matching |
56 | // UTF8 text. E.g., "." will match one byte normally but with UTF8 |
57 | // set may match up to three bytes of a multi-byte character. |
58 | // |
59 | // Example: |
60 | // PCRE re(utf8_pattern, PCRE::UTF8); |
61 | // CHECK(PCRE::FullMatch(utf8_string, re)); |
62 | // |
63 | // ----------------------------------------------------------------------- |
64 | // MATCHING WITH SUBSTRING EXTRACTION: |
65 | // |
66 | // You can supply extra pointer arguments to extract matched substrings. |
67 | // |
68 | // Example: extracts "ruby" into "s" and 1234 into "i" |
69 | // int i; |
70 | // std::string s; |
71 | // CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i)); |
72 | // |
73 | // Example: fails because string cannot be stored in integer |
74 | // CHECK(!PCRE::FullMatch("ruby", "(.*)", &i)); |
75 | // |
76 | // Example: fails because there aren't enough sub-patterns: |
77 | // CHECK(!PCRE::FullMatch("ruby:1234", "\\w+:\\d+", &s)); |
78 | // |
79 | // Example: does not try to extract any extra sub-patterns |
80 | // CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s)); |
81 | // |
82 | // Example: does not try to extract into NULL |
83 | // CHECK(PCRE::FullMatch("ruby:1234", "(\\w+):(\\d+)", NULL, &i)); |
84 | // |
85 | // Example: integer overflow causes failure |
86 | // CHECK(!PCRE::FullMatch("ruby:1234567891234", "\\w+:(\\d+)", &i)); |
87 | // |
88 | // ----------------------------------------------------------------------- |
89 | // PARTIAL MATCHES |
90 | // |
91 | // You can use the "PartialMatch" operation when you want the pattern |
92 | // to match any substring of the text. |
93 | // |
94 | // Example: simple search for a string: |
95 | // CHECK(PCRE::PartialMatch("hello", "ell")); |
96 | // |
97 | // Example: find first number in a string |
98 | // int number; |
99 | // CHECK(PCRE::PartialMatch("x*100 + 20", "(\\d+)", &number)); |
100 | // CHECK_EQ(number, 100); |
101 | // |
102 | // ----------------------------------------------------------------------- |
// PRE-COMPILED REGULAR EXPRESSIONS
104 | // |
105 | // PCRE makes it easy to use any string as a regular expression, without |
106 | // requiring a separate compilation step. |
107 | // |
108 | // If speed is of the essence, you can create a pre-compiled "PCRE" |
109 | // object from the pattern and use it multiple times. If you do so, |
110 | // you can typically parse text faster than with sscanf. |
111 | // |
112 | // Example: precompile pattern for faster matching: |
113 | // PCRE pattern("h.*o"); |
114 | // while (ReadLine(&str)) { |
115 | // if (PCRE::FullMatch(str, pattern)) ...; |
116 | // } |
117 | // |
118 | // ----------------------------------------------------------------------- |
// SCANNING TEXT INCREMENTALLY
120 | // |
121 | // The "Consume" operation may be useful if you want to repeatedly |
122 | // match regular expressions at the front of a string and skip over |
123 | // them as they match. This requires use of the "StringPiece" type, |
124 | // which represents a sub-range of a real string. |
125 | // |
126 | // Example: read lines of the form "var = value" from a string. |
127 | // std::string contents = ...; // Fill string somehow |
128 | // StringPiece input(contents); // Wrap a StringPiece around it |
129 | // |
130 | // std::string var; |
131 | // int value; |
132 | // while (PCRE::Consume(&input, "(\\w+) = (\\d+)\n", &var, &value)) { |
133 | // ...; |
134 | // } |
135 | // |
136 | // Each successful call to "Consume" will set "var/value", and also |
137 | // advance "input" so it points past the matched text. Note that if the |
138 | // regular expression matches an empty string, input will advance |
139 | // by 0 bytes. If the regular expression being used might match |
140 | // an empty string, the loop body must check for this case and either |
141 | // advance the string or break out of the loop. |
142 | // |
143 | // The "FindAndConsume" operation is similar to "Consume" but does not |
144 | // anchor your match at the beginning of the string. For example, you |
145 | // could extract all words from a string by repeatedly calling |
146 | // PCRE::FindAndConsume(&input, "(\\w+)", &word) |
147 | // |
148 | // ----------------------------------------------------------------------- |
149 | // PARSING HEX/OCTAL/C-RADIX NUMBERS |
150 | // |
151 | // By default, if you pass a pointer to a numeric value, the |
152 | // corresponding text is interpreted as a base-10 number. You can |
153 | // instead wrap the pointer with a call to one of the operators Hex(), |
154 | // Octal(), or CRadix() to interpret the text in another base. The |
155 | // CRadix operator interprets C-style "0" (base-8) and "0x" (base-16) |
156 | // prefixes, but defaults to base-10. |
157 | // |
158 | // Example: |
159 | // int a, b, c, d; |
// CHECK(PCRE::FullMatch("100 40 0100 0x40", "(.*) (.*) (.*) (.*)",
// Octal(&a), Hex(&b), CRadix(&c), CRadix(&d)));
162 | // will leave 64 in a, b, c, and d. |
163 | |
164 | #include "util/util.h" |
165 | #include "re2/stringpiece.h" |
166 | |
// When USEPCRE is defined, pull in the real library header; otherwise
// forward-declare "pcre" as an opaque type so the name is still usable.
#ifdef USEPCRE
#include <pcre.h>
#else
struct pcre;  // opaque
#endif

namespace re2 {
// Compile-time flag: true iff this translation unit was built with USEPCRE.
#ifdef USEPCRE
const bool UsingPCRE = true;
#else
const bool UsingPCRE = false;
#endif
}  // namespace re2
178 | |
179 | namespace re2 { |
180 | |
181 | class PCRE_Options; |
182 | |
183 | // Interface for regular expression matching. Also corresponds to a |
184 | // pre-compiled regular expression. An "PCRE" object is safe for |
185 | // concurrent use by multiple threads. |
186 | class PCRE { |
187 | public: |
188 | // We convert user-passed pointers into special Arg objects |
189 | class Arg; |
190 | |
191 | // Marks end of arg list. |
192 | // ONLY USE IN OPTIONAL ARG DEFAULTS. |
193 | // DO NOT PASS EXPLICITLY. |
194 | static Arg no_more_args; |
195 | |
196 | // Options are same value as those in pcre. We provide them here |
197 | // to avoid users needing to include pcre.h and also to isolate |
198 | // users from pcre should we change the underlying library. |
199 | // Only those needed by Google programs are exposed here to |
200 | // avoid collision with options employed internally by regexp.cc |
201 | // Note that some options have equivalents that can be specified in |
202 | // the regexp itself. For example, prefixing your regexp with |
203 | // "(?s)" has the same effect as the PCRE_DOTALL option. |
204 | enum Option { |
205 | None = 0x0000, |
206 | UTF8 = 0x0800, // == PCRE_UTF8 |
207 | EnabledCompileOptions = UTF8, |
208 | EnabledExecOptions = 0x0000, // TODO: use to replace anchor flag |
209 | }; |
210 | |
211 | // We provide implicit conversions from strings so that users can |
212 | // pass in a string or a "const char*" wherever an "PCRE" is expected. |
213 | PCRE(const char* pattern); |
214 | PCRE(const char* pattern, Option option); |
215 | PCRE(const std::string& pattern); |
216 | PCRE(const std::string& pattern, Option option); |
217 | PCRE(const char *pattern, const PCRE_Options& re_option); |
218 | PCRE(const std::string& pattern, const PCRE_Options& re_option); |
219 | |
220 | ~PCRE(); |
221 | |
222 | // The string specification for this PCRE. E.g. |
223 | // PCRE re("ab*c?d+"); |
224 | // re.pattern(); // "ab*c?d+" |
225 | const std::string& pattern() const { return pattern_; } |
226 | |
227 | // If PCRE could not be created properly, returns an error string. |
228 | // Else returns the empty string. |
229 | const std::string& error() const { return *error_; } |
230 | |
231 | // Whether the PCRE has hit a match limit during execution. |
232 | // Not thread safe. Intended only for testing. |
233 | // If hitting match limits is a problem, |
234 | // you should be using PCRE2 (re2/re2.h) |
235 | // instead of checking this flag. |
236 | bool HitLimit(); |
237 | void ClearHitLimit(); |
238 | |
239 | /***** The useful part: the matching interface *****/ |
240 | |
241 | // Matches "text" against "pattern". If pointer arguments are |
242 | // supplied, copies matched sub-patterns into them. |
243 | // |
244 | // You can pass in a "const char*" or a "std::string" for "text". |
245 | // You can pass in a "const char*" or a "std::string" or a "PCRE" for "pattern". |
246 | // |
247 | // The provided pointer arguments can be pointers to any scalar numeric |
248 | // type, or one of: |
249 | // std::string (matched piece is copied to string) |
250 | // StringPiece (StringPiece is mutated to point to matched piece) |
251 | // T (where "bool T::ParseFrom(const char*, size_t)" exists) |
252 | // (void*)NULL (the corresponding matched sub-pattern is not copied) |
253 | // |
254 | // Returns true iff all of the following conditions are satisfied: |
255 | // a. "text" matches "pattern" exactly |
256 | // b. The number of matched sub-patterns is >= number of supplied pointers |
257 | // c. The "i"th argument has a suitable type for holding the |
258 | // string captured as the "i"th sub-pattern. If you pass in |
259 | // NULL for the "i"th argument, or pass fewer arguments than |
260 | // number of sub-patterns, "i"th captured sub-pattern is |
261 | // ignored. |
262 | // |
263 | // CAVEAT: An optional sub-pattern that does not exist in the |
264 | // matched string is assigned the empty string. Therefore, the |
265 | // following will return false (because the empty string is not a |
266 | // valid number): |
267 | // int number; |
268 | // PCRE::FullMatch("abc", "[a-z]+(\\d+)?", &number); |
269 | struct FullMatchFunctor { |
270 | bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args |
271 | const Arg& ptr1 = no_more_args, |
272 | const Arg& ptr2 = no_more_args, |
273 | const Arg& ptr3 = no_more_args, |
274 | const Arg& ptr4 = no_more_args, |
275 | const Arg& ptr5 = no_more_args, |
276 | const Arg& ptr6 = no_more_args, |
277 | const Arg& ptr7 = no_more_args, |
278 | const Arg& ptr8 = no_more_args, |
279 | const Arg& ptr9 = no_more_args, |
280 | const Arg& ptr10 = no_more_args, |
281 | const Arg& ptr11 = no_more_args, |
282 | const Arg& ptr12 = no_more_args, |
283 | const Arg& ptr13 = no_more_args, |
284 | const Arg& ptr14 = no_more_args, |
285 | const Arg& ptr15 = no_more_args, |
286 | const Arg& ptr16 = no_more_args) const; |
287 | }; |
288 | |
289 | static const FullMatchFunctor FullMatch; |
290 | |
291 | // Exactly like FullMatch(), except that "pattern" is allowed to match |
292 | // a substring of "text". |
293 | struct PartialMatchFunctor { |
294 | bool operator ()(const StringPiece& text, const PCRE& re, // 3..16 args |
295 | const Arg& ptr1 = no_more_args, |
296 | const Arg& ptr2 = no_more_args, |
297 | const Arg& ptr3 = no_more_args, |
298 | const Arg& ptr4 = no_more_args, |
299 | const Arg& ptr5 = no_more_args, |
300 | const Arg& ptr6 = no_more_args, |
301 | const Arg& ptr7 = no_more_args, |
302 | const Arg& ptr8 = no_more_args, |
303 | const Arg& ptr9 = no_more_args, |
304 | const Arg& ptr10 = no_more_args, |
305 | const Arg& ptr11 = no_more_args, |
306 | const Arg& ptr12 = no_more_args, |
307 | const Arg& ptr13 = no_more_args, |
308 | const Arg& ptr14 = no_more_args, |
309 | const Arg& ptr15 = no_more_args, |
310 | const Arg& ptr16 = no_more_args) const; |
311 | }; |
312 | |
313 | static const PartialMatchFunctor PartialMatch; |
314 | |
315 | // Like FullMatch() and PartialMatch(), except that pattern has to |
316 | // match a prefix of "text", and "input" is advanced past the matched |
317 | // text. Note: "input" is modified iff this routine returns true. |
318 | struct ConsumeFunctor { |
319 | bool operator ()(StringPiece* input, const PCRE& pattern, // 3..16 args |
320 | const Arg& ptr1 = no_more_args, |
321 | const Arg& ptr2 = no_more_args, |
322 | const Arg& ptr3 = no_more_args, |
323 | const Arg& ptr4 = no_more_args, |
324 | const Arg& ptr5 = no_more_args, |
325 | const Arg& ptr6 = no_more_args, |
326 | const Arg& ptr7 = no_more_args, |
327 | const Arg& ptr8 = no_more_args, |
328 | const Arg& ptr9 = no_more_args, |
329 | const Arg& ptr10 = no_more_args, |
330 | const Arg& ptr11 = no_more_args, |
331 | const Arg& ptr12 = no_more_args, |
332 | const Arg& ptr13 = no_more_args, |
333 | const Arg& ptr14 = no_more_args, |
334 | const Arg& ptr15 = no_more_args, |
335 | const Arg& ptr16 = no_more_args) const; |
336 | }; |
337 | |
338 | static const ConsumeFunctor Consume; |
339 | |
340 | // Like Consume(..), but does not anchor the match at the beginning of the |
341 | // string. That is, "pattern" need not start its match at the beginning of |
342 | // "input". For example, "FindAndConsume(s, "(\\w+)", &word)" finds the next |
343 | // word in "s" and stores it in "word". |
344 | struct FindAndConsumeFunctor { |
345 | bool operator ()(StringPiece* input, const PCRE& pattern, |
346 | const Arg& ptr1 = no_more_args, |
347 | const Arg& ptr2 = no_more_args, |
348 | const Arg& ptr3 = no_more_args, |
349 | const Arg& ptr4 = no_more_args, |
350 | const Arg& ptr5 = no_more_args, |
351 | const Arg& ptr6 = no_more_args, |
352 | const Arg& ptr7 = no_more_args, |
353 | const Arg& ptr8 = no_more_args, |
354 | const Arg& ptr9 = no_more_args, |
355 | const Arg& ptr10 = no_more_args, |
356 | const Arg& ptr11 = no_more_args, |
357 | const Arg& ptr12 = no_more_args, |
358 | const Arg& ptr13 = no_more_args, |
359 | const Arg& ptr14 = no_more_args, |
360 | const Arg& ptr15 = no_more_args, |
361 | const Arg& ptr16 = no_more_args) const; |
362 | }; |
363 | |
364 | static const FindAndConsumeFunctor FindAndConsume; |
365 | |
366 | // Replace the first match of "pattern" in "str" with "rewrite". |
367 | // Within "rewrite", backslash-escaped digits (\1 to \9) can be |
368 | // used to insert text matching corresponding parenthesized group |
369 | // from the pattern. \0 in "rewrite" refers to the entire matching |
370 | // text. E.g., |
371 | // |
372 | // std::string s = "yabba dabba doo"; |
373 | // CHECK(PCRE::Replace(&s, "b+", "d")); |
374 | // |
375 | // will leave "s" containing "yada dabba doo" |
376 | // |
377 | // Returns true if the pattern matches and a replacement occurs, |
378 | // false otherwise. |
379 | static bool Replace(std::string *str, |
380 | const PCRE& pattern, |
381 | const StringPiece& rewrite); |
382 | |
383 | // Like Replace(), except replaces all occurrences of the pattern in |
384 | // the string with the rewrite. Replacements are not subject to |
385 | // re-matching. E.g., |
386 | // |
387 | // std::string s = "yabba dabba doo"; |
388 | // CHECK(PCRE::GlobalReplace(&s, "b+", "d")); |
389 | // |
390 | // will leave "s" containing "yada dada doo" |
391 | // |
392 | // Returns the number of replacements made. |
393 | static int GlobalReplace(std::string *str, |
394 | const PCRE& pattern, |
395 | const StringPiece& rewrite); |
396 | |
397 | // Like Replace, except that if the pattern matches, "rewrite" |
398 | // is copied into "out" with substitutions. The non-matching |
399 | // portions of "text" are ignored. |
400 | // |
401 | // Returns true iff a match occurred and the extraction happened |
402 | // successfully; if no match occurs, the string is left unaffected. |
403 | static bool (const StringPiece &text, |
404 | const PCRE& pattern, |
405 | const StringPiece &rewrite, |
406 | std::string *out); |
407 | |
408 | // Check that the given @p rewrite string is suitable for use with |
409 | // this PCRE. It checks that: |
410 | // * The PCRE has enough parenthesized subexpressions to satisfy all |
411 | // of the \N tokens in @p rewrite, and |
412 | // * The @p rewrite string doesn't have any syntax errors |
413 | // ('\' followed by anything besides [0-9] and '\'). |
414 | // Making this test will guarantee that "replace" and "extract" |
415 | // operations won't LOG(ERROR) or fail because of a bad rewrite |
416 | // string. |
417 | // @param rewrite The proposed rewrite string. |
418 | // @param error An error message is recorded here, iff we return false. |
419 | // Otherwise, it is unchanged. |
420 | // @return true, iff @p rewrite is suitable for use with the PCRE. |
421 | bool CheckRewriteString(const StringPiece& rewrite, |
422 | std::string* error) const; |
423 | |
424 | // Returns a copy of 'unquoted' with all potentially meaningful |
425 | // regexp characters backslash-escaped. The returned string, used |
426 | // as a regular expression, will exactly match the original string. |
427 | // For example, |
428 | // 1.5-2.0? |
429 | // becomes: |
430 | // 1\.5\-2\.0\? |
431 | static std::string QuoteMeta(const StringPiece& unquoted); |
432 | |
433 | /***** Generic matching interface (not so nice to use) *****/ |
434 | |
435 | // Type of match (TODO: Should be restructured as an Option) |
436 | enum Anchor { |
437 | UNANCHORED, // No anchoring |
438 | ANCHOR_START, // Anchor at start only |
439 | ANCHOR_BOTH, // Anchor at start and end |
440 | }; |
441 | |
442 | // General matching routine. Stores the length of the match in |
443 | // "*consumed" if successful. |
444 | bool DoMatch(const StringPiece& text, |
445 | Anchor anchor, |
446 | size_t* consumed, |
447 | const Arg* const* args, int n) const; |
448 | |
449 | // Return the number of capturing subpatterns, or -1 if the |
450 | // regexp wasn't valid on construction. |
451 | int NumberOfCapturingGroups() const; |
452 | |
453 | private: |
454 | void Init(const char* pattern, Option option, int match_limit, |
455 | int stack_limit, bool report_errors); |
456 | |
457 | // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with |
458 | // pairs of integers for the beginning and end positions of matched |
459 | // text. The first pair corresponds to the entire matched text; |
460 | // subsequent pairs correspond, in order, to parentheses-captured |
461 | // matches. Returns the number of pairs (one more than the number of |
462 | // the last subpattern with a match) if matching was successful |
463 | // and zero if the match failed. |
464 | // I.e. for PCRE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching |
465 | // against "foo", "bar", and "baz" respectively. |
466 | // When matching PCRE("(foo)|hello") against "hello", it will return 1. |
467 | // But the values for all subpattern are filled in into "vec". |
468 | int TryMatch(const StringPiece& text, |
469 | size_t startpos, |
470 | Anchor anchor, |
471 | bool empty_ok, |
472 | int *vec, |
473 | int vecsize) const; |
474 | |
475 | // Append the "rewrite" string, with backslash subsitutions from "text" |
476 | // and "vec", to string "out". |
477 | bool Rewrite(std::string *out, |
478 | const StringPiece &rewrite, |
479 | const StringPiece &text, |
480 | int *vec, |
481 | int veclen) const; |
482 | |
483 | // internal implementation for DoMatch |
484 | bool DoMatchImpl(const StringPiece& text, |
485 | Anchor anchor, |
486 | size_t* consumed, |
487 | const Arg* const args[], |
488 | int n, |
489 | int* vec, |
490 | int vecsize) const; |
491 | |
492 | // Compile the regexp for the specified anchoring mode |
493 | pcre* Compile(Anchor anchor); |
494 | |
495 | std::string pattern_; |
496 | Option options_; |
497 | pcre* re_full_; // For full matches |
498 | pcre* re_partial_; // For partial matches |
499 | const std::string* error_; // Error indicator (or empty string) |
500 | bool report_errors_; // Silences error logging if false |
501 | int match_limit_; // Limit on execution resources |
502 | int stack_limit_; // Limit on stack resources (bytes) |
503 | mutable int32_t hit_limit_; // Hit limit during execution (bool) |
504 | |
505 | PCRE(const PCRE&) = delete; |
506 | PCRE& operator=(const PCRE&) = delete; |
507 | }; |
508 | |
509 | // PCRE_Options allow you to set the PCRE::Options, plus any pcre |
510 | // "extra" options. The only extras are match_limit, which limits |
511 | // the CPU time of a match, and stack_limit, which limits the |
512 | // stack usage. Setting a limit to <= 0 lets PCRE pick a sensible default |
513 | // that should not cause too many problems in production code. |
514 | // If PCRE hits a limit during a match, it may return a false negative, |
515 | // but (hopefully) it won't crash. |
516 | // |
517 | // NOTE: If you are handling regular expressions specified by |
518 | // (external or internal) users, rather than hard-coded ones, |
519 | // you should be using PCRE2, which uses an alternate implementation |
520 | // that avoids these issues. See http://go/re2quick. |
521 | class PCRE_Options { |
522 | public: |
523 | // constructor |
524 | PCRE_Options() : option_(PCRE::None), match_limit_(0), stack_limit_(0), report_errors_(true) {} |
525 | // accessors |
526 | PCRE::Option option() const { return option_; } |
527 | void set_option(PCRE::Option option) { |
528 | option_ = option; |
529 | } |
530 | int match_limit() const { return match_limit_; } |
531 | void set_match_limit(int match_limit) { |
532 | match_limit_ = match_limit; |
533 | } |
534 | int stack_limit() const { return stack_limit_; } |
535 | void set_stack_limit(int stack_limit) { |
536 | stack_limit_ = stack_limit; |
537 | } |
538 | |
539 | // If the regular expression is malformed, an error message will be printed |
540 | // iff report_errors() is true. Default: true. |
541 | bool report_errors() const { return report_errors_; } |
542 | void set_report_errors(bool report_errors) { |
543 | report_errors_ = report_errors; |
544 | } |
545 | private: |
546 | PCRE::Option option_; |
547 | int match_limit_; |
548 | int stack_limit_; |
549 | bool report_errors_; |
550 | }; |
551 | |
552 | |
553 | /***** Implementation details *****/ |
554 | |
555 | // Hex/Octal/Binary? |
556 | |
// Parsing adapter for destination types that provide their own
// "bool ParseFrom(const char*, size_t)" member.
template <class T>
class _PCRE_MatchObject {
 public:
  // Forwards the n bytes at str to T::ParseFrom() on the object that dest
  // points to and returns its result.  A null dest is accepted and
  // reported as success without calling anything.
  static inline bool Parse(const char* str, size_t n, void* dest) {
    if (dest != nullptr) {
      T* const target = static_cast<T*>(dest);
      return target->ParseFrom(str, n);
    }
    return true;
  }
};
567 | |
568 | class PCRE::Arg { |
569 | public: |
570 | // Empty constructor so we can declare arrays of PCRE::Arg |
571 | Arg(); |
572 | |
573 | // Constructor specially designed for NULL arguments |
574 | Arg(void*); |
575 | |
576 | typedef bool (*Parser)(const char* str, size_t n, void* dest); |
577 | |
578 | // Type-specific parsers |
579 | #define MAKE_PARSER(type, name) \ |
580 | Arg(type* p) : arg_(p), parser_(name) {} \ |
581 | Arg(type* p, Parser parser) : arg_(p), parser_(parser) {} |
582 | |
583 | MAKE_PARSER(char, parse_char); |
584 | MAKE_PARSER(signed char, parse_schar); |
585 | MAKE_PARSER(unsigned char, parse_uchar); |
586 | MAKE_PARSER(float, parse_float); |
587 | MAKE_PARSER(double, parse_double); |
588 | MAKE_PARSER(std::string, parse_string); |
589 | MAKE_PARSER(StringPiece, parse_stringpiece); |
590 | |
591 | MAKE_PARSER(short, parse_short); |
592 | MAKE_PARSER(unsigned short, parse_ushort); |
593 | MAKE_PARSER(int, parse_int); |
594 | MAKE_PARSER(unsigned int, parse_uint); |
595 | MAKE_PARSER(long, parse_long); |
596 | MAKE_PARSER(unsigned long, parse_ulong); |
597 | MAKE_PARSER(long long, parse_longlong); |
598 | MAKE_PARSER(unsigned long long, parse_ulonglong); |
599 | |
600 | #undef MAKE_PARSER |
601 | |
602 | // Generic constructor |
603 | template <class T> Arg(T*, Parser parser); |
604 | // Generic constructor template |
605 | template <class T> Arg(T* p) |
606 | : arg_(p), parser_(_PCRE_MatchObject<T>::Parse) { |
607 | } |
608 | |
609 | // Parse the data |
610 | bool Parse(const char* str, size_t n) const; |
611 | |
612 | private: |
613 | void* arg_; |
614 | Parser parser_; |
615 | |
616 | static bool parse_null (const char* str, size_t n, void* dest); |
617 | static bool parse_char (const char* str, size_t n, void* dest); |
618 | static bool parse_schar (const char* str, size_t n, void* dest); |
619 | static bool parse_uchar (const char* str, size_t n, void* dest); |
620 | static bool parse_float (const char* str, size_t n, void* dest); |
621 | static bool parse_double (const char* str, size_t n, void* dest); |
622 | static bool parse_string (const char* str, size_t n, void* dest); |
623 | static bool parse_stringpiece (const char* str, size_t n, void* dest); |
624 | |
625 | #define DECLARE_INTEGER_PARSER(name) \ |
626 | private: \ |
627 | static bool parse_##name(const char* str, size_t n, void* dest); \ |
628 | static bool parse_##name##_radix(const char* str, size_t n, void* dest, \ |
629 | int radix); \ |
630 | \ |
631 | public: \ |
632 | static bool parse_##name##_hex(const char* str, size_t n, void* dest); \ |
633 | static bool parse_##name##_octal(const char* str, size_t n, void* dest); \ |
634 | static bool parse_##name##_cradix(const char* str, size_t n, void* dest) |
635 | |
636 | DECLARE_INTEGER_PARSER(short); |
637 | DECLARE_INTEGER_PARSER(ushort); |
638 | DECLARE_INTEGER_PARSER(int); |
639 | DECLARE_INTEGER_PARSER(uint); |
640 | DECLARE_INTEGER_PARSER(long); |
641 | DECLARE_INTEGER_PARSER(ulong); |
642 | DECLARE_INTEGER_PARSER(longlong); |
643 | DECLARE_INTEGER_PARSER(ulonglong); |
644 | |
645 | #undef DECLARE_INTEGER_PARSER |
646 | |
647 | }; |
648 | |
649 | inline PCRE::Arg::Arg() : arg_(NULL), parser_(parse_null) { } |
650 | inline PCRE::Arg::Arg(void* p) : arg_(p), parser_(parse_null) { } |
651 | |
652 | inline bool PCRE::Arg::Parse(const char* str, size_t n) const { |
653 | return (*parser_)(str, n, arg_); |
654 | } |
655 | |
656 | // This part of the parser, appropriate only for ints, deals with bases |
657 | #define MAKE_INTEGER_PARSER(type, name) \ |
658 | inline PCRE::Arg Hex(type* ptr) { \ |
659 | return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_hex); \ |
660 | } \ |
661 | inline PCRE::Arg Octal(type* ptr) { \ |
662 | return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_octal); \ |
663 | } \ |
664 | inline PCRE::Arg CRadix(type* ptr) { \ |
665 | return PCRE::Arg(ptr, PCRE::Arg::parse_##name##_cradix); \ |
666 | } |
667 | |
668 | MAKE_INTEGER_PARSER(short, short); |
669 | MAKE_INTEGER_PARSER(unsigned short, ushort); |
670 | MAKE_INTEGER_PARSER(int, int); |
671 | MAKE_INTEGER_PARSER(unsigned int, uint); |
672 | MAKE_INTEGER_PARSER(long, long); |
673 | MAKE_INTEGER_PARSER(unsigned long, ulong); |
674 | MAKE_INTEGER_PARSER(long long, longlong); |
675 | MAKE_INTEGER_PARSER(unsigned long long, ulonglong); |
676 | |
677 | #undef MAKE_INTEGER_PARSER |
678 | |
679 | } // namespace re2 |
680 | |
681 | #endif // UTIL_PCRE_H_ |
682 | |