1 | // Copyright 2008 The RE2 Authors. All Rights Reserved. |
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | // Random testing of regular expression matching. |
6 | |
7 | #include <stdio.h> |
8 | #include <string> |
9 | #include <vector> |
10 | |
11 | #include "util/test.h" |
12 | #include "util/flags.h" |
13 | #include "re2/testing/exhaustive_tester.h" |
14 | |
15 | DEFINE_FLAG(int, regexpseed, 404, "Random regexp seed." ); |
16 | DEFINE_FLAG(int, regexpcount, 100, "How many random regexps to generate." ); |
17 | DEFINE_FLAG(int, stringseed, 200, "Random string seed." ); |
18 | DEFINE_FLAG(int, stringcount, 100, "How many random strings to generate." ); |
19 | |
20 | namespace re2 { |
21 | |
22 | // Runs a random test on the given parameters. |
23 | // (Always uses the same random seeds for reproducibility. |
24 | // Can give different seeds on command line.) |
25 | static void RandomTest(int maxatoms, int maxops, |
26 | const std::vector<std::string>& alphabet, |
27 | const std::vector<std::string>& ops, |
28 | int maxstrlen, |
29 | const std::vector<std::string>& stralphabet, |
30 | const std::string& wrapper) { |
31 | // Limit to smaller test cases in debug mode, |
32 | // because everything is so much slower. |
33 | if (RE2_DEBUG_MODE) { |
34 | maxatoms--; |
35 | maxops--; |
36 | maxstrlen /= 2; |
37 | } |
38 | |
39 | ExhaustiveTester t(maxatoms, maxops, alphabet, ops, |
40 | maxstrlen, stralphabet, wrapper, "" ); |
41 | t.RandomStrings(GetFlag(FLAGS_stringseed), |
42 | GetFlag(FLAGS_stringcount)); |
43 | t.GenerateRandom(GetFlag(FLAGS_regexpseed), |
44 | GetFlag(FLAGS_regexpcount)); |
45 | printf("%d regexps, %d tests, %d failures [%d/%d str]\n" , |
46 | t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size()); |
47 | EXPECT_EQ(0, t.failures()); |
48 | } |
49 | |
50 | // Tests random small regexps involving literals and egrep operators. |
51 | TEST(Random, SmallEgrepLiterals) { |
52 | RandomTest(5, 5, Explode("abc." ), RegexpGenerator::EgrepOps(), |
53 | 15, Explode("abc" ), |
54 | "" ); |
55 | } |
56 | |
57 | // Tests random bigger regexps involving literals and egrep operators. |
58 | TEST(Random, BigEgrepLiterals) { |
59 | RandomTest(10, 10, Explode("abc." ), RegexpGenerator::EgrepOps(), |
60 | 15, Explode("abc" ), |
61 | "" ); |
62 | } |
63 | |
64 | // Tests random small regexps involving literals, capturing parens, |
65 | // and egrep operators. |
66 | TEST(Random, SmallEgrepCaptures) { |
67 | RandomTest(5, 5, Split(" " , "a (b) ." ), RegexpGenerator::EgrepOps(), |
68 | 15, Explode("abc" ), |
69 | "" ); |
70 | } |
71 | |
72 | // Tests random bigger regexps involving literals, capturing parens, |
73 | // and egrep operators. |
74 | TEST(Random, BigEgrepCaptures) { |
75 | RandomTest(10, 10, Split(" " , "a (b) ." ), RegexpGenerator::EgrepOps(), |
76 | 15, Explode("abc" ), |
77 | "" ); |
78 | } |
79 | |
80 | // Tests random large complicated expressions, using all the possible |
81 | // operators, some literals, some parenthesized literals, and predefined |
82 | // character classes like \d. (Adding larger character classes would |
83 | // make for too many possibilities.) |
84 | TEST(Random, Complicated) { |
85 | std::vector<std::string> ops = Split(" " , |
86 | "%s%s %s|%s %s* %s*? %s+ %s+? %s? %s?? " |
87 | "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} %s{1,2} " |
88 | "%s{2} %s{2,} %s{3,4} %s{4,5}" ); |
89 | |
90 | // Use (?:\b) and (?:\B) instead of \b and \B, |
91 | // because PCRE rejects \b* but accepts (?:\b)*. |
92 | // Ditto ^ and $. |
93 | std::vector<std::string> atoms = Split(" " , |
94 | ". (?:^) (?:$) \\a \\f \\n \\r \\t \\v " |
95 | "\\d \\D \\s \\S \\w \\W (?:\\b) (?:\\B) " |
96 | "a (a) b c - \\\\" ); |
97 | std::vector<std::string> alphabet = Explode("abc123\001\002\003\t\r\n\v\f\a" ); |
98 | RandomTest(10, 10, atoms, ops, 20, alphabet, "" ); |
99 | } |
100 | |
101 | } // namespace re2 |
102 | |