1 | // Copyright 2008 The RE2 Authors. All Rights Reserved. |
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | // Exhaustive testing of regular expression matching. |
6 | |
7 | // Each test picks an alphabet (e.g., "abc"), a maximum string length, |
8 | // a maximum regular expression length, and a maximum number of letters |
9 | // that can appear in the regular expression. Given these parameters, |
10 | // it tries every possible regular expression and string, verifying that |
11 | // the NFA, DFA, and a trivial backtracking implementation agree about |
12 | // the location of the match. |
13 | |
14 | #include <stdio.h> |
15 | |
16 | #include "util/test.h" |
17 | #include "util/flags.h" |
18 | #include "util/logging.h" |
19 | #include "util/strutil.h" |
20 | #include "re2/testing/exhaustive_tester.h" |
21 | #include "re2/testing/tester.h" |
22 | |
23 | // For target `log' in the Makefile. |
24 | #ifndef LOGGING |
25 | #define LOGGING 0 |
26 | #endif |
27 | |
28 | DEFINE_FLAG(bool, show_regexps, false, "show regexps during testing" ); |
29 | |
30 | DEFINE_FLAG(int, max_bad_regexp_inputs, 1, |
31 | "Stop testing a regular expression after finding this many " |
32 | "strings that break it." ); |
33 | |
34 | namespace re2 { |
35 | |
36 | static char* escape(const StringPiece& sp) { |
37 | static char buf[512]; |
38 | char* p = buf; |
39 | *p++ = '\"'; |
40 | for (size_t i = 0; i < sp.size(); i++) { |
41 | if(p+5 >= buf+sizeof buf) |
42 | LOG(FATAL) << "ExhaustiveTester escape: too long" ; |
43 | if(sp[i] == '\\' || sp[i] == '\"') { |
44 | *p++ = '\\'; |
45 | *p++ = sp[i]; |
46 | } else if(sp[i] == '\n') { |
47 | *p++ = '\\'; |
48 | *p++ = 'n'; |
49 | } else { |
50 | *p++ = sp[i]; |
51 | } |
52 | } |
53 | *p++ = '\"'; |
54 | *p = '\0'; |
55 | return buf; |
56 | } |
57 | |
58 | static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) { |
59 | if (!re.Match(input, 0, input.size(), anchor, m, n)) { |
60 | printf("-" ); |
61 | return; |
62 | } |
63 | for (int i = 0; i < n; i++) { |
64 | if (i > 0) |
65 | printf(" " ); |
66 | if (m[i].data() == NULL) |
67 | printf("-" ); |
68 | else |
69 | printf("%td-%td" , |
70 | m[i].begin() - input.begin(), |
71 | m[i].end() - input.begin()); |
72 | } |
73 | } |
74 | |
75 | // Processes a single generated regexp. |
76 | // Compiles it using Regexp interface and PCRE, and then |
77 | // checks that NFA, DFA, and PCRE all return the same results. |
78 | void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) { |
79 | regexps_++; |
80 | std::string regexp = const_regexp; |
81 | if (!topwrapper_.empty()) { |
82 | regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str()); |
83 | } |
84 | |
85 | if (GetFlag(FLAGS_show_regexps)) { |
86 | printf("\r%s" , regexp.c_str()); |
87 | fflush(stdout); |
88 | } |
89 | |
90 | if (LOGGING) { |
91 | // Write out test cases and answers for use in testing |
92 | // other implementations, such as Go's regexp package. |
93 | if (randomstrings_) |
94 | LOG(ERROR) << "Cannot log with random strings." ; |
95 | if (regexps_ == 1) { // first |
96 | printf("strings\n" ); |
97 | strgen_.Reset(); |
98 | while (strgen_.HasNext()) |
99 | printf("%s\n" , escape(strgen_.Next())); |
100 | printf("regexps\n" ); |
101 | } |
102 | printf("%s\n" , escape(regexp)); |
103 | |
104 | RE2 re(regexp); |
105 | RE2::Options longest; |
106 | longest.set_longest_match(true); |
107 | RE2 relongest(regexp, longest); |
108 | int ngroup = re.NumberOfCapturingGroups()+1; |
109 | StringPiece* group = new StringPiece[ngroup]; |
110 | |
111 | strgen_.Reset(); |
112 | while (strgen_.HasNext()) { |
113 | StringPiece input = strgen_.Next(); |
114 | PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup); |
115 | printf(";" ); |
116 | PrintResult(re, input, RE2::UNANCHORED, group, ngroup); |
117 | printf(";" ); |
118 | PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup); |
119 | printf(";" ); |
120 | PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup); |
121 | printf("\n" ); |
122 | } |
123 | delete[] group; |
124 | return; |
125 | } |
126 | |
127 | Tester tester(regexp); |
128 | if (tester.error()) |
129 | return; |
130 | |
131 | strgen_.Reset(); |
132 | strgen_.GenerateNULL(); |
133 | if (randomstrings_) |
134 | strgen_.Random(stringseed_, stringcount_); |
135 | int bad_inputs = 0; |
136 | while (strgen_.HasNext()) { |
137 | tests_++; |
138 | if (!tester.TestInput(strgen_.Next())) { |
139 | failures_++; |
140 | if (++bad_inputs >= GetFlag(FLAGS_max_bad_regexp_inputs)) |
141 | break; |
142 | } |
143 | } |
144 | } |
145 | |
146 | // Runs an exhaustive test on the given parameters. |
147 | void ExhaustiveTest(int maxatoms, int maxops, |
148 | const std::vector<std::string>& alphabet, |
149 | const std::vector<std::string>& ops, |
150 | int maxstrlen, |
151 | const std::vector<std::string>& stralphabet, |
152 | const std::string& wrapper, |
153 | const std::string& topwrapper) { |
154 | if (RE2_DEBUG_MODE) { |
155 | if (maxatoms > 1) |
156 | maxatoms--; |
157 | if (maxops > 1) |
158 | maxops--; |
159 | if (maxstrlen > 1) |
160 | maxstrlen--; |
161 | } |
162 | ExhaustiveTester t(maxatoms, maxops, alphabet, ops, |
163 | maxstrlen, stralphabet, wrapper, |
164 | topwrapper); |
165 | t.Generate(); |
166 | if (!LOGGING) { |
167 | printf("%d regexps, %d tests, %d failures [%d/%d str]\n" , |
168 | t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size()); |
169 | } |
170 | EXPECT_EQ(0, t.failures()); |
171 | } |
172 | |
173 | // Runs an exhaustive test using the given parameters and |
174 | // the basic egrep operators. |
175 | void EgrepTest(int maxatoms, int maxops, const std::string& alphabet, |
176 | int maxstrlen, const std::string& stralphabet, |
177 | const std::string& wrapper) { |
178 | const char* tops[] = { "" , "^(?:%s)" , "(?:%s)$" , "^(?:%s)$" }; |
179 | |
180 | for (size_t i = 0; i < arraysize(tops); i++) { |
181 | ExhaustiveTest(maxatoms, maxops, |
182 | Split("" , alphabet), |
183 | RegexpGenerator::EgrepOps(), |
184 | maxstrlen, |
185 | Split("" , stralphabet), |
186 | wrapper, |
187 | tops[i]); |
188 | } |
189 | } |
190 | |
191 | } // namespace re2 |
192 | |