1 | // Copyright 2008 The RE2 Authors. All Rights Reserved. |
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | // Regular expression engine tester -- test all the implementations against each other. |
6 | |
7 | #include <stddef.h> |
8 | #include <stdint.h> |
9 | #include <string.h> |
10 | #include <string> |
11 | |
12 | #include "util/util.h" |
13 | #include "util/flags.h" |
14 | #include "util/logging.h" |
15 | #include "util/strutil.h" |
16 | #include "re2/testing/tester.h" |
17 | #include "re2/prog.h" |
18 | #include "re2/re2.h" |
19 | #include "re2/regexp.h" |
20 | |
21 | DEFINE_FLAG(bool, dump_prog, false, "dump regexp program" ); |
22 | DEFINE_FLAG(bool, log_okay, false, "log successful runs" ); |
23 | DEFINE_FLAG(bool, dump_rprog, false, "dump reversed regexp program" ); |
24 | |
25 | DEFINE_FLAG(int, max_regexp_failures, 100, |
26 | "maximum number of regexp test failures (-1 = unlimited)" ); |
27 | |
28 | DEFINE_FLAG(std::string, regexp_engines, "" , |
29 | "pattern to select regexp engines to test" ); |
30 | |
31 | namespace re2 { |
32 | |
33 | enum { |
34 | kMaxSubmatch = 1+16, // $0...$16 |
35 | }; |
36 | |
37 | const char* engine_names[kEngineMax] = { |
38 | "Backtrack" , |
39 | "NFA" , |
40 | "DFA" , |
41 | "DFA1" , |
42 | "OnePass" , |
43 | "BitState" , |
44 | "RE2" , |
45 | "RE2a" , |
46 | "RE2b" , |
47 | "PCRE" , |
48 | }; |
49 | |
50 | // Returns the name of the engine. |
51 | static const char* EngineName(Engine e) { |
52 | CHECK_GE(e, 0); |
53 | CHECK_LT(e, arraysize(engine_names)); |
54 | CHECK(engine_names[e] != NULL); |
55 | return engine_names[e]; |
56 | } |
57 | |
58 | // Returns bit mask of engines to use. |
59 | static uint32_t Engines() { |
60 | static bool did_parse = false; |
61 | static uint32_t cached_engines = 0; |
62 | |
63 | if (did_parse) |
64 | return cached_engines; |
65 | |
66 | if (GetFlag(FLAGS_regexp_engines).empty()) { |
67 | cached_engines = ~0; |
68 | } else { |
69 | for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) |
70 | if (GetFlag(FLAGS_regexp_engines).find(EngineName(i)) != std::string::npos) |
71 | cached_engines |= 1<<i; |
72 | } |
73 | |
74 | if (cached_engines == 0) |
75 | LOG(INFO) << "Warning: no engines enabled." ; |
76 | if (!UsingPCRE) |
77 | cached_engines &= ~(1<<kEnginePCRE); |
78 | for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) { |
79 | if (cached_engines & (1<<i)) |
80 | LOG(INFO) << EngineName(i) << " enabled" ; |
81 | } |
82 | |
83 | did_parse = true; |
84 | return cached_engines; |
85 | } |
86 | |
87 | // The result of running a match. |
88 | struct TestInstance::Result { |
89 | bool skipped; // test skipped: wasn't applicable |
90 | bool matched; // found a match |
91 | bool untrusted; // don't really trust the answer |
92 | bool have_submatch; // computed all submatch info |
93 | bool have_submatch0; // computed just submatch[0] |
94 | StringPiece submatch[kMaxSubmatch]; |
95 | }; |
96 | |
97 | typedef TestInstance::Result Result; |
98 | |
99 | // Formats a single capture range s in text in the form (a,b) |
100 | // where a and b are the starting and ending offsets of s in text. |
101 | static std::string FormatCapture(const StringPiece& text, |
102 | const StringPiece& s) { |
103 | if (s.data() == NULL) |
104 | return "(?,?)" ; |
105 | return StringPrintf("(%td,%td)" , |
106 | s.begin() - text.begin(), |
107 | s.end() - text.begin()); |
108 | } |
109 | |
110 | // Returns whether text contains non-ASCII (>= 0x80) bytes. |
111 | static bool NonASCII(const StringPiece& text) { |
112 | for (size_t i = 0; i < text.size(); i++) |
113 | if ((uint8_t)text[i] >= 0x80) |
114 | return true; |
115 | return false; |
116 | } |
117 | |
118 | // Returns string representation of match kind. |
119 | static std::string FormatKind(Prog::MatchKind kind) { |
120 | switch (kind) { |
121 | case Prog::kFullMatch: |
122 | return "full match" ; |
123 | case Prog::kLongestMatch: |
124 | return "longest match" ; |
125 | case Prog::kFirstMatch: |
126 | return "first match" ; |
127 | case Prog::kManyMatch: |
128 | return "many match" ; |
129 | } |
130 | return "???" ; |
131 | } |
132 | |
133 | // Returns string representation of anchor kind. |
134 | static std::string FormatAnchor(Prog::Anchor anchor) { |
135 | switch (anchor) { |
136 | case Prog::kAnchored: |
137 | return "anchored" ; |
138 | case Prog::kUnanchored: |
139 | return "unanchored" ; |
140 | } |
141 | return "???" ; |
142 | } |
143 | |
144 | struct ParseMode { |
145 | Regexp::ParseFlags parse_flags; |
146 | std::string desc; |
147 | }; |
148 | |
149 | static const Regexp::ParseFlags single_line = |
150 | Regexp::LikePerl; |
151 | static const Regexp::ParseFlags multi_line = |
152 | static_cast<Regexp::ParseFlags>(Regexp::LikePerl & ~Regexp::OneLine); |
153 | |
154 | static ParseMode parse_modes[] = { |
155 | { single_line, "single-line" }, |
156 | { single_line|Regexp::Latin1, "single-line, latin1" }, |
157 | { multi_line, "multiline" }, |
158 | { multi_line|Regexp::NonGreedy, "multiline, nongreedy" }, |
159 | { multi_line|Regexp::Latin1, "multiline, latin1" }, |
160 | }; |
161 | |
162 | static std::string FormatMode(Regexp::ParseFlags flags) { |
163 | for (size_t i = 0; i < arraysize(parse_modes); i++) |
164 | if (parse_modes[i].parse_flags == flags) |
165 | return parse_modes[i].desc; |
166 | return StringPrintf("%#x" , static_cast<uint32_t>(flags)); |
167 | } |
168 | |
169 | // Constructs and saves all the matching engines that |
170 | // will be required for the given tests. |
171 | TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind, |
172 | Regexp::ParseFlags flags) |
173 | : regexp_str_(regexp_str), |
174 | kind_(kind), |
175 | flags_(flags), |
176 | error_(false), |
177 | regexp_(NULL), |
178 | num_captures_(0), |
179 | prog_(NULL), |
180 | rprog_(NULL), |
181 | re_(NULL), |
182 | re2_(NULL) { |
183 | |
184 | VLOG(1) << CEscape(regexp_str); |
185 | |
186 | // Compile regexp to prog. |
187 | // Always required - needed for backtracking (reference implementation). |
188 | RegexpStatus status; |
189 | regexp_ = Regexp::Parse(regexp_str, flags, &status); |
190 | if (regexp_ == NULL) { |
191 | LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_) |
192 | << " mode: " << FormatMode(flags); |
193 | error_ = true; |
194 | return; |
195 | } |
196 | num_captures_ = regexp_->NumCaptures(); |
197 | prog_ = regexp_->CompileToProg(0); |
198 | if (prog_ == NULL) { |
199 | LOG(INFO) << "Cannot compile: " << CEscape(regexp_str_); |
200 | error_ = true; |
201 | return; |
202 | } |
203 | if (GetFlag(FLAGS_dump_prog)) { |
204 | LOG(INFO) << "Prog for " |
205 | << " regexp " |
206 | << CEscape(regexp_str_) |
207 | << " (" << FormatKind(kind_) |
208 | << ", " << FormatMode(flags_) |
209 | << ")\n" |
210 | << prog_->Dump(); |
211 | } |
212 | |
213 | // Compile regexp to reversed prog. Only needed for DFA engines. |
214 | if (Engines() & ((1<<kEngineDFA)|(1<<kEngineDFA1))) { |
215 | rprog_ = regexp_->CompileToReverseProg(0); |
216 | if (rprog_ == NULL) { |
217 | LOG(INFO) << "Cannot reverse compile: " << CEscape(regexp_str_); |
218 | error_ = true; |
219 | return; |
220 | } |
221 | if (GetFlag(FLAGS_dump_rprog)) |
222 | LOG(INFO) << rprog_->Dump(); |
223 | } |
224 | |
225 | // Create re string that will be used for RE and RE2. |
226 | std::string re = std::string(regexp_str); |
227 | // Accomodate flags. |
228 | // Regexp::Latin1 will be accomodated below. |
229 | if (!(flags & Regexp::OneLine)) |
230 | re = "(?m)" + re; |
231 | if (flags & Regexp::NonGreedy) |
232 | re = "(?U)" + re; |
233 | if (flags & Regexp::DotNL) |
234 | re = "(?s)" + re; |
235 | |
236 | // Compile regexp to RE2. |
237 | if (Engines() & ((1<<kEngineRE2)|(1<<kEngineRE2a)|(1<<kEngineRE2b))) { |
238 | RE2::Options options; |
239 | if (flags & Regexp::Latin1) |
240 | options.set_encoding(RE2::Options::EncodingLatin1); |
241 | if (kind_ == Prog::kLongestMatch) |
242 | options.set_longest_match(true); |
243 | re2_ = new RE2(re, options); |
244 | if (!re2_->error().empty()) { |
245 | LOG(INFO) << "Cannot RE2: " << CEscape(re); |
246 | error_ = true; |
247 | return; |
248 | } |
249 | } |
250 | |
251 | // Compile regexp to RE. |
252 | // PCRE as exposed by the RE interface isn't always usable. |
253 | // 1. It disagrees about handling of empty-string reptitions |
254 | // like matching (a*)* against "b". PCRE treats the (a*) as |
255 | // occurring once, while we treat it as occurring not at all. |
256 | // 2. It treats $ as this weird thing meaning end of string |
257 | // or before the \n at the end of the string. |
258 | // 3. It doesn't implement POSIX leftmost-longest matching. |
259 | // 4. It lets \s match vertical tab. |
260 | // MimicsPCRE() detects 1 and 2. |
261 | if ((Engines() & (1<<kEnginePCRE)) && regexp_->MimicsPCRE() && |
262 | kind_ != Prog::kLongestMatch) { |
263 | PCRE_Options o; |
264 | o.set_option(PCRE::UTF8); |
265 | if (flags & Regexp::Latin1) |
266 | o.set_option(PCRE::None); |
267 | // PCRE has interface bug keeping us from finding $0, so |
268 | // add one more layer of parens. |
269 | re_ = new PCRE("(" +re+")" , o); |
270 | if (!re_->error().empty()) { |
271 | LOG(INFO) << "Cannot PCRE: " << CEscape(re); |
272 | error_ = true; |
273 | return; |
274 | } |
275 | } |
276 | } |
277 | |
278 | TestInstance::~TestInstance() { |
279 | if (regexp_) |
280 | regexp_->Decref(); |
281 | delete prog_; |
282 | delete rprog_; |
283 | delete re_; |
284 | delete re2_; |
285 | } |
286 | |
287 | // Runs a single search using the named engine type. |
288 | // This interface hides all the irregularities of the various |
289 | // engine interfaces from the rest of this file. |
290 | void TestInstance::RunSearch(Engine type, |
291 | const StringPiece& orig_text, |
292 | const StringPiece& orig_context, |
293 | Prog::Anchor anchor, |
294 | Result* result) { |
295 | // Result is not trivial, so we cannot freely clear it with memset(3), |
296 | // but zeroing objects like so is safe and expedient for our purposes. |
297 | memset(reinterpret_cast<void*>(result), 0, sizeof *result); |
298 | if (regexp_ == NULL) { |
299 | result->skipped = true; |
300 | return; |
301 | } |
302 | int nsubmatch = 1 + num_captures_; // NumCaptures doesn't count $0 |
303 | if (nsubmatch > kMaxSubmatch) |
304 | nsubmatch = kMaxSubmatch; |
305 | |
306 | StringPiece text = orig_text; |
307 | StringPiece context = orig_context; |
308 | |
309 | switch (type) { |
310 | default: |
311 | LOG(FATAL) << "Bad RunSearch type: " << (int)type; |
312 | |
313 | case kEngineBacktrack: |
314 | if (prog_ == NULL) { |
315 | result->skipped = true; |
316 | break; |
317 | } |
318 | result->matched = |
319 | prog_->UnsafeSearchBacktrack(text, context, anchor, kind_, |
320 | result->submatch, nsubmatch); |
321 | result->have_submatch = true; |
322 | break; |
323 | |
324 | case kEngineNFA: |
325 | if (prog_ == NULL) { |
326 | result->skipped = true; |
327 | break; |
328 | } |
329 | result->matched = |
330 | prog_->SearchNFA(text, context, anchor, kind_, |
331 | result->submatch, nsubmatch); |
332 | result->have_submatch = true; |
333 | break; |
334 | |
335 | case kEngineDFA: |
336 | if (prog_ == NULL) { |
337 | result->skipped = true; |
338 | break; |
339 | } |
340 | result->matched = prog_->SearchDFA(text, context, anchor, kind_, NULL, |
341 | &result->skipped, NULL); |
342 | break; |
343 | |
344 | case kEngineDFA1: |
345 | if (prog_ == NULL || rprog_ == NULL) { |
346 | result->skipped = true; |
347 | break; |
348 | } |
349 | result->matched = |
350 | prog_->SearchDFA(text, context, anchor, kind_, result->submatch, |
351 | &result->skipped, NULL); |
352 | // If anchored, no need for second run, |
353 | // but do it anyway to find more bugs. |
354 | if (result->matched) { |
355 | if (!rprog_->SearchDFA(result->submatch[0], context, |
356 | Prog::kAnchored, Prog::kLongestMatch, |
357 | result->submatch, |
358 | &result->skipped, NULL)) { |
359 | LOG(ERROR) << "Reverse DFA inconsistency: " |
360 | << CEscape(regexp_str_) |
361 | << " on " << CEscape(text); |
362 | result->matched = false; |
363 | } |
364 | } |
365 | result->have_submatch0 = true; |
366 | break; |
367 | |
368 | case kEngineOnePass: |
369 | if (prog_ == NULL || |
370 | !prog_->IsOnePass() || |
371 | anchor == Prog::kUnanchored || |
372 | nsubmatch > Prog::kMaxOnePassCapture) { |
373 | result->skipped = true; |
374 | break; |
375 | } |
376 | result->matched = prog_->SearchOnePass(text, context, anchor, kind_, |
377 | result->submatch, nsubmatch); |
378 | result->have_submatch = true; |
379 | break; |
380 | |
381 | case kEngineBitState: |
382 | if (prog_ == NULL || |
383 | !prog_->CanBitState()) { |
384 | result->skipped = true; |
385 | break; |
386 | } |
387 | result->matched = prog_->SearchBitState(text, context, anchor, kind_, |
388 | result->submatch, nsubmatch); |
389 | result->have_submatch = true; |
390 | break; |
391 | |
392 | case kEngineRE2: |
393 | case kEngineRE2a: |
394 | case kEngineRE2b: { |
395 | if (!re2_ || text.end() != context.end()) { |
396 | result->skipped = true; |
397 | break; |
398 | } |
399 | |
400 | RE2::Anchor re_anchor; |
401 | if (anchor == Prog::kAnchored) |
402 | re_anchor = RE2::ANCHOR_START; |
403 | else |
404 | re_anchor = RE2::UNANCHORED; |
405 | if (kind_ == Prog::kFullMatch) |
406 | re_anchor = RE2::ANCHOR_BOTH; |
407 | |
408 | result->matched = re2_->Match( |
409 | context, |
410 | static_cast<size_t>(text.begin() - context.begin()), |
411 | static_cast<size_t>(text.end() - context.begin()), |
412 | re_anchor, |
413 | result->submatch, |
414 | nsubmatch); |
415 | result->have_submatch = nsubmatch > 0; |
416 | break; |
417 | } |
418 | |
419 | case kEnginePCRE: { |
420 | if (!re_ || text.begin() != context.begin() || |
421 | text.end() != context.end()) { |
422 | result->skipped = true; |
423 | break; |
424 | } |
425 | |
426 | // In Perl/PCRE, \v matches any character considered vertical |
427 | // whitespace, not just vertical tab. Regexp::MimicsPCRE() is |
428 | // unable to handle all cases of this, unfortunately, so just |
429 | // catch them here. :( |
430 | if (regexp_str_.find("\\v" ) != StringPiece::npos && |
431 | (text.find('\n') != StringPiece::npos || |
432 | text.find('\f') != StringPiece::npos || |
433 | text.find('\r') != StringPiece::npos)) { |
434 | result->skipped = true; |
435 | break; |
436 | } |
437 | |
438 | // PCRE 8.34 or so started allowing vertical tab to match \s, |
439 | // following a change made in Perl 5.18. RE2 does not. |
440 | if ((regexp_str_.find("\\s" ) != StringPiece::npos || |
441 | regexp_str_.find("\\S" ) != StringPiece::npos) && |
442 | text.find('\v') != StringPiece::npos) { |
443 | result->skipped = true; |
444 | break; |
445 | } |
446 | |
447 | const PCRE::Arg **argptr = new const PCRE::Arg*[nsubmatch]; |
448 | PCRE::Arg *a = new PCRE::Arg[nsubmatch]; |
449 | for (int i = 0; i < nsubmatch; i++) { |
450 | a[i] = PCRE::Arg(&result->submatch[i]); |
451 | argptr[i] = &a[i]; |
452 | } |
453 | size_t consumed; |
454 | PCRE::Anchor pcre_anchor; |
455 | if (anchor == Prog::kAnchored) |
456 | pcre_anchor = PCRE::ANCHOR_START; |
457 | else |
458 | pcre_anchor = PCRE::UNANCHORED; |
459 | if (kind_ == Prog::kFullMatch) |
460 | pcre_anchor = PCRE::ANCHOR_BOTH; |
461 | re_->ClearHitLimit(); |
462 | result->matched = |
463 | re_->DoMatch(text, |
464 | pcre_anchor, |
465 | &consumed, |
466 | argptr, nsubmatch); |
467 | if (re_->HitLimit()) { |
468 | result->untrusted = true; |
469 | delete[] argptr; |
470 | delete[] a; |
471 | break; |
472 | } |
473 | result->have_submatch = true; |
474 | delete[] argptr; |
475 | delete[] a; |
476 | break; |
477 | } |
478 | } |
479 | |
480 | if (!result->matched) |
481 | memset(result->submatch, 0, sizeof result->submatch); |
482 | } |
483 | |
484 | // Checks whether r is okay given that correct is the right answer. |
485 | // Specifically, r's answers have to match (but it doesn't have to |
486 | // claim to have all the answers). |
487 | static bool ResultOkay(const Result& r, const Result& correct) { |
488 | if (r.skipped) |
489 | return true; |
490 | if (r.matched != correct.matched) |
491 | return false; |
492 | if (r.have_submatch || r.have_submatch0) { |
493 | for (int i = 0; i < kMaxSubmatch; i++) { |
494 | if (correct.submatch[i].data() != r.submatch[i].data() || |
495 | correct.submatch[i].size() != r.submatch[i].size()) |
496 | return false; |
497 | if (!r.have_submatch) |
498 | break; |
499 | } |
500 | } |
501 | return true; |
502 | } |
503 | |
504 | // Runs a single test. |
505 | bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context, |
506 | Prog::Anchor anchor) { |
507 | // Backtracking is the gold standard. |
508 | Result correct; |
509 | RunSearch(kEngineBacktrack, text, context, anchor, &correct); |
510 | if (correct.skipped) { |
511 | if (regexp_ == NULL) |
512 | return true; |
513 | LOG(ERROR) << "Skipped backtracking! " << CEscape(regexp_str_) |
514 | << " " << FormatMode(flags_); |
515 | return false; |
516 | } |
517 | VLOG(1) << "Try: regexp " << CEscape(regexp_str_) |
518 | << " text " << CEscape(text) |
519 | << " (" << FormatKind(kind_) |
520 | << ", " << FormatAnchor(anchor) |
521 | << ", " << FormatMode(flags_) |
522 | << ")" ; |
523 | |
524 | // Compare the others. |
525 | bool all_okay = true; |
526 | for (Engine i = kEngineBacktrack+1; i < kEngineMax; i++) { |
527 | if (!(Engines() & (1<<i))) |
528 | continue; |
529 | |
530 | Result r; |
531 | RunSearch(i, text, context, anchor, &r); |
532 | if (ResultOkay(r, correct)) { |
533 | if (GetFlag(FLAGS_log_okay)) |
534 | LogMatch(r.skipped ? "Skipped: " : "Okay: " , i, text, context, anchor); |
535 | continue; |
536 | } |
537 | |
538 | // We disagree with PCRE on the meaning of some Unicode matches. |
539 | // In particular, we treat non-ASCII UTF-8 as non-word characters. |
540 | // We also treat "empty" character sets like [^\w\W] as being |
541 | // impossible to match, while PCRE apparently excludes some code |
542 | // points (e.g., 0x0080) from both \w and \W. |
543 | if (i == kEnginePCRE && NonASCII(text)) |
544 | continue; |
545 | |
546 | if (!r.untrusted) |
547 | all_okay = false; |
548 | |
549 | LogMatch(r.untrusted ? "(Untrusted) Mismatch: " : "Mismatch: " , i, text, |
550 | context, anchor); |
551 | if (r.matched != correct.matched) { |
552 | if (r.matched) { |
553 | LOG(INFO) << " Should not match (but does)." ; |
554 | } else { |
555 | LOG(INFO) << " Should match (but does not)." ; |
556 | continue; |
557 | } |
558 | } |
559 | for (int i = 0; i < 1+num_captures_; i++) { |
560 | if (r.submatch[i].data() != correct.submatch[i].data() || |
561 | r.submatch[i].size() != correct.submatch[i].size()) { |
562 | LOG(INFO) << |
563 | StringPrintf(" $%d: should be %s is %s" , |
564 | i, |
565 | FormatCapture(text, correct.submatch[i]).c_str(), |
566 | FormatCapture(text, r.submatch[i]).c_str()); |
567 | } else { |
568 | LOG(INFO) << |
569 | StringPrintf(" $%d: %s ok" , i, |
570 | FormatCapture(text, r.submatch[i]).c_str()); |
571 | } |
572 | } |
573 | } |
574 | |
575 | if (!all_okay) { |
576 | // This will be initialised once (after flags have been initialised) |
577 | // and that is desirable because we want to enforce a global limit. |
578 | static int max_regexp_failures = GetFlag(FLAGS_max_regexp_failures); |
579 | if (max_regexp_failures > 0 && --max_regexp_failures == 0) |
580 | LOG(QFATAL) << "Too many regexp failures." ; |
581 | } |
582 | |
583 | return all_okay; |
584 | } |
585 | |
586 | void TestInstance::LogMatch(const char* prefix, Engine e, |
587 | const StringPiece& text, const StringPiece& context, |
588 | Prog::Anchor anchor) { |
589 | LOG(INFO) << prefix |
590 | << EngineName(e) |
591 | << " regexp " |
592 | << CEscape(regexp_str_) |
593 | << " " |
594 | << CEscape(regexp_->ToString()) |
595 | << " text " |
596 | << CEscape(text) |
597 | << " (" |
598 | << text.begin() - context.begin() |
599 | << "," |
600 | << text.end() - context.begin() |
601 | << ") of context " |
602 | << CEscape(context) |
603 | << " (" << FormatKind(kind_) |
604 | << ", " << FormatAnchor(anchor) |
605 | << ", " << FormatMode(flags_) |
606 | << ")" ; |
607 | } |
608 | |
609 | static Prog::MatchKind kinds[] = { |
610 | Prog::kFirstMatch, |
611 | Prog::kLongestMatch, |
612 | Prog::kFullMatch, |
613 | }; |
614 | |
615 | // Test all possible match kinds and parse modes. |
616 | Tester::Tester(const StringPiece& regexp) { |
617 | error_ = false; |
618 | for (size_t i = 0; i < arraysize(kinds); i++) { |
619 | for (size_t j = 0; j < arraysize(parse_modes); j++) { |
620 | TestInstance* t = new TestInstance(regexp, kinds[i], |
621 | parse_modes[j].parse_flags); |
622 | error_ |= t->error(); |
623 | v_.push_back(t); |
624 | } |
625 | } |
626 | } |
627 | |
628 | Tester::~Tester() { |
629 | for (size_t i = 0; i < v_.size(); i++) |
630 | delete v_[i]; |
631 | } |
632 | |
633 | bool Tester::TestCase(const StringPiece& text, const StringPiece& context, |
634 | Prog::Anchor anchor) { |
635 | bool okay = true; |
636 | for (size_t i = 0; i < v_.size(); i++) |
637 | okay &= (!v_[i]->error() && v_[i]->RunCase(text, context, anchor)); |
638 | return okay; |
639 | } |
640 | |
641 | static Prog::Anchor anchors[] = { |
642 | Prog::kAnchored, |
643 | Prog::kUnanchored |
644 | }; |
645 | |
646 | bool Tester::TestInput(const StringPiece& text) { |
647 | bool okay = TestInputInContext(text, text); |
648 | if (!text.empty()) { |
649 | StringPiece sp; |
650 | sp = text; |
651 | sp.remove_prefix(1); |
652 | okay &= TestInputInContext(sp, text); |
653 | sp = text; |
654 | sp.remove_suffix(1); |
655 | okay &= TestInputInContext(sp, text); |
656 | } |
657 | return okay; |
658 | } |
659 | |
660 | bool Tester::TestInputInContext(const StringPiece& text, |
661 | const StringPiece& context) { |
662 | bool okay = true; |
663 | for (size_t i = 0; i < arraysize(anchors); i++) |
664 | okay &= TestCase(text, context, anchors[i]); |
665 | return okay; |
666 | } |
667 | |
668 | bool TestRegexpOnText(const StringPiece& regexp, |
669 | const StringPiece& text) { |
670 | Tester t(regexp); |
671 | return t.TestInput(text); |
672 | } |
673 | |
674 | } // namespace re2 |
675 | |