1// Copyright 2008 The RE2 Authors. All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#ifndef RE2_TESTING_TESTER_H_
6#define RE2_TESTING_TESTER_H_
7
8// Comparative tester for regular expression matching.
9// Checks all implementations against each other.
10
11#include <vector>
12
13#include "re2/stringpiece.h"
14#include "re2/prog.h"
15#include "re2/regexp.h"
16#include "re2/re2.h"
17#include "util/pcre.h"
18
19namespace re2 {
20
// Every regexp engine that the tester knows how to drive.
// Values are assigned consecutively starting at 0; kEngineMax is not an
// engine itself but the sentinel one past the last real engine.
enum Engine {
  // Prog::UnsafeSearchBacktrack
  kEngineBacktrack = 0,
  // Prog::SearchNFA
  kEngineNFA,
  // Prog::SearchDFA, only ask whether it matched
  kEngineDFA,
  // Prog::SearchDFA, ask for match[0]
  kEngineDFA1,
  // Prog::SearchOnePass, if applicable
  kEngineOnePass,
  // Prog::SearchBitState
  kEngineBitState,
  // RE2, all submatches
  kEngineRE2,
  // RE2, only ask for match[0]
  kEngineRE2a,
  // RE2, only ask whether it matched
  kEngineRE2b,
  // PCRE (util/pcre.h)
  kEnginePCRE,

  // Number of engines above; keep this entry last.
  kEngineMax,
};
36
37// Make normal math on the enum preserve the type.
38// By default, C++ doesn't define ++ on enum, and e+1 has type int.
39static inline void operator++(Engine& e, int unused) {
40 e = static_cast<Engine>(e+1);
41}
42
43static inline Engine operator+(Engine e, int i) {
44 return static_cast<Engine>(static_cast<int>(e)+i);
45}
46
47// A TestInstance caches per-regexp state for a given
48// regular expression in a given configuration
49// (UTF-8 vs Latin1, longest vs first match, etc.).
50class TestInstance {
51 public:
52 struct Result;
53
54 TestInstance(const StringPiece& regexp, Prog::MatchKind kind,
55 Regexp::ParseFlags flags);
56 ~TestInstance();
57 Regexp::ParseFlags flags() { return flags_; }
58 bool error() { return error_; }
59
60 // Runs a single test case: search in text, which is in context,
61 // using the given anchoring.
62 bool RunCase(const StringPiece& text, const StringPiece& context,
63 Prog::Anchor anchor);
64
65 private:
66 // Runs a single search using the named engine type.
67 void RunSearch(Engine type,
68 const StringPiece& text, const StringPiece& context,
69 Prog::Anchor anchor,
70 Result *result);
71
72 void LogMatch(const char* prefix, Engine e, const StringPiece& text,
73 const StringPiece& context, Prog::Anchor anchor);
74
75 const StringPiece regexp_str_; // regexp being tested
76 Prog::MatchKind kind_; // kind of match
77 Regexp::ParseFlags flags_; // flags for parsing regexp_str_
78 bool error_; // error during constructor?
79
80 Regexp* regexp_; // parsed regexp
81 int num_captures_; // regexp_->NumCaptures() cached
82 Prog* prog_; // compiled program
83 Prog* rprog_; // compiled reverse program
84 PCRE* re_; // PCRE implementation
85 RE2* re2_; // RE2 implementation
86
87 TestInstance(const TestInstance&) = delete;
88 TestInstance& operator=(const TestInstance&) = delete;
89};
90
91// A group of TestInstances for all possible configurations.
92class Tester {
93 public:
94 explicit Tester(const StringPiece& regexp);
95 ~Tester();
96
97 bool error() { return error_; }
98
99 // Runs a single test case: search in text, which is in context,
100 // using the given anchoring.
101 bool TestCase(const StringPiece& text, const StringPiece& context,
102 Prog::Anchor anchor);
103
104 // Run TestCase(text, text, anchor) for all anchoring modes.
105 bool TestInput(const StringPiece& text);
106
107 // Run TestCase(text, context, anchor) for all anchoring modes.
108 bool TestInputInContext(const StringPiece& text, const StringPiece& context);
109
110 private:
111 bool error_;
112 std::vector<TestInstance*> v_;
113
114 Tester(const Tester&) = delete;
115 Tester& operator=(const Tester&) = delete;
116};
117
118// Run all possible tests using regexp and text.
119bool TestRegexpOnText(const StringPiece& regexp, const StringPiece& text);
120
121} // namespace re2
122
123#endif // RE2_TESTING_TESTER_H_
124