1 | // Copyright 2008 The RE2 Authors. All Rights Reserved. |
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | #ifndef RE2_TESTING_REGEXP_GENERATOR_H_ |
6 | #define RE2_TESTING_REGEXP_GENERATOR_H_ |
7 | |
8 | // Regular expression generator: generates all possible |
9 | // regular expressions within given parameters (see below for details). |
10 | |
11 | #include <stdint.h> |
12 | #include <random> |
13 | #include <string> |
14 | #include <vector> |
15 | |
16 | #include "util/util.h" |
17 | #include "re2/stringpiece.h" |
18 | |
19 | namespace re2 { |
20 | |
// Regular expression generator.
//
// Given a set of atom expressions like "a", "b", or "."
// and operators like "%s*", generates all possible regular expressions
// using at most maxatoms atom expressions and maxops operators.
// For each such expression re, calls HandleRegexp(re).
//
// Callers are expected to subclass RegexpGenerator and provide HandleRegexp.
// The class is abstract (HandleRegexp is pure virtual) and is neither
// copyable nor assignable.
//
class RegexpGenerator {
 public:
  // maxatoms and maxops are the limits described in the class comment;
  // atoms and ops are the candidate atom and operator strings.
  RegexpGenerator(int maxatoms, int maxops,
                  const std::vector<std::string>& atoms,
                  const std::vector<std::string>& ops);

  // Virtual because the class is meant to be used through subclasses.
  virtual ~RegexpGenerator() = default;

  // Generates all the regular expressions, calling HandleRegexp(re) for each.
  void Generate();

  // Generates n random regular expressions, calling HandleRegexp(re) for each.
  void GenerateRandom(int32_t seed, int n);

  // Handles a regular expression.  Must be provided by subclass.
  virtual void HandleRegexp(const std::string& regexp) = 0;

  // The egrep regexp operators: * + ? | and concatenation.
  static const std::vector<std::string>& EgrepOps();

 private:
  // Implementation helpers; declared here and defined out of line.
  void RunPostfix(const std::vector<std::string>& post);
  void GeneratePostfix(std::vector<std::string>* post,
                       int nstk, int ops, int lits);
  bool GenerateRandomPostfix(std::vector<std::string>* post,
                             int nstk, int ops, int lits);

  int maxatoms_;                    // Maximum number of atoms allowed in expr.
  int maxops_;                      // Maximum number of ops allowed in expr.
  std::vector<std::string> atoms_;  // Possible atoms.
  std::vector<std::string> ops_;    // Possible ops.
  std::minstd_rand0 rng_;           // Random number generator.

  // Copying and assignment are disabled.
  RegexpGenerator(const RegexpGenerator&) = delete;
  RegexpGenerator& operator=(const RegexpGenerator&) = delete;
};
65 | |
66 | // Helpers for preparing arguments to RegexpGenerator constructor. |
67 | |
68 | // Returns one string for each character in s. |
69 | std::vector<std::string> Explode(const StringPiece& s); |
70 | |
71 | // Splits string everywhere sep is found, returning |
72 | // vector of pieces. |
73 | std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s); |
74 | |
75 | } // namespace re2 |
76 | |
77 | #endif // RE2_TESTING_REGEXP_GENERATOR_H_ |
78 | |