1// Copyright 2008 The RE2 Authors. All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Random testing of regular expression matching.
6
7#include <stdio.h>
8#include <string>
9#include <vector>
10
11#include "util/test.h"
12#include "util/flags.h"
13#include "re2/testing/exhaustive_tester.h"
14
// Command-line flags consumed by RandomTest() below through GetFlag().
// The defaults are fixed so that a plain run always uses the same seeds;
// pass different values on the command line to explore other cases.

// Seed and count that RandomTest() passes to the tester's GenerateRandom().
DEFINE_FLAG(int, regexpseed, 404, "Random regexp seed.");
DEFINE_FLAG(int, regexpcount, 100, "How many random regexps to generate.");
// Seed and count that RandomTest() passes to the tester's RandomStrings().
DEFINE_FLAG(int, stringseed, 200, "Random string seed.");
DEFINE_FLAG(int, stringcount, 100, "How many random strings to generate.");
19
20namespace re2 {
21
22// Runs a random test on the given parameters.
23// (Always uses the same random seeds for reproducibility.
24// Can give different seeds on command line.)
25static void RandomTest(int maxatoms, int maxops,
26 const std::vector<std::string>& alphabet,
27 const std::vector<std::string>& ops,
28 int maxstrlen,
29 const std::vector<std::string>& stralphabet,
30 const std::string& wrapper) {
31 // Limit to smaller test cases in debug mode,
32 // because everything is so much slower.
33 if (RE2_DEBUG_MODE) {
34 maxatoms--;
35 maxops--;
36 maxstrlen /= 2;
37 }
38
39 ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
40 maxstrlen, stralphabet, wrapper, "");
41 t.RandomStrings(GetFlag(FLAGS_stringseed),
42 GetFlag(FLAGS_stringcount));
43 t.GenerateRandom(GetFlag(FLAGS_regexpseed),
44 GetFlag(FLAGS_regexpcount));
45 printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
46 t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
47 EXPECT_EQ(0, t.failures());
48}
49
50// Tests random small regexps involving literals and egrep operators.
51TEST(Random, SmallEgrepLiterals) {
52 RandomTest(5, 5, Explode("abc."), RegexpGenerator::EgrepOps(),
53 15, Explode("abc"),
54 "");
55}
56
57// Tests random bigger regexps involving literals and egrep operators.
58TEST(Random, BigEgrepLiterals) {
59 RandomTest(10, 10, Explode("abc."), RegexpGenerator::EgrepOps(),
60 15, Explode("abc"),
61 "");
62}
63
64// Tests random small regexps involving literals, capturing parens,
65// and egrep operators.
66TEST(Random, SmallEgrepCaptures) {
67 RandomTest(5, 5, Split(" ", "a (b) ."), RegexpGenerator::EgrepOps(),
68 15, Explode("abc"),
69 "");
70}
71
72// Tests random bigger regexps involving literals, capturing parens,
73// and egrep operators.
74TEST(Random, BigEgrepCaptures) {
75 RandomTest(10, 10, Split(" ", "a (b) ."), RegexpGenerator::EgrepOps(),
76 15, Explode("abc"),
77 "");
78}
79
80// Tests random large complicated expressions, using all the possible
81// operators, some literals, some parenthesized literals, and predefined
82// character classes like \d. (Adding larger character classes would
83// make for too many possibilities.)
84TEST(Random, Complicated) {
85 std::vector<std::string> ops = Split(" ",
86 "%s%s %s|%s %s* %s*? %s+ %s+? %s? %s?? "
87 "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} %s{1,2} "
88 "%s{2} %s{2,} %s{3,4} %s{4,5}");
89
90 // Use (?:\b) and (?:\B) instead of \b and \B,
91 // because PCRE rejects \b* but accepts (?:\b)*.
92 // Ditto ^ and $.
93 std::vector<std::string> atoms = Split(" ",
94 ". (?:^) (?:$) \\a \\f \\n \\r \\t \\v "
95 "\\d \\D \\s \\S \\w \\W (?:\\b) (?:\\B) "
96 "a (a) b c - \\\\");
97 std::vector<std::string> alphabet = Explode("abc123\001\002\003\t\r\n\v\f\a");
98 RandomTest(10, 10, atoms, ops, 20, alphabet, "");
99}
100
101} // namespace re2
102