1// Copyright 2008 The RE2 Authors. All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Exhaustive testing of regular expression matching.
6
7#include <stddef.h>
8#include <memory>
9#include <string>
10#include <vector>
11
12#include "util/test.h"
13#include "util/utf.h"
14#include "re2/testing/exhaustive_tester.h"
15
16namespace re2 {
17
18// Test simple character classes by themselves.
19TEST(CharacterClasses, Exhaustive) {
20 std::vector<std::string> atoms = Split(" ",
21 "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b .");
22 ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(),
23 5, Explode("ab"), "", "");
24}
25
26// Test simple character classes inside a___b (for example, a[a]b).
27TEST(CharacterClasses, ExhaustiveAB) {
28 std::vector<std::string> atoms = Split(" ",
29 "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b .");
30 ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(),
31 5, Explode("ab"), "a%sb", "");
32}
33
34// Returns UTF8 for Rune r
35static std::string UTF8(Rune r) {
36 char buf[UTFmax+1];
37 buf[runetochar(buf, &r)] = 0;
38 return std::string(buf);
39}
40
41// Returns a vector of "interesting" UTF8 characters.
42// Unicode is now too big to just return all of them,
43// so UTF8Characters return a set likely to be good test cases.
44static const std::vector<std::string>& InterestingUTF8() {
45 static bool init;
46 static std::vector<std::string> v;
47
48 if (init)
49 return v;
50
51 init = true;
52 // All the Latin1 equivalents are interesting.
53 for (int i = 1; i < 256; i++)
54 v.push_back(UTF8(i));
55
56 // After that, the codes near bit boundaries are
57 // interesting, because they span byte sequence lengths.
58 for (int j = 0; j < 8; j++)
59 v.push_back(UTF8(256 + j));
60 for (int i = 512; i < Runemax; i <<= 1)
61 for (int j = -8; j < 8; j++)
62 v.push_back(UTF8(i + j));
63
64 // The codes near Runemax, including Runemax itself, are interesting.
65 for (int j = -8; j <= 0; j++)
66 v.push_back(UTF8(Runemax + j));
67
68 return v;
69}
70
71// Test interesting UTF-8 characters against character classes.
72TEST(InterestingUTF8, SingleOps) {
73 std::vector<std::string> atoms = Split(" ",
74 ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B "
75 "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] "
76 "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] "
77 "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]");
78 std::vector<std::string> ops; // no ops
79 ExhaustiveTest(1, 0, atoms, ops,
80 1, InterestingUTF8(), "", "");
81}
82
83// Test interesting UTF-8 characters against character classes,
84// but wrap everything inside AB.
85TEST(InterestingUTF8, AB) {
86 std::vector<std::string> atoms = Split(" ",
87 ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B "
88 "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] "
89 "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] "
90 "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]");
91 std::vector<std::string> ops; // no ops
92 std::vector<std::string> alpha = InterestingUTF8();
93 for (size_t i = 0; i < alpha.size(); i++)
94 alpha[i] = "a" + alpha[i] + "b";
95 ExhaustiveTest(1, 0, atoms, ops,
96 1, alpha, "a%sb", "");
97}
98
99} // namespace re2
100
101