1// Copyright 2008 The RE2 Authors. All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Exhaustive testing of regular expression matching.
6
7#include <stddef.h>
8#include <memory>
9#include <string>
10#include <vector>
11
12#include "util/test.h"
13#include "re2/testing/exhaustive_tester.h"
14
15namespace re2 {
16
17// Test empty string matches (aka "(?:)")
18TEST(EmptyString, Exhaustive) {
19 ExhaustiveTest(2, 2, Split(" ", "(?:) a"),
20 RegexpGenerator::EgrepOps(),
21 5, Split("", "ab"), "", "");
22}
23
24// Test escaped versions of regexp syntax.
25TEST(Punctuation, Literals) {
26 std::vector<std::string> alphabet = Explode("()*+?{}[]\\^$.");
27 std::vector<std::string> escaped = alphabet;
28 for (size_t i = 0; i < escaped.size(); i++)
29 escaped[i] = "\\" + escaped[i];
30 ExhaustiveTest(1, 1, escaped, RegexpGenerator::EgrepOps(),
31 2, alphabet, "", "");
32}
33
34// Test ^ $ . \A \z in presence of line endings.
35// Have to wrap the empty-width ones in (?:) so that
36// they can be repeated -- PCRE rejects ^* but allows (?:^)*
37TEST(LineEnds, Exhaustive) {
38 ExhaustiveTest(2, 2, Split(" ", "(?:^) (?:$) . a \\n (?:\\A) (?:\\z)"),
39 RegexpGenerator::EgrepOps(),
40 4, Explode("ab\n"), "", "");
41}
42
43// Test what does and does not match \n.
44// This would be a good test, except that PCRE seems to have a bug:
45// in single-byte character set mode (the default),
46// [^a] matches \n, but in UTF-8 mode it does not.
47// So when we run the test, the tester complains that
48// we don't agree with PCRE, but it's PCRE that is at fault.
49// For what it's worth, Perl gets this right (matches
50// regardless of whether UTF-8 input is selected):
51//
52// #!/usr/bin/perl
53// use POSIX qw(locale_h);
54// print "matches in latin1\n" if "\n" =~ /[^a]/;
55// setlocale("en_US.utf8");
56// print "matches in utf8\n" if "\n" =~ /[^a]/;
57//
58// The rule chosen for RE2 is that by default, like Perl,
59// dot does not match \n but negated character classes [^a] do.
60// (?s) will allow dot to match \n; there is no way in RE2
61// to stop [^a] from matching \n, though the underlying library
62// provides a mechanism, and RE2 could add new syntax if needed.
63//
64// TEST(Newlines, Exhaustive) {
65// std::vector<std::string> empty_vector;
66// ExhaustiveTest(1, 1, Split(" ", "\\n . a [^a]"),
67// RegexpGenerator::EgrepOps(),
68// 4, Explode("a\n"), "");
69// }
70
71} // namespace re2
72
73