1// -*- coding: utf-8 -*-
2// Copyright 2002-2009 The RE2 Authors. All Rights Reserved.
3// Use of this source code is governed by a BSD-style
4// license that can be found in the LICENSE file.
5
6// TODO: Test extractions for PartialMatch/Consume
7
8#include <errno.h>
9#include <stddef.h>
10#include <stdint.h>
11#include <string.h>
12#include <map>
13#include <string>
14#include <utility>
15#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
16#include <sys/mman.h>
17#include <unistd.h> /* for sysconf */
18#endif
19
20#include "util/test.h"
21#include "util/logging.h"
22#include "util/strutil.h"
23#include "re2/re2.h"
24#include "re2/regexp.h"
25
26namespace re2 {
27
28TEST(RE2, HexTests) {
29#define ASSERT_HEX(type, value) \
30 do { \
31 type v; \
32 ASSERT_TRUE( \
33 RE2::FullMatch(#value, "([0-9a-fA-F]+)[uUlL]*", RE2::Hex(&v))); \
34 ASSERT_EQ(v, 0x##value); \
35 ASSERT_TRUE(RE2::FullMatch("0x" #value, "([0-9a-fA-FxX]+)[uUlL]*", \
36 RE2::CRadix(&v))); \
37 ASSERT_EQ(v, 0x##value); \
38 } while (0)
39
40 ASSERT_HEX(short, 2bad);
41 ASSERT_HEX(unsigned short, 2badU);
42 ASSERT_HEX(int, dead);
43 ASSERT_HEX(unsigned int, deadU);
44 ASSERT_HEX(long, 7eadbeefL);
45 ASSERT_HEX(unsigned long, deadbeefUL);
46 ASSERT_HEX(long long, 12345678deadbeefLL);
47 ASSERT_HEX(unsigned long long, cafebabedeadbeefULL);
48
49#undef ASSERT_HEX
50}
51
52TEST(RE2, OctalTests) {
53#define ASSERT_OCTAL(type, value) \
54 do { \
55 type v; \
56 ASSERT_TRUE(RE2::FullMatch(#value, "([0-7]+)[uUlL]*", RE2::Octal(&v))); \
57 ASSERT_EQ(v, 0##value); \
58 ASSERT_TRUE(RE2::FullMatch("0" #value, "([0-9a-fA-FxX]+)[uUlL]*", \
59 RE2::CRadix(&v))); \
60 ASSERT_EQ(v, 0##value); \
61 } while (0)
62
63 ASSERT_OCTAL(short, 77777);
64 ASSERT_OCTAL(unsigned short, 177777U);
65 ASSERT_OCTAL(int, 17777777777);
66 ASSERT_OCTAL(unsigned int, 37777777777U);
67 ASSERT_OCTAL(long, 17777777777L);
68 ASSERT_OCTAL(unsigned long, 37777777777UL);
69 ASSERT_OCTAL(long long, 777777777777777777777LL);
70 ASSERT_OCTAL(unsigned long long, 1777777777777777777777ULL);
71
72#undef ASSERT_OCTAL
73}
74
75TEST(RE2, DecimalTests) {
76#define ASSERT_DECIMAL(type, value) \
77 do { \
78 type v; \
79 ASSERT_TRUE(RE2::FullMatch(#value, "(-?[0-9]+)[uUlL]*", &v)); \
80 ASSERT_EQ(v, value); \
81 ASSERT_TRUE( \
82 RE2::FullMatch(#value, "(-?[0-9a-fA-FxX]+)[uUlL]*", RE2::CRadix(&v))); \
83 ASSERT_EQ(v, value); \
84 } while (0)
85
86 ASSERT_DECIMAL(short, -1);
87 ASSERT_DECIMAL(unsigned short, 9999);
88 ASSERT_DECIMAL(int, -1000);
89 ASSERT_DECIMAL(unsigned int, 12345U);
90 ASSERT_DECIMAL(long, -10000000L);
91 ASSERT_DECIMAL(unsigned long, 3083324652U);
92 ASSERT_DECIMAL(long long, -100000000000000LL);
93 ASSERT_DECIMAL(unsigned long long, 1234567890987654321ULL);
94
95#undef ASSERT_DECIMAL
96}
97
98TEST(RE2, Replace) {
99 struct ReplaceTest {
100 const char *regexp;
101 const char *rewrite;
102 const char *original;
103 const char *single;
104 const char *global;
105 int greplace_count;
106 };
107 static const ReplaceTest tests[] = {
108 { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
109 "\\2\\1ay",
110 "the quick brown fox jumps over the lazy dogs.",
111 "ethay quick brown fox jumps over the lazy dogs.",
112 "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
113 9 },
114 { "\\w+",
115 "\\0-NOSPAM",
116 "abcd.efghi@google.com",
117 "abcd-NOSPAM.efghi@google.com",
118 "abcd-NOSPAM.efghi-NOSPAM@google-NOSPAM.com-NOSPAM",
119 4 },
120 { "^",
121 "(START)",
122 "foo",
123 "(START)foo",
124 "(START)foo",
125 1 },
126 { "^",
127 "(START)",
128 "",
129 "(START)",
130 "(START)",
131 1 },
132 { "$",
133 "(END)",
134 "",
135 "(END)",
136 "(END)",
137 1 },
138 { "b",
139 "bb",
140 "ababababab",
141 "abbabababab",
142 "abbabbabbabbabb",
143 5 },
144 { "b",
145 "bb",
146 "bbbbbb",
147 "bbbbbbb",
148 "bbbbbbbbbbbb",
149 6 },
150 { "b+",
151 "bb",
152 "bbbbbb",
153 "bb",
154 "bb",
155 1 },
156 { "b*",
157 "bb",
158 "bbbbbb",
159 "bb",
160 "bb",
161 1 },
162 { "b*",
163 "bb",
164 "aaaaa",
165 "bbaaaaa",
166 "bbabbabbabbabbabb",
167 6 },
168 // Check newline handling
169 { "a.*a",
170 "(\\0)",
171 "aba\naba",
172 "(aba)\naba",
173 "(aba)\n(aba)",
174 2 },
175 { "", NULL, NULL, NULL, NULL, 0 }
176 };
177
178 for (const ReplaceTest* t = tests; t->original != NULL; t++) {
179 std::string one(t->original);
180 ASSERT_TRUE(RE2::Replace(&one, t->regexp, t->rewrite));
181 ASSERT_EQ(one, t->single);
182 std::string all(t->original);
183 ASSERT_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count)
184 << "Got: " << all;
185 ASSERT_EQ(all, t->global);
186 }
187}
188
189static void TestCheckRewriteString(const char* regexp, const char* rewrite,
190 bool expect_ok) {
191 std::string error;
192 RE2 exp(regexp);
193 bool actual_ok = exp.CheckRewriteString(rewrite, &error);
194 EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error;
195}
196
197TEST(CheckRewriteString, all) {
198 TestCheckRewriteString("abc", "foo", true);
199 TestCheckRewriteString("abc", "foo\\", false);
200 TestCheckRewriteString("abc", "foo\\0bar", true);
201
202 TestCheckRewriteString("a(b)c", "foo", true);
203 TestCheckRewriteString("a(b)c", "foo\\0bar", true);
204 TestCheckRewriteString("a(b)c", "foo\\1bar", true);
205 TestCheckRewriteString("a(b)c", "foo\\2bar", false);
206 TestCheckRewriteString("a(b)c", "f\\\\2o\\1o", true);
207
208 TestCheckRewriteString("a(b)(c)", "foo\\12", true);
209 TestCheckRewriteString("a(b)(c)", "f\\2o\\1o", true);
210 TestCheckRewriteString("a(b)(c)", "f\\oo\\1", false);
211}
212
213TEST(RE2, Extract) {
214 std::string s;
215
216 ASSERT_TRUE(RE2::Extract("boris@kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s));
217 ASSERT_EQ(s, "kremvax!boris");
218
219 ASSERT_TRUE(RE2::Extract("foo", ".*", "'\\0'", &s));
220 ASSERT_EQ(s, "'foo'");
221 // check that false match doesn't overwrite
222 ASSERT_FALSE(RE2::Extract("baz", "bar", "'\\0'", &s));
223 ASSERT_EQ(s, "'foo'");
224}
225
226TEST(RE2, Consume) {
227 RE2 r("\\s*(\\w+)"); // matches a word, possibly proceeded by whitespace
228 std::string word;
229
230 std::string s(" aaa b!@#$@#$cccc");
231 StringPiece input(s);
232
233 ASSERT_TRUE(RE2::Consume(&input, r, &word));
234 ASSERT_EQ(word, "aaa") << " input: " << input;
235 ASSERT_TRUE(RE2::Consume(&input, r, &word));
236 ASSERT_EQ(word, "b") << " input: " << input;
237 ASSERT_FALSE(RE2::Consume(&input, r, &word)) << " input: " << input;
238}
239
240TEST(RE2, ConsumeN) {
241 const std::string s(" one two three 4");
242 StringPiece input(s);
243
244 RE2::Arg argv[2];
245 const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
246
247 // 0 arg
248 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0)); // Skips "one".
249
250 // 1 arg
251 std::string word;
252 argv[0] = &word;
253 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1));
254 EXPECT_EQ("two", word);
255
256 // Multi-args
257 int n;
258 argv[1] = &n;
259 EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)\\s*(\\d+)", args, 2));
260 EXPECT_EQ("three", word);
261 EXPECT_EQ(4, n);
262}
263
264TEST(RE2, FindAndConsume) {
265 RE2 r("(\\w+)"); // matches a word
266 std::string word;
267
268 std::string s(" aaa b!@#$@#$cccc");
269 StringPiece input(s);
270
271 ASSERT_TRUE(RE2::FindAndConsume(&input, r, &word));
272 ASSERT_EQ(word, "aaa");
273 ASSERT_TRUE(RE2::FindAndConsume(&input, r, &word));
274 ASSERT_EQ(word, "b");
275 ASSERT_TRUE(RE2::FindAndConsume(&input, r, &word));
276 ASSERT_EQ(word, "cccc");
277 ASSERT_FALSE(RE2::FindAndConsume(&input, r, &word));
278
279 // Check that FindAndConsume works without any submatches.
280 // Earlier version used uninitialized data for
281 // length to consume.
282 input = "aaa";
283 ASSERT_TRUE(RE2::FindAndConsume(&input, "aaa"));
284 ASSERT_EQ(input, "");
285}
286
287TEST(RE2, FindAndConsumeN) {
288 const std::string s(" one two three 4");
289 StringPiece input(s);
290
291 RE2::Arg argv[2];
292 const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
293
294 // 0 arg
295 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0)); // Skips "one".
296
297 // 1 arg
298 std::string word;
299 argv[0] = &word;
300 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1));
301 EXPECT_EQ("two", word);
302
303 // Multi-args
304 int n;
305 argv[1] = &n;
306 EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)\\s*(\\d+)", args, 2));
307 EXPECT_EQ("three", word);
308 EXPECT_EQ(4, n);
309}
310
311TEST(RE2, MatchNumberPeculiarity) {
312 RE2 r("(foo)|(bar)|(baz)");
313 std::string word1;
314 std::string word2;
315 std::string word3;
316
317 ASSERT_TRUE(RE2::PartialMatch("foo", r, &word1, &word2, &word3));
318 ASSERT_EQ(word1, "foo");
319 ASSERT_EQ(word2, "");
320 ASSERT_EQ(word3, "");
321 ASSERT_TRUE(RE2::PartialMatch("bar", r, &word1, &word2, &word3));
322 ASSERT_EQ(word1, "");
323 ASSERT_EQ(word2, "bar");
324 ASSERT_EQ(word3, "");
325 ASSERT_TRUE(RE2::PartialMatch("baz", r, &word1, &word2, &word3));
326 ASSERT_EQ(word1, "");
327 ASSERT_EQ(word2, "");
328 ASSERT_EQ(word3, "baz");
329 ASSERT_FALSE(RE2::PartialMatch("f", r, &word1, &word2, &word3));
330
331 std::string a;
332 ASSERT_TRUE(RE2::FullMatch("hello", "(foo)|hello", &a));
333 ASSERT_EQ(a, "");
334}
335
336TEST(RE2, Match) {
337 RE2 re("((\\w+):([0-9]+))"); // extracts host and port
338 StringPiece group[4];
339
340 // No match.
341 StringPiece s = "zyzzyva";
342 ASSERT_FALSE(
343 re.Match(s, 0, s.size(), RE2::UNANCHORED, group, arraysize(group)));
344
345 // Matches and extracts.
346 s = "a chrisr:9000 here";
347 ASSERT_TRUE(
348 re.Match(s, 0, s.size(), RE2::UNANCHORED, group, arraysize(group)));
349 ASSERT_EQ(group[0], "chrisr:9000");
350 ASSERT_EQ(group[1], "chrisr:9000");
351 ASSERT_EQ(group[2], "chrisr");
352 ASSERT_EQ(group[3], "9000");
353
354 std::string all, host;
355 int port;
356 ASSERT_TRUE(RE2::PartialMatch("a chrisr:9000 here", re, &all, &host, &port));
357 ASSERT_EQ(all, "chrisr:9000");
358 ASSERT_EQ(host, "chrisr");
359 ASSERT_EQ(port, 9000);
360}
361
362static void TestRecursion(int size, const char* pattern) {
363 // Fill up a string repeating the pattern given
364 std::string domain;
365 domain.resize(size);
366 size_t patlen = strlen(pattern);
367 for (int i = 0; i < size; i++) {
368 domain[i] = pattern[i % patlen];
369 }
370 // Just make sure it doesn't crash due to too much recursion.
371 RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet);
372 RE2::FullMatch(domain, re);
373}
374
375// A meta-quoted string, interpreted as a pattern, should always match
376// the original unquoted string.
377static void TestQuoteMeta(const std::string& unquoted,
378 const RE2::Options& options = RE2::DefaultOptions) {
379 std::string quoted = RE2::QuoteMeta(unquoted);
380 RE2 re(quoted, options);
381 EXPECT_TRUE(RE2::FullMatch(unquoted, re))
382 << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
383}
384
385// A meta-quoted string, interpreted as a pattern, should always match
386// the original unquoted string.
387static void NegativeTestQuoteMeta(
388 const std::string& unquoted, const std::string& should_not_match,
389 const RE2::Options& options = RE2::DefaultOptions) {
390 std::string quoted = RE2::QuoteMeta(unquoted);
391 RE2 re(quoted, options);
392 EXPECT_FALSE(RE2::FullMatch(should_not_match, re))
393 << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
394}
395
396// Tests that quoted meta characters match their original strings,
397// and that a few things that shouldn't match indeed do not.
398TEST(QuoteMeta, Simple) {
399 TestQuoteMeta("foo");
400 TestQuoteMeta("foo.bar");
401 TestQuoteMeta("foo\\.bar");
402 TestQuoteMeta("[1-9]");
403 TestQuoteMeta("1.5-2.0?");
404 TestQuoteMeta("\\d");
405 TestQuoteMeta("Who doesn't like ice cream?");
406 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
407 TestQuoteMeta("((?!)xxx).*yyy");
408 TestQuoteMeta("([");
409}
410TEST(QuoteMeta, SimpleNegative) {
411 NegativeTestQuoteMeta("foo", "bar");
412 NegativeTestQuoteMeta("...", "bar");
413 NegativeTestQuoteMeta("\\.", ".");
414 NegativeTestQuoteMeta("\\.", "..");
415 NegativeTestQuoteMeta("(a)", "a");
416 NegativeTestQuoteMeta("(a|b)", "a");
417 NegativeTestQuoteMeta("(a|b)", "(a)");
418 NegativeTestQuoteMeta("(a|b)", "a|b");
419 NegativeTestQuoteMeta("[0-9]", "0");
420 NegativeTestQuoteMeta("[0-9]", "0-9");
421 NegativeTestQuoteMeta("[0-9]", "[9]");
422 NegativeTestQuoteMeta("((?!)xxx)", "xxx");
423}
424
425TEST(QuoteMeta, Latin1) {
426 TestQuoteMeta("3\xb2 = 9", RE2::Latin1);
427}
428
429TEST(QuoteMeta, UTF8) {
430 TestQuoteMeta("Plácido Domingo");
431 TestQuoteMeta("xyz"); // No fancy utf8.
432 TestQuoteMeta("\xc2\xb0"); // 2-byte utf8 -- a degree symbol.
433 TestQuoteMeta("27\xc2\xb0 degrees"); // As a middle character.
434 TestQuoteMeta("\xe2\x80\xb3"); // 3-byte utf8 -- a double prime.
435 TestQuoteMeta("\xf0\x9d\x85\x9f"); // 4-byte utf8 -- a music note.
436 TestQuoteMeta("27\xc2\xb0"); // Interpreted as Latin-1, this should
437 // still work.
438 NegativeTestQuoteMeta("27\xc2\xb0",
439 "27\\\xc2\\\xb0"); // 2-byte utf8 -- a degree symbol.
440}
441
442TEST(QuoteMeta, HasNull) {
443 std::string has_null;
444
445 // string with one null character
446 has_null += '\0';
447 TestQuoteMeta(has_null);
448 NegativeTestQuoteMeta(has_null, "");
449
450 // Don't want null-followed-by-'1' to be interpreted as '\01'.
451 has_null += '1';
452 TestQuoteMeta(has_null);
453 NegativeTestQuoteMeta(has_null, "\1");
454}
455
456TEST(ProgramSize, BigProgram) {
457 RE2 re_simple("simple regexp");
458 RE2 re_medium("medium.*regexp");
459 RE2 re_complex("complex.{1,128}regexp");
460
461 ASSERT_GT(re_simple.ProgramSize(), 0);
462 ASSERT_GT(re_medium.ProgramSize(), re_simple.ProgramSize());
463 ASSERT_GT(re_complex.ProgramSize(), re_medium.ProgramSize());
464
465 ASSERT_GT(re_simple.ReverseProgramSize(), 0);
466 ASSERT_GT(re_medium.ReverseProgramSize(), re_simple.ReverseProgramSize());
467 ASSERT_GT(re_complex.ReverseProgramSize(), re_medium.ReverseProgramSize());
468}
469
470TEST(ProgramFanout, BigProgram) {
471 RE2 re1("(?:(?:(?:(?:(?:.)?){1})*)+)");
472 RE2 re10("(?:(?:(?:(?:(?:.)?){10})*)+)");
473 RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)");
474 RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)");
475
476 std::map<int, int> histogram;
477
478 // 3 is the largest non-empty bucket and has 1 element.
479 ASSERT_EQ(3, re1.ProgramFanout(&histogram));
480 ASSERT_EQ(1, histogram[3]);
481
482 // 7 is the largest non-empty bucket and has 10 elements.
483 ASSERT_EQ(7, re10.ProgramFanout(&histogram));
484 ASSERT_EQ(10, histogram[7]);
485
486 // 10 is the largest non-empty bucket and has 100 elements.
487 ASSERT_EQ(10, re100.ProgramFanout(&histogram));
488 ASSERT_EQ(100, histogram[10]);
489
490 // 13 is the largest non-empty bucket and has 1000 elements.
491 ASSERT_EQ(13, re1000.ProgramFanout(&histogram));
492 ASSERT_EQ(1000, histogram[13]);
493
494 // 2 is the largest non-empty bucket and has 3 elements.
495 // This differs from the others due to how reverse `.' works.
496 ASSERT_EQ(2, re1.ReverseProgramFanout(&histogram));
497 ASSERT_EQ(3, histogram[2]);
498
499 // 5 is the largest non-empty bucket and has 10 elements.
500 ASSERT_EQ(5, re10.ReverseProgramFanout(&histogram));
501 ASSERT_EQ(10, histogram[5]);
502
503 // 9 is the largest non-empty bucket and has 100 elements.
504 ASSERT_EQ(9, re100.ReverseProgramFanout(&histogram));
505 ASSERT_EQ(100, histogram[9]);
506
507 // 12 is the largest non-empty bucket and has 1000 elements.
508 ASSERT_EQ(12, re1000.ReverseProgramFanout(&histogram));
509 ASSERT_EQ(1000, histogram[12]);
510}
511
512// Issue 956519: handling empty character sets was
513// causing NULL dereference. This tests a few empty character sets.
514// (The way to get an empty character set is to negate a full one.)
515TEST(EmptyCharset, Fuzz) {
516 static const char *empties[] = {
517 "[^\\S\\s]",
518 "[^\\S[:space:]]",
519 "[^\\D\\d]",
520 "[^\\D[:digit:]]"
521 };
522 for (size_t i = 0; i < arraysize(empties); i++)
523 ASSERT_FALSE(RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
524}
525
526// Bitstate assumes that kInstFail instructions in
527// alternations or capture groups have been "compiled away".
528TEST(EmptyCharset, BitstateAssumptions) {
529 // Captures trigger use of Bitstate.
530 static const char *nop_empties[] = {
531 "((((()))))" "[^\\S\\s]?",
532 "((((()))))" "([^\\S\\s])?",
533 "((((()))))" "([^\\S\\s]|[^\\S\\s])?",
534 "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)"
535 };
536 StringPiece group[6];
537 for (size_t i = 0; i < arraysize(nop_empties); i++)
538 ASSERT_TRUE(RE2(nop_empties[i]).Match("", 0, 0, RE2::UNANCHORED, group, 6));
539}
540
541// Test that named groups work correctly.
542TEST(Capture, NamedGroups) {
543 {
544 RE2 re("(hello world)");
545 ASSERT_EQ(re.NumberOfCapturingGroups(), 1);
546 const std::map<std::string, int>& m = re.NamedCapturingGroups();
547 ASSERT_EQ(m.size(), 0);
548 }
549
550 {
551 RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
552 ASSERT_EQ(re.NumberOfCapturingGroups(), 6);
553 const std::map<std::string, int>& m = re.NamedCapturingGroups();
554 ASSERT_EQ(m.size(), 4);
555 ASSERT_EQ(m.find("A")->second, 1);
556 ASSERT_EQ(m.find("B")->second, 2);
557 ASSERT_EQ(m.find("C")->second, 3);
558 ASSERT_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous
559 }
560}
561
562TEST(RE2, CapturedGroupTest) {
563 RE2 re("directions from (?P<S>.*) to (?P<D>.*)");
564 int num_groups = re.NumberOfCapturingGroups();
565 EXPECT_EQ(2, num_groups);
566 std::string args[4];
567 RE2::Arg arg0(&args[0]);
568 RE2::Arg arg1(&args[1]);
569 RE2::Arg arg2(&args[2]);
570 RE2::Arg arg3(&args[3]);
571
572 const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3};
573 EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose",
574 re, matches, num_groups));
575 const std::map<std::string, int>& named_groups = re.NamedCapturingGroups();
576 EXPECT_TRUE(named_groups.find("S") != named_groups.end());
577 EXPECT_TRUE(named_groups.find("D") != named_groups.end());
578
579 // The named group index is 1-based.
580 int source_group_index = named_groups.find("S")->second;
581 int destination_group_index = named_groups.find("D")->second;
582 EXPECT_EQ(1, source_group_index);
583 EXPECT_EQ(2, destination_group_index);
584
585 // The args is zero-based.
586 EXPECT_EQ("mountain view", args[source_group_index - 1]);
587 EXPECT_EQ("san jose", args[destination_group_index - 1]);
588}
589
590TEST(RE2, FullMatchWithNoArgs) {
591 ASSERT_TRUE(RE2::FullMatch("h", "h"));
592 ASSERT_TRUE(RE2::FullMatch("hello", "hello"));
593 ASSERT_TRUE(RE2::FullMatch("hello", "h.*o"));
594 ASSERT_FALSE(RE2::FullMatch("othello", "h.*o")); // Must be anchored at front
595 ASSERT_FALSE(RE2::FullMatch("hello!", "h.*o")); // Must be anchored at end
596}
597
598TEST(RE2, PartialMatch) {
599 ASSERT_TRUE(RE2::PartialMatch("x", "x"));
600 ASSERT_TRUE(RE2::PartialMatch("hello", "h.*o"));
601 ASSERT_TRUE(RE2::PartialMatch("othello", "h.*o"));
602 ASSERT_TRUE(RE2::PartialMatch("hello!", "h.*o"));
603 ASSERT_TRUE(RE2::PartialMatch("x", "((((((((((((((((((((x))))))))))))))))))))"));
604}
605
606TEST(RE2, PartialMatchN) {
607 RE2::Arg argv[2];
608 const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
609
610 // 0 arg
611 EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", args, 0));
612 EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", args, 0));
613
614 // 1 arg
615 int i;
616 argv[0] = &i;
617 EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", args, 1));
618 EXPECT_EQ(1001, i);
619 EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1));
620
621 // Multi-arg
622 std::string s;
623 argv[1] = &s;
624 EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2));
625 EXPECT_EQ(42, i);
626 EXPECT_EQ("life", s);
627 EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", args, 2));
628}
629
630TEST(RE2, FullMatchZeroArg) {
631 // Zero-arg
632 ASSERT_TRUE(RE2::FullMatch("1001", "\\d+"));
633}
634
635TEST(RE2, FullMatchOneArg) {
636 int i;
637
638 // Single-arg
639 ASSERT_TRUE(RE2::FullMatch("1001", "(\\d+)", &i));
640 ASSERT_EQ(i, 1001);
641 ASSERT_TRUE(RE2::FullMatch("-123", "(-?\\d+)", &i));
642 ASSERT_EQ(i, -123);
643 ASSERT_FALSE(RE2::FullMatch("10", "()\\d+", &i));
644 ASSERT_FALSE(
645 RE2::FullMatch("1234567890123456789012345678901234567890", "(\\d+)", &i));
646}
647
648TEST(RE2, FullMatchIntegerArg) {
649 int i;
650
651 // Digits surrounding integer-arg
652 ASSERT_TRUE(RE2::FullMatch("1234", "1(\\d*)4", &i));
653 ASSERT_EQ(i, 23);
654 ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)\\d+", &i));
655 ASSERT_EQ(i, 1);
656 ASSERT_TRUE(RE2::FullMatch("-1234", "(-\\d)\\d+", &i));
657 ASSERT_EQ(i, -1);
658 ASSERT_TRUE(RE2::PartialMatch("1234", "(\\d)", &i));
659 ASSERT_EQ(i, 1);
660 ASSERT_TRUE(RE2::PartialMatch("-1234", "(-\\d)", &i));
661 ASSERT_EQ(i, -1);
662}
663
664TEST(RE2, FullMatchStringArg) {
665 std::string s;
666 // String-arg
667 ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", &s));
668 ASSERT_EQ(s, std::string("ell"));
669}
670
671TEST(RE2, FullMatchStringPieceArg) {
672 int i;
673 // StringPiece-arg
674 StringPiece sp;
675 ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &sp, &i));
676 ASSERT_EQ(sp.size(), 4);
677 ASSERT_TRUE(memcmp(sp.data(), "ruby", 4) == 0);
678 ASSERT_EQ(i, 1234);
679}
680
681TEST(RE2, FullMatchMultiArg) {
682 int i;
683 std::string s;
684 // Multi-arg
685 ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &s, &i));
686 ASSERT_EQ(s, std::string("ruby"));
687 ASSERT_EQ(i, 1234);
688}
689
690TEST(RE2, FullMatchN) {
691 RE2::Arg argv[2];
692 const RE2::Arg* const args[2] = { &argv[0], &argv[1] };
693
694 // 0 arg
695 EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", args, 0));
696 EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", args, 0));
697
698 // 1 arg
699 int i;
700 argv[0] = &i;
701 EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", args, 1));
702 EXPECT_EQ(1001, i);
703 EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1));
704
705 // Multi-arg
706 std::string s;
707 argv[1] = &s;
708 EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2));
709 EXPECT_EQ(42, i);
710 EXPECT_EQ("life", s);
711 EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", args, 2));
712}
713
714TEST(RE2, FullMatchIgnoredArg) {
715 int i;
716 std::string s;
717
718 // Old-school NULL should be ignored.
719 ASSERT_TRUE(
720 RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &s, (void*)NULL, &i));
721 ASSERT_EQ(s, std::string("ruby"));
722 ASSERT_EQ(i, 1234);
723
724 // C++11 nullptr should also be ignored.
725 ASSERT_TRUE(RE2::FullMatch("rubz:1235", "(\\w+)(:)(\\d+)", &s, nullptr, &i));
726 ASSERT_EQ(s, std::string("rubz"));
727 ASSERT_EQ(i, 1235);
728}
729
730TEST(RE2, FullMatchTypedNullArg) {
731 std::string s;
732
733 // Ignore non-void* NULL arg
734 ASSERT_TRUE(RE2::FullMatch("hello", "he(.*)lo", (char*)NULL));
735 ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (std::string*)NULL));
736 ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", (StringPiece*)NULL));
737 ASSERT_TRUE(RE2::FullMatch("1234", "(.*)", (int*)NULL));
738 ASSERT_TRUE(RE2::FullMatch("1234567890123456", "(.*)", (long long*)NULL));
739 ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)", (double*)NULL));
740 ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)", (float*)NULL));
741
742 // Fail on non-void* NULL arg if the match doesn't parse for the given type.
743 ASSERT_FALSE(RE2::FullMatch("hello", "h(.*)lo", &s, (char*)NULL));
744 ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (int*)NULL));
745 ASSERT_FALSE(RE2::FullMatch("1234567890123456", "(.*)", (int*)NULL));
746 ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (double*)NULL));
747 ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", (float*)NULL));
748}
749
750// Check that numeric parsing code does not read past the end of
751// the number being parsed.
752// This implementation requires mmap(2) et al. and thus cannot
753// be used unless they are available.
754TEST(RE2, NULTerminated) {
755#if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0
756 char *v;
757 int x;
758 long pagesize = sysconf(_SC_PAGE_SIZE);
759
760#ifndef MAP_ANONYMOUS
761#define MAP_ANONYMOUS MAP_ANON
762#endif
763 v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
764 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
765 ASSERT_TRUE(v != reinterpret_cast<char*>(-1));
766 LOG(INFO) << "Memory at " << (void*)v;
767 ASSERT_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;
768 v[pagesize - 1] = '1';
769
770 x = 0;
771 ASSERT_TRUE(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x));
772 ASSERT_EQ(x, 1);
773#endif
774}
775
776TEST(RE2, FullMatchTypeTests) {
777 // Type tests
778 std::string zeros(1000, '0');
779 {
780 char c;
781 ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
782 ASSERT_EQ(c, 'H');
783 }
784 {
785 unsigned char c;
786 ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
787 ASSERT_EQ(c, static_cast<unsigned char>('H'));
788 }
789 {
790 int16_t v;
791 ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
792 ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
793 ASSERT_TRUE(RE2::FullMatch("32767", "(-?\\d+)", &v)); ASSERT_EQ(v, 32767);
794 ASSERT_TRUE(RE2::FullMatch("-32768", "(-?\\d+)", &v)); ASSERT_EQ(v, -32768);
795 ASSERT_FALSE(RE2::FullMatch("-32769", "(-?\\d+)", &v));
796 ASSERT_FALSE(RE2::FullMatch("32768", "(-?\\d+)", &v));
797 }
798 {
799 uint16_t v;
800 ASSERT_TRUE(RE2::FullMatch("100", "(\\d+)", &v)); ASSERT_EQ(v, 100);
801 ASSERT_TRUE(RE2::FullMatch("32767", "(\\d+)", &v)); ASSERT_EQ(v, 32767);
802 ASSERT_TRUE(RE2::FullMatch("65535", "(\\d+)", &v)); ASSERT_EQ(v, 65535);
803 ASSERT_FALSE(RE2::FullMatch("65536", "(\\d+)", &v));
804 }
805 {
806 int32_t v;
807 static const int32_t max = INT32_C(0x7fffffff);
808 static const int32_t min = -max - 1;
809 ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
810 ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
811 ASSERT_TRUE(RE2::FullMatch("2147483647", "(-?\\d+)", &v)); ASSERT_EQ(v, max);
812 ASSERT_TRUE(RE2::FullMatch("-2147483648", "(-?\\d+)", &v)); ASSERT_EQ(v, min);
813 ASSERT_FALSE(RE2::FullMatch("-2147483649", "(-?\\d+)", &v));
814 ASSERT_FALSE(RE2::FullMatch("2147483648", "(-?\\d+)", &v));
815
816 ASSERT_TRUE(RE2::FullMatch(zeros + "2147483647", "(-?\\d+)", &v));
817 ASSERT_EQ(v, max);
818 ASSERT_TRUE(RE2::FullMatch("-" + zeros + "2147483648", "(-?\\d+)", &v));
819 ASSERT_EQ(v, min);
820
821 ASSERT_FALSE(RE2::FullMatch("-" + zeros + "2147483649", "(-?\\d+)", &v));
822 ASSERT_TRUE(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&v)));
823 ASSERT_EQ(v, max);
824 ASSERT_FALSE(RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v)));
825 }
826 {
827 uint32_t v;
828 static const uint32_t max = UINT32_C(0xffffffff);
829 ASSERT_TRUE(RE2::FullMatch("100", "(\\d+)", &v)); ASSERT_EQ(v, 100);
830 ASSERT_TRUE(RE2::FullMatch("4294967295", "(\\d+)", &v)); ASSERT_EQ(v, max);
831 ASSERT_FALSE(RE2::FullMatch("4294967296", "(\\d+)", &v));
832 ASSERT_FALSE(RE2::FullMatch("-1", "(\\d+)", &v));
833
834 ASSERT_TRUE(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); ASSERT_EQ(v, max);
835 }
836 {
837 int64_t v;
838 static const int64_t max = INT64_C(0x7fffffffffffffff);
839 static const int64_t min = -max - 1;
840 std::string str;
841
842 ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
843 ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
844
845 str = std::to_string(max);
846 ASSERT_TRUE(RE2::FullMatch(str, "(-?\\d+)", &v)); ASSERT_EQ(v, max);
847
848 str = std::to_string(min);
849 ASSERT_TRUE(RE2::FullMatch(str, "(-?\\d+)", &v)); ASSERT_EQ(v, min);
850
851 str = std::to_string(max);
852 ASSERT_NE(str.back(), '9');
853 str.back()++;
854 ASSERT_FALSE(RE2::FullMatch(str, "(-?\\d+)", &v));
855
856 str = std::to_string(min);
857 ASSERT_NE(str.back(), '9');
858 str.back()++;
859 ASSERT_FALSE(RE2::FullMatch(str, "(-?\\d+)", &v));
860 }
861 {
862 uint64_t v;
863 int64_t v2;
864 static const uint64_t max = UINT64_C(0xffffffffffffffff);
865 std::string str;
866
867 ASSERT_TRUE(RE2::FullMatch("100", "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
868 ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v2)); ASSERT_EQ(v2, -100);
869
870 str = std::to_string(max);
871 ASSERT_TRUE(RE2::FullMatch(str, "(-?\\d+)", &v)); ASSERT_EQ(v, max);
872
873 ASSERT_NE(str.back(), '9');
874 str.back()++;
875 ASSERT_FALSE(RE2::FullMatch(str, "(-?\\d+)", &v));
876 }
877}
878
879TEST(RE2, FloatingPointFullMatchTypes) {
880 std::string zeros(1000, '0');
881 {
882 float v;
883 ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &v)); ASSERT_EQ(v, 100);
884 ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
885 ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, float(1e23));
886 ASSERT_TRUE(RE2::FullMatch(" 100", "(.*)", &v)); ASSERT_EQ(v, 100);
887
888 ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
889 ASSERT_EQ(v, float(1e23));
890
891 // 6700000000081920.1 is an edge case.
892 // 6700000000081920 is exactly halfway between
893 // two float32s, so the .1 should make it round up.
894 // However, the .1 is outside the precision possible with
895 // a float64: the nearest float64 is 6700000000081920.
896 // So if the code uses strtod and then converts to float32,
897 // round-to-even will make it round down instead of up.
898 // To pass the test, the parser must call strtof directly.
899 // This test case is carefully chosen to use only a 17-digit
900 // number, since C does not guarantee to get the correctly
901 // rounded answer for strtod and strtof unless the input is
902 // short.
903 //
904 // This is known to fail on Cygwin and MinGW due to a broken
905 // implementation of strtof(3). And apparently MSVC too. Sigh.
906#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
907 ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
908 ASSERT_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f);
909 ASSERT_TRUE(RE2::FullMatch("6700000000081920.1", "(.*)", &v));
910 ASSERT_EQ(v, 6700000000081920.1f)
911 << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f);
912#endif
913 }
914 {
915 double v;
916 ASSERT_TRUE(RE2::FullMatch("100", "(.*)", &v)); ASSERT_EQ(v, 100);
917 ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
918 ASSERT_TRUE(RE2::FullMatch("1e23", "(.*)", &v)); ASSERT_EQ(v, 1e23);
919 ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
920 ASSERT_EQ(v, double(1e23));
921
922 ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
923 ASSERT_EQ(v, 0.1) << StringPrintf("%.17g != %.17g", v, 0.1);
924 ASSERT_TRUE(RE2::FullMatch("1.00000005960464485", "(.*)", &v));
925 ASSERT_EQ(v, 1.0000000596046448)
926 << StringPrintf("%.17g != %.17g", v, 1.0000000596046448);
927 }
928}
929
930TEST(RE2, FullMatchAnchored) {
931 int i;
932 // Check that matching is fully anchored
933 ASSERT_FALSE(RE2::FullMatch("x1001", "(\\d+)", &i));
934 ASSERT_FALSE(RE2::FullMatch("1001x", "(\\d+)", &i));
935 ASSERT_TRUE(RE2::FullMatch("x1001", "x(\\d+)", &i)); ASSERT_EQ(i, 1001);
936 ASSERT_TRUE(RE2::FullMatch("1001x", "(\\d+)x", &i)); ASSERT_EQ(i, 1001);
937}
938
939TEST(RE2, FullMatchBraces) {
940 // Braces
941 ASSERT_TRUE(RE2::FullMatch("0abcd", "[0-9a-f+.-]{5,}"));
942 ASSERT_TRUE(RE2::FullMatch("0abcde", "[0-9a-f+.-]{5,}"));
943 ASSERT_FALSE(RE2::FullMatch("0abc", "[0-9a-f+.-]{5,}"));
944}
945
946TEST(RE2, Complicated) {
947 // Complicated RE2
948 ASSERT_TRUE(RE2::FullMatch("foo", "foo|bar|[A-Z]"));
949 ASSERT_TRUE(RE2::FullMatch("bar", "foo|bar|[A-Z]"));
950 ASSERT_TRUE(RE2::FullMatch("X", "foo|bar|[A-Z]"));
951 ASSERT_FALSE(RE2::FullMatch("XY", "foo|bar|[A-Z]"));
952}
953
954TEST(RE2, FullMatchEnd) {
955 // Check full-match handling (needs '$' tacked on internally)
956 ASSERT_TRUE(RE2::FullMatch("fo", "fo|foo"));
957 ASSERT_TRUE(RE2::FullMatch("foo", "fo|foo"));
958 ASSERT_TRUE(RE2::FullMatch("fo", "fo|foo$"));
959 ASSERT_TRUE(RE2::FullMatch("foo", "fo|foo$"));
960 ASSERT_TRUE(RE2::FullMatch("foo", "foo$"));
961 ASSERT_FALSE(RE2::FullMatch("foo$bar", "foo\\$"));
962 ASSERT_FALSE(RE2::FullMatch("fox", "fo|bar"));
963
964 // Uncomment the following if we change the handling of '$' to
965 // prevent it from matching a trailing newline
966 if (false) {
967 // Check that we don't get bitten by pcre's special handling of a
968 // '\n' at the end of the string matching '$'
969 ASSERT_FALSE(RE2::PartialMatch("foo\n", "foo$"));
970 }
971}
972
973TEST(RE2, FullMatchArgCount) {
974 // Number of args
975 int a[16];
976 ASSERT_TRUE(RE2::FullMatch("", ""));
977
978 memset(a, 0, sizeof(0));
979 ASSERT_TRUE(RE2::FullMatch("1", "(\\d){1}", &a[0]));
980 ASSERT_EQ(a[0], 1);
981
982 memset(a, 0, sizeof(0));
983 ASSERT_TRUE(RE2::FullMatch("12", "(\\d)(\\d)", &a[0], &a[1]));
984 ASSERT_EQ(a[0], 1);
985 ASSERT_EQ(a[1], 2);
986
987 memset(a, 0, sizeof(0));
988 ASSERT_TRUE(RE2::FullMatch("123", "(\\d)(\\d)(\\d)", &a[0], &a[1], &a[2]));
989 ASSERT_EQ(a[0], 1);
990 ASSERT_EQ(a[1], 2);
991 ASSERT_EQ(a[2], 3);
992
993 memset(a, 0, sizeof(0));
994 ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
995 &a[2], &a[3]));
996 ASSERT_EQ(a[0], 1);
997 ASSERT_EQ(a[1], 2);
998 ASSERT_EQ(a[2], 3);
999 ASSERT_EQ(a[3], 4);
1000
1001 memset(a, 0, sizeof(0));
1002 ASSERT_TRUE(RE2::FullMatch("12345", "(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
1003 &a[2], &a[3], &a[4]));
1004 ASSERT_EQ(a[0], 1);
1005 ASSERT_EQ(a[1], 2);
1006 ASSERT_EQ(a[2], 3);
1007 ASSERT_EQ(a[3], 4);
1008 ASSERT_EQ(a[4], 5);
1009
1010 memset(a, 0, sizeof(0));
1011 ASSERT_TRUE(RE2::FullMatch("123456", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0],
1012 &a[1], &a[2], &a[3], &a[4], &a[5]));
1013 ASSERT_EQ(a[0], 1);
1014 ASSERT_EQ(a[1], 2);
1015 ASSERT_EQ(a[2], 3);
1016 ASSERT_EQ(a[3], 4);
1017 ASSERT_EQ(a[4], 5);
1018 ASSERT_EQ(a[5], 6);
1019
1020 memset(a, 0, sizeof(0));
1021 ASSERT_TRUE(RE2::FullMatch("1234567", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
1022 &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6]));
1023 ASSERT_EQ(a[0], 1);
1024 ASSERT_EQ(a[1], 2);
1025 ASSERT_EQ(a[2], 3);
1026 ASSERT_EQ(a[3], 4);
1027 ASSERT_EQ(a[4], 5);
1028 ASSERT_EQ(a[5], 6);
1029 ASSERT_EQ(a[6], 7);
1030
1031 memset(a, 0, sizeof(0));
1032 ASSERT_TRUE(RE2::FullMatch("1234567890123456",
1033 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1034 "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
1035 &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6],
1036 &a[7], &a[8], &a[9], &a[10], &a[11], &a[12],
1037 &a[13], &a[14], &a[15]));
1038 ASSERT_EQ(a[0], 1);
1039 ASSERT_EQ(a[1], 2);
1040 ASSERT_EQ(a[2], 3);
1041 ASSERT_EQ(a[3], 4);
1042 ASSERT_EQ(a[4], 5);
1043 ASSERT_EQ(a[5], 6);
1044 ASSERT_EQ(a[6], 7);
1045 ASSERT_EQ(a[7], 8);
1046 ASSERT_EQ(a[8], 9);
1047 ASSERT_EQ(a[9], 0);
1048 ASSERT_EQ(a[10], 1);
1049 ASSERT_EQ(a[11], 2);
1050 ASSERT_EQ(a[12], 3);
1051 ASSERT_EQ(a[13], 4);
1052 ASSERT_EQ(a[14], 5);
1053 ASSERT_EQ(a[15], 6);
1054}
1055
1056TEST(RE2, Accessors) {
1057 // Check the pattern() accessor
1058 {
1059 const std::string kPattern = "http://([^/]+)/.*";
1060 const RE2 re(kPattern);
1061 ASSERT_EQ(kPattern, re.pattern());
1062 }
1063
1064 // Check RE2 error field.
1065 {
1066 RE2 re("foo");
1067 ASSERT_TRUE(re.error().empty()); // Must have no error
1068 ASSERT_TRUE(re.ok());
1069 ASSERT_EQ(re.error_code(), RE2::NoError);
1070 }
1071}
1072
1073TEST(RE2, UTF8) {
1074 // Check UTF-8 handling
1075 // Three Japanese characters (nihongo)
1076 const char utf8_string[] = {
1077 (char)0xe6, (char)0x97, (char)0xa5, // 65e5
1078 (char)0xe6, (char)0x9c, (char)0xac, // 627c
1079 (char)0xe8, (char)0xaa, (char)0x9e, // 8a9e
1080 0
1081 };
1082 const char utf8_pattern[] = {
1083 '.',
1084 (char)0xe6, (char)0x9c, (char)0xac, // 627c
1085 '.',
1086 0
1087 };
1088
1089 // Both should match in either mode, bytes or UTF-8
1090 RE2 re_test1(".........", RE2::Latin1);
1091 ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test1));
1092 RE2 re_test2("...");
1093 ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test2));
1094
1095 // Check that '.' matches one byte or UTF-8 character
1096 // according to the mode.
1097 std::string s;
1098 RE2 re_test3("(.)", RE2::Latin1);
1099 ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test3, &s));
1100 ASSERT_EQ(s, std::string("\xe6"));
1101 RE2 re_test4("(.)");
1102 ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test4, &s));
1103 ASSERT_EQ(s, std::string("\xe6\x97\xa5"));
1104
1105 // Check that string matches itself in either mode
1106 RE2 re_test5(utf8_string, RE2::Latin1);
1107 ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test5));
1108 RE2 re_test6(utf8_string);
1109 ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test6));
1110
1111 // Check that pattern matches string only in UTF8 mode
1112 RE2 re_test7(utf8_pattern, RE2::Latin1);
1113 ASSERT_FALSE(RE2::FullMatch(utf8_string, re_test7));
1114 RE2 re_test8(utf8_pattern);
1115 ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test8));
1116}
1117
1118TEST(RE2, UngreedyUTF8) {
1119 // Check that ungreedy, UTF8 regular expressions don't match when they
1120 // oughtn't -- see bug 82246.
1121 {
1122 // This code always worked.
1123 const char* pattern = "\\w+X";
1124 const std::string target = "a aX";
1125 RE2 match_sentence(pattern, RE2::Latin1);
1126 RE2 match_sentence_re(pattern);
1127
1128 ASSERT_FALSE(RE2::FullMatch(target, match_sentence));
1129 ASSERT_FALSE(RE2::FullMatch(target, match_sentence_re));
1130 }
1131 {
1132 const char* pattern = "(?U)\\w+X";
1133 const std::string target = "a aX";
1134 RE2 match_sentence(pattern, RE2::Latin1);
1135 ASSERT_EQ(match_sentence.error(), "");
1136 RE2 match_sentence_re(pattern);
1137
1138 ASSERT_FALSE(RE2::FullMatch(target, match_sentence));
1139 ASSERT_FALSE(RE2::FullMatch(target, match_sentence_re));
1140 }
1141}
1142
1143TEST(RE2, Rejects) {
1144 {
1145 RE2 re("a\\1", RE2::Quiet);
1146 ASSERT_FALSE(re.ok()); }
1147 {
1148 RE2 re("a[x", RE2::Quiet);
1149 ASSERT_FALSE(re.ok());
1150 }
1151 {
1152 RE2 re("a[z-a]", RE2::Quiet);
1153 ASSERT_FALSE(re.ok());
1154 }
1155 {
1156 RE2 re("a[[:foobar:]]", RE2::Quiet);
1157 ASSERT_FALSE(re.ok());
1158 }
1159 {
1160 RE2 re("a(b", RE2::Quiet);
1161 ASSERT_FALSE(re.ok());
1162 }
1163 {
1164 RE2 re("a\\", RE2::Quiet);
1165 ASSERT_FALSE(re.ok());
1166 }
1167}
1168
1169TEST(RE2, NoCrash) {
1170 // Test that using a bad regexp doesn't crash.
1171 {
1172 RE2 re("a\\", RE2::Quiet);
1173 ASSERT_FALSE(re.ok());
1174 ASSERT_FALSE(RE2::PartialMatch("a\\b", re));
1175 }
1176
1177 // Test that using an enormous regexp doesn't crash
1178 {
1179 RE2 re("(((.{100}){100}){100}){100}", RE2::Quiet);
1180 ASSERT_FALSE(re.ok());
1181 ASSERT_FALSE(RE2::PartialMatch("aaa", re));
1182 }
1183
1184 // Test that a crazy regexp still compiles and runs.
1185 {
1186 RE2 re(".{512}x", RE2::Quiet);
1187 ASSERT_TRUE(re.ok());
1188 std::string s;
1189 s.append(515, 'c');
1190 s.append("x");
1191 ASSERT_TRUE(RE2::PartialMatch(s, re));
1192 }
1193}
1194
1195TEST(RE2, Recursion) {
1196 // Test that recursion is stopped.
1197 // This test is PCRE-legacy -- there's no recursion in RE2.
1198 int bytes = 15 * 1024; // enough to crash PCRE
1199 TestRecursion(bytes, ".");
1200 TestRecursion(bytes, "a");
1201 TestRecursion(bytes, "a.");
1202 TestRecursion(bytes, "ab.");
1203 TestRecursion(bytes, "abc.");
1204}
1205
1206TEST(RE2, BigCountedRepetition) {
1207 // Test that counted repetition works, given tons of memory.
1208 RE2::Options opt;
1209 opt.set_max_mem(256<<20);
1210
1211 RE2 re(".{512}x", opt);
1212 ASSERT_TRUE(re.ok());
1213 std::string s;
1214 s.append(515, 'c');
1215 s.append("x");
1216 ASSERT_TRUE(RE2::PartialMatch(s, re));
1217}
1218
1219TEST(RE2, DeepRecursion) {
1220 // Test for deep stack recursion. This would fail with a
1221 // segmentation violation due to stack overflow before pcre was
1222 // patched.
1223 // Again, a PCRE legacy test. RE2 doesn't recurse.
1224 std::string comment("x*");
1225 std::string a(131072, 'a');
1226 comment += a;
1227 comment += "*x";
1228 RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)");
1229 ASSERT_TRUE(RE2::FullMatch(comment, re));
1230}
1231
1232// Suggested by Josh Hyman. Failed when SearchOnePass was
1233// not implementing case-folding.
1234TEST(CaseInsensitive, MatchAndConsume) {
1235 std::string result;
1236 std::string text = "A fish named *Wanda*";
1237 StringPiece sp(text);
1238
1239 EXPECT_TRUE(RE2::PartialMatch(sp, "(?i)([wand]{5})", &result));
1240 EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result));
1241}
1242
1243// RE2 should permit implicit conversions from string, StringPiece, const char*,
1244// and C string literals.
1245TEST(RE2, ImplicitConversions) {
1246 std::string re_string(".");
1247 StringPiece re_stringpiece(".");
1248 const char* re_cstring = ".";
1249 EXPECT_TRUE(RE2::PartialMatch("e", re_string));
1250 EXPECT_TRUE(RE2::PartialMatch("e", re_stringpiece));
1251 EXPECT_TRUE(RE2::PartialMatch("e", re_cstring));
1252 EXPECT_TRUE(RE2::PartialMatch("e", "."));
1253}
1254
1255// Bugs introduced by 8622304
1256TEST(RE2, CL8622304) {
1257 // reported by ingow
1258 std::string dir;
1259 EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])")); // ok
1260 EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &dir)); // fails
1261
1262 // reported by jacobsa
1263 std::string key, val;
1264 EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true",
1265 "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?",
1266 &key,
1267 &val));
1268 EXPECT_EQ(key, "bar");
1269 EXPECT_EQ(val, "1,0x2F,030,4,5");
1270}
1271
1272
1273// Check that RE2 returns correct regexp pieces on error.
1274// In particular, make sure it returns whole runes
1275// and that it always reports invalid UTF-8.
1276// Also check that Perl error flag piece is big enough.
1277static struct ErrorTest {
1278 const char *regexp;
1279 const char *error;
1280} error_tests[] = {
1281 { "ab\\αcd", "\\α" },
1282 { "ef\\x☺01", "\\x☺0" },
1283 { "gh\\x1☺01", "\\x1☺" },
1284 { "ij\\x1", "\\x1" },
1285 { "kl\\x", "\\x" },
1286 { "uv\\x{0000☺}", "\\x{0000☺" },
1287 { "wx\\p{ABC", "\\p{ABC" },
1288 { "yz(?smiUX:abc)", "(?smiUX" }, // used to return (?s but the error is X
1289 { "aa(?sm☺i", "(?sm☺" },
1290 { "bb[abc", "[abc" },
1291
1292 { "mn\\x1\377", "" }, // no argument string returned for invalid UTF-8
1293 { "op\377qr", "" },
1294 { "st\\x{00000\377", "" },
1295 { "zz\\p{\377}", "" },
1296 { "zz\\x{00\377}", "" },
1297 { "zz(?P<name\377>abc)", "" },
1298};
1299TEST(RE2, ErrorArgs) {
1300 for (size_t i = 0; i < arraysize(error_tests); i++) {
1301 RE2 re(error_tests[i].regexp, RE2::Quiet);
1302 EXPECT_FALSE(re.ok());
1303 EXPECT_EQ(re.error_arg(), error_tests[i].error) << re.error();
1304 }
1305}
1306
1307// Check that "never match \n" mode never matches \n.
1308static struct NeverTest {
1309 const char* regexp;
1310 const char* text;
1311 const char* match;
1312} never_tests[] = {
1313 { "(.*)", "abc\ndef\nghi\n", "abc" },
1314 { "(?s)(abc.*def)", "abc\ndef\n", NULL },
1315 { "(abc(.|\n)*def)", "abc\ndef\n", NULL },
1316 { "(abc[^x]*def)", "abc\ndef\n", NULL },
1317 { "(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef" },
1318};
1319TEST(RE2, NeverNewline) {
1320 RE2::Options opt;
1321 opt.set_never_nl(true);
1322 for (size_t i = 0; i < arraysize(never_tests); i++) {
1323 const NeverTest& t = never_tests[i];
1324 RE2 re(t.regexp, opt);
1325 if (t.match == NULL) {
1326 EXPECT_FALSE(re.PartialMatch(t.text, re));
1327 } else {
1328 StringPiece m;
1329 EXPECT_TRUE(re.PartialMatch(t.text, re, &m));
1330 EXPECT_EQ(m, t.match);
1331 }
1332 }
1333}
1334
1335// Check that dot_nl option works.
1336TEST(RE2, DotNL) {
1337 RE2::Options opt;
1338 opt.set_dot_nl(true);
1339 EXPECT_TRUE(RE2::PartialMatch("\n", RE2(".", opt)));
1340 EXPECT_FALSE(RE2::PartialMatch("\n", RE2("(?-s).", opt)));
1341 opt.set_never_nl(true);
1342 EXPECT_FALSE(RE2::PartialMatch("\n", RE2(".", opt)));
1343}
1344
1345// Check that there are no capturing groups in "never capture" mode.
1346TEST(RE2, NeverCapture) {
1347 RE2::Options opt;
1348 opt.set_never_capture(true);
1349 RE2 re("(r)(e)", opt);
1350 EXPECT_EQ(0, re.NumberOfCapturingGroups());
1351}
1352
1353// Bitstate bug was looking at submatch[0] even if nsubmatch == 0.
1354// Triggered by a failed DFA search falling back to Bitstate when
1355// using Match with a NULL submatch set. Bitstate tried to read
1356// the submatch[0] entry even if nsubmatch was 0.
1357TEST(RE2, BitstateCaptureBug) {
1358 RE2::Options opt;
1359 opt.set_max_mem(20000);
1360 RE2 re("(_________$)", opt);
1361 StringPiece s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";
1362 EXPECT_FALSE(re.Match(s, 0, s.size(), RE2::UNANCHORED, NULL, 0));
1363}
1364
1365// C++ version of bug 609710.
1366TEST(RE2, UnicodeClasses) {
1367 const std::string str = "ABCDEFGHI譚永鋒";
1368 std::string a, b, c;
1369
1370 EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}"));
1371 EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}"));
1372 EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}"));
1373 EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}"));
1374 EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}"));
1375 EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}"));
1376
1377 EXPECT_TRUE(RE2::FullMatch("譚", "\\p{L}"));
1378 EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Lu}"));
1379 EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Ll}"));
1380 EXPECT_FALSE(RE2::FullMatch("譚", "\\P{L}"));
1381 EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Lu}"));
1382 EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Ll}"));
1383
1384 EXPECT_TRUE(RE2::FullMatch("永", "\\p{L}"));
1385 EXPECT_FALSE(RE2::FullMatch("永", "\\p{Lu}"));
1386 EXPECT_FALSE(RE2::FullMatch("永", "\\p{Ll}"));
1387 EXPECT_FALSE(RE2::FullMatch("永", "\\P{L}"));
1388 EXPECT_TRUE(RE2::FullMatch("永", "\\P{Lu}"));
1389 EXPECT_TRUE(RE2::FullMatch("永", "\\P{Ll}"));
1390
1391 EXPECT_TRUE(RE2::FullMatch("鋒", "\\p{L}"));
1392 EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Lu}"));
1393 EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Ll}"));
1394 EXPECT_FALSE(RE2::FullMatch("鋒", "\\P{L}"));
1395 EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Lu}"));
1396 EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Ll}"));
1397
1398 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c));
1399 EXPECT_EQ("A", a);
1400 EXPECT_EQ("B", b);
1401 EXPECT_EQ("C", c);
1402
1403 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c));
1404 EXPECT_EQ("A", a);
1405 EXPECT_EQ("B", b);
1406 EXPECT_EQ("C", c);
1407
1408 EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}"));
1409
1410 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c));
1411 EXPECT_EQ("A", a);
1412 EXPECT_EQ("B", b);
1413 EXPECT_EQ("C", c);
1414
1415 EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]"));
1416
1417 EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b, &c));
1418 EXPECT_EQ("譚", a);
1419 EXPECT_EQ("永", b);
1420 EXPECT_EQ("鋒", c);
1421}
1422
1423TEST(RE2, LazyRE2) {
1424 // Test with and without options.
1425 static LazyRE2 a = {"a"};
1426 static LazyRE2 b = {"b", RE2::Latin1};
1427
1428 EXPECT_EQ("a", a->pattern());
1429 EXPECT_EQ(RE2::Options::EncodingUTF8, a->options().encoding());
1430
1431 EXPECT_EQ("b", b->pattern());
1432 EXPECT_EQ(RE2::Options::EncodingLatin1, b->options().encoding());
1433}
1434
1435// Bug reported by saito. 2009/02/17
1436TEST(RE2, NullVsEmptyString) {
1437 RE2 re(".*");
1438 EXPECT_TRUE(re.ok());
1439
1440 StringPiece null;
1441 EXPECT_TRUE(RE2::FullMatch(null, re));
1442
1443 StringPiece empty("");
1444 EXPECT_TRUE(RE2::FullMatch(empty, re));
1445}
1446
1447// Similar to the previous test, check that the null string and the empty
1448// string both match, but also that the null string can only provide null
1449// submatches whereas the empty string can also provide empty submatches.
1450TEST(RE2, NullVsEmptyStringSubmatches) {
1451 RE2 re("()|(foo)");
1452 EXPECT_TRUE(re.ok());
1453
1454 // matches[0] is overall match, [1] is (), [2] is (foo), [3] is nonexistent.
1455 StringPiece matches[4];
1456
1457 for (size_t i = 0; i < arraysize(matches); i++)
1458 matches[i] = "bar";
1459
1460 StringPiece null;
1461 EXPECT_TRUE(re.Match(null, 0, null.size(), RE2::UNANCHORED,
1462 matches, arraysize(matches)));
1463 for (size_t i = 0; i < arraysize(matches); i++) {
1464 EXPECT_TRUE(matches[i].data() == NULL); // always null
1465 EXPECT_TRUE(matches[i].empty());
1466 }
1467
1468 for (size_t i = 0; i < arraysize(matches); i++)
1469 matches[i] = "bar";
1470
1471 StringPiece empty("");
1472 EXPECT_TRUE(re.Match(empty, 0, empty.size(), RE2::UNANCHORED,
1473 matches, arraysize(matches)));
1474 EXPECT_TRUE(matches[0].data() != NULL); // empty, not null
1475 EXPECT_TRUE(matches[0].empty());
1476 EXPECT_TRUE(matches[1].data() != NULL); // empty, not null
1477 EXPECT_TRUE(matches[1].empty());
1478 EXPECT_TRUE(matches[2].data() == NULL);
1479 EXPECT_TRUE(matches[2].empty());
1480 EXPECT_TRUE(matches[3].data() == NULL);
1481 EXPECT_TRUE(matches[3].empty());
1482}
1483
1484// Issue 1816809
1485TEST(RE2, Bug1816809) {
1486 RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
1487 StringPiece piece("llx-3;llx4");
1488 std::string x;
1489 EXPECT_TRUE(RE2::Consume(&piece, re, &x));
1490}
1491
1492// Issue 3061120
1493TEST(RE2, Bug3061120) {
1494 RE2 re("(?i)\\W");
1495 EXPECT_FALSE(RE2::PartialMatch("x", re)); // always worked
1496 EXPECT_FALSE(RE2::PartialMatch("k", re)); // broke because of kelvin
1497 EXPECT_FALSE(RE2::PartialMatch("s", re)); // broke because of latin long s
1498}
1499
1500TEST(RE2, CapturingGroupNames) {
1501 // Opening parentheses annotated with group IDs:
1502 // 12 3 45 6 7
1503 RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))");
1504 EXPECT_TRUE(re.ok());
1505 const std::map<int, std::string>& have = re.CapturingGroupNames();
1506 std::map<int, std::string> want;
1507 want[3] = "G2";
1508 want[6] = "G2";
1509 want[7] = "G1";
1510 EXPECT_EQ(want, have);
1511}
1512
1513TEST(RE2, RegexpToStringLossOfAnchor) {
1514 EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at");
1515 EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at");
1516 EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$");
1517 EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)");
1518}
1519
1520// Issue 10131674
1521TEST(RE2, Bug10131674) {
1522 // Some of these escapes describe values that do not fit in a byte.
1523 RE2 re("\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332", RE2::Latin1);
1524 EXPECT_FALSE(re.ok());
1525 EXPECT_FALSE(RE2::FullMatch("hello world", re));
1526}
1527
1528TEST(RE2, Bug18391750) {
1529 // Stray write past end of match_ in nfa.cc, caught by fuzzing + address sanitizer.
1530 const char t[] = {
1531 (char)0x28, (char)0x28, (char)0xfc, (char)0xfc, (char)0x08, (char)0x08,
1532 (char)0x26, (char)0x26, (char)0x28, (char)0xc2, (char)0x9b, (char)0xc5,
1533 (char)0xc5, (char)0xd4, (char)0x8f, (char)0x8f, (char)0x69, (char)0x69,
1534 (char)0xe7, (char)0x29, (char)0x7b, (char)0x37, (char)0x31, (char)0x31,
1535 (char)0x7d, (char)0xae, (char)0x7c, (char)0x7c, (char)0xf3, (char)0x29,
1536 (char)0xae, (char)0xae, (char)0x2e, (char)0x2a, (char)0x29, (char)0x00,
1537 };
1538 RE2::Options opt;
1539 opt.set_encoding(RE2::Options::EncodingLatin1);
1540 opt.set_longest_match(true);
1541 opt.set_dot_nl(true);
1542 opt.set_case_sensitive(false);
1543 RE2 re(t, opt);
1544 ASSERT_TRUE(re.ok());
1545 RE2::PartialMatch(t, re);
1546}
1547
1548TEST(RE2, Bug18458852) {
1549 // Bug in parser accepting invalid (too large) rune,
1550 // causing compiler to fail in DCHECK in UTF-8
1551 // character class code.
1552 const char b[] = {
1553 (char)0x28, (char)0x05, (char)0x05, (char)0x41, (char)0x41, (char)0x28,
1554 (char)0x24, (char)0x5b, (char)0x5e, (char)0xf5, (char)0x87, (char)0x87,
1555 (char)0x90, (char)0x29, (char)0x5d, (char)0x29, (char)0x29, (char)0x00,
1556 };
1557 RE2 re(b);
1558 ASSERT_FALSE(re.ok());
1559}
1560
1561TEST(RE2, Bug18523943) {
1562 // Bug in BitState: case kFailInst failed the match entirely.
1563
1564 RE2::Options opt;
1565 const char a[] = {
1566 (char)0x29, (char)0x29, (char)0x24, (char)0x00,
1567 };
1568 const char b[] = {
1569 (char)0x28, (char)0x0a, (char)0x2a, (char)0x2a, (char)0x29, (char)0x00,
1570 };
1571 opt.set_log_errors(false);
1572 opt.set_encoding(RE2::Options::EncodingLatin1);
1573 opt.set_posix_syntax(true);
1574 opt.set_longest_match(true);
1575 opt.set_literal(false);
1576 opt.set_never_nl(true);
1577
1578 RE2 re((const char*)b, opt);
1579 ASSERT_TRUE(re.ok());
1580 std::string s1;
1581 ASSERT_TRUE(RE2::PartialMatch((const char*)a, re, &s1));
1582}
1583
1584TEST(RE2, Bug21371806) {
1585 // Bug in parser accepting Unicode groups in Latin-1 mode,
1586 // causing compiler to fail in DCHECK in prog.cc.
1587
1588 RE2::Options opt;
1589 opt.set_encoding(RE2::Options::EncodingLatin1);
1590
1591 RE2 re("g\\p{Zl}]", opt);
1592 ASSERT_TRUE(re.ok());
1593}
1594
1595TEST(RE2, Bug26356109) {
1596 // Bug in parser caused by factoring of common prefixes in alternations.
1597
1598 // In the past, this was factored to "a\\C*?[bc]". Thus, the automaton would
1599 // consume "ab" and then stop (when unanchored) whereas it should consume all
1600 // of "abc" as per first-match semantics.
1601 RE2 re("a\\C*?c|a\\C*?b");
1602 ASSERT_TRUE(re.ok());
1603
1604 std::string s = "abc";
1605 StringPiece m;
1606
1607 ASSERT_TRUE(re.Match(s, 0, s.size(), RE2::UNANCHORED, &m, 1));
1608 ASSERT_EQ(m, s) << " (UNANCHORED) got m='" << m << "', want '" << s << "'";
1609
1610 ASSERT_TRUE(re.Match(s, 0, s.size(), RE2::ANCHOR_BOTH, &m, 1));
1611 ASSERT_EQ(m, s) << " (ANCHOR_BOTH) got m='" << m << "', want '" << s << "'";
1612}
1613
1614TEST(RE2, Issue104) {
1615 // RE2::GlobalReplace always advanced by one byte when the empty string was
1616 // matched, which would clobber any rune that is longer than one byte.
1617
1618 std::string s = "bc";
1619 ASSERT_EQ(3, RE2::GlobalReplace(&s, "a*", "d"));
1620 ASSERT_EQ("dbdcd", s);
1621
1622 s = "ąć";
1623 ASSERT_EQ(3, RE2::GlobalReplace(&s, "Ć*", "Ĉ"));
1624 ASSERT_EQ("ĈąĈćĈ", s);
1625
1626 s = "人类";
1627 ASSERT_EQ(3, RE2::GlobalReplace(&s, "大*", "小"));
1628 ASSERT_EQ("小人小类小", s);
1629}
1630
1631} // namespace re2
1632