1// Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Benchmarks for regular expression implementations.
6
7#include <stdint.h>
8#include <stdio.h>
9#include <stdlib.h>
10#include <string>
11#include <thread>
12#include <utility>
13
14#include "util/benchmark.h"
15#include "util/test.h"
16#include "util/flags.h"
17#include "util/logging.h"
18#include "util/malloc_counter.h"
19#include "util/strutil.h"
20#include "re2/prog.h"
21#include "re2/re2.h"
22#include "re2/regexp.h"
23#include "util/pcre.h"
24
25namespace re2 {
26void Test();
27void MemoryUsage();
28} // namespace re2
29
30typedef testing::MallocCounter MallocCounter;
31
32namespace re2 {
33
34void Test() {
35 Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL);
36 CHECK(re);
37 Prog* prog = re->CompileToProg(0);
38 CHECK(prog);
39 CHECK(prog->IsOnePass());
40 CHECK(prog->CanBitState());
41 const char* text = "650-253-0001";
42 StringPiece sp[4];
43 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
44 CHECK_EQ(sp[0], "650-253-0001");
45 CHECK_EQ(sp[1], "650");
46 CHECK_EQ(sp[2], "253");
47 CHECK_EQ(sp[3], "0001");
48 delete prog;
49 re->Decref();
50 LOG(INFO) << "test passed\n";
51}
52
53void MemoryUsage() {
54 const char* regexp = "(\\d+)-(\\d+)-(\\d+)";
55 const char* text = "650-253-0001";
56 {
57 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
58 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
59 CHECK(re);
60 // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
61 // because LOG(INFO) might do a big allocation before they get evaluated.
62 fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n",
63 mc.HeapGrowth(), mc.PeakHeapGrowth());
64 mc.Reset();
65
66 Prog* prog = re->CompileToProg(0);
67 CHECK(prog);
68 CHECK(prog->IsOnePass());
69 CHECK(prog->CanBitState());
70 fprintf(stderr, "Prog: %7lld bytes (peak=%lld)\n",
71 mc.HeapGrowth(), mc.PeakHeapGrowth());
72 mc.Reset();
73
74 StringPiece sp[4];
75 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
76 fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n",
77 mc.HeapGrowth(), mc.PeakHeapGrowth());
78 delete prog;
79 re->Decref();
80 }
81
82 {
83 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
84
85 PCRE re(regexp, PCRE::UTF8);
86 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n",
87 mc.HeapGrowth(), mc.PeakHeapGrowth());
88 PCRE::FullMatch(text, re);
89 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n",
90 mc.HeapGrowth(), mc.PeakHeapGrowth());
91 }
92
93 {
94 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
95
96 PCRE* re = new PCRE(regexp, PCRE::UTF8);
97 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n",
98 mc.HeapGrowth(), mc.PeakHeapGrowth());
99 PCRE::FullMatch(text, *re);
100 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n",
101 mc.HeapGrowth(), mc.PeakHeapGrowth());
102 delete re;
103 }
104
105 {
106 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
107
108 RE2 re(regexp);
109 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n",
110 mc.HeapGrowth(), mc.PeakHeapGrowth());
111 RE2::FullMatch(text, re);
112 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n",
113 mc.HeapGrowth(), mc.PeakHeapGrowth());
114 }
115
116 fprintf(stderr, "sizeof: PCRE=%zd RE2=%zd Prog=%zd Inst=%zd\n",
117 sizeof(PCRE), sizeof(RE2), sizeof(Prog), sizeof(Prog::Inst));
118}
119
120int NumCPUs() {
121 return static_cast<int>(std::thread::hardware_concurrency());
122}
123
124// Regular expression implementation wrappers.
125// Defined at bottom of file, but they are repetitive
126// and not interesting.
127
128typedef void SearchImpl(benchmark::State& state, const char* regexp,
129 const StringPiece& text, Prog::Anchor anchor,
130 bool expect_match);
131
132SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState, SearchPCRE,
133 SearchRE2, SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass,
134 SearchCachedBitState, SearchCachedPCRE, SearchCachedRE2;
135
136typedef void ParseImpl(benchmark::State& state, const char* regexp,
137 const StringPiece& text);
138
139ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState, Parse1PCRE, Parse1RE2,
140 Parse1Backtrack, Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState,
141 Parse1CachedPCRE, Parse1CachedRE2, Parse1CachedBacktrack;
142
143ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState, Parse3PCRE, Parse3RE2,
144 Parse3Backtrack, Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState,
145 Parse3CachedPCRE, Parse3CachedRE2, Parse3CachedBacktrack;
146
147ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2;
148
149ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2;
150
151// Benchmark: failed search for regexp in random text.
152
153// Generate random text that won't contain the search string,
154// to test worst-case search behavior.
155void MakeText(std::string* text, int64_t nbytes) {
156 srand(1);
157 text->resize(nbytes);
158 for (int64_t i = 0; i < nbytes; i++) {
159 // Generate a one-byte rune that isn't a control character (e.g. '\n').
160 // Clipping to 0x20 introduces some bias, but we don't need uniformity.
161 int byte = rand() & 0x7F;
162 if (byte < 0x20)
163 byte = 0x20;
164 (*text)[i] = byte;
165 }
166}
167
168// Makes text of size nbytes, then calls run to search
169// the text for regexp iters times.
170void Search(benchmark::State& state, const char* regexp, SearchImpl* search) {
171 std::string s;
172 MakeText(&s, state.range(0));
173 search(state, regexp, s, Prog::kUnanchored, false);
174 state.SetBytesProcessed(state.iterations() * state.range(0));
175}
176
177// These two are easy because they start with an A,
178// giving the search loop something to memchr for.
179#define EASY0 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
180#define EASY1 "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
181
182// This is a little harder, since it starts with a character class
183// and thus can't be memchr'ed. Could look for ABC and work backward,
184// but no one does that.
185#define MEDIUM "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
186
187// This is a fair amount harder, because of the leading [ -~]*.
188// A bad backtracking implementation will take O(text^2) time to
189// figure out there's no match.
190#define HARD "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
191
192// This has quite a high degree of fanout.
193// NFA execution will be particularly slow.
194#define FANOUT "(?:[\\x{80}-\\x{10FFFF}]?){100}[\\x{80}-\\x{10FFFF}]"
195
196// This stresses engines that are trying to track parentheses.
197#define PARENS "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \
198 "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
199
200void Search_Easy0_CachedDFA(benchmark::State& state) { Search(state, EASY0, SearchCachedDFA); }
201void Search_Easy0_CachedNFA(benchmark::State& state) { Search(state, EASY0, SearchCachedNFA); }
202void Search_Easy0_CachedPCRE(benchmark::State& state) { Search(state, EASY0, SearchCachedPCRE); }
203void Search_Easy0_CachedRE2(benchmark::State& state) { Search(state, EASY0, SearchCachedRE2); }
204
205BENCHMARK_RANGE(Search_Easy0_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
206BENCHMARK_RANGE(Search_Easy0_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
207#ifdef USEPCRE
208BENCHMARK_RANGE(Search_Easy0_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
209#endif
210BENCHMARK_RANGE(Search_Easy0_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
211
212void Search_Easy1_CachedDFA(benchmark::State& state) { Search(state, EASY1, SearchCachedDFA); }
213void Search_Easy1_CachedNFA(benchmark::State& state) { Search(state, EASY1, SearchCachedNFA); }
214void Search_Easy1_CachedPCRE(benchmark::State& state) { Search(state, EASY1, SearchCachedPCRE); }
215void Search_Easy1_CachedRE2(benchmark::State& state) { Search(state, EASY1, SearchCachedRE2); }
216
217BENCHMARK_RANGE(Search_Easy1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
218BENCHMARK_RANGE(Search_Easy1_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
219#ifdef USEPCRE
220BENCHMARK_RANGE(Search_Easy1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
221#endif
222BENCHMARK_RANGE(Search_Easy1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
223
224void Search_Medium_CachedDFA(benchmark::State& state) { Search(state, MEDIUM, SearchCachedDFA); }
225void Search_Medium_CachedNFA(benchmark::State& state) { Search(state, MEDIUM, SearchCachedNFA); }
226void Search_Medium_CachedPCRE(benchmark::State& state) { Search(state, MEDIUM, SearchCachedPCRE); }
227void Search_Medium_CachedRE2(benchmark::State& state) { Search(state, MEDIUM, SearchCachedRE2); }
228
229BENCHMARK_RANGE(Search_Medium_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
230BENCHMARK_RANGE(Search_Medium_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
231#ifdef USEPCRE
232BENCHMARK_RANGE(Search_Medium_CachedPCRE, 8, 256<<10)->ThreadRange(1, NumCPUs());
233#endif
234BENCHMARK_RANGE(Search_Medium_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
235
236void Search_Hard_CachedDFA(benchmark::State& state) { Search(state, HARD, SearchCachedDFA); }
237void Search_Hard_CachedNFA(benchmark::State& state) { Search(state, HARD, SearchCachedNFA); }
238void Search_Hard_CachedPCRE(benchmark::State& state) { Search(state, HARD, SearchCachedPCRE); }
239void Search_Hard_CachedRE2(benchmark::State& state) { Search(state, HARD, SearchCachedRE2); }
240
241BENCHMARK_RANGE(Search_Hard_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
242BENCHMARK_RANGE(Search_Hard_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
243#ifdef USEPCRE
244BENCHMARK_RANGE(Search_Hard_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs());
245#endif
246BENCHMARK_RANGE(Search_Hard_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
247
248void Search_Fanout_CachedDFA(benchmark::State& state) { Search(state, FANOUT, SearchCachedDFA); }
249void Search_Fanout_CachedNFA(benchmark::State& state) { Search(state, FANOUT, SearchCachedNFA); }
250void Search_Fanout_CachedPCRE(benchmark::State& state) { Search(state, FANOUT, SearchCachedPCRE); }
251void Search_Fanout_CachedRE2(benchmark::State& state) { Search(state, FANOUT, SearchCachedRE2); }
252
253BENCHMARK_RANGE(Search_Fanout_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
254BENCHMARK_RANGE(Search_Fanout_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
255#ifdef USEPCRE
256BENCHMARK_RANGE(Search_Fanout_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs());
257#endif
258BENCHMARK_RANGE(Search_Fanout_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
259
260void Search_Parens_CachedDFA(benchmark::State& state) { Search(state, PARENS, SearchCachedDFA); }
261void Search_Parens_CachedNFA(benchmark::State& state) { Search(state, PARENS, SearchCachedNFA); }
262void Search_Parens_CachedPCRE(benchmark::State& state) { Search(state, PARENS, SearchCachedPCRE); }
263void Search_Parens_CachedRE2(benchmark::State& state) { Search(state, PARENS, SearchCachedRE2); }
264
265BENCHMARK_RANGE(Search_Parens_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
266BENCHMARK_RANGE(Search_Parens_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
267#ifdef USEPCRE
268BENCHMARK_RANGE(Search_Parens_CachedPCRE, 8, 8)->ThreadRange(1, NumCPUs());
269#endif
270BENCHMARK_RANGE(Search_Parens_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
271
272void SearchBigFixed(benchmark::State& state, SearchImpl* search) {
273 std::string s;
274 s.append(state.range(0)/2, 'x');
275 std::string regexp = "^" + s + ".*$";
276 std::string t;
277 MakeText(&t, state.range(0)/2);
278 s += t;
279 search(state, regexp.c_str(), s, Prog::kUnanchored, true);
280 state.SetBytesProcessed(state.iterations() * state.range(0));
281}
282
283void Search_BigFixed_CachedDFA(benchmark::State& state) { SearchBigFixed(state, SearchCachedDFA); }
284void Search_BigFixed_CachedNFA(benchmark::State& state) { SearchBigFixed(state, SearchCachedNFA); }
285void Search_BigFixed_CachedPCRE(benchmark::State& state) { SearchBigFixed(state, SearchCachedPCRE); }
286void Search_BigFixed_CachedRE2(benchmark::State& state) { SearchBigFixed(state, SearchCachedRE2); }
287
288BENCHMARK_RANGE(Search_BigFixed_CachedDFA, 8, 1<<20)->ThreadRange(1, NumCPUs());
289BENCHMARK_RANGE(Search_BigFixed_CachedNFA, 8, 32<<10)->ThreadRange(1, NumCPUs());
290#ifdef USEPCRE
291BENCHMARK_RANGE(Search_BigFixed_CachedPCRE, 8, 32<<10)->ThreadRange(1, NumCPUs());
292#endif
293BENCHMARK_RANGE(Search_BigFixed_CachedRE2, 8, 1<<20)->ThreadRange(1, NumCPUs());
294
295// Benchmark: FindAndConsume
296
297void FindAndConsume(benchmark::State& state) {
298 std::string s;
299 MakeText(&s, state.range(0));
300 s.append("Hello World");
301 RE2 re("((Hello World))");
302 for (auto _ : state) {
303 StringPiece t = s;
304 StringPiece u;
305 CHECK(RE2::FindAndConsume(&t, re, &u));
306 CHECK_EQ(u, "Hello World");
307 }
308 state.SetBytesProcessed(state.iterations() * state.range(0));
309}
310
311BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs());
312
313// Benchmark: successful anchored search.
314
315void SearchSuccess(benchmark::State& state, const char* regexp,
316 SearchImpl* search) {
317 std::string s;
318 MakeText(&s, state.range(0));
319 search(state, regexp, s, Prog::kAnchored, true);
320 state.SetBytesProcessed(state.iterations() * state.range(0));
321}
322
323// Unambiguous search (RE2 can use OnePass).
324
325void Search_Success_DFA(benchmark::State& state) { SearchSuccess(state, ".*$", SearchDFA); }
326void Search_Success_NFA(benchmark::State& state) { SearchSuccess(state, ".*$", SearchNFA); }
327void Search_Success_PCRE(benchmark::State& state) { SearchSuccess(state, ".*$", SearchPCRE); }
328void Search_Success_RE2(benchmark::State& state) { SearchSuccess(state, ".*$", SearchRE2); }
329void Search_Success_OnePass(benchmark::State& state) { SearchSuccess(state, ".*$", SearchOnePass); }
330
331BENCHMARK_RANGE(Search_Success_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
332BENCHMARK_RANGE(Search_Success_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
333#ifdef USEPCRE
334BENCHMARK_RANGE(Search_Success_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
335#endif
336BENCHMARK_RANGE(Search_Success_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
337BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
338
339void Search_Success_CachedDFA(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedDFA); }
340void Search_Success_CachedNFA(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedNFA); }
341void Search_Success_CachedPCRE(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedPCRE); }
342void Search_Success_CachedRE2(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedRE2); }
343void Search_Success_CachedOnePass(benchmark::State& state) { SearchSuccess(state, ".*$", SearchCachedOnePass); }
344
345BENCHMARK_RANGE(Search_Success_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
346BENCHMARK_RANGE(Search_Success_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
347#ifdef USEPCRE
348BENCHMARK_RANGE(Search_Success_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
349#endif
350BENCHMARK_RANGE(Search_Success_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
351BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
352
353// Ambiguous search (RE2 cannot use OnePass).
354// Used to be ".*.$", but that is coalesced to ".+$" these days.
355
356void Search_Success1_DFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchDFA); }
357void Search_Success1_NFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchNFA); }
358void Search_Success1_PCRE(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchPCRE); }
359void Search_Success1_RE2(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchRE2); }
360void Search_Success1_BitState(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchBitState); }
361
362BENCHMARK_RANGE(Search_Success1_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
363BENCHMARK_RANGE(Search_Success1_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
364#ifdef USEPCRE
365BENCHMARK_RANGE(Search_Success1_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
366#endif
367BENCHMARK_RANGE(Search_Success1_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
368BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
369
370void Search_Success1_CachedDFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedDFA); }
371void Search_Success1_CachedNFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedNFA); }
372void Search_Success1_CachedPCRE(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedPCRE); }
373void Search_Success1_CachedRE2(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedRE2); }
374void Search_Success1_CachedBitState(benchmark::State& state) { SearchSuccess(state, ".*\\C$", SearchCachedBitState); }
375
376BENCHMARK_RANGE(Search_Success1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
377BENCHMARK_RANGE(Search_Success1_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
378#ifdef USEPCRE
379BENCHMARK_RANGE(Search_Success1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
380#endif
381BENCHMARK_RANGE(Search_Success1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
382BENCHMARK_RANGE(Search_Success1_CachedBitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
383
384// Benchmark: AltMatch optimisation (just to verify that it works)
385// Note that OnePass doesn't implement it!
386
387void SearchAltMatch(benchmark::State& state, SearchImpl* search) {
388 std::string s;
389 MakeText(&s, state.range(0));
390 search(state, "\\C*", s, Prog::kAnchored, true);
391 state.SetBytesProcessed(state.iterations() * state.range(0));
392}
393
394void Search_AltMatch_DFA(benchmark::State& state) { SearchAltMatch(state, SearchDFA); }
395void Search_AltMatch_NFA(benchmark::State& state) { SearchAltMatch(state, SearchNFA); }
396void Search_AltMatch_OnePass(benchmark::State& state) { SearchAltMatch(state, SearchOnePass); }
397void Search_AltMatch_BitState(benchmark::State& state) { SearchAltMatch(state, SearchBitState); }
398void Search_AltMatch_PCRE(benchmark::State& state) { SearchAltMatch(state, SearchPCRE); }
399void Search_AltMatch_RE2(benchmark::State& state) { SearchAltMatch(state, SearchRE2); }
400
401BENCHMARK_RANGE(Search_AltMatch_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
402BENCHMARK_RANGE(Search_AltMatch_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
403BENCHMARK_RANGE(Search_AltMatch_OnePass, 8, 16<<20)->ThreadRange(1, NumCPUs());
404BENCHMARK_RANGE(Search_AltMatch_BitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
405#ifdef USEPCRE
406BENCHMARK_RANGE(Search_AltMatch_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
407#endif
408BENCHMARK_RANGE(Search_AltMatch_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
409
410void Search_AltMatch_CachedDFA(benchmark::State& state) { SearchAltMatch(state, SearchCachedDFA); }
411void Search_AltMatch_CachedNFA(benchmark::State& state) { SearchAltMatch(state, SearchCachedNFA); }
412void Search_AltMatch_CachedOnePass(benchmark::State& state) { SearchAltMatch(state, SearchCachedOnePass); }
413void Search_AltMatch_CachedBitState(benchmark::State& state) { SearchAltMatch(state, SearchCachedBitState); }
414void Search_AltMatch_CachedPCRE(benchmark::State& state) { SearchAltMatch(state, SearchCachedPCRE); }
415void Search_AltMatch_CachedRE2(benchmark::State& state) { SearchAltMatch(state, SearchCachedRE2); }
416
417BENCHMARK_RANGE(Search_AltMatch_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
418BENCHMARK_RANGE(Search_AltMatch_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
419BENCHMARK_RANGE(Search_AltMatch_CachedOnePass, 8, 16<<20)->ThreadRange(1, NumCPUs());
420BENCHMARK_RANGE(Search_AltMatch_CachedBitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
421#ifdef USEPCRE
422BENCHMARK_RANGE(Search_AltMatch_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
423#endif
424BENCHMARK_RANGE(Search_AltMatch_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
425
426// Benchmark: use regexp to find phone number.
427
428void SearchDigits(benchmark::State& state, SearchImpl* search) {
429 StringPiece s("650-253-0001");
430 search(state, "([0-9]+)-([0-9]+)-([0-9]+)", s, Prog::kAnchored, true);
431 state.SetItemsProcessed(state.iterations());
432}
433
434void Search_Digits_DFA(benchmark::State& state) { SearchDigits(state, SearchDFA); }
435void Search_Digits_NFA(benchmark::State& state) { SearchDigits(state, SearchNFA); }
436void Search_Digits_OnePass(benchmark::State& state) { SearchDigits(state, SearchOnePass); }
437void Search_Digits_PCRE(benchmark::State& state) { SearchDigits(state, SearchPCRE); }
438void Search_Digits_RE2(benchmark::State& state) { SearchDigits(state, SearchRE2); }
439void Search_Digits_BitState(benchmark::State& state) { SearchDigits(state, SearchBitState); }
440
441BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs());
442BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs());
443BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs());
444#ifdef USEPCRE
445BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs());
446#endif
447BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs());
448BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs());
449
450// Benchmark: use regexp to parse digit fields in phone number.
451
452void Parse3Digits(benchmark::State& state,
453 void (*parse3)(benchmark::State&, const char*,
454 const StringPiece&)) {
455 parse3(state, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001");
456 state.SetItemsProcessed(state.iterations());
457}
458
459void Parse_Digits_NFA(benchmark::State& state) { Parse3Digits(state, Parse3NFA); }
460void Parse_Digits_OnePass(benchmark::State& state) { Parse3Digits(state, Parse3OnePass); }
461void Parse_Digits_PCRE(benchmark::State& state) { Parse3Digits(state, Parse3PCRE); }
462void Parse_Digits_RE2(benchmark::State& state) { Parse3Digits(state, Parse3RE2); }
463void Parse_Digits_Backtrack(benchmark::State& state) { Parse3Digits(state, Parse3Backtrack); }
464void Parse_Digits_BitState(benchmark::State& state) { Parse3Digits(state, Parse3BitState); }
465
466BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs());
467BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs());
468#ifdef USEPCRE
469BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs());
470#endif
471BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs());
472BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs());
473BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs());
474
475void Parse_CachedDigits_NFA(benchmark::State& state) { Parse3Digits(state, Parse3CachedNFA); }
476void Parse_CachedDigits_OnePass(benchmark::State& state) { Parse3Digits(state, Parse3CachedOnePass); }
477void Parse_CachedDigits_PCRE(benchmark::State& state) { Parse3Digits(state, Parse3CachedPCRE); }
478void Parse_CachedDigits_RE2(benchmark::State& state) { Parse3Digits(state, Parse3CachedRE2); }
479void Parse_CachedDigits_Backtrack(benchmark::State& state) { Parse3Digits(state, Parse3CachedBacktrack); }
480void Parse_CachedDigits_BitState(benchmark::State& state) { Parse3Digits(state, Parse3CachedBitState); }
481
482BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs());
483BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs());
484#ifdef USEPCRE
485BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs());
486#endif
487BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs());
488BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs());
489BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs());
490
491void Parse3DigitDs(benchmark::State& state,
492 void (*parse3)(benchmark::State&, const char*,
493 const StringPiece&)) {
494 parse3(state, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001");
495 state.SetItemsProcessed(state.iterations());
496}
497
498void Parse_DigitDs_NFA(benchmark::State& state) { Parse3DigitDs(state, Parse3NFA); }
499void Parse_DigitDs_OnePass(benchmark::State& state) { Parse3DigitDs(state, Parse3OnePass); }
500void Parse_DigitDs_PCRE(benchmark::State& state) { Parse3DigitDs(state, Parse3PCRE); }
501void Parse_DigitDs_RE2(benchmark::State& state) { Parse3DigitDs(state, Parse3RE2); }
502void Parse_DigitDs_Backtrack(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBacktrack); }
503void Parse_DigitDs_BitState(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBitState); }
504
505BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs());
506BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs());
507#ifdef USEPCRE
508BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs());
509#endif
510BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs());
511BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs());
512BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs());
513
514void Parse_CachedDigitDs_NFA(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedNFA); }
515void Parse_CachedDigitDs_OnePass(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedOnePass); }
516void Parse_CachedDigitDs_PCRE(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedPCRE); }
517void Parse_CachedDigitDs_RE2(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedRE2); }
518void Parse_CachedDigitDs_Backtrack(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBacktrack); }
519void Parse_CachedDigitDs_BitState(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBitState); }
520
521BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs());
522BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs());
523#ifdef USEPCRE
524BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs());
525#endif
526BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs());
527BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs());
528BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs());
529
530// Benchmark: splitting off leading number field.
531
532void Parse1Split(benchmark::State& state,
533 void (*parse1)(benchmark::State&, const char*,
534 const StringPiece&)) {
535 parse1(state, "[0-9]+-(.*)", "650-253-0001");
536 state.SetItemsProcessed(state.iterations());
537}
538
539void Parse_Split_NFA(benchmark::State& state) { Parse1Split(state, Parse1NFA); }
540void Parse_Split_OnePass(benchmark::State& state) { Parse1Split(state, Parse1OnePass); }
541void Parse_Split_PCRE(benchmark::State& state) { Parse1Split(state, Parse1PCRE); }
542void Parse_Split_RE2(benchmark::State& state) { Parse1Split(state, Parse1RE2); }
543void Parse_Split_BitState(benchmark::State& state) { Parse1Split(state, Parse1BitState); }
544
545BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs());
546BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs());
547#ifdef USEPCRE
548BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs());
549#endif
550BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs());
551BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs());
552
553void Parse_CachedSplit_NFA(benchmark::State& state) { Parse1Split(state, Parse1CachedNFA); }
554void Parse_CachedSplit_OnePass(benchmark::State& state) { Parse1Split(state, Parse1CachedOnePass); }
555void Parse_CachedSplit_PCRE(benchmark::State& state) { Parse1Split(state, Parse1CachedPCRE); }
556void Parse_CachedSplit_RE2(benchmark::State& state) { Parse1Split(state, Parse1CachedRE2); }
557void Parse_CachedSplit_BitState(benchmark::State& state) { Parse1Split(state, Parse1CachedBitState); }
558
559BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs());
560BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs());
561#ifdef USEPCRE
562BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs());
563#endif
564BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs());
565BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs());
566
567// Benchmark: splitting off leading number field but harder (ambiguous regexp).
568
569void Parse1SplitHard(benchmark::State& state,
570 void (*run)(benchmark::State&, const char*,
571 const StringPiece&)) {
572 run(state, "[0-9]+.(.*)", "650-253-0001");
573 state.SetItemsProcessed(state.iterations());
574}
575
576void Parse_SplitHard_NFA(benchmark::State& state) { Parse1SplitHard(state, Parse1NFA); }
577void Parse_SplitHard_PCRE(benchmark::State& state) { Parse1SplitHard(state, Parse1PCRE); }
578void Parse_SplitHard_RE2(benchmark::State& state) { Parse1SplitHard(state, Parse1RE2); }
579void Parse_SplitHard_BitState(benchmark::State& state) { Parse1SplitHard(state, Parse1BitState); }
580
581#ifdef USEPCRE
582BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs());
583#endif
584BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs());
585BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs());
586BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs());
587
588void Parse_CachedSplitHard_NFA(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedNFA); }
589void Parse_CachedSplitHard_PCRE(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedPCRE); }
590void Parse_CachedSplitHard_RE2(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedRE2); }
591void Parse_CachedSplitHard_BitState(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedBitState); }
592void Parse_CachedSplitHard_Backtrack(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedBacktrack); }
593
594#ifdef USEPCRE
595BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs());
596#endif
597BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs());
598BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs());
599BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs());
600BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs());
601
602// Benchmark: Parse1SplitHard, big text, small match.
603
604void Parse1SplitBig1(benchmark::State& state,
605 void (*run)(benchmark::State&, const char*,
606 const StringPiece&)) {
607 std::string s;
608 s.append(100000, 'x');
609 s.append("650-253-0001");
610 run(state, "[0-9]+.(.*)", s);
611 state.SetItemsProcessed(state.iterations());
612}
613
614void Parse_CachedSplitBig1_PCRE(benchmark::State& state) { Parse1SplitBig1(state, SearchParse1CachedPCRE); }
615void Parse_CachedSplitBig1_RE2(benchmark::State& state) { Parse1SplitBig1(state, SearchParse1CachedRE2); }
616
617#ifdef USEPCRE
618BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs());
619#endif
620BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs());
621
622// Benchmark: Parse1SplitHard, big text, big match.
623
624void Parse1SplitBig2(benchmark::State& state,
625 void (*run)(benchmark::State&, const char*,
626 const StringPiece&)) {
627 std::string s;
628 s.append("650-253-");
629 s.append(100000, '0');
630 run(state, "[0-9]+.(.*)", s);
631 state.SetItemsProcessed(state.iterations());
632}
633
634void Parse_CachedSplitBig2_PCRE(benchmark::State& state) { Parse1SplitBig2(state, SearchParse1CachedPCRE); }
635void Parse_CachedSplitBig2_RE2(benchmark::State& state) { Parse1SplitBig2(state, SearchParse1CachedRE2); }
636
637#ifdef USEPCRE
638BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs());
639#endif
640BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
641
642// Benchmark: measure time required to parse (but not execute)
643// a simple regular expression.
644
645void ParseRegexp(benchmark::State& state, const std::string& regexp) {
646 for (auto _ : state) {
647 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
648 CHECK(re);
649 re->Decref();
650 }
651}
652
653void SimplifyRegexp(benchmark::State& state, const std::string& regexp) {
654 for (auto _ : state) {
655 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
656 CHECK(re);
657 Regexp* sre = re->Simplify();
658 CHECK(sre);
659 sre->Decref();
660 re->Decref();
661 }
662}
663
664void NullWalkRegexp(benchmark::State& state, const std::string& regexp) {
665 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
666 CHECK(re);
667 for (auto _ : state) {
668 re->NullWalk();
669 }
670 re->Decref();
671}
672
673void SimplifyCompileRegexp(benchmark::State& state, const std::string& regexp) {
674 for (auto _ : state) {
675 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
676 CHECK(re);
677 Regexp* sre = re->Simplify();
678 CHECK(sre);
679 Prog* prog = sre->CompileToProg(0);
680 CHECK(prog);
681 delete prog;
682 sre->Decref();
683 re->Decref();
684 }
685}
686
687void CompileRegexp(benchmark::State& state, const std::string& regexp) {
688 for (auto _ : state) {
689 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
690 CHECK(re);
691 Prog* prog = re->CompileToProg(0);
692 CHECK(prog);
693 delete prog;
694 re->Decref();
695 }
696}
697
698void CompileToProg(benchmark::State& state, const std::string& regexp) {
699 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
700 CHECK(re);
701 for (auto _ : state) {
702 Prog* prog = re->CompileToProg(0);
703 CHECK(prog);
704 delete prog;
705 }
706 re->Decref();
707}
708
709void CompileByteMap(benchmark::State& state, const std::string& regexp) {
710 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
711 CHECK(re);
712 Prog* prog = re->CompileToProg(0);
713 CHECK(prog);
714 for (auto _ : state) {
715 prog->ComputeByteMap();
716 }
717 delete prog;
718 re->Decref();
719}
720
721void CompilePCRE(benchmark::State& state, const std::string& regexp) {
722 for (auto _ : state) {
723 PCRE re(regexp, PCRE::UTF8);
724 CHECK_EQ(re.error(), "");
725 }
726}
727
728void CompileRE2(benchmark::State& state, const std::string& regexp) {
729 for (auto _ : state) {
730 RE2 re(regexp);
731 CHECK_EQ(re.error(), "");
732 }
733}
734
735void RunBuild(benchmark::State& state, const std::string& regexp,
736 void (*run)(benchmark::State&, const std::string&)) {
737 run(state, regexp);
738 state.SetItemsProcessed(state.iterations());
739}
740
741} // namespace re2
742
743DEFINE_FLAG(std::string, compile_regexp, "(.*)-(\\d+)-of-(\\d+)",
744 "regexp for compile benchmarks");
745
746namespace re2 {
747
748void BM_PCRE_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompilePCRE); }
749void BM_Regexp_Parse(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), ParseRegexp); }
750void BM_Regexp_Simplify(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyRegexp); }
751void BM_CompileToProg(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileToProg); }
752void BM_CompileByteMap(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileByteMap); }
753void BM_Regexp_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRegexp); }
754void BM_Regexp_SimplifyCompile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyCompileRegexp); }
755void BM_Regexp_NullWalk(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), NullWalkRegexp); }
756void BM_RE2_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRE2); }
757
758#ifdef USEPCRE
759BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs());
760#endif
761BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs());
762BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs());
763BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs());
764BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs());
765BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs());
766BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs());
767BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs());
768BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs());
769
770// Makes text of size nbytes, then calls run to search
771// the text for regexp iters times.
772void SearchPhone(benchmark::State& state, ParseImpl* search) {
773 std::string s;
774 MakeText(&s, state.range(0));
775 s.append("(650) 253-0001");
776 search(state, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s);
777 state.SetBytesProcessed(state.iterations() * state.range(0));
778}
779
780void SearchPhone_CachedPCRE(benchmark::State& state) {
781 SearchPhone(state, SearchParse2CachedPCRE);
782}
783
784void SearchPhone_CachedRE2(benchmark::State& state) {
785 SearchPhone(state, SearchParse2CachedRE2);
786}
787
788#ifdef USEPCRE
789BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
790#endif
791BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
792
793/*
794TODO(rsc): Make this work again.
795
796// Generates and returns a string over binary alphabet {0,1} that contains
797// all possible binary sequences of length n as subsequences. The obvious
798// brute force method would generate a string of length n * 2^n, but this
799// generates a string of length n + 2^n - 1 called a De Bruijn cycle.
800// See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
801static std::string DeBruijnString(int n) {
802 CHECK_LT(n, 8*sizeof(int));
803 CHECK_GT(n, 0);
804
805 std::vector<bool> did(1<<n);
806 for (int i = 0; i < 1<<n; i++)
807 did[i] = false;
808
809 std::string s;
810 for (int i = 0; i < n-1; i++)
811 s.append("0");
812 int bits = 0;
813 int mask = (1<<n) - 1;
814 for (int i = 0; i < (1<<n); i++) {
815 bits <<= 1;
816 bits &= mask;
817 if (!did[bits|1]) {
818 bits |= 1;
819 s.append("1");
820 } else {
821 s.append("0");
822 }
823 CHECK(!did[bits]);
824 did[bits] = true;
825 }
826 return s;
827}
828
829void CacheFill(int iters, int n, SearchImpl *srch) {
830 std::string s = DeBruijnString(n+1);
831 std::string t;
832 for (int i = n+1; i < 20; i++) {
833 t = s + s;
834 using std::swap;
835 swap(s, t);
836 }
837 srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s,
838 Prog::kUnanchored, true);
839 SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*s.size());
840}
841
842void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); }
843void CacheFillRE2(int i, int n) { CacheFill(i, n, SearchCachedRE2); }
844void CacheFillNFA(int i, int n) { CacheFill(i, n, SearchCachedNFA); }
845void CacheFillDFA(int i, int n) { CacheFill(i, n, SearchCachedDFA); }
846
847// BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers
848// for the static BenchmarkRegisterer, which makes it unusable inside
849// a macro like DO24 below. MY_BENCHMARK_WITH_ARG uses the argument a
850// to make the identifiers distinct (only possible when 'a' is a simple
851// expression like 2, not like 1+1).
852#define MY_BENCHMARK_WITH_ARG(n, a) \
853 bool __benchmark_ ## n ## a = \
854 (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, NumCPUs());
855
856#define DO24(A, B) \
857 A(B, 1); A(B, 2); A(B, 3); A(B, 4); A(B, 5); A(B, 6); \
858 A(B, 7); A(B, 8); A(B, 9); A(B, 10); A(B, 11); A(B, 12); \
859 A(B, 13); A(B, 14); A(B, 15); A(B, 16); A(B, 17); A(B, 18); \
860 A(B, 19); A(B, 20); A(B, 21); A(B, 22); A(B, 23); A(B, 24);
861
862DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE)
863DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA)
864DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2)
865DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA)
866
867#undef DO24
868#undef MY_BENCHMARK_WITH_ARG
869*/
870
871////////////////////////////////////////////////////////////////////////
872//
873// Implementation routines. Sad that there are so many,
874// but all the interfaces are slightly different.
875
876// Runs implementation to search for regexp in text, iters times.
877// Expect_match says whether the regexp should be found.
878// Anchored says whether to run an anchored search.
879
880void SearchDFA(benchmark::State& state, const char* regexp,
881 const StringPiece& text, Prog::Anchor anchor,
882 bool expect_match) {
883 for (auto _ : state) {
884 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
885 CHECK(re);
886 Prog* prog = re->CompileToProg(0);
887 CHECK(prog);
888 bool failed = false;
889 CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
890 NULL, &failed, NULL),
891 expect_match);
892 CHECK(!failed);
893 delete prog;
894 re->Decref();
895 }
896}
897
898void SearchNFA(benchmark::State& state, const char* regexp,
899 const StringPiece& text, Prog::Anchor anchor,
900 bool expect_match) {
901 for (auto _ : state) {
902 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
903 CHECK(re);
904 Prog* prog = re->CompileToProg(0);
905 CHECK(prog);
906 CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
907 NULL, 0),
908 expect_match);
909 delete prog;
910 re->Decref();
911 }
912}
913
914void SearchOnePass(benchmark::State& state, const char* regexp,
915 const StringPiece& text, Prog::Anchor anchor,
916 bool expect_match) {
917 for (auto _ : state) {
918 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
919 CHECK(re);
920 Prog* prog = re->CompileToProg(0);
921 CHECK(prog);
922 CHECK(prog->IsOnePass());
923 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
924 expect_match);
925 delete prog;
926 re->Decref();
927 }
928}
929
930void SearchBitState(benchmark::State& state, const char* regexp,
931 const StringPiece& text, Prog::Anchor anchor,
932 bool expect_match) {
933 for (auto _ : state) {
934 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
935 CHECK(re);
936 Prog* prog = re->CompileToProg(0);
937 CHECK(prog);
938 CHECK(prog->CanBitState());
939 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
940 expect_match);
941 delete prog;
942 re->Decref();
943 }
944}
945
946void SearchPCRE(benchmark::State& state, const char* regexp,
947 const StringPiece& text, Prog::Anchor anchor,
948 bool expect_match) {
949 for (auto _ : state) {
950 PCRE re(regexp, PCRE::UTF8);
951 CHECK_EQ(re.error(), "");
952 if (anchor == Prog::kAnchored)
953 CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
954 else
955 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
956 }
957}
958
959void SearchRE2(benchmark::State& state, const char* regexp,
960 const StringPiece& text, Prog::Anchor anchor,
961 bool expect_match) {
962 for (auto _ : state) {
963 RE2 re(regexp);
964 CHECK_EQ(re.error(), "");
965 if (anchor == Prog::kAnchored)
966 CHECK_EQ(RE2::FullMatch(text, re), expect_match);
967 else
968 CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
969 }
970}
971
972// SearchCachedXXX is like SearchXXX but only does the
973// regexp parsing and compiling once. This lets us measure
974// search time without the per-regexp overhead.
975
976void SearchCachedDFA(benchmark::State& state, const char* regexp,
977 const StringPiece& text, Prog::Anchor anchor,
978 bool expect_match) {
979 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
980 CHECK(re);
981 Prog* prog = re->CompileToProg(1LL<<31);
982 CHECK(prog);
983 for (auto _ : state) {
984 bool failed = false;
985 CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
986 NULL, &failed, NULL),
987 expect_match);
988 CHECK(!failed);
989 }
990 delete prog;
991 re->Decref();
992}
993
994void SearchCachedNFA(benchmark::State& state, const char* regexp,
995 const StringPiece& text, Prog::Anchor anchor,
996 bool expect_match) {
997 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
998 CHECK(re);
999 Prog* prog = re->CompileToProg(0);
1000 CHECK(prog);
1001 for (auto _ : state) {
1002 CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
1003 NULL, 0),
1004 expect_match);
1005 }
1006 delete prog;
1007 re->Decref();
1008}
1009
1010void SearchCachedOnePass(benchmark::State& state, const char* regexp,
1011 const StringPiece& text, Prog::Anchor anchor,
1012 bool expect_match) {
1013 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1014 CHECK(re);
1015 Prog* prog = re->CompileToProg(0);
1016 CHECK(prog);
1017 CHECK(prog->IsOnePass());
1018 for (auto _ : state) {
1019 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
1020 expect_match);
1021 }
1022 delete prog;
1023 re->Decref();
1024}
1025
1026void SearchCachedBitState(benchmark::State& state, const char* regexp,
1027 const StringPiece& text, Prog::Anchor anchor,
1028 bool expect_match) {
1029 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1030 CHECK(re);
1031 Prog* prog = re->CompileToProg(0);
1032 CHECK(prog);
1033 CHECK(prog->CanBitState());
1034 for (auto _ : state) {
1035 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
1036 expect_match);
1037 }
1038 delete prog;
1039 re->Decref();
1040}
1041
1042void SearchCachedPCRE(benchmark::State& state, const char* regexp,
1043 const StringPiece& text, Prog::Anchor anchor,
1044 bool expect_match) {
1045 PCRE re(regexp, PCRE::UTF8);
1046 CHECK_EQ(re.error(), "");
1047 for (auto _ : state) {
1048 if (anchor == Prog::kAnchored)
1049 CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
1050 else
1051 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
1052 }
1053}
1054
1055void SearchCachedRE2(benchmark::State& state, const char* regexp,
1056 const StringPiece& text, Prog::Anchor anchor,
1057 bool expect_match) {
1058 RE2 re(regexp);
1059 CHECK_EQ(re.error(), "");
1060 for (auto _ : state) {
1061 if (anchor == Prog::kAnchored)
1062 CHECK_EQ(RE2::FullMatch(text, re), expect_match);
1063 else
1064 CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
1065 }
1066}
1067
1068// Runs implementation to full match regexp against text,
1069// extracting three submatches. Expects match always.
1070
1071void Parse3NFA(benchmark::State& state, const char* regexp,
1072 const StringPiece& text) {
1073 for (auto _ : state) {
1074 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1075 CHECK(re);
1076 Prog* prog = re->CompileToProg(0);
1077 CHECK(prog);
1078 StringPiece sp[4]; // 4 because sp[0] is whole match.
1079 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1080 Prog::kFullMatch, sp, 4));
1081 delete prog;
1082 re->Decref();
1083 }
1084}
1085
1086void Parse3OnePass(benchmark::State& state, const char* regexp,
1087 const StringPiece& text) {
1088 for (auto _ : state) {
1089 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1090 CHECK(re);
1091 Prog* prog = re->CompileToProg(0);
1092 CHECK(prog);
1093 CHECK(prog->IsOnePass());
1094 StringPiece sp[4]; // 4 because sp[0] is whole match.
1095 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1096 delete prog;
1097 re->Decref();
1098 }
1099}
1100
1101void Parse3BitState(benchmark::State& state, const char* regexp,
1102 const StringPiece& text) {
1103 for (auto _ : state) {
1104 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1105 CHECK(re);
1106 Prog* prog = re->CompileToProg(0);
1107 CHECK(prog);
1108 CHECK(prog->CanBitState());
1109 StringPiece sp[4]; // 4 because sp[0] is whole match.
1110 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1111 delete prog;
1112 re->Decref();
1113 }
1114}
1115
1116void Parse3Backtrack(benchmark::State& state, const char* regexp,
1117 const StringPiece& text) {
1118 for (auto _ : state) {
1119 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1120 CHECK(re);
1121 Prog* prog = re->CompileToProg(0);
1122 CHECK(prog);
1123 StringPiece sp[4]; // 4 because sp[0] is whole match.
1124 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1125 delete prog;
1126 re->Decref();
1127 }
1128}
1129
1130void Parse3PCRE(benchmark::State& state, const char* regexp,
1131 const StringPiece& text) {
1132 for (auto _ : state) {
1133 PCRE re(regexp, PCRE::UTF8);
1134 CHECK_EQ(re.error(), "");
1135 StringPiece sp1, sp2, sp3;
1136 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1137 }
1138}
1139
1140void Parse3RE2(benchmark::State& state, const char* regexp,
1141 const StringPiece& text) {
1142 for (auto _ : state) {
1143 RE2 re(regexp);
1144 CHECK_EQ(re.error(), "");
1145 StringPiece sp1, sp2, sp3;
1146 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1147 }
1148}
1149
1150void Parse3CachedNFA(benchmark::State& state, const char* regexp,
1151 const StringPiece& text) {
1152 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1153 CHECK(re);
1154 Prog* prog = re->CompileToProg(0);
1155 CHECK(prog);
1156 StringPiece sp[4]; // 4 because sp[0] is whole match.
1157 for (auto _ : state) {
1158 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1159 Prog::kFullMatch, sp, 4));
1160 }
1161 delete prog;
1162 re->Decref();
1163}
1164
1165void Parse3CachedOnePass(benchmark::State& state, const char* regexp,
1166 const StringPiece& text) {
1167 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1168 CHECK(re);
1169 Prog* prog = re->CompileToProg(0);
1170 CHECK(prog);
1171 CHECK(prog->IsOnePass());
1172 StringPiece sp[4]; // 4 because sp[0] is whole match.
1173 for (auto _ : state) {
1174 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1175 }
1176 delete prog;
1177 re->Decref();
1178}
1179
1180void Parse3CachedBitState(benchmark::State& state, const char* regexp,
1181 const StringPiece& text) {
1182 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1183 CHECK(re);
1184 Prog* prog = re->CompileToProg(0);
1185 CHECK(prog);
1186 CHECK(prog->CanBitState());
1187 StringPiece sp[4]; // 4 because sp[0] is whole match.
1188 for (auto _ : state) {
1189 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1190 }
1191 delete prog;
1192 re->Decref();
1193}
1194
1195void Parse3CachedBacktrack(benchmark::State& state, const char* regexp,
1196 const StringPiece& text) {
1197 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1198 CHECK(re);
1199 Prog* prog = re->CompileToProg(0);
1200 CHECK(prog);
1201 StringPiece sp[4]; // 4 because sp[0] is whole match.
1202 for (auto _ : state) {
1203 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1204 }
1205 delete prog;
1206 re->Decref();
1207}
1208
1209void Parse3CachedPCRE(benchmark::State& state, const char* regexp,
1210 const StringPiece& text) {
1211 PCRE re(regexp, PCRE::UTF8);
1212 CHECK_EQ(re.error(), "");
1213 StringPiece sp1, sp2, sp3;
1214 for (auto _ : state) {
1215 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1216 }
1217}
1218
1219void Parse3CachedRE2(benchmark::State& state, const char* regexp,
1220 const StringPiece& text) {
1221 RE2 re(regexp);
1222 CHECK_EQ(re.error(), "");
1223 StringPiece sp1, sp2, sp3;
1224 for (auto _ : state) {
1225 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1226 }
1227}
1228
1229// Runs implementation to full match regexp against text,
1230// extracting three submatches. Expects match always.
1231
1232void Parse1NFA(benchmark::State& state, const char* regexp,
1233 const StringPiece& text) {
1234 for (auto _ : state) {
1235 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1236 CHECK(re);
1237 Prog* prog = re->CompileToProg(0);
1238 CHECK(prog);
1239 StringPiece sp[2]; // 2 because sp[0] is whole match.
1240 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1241 Prog::kFullMatch, sp, 2));
1242 delete prog;
1243 re->Decref();
1244 }
1245}
1246
1247void Parse1OnePass(benchmark::State& state, const char* regexp,
1248 const StringPiece& text) {
1249 for (auto _ : state) {
1250 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1251 CHECK(re);
1252 Prog* prog = re->CompileToProg(0);
1253 CHECK(prog);
1254 CHECK(prog->IsOnePass());
1255 StringPiece sp[2]; // 2 because sp[0] is whole match.
1256 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1257 delete prog;
1258 re->Decref();
1259 }
1260}
1261
1262void Parse1BitState(benchmark::State& state, const char* regexp,
1263 const StringPiece& text) {
1264 for (auto _ : state) {
1265 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1266 CHECK(re);
1267 Prog* prog = re->CompileToProg(0);
1268 CHECK(prog);
1269 CHECK(prog->CanBitState());
1270 StringPiece sp[2]; // 2 because sp[0] is whole match.
1271 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1272 delete prog;
1273 re->Decref();
1274 }
1275}
1276
1277void Parse1PCRE(benchmark::State& state, const char* regexp,
1278 const StringPiece& text) {
1279 for (auto _ : state) {
1280 PCRE re(regexp, PCRE::UTF8);
1281 CHECK_EQ(re.error(), "");
1282 StringPiece sp1;
1283 CHECK(PCRE::FullMatch(text, re, &sp1));
1284 }
1285}
1286
1287void Parse1RE2(benchmark::State& state, const char* regexp,
1288 const StringPiece& text) {
1289 for (auto _ : state) {
1290 RE2 re(regexp);
1291 CHECK_EQ(re.error(), "");
1292 StringPiece sp1;
1293 CHECK(RE2::FullMatch(text, re, &sp1));
1294 }
1295}
1296
1297void Parse1CachedNFA(benchmark::State& state, const char* regexp,
1298 const StringPiece& text) {
1299 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1300 CHECK(re);
1301 Prog* prog = re->CompileToProg(0);
1302 CHECK(prog);
1303 StringPiece sp[2]; // 2 because sp[0] is whole match.
1304 for (auto _ : state) {
1305 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1306 Prog::kFullMatch, sp, 2));
1307 }
1308 delete prog;
1309 re->Decref();
1310}
1311
1312void Parse1CachedOnePass(benchmark::State& state, const char* regexp,
1313 const StringPiece& text) {
1314 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1315 CHECK(re);
1316 Prog* prog = re->CompileToProg(0);
1317 CHECK(prog);
1318 CHECK(prog->IsOnePass());
1319 StringPiece sp[2]; // 2 because sp[0] is whole match.
1320 for (auto _ : state) {
1321 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1322 }
1323 delete prog;
1324 re->Decref();
1325}
1326
1327void Parse1CachedBitState(benchmark::State& state, const char* regexp,
1328 const StringPiece& text) {
1329 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1330 CHECK(re);
1331 Prog* prog = re->CompileToProg(0);
1332 CHECK(prog);
1333 CHECK(prog->CanBitState());
1334 StringPiece sp[2]; // 2 because sp[0] is whole match.
1335 for (auto _ : state) {
1336 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1337 }
1338 delete prog;
1339 re->Decref();
1340}
1341
1342void Parse1CachedBacktrack(benchmark::State& state, const char* regexp,
1343 const StringPiece& text) {
1344 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1345 CHECK(re);
1346 Prog* prog = re->CompileToProg(0);
1347 CHECK(prog);
1348 StringPiece sp[2]; // 2 because sp[0] is whole match.
1349 for (auto _ : state) {
1350 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1351 }
1352 delete prog;
1353 re->Decref();
1354}
1355
1356void Parse1CachedPCRE(benchmark::State& state, const char* regexp,
1357 const StringPiece& text) {
1358 PCRE re(regexp, PCRE::UTF8);
1359 CHECK_EQ(re.error(), "");
1360 StringPiece sp1;
1361 for (auto _ : state) {
1362 CHECK(PCRE::FullMatch(text, re, &sp1));
1363 }
1364}
1365
1366void Parse1CachedRE2(benchmark::State& state, const char* regexp,
1367 const StringPiece& text) {
1368 RE2 re(regexp);
1369 CHECK_EQ(re.error(), "");
1370 StringPiece sp1;
1371 for (auto _ : state) {
1372 CHECK(RE2::FullMatch(text, re, &sp1));
1373 }
1374}
1375
1376void SearchParse2CachedPCRE(benchmark::State& state, const char* regexp,
1377 const StringPiece& text) {
1378 PCRE re(regexp, PCRE::UTF8);
1379 CHECK_EQ(re.error(), "");
1380 for (auto _ : state) {
1381 StringPiece sp1, sp2;
1382 CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
1383 }
1384}
1385
1386void SearchParse2CachedRE2(benchmark::State& state, const char* regexp,
1387 const StringPiece& text) {
1388 RE2 re(regexp);
1389 CHECK_EQ(re.error(), "");
1390 for (auto _ : state) {
1391 StringPiece sp1, sp2;
1392 CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
1393 }
1394}
1395
1396void SearchParse1CachedPCRE(benchmark::State& state, const char* regexp,
1397 const StringPiece& text) {
1398 PCRE re(regexp, PCRE::UTF8);
1399 CHECK_EQ(re.error(), "");
1400 for (auto _ : state) {
1401 StringPiece sp1;
1402 CHECK(PCRE::PartialMatch(text, re, &sp1));
1403 }
1404}
1405
1406void SearchParse1CachedRE2(benchmark::State& state, const char* regexp,
1407 const StringPiece& text) {
1408 RE2 re(regexp);
1409 CHECK_EQ(re.error(), "");
1410 for (auto _ : state) {
1411 StringPiece sp1;
1412 CHECK(RE2::PartialMatch(text, re, &sp1));
1413 }
1414}
1415
1416void EmptyPartialMatchPCRE(benchmark::State& state) {
1417 PCRE re("");
1418 for (auto _ : state) {
1419 PCRE::PartialMatch("", re);
1420 }
1421}
1422
1423void EmptyPartialMatchRE2(benchmark::State& state) {
1424 RE2 re("");
1425 for (auto _ : state) {
1426 RE2::PartialMatch("", re);
1427 }
1428}
1429#ifdef USEPCRE
1430BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1431#endif
1432BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs());
1433
1434void SimplePartialMatchPCRE(benchmark::State& state) {
1435 PCRE re("abcdefg");
1436 for (auto _ : state) {
1437 PCRE::PartialMatch("abcdefg", re);
1438 }
1439}
1440
1441void SimplePartialMatchRE2(benchmark::State& state) {
1442 RE2 re("abcdefg");
1443 for (auto _ : state) {
1444 RE2::PartialMatch("abcdefg", re);
1445 }
1446}
1447#ifdef USEPCRE
1448BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs());
1449#endif
1450BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs());
1451
1452static std::string http_text =
1453 "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf"
1454 "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1";
1455
1456void HTTPPartialMatchPCRE(benchmark::State& state) {
1457 StringPiece a;
1458 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1459 for (auto _ : state) {
1460 PCRE::PartialMatch(http_text, re, &a);
1461 }
1462}
1463
1464void HTTPPartialMatchRE2(benchmark::State& state) {
1465 StringPiece a;
1466 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1467 for (auto _ : state) {
1468 RE2::PartialMatch(http_text, re, &a);
1469 }
1470}
1471
1472#ifdef USEPCRE
1473BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1474#endif
1475BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1476
1477static std::string smallhttp_text =
1478 "GET /abc HTTP/1.1";
1479
1480void SmallHTTPPartialMatchPCRE(benchmark::State& state) {
1481 StringPiece a;
1482 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1483 for (auto _ : state) {
1484 PCRE::PartialMatch(smallhttp_text, re, &a);
1485 }
1486}
1487
1488void SmallHTTPPartialMatchRE2(benchmark::State& state) {
1489 StringPiece a;
1490 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1491 for (auto _ : state) {
1492 RE2::PartialMatch(smallhttp_text, re, &a);
1493 }
1494}
1495
1496#ifdef USEPCRE
1497BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1498#endif
1499BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1500
1501void DotMatchPCRE(benchmark::State& state) {
1502 StringPiece a;
1503 PCRE re("(?-s)^(.+)");
1504 for (auto _ : state) {
1505 PCRE::PartialMatch(http_text, re, &a);
1506 }
1507}
1508
1509void DotMatchRE2(benchmark::State& state) {
1510 StringPiece a;
1511 RE2 re("(?-s)^(.+)");
1512 for (auto _ : state) {
1513 RE2::PartialMatch(http_text, re, &a);
1514 }
1515}
1516
1517#ifdef USEPCRE
1518BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs());
1519#endif
1520BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs());
1521
1522void ASCIIMatchPCRE(benchmark::State& state) {
1523 StringPiece a;
1524 PCRE re("(?-s)^([ -~]+)");
1525 for (auto _ : state) {
1526 PCRE::PartialMatch(http_text, re, &a);
1527 }
1528}
1529
1530void ASCIIMatchRE2(benchmark::State& state) {
1531 StringPiece a;
1532 RE2 re("(?-s)^([ -~]+)");
1533 for (auto _ : state) {
1534 RE2::PartialMatch(http_text, re, &a);
1535 }
1536}
1537
1538#ifdef USEPCRE
1539BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs());
1540#endif
1541BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs());
1542
1543void FullMatchPCRE(benchmark::State& state, const char *regexp) {
1544 std::string s;
1545 MakeText(&s, state.range(0));
1546 s += "ABCDEFGHIJ";
1547 PCRE re(regexp);
1548 for (auto _ : state) {
1549 CHECK(PCRE::FullMatch(s, re));
1550 }
1551 state.SetBytesProcessed(state.iterations() * state.range(0));
1552}
1553
1554void FullMatchRE2(benchmark::State& state, const char *regexp) {
1555 std::string s;
1556 MakeText(&s, state.range(0));
1557 s += "ABCDEFGHIJ";
1558 RE2 re(regexp, RE2::Latin1);
1559 for (auto _ : state) {
1560 CHECK(RE2::FullMatch(s, re));
1561 }
1562 state.SetBytesProcessed(state.iterations() * state.range(0));
1563}
1564
1565void FullMatch_DotStar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*"); }
1566void FullMatch_DotStar_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s).*"); }
1567
1568void FullMatch_DotStarDollar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*$"); }
1569void FullMatch_DotStarDollar_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s).*$"); }
1570
1571void FullMatch_DotStarCapture_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s)((.*)()()($))"); }
1572void FullMatch_DotStarCapture_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s)((.*)()()($))"); }
1573
1574#ifdef USEPCRE
1575BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20);
1576#endif
1577BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2, 8, 2<<20);
1578
1579#ifdef USEPCRE
1580BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20);
1581#endif
1582BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2, 8, 2<<20);
1583
1584#ifdef USEPCRE
1585BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20);
1586#endif
1587BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2, 8, 2<<20);
1588
1589void PossibleMatchRangeCommon(benchmark::State& state, const char* regexp) {
1590 RE2 re(regexp);
1591 std::string min;
1592 std::string max;
1593 const int kMaxLen = 16;
1594 for (auto _ : state) {
1595 CHECK(re.PossibleMatchRange(&min, &max, kMaxLen));
1596 }
1597}
1598
1599void PossibleMatchRange_Trivial(benchmark::State& state) {
1600 PossibleMatchRangeCommon(state, ".*");
1601}
1602void PossibleMatchRange_Complex(benchmark::State& state) {
1603 PossibleMatchRangeCommon(state, "^abc[def]?[gh]{1,2}.*");
1604}
1605void PossibleMatchRange_Prefix(benchmark::State& state) {
1606 PossibleMatchRangeCommon(state, "^some_random_prefix.*");
1607}
1608void PossibleMatchRange_NoProg(benchmark::State& state) {
1609 PossibleMatchRangeCommon(state, "^some_random_string$");
1610}
1611
1612BENCHMARK(PossibleMatchRange_Trivial);
1613BENCHMARK(PossibleMatchRange_Complex);
1614BENCHMARK(PossibleMatchRange_Prefix);
1615BENCHMARK(PossibleMatchRange_NoProg);
1616
1617} // namespace re2
1618