1 | // Copyright 2006-2008 The RE2 Authors. All Rights Reserved. |
2 | // Use of this source code is governed by a BSD-style |
3 | // license that can be found in the LICENSE file. |
4 | |
5 | // Benchmarks for regular expression implementations. |
6 | |
7 | #include <stdint.h> |
8 | #include <stdio.h> |
9 | #include <stdlib.h> |
10 | #include <string> |
11 | #include <thread> |
12 | #include <utility> |
13 | |
14 | #include "util/benchmark.h" |
15 | #include "util/test.h" |
16 | #include "util/flags.h" |
17 | #include "util/logging.h" |
18 | #include "util/malloc_counter.h" |
19 | #include "util/strutil.h" |
20 | #include "re2/prog.h" |
21 | #include "re2/re2.h" |
22 | #include "re2/regexp.h" |
23 | #include "util/pcre.h" |
24 | |
25 | namespace re2 { |
26 | void Test(); |
27 | void MemoryUsage(); |
28 | } // namespace re2 |
29 | |
30 | typedef testing::MallocCounter MallocCounter; |
31 | |
32 | namespace re2 { |
33 | |
34 | void Test() { |
35 | Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)" , Regexp::LikePerl, NULL); |
36 | CHECK(re); |
37 | Prog* prog = re->CompileToProg(0); |
38 | CHECK(prog); |
39 | CHECK(prog->IsOnePass()); |
40 | CHECK(prog->CanBitState()); |
41 | const char* text = "650-253-0001" ; |
42 | StringPiece sp[4]; |
43 | CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
44 | CHECK_EQ(sp[0], "650-253-0001" ); |
45 | CHECK_EQ(sp[1], "650" ); |
46 | CHECK_EQ(sp[2], "253" ); |
47 | CHECK_EQ(sp[3], "0001" ); |
48 | delete prog; |
49 | re->Decref(); |
50 | LOG(INFO) << "test passed\n" ; |
51 | } |
52 | |
53 | void MemoryUsage() { |
54 | const char* regexp = "(\\d+)-(\\d+)-(\\d+)" ; |
55 | const char* text = "650-253-0001" ; |
56 | { |
57 | MallocCounter mc(MallocCounter::THIS_THREAD_ONLY); |
58 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
59 | CHECK(re); |
60 | // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly, |
61 | // because LOG(INFO) might do a big allocation before they get evaluated. |
62 | fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n" , |
63 | mc.HeapGrowth(), mc.PeakHeapGrowth()); |
64 | mc.Reset(); |
65 | |
66 | Prog* prog = re->CompileToProg(0); |
67 | CHECK(prog); |
68 | CHECK(prog->IsOnePass()); |
69 | CHECK(prog->CanBitState()); |
70 | fprintf(stderr, "Prog: %7lld bytes (peak=%lld)\n" , |
71 | mc.HeapGrowth(), mc.PeakHeapGrowth()); |
72 | mc.Reset(); |
73 | |
74 | StringPiece sp[4]; |
75 | CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
76 | fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n" , |
77 | mc.HeapGrowth(), mc.PeakHeapGrowth()); |
78 | delete prog; |
79 | re->Decref(); |
80 | } |
81 | |
82 | { |
83 | MallocCounter mc(MallocCounter::THIS_THREAD_ONLY); |
84 | |
85 | PCRE re(regexp, PCRE::UTF8); |
86 | fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n" , |
87 | mc.HeapGrowth(), mc.PeakHeapGrowth()); |
88 | PCRE::FullMatch(text, re); |
89 | fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n" , |
90 | mc.HeapGrowth(), mc.PeakHeapGrowth()); |
91 | } |
92 | |
93 | { |
94 | MallocCounter mc(MallocCounter::THIS_THREAD_ONLY); |
95 | |
96 | PCRE* re = new PCRE(regexp, PCRE::UTF8); |
97 | fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n" , |
98 | mc.HeapGrowth(), mc.PeakHeapGrowth()); |
99 | PCRE::FullMatch(text, *re); |
100 | fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n" , |
101 | mc.HeapGrowth(), mc.PeakHeapGrowth()); |
102 | delete re; |
103 | } |
104 | |
105 | { |
106 | MallocCounter mc(MallocCounter::THIS_THREAD_ONLY); |
107 | |
108 | RE2 re(regexp); |
109 | fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n" , |
110 | mc.HeapGrowth(), mc.PeakHeapGrowth()); |
111 | RE2::FullMatch(text, re); |
112 | fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n" , |
113 | mc.HeapGrowth(), mc.PeakHeapGrowth()); |
114 | } |
115 | |
116 | fprintf(stderr, "sizeof: PCRE=%zd RE2=%zd Prog=%zd Inst=%zd\n" , |
117 | sizeof(PCRE), sizeof(RE2), sizeof(Prog), sizeof(Prog::Inst)); |
118 | } |
119 | |
120 | int NumCPUs() { |
121 | return static_cast<int>(std::thread::hardware_concurrency()); |
122 | } |
123 | |
124 | // Regular expression implementation wrappers. |
125 | // Defined at bottom of file, but they are repetitive |
126 | // and not interesting. |
127 | |
128 | typedef void SearchImpl(benchmark::State& state, const char* regexp, |
129 | const StringPiece& text, Prog::Anchor anchor, |
130 | bool expect_match); |
131 | |
132 | SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState, SearchPCRE, |
133 | SearchRE2, SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass, |
134 | SearchCachedBitState, SearchCachedPCRE, SearchCachedRE2; |
135 | |
136 | typedef void ParseImpl(benchmark::State& state, const char* regexp, |
137 | const StringPiece& text); |
138 | |
139 | ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState, Parse1PCRE, Parse1RE2, |
140 | Parse1Backtrack, Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState, |
141 | Parse1CachedPCRE, Parse1CachedRE2, Parse1CachedBacktrack; |
142 | |
143 | ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState, Parse3PCRE, Parse3RE2, |
144 | Parse3Backtrack, Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState, |
145 | Parse3CachedPCRE, Parse3CachedRE2, Parse3CachedBacktrack; |
146 | |
147 | ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2; |
148 | |
149 | ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2; |
150 | |
151 | // Benchmark: failed search for regexp in random text. |
152 | |
153 | // Generate random text that won't contain the search string, |
154 | // to test worst-case search behavior. |
155 | void MakeText(std::string* text, int64_t nbytes) { |
156 | srand(1); |
157 | text->resize(nbytes); |
158 | for (int64_t i = 0; i < nbytes; i++) { |
159 | // Generate a one-byte rune that isn't a control character (e.g. '\n'). |
160 | // Clipping to 0x20 introduces some bias, but we don't need uniformity. |
161 | int byte = rand() & 0x7F; |
162 | if (byte < 0x20) |
163 | byte = 0x20; |
164 | (*text)[i] = byte; |
165 | } |
166 | } |
167 | |
168 | // Makes text of size nbytes, then calls run to search |
169 | // the text for regexp iters times. |
170 | void Search(benchmark::State& state, const char* regexp, SearchImpl* search) { |
171 | std::string s; |
172 | MakeText(&s, state.range(0)); |
173 | search(state, regexp, s, Prog::kUnanchored, false); |
174 | state.SetBytesProcessed(state.iterations() * state.range(0)); |
175 | } |
176 | |
177 | // These two are easy because they start with an A, |
178 | // giving the search loop something to memchr for. |
179 | #define EASY0 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$" |
180 | #define EASY1 "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$" |
181 | |
182 | // This is a little harder, since it starts with a character class |
183 | // and thus can't be memchr'ed. Could look for ABC and work backward, |
184 | // but no one does that. |
185 | #define MEDIUM "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$" |
186 | |
187 | // This is a fair amount harder, because of the leading [ -~]*. |
188 | // A bad backtracking implementation will take O(text^2) time to |
189 | // figure out there's no match. |
190 | #define HARD "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$" |
191 | |
192 | // This has quite a high degree of fanout. |
193 | // NFA execution will be particularly slow. |
194 | #define FANOUT "(?:[\\x{80}-\\x{10FFFF}]?){100}[\\x{80}-\\x{10FFFF}]" |
195 | |
196 | // This stresses engines that are trying to track parentheses. |
197 | #define PARENS "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \ |
198 | "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$" |
199 | |
200 | void Search_Easy0_CachedDFA(benchmark::State& state) { Search(state, EASY0, SearchCachedDFA); } |
201 | void Search_Easy0_CachedNFA(benchmark::State& state) { Search(state, EASY0, SearchCachedNFA); } |
202 | void Search_Easy0_CachedPCRE(benchmark::State& state) { Search(state, EASY0, SearchCachedPCRE); } |
203 | void Search_Easy0_CachedRE2(benchmark::State& state) { Search(state, EASY0, SearchCachedRE2); } |
204 | |
205 | BENCHMARK_RANGE(Search_Easy0_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
206 | BENCHMARK_RANGE(Search_Easy0_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs()); |
207 | #ifdef USEPCRE |
208 | BENCHMARK_RANGE(Search_Easy0_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
209 | #endif |
210 | BENCHMARK_RANGE(Search_Easy0_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
211 | |
212 | void Search_Easy1_CachedDFA(benchmark::State& state) { Search(state, EASY1, SearchCachedDFA); } |
213 | void Search_Easy1_CachedNFA(benchmark::State& state) { Search(state, EASY1, SearchCachedNFA); } |
214 | void Search_Easy1_CachedPCRE(benchmark::State& state) { Search(state, EASY1, SearchCachedPCRE); } |
215 | void Search_Easy1_CachedRE2(benchmark::State& state) { Search(state, EASY1, SearchCachedRE2); } |
216 | |
217 | BENCHMARK_RANGE(Search_Easy1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
218 | BENCHMARK_RANGE(Search_Easy1_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs()); |
219 | #ifdef USEPCRE |
220 | BENCHMARK_RANGE(Search_Easy1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
221 | #endif |
222 | BENCHMARK_RANGE(Search_Easy1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
223 | |
224 | void Search_Medium_CachedDFA(benchmark::State& state) { Search(state, MEDIUM, SearchCachedDFA); } |
225 | void Search_Medium_CachedNFA(benchmark::State& state) { Search(state, MEDIUM, SearchCachedNFA); } |
226 | void Search_Medium_CachedPCRE(benchmark::State& state) { Search(state, MEDIUM, SearchCachedPCRE); } |
227 | void Search_Medium_CachedRE2(benchmark::State& state) { Search(state, MEDIUM, SearchCachedRE2); } |
228 | |
229 | BENCHMARK_RANGE(Search_Medium_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
230 | BENCHMARK_RANGE(Search_Medium_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs()); |
231 | #ifdef USEPCRE |
232 | BENCHMARK_RANGE(Search_Medium_CachedPCRE, 8, 256<<10)->ThreadRange(1, NumCPUs()); |
233 | #endif |
234 | BENCHMARK_RANGE(Search_Medium_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
235 | |
236 | void Search_Hard_CachedDFA(benchmark::State& state) { Search(state, HARD, SearchCachedDFA); } |
237 | void Search_Hard_CachedNFA(benchmark::State& state) { Search(state, HARD, SearchCachedNFA); } |
238 | void Search_Hard_CachedPCRE(benchmark::State& state) { Search(state, HARD, SearchCachedPCRE); } |
239 | void Search_Hard_CachedRE2(benchmark::State& state) { Search(state, HARD, SearchCachedRE2); } |
240 | |
241 | BENCHMARK_RANGE(Search_Hard_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
242 | BENCHMARK_RANGE(Search_Hard_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs()); |
243 | #ifdef USEPCRE |
244 | BENCHMARK_RANGE(Search_Hard_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs()); |
245 | #endif |
246 | BENCHMARK_RANGE(Search_Hard_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
247 | |
248 | void Search_Fanout_CachedDFA(benchmark::State& state) { Search(state, FANOUT, SearchCachedDFA); } |
249 | void Search_Fanout_CachedNFA(benchmark::State& state) { Search(state, FANOUT, SearchCachedNFA); } |
250 | void Search_Fanout_CachedPCRE(benchmark::State& state) { Search(state, FANOUT, SearchCachedPCRE); } |
251 | void Search_Fanout_CachedRE2(benchmark::State& state) { Search(state, FANOUT, SearchCachedRE2); } |
252 | |
253 | BENCHMARK_RANGE(Search_Fanout_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
254 | BENCHMARK_RANGE(Search_Fanout_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs()); |
255 | #ifdef USEPCRE |
256 | BENCHMARK_RANGE(Search_Fanout_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs()); |
257 | #endif |
258 | BENCHMARK_RANGE(Search_Fanout_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
259 | |
260 | void Search_Parens_CachedDFA(benchmark::State& state) { Search(state, PARENS, SearchCachedDFA); } |
261 | void Search_Parens_CachedNFA(benchmark::State& state) { Search(state, PARENS, SearchCachedNFA); } |
262 | void Search_Parens_CachedPCRE(benchmark::State& state) { Search(state, PARENS, SearchCachedPCRE); } |
263 | void Search_Parens_CachedRE2(benchmark::State& state) { Search(state, PARENS, SearchCachedRE2); } |
264 | |
265 | BENCHMARK_RANGE(Search_Parens_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
266 | BENCHMARK_RANGE(Search_Parens_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs()); |
267 | #ifdef USEPCRE |
268 | BENCHMARK_RANGE(Search_Parens_CachedPCRE, 8, 8)->ThreadRange(1, NumCPUs()); |
269 | #endif |
270 | BENCHMARK_RANGE(Search_Parens_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
271 | |
272 | void SearchBigFixed(benchmark::State& state, SearchImpl* search) { |
273 | std::string s; |
274 | s.append(state.range(0)/2, 'x'); |
275 | std::string regexp = "^" + s + ".*$" ; |
276 | std::string t; |
277 | MakeText(&t, state.range(0)/2); |
278 | s += t; |
279 | search(state, regexp.c_str(), s, Prog::kUnanchored, true); |
280 | state.SetBytesProcessed(state.iterations() * state.range(0)); |
281 | } |
282 | |
283 | void Search_BigFixed_CachedDFA(benchmark::State& state) { SearchBigFixed(state, SearchCachedDFA); } |
284 | void Search_BigFixed_CachedNFA(benchmark::State& state) { SearchBigFixed(state, SearchCachedNFA); } |
285 | void Search_BigFixed_CachedPCRE(benchmark::State& state) { SearchBigFixed(state, SearchCachedPCRE); } |
286 | void Search_BigFixed_CachedRE2(benchmark::State& state) { SearchBigFixed(state, SearchCachedRE2); } |
287 | |
288 | BENCHMARK_RANGE(Search_BigFixed_CachedDFA, 8, 1<<20)->ThreadRange(1, NumCPUs()); |
289 | BENCHMARK_RANGE(Search_BigFixed_CachedNFA, 8, 32<<10)->ThreadRange(1, NumCPUs()); |
290 | #ifdef USEPCRE |
291 | BENCHMARK_RANGE(Search_BigFixed_CachedPCRE, 8, 32<<10)->ThreadRange(1, NumCPUs()); |
292 | #endif |
293 | BENCHMARK_RANGE(Search_BigFixed_CachedRE2, 8, 1<<20)->ThreadRange(1, NumCPUs()); |
294 | |
295 | // Benchmark: FindAndConsume |
296 | |
297 | void FindAndConsume(benchmark::State& state) { |
298 | std::string s; |
299 | MakeText(&s, state.range(0)); |
300 | s.append("Hello World" ); |
301 | RE2 re("((Hello World))" ); |
302 | for (auto _ : state) { |
303 | StringPiece t = s; |
304 | StringPiece u; |
305 | CHECK(RE2::FindAndConsume(&t, re, &u)); |
306 | CHECK_EQ(u, "Hello World" ); |
307 | } |
308 | state.SetBytesProcessed(state.iterations() * state.range(0)); |
309 | } |
310 | |
311 | BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
312 | |
313 | // Benchmark: successful anchored search. |
314 | |
315 | void SearchSuccess(benchmark::State& state, const char* regexp, |
316 | SearchImpl* search) { |
317 | std::string s; |
318 | MakeText(&s, state.range(0)); |
319 | search(state, regexp, s, Prog::kAnchored, true); |
320 | state.SetBytesProcessed(state.iterations() * state.range(0)); |
321 | } |
322 | |
323 | // Unambiguous search (RE2 can use OnePass). |
324 | |
325 | void Search_Success_DFA(benchmark::State& state) { SearchSuccess(state, ".*$" , SearchDFA); } |
326 | void Search_Success_NFA(benchmark::State& state) { SearchSuccess(state, ".*$" , SearchNFA); } |
327 | void Search_Success_PCRE(benchmark::State& state) { SearchSuccess(state, ".*$" , SearchPCRE); } |
328 | void Search_Success_RE2(benchmark::State& state) { SearchSuccess(state, ".*$" , SearchRE2); } |
329 | void Search_Success_OnePass(benchmark::State& state) { SearchSuccess(state, ".*$" , SearchOnePass); } |
330 | |
331 | BENCHMARK_RANGE(Search_Success_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
332 | BENCHMARK_RANGE(Search_Success_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
333 | #ifdef USEPCRE |
334 | BENCHMARK_RANGE(Search_Success_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
335 | #endif |
336 | BENCHMARK_RANGE(Search_Success_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
337 | BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs()); |
338 | |
339 | void Search_Success_CachedDFA(benchmark::State& state) { SearchSuccess(state, ".*$" , SearchCachedDFA); } |
340 | void Search_Success_CachedNFA(benchmark::State& state) { SearchSuccess(state, ".*$" , SearchCachedNFA); } |
341 | void Search_Success_CachedPCRE(benchmark::State& state) { SearchSuccess(state, ".*$" , SearchCachedPCRE); } |
342 | void Search_Success_CachedRE2(benchmark::State& state) { SearchSuccess(state, ".*$" , SearchCachedRE2); } |
343 | void Search_Success_CachedOnePass(benchmark::State& state) { SearchSuccess(state, ".*$" , SearchCachedOnePass); } |
344 | |
345 | BENCHMARK_RANGE(Search_Success_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
346 | BENCHMARK_RANGE(Search_Success_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
347 | #ifdef USEPCRE |
348 | BENCHMARK_RANGE(Search_Success_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
349 | #endif |
350 | BENCHMARK_RANGE(Search_Success_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
351 | BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs()); |
352 | |
353 | // Ambiguous search (RE2 cannot use OnePass). |
354 | // Used to be ".*.$", but that is coalesced to ".+$" these days. |
355 | |
356 | void Search_Success1_DFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$" , SearchDFA); } |
357 | void Search_Success1_NFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$" , SearchNFA); } |
358 | void Search_Success1_PCRE(benchmark::State& state) { SearchSuccess(state, ".*\\C$" , SearchPCRE); } |
359 | void Search_Success1_RE2(benchmark::State& state) { SearchSuccess(state, ".*\\C$" , SearchRE2); } |
360 | void Search_Success1_BitState(benchmark::State& state) { SearchSuccess(state, ".*\\C$" , SearchBitState); } |
361 | |
362 | BENCHMARK_RANGE(Search_Success1_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
363 | BENCHMARK_RANGE(Search_Success1_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
364 | #ifdef USEPCRE |
365 | BENCHMARK_RANGE(Search_Success1_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
366 | #endif |
367 | BENCHMARK_RANGE(Search_Success1_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
368 | BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs()); |
369 | |
370 | void Search_Success1_CachedDFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$" , SearchCachedDFA); } |
371 | void Search_Success1_CachedNFA(benchmark::State& state) { SearchSuccess(state, ".*\\C$" , SearchCachedNFA); } |
372 | void Search_Success1_CachedPCRE(benchmark::State& state) { SearchSuccess(state, ".*\\C$" , SearchCachedPCRE); } |
373 | void Search_Success1_CachedRE2(benchmark::State& state) { SearchSuccess(state, ".*\\C$" , SearchCachedRE2); } |
374 | void Search_Success1_CachedBitState(benchmark::State& state) { SearchSuccess(state, ".*\\C$" , SearchCachedBitState); } |
375 | |
376 | BENCHMARK_RANGE(Search_Success1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
377 | BENCHMARK_RANGE(Search_Success1_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
378 | #ifdef USEPCRE |
379 | BENCHMARK_RANGE(Search_Success1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
380 | #endif |
381 | BENCHMARK_RANGE(Search_Success1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
382 | BENCHMARK_RANGE(Search_Success1_CachedBitState, 8, 2<<20)->ThreadRange(1, NumCPUs()); |
383 | |
384 | // Benchmark: AltMatch optimisation (just to verify that it works) |
385 | // Note that OnePass doesn't implement it! |
386 | |
387 | void SearchAltMatch(benchmark::State& state, SearchImpl* search) { |
388 | std::string s; |
389 | MakeText(&s, state.range(0)); |
390 | search(state, "\\C*" , s, Prog::kAnchored, true); |
391 | state.SetBytesProcessed(state.iterations() * state.range(0)); |
392 | } |
393 | |
394 | void Search_AltMatch_DFA(benchmark::State& state) { SearchAltMatch(state, SearchDFA); } |
395 | void Search_AltMatch_NFA(benchmark::State& state) { SearchAltMatch(state, SearchNFA); } |
396 | void Search_AltMatch_OnePass(benchmark::State& state) { SearchAltMatch(state, SearchOnePass); } |
397 | void Search_AltMatch_BitState(benchmark::State& state) { SearchAltMatch(state, SearchBitState); } |
398 | void Search_AltMatch_PCRE(benchmark::State& state) { SearchAltMatch(state, SearchPCRE); } |
399 | void Search_AltMatch_RE2(benchmark::State& state) { SearchAltMatch(state, SearchRE2); } |
400 | |
401 | BENCHMARK_RANGE(Search_AltMatch_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
402 | BENCHMARK_RANGE(Search_AltMatch_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
403 | BENCHMARK_RANGE(Search_AltMatch_OnePass, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
404 | BENCHMARK_RANGE(Search_AltMatch_BitState, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
405 | #ifdef USEPCRE |
406 | BENCHMARK_RANGE(Search_AltMatch_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
407 | #endif |
408 | BENCHMARK_RANGE(Search_AltMatch_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
409 | |
410 | void Search_AltMatch_CachedDFA(benchmark::State& state) { SearchAltMatch(state, SearchCachedDFA); } |
411 | void Search_AltMatch_CachedNFA(benchmark::State& state) { SearchAltMatch(state, SearchCachedNFA); } |
412 | void Search_AltMatch_CachedOnePass(benchmark::State& state) { SearchAltMatch(state, SearchCachedOnePass); } |
413 | void Search_AltMatch_CachedBitState(benchmark::State& state) { SearchAltMatch(state, SearchCachedBitState); } |
414 | void Search_AltMatch_CachedPCRE(benchmark::State& state) { SearchAltMatch(state, SearchCachedPCRE); } |
415 | void Search_AltMatch_CachedRE2(benchmark::State& state) { SearchAltMatch(state, SearchCachedRE2); } |
416 | |
417 | BENCHMARK_RANGE(Search_AltMatch_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
418 | BENCHMARK_RANGE(Search_AltMatch_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
419 | BENCHMARK_RANGE(Search_AltMatch_CachedOnePass, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
420 | BENCHMARK_RANGE(Search_AltMatch_CachedBitState, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
421 | #ifdef USEPCRE |
422 | BENCHMARK_RANGE(Search_AltMatch_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
423 | #endif |
424 | BENCHMARK_RANGE(Search_AltMatch_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
425 | |
426 | // Benchmark: use regexp to find phone number. |
427 | |
428 | void SearchDigits(benchmark::State& state, SearchImpl* search) { |
429 | StringPiece s("650-253-0001" ); |
430 | search(state, "([0-9]+)-([0-9]+)-([0-9]+)" , s, Prog::kAnchored, true); |
431 | state.SetItemsProcessed(state.iterations()); |
432 | } |
433 | |
434 | void Search_Digits_DFA(benchmark::State& state) { SearchDigits(state, SearchDFA); } |
435 | void Search_Digits_NFA(benchmark::State& state) { SearchDigits(state, SearchNFA); } |
436 | void Search_Digits_OnePass(benchmark::State& state) { SearchDigits(state, SearchOnePass); } |
437 | void Search_Digits_PCRE(benchmark::State& state) { SearchDigits(state, SearchPCRE); } |
438 | void Search_Digits_RE2(benchmark::State& state) { SearchDigits(state, SearchRE2); } |
439 | void Search_Digits_BitState(benchmark::State& state) { SearchDigits(state, SearchBitState); } |
440 | |
441 | BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs()); |
442 | BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs()); |
443 | BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs()); |
444 | #ifdef USEPCRE |
445 | BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs()); |
446 | #endif |
447 | BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs()); |
448 | BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs()); |
449 | |
450 | // Benchmark: use regexp to parse digit fields in phone number. |
451 | |
452 | void Parse3Digits(benchmark::State& state, |
453 | void (*parse3)(benchmark::State&, const char*, |
454 | const StringPiece&)) { |
455 | parse3(state, "([0-9]+)-([0-9]+)-([0-9]+)" , "650-253-0001" ); |
456 | state.SetItemsProcessed(state.iterations()); |
457 | } |
458 | |
459 | void Parse_Digits_NFA(benchmark::State& state) { Parse3Digits(state, Parse3NFA); } |
460 | void Parse_Digits_OnePass(benchmark::State& state) { Parse3Digits(state, Parse3OnePass); } |
461 | void Parse_Digits_PCRE(benchmark::State& state) { Parse3Digits(state, Parse3PCRE); } |
462 | void Parse_Digits_RE2(benchmark::State& state) { Parse3Digits(state, Parse3RE2); } |
463 | void Parse_Digits_Backtrack(benchmark::State& state) { Parse3Digits(state, Parse3Backtrack); } |
464 | void Parse_Digits_BitState(benchmark::State& state) { Parse3Digits(state, Parse3BitState); } |
465 | |
466 | BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs()); |
467 | BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs()); |
468 | #ifdef USEPCRE |
469 | BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs()); |
470 | #endif |
471 | BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs()); |
472 | BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs()); |
473 | BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs()); |
474 | |
475 | void Parse_CachedDigits_NFA(benchmark::State& state) { Parse3Digits(state, Parse3CachedNFA); } |
476 | void Parse_CachedDigits_OnePass(benchmark::State& state) { Parse3Digits(state, Parse3CachedOnePass); } |
477 | void Parse_CachedDigits_PCRE(benchmark::State& state) { Parse3Digits(state, Parse3CachedPCRE); } |
478 | void Parse_CachedDigits_RE2(benchmark::State& state) { Parse3Digits(state, Parse3CachedRE2); } |
479 | void Parse_CachedDigits_Backtrack(benchmark::State& state) { Parse3Digits(state, Parse3CachedBacktrack); } |
480 | void Parse_CachedDigits_BitState(benchmark::State& state) { Parse3Digits(state, Parse3CachedBitState); } |
481 | |
482 | BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs()); |
483 | BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs()); |
484 | #ifdef USEPCRE |
485 | BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs()); |
486 | #endif |
487 | BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs()); |
488 | BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs()); |
489 | BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs()); |
490 | |
491 | void Parse3DigitDs(benchmark::State& state, |
492 | void (*parse3)(benchmark::State&, const char*, |
493 | const StringPiece&)) { |
494 | parse3(state, "(\\d+)-(\\d+)-(\\d+)" , "650-253-0001" ); |
495 | state.SetItemsProcessed(state.iterations()); |
496 | } |
497 | |
498 | void Parse_DigitDs_NFA(benchmark::State& state) { Parse3DigitDs(state, Parse3NFA); } |
499 | void Parse_DigitDs_OnePass(benchmark::State& state) { Parse3DigitDs(state, Parse3OnePass); } |
500 | void Parse_DigitDs_PCRE(benchmark::State& state) { Parse3DigitDs(state, Parse3PCRE); } |
501 | void Parse_DigitDs_RE2(benchmark::State& state) { Parse3DigitDs(state, Parse3RE2); } |
502 | void Parse_DigitDs_Backtrack(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBacktrack); } |
503 | void Parse_DigitDs_BitState(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBitState); } |
504 | |
505 | BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs()); |
506 | BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs()); |
507 | #ifdef USEPCRE |
508 | BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs()); |
509 | #endif |
510 | BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs()); |
511 | BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs()); |
512 | BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs()); |
513 | |
514 | void Parse_CachedDigitDs_NFA(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedNFA); } |
515 | void Parse_CachedDigitDs_OnePass(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedOnePass); } |
516 | void Parse_CachedDigitDs_PCRE(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedPCRE); } |
517 | void Parse_CachedDigitDs_RE2(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedRE2); } |
518 | void Parse_CachedDigitDs_Backtrack(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBacktrack); } |
519 | void Parse_CachedDigitDs_BitState(benchmark::State& state) { Parse3DigitDs(state, Parse3CachedBitState); } |
520 | |
521 | BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs()); |
522 | BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs()); |
523 | #ifdef USEPCRE |
524 | BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs()); |
525 | #endif |
526 | BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs()); |
527 | BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs()); |
528 | BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs()); |
529 | |
530 | // Benchmark: splitting off leading number field. |
531 | |
532 | void Parse1Split(benchmark::State& state, |
533 | void (*parse1)(benchmark::State&, const char*, |
534 | const StringPiece&)) { |
535 | parse1(state, "[0-9]+-(.*)" , "650-253-0001" ); |
536 | state.SetItemsProcessed(state.iterations()); |
537 | } |
538 | |
539 | void Parse_Split_NFA(benchmark::State& state) { Parse1Split(state, Parse1NFA); } |
540 | void Parse_Split_OnePass(benchmark::State& state) { Parse1Split(state, Parse1OnePass); } |
541 | void Parse_Split_PCRE(benchmark::State& state) { Parse1Split(state, Parse1PCRE); } |
542 | void Parse_Split_RE2(benchmark::State& state) { Parse1Split(state, Parse1RE2); } |
543 | void Parse_Split_BitState(benchmark::State& state) { Parse1Split(state, Parse1BitState); } |
544 | |
545 | BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs()); |
546 | BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs()); |
547 | #ifdef USEPCRE |
548 | BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs()); |
549 | #endif |
550 | BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs()); |
551 | BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs()); |
552 | |
553 | void Parse_CachedSplit_NFA(benchmark::State& state) { Parse1Split(state, Parse1CachedNFA); } |
554 | void Parse_CachedSplit_OnePass(benchmark::State& state) { Parse1Split(state, Parse1CachedOnePass); } |
555 | void Parse_CachedSplit_PCRE(benchmark::State& state) { Parse1Split(state, Parse1CachedPCRE); } |
556 | void Parse_CachedSplit_RE2(benchmark::State& state) { Parse1Split(state, Parse1CachedRE2); } |
557 | void Parse_CachedSplit_BitState(benchmark::State& state) { Parse1Split(state, Parse1CachedBitState); } |
558 | |
559 | BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs()); |
560 | BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs()); |
561 | #ifdef USEPCRE |
562 | BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs()); |
563 | #endif |
564 | BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs()); |
565 | BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs()); |
566 | |
567 | // Benchmark: splitting off leading number field but harder (ambiguous regexp). |
568 | |
569 | void Parse1SplitHard(benchmark::State& state, |
570 | void (*run)(benchmark::State&, const char*, |
571 | const StringPiece&)) { |
572 | run(state, "[0-9]+.(.*)" , "650-253-0001" ); |
573 | state.SetItemsProcessed(state.iterations()); |
574 | } |
575 | |
576 | void Parse_SplitHard_NFA(benchmark::State& state) { Parse1SplitHard(state, Parse1NFA); } |
577 | void Parse_SplitHard_PCRE(benchmark::State& state) { Parse1SplitHard(state, Parse1PCRE); } |
578 | void Parse_SplitHard_RE2(benchmark::State& state) { Parse1SplitHard(state, Parse1RE2); } |
579 | void Parse_SplitHard_BitState(benchmark::State& state) { Parse1SplitHard(state, Parse1BitState); } |
580 | |
581 | #ifdef USEPCRE |
582 | BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs()); |
583 | #endif |
584 | BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs()); |
585 | BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs()); |
586 | BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs()); |
587 | |
588 | void Parse_CachedSplitHard_NFA(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedNFA); } |
589 | void Parse_CachedSplitHard_PCRE(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedPCRE); } |
590 | void Parse_CachedSplitHard_RE2(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedRE2); } |
591 | void Parse_CachedSplitHard_BitState(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedBitState); } |
592 | void Parse_CachedSplitHard_Backtrack(benchmark::State& state) { Parse1SplitHard(state, Parse1CachedBacktrack); } |
593 | |
594 | #ifdef USEPCRE |
595 | BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs()); |
596 | #endif |
597 | BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs()); |
598 | BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs()); |
599 | BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs()); |
600 | BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs()); |
601 | |
602 | // Benchmark: Parse1SplitHard, big text, small match. |
603 | |
604 | void Parse1SplitBig1(benchmark::State& state, |
605 | void (*run)(benchmark::State&, const char*, |
606 | const StringPiece&)) { |
607 | std::string s; |
608 | s.append(100000, 'x'); |
609 | s.append("650-253-0001" ); |
610 | run(state, "[0-9]+.(.*)" , s); |
611 | state.SetItemsProcessed(state.iterations()); |
612 | } |
613 | |
614 | void Parse_CachedSplitBig1_PCRE(benchmark::State& state) { Parse1SplitBig1(state, SearchParse1CachedPCRE); } |
615 | void Parse_CachedSplitBig1_RE2(benchmark::State& state) { Parse1SplitBig1(state, SearchParse1CachedRE2); } |
616 | |
617 | #ifdef USEPCRE |
618 | BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs()); |
619 | #endif |
620 | BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs()); |
621 | |
622 | // Benchmark: Parse1SplitHard, big text, big match. |
623 | |
624 | void Parse1SplitBig2(benchmark::State& state, |
625 | void (*run)(benchmark::State&, const char*, |
626 | const StringPiece&)) { |
627 | std::string s; |
628 | s.append("650-253-" ); |
629 | s.append(100000, '0'); |
630 | run(state, "[0-9]+.(.*)" , s); |
631 | state.SetItemsProcessed(state.iterations()); |
632 | } |
633 | |
634 | void Parse_CachedSplitBig2_PCRE(benchmark::State& state) { Parse1SplitBig2(state, SearchParse1CachedPCRE); } |
635 | void Parse_CachedSplitBig2_RE2(benchmark::State& state) { Parse1SplitBig2(state, SearchParse1CachedRE2); } |
636 | |
637 | #ifdef USEPCRE |
638 | BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs()); |
639 | #endif |
640 | BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs()); |
641 | |
642 | // Benchmark: measure time required to parse (but not execute) |
643 | // a simple regular expression. |
644 | |
645 | void ParseRegexp(benchmark::State& state, const std::string& regexp) { |
646 | for (auto _ : state) { |
647 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
648 | CHECK(re); |
649 | re->Decref(); |
650 | } |
651 | } |
652 | |
653 | void SimplifyRegexp(benchmark::State& state, const std::string& regexp) { |
654 | for (auto _ : state) { |
655 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
656 | CHECK(re); |
657 | Regexp* sre = re->Simplify(); |
658 | CHECK(sre); |
659 | sre->Decref(); |
660 | re->Decref(); |
661 | } |
662 | } |
663 | |
664 | void NullWalkRegexp(benchmark::State& state, const std::string& regexp) { |
665 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
666 | CHECK(re); |
667 | for (auto _ : state) { |
668 | re->NullWalk(); |
669 | } |
670 | re->Decref(); |
671 | } |
672 | |
673 | void SimplifyCompileRegexp(benchmark::State& state, const std::string& regexp) { |
674 | for (auto _ : state) { |
675 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
676 | CHECK(re); |
677 | Regexp* sre = re->Simplify(); |
678 | CHECK(sre); |
679 | Prog* prog = sre->CompileToProg(0); |
680 | CHECK(prog); |
681 | delete prog; |
682 | sre->Decref(); |
683 | re->Decref(); |
684 | } |
685 | } |
686 | |
687 | void CompileRegexp(benchmark::State& state, const std::string& regexp) { |
688 | for (auto _ : state) { |
689 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
690 | CHECK(re); |
691 | Prog* prog = re->CompileToProg(0); |
692 | CHECK(prog); |
693 | delete prog; |
694 | re->Decref(); |
695 | } |
696 | } |
697 | |
698 | void CompileToProg(benchmark::State& state, const std::string& regexp) { |
699 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
700 | CHECK(re); |
701 | for (auto _ : state) { |
702 | Prog* prog = re->CompileToProg(0); |
703 | CHECK(prog); |
704 | delete prog; |
705 | } |
706 | re->Decref(); |
707 | } |
708 | |
709 | void CompileByteMap(benchmark::State& state, const std::string& regexp) { |
710 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
711 | CHECK(re); |
712 | Prog* prog = re->CompileToProg(0); |
713 | CHECK(prog); |
714 | for (auto _ : state) { |
715 | prog->ComputeByteMap(); |
716 | } |
717 | delete prog; |
718 | re->Decref(); |
719 | } |
720 | |
721 | void CompilePCRE(benchmark::State& state, const std::string& regexp) { |
722 | for (auto _ : state) { |
723 | PCRE re(regexp, PCRE::UTF8); |
724 | CHECK_EQ(re.error(), "" ); |
725 | } |
726 | } |
727 | |
728 | void CompileRE2(benchmark::State& state, const std::string& regexp) { |
729 | for (auto _ : state) { |
730 | RE2 re(regexp); |
731 | CHECK_EQ(re.error(), "" ); |
732 | } |
733 | } |
734 | |
735 | void RunBuild(benchmark::State& state, const std::string& regexp, |
736 | void (*run)(benchmark::State&, const std::string&)) { |
737 | run(state, regexp); |
738 | state.SetItemsProcessed(state.iterations()); |
739 | } |
740 | |
741 | } // namespace re2 |
742 | |
743 | DEFINE_FLAG(std::string, compile_regexp, "(.*)-(\\d+)-of-(\\d+)" , |
744 | "regexp for compile benchmarks" ); |
745 | |
746 | namespace re2 { |
747 | |
748 | void BM_PCRE_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompilePCRE); } |
749 | void BM_Regexp_Parse(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), ParseRegexp); } |
750 | void BM_Regexp_Simplify(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyRegexp); } |
751 | void BM_CompileToProg(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileToProg); } |
752 | void BM_CompileByteMap(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileByteMap); } |
753 | void BM_Regexp_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRegexp); } |
754 | void BM_Regexp_SimplifyCompile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), SimplifyCompileRegexp); } |
755 | void BM_Regexp_NullWalk(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), NullWalkRegexp); } |
756 | void BM_RE2_Compile(benchmark::State& state) { RunBuild(state, GetFlag(FLAGS_compile_regexp), CompileRE2); } |
757 | |
758 | #ifdef USEPCRE |
759 | BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs()); |
760 | #endif |
761 | BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs()); |
762 | BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs()); |
763 | BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs()); |
764 | BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs()); |
765 | BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs()); |
766 | BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs()); |
767 | BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs()); |
768 | BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs()); |
769 | |
770 | // Makes text of size nbytes, then calls run to search |
771 | // the text for regexp iters times. |
772 | void SearchPhone(benchmark::State& state, ParseImpl* search) { |
773 | std::string s; |
774 | MakeText(&s, state.range(0)); |
775 | s.append("(650) 253-0001" ); |
776 | search(state, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})" , s); |
777 | state.SetBytesProcessed(state.iterations() * state.range(0)); |
778 | } |
779 | |
780 | void SearchPhone_CachedPCRE(benchmark::State& state) { |
781 | SearchPhone(state, SearchParse2CachedPCRE); |
782 | } |
783 | |
784 | void SearchPhone_CachedRE2(benchmark::State& state) { |
785 | SearchPhone(state, SearchParse2CachedRE2); |
786 | } |
787 | |
788 | #ifdef USEPCRE |
789 | BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
790 | #endif |
791 | BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs()); |
792 | |
793 | /* |
794 | TODO(rsc): Make this work again. |
795 | |
796 | // Generates and returns a string over binary alphabet {0,1} that contains |
797 | // all possible binary sequences of length n as subsequences. The obvious |
798 | // brute force method would generate a string of length n * 2^n, but this |
799 | // generates a string of length n + 2^n - 1 called a De Bruijn cycle. |
800 | // See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17. |
801 | static std::string DeBruijnString(int n) { |
802 | CHECK_LT(n, 8*sizeof(int)); |
803 | CHECK_GT(n, 0); |
804 | |
805 | std::vector<bool> did(1<<n); |
806 | for (int i = 0; i < 1<<n; i++) |
807 | did[i] = false; |
808 | |
809 | std::string s; |
810 | for (int i = 0; i < n-1; i++) |
811 | s.append("0"); |
812 | int bits = 0; |
813 | int mask = (1<<n) - 1; |
814 | for (int i = 0; i < (1<<n); i++) { |
815 | bits <<= 1; |
816 | bits &= mask; |
817 | if (!did[bits|1]) { |
818 | bits |= 1; |
819 | s.append("1"); |
820 | } else { |
821 | s.append("0"); |
822 | } |
823 | CHECK(!did[bits]); |
824 | did[bits] = true; |
825 | } |
826 | return s; |
827 | } |
828 | |
829 | void CacheFill(int iters, int n, SearchImpl *srch) { |
830 | std::string s = DeBruijnString(n+1); |
831 | std::string t; |
832 | for (int i = n+1; i < 20; i++) { |
833 | t = s + s; |
834 | using std::swap; |
835 | swap(s, t); |
836 | } |
837 | srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s, |
838 | Prog::kUnanchored, true); |
839 | SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*s.size()); |
840 | } |
841 | |
842 | void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); } |
843 | void CacheFillRE2(int i, int n) { CacheFill(i, n, SearchCachedRE2); } |
844 | void CacheFillNFA(int i, int n) { CacheFill(i, n, SearchCachedNFA); } |
845 | void CacheFillDFA(int i, int n) { CacheFill(i, n, SearchCachedDFA); } |
846 | |
847 | // BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers |
848 | // for the static BenchmarkRegisterer, which makes it unusable inside |
849 | // a macro like DO24 below. MY_BENCHMARK_WITH_ARG uses the argument a |
850 | // to make the identifiers distinct (only possible when 'a' is a simple |
851 | // expression like 2, not like 1+1). |
852 | #define MY_BENCHMARK_WITH_ARG(n, a) \ |
853 | bool __benchmark_ ## n ## a = \ |
854 | (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, NumCPUs()); |
855 | |
856 | #define DO24(A, B) \ |
857 | A(B, 1); A(B, 2); A(B, 3); A(B, 4); A(B, 5); A(B, 6); \ |
858 | A(B, 7); A(B, 8); A(B, 9); A(B, 10); A(B, 11); A(B, 12); \ |
859 | A(B, 13); A(B, 14); A(B, 15); A(B, 16); A(B, 17); A(B, 18); \ |
860 | A(B, 19); A(B, 20); A(B, 21); A(B, 22); A(B, 23); A(B, 24); |
861 | |
862 | DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE) |
863 | DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA) |
864 | DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2) |
865 | DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA) |
866 | |
867 | #undef DO24 |
868 | #undef MY_BENCHMARK_WITH_ARG |
869 | */ |
870 | |
871 | //////////////////////////////////////////////////////////////////////// |
872 | // |
873 | // Implementation routines. Sad that there are so many, |
874 | // but all the interfaces are slightly different. |
875 | |
876 | // Runs implementation to search for regexp in text, iters times. |
877 | // Expect_match says whether the regexp should be found. |
878 | // Anchored says whether to run an anchored search. |
879 | |
880 | void SearchDFA(benchmark::State& state, const char* regexp, |
881 | const StringPiece& text, Prog::Anchor anchor, |
882 | bool expect_match) { |
883 | for (auto _ : state) { |
884 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
885 | CHECK(re); |
886 | Prog* prog = re->CompileToProg(0); |
887 | CHECK(prog); |
888 | bool failed = false; |
889 | CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch, |
890 | NULL, &failed, NULL), |
891 | expect_match); |
892 | CHECK(!failed); |
893 | delete prog; |
894 | re->Decref(); |
895 | } |
896 | } |
897 | |
898 | void SearchNFA(benchmark::State& state, const char* regexp, |
899 | const StringPiece& text, Prog::Anchor anchor, |
900 | bool expect_match) { |
901 | for (auto _ : state) { |
902 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
903 | CHECK(re); |
904 | Prog* prog = re->CompileToProg(0); |
905 | CHECK(prog); |
906 | CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch, |
907 | NULL, 0), |
908 | expect_match); |
909 | delete prog; |
910 | re->Decref(); |
911 | } |
912 | } |
913 | |
914 | void SearchOnePass(benchmark::State& state, const char* regexp, |
915 | const StringPiece& text, Prog::Anchor anchor, |
916 | bool expect_match) { |
917 | for (auto _ : state) { |
918 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
919 | CHECK(re); |
920 | Prog* prog = re->CompileToProg(0); |
921 | CHECK(prog); |
922 | CHECK(prog->IsOnePass()); |
923 | CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0), |
924 | expect_match); |
925 | delete prog; |
926 | re->Decref(); |
927 | } |
928 | } |
929 | |
930 | void SearchBitState(benchmark::State& state, const char* regexp, |
931 | const StringPiece& text, Prog::Anchor anchor, |
932 | bool expect_match) { |
933 | for (auto _ : state) { |
934 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
935 | CHECK(re); |
936 | Prog* prog = re->CompileToProg(0); |
937 | CHECK(prog); |
938 | CHECK(prog->CanBitState()); |
939 | CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0), |
940 | expect_match); |
941 | delete prog; |
942 | re->Decref(); |
943 | } |
944 | } |
945 | |
946 | void SearchPCRE(benchmark::State& state, const char* regexp, |
947 | const StringPiece& text, Prog::Anchor anchor, |
948 | bool expect_match) { |
949 | for (auto _ : state) { |
950 | PCRE re(regexp, PCRE::UTF8); |
951 | CHECK_EQ(re.error(), "" ); |
952 | if (anchor == Prog::kAnchored) |
953 | CHECK_EQ(PCRE::FullMatch(text, re), expect_match); |
954 | else |
955 | CHECK_EQ(PCRE::PartialMatch(text, re), expect_match); |
956 | } |
957 | } |
958 | |
959 | void SearchRE2(benchmark::State& state, const char* regexp, |
960 | const StringPiece& text, Prog::Anchor anchor, |
961 | bool expect_match) { |
962 | for (auto _ : state) { |
963 | RE2 re(regexp); |
964 | CHECK_EQ(re.error(), "" ); |
965 | if (anchor == Prog::kAnchored) |
966 | CHECK_EQ(RE2::FullMatch(text, re), expect_match); |
967 | else |
968 | CHECK_EQ(RE2::PartialMatch(text, re), expect_match); |
969 | } |
970 | } |
971 | |
972 | // SearchCachedXXX is like SearchXXX but only does the |
973 | // regexp parsing and compiling once. This lets us measure |
974 | // search time without the per-regexp overhead. |
975 | |
976 | void SearchCachedDFA(benchmark::State& state, const char* regexp, |
977 | const StringPiece& text, Prog::Anchor anchor, |
978 | bool expect_match) { |
979 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
980 | CHECK(re); |
981 | Prog* prog = re->CompileToProg(1LL<<31); |
982 | CHECK(prog); |
983 | for (auto _ : state) { |
984 | bool failed = false; |
985 | CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch, |
986 | NULL, &failed, NULL), |
987 | expect_match); |
988 | CHECK(!failed); |
989 | } |
990 | delete prog; |
991 | re->Decref(); |
992 | } |
993 | |
994 | void SearchCachedNFA(benchmark::State& state, const char* regexp, |
995 | const StringPiece& text, Prog::Anchor anchor, |
996 | bool expect_match) { |
997 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
998 | CHECK(re); |
999 | Prog* prog = re->CompileToProg(0); |
1000 | CHECK(prog); |
1001 | for (auto _ : state) { |
1002 | CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch, |
1003 | NULL, 0), |
1004 | expect_match); |
1005 | } |
1006 | delete prog; |
1007 | re->Decref(); |
1008 | } |
1009 | |
1010 | void SearchCachedOnePass(benchmark::State& state, const char* regexp, |
1011 | const StringPiece& text, Prog::Anchor anchor, |
1012 | bool expect_match) { |
1013 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
1014 | CHECK(re); |
1015 | Prog* prog = re->CompileToProg(0); |
1016 | CHECK(prog); |
1017 | CHECK(prog->IsOnePass()); |
1018 | for (auto _ : state) { |
1019 | CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0), |
1020 | expect_match); |
1021 | } |
1022 | delete prog; |
1023 | re->Decref(); |
1024 | } |
1025 | |
1026 | void SearchCachedBitState(benchmark::State& state, const char* regexp, |
1027 | const StringPiece& text, Prog::Anchor anchor, |
1028 | bool expect_match) { |
1029 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
1030 | CHECK(re); |
1031 | Prog* prog = re->CompileToProg(0); |
1032 | CHECK(prog); |
1033 | CHECK(prog->CanBitState()); |
1034 | for (auto _ : state) { |
1035 | CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0), |
1036 | expect_match); |
1037 | } |
1038 | delete prog; |
1039 | re->Decref(); |
1040 | } |
1041 | |
1042 | void SearchCachedPCRE(benchmark::State& state, const char* regexp, |
1043 | const StringPiece& text, Prog::Anchor anchor, |
1044 | bool expect_match) { |
1045 | PCRE re(regexp, PCRE::UTF8); |
1046 | CHECK_EQ(re.error(), "" ); |
1047 | for (auto _ : state) { |
1048 | if (anchor == Prog::kAnchored) |
1049 | CHECK_EQ(PCRE::FullMatch(text, re), expect_match); |
1050 | else |
1051 | CHECK_EQ(PCRE::PartialMatch(text, re), expect_match); |
1052 | } |
1053 | } |
1054 | |
1055 | void SearchCachedRE2(benchmark::State& state, const char* regexp, |
1056 | const StringPiece& text, Prog::Anchor anchor, |
1057 | bool expect_match) { |
1058 | RE2 re(regexp); |
1059 | CHECK_EQ(re.error(), "" ); |
1060 | for (auto _ : state) { |
1061 | if (anchor == Prog::kAnchored) |
1062 | CHECK_EQ(RE2::FullMatch(text, re), expect_match); |
1063 | else |
1064 | CHECK_EQ(RE2::PartialMatch(text, re), expect_match); |
1065 | } |
1066 | } |
1067 | |
1068 | // Runs implementation to full match regexp against text, |
1069 | // extracting three submatches. Expects match always. |
1070 | |
1071 | void Parse3NFA(benchmark::State& state, const char* regexp, |
1072 | const StringPiece& text) { |
1073 | for (auto _ : state) { |
1074 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
1075 | CHECK(re); |
1076 | Prog* prog = re->CompileToProg(0); |
1077 | CHECK(prog); |
1078 | StringPiece sp[4]; // 4 because sp[0] is whole match. |
1079 | CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored, |
1080 | Prog::kFullMatch, sp, 4)); |
1081 | delete prog; |
1082 | re->Decref(); |
1083 | } |
1084 | } |
1085 | |
1086 | void Parse3OnePass(benchmark::State& state, const char* regexp, |
1087 | const StringPiece& text) { |
1088 | for (auto _ : state) { |
1089 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
1090 | CHECK(re); |
1091 | Prog* prog = re->CompileToProg(0); |
1092 | CHECK(prog); |
1093 | CHECK(prog->IsOnePass()); |
1094 | StringPiece sp[4]; // 4 because sp[0] is whole match. |
1095 | CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
1096 | delete prog; |
1097 | re->Decref(); |
1098 | } |
1099 | } |
1100 | |
1101 | void Parse3BitState(benchmark::State& state, const char* regexp, |
1102 | const StringPiece& text) { |
1103 | for (auto _ : state) { |
1104 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
1105 | CHECK(re); |
1106 | Prog* prog = re->CompileToProg(0); |
1107 | CHECK(prog); |
1108 | CHECK(prog->CanBitState()); |
1109 | StringPiece sp[4]; // 4 because sp[0] is whole match. |
1110 | CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
1111 | delete prog; |
1112 | re->Decref(); |
1113 | } |
1114 | } |
1115 | |
1116 | void Parse3Backtrack(benchmark::State& state, const char* regexp, |
1117 | const StringPiece& text) { |
1118 | for (auto _ : state) { |
1119 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
1120 | CHECK(re); |
1121 | Prog* prog = re->CompileToProg(0); |
1122 | CHECK(prog); |
1123 | StringPiece sp[4]; // 4 because sp[0] is whole match. |
1124 | CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
1125 | delete prog; |
1126 | re->Decref(); |
1127 | } |
1128 | } |
1129 | |
1130 | void Parse3PCRE(benchmark::State& state, const char* regexp, |
1131 | const StringPiece& text) { |
1132 | for (auto _ : state) { |
1133 | PCRE re(regexp, PCRE::UTF8); |
1134 | CHECK_EQ(re.error(), "" ); |
1135 | StringPiece sp1, sp2, sp3; |
1136 | CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3)); |
1137 | } |
1138 | } |
1139 | |
1140 | void Parse3RE2(benchmark::State& state, const char* regexp, |
1141 | const StringPiece& text) { |
1142 | for (auto _ : state) { |
1143 | RE2 re(regexp); |
1144 | CHECK_EQ(re.error(), "" ); |
1145 | StringPiece sp1, sp2, sp3; |
1146 | CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3)); |
1147 | } |
1148 | } |
1149 | |
1150 | void Parse3CachedNFA(benchmark::State& state, const char* regexp, |
1151 | const StringPiece& text) { |
1152 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
1153 | CHECK(re); |
1154 | Prog* prog = re->CompileToProg(0); |
1155 | CHECK(prog); |
1156 | StringPiece sp[4]; // 4 because sp[0] is whole match. |
1157 | for (auto _ : state) { |
1158 | CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored, |
1159 | Prog::kFullMatch, sp, 4)); |
1160 | } |
1161 | delete prog; |
1162 | re->Decref(); |
1163 | } |
1164 | |
1165 | void Parse3CachedOnePass(benchmark::State& state, const char* regexp, |
1166 | const StringPiece& text) { |
1167 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
1168 | CHECK(re); |
1169 | Prog* prog = re->CompileToProg(0); |
1170 | CHECK(prog); |
1171 | CHECK(prog->IsOnePass()); |
1172 | StringPiece sp[4]; // 4 because sp[0] is whole match. |
1173 | for (auto _ : state) { |
1174 | CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
1175 | } |
1176 | delete prog; |
1177 | re->Decref(); |
1178 | } |
1179 | |
1180 | void Parse3CachedBitState(benchmark::State& state, const char* regexp, |
1181 | const StringPiece& text) { |
1182 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
1183 | CHECK(re); |
1184 | Prog* prog = re->CompileToProg(0); |
1185 | CHECK(prog); |
1186 | CHECK(prog->CanBitState()); |
1187 | StringPiece sp[4]; // 4 because sp[0] is whole match. |
1188 | for (auto _ : state) { |
1189 | CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
1190 | } |
1191 | delete prog; |
1192 | re->Decref(); |
1193 | } |
1194 | |
1195 | void Parse3CachedBacktrack(benchmark::State& state, const char* regexp, |
1196 | const StringPiece& text) { |
1197 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
1198 | CHECK(re); |
1199 | Prog* prog = re->CompileToProg(0); |
1200 | CHECK(prog); |
1201 | StringPiece sp[4]; // 4 because sp[0] is whole match. |
1202 | for (auto _ : state) { |
1203 | CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4)); |
1204 | } |
1205 | delete prog; |
1206 | re->Decref(); |
1207 | } |
1208 | |
1209 | void Parse3CachedPCRE(benchmark::State& state, const char* regexp, |
1210 | const StringPiece& text) { |
1211 | PCRE re(regexp, PCRE::UTF8); |
1212 | CHECK_EQ(re.error(), "" ); |
1213 | StringPiece sp1, sp2, sp3; |
1214 | for (auto _ : state) { |
1215 | CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3)); |
1216 | } |
1217 | } |
1218 | |
1219 | void Parse3CachedRE2(benchmark::State& state, const char* regexp, |
1220 | const StringPiece& text) { |
1221 | RE2 re(regexp); |
1222 | CHECK_EQ(re.error(), "" ); |
1223 | StringPiece sp1, sp2, sp3; |
1224 | for (auto _ : state) { |
1225 | CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3)); |
1226 | } |
1227 | } |
1228 | |
1229 | // Runs implementation to full match regexp against text, |
1230 | // extracting three submatches. Expects match always. |
1231 | |
1232 | void Parse1NFA(benchmark::State& state, const char* regexp, |
1233 | const StringPiece& text) { |
1234 | for (auto _ : state) { |
1235 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
1236 | CHECK(re); |
1237 | Prog* prog = re->CompileToProg(0); |
1238 | CHECK(prog); |
1239 | StringPiece sp[2]; // 2 because sp[0] is whole match. |
1240 | CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored, |
1241 | Prog::kFullMatch, sp, 2)); |
1242 | delete prog; |
1243 | re->Decref(); |
1244 | } |
1245 | } |
1246 | |
1247 | void Parse1OnePass(benchmark::State& state, const char* regexp, |
1248 | const StringPiece& text) { |
1249 | for (auto _ : state) { |
1250 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
1251 | CHECK(re); |
1252 | Prog* prog = re->CompileToProg(0); |
1253 | CHECK(prog); |
1254 | CHECK(prog->IsOnePass()); |
1255 | StringPiece sp[2]; // 2 because sp[0] is whole match. |
1256 | CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2)); |
1257 | delete prog; |
1258 | re->Decref(); |
1259 | } |
1260 | } |
1261 | |
1262 | void Parse1BitState(benchmark::State& state, const char* regexp, |
1263 | const StringPiece& text) { |
1264 | for (auto _ : state) { |
1265 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
1266 | CHECK(re); |
1267 | Prog* prog = re->CompileToProg(0); |
1268 | CHECK(prog); |
1269 | CHECK(prog->CanBitState()); |
1270 | StringPiece sp[2]; // 2 because sp[0] is whole match. |
1271 | CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2)); |
1272 | delete prog; |
1273 | re->Decref(); |
1274 | } |
1275 | } |
1276 | |
1277 | void Parse1PCRE(benchmark::State& state, const char* regexp, |
1278 | const StringPiece& text) { |
1279 | for (auto _ : state) { |
1280 | PCRE re(regexp, PCRE::UTF8); |
1281 | CHECK_EQ(re.error(), "" ); |
1282 | StringPiece sp1; |
1283 | CHECK(PCRE::FullMatch(text, re, &sp1)); |
1284 | } |
1285 | } |
1286 | |
1287 | void Parse1RE2(benchmark::State& state, const char* regexp, |
1288 | const StringPiece& text) { |
1289 | for (auto _ : state) { |
1290 | RE2 re(regexp); |
1291 | CHECK_EQ(re.error(), "" ); |
1292 | StringPiece sp1; |
1293 | CHECK(RE2::FullMatch(text, re, &sp1)); |
1294 | } |
1295 | } |
1296 | |
1297 | void Parse1CachedNFA(benchmark::State& state, const char* regexp, |
1298 | const StringPiece& text) { |
1299 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
1300 | CHECK(re); |
1301 | Prog* prog = re->CompileToProg(0); |
1302 | CHECK(prog); |
1303 | StringPiece sp[2]; // 2 because sp[0] is whole match. |
1304 | for (auto _ : state) { |
1305 | CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored, |
1306 | Prog::kFullMatch, sp, 2)); |
1307 | } |
1308 | delete prog; |
1309 | re->Decref(); |
1310 | } |
1311 | |
1312 | void Parse1CachedOnePass(benchmark::State& state, const char* regexp, |
1313 | const StringPiece& text) { |
1314 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
1315 | CHECK(re); |
1316 | Prog* prog = re->CompileToProg(0); |
1317 | CHECK(prog); |
1318 | CHECK(prog->IsOnePass()); |
1319 | StringPiece sp[2]; // 2 because sp[0] is whole match. |
1320 | for (auto _ : state) { |
1321 | CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2)); |
1322 | } |
1323 | delete prog; |
1324 | re->Decref(); |
1325 | } |
1326 | |
1327 | void Parse1CachedBitState(benchmark::State& state, const char* regexp, |
1328 | const StringPiece& text) { |
1329 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
1330 | CHECK(re); |
1331 | Prog* prog = re->CompileToProg(0); |
1332 | CHECK(prog); |
1333 | CHECK(prog->CanBitState()); |
1334 | StringPiece sp[2]; // 2 because sp[0] is whole match. |
1335 | for (auto _ : state) { |
1336 | CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2)); |
1337 | } |
1338 | delete prog; |
1339 | re->Decref(); |
1340 | } |
1341 | |
1342 | void Parse1CachedBacktrack(benchmark::State& state, const char* regexp, |
1343 | const StringPiece& text) { |
1344 | Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL); |
1345 | CHECK(re); |
1346 | Prog* prog = re->CompileToProg(0); |
1347 | CHECK(prog); |
1348 | StringPiece sp[2]; // 2 because sp[0] is whole match. |
1349 | for (auto _ : state) { |
1350 | CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2)); |
1351 | } |
1352 | delete prog; |
1353 | re->Decref(); |
1354 | } |
1355 | |
1356 | void Parse1CachedPCRE(benchmark::State& state, const char* regexp, |
1357 | const StringPiece& text) { |
1358 | PCRE re(regexp, PCRE::UTF8); |
1359 | CHECK_EQ(re.error(), "" ); |
1360 | StringPiece sp1; |
1361 | for (auto _ : state) { |
1362 | CHECK(PCRE::FullMatch(text, re, &sp1)); |
1363 | } |
1364 | } |
1365 | |
1366 | void Parse1CachedRE2(benchmark::State& state, const char* regexp, |
1367 | const StringPiece& text) { |
1368 | RE2 re(regexp); |
1369 | CHECK_EQ(re.error(), "" ); |
1370 | StringPiece sp1; |
1371 | for (auto _ : state) { |
1372 | CHECK(RE2::FullMatch(text, re, &sp1)); |
1373 | } |
1374 | } |
1375 | |
1376 | void SearchParse2CachedPCRE(benchmark::State& state, const char* regexp, |
1377 | const StringPiece& text) { |
1378 | PCRE re(regexp, PCRE::UTF8); |
1379 | CHECK_EQ(re.error(), "" ); |
1380 | for (auto _ : state) { |
1381 | StringPiece sp1, sp2; |
1382 | CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2)); |
1383 | } |
1384 | } |
1385 | |
1386 | void SearchParse2CachedRE2(benchmark::State& state, const char* regexp, |
1387 | const StringPiece& text) { |
1388 | RE2 re(regexp); |
1389 | CHECK_EQ(re.error(), "" ); |
1390 | for (auto _ : state) { |
1391 | StringPiece sp1, sp2; |
1392 | CHECK(RE2::PartialMatch(text, re, &sp1, &sp2)); |
1393 | } |
1394 | } |
1395 | |
1396 | void SearchParse1CachedPCRE(benchmark::State& state, const char* regexp, |
1397 | const StringPiece& text) { |
1398 | PCRE re(regexp, PCRE::UTF8); |
1399 | CHECK_EQ(re.error(), "" ); |
1400 | for (auto _ : state) { |
1401 | StringPiece sp1; |
1402 | CHECK(PCRE::PartialMatch(text, re, &sp1)); |
1403 | } |
1404 | } |
1405 | |
1406 | void SearchParse1CachedRE2(benchmark::State& state, const char* regexp, |
1407 | const StringPiece& text) { |
1408 | RE2 re(regexp); |
1409 | CHECK_EQ(re.error(), "" ); |
1410 | for (auto _ : state) { |
1411 | StringPiece sp1; |
1412 | CHECK(RE2::PartialMatch(text, re, &sp1)); |
1413 | } |
1414 | } |
1415 | |
1416 | void EmptyPartialMatchPCRE(benchmark::State& state) { |
1417 | PCRE re("" ); |
1418 | for (auto _ : state) { |
1419 | PCRE::PartialMatch("" , re); |
1420 | } |
1421 | } |
1422 | |
1423 | void EmptyPartialMatchRE2(benchmark::State& state) { |
1424 | RE2 re("" ); |
1425 | for (auto _ : state) { |
1426 | RE2::PartialMatch("" , re); |
1427 | } |
1428 | } |
1429 | #ifdef USEPCRE |
1430 | BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs()); |
1431 | #endif |
1432 | BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs()); |
1433 | |
1434 | void SimplePartialMatchPCRE(benchmark::State& state) { |
1435 | PCRE re("abcdefg" ); |
1436 | for (auto _ : state) { |
1437 | PCRE::PartialMatch("abcdefg" , re); |
1438 | } |
1439 | } |
1440 | |
1441 | void SimplePartialMatchRE2(benchmark::State& state) { |
1442 | RE2 re("abcdefg" ); |
1443 | for (auto _ : state) { |
1444 | RE2::PartialMatch("abcdefg" , re); |
1445 | } |
1446 | } |
1447 | #ifdef USEPCRE |
1448 | BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs()); |
1449 | #endif |
1450 | BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs()); |
1451 | |
1452 | static std::string http_text = |
1453 | "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf" |
1454 | "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1" ; |
1455 | |
1456 | void HTTPPartialMatchPCRE(benchmark::State& state) { |
1457 | StringPiece a; |
1458 | PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP" ); |
1459 | for (auto _ : state) { |
1460 | PCRE::PartialMatch(http_text, re, &a); |
1461 | } |
1462 | } |
1463 | |
1464 | void HTTPPartialMatchRE2(benchmark::State& state) { |
1465 | StringPiece a; |
1466 | RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP" ); |
1467 | for (auto _ : state) { |
1468 | RE2::PartialMatch(http_text, re, &a); |
1469 | } |
1470 | } |
1471 | |
1472 | #ifdef USEPCRE |
1473 | BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs()); |
1474 | #endif |
1475 | BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs()); |
1476 | |
1477 | static std::string smallhttp_text = |
1478 | "GET /abc HTTP/1.1" ; |
1479 | |
1480 | void SmallHTTPPartialMatchPCRE(benchmark::State& state) { |
1481 | StringPiece a; |
1482 | PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP" ); |
1483 | for (auto _ : state) { |
1484 | PCRE::PartialMatch(smallhttp_text, re, &a); |
1485 | } |
1486 | } |
1487 | |
1488 | void SmallHTTPPartialMatchRE2(benchmark::State& state) { |
1489 | StringPiece a; |
1490 | RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP" ); |
1491 | for (auto _ : state) { |
1492 | RE2::PartialMatch(smallhttp_text, re, &a); |
1493 | } |
1494 | } |
1495 | |
1496 | #ifdef USEPCRE |
1497 | BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs()); |
1498 | #endif |
1499 | BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs()); |
1500 | |
1501 | void DotMatchPCRE(benchmark::State& state) { |
1502 | StringPiece a; |
1503 | PCRE re("(?-s)^(.+)" ); |
1504 | for (auto _ : state) { |
1505 | PCRE::PartialMatch(http_text, re, &a); |
1506 | } |
1507 | } |
1508 | |
1509 | void DotMatchRE2(benchmark::State& state) { |
1510 | StringPiece a; |
1511 | RE2 re("(?-s)^(.+)" ); |
1512 | for (auto _ : state) { |
1513 | RE2::PartialMatch(http_text, re, &a); |
1514 | } |
1515 | } |
1516 | |
1517 | #ifdef USEPCRE |
1518 | BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs()); |
1519 | #endif |
1520 | BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs()); |
1521 | |
1522 | void ASCIIMatchPCRE(benchmark::State& state) { |
1523 | StringPiece a; |
1524 | PCRE re("(?-s)^([ -~]+)" ); |
1525 | for (auto _ : state) { |
1526 | PCRE::PartialMatch(http_text, re, &a); |
1527 | } |
1528 | } |
1529 | |
1530 | void ASCIIMatchRE2(benchmark::State& state) { |
1531 | StringPiece a; |
1532 | RE2 re("(?-s)^([ -~]+)" ); |
1533 | for (auto _ : state) { |
1534 | RE2::PartialMatch(http_text, re, &a); |
1535 | } |
1536 | } |
1537 | |
1538 | #ifdef USEPCRE |
1539 | BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs()); |
1540 | #endif |
1541 | BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs()); |
1542 | |
1543 | void FullMatchPCRE(benchmark::State& state, const char *regexp) { |
1544 | std::string s; |
1545 | MakeText(&s, state.range(0)); |
1546 | s += "ABCDEFGHIJ" ; |
1547 | PCRE re(regexp); |
1548 | for (auto _ : state) { |
1549 | CHECK(PCRE::FullMatch(s, re)); |
1550 | } |
1551 | state.SetBytesProcessed(state.iterations() * state.range(0)); |
1552 | } |
1553 | |
1554 | void FullMatchRE2(benchmark::State& state, const char *regexp) { |
1555 | std::string s; |
1556 | MakeText(&s, state.range(0)); |
1557 | s += "ABCDEFGHIJ" ; |
1558 | RE2 re(regexp, RE2::Latin1); |
1559 | for (auto _ : state) { |
1560 | CHECK(RE2::FullMatch(s, re)); |
1561 | } |
1562 | state.SetBytesProcessed(state.iterations() * state.range(0)); |
1563 | } |
1564 | |
1565 | void FullMatch_DotStar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*" ); } |
1566 | void FullMatch_DotStar_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s).*" ); } |
1567 | |
1568 | void FullMatch_DotStarDollar_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s).*$" ); } |
1569 | void FullMatch_DotStarDollar_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s).*$" ); } |
1570 | |
1571 | void FullMatch_DotStarCapture_CachedPCRE(benchmark::State& state) { FullMatchPCRE(state, "(?s)((.*)()()($))" ); } |
1572 | void FullMatch_DotStarCapture_CachedRE2(benchmark::State& state) { FullMatchRE2(state, "(?s)((.*)()()($))" ); } |
1573 | |
1574 | #ifdef USEPCRE |
1575 | BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20); |
1576 | #endif |
1577 | BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2, 8, 2<<20); |
1578 | |
1579 | #ifdef USEPCRE |
1580 | BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20); |
1581 | #endif |
1582 | BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2, 8, 2<<20); |
1583 | |
1584 | #ifdef USEPCRE |
1585 | BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20); |
1586 | #endif |
1587 | BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2, 8, 2<<20); |
1588 | |
1589 | void PossibleMatchRangeCommon(benchmark::State& state, const char* regexp) { |
1590 | RE2 re(regexp); |
1591 | std::string min; |
1592 | std::string max; |
1593 | const int kMaxLen = 16; |
1594 | for (auto _ : state) { |
1595 | CHECK(re.PossibleMatchRange(&min, &max, kMaxLen)); |
1596 | } |
1597 | } |
1598 | |
1599 | void PossibleMatchRange_Trivial(benchmark::State& state) { |
1600 | PossibleMatchRangeCommon(state, ".*" ); |
1601 | } |
1602 | void PossibleMatchRange_Complex(benchmark::State& state) { |
1603 | PossibleMatchRangeCommon(state, "^abc[def]?[gh]{1,2}.*" ); |
1604 | } |
1605 | void PossibleMatchRange_Prefix(benchmark::State& state) { |
1606 | PossibleMatchRangeCommon(state, "^some_random_prefix.*" ); |
1607 | } |
1608 | void PossibleMatchRange_NoProg(benchmark::State& state) { |
1609 | PossibleMatchRangeCommon(state, "^some_random_string$" ); |
1610 | } |
1611 | |
1612 | BENCHMARK(PossibleMatchRange_Trivial); |
1613 | BENCHMARK(PossibleMatchRange_Complex); |
1614 | BENCHMARK(PossibleMatchRange_Prefix); |
1615 | BENCHMARK(PossibleMatchRange_NoProg); |
1616 | |
1617 | } // namespace re2 |
1618 | |