Benchmark.cpp source code [folly/Benchmark.cpp]

1	/*
2	* Copyright 2012-present Facebook, Inc.
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*/
16
17	// @author Andrei Alexandrescu (andrei.alexandrescu@fb.com)
18
19	#include <folly/Benchmark.h>
20
21	#include <algorithm>
22	#include <cmath>
23	#include <cstring>
24	#include <iostream>
25	#include <limits>
26	#include <map>
27	#include <memory>
28	#include <numeric>
29	#include <utility>
30	#include <vector>
31
32	#include <boost/regex.hpp>
33
34	#include <folly/MapUtil.h>
35	#include <folly/String.h>
36	#include <folly/container/Foreach.h>
37	#include <folly/json.h>
38
39	using namespace std;
40
41	DEFINE_bool(benchmark, false, "Run benchmarks.");
42	DEFINE_bool(json, false, "Output in JSON format.");
43	DEFINE_bool(json_verbose, false, "Output in verbose JSON format.");
44
45	DEFINE_string(
46	bm_regex,
47	"",
48	"Only benchmarks whose names match this regex will be run.");
49
50	DEFINE_int64(
51	bm_min_usec,
52	`100`,
53	"Minimum # of microseconds we'll accept for each benchmark.");
54
55	DEFINE_int32(
56	bm_min_iters,
57	`1`,
58	"Minimum # of iterations we'll try for each benchmark.");
59
60	DEFINE_int64(
61	bm_max_iters,
62	`1` << `30`,
63	"Maximum # of iterations we'll try for each benchmark.");
64
65	DEFINE_int32(
66	bm_max_secs,
67	`1`,
68	"Maximum # of seconds we'll spend on each benchmark.");
69
70	namespace folly {
71
72	std::chrono::high_resolution_clock::duration BenchmarkSuspender::timeSpent;
73
74	typedef function<detail::TimeIterData(unsigned int)> BenchmarkFun;
75
76	vector<detail::BenchmarkRegistration>& benchmarks() {
77	static vector<detail::BenchmarkRegistration> _benchmarks;
78	return _benchmarks;
79	}
80
81	#define FB_FOLLY_GLOBAL_BENCHMARK_BASELINE fbFollyGlobalBenchmarkBaseline
82	#define FB_STRINGIZE_X2(x) FB_STRINGIZE(x)
83
84	// Add the global baseline
85	BENCHMARK(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE) {
86	#ifdef _MSC_VER
87	_ReadWriteBarrier();
88	#else
89	asm volatile("");
90	#endif
91	}
92
93	size_t getGlobalBenchmarkBaselineIndex() {
94	const char* global = FB_STRINGIZE_X2(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE);
95	auto it = std::find_if(
96	benchmarks().begin(),
97	benchmarks().end(),
98	[global](const detail::BenchmarkRegistration& v) {
99	return v.name == global;
100	});
101	CHECK(it != benchmarks().end());
102	return size_t(std::distance(benchmarks().begin(), it));
103	}
104
105	#undef FB_STRINGIZE_X2
106	#undef FB_FOLLY_GLOBAL_BENCHMARK_BASELINE
107
108	void detail::addBenchmarkImpl(
109	const char* file,
110	const char* name,
111	BenchmarkFun fun,
112	bool useCounter) {
113	benchmarks().push_back({file, name, std::move(fun), useCounter});
114	}
115
116	static std::pair<double, UserCounters> runBenchmarkGetNSPerIteration(
117	const BenchmarkFun& fun,
118	const double globalBaseline) {
119	using std::chrono::duration_cast;
120	using std::chrono::high_resolution_clock;
121	using std::chrono::microseconds;
122	using std::chrono::nanoseconds;
123	using std::chrono::seconds;
124
125	// They key here is accuracy; too low numbers means the accuracy was
126	// coarse. We up the ante until we get to at least minNanoseconds
127	// timings.
128	static_assert(
129	std::is_same<high_resolution_clock::duration, nanoseconds>::value,
130	"High resolution clock must be nanosecond resolution.");
131	// We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
132	// the clock resolution is worse than that, it will be larger. In
133	// essence we're aiming at making the quantization noise 0.01%.
134	static const auto minNanoseconds = std::max<nanoseconds>(
135	nanoseconds (`100000`), microseconds (FLAGS_bm_min_usec));
136
137	// We do measurements in several epochs and take the minimum, to
138	// account for jitter.
139	static const unsigned int epochs = `1000`;
140	// We establish a total time budget as we don't want a measurement
141	// to take too long. This will curtail the number of actual epochs.
142	const auto timeBudget = seconds (FLAGS_bm_max_secs);
143	auto global = high_resolution_clock::now();
144
145	std::vector<std::pair<double, UserCounters>> epochResults(epochs);
146	size_t actualEpochs = `0`;
147
148	for (; actualEpochs < epochs; ++actualEpochs) {
149	const auto maxIters = uint32_t(FLAGS_bm_max_iters);
150	for (auto n = uint32_t(FLAGS_bm_min_iters); n < maxIters; n *= `2`) {
151	detail::TimeIterData timeIterData = fun (static_cast<unsigned int>(n));
152	if (timeIterData.duration < minNanoseconds) {
153	continue;
154	}
155	// We got an accurate enough timing, done. But only save if
156	// smaller than the current result.
157	auto nsecs = duration_cast<nanoseconds>(timeIterData.duration);
158	epochResults [actualEpochs] = std::make_pair(
159	max(`0.0`, double(nsecs.count()) / timeIterData.niter - globalBaseline),
160	std::move(timeIterData.userCounters));
161	// Done with the current epoch, we got a meaningful timing.
162	break;
163	}
164	auto now = high_resolution_clock::now();
165	if (now - global >= timeBudget) {
166	// No more time budget available.
167	++actualEpochs;
168	break;
169	}
170	}
171
172	// Current state of the art: get the minimum. After some
173	// experimentation, it seems taking the minimum is the best.
174	auto iter = min_element(
175	epochResults.begin(),
176	epochResults.begin() + actualEpochs,
177	[](const auto& a, const auto& b) { return a.first < b.first; });
178
179	// If the benchmark was basically drowned in baseline noise, it's
180	// possible it became negative.
181	return std::make_pair(max(`0.0`, iter ->first), iter ->second);
182	}
183
184	struct ScaleInfo {
185	double boundary;
186	const char* suffix;
187	};
188
189	static const ScaleInfo kTimeSuffixes[]{
190	{`365.25` * `24` * `3600`, "years"},
191	{`24` * `3600`, "days"},
192	{`3600`, "hr"},
193	{`60`, "min"},
194	{`1`, "s"},
195	{`1E-3`, "ms"},
196	{`1E-6`, "us"},
197	{`1E-9`, "ns"},
198	{`1E-12`, "ps"},
199	{`1E-15`, "fs"},
200	{`0`, nullptr},
201	};
202
203	static const ScaleInfo kMetricSuffixes[]{
204	{`1E24`, "Y"}, // yotta
205	{`1E21`, "Z"}, // zetta
206	{`1E18`, "X"}, // "exa" written with suffix 'X' so as to not create
207	// confusion with scientific notation
208	{`1E15`, "P"}, // peta
209	{`1E12`, "T"}, // terra
210	{`1E9`, "G"}, // giga
211	{`1E6`, "M"}, // mega
212	{`1E3`, "K"}, // kilo
213	{`1`, ""},
214	{`1E-3`, "m"}, // milli
215	{`1E-6`, "u"}, // micro
216	{`1E-9`, "n"}, // nano
217	{`1E-12`, "p"}, // pico
218	{`1E-15`, "f"}, // femto
219	{`1E-18`, "a"}, // atto
220	{`1E-21`, "z"}, // zepto
221	{`1E-24`, "y"}, // yocto
222	{`0`, nullptr},
223	};
224
225	static string
226	humanReadable(double n, unsigned int decimals, const ScaleInfo* scales) {
227	if (std::isinf(n) \|\| std::isnan(n)) {
228	return folly::to<string>(n);
229	}
230
231	const double absValue = fabs(n);
232	const ScaleInfo* scale = scales;
233	while (absValue < scale[`0`].boundary && scale[`1`].suffix != nullptr) {
234	++scale;
235	}
236
237	const double scaledValue = n / scale->boundary;
238	return stringPrintf("%.*f%s", decimals, scaledValue, scale->suffix);
239	}
240
241	static string readableTime(double n, unsigned int decimals) {
242	return humanReadable(n, decimals, kTimeSuffixes);
243	}
244
245	static string metricReadable(double n, unsigned int decimals) {
246	return humanReadable(n, decimals, kMetricSuffixes);
247	}
248
249	namespace {
250	class BenchmarkResultsPrinter {
251	public:
252	BenchmarkResultsPrinter() = default;
253	explicit BenchmarkResultsPrinter(std::set<std::string> counterNames)
254	: counterNames_(std::move(counterNames)),
255	namesLength_{std::accumulate(
256	counterNames_.begin(),
257	counterNames_.end(),
258	size_t{`0`},
259	[](size_t acc, auto&& name) { return acc + `2` + name.length(); })} {}
260
261	static constexpr unsigned int columns{`76`};
262	void separator(char pad) {
263	puts(string (columns + namesLength_, pad).c_str());
264	}
265
266	void header(const string& file) {
267	separator(`'='`);
268	printf("%-*srelative time/iter iters/s", columns - `28`, file.c_str());
269	for (auto const& name : counterNames_) {
270	printf(" %s", name.c_str());
271	}
272	printf("\n");
273	separator(`'='`);
274	}
275
276	void print(const vector<detail::BenchmarkResult>& data) {
277	for (auto& datum : data) {
278	auto file = datum.file;
279	if (file != lastFile_) {
280	// New file starting
281	header(file);
282	lastFile_ = file;
283	}
284
285	string s = datum.name;
286	if (s == "-") {
287	separator(`'-'`);
288	continue;
289	}
290	bool useBaseline / = void /;
291	if (s [`0`] == `'%'`) {
292	s.erase(`0`, `1`);
293	useBaseline = true;
294	} else {
295	baselineNsPerIter_ = datum.timeInNs;
296	useBaseline = false;
297	}
298	s.resize(columns - `29`, `' '`);
299	auto nsPerIter = datum.timeInNs;
300	auto secPerIter = nsPerIter / `1E9`;
301	auto itersPerSec = (secPerIter == `0`)
302	? std::numeric_limits<double>::infinity()
303	: (`1` / secPerIter);
304	if (!useBaseline) {
305	// Print without baseline
306	printf(
307	"%*s %9s %7s",
308	static_cast<int>(s.size()),
309	s.c_str(),
310	readableTime(secPerIter, `2`).c_str(),
311	metricReadable(itersPerSec, `2`).c_str());
312	} else {
313	// Print with baseline
314	auto rel = baselineNsPerIter_ / nsPerIter * `100.0`;
315	printf(
316	"%*s %7.2f%% %9s %7s",
317	static_cast<int>(s.size()),
318	s.c_str(),
319	rel,
320	readableTime(secPerIter, `2`).c_str(),
321	metricReadable(itersPerSec, `2`).c_str());
322	}
323	for (auto const& name : counterNames_) {
324	if (auto ptr = folly::get_ptr(datum.counters, name)) {
325	printf(" %-d", int(name.length()), ptr);
326	} else {
327	printf(" %-s", int*(name.length()), "NaN");
328	}
329	}
330	printf("\n");
331	}
332	}
333
334	private:
335	std::set<std::string> counterNames_;
336	size_t namesLength_{`0`};
337	double baselineNsPerIter_{numeric_limits<double>::max()};
338	string lastFile_;
339	};
340	} // namespace
341
342	static void printBenchmarkResultsAsJson(
343	const vector<detail::BenchmarkResult>& data) {
344	dynamic d = dynamic::object;
345	for (auto& datum : data) {
346	d [datum.name] = datum.timeInNs * `1000.`;
347	}
348
349	printf("%s\n", toPrettyJson(d).c_str());
350	}
351
352	static void printBenchmarkResultsAsVerboseJson(
353	const vector<detail::BenchmarkResult>& data) {
354	dynamic d;
355	benchmarkResultsToDynamic(data, d);
356	printf("%s\n", toPrettyJson(d).c_str());
357	}
358
359	static void printBenchmarkResults(const vector<detail::BenchmarkResult>& data) {
360	if (FLAGS_json_verbose) {
361	printBenchmarkResultsAsVerboseJson(data);
362	return;
363	} else if (FLAGS_json) {
364	printBenchmarkResultsAsJson(data);
365	return;
366	}
367
368	CHECK(FLAGS_json_verbose \|\| FLAGS_json) << "Cannot print benchmark results";
369	}
370
371	void benchmarkResultsToDynamic(
372	const vector<detail::BenchmarkResult>& data,
373	dynamic& out) {
374	out = dynamic::array;
375	for (auto& datum : data) {
376	out.push_back(dynamic::array(datum.file, datum.name, datum.timeInNs));
377	}
378	}
379
380	void benchmarkResultsFromDynamic(
381	const dynamic& d,
382	vector<detail::BenchmarkResult>& results) {
383	for (auto& datum : d) {
384	results.push_back({datum [`0`].asString(),
385	datum [`1`].asString(),
386	datum [`2`].asDouble(),
387	UserCounters {}});
388	}
389	}
390
391	static pair<StringPiece, StringPiece> resultKey(
392	const detail::BenchmarkResult& result) {
393	return pair<StringPiece, StringPiece>(result.file, result.name);
394	}
395
396	void printResultComparison(
397	const vector<detail::BenchmarkResult>& base,
398	const vector<detail::BenchmarkResult>& test) {
399	map<pair<StringPiece, StringPiece>, double> baselines;
400
401	for (auto& baseResult : base) {
402	baselines [resultKey(baseResult)] = baseResult.timeInNs;
403	}
404	//
405	// Width available
406	static const unsigned int columns = `76`;
407
408	// Compute the longest benchmark name
409	size_t longestName = `0`;
410	for (auto& datum : test) {
411	longestName = max(longestName, datum.name.size());
412	}
413
414	// Print a horizontal rule
415	auto separator = [&](char pad) { puts(string (columns, pad).c_str()); };
416
417	// Print header for a file
418	auto header = [&](const string& file) {
419	separator (`'='`);
420	printf("%-*srelative time/iter iters/s", columns - `28`, file.c_str());
421	separator (`'='`);
422	};
423
424	string lastFile;
425
426	for (auto& datum : test) {
427	folly::Optional<double> baseline =
428	folly::get_optional(baselines, resultKey(datum));
429	auto file = datum.file;
430	if (file != lastFile) {
431	// New file starting
432	header (file);
433	lastFile = file;
434	}
435
436	string s = datum.name;
437	if (s == "-") {
438	separator (`'-'`);
439	continue;
440	}
441	if (s [`0`] == `'%'`) {
442	s.erase(`0`, `1`);
443	}
444	s.resize(columns - `29`, `' '`);
445	auto nsPerIter = datum.timeInNs;
446	auto secPerIter = nsPerIter / `1E9`;
447	auto itersPerSec = (secPerIter == `0`)
448	? std::numeric_limits<double>::infinity()
449	: (`1` / secPerIter);
450	if (!baseline) {
451	// Print without baseline
452	printf(
453	"%*s %9s %7s\n",
454	static_cast<int>(s.size()),
455	s.c_str(),
456	readableTime(secPerIter, `2`).c_str(),
457	metricReadable(itersPerSec, `2`).c_str());
458	} else {
459	// Print with baseline
460	auto rel = baseline / nsPerIter `100.0`;
461	printf(
462	"%*s %7.2f%% %9s %7s\n",
463	static_cast<int>(s.size()),
464	s.c_str(),
465	rel,
466	readableTime(secPerIter, `2`).c_str(),
467	metricReadable(itersPerSec, `2`).c_str());
468	}
469	}
470	separator (`'='`);
471	}
472
473	void checkRunMode() {
474	if (folly::kIsDebug \|\| folly::kIsSanitize) {
475	std::cerr << "WARNING: Benchmark running "
476	<< (folly::kIsDebug ? "in DEBUG mode" : "with SANITIZERS")
477	<< std::endl;
478	}
479	}
480
481	void runBenchmarks() {
482	CHECK(!benchmarks().empty());
483
484	checkRunMode();
485
486	vector<detail::BenchmarkResult> results;
487	results.reserve(benchmarks().size() - `1`);
488
489	std::unique_ptr<boost::regex> bmRegex;
490	if (!FLAGS_bm_regex.empty()) {
491	bmRegex = std::make_unique<boost::regex>(FLAGS_bm_regex);
492	}
493
494	// PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS.
495
496	size_t baselineIndex = getGlobalBenchmarkBaselineIndex();
497
498	auto const globalBaseline =
499	runBenchmarkGetNSPerIteration(benchmarks()[baselineIndex].func, `0`);
500
501	bool useCounter =
502	std::any_of(benchmarks().begin(), benchmarks().end(), [](const auto& bm) {
503	return bm.useCounter;
504	});
505	BenchmarkResultsPrinter printer;
506	std::set<std::string> counterNames;
507	FOR_EACH_RANGE (i, `0`, benchmarks().size()) {
508	if (i == baselineIndex) {
509	continue;
510	}
511	std::pair<double, UserCounters> elapsed;
512	auto& bm = benchmarks()[i];
513	if (bm.name != "-") { // skip separators
514	if (bmRegex && !boost::regex_search(bm.name, *bmRegex)) {
515	continue;
516	}
517	elapsed = runBenchmarkGetNSPerIteration(bm.func, globalBaseline.first);
518	}
519
520	// if customized user counters is used, it cannot print the result in real
521	// time as it needs to run all cases first to know the complete set of
522	// counters have been used, then the header can be printed out properly
523	if (!FLAGS_json_verbose && !FLAGS_json && !useCounter) {
524	printer.print({{bm.file, bm.name, elapsed.first, elapsed.second}});
525	} else {
526	results.push_back({bm.file, bm.name, elapsed.first, elapsed.second});
527	}
528
529	// get all counter names
530	for (auto const& kv : elapsed.second) {
531	counterNames.insert(kv.first);
532	}
533	}
534
535	// PLEASE MAKE NOISE. MEASUREMENTS DONE.
536	if (FLAGS_json_verbose \|\| FLAGS_json) {
537	printBenchmarkResults(results);
538	} else {
539	printer = BenchmarkResultsPrinter {std::move(counterNames)};
540	printer.print(results);
541	printer.separator(`'='`);
542	}
543
544	checkRunMode();
545	}
546
547	} // namespace folly
548

Browse the source code of folly/Benchmark.cpp