// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "benchmark/benchmark.h"
#include "benchmark_api_internal.h"
#include "internal_macros.h"

#ifndef BENCHMARK_OS_WINDOWS
#ifndef __Fuchsia__
#include <sys/resource.h>
#endif
#include <sys/time.h>
#include <unistd.h>
#endif

#include <algorithm>
#include <atomic>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <thread>

#include "check.h"
#include "colorprint.h"
#include "commandlineflags.h"
#include "complexity.h"
#include "log.h"
#include "mutex.h"
#include "re.h"
#include "stat.h"
#include "string_util.h"
#include "sysinfo.h"
#include "timers.h"

DEFINE_bool(benchmark_list_tests, false,
            "Print a list of benchmarks. This option overrides all other "
            "options.");

DEFINE_string(benchmark_filter, ".",
              "A regular expression that specifies the set of benchmarks "
              "to execute. If this flag is empty, or if it is the string "
              "\"all\", all benchmarks linked into the process are run.");

DEFINE_double(benchmark_min_time, 0.5,
              "Minimum number of seconds we should run benchmark before "
              "results are considered significant. For cpu-time based "
              "tests, this is the lower bound on the total cpu time "
              "used by all threads that make up the test. For real-time "
              "based tests, this is the lower bound on the elapsed time "
              "of the benchmark execution, regardless of number of "
              "threads.");

DEFINE_int32(benchmark_repetitions, 1,
             "The number of runs of each benchmark. If greater than 1, the "
             "mean and standard deviation of the runs will be reported.");

DEFINE_bool(benchmark_report_aggregates_only, false,
            "Report the result of each benchmark repetition. When 'true' is "
            "specified, only the mean, standard deviation, and other "
            "statistics are reported for repeated benchmarks.");

DEFINE_string(benchmark_format, "console",
              "The format to use for console output. Valid values are "
              "'console', 'json', or 'csv'.");

DEFINE_string(benchmark_out_format, "json",
              "The format to use for file output. Valid values are "
              "'console', 'json', or 'csv'.");

DEFINE_string(benchmark_out, "", "The file to write additional output to");

DEFINE_string(benchmark_color, "auto",
              "Whether to use colors in the output. Valid values: "
              "'true'/'yes'/1, 'false'/'no'/0, and 'auto'. 'auto' means to use "
              "colors if the output is being sent to a terminal and the TERM "
              "environment variable is set to a terminal type that supports "
              "colors.");

DEFINE_int32(v, 0, "The level of verbose logging to output");

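// Illustrative invocation exercising the flags above (a sketch; the binary
// name "mybenchmark" is hypothetical):
//
//   ./mybenchmark --benchmark_filter=BM_memcpy.* \
//                 --benchmark_min_time=2.0 \
//                 --benchmark_repetitions=5 \
//                 --benchmark_report_aggregates_only=true \
//                 --benchmark_out=results.json --benchmark_out_format=json
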
namespace benchmark {
namespace internal {

void UseCharPointer(char const volatile*) {}

}  // end namespace internal

namespace {

static const size_t kMaxIterations = 1000000000;

}  // end namespace

namespace internal {

class ThreadManager {
 public:
  ThreadManager(int num_threads)
      : alive_threads_(num_threads), start_stop_barrier_(num_threads) {}

  Mutex& GetBenchmarkMutex() const RETURN_CAPABILITY(benchmark_mutex_) {
    return benchmark_mutex_;
  }

  bool StartStopBarrier() EXCLUDES(end_cond_mutex_) {
    return start_stop_barrier_.wait();
  }

  void NotifyThreadComplete() EXCLUDES(end_cond_mutex_) {
    start_stop_barrier_.removeThread();
    if (--alive_threads_ == 0) {
      MutexLock lock(end_cond_mutex_);
      end_condition_.notify_all();
    }
  }

  void WaitForAllThreads() EXCLUDES(end_cond_mutex_) {
    MutexLock lock(end_cond_mutex_);
    end_condition_.wait(lock.native_handle(),
                        [this]() { return alive_threads_ == 0; });
  }

 public:
  struct Result {
    double real_time_used = 0;
    double cpu_time_used = 0;
    double manual_time_used = 0;
    int64_t bytes_processed = 0;
    int64_t items_processed = 0;
    int complexity_n = 0;
    std::string report_label_;
    std::string error_message_;
    bool has_error_ = false;
  };
  GUARDED_BY(GetBenchmarkMutex()) Result results;

 private:
  mutable Mutex benchmark_mutex_;
  std::atomic<int> alive_threads_;
  Barrier start_stop_barrier_;
  Mutex end_cond_mutex_;
  Condition end_condition_;
};

// Timer management class
class ThreadTimer {
 public:
  ThreadTimer() = default;

  // Called by each thread
  void StartTimer() {
    running_ = true;
    start_real_time_ = ChronoClockNow();
    start_cpu_time_ = ThreadCPUUsage();
  }

  // Called by each thread
  void StopTimer() {
    CHECK(running_);
    running_ = false;
    real_time_used_ += ChronoClockNow() - start_real_time_;
    cpu_time_used_ += ThreadCPUUsage() - start_cpu_time_;
  }

  // Called by each thread
  void SetIterationTime(double seconds) { manual_time_used_ += seconds; }

  bool running() const { return running_; }

  // REQUIRES: timer is not running
  double real_time_used() {
    CHECK(!running_);
    return real_time_used_;
  }

  // REQUIRES: timer is not running
  double cpu_time_used() {
    CHECK(!running_);
    return cpu_time_used_;
  }

  // REQUIRES: timer is not running
  double manual_time_used() {
    CHECK(!running_);
    return manual_time_used_;
  }

 private:
  bool running_ = false;        // Is the timer running
  double start_real_time_ = 0;  // If running_
  double start_cpu_time_ = 0;   // If running_

  // Accumulated time so far (does not contain current slice if running_)
  double real_time_used_ = 0;
  double cpu_time_used_ = 0;
  // Manually set iteration time. User sets this with SetIterationTime(seconds).
  double manual_time_used_ = 0;
};

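// Illustrative use of the manual-timing path that feeds manual_time_used_
// above (a sketch; DoWork() is a hypothetical workload):
//
//   static void BM_ManualTiming(benchmark::State& state) {
//     while (state.KeepRunning()) {
//       auto start = std::chrono::high_resolution_clock::now();
//       DoWork();
//       auto end = std::chrono::high_resolution_clock::now();
//       state.SetIterationTime(
//           std::chrono::duration<double>(end - start).count());
//     }
//   }
//   BENCHMARK(BM_ManualTiming)->UseManualTime();
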
namespace {

BenchmarkReporter::Run CreateRunReport(
    const benchmark::internal::Benchmark::Instance& b,
    const internal::ThreadManager::Result& results, size_t iters,
    double seconds) {
  // Create report about this benchmark run.
  BenchmarkReporter::Run report;

  report.benchmark_name = b.name;
  report.error_occurred = results.has_error_;
  report.error_message = results.error_message_;
  report.report_label = results.report_label_;
  // Report the total iterations across all threads.
  report.iterations = static_cast<int64_t>(iters) * b.threads;
  report.time_unit = b.time_unit;

  if (!report.error_occurred) {
    double bytes_per_second = 0;
    if (results.bytes_processed > 0 && seconds > 0.0) {
      bytes_per_second = (results.bytes_processed / seconds);
    }
    double items_per_second = 0;
    if (results.items_processed > 0 && seconds > 0.0) {
      items_per_second = (results.items_processed / seconds);
    }

    if (b.use_manual_time) {
      report.real_accumulated_time = results.manual_time_used;
    } else {
      report.real_accumulated_time = results.real_time_used;
    }
    report.cpu_accumulated_time = results.cpu_time_used;
    report.bytes_per_second = bytes_per_second;
    report.items_per_second = items_per_second;
    report.complexity_n = results.complexity_n;
    report.complexity = b.complexity;
    report.complexity_lambda = b.complexity_lambda;
  }
  return report;
}

// Execute one thread of benchmark b for the specified number of iterations.
// Adds the stats collected for the thread into manager->results.
void RunInThread(const benchmark::internal::Benchmark::Instance* b,
                 size_t iters, int thread_id,
                 internal::ThreadManager* manager) {
  internal::ThreadTimer timer;
  State st(iters, b->arg, thread_id, b->threads, &timer, manager);
  b->benchmark->Run(st);
  CHECK(st.iterations() == st.max_iterations)
      << "Benchmark returned before State::KeepRunning() returned false!";
  {
    MutexLock l(manager->GetBenchmarkMutex());
    internal::ThreadManager::Result& results = manager->results;
    results.cpu_time_used += timer.cpu_time_used();
    results.real_time_used += timer.real_time_used();
    results.manual_time_used += timer.manual_time_used();
    results.bytes_processed += st.bytes_processed();
    results.items_processed += st.items_processed();
    results.complexity_n += st.complexity_length_n();
  }
  manager->NotifyThreadComplete();
}
280
281std::vector<BenchmarkReporter::Run> RunBenchmark(
282 const benchmark::internal::Benchmark::Instance& b,
283 std::vector<BenchmarkReporter::Run>* complexity_reports) {
284 std::vector<BenchmarkReporter::Run> reports; // return value
285
286 size_t iters = 1;
287 std::unique_ptr<internal::ThreadManager> manager;
288 std::vector<std::thread> pool(b.threads - 1);
289 const int repeats =
290 b.repetitions != 0 ? b.repetitions : FLAGS_benchmark_repetitions;
291 const bool report_aggregates_only =
292 repeats != 1 &&
293 (b.report_mode == internal::RM_Unspecified
294 ? FLAGS_benchmark_report_aggregates_only
295 : b.report_mode == internal::RM_ReportAggregatesOnly);
296 for (int i = 0; i < repeats; i++) {
297 for (;;) {
298 // Try benchmark
299 VLOG(2) << "Running " << b.name << " for " << iters << "\n";
300
301 manager.reset(new internal::ThreadManager(b.threads));
302 for (std::size_t ti = 0; ti < pool.size(); ++ti) {
303 pool[ti] = std::thread(&RunInThread, &b, iters,
304 static_cast<int>(ti + 1), manager.get());
305 }
306 RunInThread(&b, iters, 0, manager.get());
307 manager->WaitForAllThreads();
308 for (std::thread& thread : pool) thread.join();
309 internal::ThreadManager::Result results;
310 {
311 MutexLock l(manager->GetBenchmarkMutex());
312 results = manager->results;
313 }
314 manager.reset();
315 // Adjust real/manual time stats since they were reported per thread.
316 results.real_time_used /= b.threads;
317 results.manual_time_used /= b.threads;
318
319 VLOG(2) << "Ran in " << results.cpu_time_used << "/"
320 << results.real_time_used << "\n";
321
322 // Base decisions off of real time if requested by this benchmark.
323 double seconds = results.cpu_time_used;
324 if (b.use_manual_time) {
325 seconds = results.manual_time_used;
326 } else if (b.use_real_time) {
327 seconds = results.real_time_used;
328 }
329
330 const double min_time =
331 !IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time;
332 // If this was the first run, was elapsed time or cpu time large enough?
333 // If this is not the first run, go with the current value of iter.
334 if ((i > 0) || results.has_error_ || (iters >= kMaxIterations) ||
335 (seconds >= min_time) || (results.real_time_used >= 5 * min_time)) {
336 BenchmarkReporter::Run report =
337 CreateRunReport(b, results, iters, seconds);
338 if (!report.error_occurred && b.complexity != oNone)
339 complexity_reports->push_back(report);
340 reports.push_back(report);
341 break;
342 }
343
      // See by how much the iteration count should be increased.
      // Note: Avoid division by zero with max(seconds, 1ns).
      double multiplier = min_time * 1.4 / std::max(seconds, 1e-9);
      // If our last run was at least 10% of FLAGS_benchmark_min_time then we
      // use the multiplier directly. Otherwise we use at most 10 times
      // expansion.
      // NOTE: When the last run was at least 10% of the min time the max
      // expansion should be 14x.
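      // For example, with min_time = 0.5s, a last run that took 0.1s gives
      // multiplier = 0.5 * 1.4 / 0.1 = 7, so 1000 iterations would grow to
      // 7000. Had the run taken only 0.01s (under 10% of min_time), the
      // computed multiplier of 70 would be clamped to 10.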
      bool is_significant = (seconds / min_time) > 0.1;
      multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
      if (multiplier <= 1.0) multiplier = 2.0;
      double next_iters = std::max(multiplier * iters, iters + 1.0);
      if (next_iters > kMaxIterations) {
        next_iters = kMaxIterations;
      }
      VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
      iters = static_cast<size_t>(next_iters + 0.5);
    }
  }
  // Calculate additional statistics
  auto stat_reports = ComputeStats(reports);
  if ((b.complexity != oNone) && b.last_benchmark_instance) {
    auto additional_run_stats = ComputeBigO(*complexity_reports);
    stat_reports.insert(stat_reports.end(), additional_run_stats.begin(),
                        additional_run_stats.end());
    complexity_reports->clear();
  }

  if (report_aggregates_only) reports.clear();
  reports.insert(reports.end(), stat_reports.begin(), stat_reports.end());
  return reports;
}
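
// Illustrative registration that would exercise the complexity path above
// (a sketch; the sort workload is hypothetical):
//
//   static void BM_Sort(benchmark::State& state) {
//     std::vector<int> v(state.range(0));
//     while (state.KeepRunning()) {
//       std::vector<int> copy(v);
//       std::sort(copy.begin(), copy.end());
//     }
//     state.SetComplexityN(state.range(0));
//   }
//   BENCHMARK(BM_Sort)->Range(1 << 10, 1 << 18)->Complexity(benchmark::oNLogN);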

}  // namespace
}  // namespace internal

State::State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
             int n_threads, internal::ThreadTimer* timer,
             internal::ThreadManager* manager)
    : started_(false),
      finished_(false),
      total_iterations_(0),
      range_(ranges),
      bytes_processed_(0),
      items_processed_(0),
      complexity_n_(0),
      error_occurred_(false),
      thread_index(thread_i),
      threads(n_threads),
      max_iterations(max_iters),
      timer_(timer),
      manager_(manager) {
  CHECK(max_iterations != 0) << "At least one iteration must be run";
  CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
}

void State::PauseTiming() {
  // Add in time accumulated so far
  CHECK(started_ && !finished_ && !error_occurred_);
  timer_->StopTimer();
}

void State::ResumeTiming() {
  CHECK(started_ && !finished_ && !error_occurred_);
  timer_->StartTimer();
}

void State::SkipWithError(const char* msg) {
  CHECK(msg);
  error_occurred_ = true;
  {
    MutexLock l(manager_->GetBenchmarkMutex());
    if (manager_->results.has_error_ == false) {
      manager_->results.error_message_ = msg;
      manager_->results.has_error_ = true;
    }
  }
  total_iterations_ = max_iterations;
  if (timer_->running()) timer_->StopTimer();
}

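// Illustrative use of SkipWithError (a sketch; ResourceGood() is a
// hypothetical precondition check):
//
//   static void BM_WithError(benchmark::State& state) {
//     if (!ResourceGood()) {
//       state.SkipWithError("resource unavailable");
//       // The KeepRunning() loop will not be entered.
//     }
//     while (state.KeepRunning()) {
//       // ... per-iteration work; on failure, call SkipWithError() and
//       // break out of the loop.
//     }
//   }
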
void State::SetIterationTime(double seconds) {
  timer_->SetIterationTime(seconds);
}

void State::SetLabel(const char* label) {
  MutexLock l(manager_->GetBenchmarkMutex());
  manager_->results.report_label_ = label;
}

void State::StartKeepRunning() {
  CHECK(!started_ && !finished_);
  started_ = true;
  manager_->StartStopBarrier();
  if (!error_occurred_) ResumeTiming();
}

void State::FinishKeepRunning() {
  CHECK(started_ && (!finished_ || error_occurred_));
  if (!error_occurred_) {
    PauseTiming();
  }
  // Total iterations is now one greater than max_iterations. Fix this.
  total_iterations_ = max_iterations;
  finished_ = true;
  manager_->StartStopBarrier();
}

namespace internal {
namespace {

void RunBenchmarks(const std::vector<Benchmark::Instance>& benchmarks,
                   BenchmarkReporter* console_reporter,
                   BenchmarkReporter* file_reporter) {
  // Note the file_reporter can be null.
  CHECK(console_reporter != nullptr);

  // Determine the width of the name field using a minimum width of 10.
  bool has_repetitions = FLAGS_benchmark_repetitions > 1;
  size_t name_field_width = 10;
  for (const Benchmark::Instance& benchmark : benchmarks) {
    name_field_width =
        std::max<size_t>(name_field_width, benchmark.name.size());
    has_repetitions |= benchmark.repetitions > 1;
  }
  if (has_repetitions) name_field_width += std::strlen("_stddev");

  // Print header here
  BenchmarkReporter::Context context;
  context.num_cpus = NumCPUs();
  context.mhz_per_cpu = CyclesPerSecond() / 1000000.0f;

  context.cpu_scaling_enabled = CpuScalingEnabled();
  context.name_field_width = name_field_width;

  // Keep track of running times of all instances of current benchmark
  std::vector<BenchmarkReporter::Run> complexity_reports;

  // We flush streams after invoking reporter methods that write to them. This
  // ensures users get timely updates even when streams are not line-buffered.
  auto flushStreams = [](BenchmarkReporter* reporter) {
    if (!reporter) return;
    std::flush(reporter->GetOutputStream());
    std::flush(reporter->GetErrorStream());
  };

  if (console_reporter->ReportContext(context) &&
      (!file_reporter || file_reporter->ReportContext(context))) {
    flushStreams(console_reporter);
    flushStreams(file_reporter);
    for (const auto& benchmark : benchmarks) {
      std::vector<BenchmarkReporter::Run> reports =
          RunBenchmark(benchmark, &complexity_reports);
      console_reporter->ReportRuns(reports);
      if (file_reporter) file_reporter->ReportRuns(reports);
      flushStreams(console_reporter);
      flushStreams(file_reporter);
    }
  }
  console_reporter->Finalize();
  if (file_reporter) file_reporter->Finalize();
  flushStreams(console_reporter);
  flushStreams(file_reporter);
}

std::unique_ptr<BenchmarkReporter> CreateReporter(
    std::string const& name, ConsoleReporter::OutputOptions allow_color) {
  typedef std::unique_ptr<BenchmarkReporter> PtrType;
  if (name == "console") {
    return PtrType(new ConsoleReporter(allow_color));
  } else if (name == "json") {
    return PtrType(new JSONReporter);
  } else if (name == "csv") {
    return PtrType(new CSVReporter);
  } else {
    std::cerr << "Unexpected format: '" << name << "'\n";
    std::exit(1);
  }
}

}  // end namespace
}  // end namespace internal

size_t RunSpecifiedBenchmarks() {
  return RunSpecifiedBenchmarks(nullptr, nullptr);
}

size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter) {
  return RunSpecifiedBenchmarks(console_reporter, nullptr);
}

size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
                              BenchmarkReporter* file_reporter) {
  std::string spec = FLAGS_benchmark_filter;
  if (spec.empty() || spec == "all")
    spec = ".";  // Regexp that matches all benchmarks

  // Setup the reporters
  std::ofstream output_file;
  std::unique_ptr<BenchmarkReporter> default_console_reporter;
  std::unique_ptr<BenchmarkReporter> default_file_reporter;
  if (!console_reporter) {
    auto output_opts = ConsoleReporter::OO_None;
    if (FLAGS_benchmark_color == "auto")
      output_opts = IsColorTerminal() ? ConsoleReporter::OO_Color
                                      : ConsoleReporter::OO_None;
    else
      output_opts = IsTruthyFlagValue(FLAGS_benchmark_color)
                        ? ConsoleReporter::OO_Color
                        : ConsoleReporter::OO_None;
    default_console_reporter =
        internal::CreateReporter(FLAGS_benchmark_format, output_opts);
    console_reporter = default_console_reporter.get();
  }
  auto& Out = console_reporter->GetOutputStream();
  auto& Err = console_reporter->GetErrorStream();

  std::string const& fname = FLAGS_benchmark_out;
  if (fname == "" && file_reporter) {
    Err << "A custom file reporter was provided but "
           "--benchmark_out=<file> was not specified."
        << std::endl;
    std::exit(1);
  }
  if (fname != "") {
    output_file.open(fname);
    if (!output_file.is_open()) {
      Err << "invalid file name: '" << fname << "'" << std::endl;
      std::exit(1);
    }
    if (!file_reporter) {
      default_file_reporter = internal::CreateReporter(
          FLAGS_benchmark_out_format, ConsoleReporter::OO_None);
      file_reporter = default_file_reporter.get();
    }
    file_reporter->SetOutputStream(&output_file);
    file_reporter->SetErrorStream(&output_file);
  }

  std::vector<internal::Benchmark::Instance> benchmarks;
  if (!FindBenchmarksInternal(spec, &benchmarks, &Err)) return 0;

  if (benchmarks.empty()) {
    Err << "Failed to match any benchmarks against regex: " << spec << "\n";
    return 0;
  }

  if (FLAGS_benchmark_list_tests) {
    for (auto const& benchmark : benchmarks) Out << benchmark.name << "\n";
  } else {
    internal::RunBenchmarks(benchmarks, console_reporter, file_reporter);
  }

  return benchmarks.size();
}

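// Typical entry point driving the functions above (a sketch; BENCHMARK_MAIN()
// from benchmark.h expands to roughly this):
//
//   int main(int argc, char** argv) {
//     benchmark::Initialize(&argc, argv);
//     benchmark::RunSpecifiedBenchmarks();
//     return 0;
//   }
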
namespace internal {

void PrintUsageAndExit() {
  fprintf(stdout,
          "benchmark"
          " [--benchmark_list_tests={true|false}]\n"
          "          [--benchmark_filter=<regex>]\n"
          "          [--benchmark_min_time=<min_time>]\n"
          "          [--benchmark_repetitions=<num_repetitions>]\n"
          "          [--benchmark_report_aggregates_only={true|false}]\n"
          "          [--benchmark_format=<console|json|csv>]\n"
          "          [--benchmark_out=<filename>]\n"
          "          [--benchmark_out_format=<json|console|csv>]\n"
          "          [--benchmark_color={auto|true|false}]\n"
          "          [--v=<verbosity>]\n");
  exit(0);
}

void ParseCommandLineFlags(int* argc, char** argv) {
  using namespace benchmark;
  for (int i = 1; i < *argc; ++i) {
    if (ParseBoolFlag(argv[i], "benchmark_list_tests",
                      &FLAGS_benchmark_list_tests) ||
        ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
        ParseDoubleFlag(argv[i], "benchmark_min_time",
                        &FLAGS_benchmark_min_time) ||
        ParseInt32Flag(argv[i], "benchmark_repetitions",
                       &FLAGS_benchmark_repetitions) ||
        ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
                      &FLAGS_benchmark_report_aggregates_only) ||
        ParseStringFlag(argv[i], "benchmark_format", &FLAGS_benchmark_format) ||
        ParseStringFlag(argv[i], "benchmark_out", &FLAGS_benchmark_out) ||
        ParseStringFlag(argv[i], "benchmark_out_format",
                        &FLAGS_benchmark_out_format) ||
        ParseStringFlag(argv[i], "benchmark_color", &FLAGS_benchmark_color) ||
        // "color_print" is the deprecated name for "benchmark_color".
        // TODO: Remove this.
        ParseStringFlag(argv[i], "color_print", &FLAGS_benchmark_color) ||
        ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
      for (int j = i; j != *argc; ++j) argv[j] = argv[j + 1];

      --(*argc);
      --i;
    } else if (IsFlag(argv[i], "help")) {
      PrintUsageAndExit();
    }
  }
  for (auto const* flag :
       {&FLAGS_benchmark_format, &FLAGS_benchmark_out_format})
    if (*flag != "console" && *flag != "json" && *flag != "csv") {
      PrintUsageAndExit();
    }
  if (FLAGS_benchmark_color.empty()) {
    PrintUsageAndExit();
  }
}

int InitializeStreams() {
  static std::ios_base::Init init;
  return 0;
}

}  // end namespace internal

void Initialize(int* argc, char** argv) {
  internal::ParseCommandLineFlags(argc, argv);
  internal::LogLevel() = FLAGS_v;
}

}  // end namespace benchmark