1 | #include "benchmark_runner.hpp" |
2 | |
3 | #include "duckdb/common/profiler.hpp" |
4 | #include "duckdb/common/string_util.hpp" |
5 | #include "duckdb.hpp" |
6 | #include "duckdb_benchmark.hpp" |
7 | |
8 | #define CATCH_CONFIG_RUNNER |
9 | #include "catch.hpp" |
10 | #include "re2/re2.h" |
11 | |
#include <atomic>
#include <fstream>
#include <sstream>
#include <thread>

using namespace duckdb;
using namespace std;

void BenchmarkRunner::RegisterBenchmark(Benchmark *benchmark) {
	GetInstance().benchmarks.push_back(benchmark);
}

Benchmark::Benchmark(bool register_benchmark, string name, string group) : name(name), group(group) {
	if (register_benchmark) {
		BenchmarkRunner::RegisterBenchmark(this);
	}
}

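// Example of the self-registration pattern above (a hypothetical sketch, not
// part of this file): a concrete benchmark overrides the virtual hooks and
// constructs a static instance with register_benchmark = true, so that it is
// added to the runner before main() starts:
//
//   struct MyBenchmark : Benchmark {
//       MyBenchmark() : Benchmark(true, "my_benchmark", "examples") {
//       }
//       // ... overrides of Initialize/Run/Cleanup/Verify etc. ...
//   };
//   static MyBenchmark my_benchmark_instance;
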
void BenchmarkRunner::SaveDatabase(DuckDB &db, string name) {
	auto &fs = *db.file_system;
	// check if the database directory exists; if not, create it
	if (!fs.DirectoryExists(DUCKDB_BENCHMARK_DIRECTORY)) {
		fs.CreateDirectory(DUCKDB_BENCHMARK_DIRECTORY);
	}
	// first export the schema
	// create two files, "[name].sql" and "[name].list"
	// [name].sql contains the SQL used to re-create the tables
	// [name].list contains a list of the exported tables
	ofstream sql_file(fs.JoinPath(DUCKDB_BENCHMARK_DIRECTORY, name + ".sql"));
	ofstream list_file(fs.JoinPath(DUCKDB_BENCHMARK_DIRECTORY, name + ".list"));

	vector<string> table_list;
	Connection con(db);
	auto result = con.Query("SELECT name, sql FROM sqlite_master()");
	for (auto &row : *result) {
		auto table_name = row.GetValue<string>(0);
		auto table_sql = row.GetValue<string>(1);
		table_list.push_back(table_name);

		list_file << table_name << std::endl;
		sql_file << table_sql << std::endl;
	}
	sql_file.close();
	list_file.close();

	// now write each table to a separate file "[name]_[tablename].csv"
	for (auto &table : table_list) {
		auto target_path = fs.JoinPath(DUCKDB_BENCHMARK_DIRECTORY, name + "_" + table + ".csv");
		result = con.Query("COPY " + table + " TO '" + target_path + "'");
		if (!result->success) {
			throw Exception("Failed to save database: " + result->error);
		}
	}
}

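// Attempt to restore a database previously written by SaveDatabase: replay the
// schema from "[name].sql", then COPY each table listed in "[name].list" back
// in from its CSV export. Returns false if the export files do not exist, so
// the caller can fall back to regenerating the data from scratch.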
bool BenchmarkRunner::TryLoadDatabase(DuckDB &db, string name) {
	auto &fs = *db.file_system;
	if (!fs.DirectoryExists(DUCKDB_BENCHMARK_DIRECTORY)) {
		return false;
	}
	auto sql_fname = fs.JoinPath(DUCKDB_BENCHMARK_DIRECTORY, name + ".sql");
	auto list_fname = fs.JoinPath(DUCKDB_BENCHMARK_DIRECTORY, name + ".list");
	// check if the [name].list and [name].sql files exist
	if (!fs.FileExists(list_fname) || !fs.FileExists(sql_fname)) {
		return false;
	}
	Connection con(db);
	// the files exist, load the data into the database
	// first load the entire SQL and execute it
	ifstream sql_file(sql_fname);
	std::stringstream buffer;
	buffer << sql_file.rdbuf();
	auto result = con.Query(buffer.str());
	if (!result->success) {
		throw Exception("Failed to load database: " + result->error);
	}
	// now read the tables line by line
	ifstream list_file(list_fname);
	string table_name;
	while (getline(list_file, table_name)) {
		// for each table, copy the files
		auto target_path = fs.JoinPath(DUCKDB_BENCHMARK_DIRECTORY, name + "_" + table_name + ".csv");
		result = con.Query("COPY " + table_name + " FROM '" + target_path + "'");
		if (!result->success) {
			throw Exception("Failed to load database: " + result->error);
		}
	}
	return true;
}

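// state shared between the benchmark thread and the watchdog thread spawned in
// RunBenchmark; atomics keep the cross-thread reads and writes well-defined.
// is_active is set while a run is in progress, and timeout records whether the
// watchdog fired before the run completed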
std::atomic<bool> is_active(false);
std::atomic<bool> timeout(false);

void sleep_thread(Benchmark *benchmark, BenchmarkState *state, int timeout_duration) {
	// the timeout is given in seconds
	// we wait 10ms per iteration, so timeout * 100 gives the number of iterations
	if (timeout_duration < 0) {
		return;
	}
	for (size_t i = 0; i < (size_t)(timeout_duration * 100) && is_active; i++) {
		std::this_thread::sleep_for(std::chrono::milliseconds(10));
	}
	if (is_active) {
		timeout = true;
		benchmark->Interrupt(state);
	}
}

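// logging helpers: Log and LogLine write progress to stderr; LogResult also
// appends to the file passed via --out, and LogOutput writes only to the file
// passed via --log (when those files are open)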
void BenchmarkRunner::Log(string message) {
	fprintf(stderr, "%s", message.c_str());
}

void BenchmarkRunner::LogLine(string message) {
	fprintf(stderr, "%s\n", message.c_str());
}

void BenchmarkRunner::LogResult(string message) {
	LogLine(message);
	if (out_file.good()) {
		out_file << message << endl;
		out_file.flush();
	}
}

void BenchmarkRunner::LogOutput(string message) {
	if (log_file.good()) {
		log_file << message << endl;
		log_file.flush();
	}
}

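// Run a single benchmark: one untimed cold run, followed by NRuns() timed hot
// runs. Every run is guarded by a watchdog thread that interrupts the benchmark
// once its Timeout() expires; each hot run logs either the elapsed time
// reported by the profiler, TIMEOUT, or INCORRECT if verification fails.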
void BenchmarkRunner::RunBenchmark(Benchmark *benchmark) {
	Profiler profiler;
	LogLine(string(benchmark->name.size() + 6, '-'));
	LogLine("|| " + benchmark->name + " ||");
	LogLine(string(benchmark->name.size() + 6, '-'));
	auto state = benchmark->Initialize();
	auto nruns = benchmark->NRuns();
	for (size_t i = 0; i < nruns + 1; i++) {
		bool hotrun = i > 0;
		if (hotrun) {
			Log(StringUtil::Format("%d/%d...", (int)i, (int)nruns));
		} else {
			Log("Cold run...");
		}
		if (hotrun && benchmark->RequireReinit()) {
			state = benchmark->Initialize();
		}
		is_active = true;
		timeout = false;
		thread interrupt_thread(sleep_thread, benchmark, state.get(), benchmark->Timeout());

		profiler.Start();
		benchmark->Run(state.get());
		profiler.End();

		benchmark->Cleanup(state.get());

		is_active = false;
		interrupt_thread.join();
		if (hotrun) {
			LogOutput(benchmark->GetLogOutput(state.get()));
			if (timeout) {
				// write timeout
				LogResult("TIMEOUT");
				break;
			} else {
				// write time
				auto verify = benchmark->Verify(state.get());
				if (!verify.empty()) {
					LogResult("INCORRECT");
					LogLine("INCORRECT RESULT: " + verify);
					LogOutput("INCORRECT RESULT: " + verify);
					break;
				} else {
					LogResult(to_string(profiler.Elapsed()));
				}
			}
		} else {
			LogLine("DONE");
		}
	}
	benchmark->Finalize();
}

void BenchmarkRunner::RunBenchmarks() {
	LogLine("Starting benchmark run.");
	for (auto &benchmark : benchmarks) {
		RunBenchmark(benchmark);
	}
}

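// example invocations (a sketch; the binary name and output file name are
// illustrative):
//   benchmark_runner --list
//   benchmark_runner --out=results.log "DS.*"
//   benchmark_runner --query "DS.*"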
void print_help() {
	fprintf(stderr, "Usage: benchmark_runner\n");
	fprintf(stderr, "              --list         Show a list of all benchmarks\n");
	fprintf(stderr, "              --out=[file]   Write benchmark results to file\n");
	fprintf(stderr, "              --log=[file]   Write log output to file\n");
	fprintf(stderr, "              --info         Print info about the benchmark\n");
	fprintf(stderr, "              --group        Print the group name of the benchmark\n");
	fprintf(stderr, "              --query        Print the query of the benchmark\n");
	fprintf(stderr, "              [name_pattern] Run only the benchmarks whose names match the specified pattern, "
	                "e.g., DS.* for TPC-DS benchmarks\n");
}

enum class BenchmarkMetaType { NONE, INFO, GROUP, QUERY };

struct BenchmarkConfiguration {
	std::string name_pattern {};
	BenchmarkMetaType meta = BenchmarkMetaType::NONE;
};

enum class ConfigurationError { None, BenchmarkNotFound, InfoWithoutBenchmarkName };

/**
 * Builds a configuration based on the passed arguments.
 */
BenchmarkConfiguration parse_arguments(const int arg_counter, char const *const *arg_values) {
	auto &instance = BenchmarkRunner::GetInstance();
	auto &benchmarks = instance.benchmarks;
	BenchmarkConfiguration configuration;
	for (int arg_index = 1; arg_index < arg_counter; ++arg_index) {
		string arg = arg_values[arg_index];
		if (arg == "--list") {
			// list the names of all benchmarks
			for (auto &benchmark : benchmarks) {
				if (StringUtil::StartsWith(benchmark->name, "sqlite_")) {
					continue;
				}
				fprintf(stdout, "%s\n", benchmark->name.c_str());
			}
			exit(0);
		} else if (arg == "--info") {
			// write info of benchmark
			configuration.meta = BenchmarkMetaType::INFO;
		} else if (arg == "--group") {
			// write group of benchmark
			configuration.meta = BenchmarkMetaType::GROUP;
		} else if (arg == "--query") {
			// write query of benchmark
			configuration.meta = BenchmarkMetaType::QUERY;
		} else if (StringUtil::StartsWith(arg, "--out=") || StringUtil::StartsWith(arg, "--log=")) {
			auto splits = StringUtil::Split(arg, '=');
			if (splits.size() != 2) {
				print_help();
				exit(1);
			}
			auto &file = StringUtil::StartsWith(arg, "--out=") ? instance.out_file : instance.log_file;
			file.open(splits[1]);
			if (!file.good()) {
				fprintf(stderr, "Could not open file %s for writing\n", splits[1].c_str());
				exit(1);
			}
		} else {
			if (!configuration.name_pattern.empty()) {
				fprintf(stderr, "Only one benchmark name pattern can be specified.\n");
				print_help();
				exit(1);
			}
			configuration.name_pattern = arg;
		}
	}
	return configuration;
}

/**
 * Runs the benchmarks specified by the configuration, if possible.
 * Returns a configuration error code.
 */
ConfigurationError run_benchmarks(const BenchmarkConfiguration &configuration) {
	auto &instance = BenchmarkRunner::GetInstance();
	auto &benchmarks = instance.benchmarks;
	if (!configuration.name_pattern.empty()) {
		// run only the benchmarks whose names match the passed name pattern
		std::vector<idx_t> benchmark_indices {};
		benchmark_indices.reserve(benchmarks.size());
		for (idx_t index = 0; index < benchmarks.size(); ++index) {
			if (RE2::FullMatch(benchmarks[index]->name, configuration.name_pattern)) {
				benchmark_indices.emplace_back(index);
			}
		}
		benchmark_indices.shrink_to_fit();
		if (benchmark_indices.empty()) {
			return ConfigurationError::BenchmarkNotFound;
		}
		if (configuration.meta == BenchmarkMetaType::INFO) {
			// print the info of the matched benchmarks
			for (const auto &benchmark_index : benchmark_indices) {
				auto info = benchmarks[benchmark_index]->GetInfo();
				fprintf(stdout, "%s\n", info.c_str());
			}
		} else if (configuration.meta == BenchmarkMetaType::GROUP) {
			// print the group of the matched benchmarks
			for (const auto &benchmark_index : benchmark_indices) {
				fprintf(stdout, "%s\n", benchmarks[benchmark_index]->group.c_str());
			}
		} else if (configuration.meta == BenchmarkMetaType::QUERY) {
			// print the query of the matched benchmarks (if any)
			for (const auto &benchmark_index : benchmark_indices) {
				auto duckdb_benchmark = dynamic_cast<DuckDBBenchmark *>(benchmarks[benchmark_index]);
				if (!duckdb_benchmark) {
					continue;
				}
				fprintf(stdout, "%s\n", duckdb_benchmark->GetQuery().c_str());
			}
		} else {
			for (const auto &benchmark_index : benchmark_indices) {
				instance.RunBenchmark(benchmarks[benchmark_index]);
			}
		}
	} else {
		if (configuration.meta != BenchmarkMetaType::NONE) {
			return ConfigurationError::InfoWithoutBenchmarkName;
		}
		// default: run all benchmarks
		instance.RunBenchmarks();
	}
	return ConfigurationError::None;
}

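// print a human-readable message for the given configuration error, followed
// by the usage information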
void print_error_message(const ConfigurationError &error) {
	switch (error) {
	case ConfigurationError::BenchmarkNotFound:
		fprintf(stderr, "Benchmark to run could not be found.\n");
		break;
	case ConfigurationError::InfoWithoutBenchmarkName:
		fprintf(stderr, "Info requires a benchmark name pattern.\n");
		break;
	case ConfigurationError::None:
		break;
	}
	print_help();
}

int main(int argc, char **argv) {
	BenchmarkConfiguration configuration = parse_arguments(argc, argv);
	const auto configuration_error = run_benchmarks(configuration);
	if (configuration_error != ConfigurationError::None) {
		print_error_message(configuration_error);
		exit(1);
	}
	return 0;
}