#include "benchmark_runner.hpp"

#include "duckdb/common/profiler.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb.hpp"
#include "duckdb_benchmark.hpp"

#define CATCH_CONFIG_RUNNER
#include "catch.hpp"
#include "re2/re2.h"

#include <atomic>
#include <fstream>
#include <sstream>
#include <thread>

using namespace duckdb;
using namespace std;

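// Benchmarks register themselves with the singleton BenchmarkRunner: the
// Benchmark constructor below calls RegisterBenchmark during static
// initialization, so every benchmark linked into the binary is picked up
// automatically.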
void BenchmarkRunner::RegisterBenchmark(Benchmark *benchmark) {
	GetInstance().benchmarks.push_back(benchmark);
}

Benchmark::Benchmark(bool register_benchmark, string name, string group) : name(name), group(group) {
	if (register_benchmark) {
		BenchmarkRunner::RegisterBenchmark(this);
	}
}

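// SaveDatabase exports the contents of a benchmark database to
// DUCKDB_BENCHMARK_DIRECTORY so that later runs can skip the (potentially
// expensive) data generation step; TryLoadDatabase below is the matching
// import. The on-disk format is described in the comments in the function.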
void BenchmarkRunner::SaveDatabase(DuckDB &db, string name) {
	auto &fs = *db.file_system;
	// check if the database directory exists; if not create it
	if (!fs.DirectoryExists(DUCKDB_BENCHMARK_DIRECTORY)) {
		fs.CreateDirectory(DUCKDB_BENCHMARK_DIRECTORY);
	}
	// first export the schema
	// create two files, "[name].sql" and "[name].list"
	// [name].sql contains the SQL used to re-create the tables
	// [name].list contains a list of the exported tables
	ofstream sql_file(fs.JoinPath(DUCKDB_BENCHMARK_DIRECTORY, name + ".sql"));
	ofstream list_file(fs.JoinPath(DUCKDB_BENCHMARK_DIRECTORY, name + ".list"));

	vector<string> table_list;
	Connection con(db);
	auto result = con.Query("SELECT name, sql FROM sqlite_master()");
	for (auto &row : *result) {
		auto table_name = row.GetValue<string>(0);
		auto table_sql = row.GetValue<string>(1);
		table_list.push_back(table_name);

		list_file << table_name << std::endl;
		sql_file << table_sql << std::endl;
	}
	sql_file.close();
	list_file.close();

	// now for each table, write it to a separate file "[name]_[tablename].csv"
	for (auto &table : table_list) {
		auto target_path = fs.JoinPath(DUCKDB_BENCHMARK_DIRECTORY, name + "_" + table + ".csv");
		result = con.Query("COPY " + table + " TO '" + target_path + "'");
		if (!result->success) {
			throw Exception("Failed to save database: " + result->error);
		}
	}
}

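// TryLoadDatabase is the counterpart of SaveDatabase: it returns false if the
// exported files are not present (the caller can then generate the data and
// save it), otherwise it replays the schema from "[name].sql" and bulk-loads
// every table listed in "[name].list" from its CSV file.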
bool BenchmarkRunner::TryLoadDatabase(DuckDB &db, string name) {
	auto &fs = *db.file_system;
	if (!fs.DirectoryExists(DUCKDB_BENCHMARK_DIRECTORY)) {
		return false;
	}
	auto sql_fname = fs.JoinPath(DUCKDB_BENCHMARK_DIRECTORY, name + ".sql");
	auto list_fname = fs.JoinPath(DUCKDB_BENCHMARK_DIRECTORY, name + ".list");
	// check if the [name].list and [name].sql files exist
	if (!fs.FileExists(list_fname) || !fs.FileExists(sql_fname)) {
		return false;
	}
	Connection con(db);
	// the files exist, load the data into the database
	// first load the entire SQL and execute it
	ifstream sql_file(sql_fname);
	std::stringstream buffer;
	buffer << sql_file.rdbuf();
	auto result = con.Query(buffer.str());
	if (!result->success) {
		throw Exception("Failed to load database: " + result->error);
	}
	// now read the tables line by line
	ifstream list_file(list_fname);
	string table_name;
	while (getline(list_file, table_name)) {
		// for each table, copy the files
		auto target_path = fs.JoinPath(DUCKDB_BENCHMARK_DIRECTORY, name + "_" + table_name + ".csv");
		result = con.Query("COPY " + table_name + " FROM '" + target_path + "'");
		if (!result->success) {
			throw Exception("Failed to load database: " + result->error);
		}
	}
	return true;
}

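// Timeout handling: every benchmark run spawns a watchdog thread running
// sleep_thread, which polls the is_active flag in 10ms ticks. If the benchmark
// is still active when the timeout expires, the watchdog sets the timeout flag
// and calls Benchmark::Interrupt to abort the run.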
// flags shared between the benchmark thread and the watchdog thread; use
// atomics to avoid a data race
std::atomic<bool> is_active(false);
std::atomic<bool> timeout(false);

void sleep_thread(Benchmark *benchmark, BenchmarkState *state, int timeout_duration) {
	// the timeout is given in seconds
	// we wait 10ms per iteration, so timeout * 100 gives us the number of
	// iterations
	if (timeout_duration < 0) {
		return;
	}
	for (size_t i = 0; i < (size_t)(timeout_duration * 100) && is_active; i++) {
		std::this_thread::sleep_for(std::chrono::milliseconds(10));
	}
	if (is_active) {
		timeout = true;
		benchmark->Interrupt(state);
	}
}

void BenchmarkRunner::Log(string message) {
	fprintf(stderr, "%s", message.c_str());
}

void BenchmarkRunner::LogLine(string message) {
	fprintf(stderr, "%s\n", message.c_str());
}

void BenchmarkRunner::LogResult(string message) {
	LogLine(message);
	if (out_file.good()) {
		out_file << message << endl;
		out_file.flush();
	}
}

void BenchmarkRunner::LogOutput(string message) {
	if (log_file.good()) {
		log_file << message << endl;
		log_file.flush();
	}
}

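// RunBenchmark executes a single benchmark: one unmeasured cold run followed
// by NRuns() hot runs. Only hot runs are verified and reported; a run that
// times out or produces an incorrect result aborts the remaining iterations.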
void BenchmarkRunner::RunBenchmark(Benchmark *benchmark) {
	Profiler profiler;
	LogLine(string(benchmark->name.size() + 6, '-'));
	LogLine("|| " + benchmark->name + " ||");
	LogLine(string(benchmark->name.size() + 6, '-'));
	auto state = benchmark->Initialize();
	auto nruns = benchmark->NRuns();
	for (size_t i = 0; i < nruns + 1; i++) {
		bool hotrun = i > 0;
		if (hotrun) {
			Log(StringUtil::Format("%d/%d...", (int)i, (int)nruns));
		} else {
			Log("Cold run...");
		}
		if (hotrun && benchmark->RequireReinit()) {
			state = benchmark->Initialize();
		}
		is_active = true;
		timeout = false;
		thread interrupt_thread(sleep_thread, benchmark, state.get(), benchmark->Timeout());

		profiler.Start();
		benchmark->Run(state.get());
		profiler.End();

		benchmark->Cleanup(state.get());

		is_active = false;
		interrupt_thread.join();
		if (hotrun) {
			LogOutput(benchmark->GetLogOutput(state.get()));
			if (timeout) {
				// write timeout
				LogResult("TIMEOUT");
				break;
			} else {
				// write the measured time
				auto verify = benchmark->Verify(state.get());
				if (!verify.empty()) {
					LogResult("INCORRECT");
					LogLine("INCORRECT RESULT: " + verify);
					LogOutput("INCORRECT RESULT: " + verify);
					break;
				} else {
					LogResult(to_string(profiler.Elapsed()));
				}
			}
		} else {
			LogLine("DONE");
		}
	}
	benchmark->Finalize();
}

void BenchmarkRunner::RunBenchmarks() {
	LogLine("Starting benchmark run.");
	for (auto &benchmark : benchmarks) {
		RunBenchmark(benchmark);
	}
}

void print_help() {
	fprintf(stderr, "Usage: benchmark_runner\n");
	fprintf(stderr, "       --list         Show a list of all benchmarks\n");
	fprintf(stderr, "       --out=[file]   Write benchmark output to file\n");
	fprintf(stderr, "       --log=[file]   Write log output to file\n");
	fprintf(stderr, "       --info         Print info about the benchmark\n");
	fprintf(stderr, "       --group        Print the group name of the benchmark\n");
	fprintf(stderr, "       --query        Print the query of the benchmark\n");
	fprintf(stderr, "       [name_pattern] Run only benchmarks whose names match the specified name pattern, "
	                "e.g., DS.* for TPC-DS benchmarks\n");
}

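// Meta modes: instead of running the matched benchmarks, the runner can print
// metadata about them (--info, --group, --query).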
enum class BenchmarkMetaType { NONE, INFO, GROUP, QUERY };

struct BenchmarkConfiguration {
	std::string name_pattern{};
	BenchmarkMetaType meta = BenchmarkMetaType::NONE;
};

enum class ConfigurationError { None, BenchmarkNotFound, InfoWithoutBenchmarkName };

/**
 * Builds a configuration based on the passed arguments.
 */
BenchmarkConfiguration parse_arguments(const int arg_counter, char const *const *arg_values) {
	auto &instance = BenchmarkRunner::GetInstance();
	auto &benchmarks = instance.benchmarks;
	BenchmarkConfiguration configuration;
	for (int arg_index = 1; arg_index < arg_counter; ++arg_index) {
		string arg = arg_values[arg_index];
		if (arg == "--list") {
			// list the names of all benchmarks
			for (auto &benchmark : benchmarks) {
				if (StringUtil::StartsWith(benchmark->name, "sqlite_")) {
					continue;
				}
				fprintf(stdout, "%s\n", benchmark->name.c_str());
			}
			exit(0);
		} else if (arg == "--info") {
			// write info of benchmark
			configuration.meta = BenchmarkMetaType::INFO;
		} else if (arg == "--group") {
			// write group of benchmark
			configuration.meta = BenchmarkMetaType::GROUP;
		} else if (arg == "--query") {
			// write query of benchmark
			configuration.meta = BenchmarkMetaType::QUERY;
		} else if (StringUtil::StartsWith(arg, "--out=") || StringUtil::StartsWith(arg, "--log=")) {
			auto splits = StringUtil::Split(arg, '=');
			if (splits.size() != 2) {
				print_help();
				exit(1);
			}
			auto &file = StringUtil::StartsWith(arg, "--out=") ? instance.out_file : instance.log_file;
			file.open(splits[1]);
			if (!file.good()) {
				fprintf(stderr, "Could not open file %s for writing\n", splits[1].c_str());
				exit(1);
			}
		} else {
			if (!configuration.name_pattern.empty()) {
				fprintf(stderr, "Only one benchmark name pattern can be specified.\n");
				print_help();
				exit(1);
			}
			configuration.name_pattern = arg;
		}
	}
	return configuration;
}

/**
 * Runs the benchmarks specified by the configuration if possible.
 * Returns a configuration error code.
 */
ConfigurationError run_benchmarks(const BenchmarkConfiguration &configuration) {
	auto &instance = BenchmarkRunner::GetInstance();
	auto &benchmarks = instance.benchmarks;
	if (!configuration.name_pattern.empty()) {
		// run only the benchmarks whose names match the passed name pattern
		std::vector<idx_t> benchmark_indices{};
		benchmark_indices.reserve(benchmarks.size());
		for (idx_t index = 0; index < benchmarks.size(); ++index) {
			if (RE2::FullMatch(benchmarks[index]->name, configuration.name_pattern)) {
				benchmark_indices.emplace_back(index);
			}
		}
		benchmark_indices.shrink_to_fit();
		if (benchmark_indices.empty()) {
			return ConfigurationError::BenchmarkNotFound;
		}
		if (configuration.meta == BenchmarkMetaType::INFO) {
			// print the info of the matched benchmarks
			for (const auto &benchmark_index : benchmark_indices) {
				auto info = benchmarks[benchmark_index]->GetInfo();
				fprintf(stdout, "%s\n", info.c_str());
			}
		} else if (configuration.meta == BenchmarkMetaType::GROUP) {
			// print the group of the matched benchmarks
			for (const auto &benchmark_index : benchmark_indices) {
				fprintf(stdout, "%s\n", benchmarks[benchmark_index]->group.c_str());
			}
		} else if (configuration.meta == BenchmarkMetaType::QUERY) {
			// print the query of the matched benchmarks (DuckDB benchmarks only)
			for (const auto &benchmark_index : benchmark_indices) {
				auto duckdb_benchmark = dynamic_cast<DuckDBBenchmark *>(benchmarks[benchmark_index]);
				if (!duckdb_benchmark) {
					continue;
				}
				fprintf(stdout, "%s\n", duckdb_benchmark->GetQuery().c_str());
			}
		} else {
			for (const auto &benchmark_index : benchmark_indices) {
				instance.RunBenchmark(benchmarks[benchmark_index]);
			}
		}
	} else {
		if (configuration.meta != BenchmarkMetaType::NONE) {
			return ConfigurationError::InfoWithoutBenchmarkName;
		}
		// default: run all benchmarks
		instance.RunBenchmarks();
	}
	return ConfigurationError::None;
}

void print_error_message(const ConfigurationError &error) {
	switch (error) {
	case ConfigurationError::BenchmarkNotFound:
		fprintf(stderr, "Benchmark to run could not be found.\n");
		break;
	case ConfigurationError::InfoWithoutBenchmarkName:
		fprintf(stderr, "Info requires a benchmark name pattern.\n");
		break;
	case ConfigurationError::None:
		break;
	}
	print_help();
}

int main(int argc, char **argv) {
	BenchmarkConfiguration configuration = parse_arguments(argc, argv);
	const auto configuration_error = run_benchmarks(configuration);
	if (configuration_error != ConfigurationError::None) {
		print_error_message(configuration_error);
		exit(1);
	}
}