1 | #include "benchmark_runner.hpp" |
2 | #include "duckdb_benchmark_macro.hpp" |
3 | #include "duckdb/main/appender.hpp" |
4 | |
5 | #include <random> |
6 | |
7 | using namespace duckdb; |
8 | using namespace std; |
9 | |
// Number of rows appended to the `strings` table by Load().
#define STRING_COUNT 10000000
// Default string length. Not referenced by STRING_DATA_GEN_BODY, which receives the length as its argument.
#define STRING_LENGTH 4

// Expands, inside a benchmark class body, to the shared data-generation and verification members:
//  - chars:          the alphabet the random strings are drawn from ('A'-'Z')
//  - GenerateString: builds one string of STR_LEN characters picked via the supplied distribution/generator
//  - Load:           creates table strings(s1 VARCHAR, s2 VARCHAR) and appends STRING_COUNT rows, each holding
//                    two separately generated strings; the generator is seeded with the fixed value 42
//  - VerifyResult:   returns the result's error message on failure and an empty string on success
// STR_LEN is the number of characters per generated string.
// Comments inside the macro body must use /* */: a line comment would be spliced with the following line by
// the trailing backslash and swallow it.
#define STRING_DATA_GEN_BODY(STR_LEN) \
	static constexpr const char *chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; \
	static string GenerateString(std::uniform_int_distribution<> &distribution, std::mt19937 &gen) { \
		string result; \
		result.reserve(STR_LEN); \
		for (size_t i = 0; i < STR_LEN; i++) { \
			/* append in place; avoids building a temporary one-character string per iteration */ \
			result.push_back(chars[distribution(gen)]); \
		} \
		return result; \
	} \
	void Load(DuckDBBenchmarkState *state) override { \
		/* inclusive index range over the alphabet */ \
		std::uniform_int_distribution<> distribution(0, strlen(chars) - 1); \
		/* fixed seed keeps the generated sequence deterministic */ \
		std::mt19937 gen(42); \
		state->conn.Query("CREATE TABLE strings(s1 VARCHAR, s2 VARCHAR);"); \
		Appender appender(state->conn, "strings"); \
		for (size_t i = 0; i < STRING_COUNT; i++) { \
			appender.BeginRow(); \
			/* two separate statements so s1 is always generated before s2 */ \
			appender.Append<Value>(Value(GenerateString(distribution, gen))); \
			appender.Append<Value>(Value(GenerateString(distribution, gen))); \
			appender.EndRow(); \
		} \
		appender.Close(); \
	} \
	string VerifyResult(QueryResult *result) override { \
		if (!result->success) { \
			return result->error; \
		} \
		return string(); \
	}
42 | |
43 | DUCKDB_BENCHMARK(StringConcatShort, "[string]" ) |
44 | STRING_DATA_GEN_BODY(4) |
45 | string GetQuery() override { |
46 | return "SELECT s1 || s2 FROM strings" ; |
47 | } |
48 | string BenchmarkInfo() override { |
49 | return "STRING CONCAT" ; |
50 | } |
51 | FINISH_BENCHMARK(StringConcatShort) |
52 | |
53 | DUCKDB_BENCHMARK(StringConcatLong, "[string]" ) |
54 | STRING_DATA_GEN_BODY(20) |
55 | string GetQuery() override { |
56 | return "SELECT s1 || s2 FROM strings" ; |
57 | } |
58 | string BenchmarkInfo() override { |
59 | return "STRING CONCAT" ; |
60 | } |
61 | FINISH_BENCHMARK(StringConcatLong) |
62 | |
63 | DUCKDB_BENCHMARK(StringEqualityShort, "[string]" ) |
64 | STRING_DATA_GEN_BODY(4) |
65 | string GetQuery() override { |
66 | return "SELECT s1=s2 FROM strings" ; |
67 | } |
68 | string BenchmarkInfo() override { |
69 | return "STRING COMPARISON" ; |
70 | } |
71 | FINISH_BENCHMARK(StringEqualityShort) |
72 | |
73 | DUCKDB_BENCHMARK(StringEqualityLong, "[string]" ) |
74 | STRING_DATA_GEN_BODY(20) |
75 | string GetQuery() override { |
76 | return "SELECT s1=s2 FROM strings" ; |
77 | } |
78 | string BenchmarkInfo() override { |
79 | return "STRING COMPARISON" ; |
80 | } |
81 | FINISH_BENCHMARK(StringEqualityLong) |
82 | |
83 | DUCKDB_BENCHMARK(StringGreaterThanShort, "[string]" ) |
84 | STRING_DATA_GEN_BODY(4) |
85 | string GetQuery() override { |
86 | return "SELECT s1>s2 FROM strings" ; |
87 | } |
88 | string BenchmarkInfo() override { |
89 | return "STRING COMPARISON" ; |
90 | } |
91 | FINISH_BENCHMARK(StringGreaterThanShort) |
92 | |
93 | DUCKDB_BENCHMARK(StringGreaterThanLong, "[string]" ) |
94 | STRING_DATA_GEN_BODY(20) |
95 | string GetQuery() override { |
96 | return "SELECT s1>s2 FROM strings" ; |
97 | } |
98 | string BenchmarkInfo() override { |
99 | return "STRING COMPARISON" ; |
100 | } |
101 | FINISH_BENCHMARK(StringGreaterThanLong) |
102 | |
103 | DUCKDB_BENCHMARK(StringLengthShort, "[string]" ) |
104 | STRING_DATA_GEN_BODY(5) |
105 | string GetQuery() override { |
106 | return "SELECT LENGTH(s1)+LENGTH(s2) FROM strings" ; |
107 | } |
108 | string BenchmarkInfo() override { |
109 | return "STRING LENGTH" ; |
110 | } |
111 | FINISH_BENCHMARK(StringLengthShort) |
112 | |
113 | DUCKDB_BENCHMARK(StringLengthLong, "[string]" ) |
114 | STRING_DATA_GEN_BODY(20) |
115 | string GetQuery() override { |
116 | return "SELECT LENGTH(s1)+LENGTH(s2) FROM strings" ; |
117 | } |
118 | string BenchmarkInfo() override { |
119 | return "STRING LENGTH" ; |
120 | } |
121 | FINISH_BENCHMARK(StringLengthLong) |
122 | |
123 | DUCKDB_BENCHMARK(StringAggShort, "[string]" ) |
124 | STRING_DATA_GEN_BODY(4) |
125 | string GetQuery() override { |
126 | return "SELECT STRING_AGG(s1, ' ') FROM strings" ; |
127 | } |
128 | string BenchmarkInfo() override { |
129 | return "STRING LENGTH" ; |
130 | } |
131 | FINISH_BENCHMARK(StringAggShort) |
132 | |
133 | DUCKDB_BENCHMARK(StringAggLong, "[string]" ) |
134 | STRING_DATA_GEN_BODY(20) |
135 | string GetQuery() override { |
136 | return "SELECT STRING_AGG(s1, ' ') FROM strings" ; |
137 | } |
138 | string BenchmarkInfo() override { |
139 | return "STRING LENGTH" ; |
140 | } |
141 | FINISH_BENCHMARK(StringAggLong) |
142 | |
143 | DUCKDB_BENCHMARK(StringInstr, "[string]" ) |
144 | STRING_DATA_GEN_BODY(4) |
145 | string GetQuery() override { |
146 | return "SELECT INSTR(s1, 'h') FROM strings" ; |
147 | } |
148 | string BenchmarkInfo() override { |
149 | return "STRING INSTR" ; |
150 | } |
151 | FINISH_BENCHMARK(StringInstr) |
152 | |
153 | DUCKDB_BENCHMARK(StringInstrNull, "[string]" ) |
154 | STRING_DATA_GEN_BODY(4) |
155 | string GetQuery() override { |
156 | return "SELECT INSTR(s1, '') FROM strings" ; |
157 | } |
158 | string BenchmarkInfo() override { |
159 | return "STRING INSTR" ; |
160 | } |
161 | FINISH_BENCHMARK(StringInstrNull) |
162 | |
163 | //------------------------- CONTAINS ----------------------------------------- |
164 | DUCKDB_BENCHMARK(StringContains, "[string]" ) |
165 | STRING_DATA_GEN_BODY(4) |
166 | string GetQuery() override { |
167 | return "SELECT CONTAINS(s1, 'h') FROM strings" ; |
168 | } |
169 | string BenchmarkInfo() override { |
170 | return "STRING CONTAINS" ; |
171 | } |
172 | FINISH_BENCHMARK(StringContains) |
173 | |
174 | DUCKDB_BENCHMARK(StringContainsNull, "[string]" ) |
175 | STRING_DATA_GEN_BODY(4) |
176 | string GetQuery() override { |
177 | return "SELECT CONTAINS(s1, '') FROM strings" ; |
178 | } |
179 | string BenchmarkInfo() override { |
180 | return "STRING CONTAINS" ; |
181 | } |
182 | FINISH_BENCHMARK(StringContainsNull) |
183 | //----------------------------------------------------------------------------- |
184 | |
185 | //------------------------- CONTAINS LIKE ------------------------------------- |
186 | DUCKDB_BENCHMARK(StringContainsLike, "[string]" ) |
187 | STRING_DATA_GEN_BODY(4) |
188 | string GetQuery() override { |
189 | return "SELECT s1 FROM strings WHERE s1 LIKE '%h%'" ; |
190 | } |
191 | string BenchmarkInfo() override { |
192 | return "STRING CONTAINS LIKE" ; |
193 | } |
194 | FINISH_BENCHMARK(StringContainsLike) |
195 | //----------------------------------------------------------------------------- |
196 | |
197 | DUCKDB_BENCHMARK(StringRegex, "[string]" ) |
198 | STRING_DATA_GEN_BODY(4) |
199 | string GetQuery() override { |
200 | return "SELECT REGEXP_MATCHES(s1, 'h') FROM strings" ; |
201 | } |
202 | string BenchmarkInfo() override { |
203 | return "STRING REGEX" ; |
204 | } |
205 | FINISH_BENCHMARK(StringRegex) |
206 | |
207 | DUCKDB_BENCHMARK(StringRegexNull, "[string]" ) |
208 | STRING_DATA_GEN_BODY(4) |
209 | string GetQuery() override { |
210 | return "SELECT REGEXP_MATCHES(s1, '') FROM strings" ; |
211 | } |
212 | string BenchmarkInfo() override { |
213 | return "STRING REGEX" ; |
214 | } |
215 | FINISH_BENCHMARK(StringRegexNull) |
216 | |
217 | //----------------------- PREFIX benchmark ----------------------------------// |
218 | DUCKDB_BENCHMARK(StringPrefix, "[string]" ) |
219 | STRING_DATA_GEN_BODY(4) |
220 | string GetQuery() override { |
221 | return "SELECT prefix(s1, 'a') FROM strings" ; |
222 | } |
223 | string BenchmarkInfo() override { |
224 | return "STRING PREFIX early out" ; |
225 | } |
226 | FINISH_BENCHMARK(StringPrefix) |
227 | |
228 | DUCKDB_BENCHMARK(StringPrefixInlined, "[string]" ) |
229 | STRING_DATA_GEN_BODY(12) |
230 | string GetQuery() override { |
231 | return "SELECT prefix(s1, 'a') FROM strings" ; |
232 | } |
233 | string BenchmarkInfo() override { |
234 | return "STRING PREFIX inlined" ; |
235 | } |
236 | FINISH_BENCHMARK(StringPrefixInlined) |
237 | |
238 | DUCKDB_BENCHMARK(StringPrefixPointer, "[string]" ) |
239 | STRING_DATA_GEN_BODY(20) |
240 | string GetQuery() override { |
241 | return "SELECT prefix(s1, 'a') FROM strings" ; |
242 | } |
243 | string BenchmarkInfo() override { |
244 | return "STRING PREFIX store pointer" ; |
245 | } |
246 | FINISH_BENCHMARK(StringPrefixPointer) |
247 | |
248 | //----------------------- PREFIX REGEX benchmark ----------------------------------// |
249 | DUCKDB_BENCHMARK(StringPrefixRegEX, "[string]" ) |
250 | STRING_DATA_GEN_BODY(4) |
251 | string GetQuery() override { |
252 | return "SELECT REGEXP_MATCHES(s1, 'a%') FROM strings" ; |
253 | } |
254 | string BenchmarkInfo() override { |
255 | return "STRING PREFIX REGEX" ; |
256 | } |
257 | FINISH_BENCHMARK(StringPrefixRegEX) |
258 | |
259 | //----------------------- PREFIX LIKE benchmark ----------------------------------// |
260 | DUCKDB_BENCHMARK(StringPrefixLike, "[string]" ) |
261 | STRING_DATA_GEN_BODY(4) |
262 | string GetQuery() override { |
263 | return "SELECT s1 FROM strings WHERE s1 LIKE 'a%'" ; |
264 | } |
265 | string BenchmarkInfo() override { |
266 | return "STRING PREFIX LIKE" ; |
267 | } |
268 | FINISH_BENCHMARK(StringPrefixLike) |
269 | |
270 | DUCKDB_BENCHMARK(StringPrefixInlinedLike, "[string]" ) |
271 | STRING_DATA_GEN_BODY(12) |
272 | string GetQuery() override { |
273 | return "SELECT s1 FROM strings WHERE s1 LIKE 'a%'" ; |
274 | } |
275 | string BenchmarkInfo() override { |
276 | return "STRING PREFIX inlined LIKE" ; |
277 | } |
278 | FINISH_BENCHMARK(StringPrefixInlinedLike) |
279 | |
280 | DUCKDB_BENCHMARK(StringPrefixPointerLike, "[string]" ) |
281 | STRING_DATA_GEN_BODY(20) |
282 | string GetQuery() override { |
283 | return "SELECT s1 FROM strings WHERE s1 LIKE 'a%'" ; |
284 | } |
285 | string BenchmarkInfo() override { |
286 | return "STRING PREFIX store pointer LIKE" ; |
287 | } |
288 | FINISH_BENCHMARK(StringPrefixPointerLike) |
289 | |
290 | //----------------------- SUFFIX benchmark ----------------------------------// |
291 | DUCKDB_BENCHMARK(StringSuffixShort, "[string]" ) |
292 | STRING_DATA_GEN_BODY(4) |
293 | string GetQuery() override { |
294 | return "SELECT suffix(s1, 'a') FROM strings" ; |
295 | } |
296 | string BenchmarkInfo() override { |
297 | return "Short string for suffix" ; |
298 | } |
299 | FINISH_BENCHMARK(StringSuffixShort) |
300 | |
301 | DUCKDB_BENCHMARK(StringSuffixLong, "[string]" ) |
302 | STRING_DATA_GEN_BODY(20) |
303 | string GetQuery() override { |
304 | return "SELECT suffix(s1, 'a') FROM strings" ; |
305 | } |
306 | string BenchmarkInfo() override { |
307 | return "Long string for suffix" ; |
308 | } |
309 | FINISH_BENCHMARK(StringSuffixLong) |
310 | |
311 | //----------------------- SUFFIX REGEX benchmark ----------------------------------// |
312 | DUCKDB_BENCHMARK(StringSuffixRegEX, "[string]" ) |
313 | STRING_DATA_GEN_BODY(4) |
314 | string GetQuery() override { |
315 | return "SELECT REGEXP_MATCHES(s1, '%a') FROM strings" ; |
316 | } |
317 | string BenchmarkInfo() override { |
318 | return "STRING suffix REGEX" ; |
319 | } |
320 | FINISH_BENCHMARK(StringSuffixRegEX) |
321 | |
322 | //----------------------- SUFFIX LIKE benchmark ----------------------------------// |
323 | DUCKDB_BENCHMARK(StringSuffixLikeShort, "[string]" ) |
324 | STRING_DATA_GEN_BODY(4) |
325 | string GetQuery() override { |
326 | return "SELECT s1 FROM strings WHERE s1 LIKE '%a'" ; |
327 | } |
328 | string BenchmarkInfo() override { |
329 | return "Short string for suffix with LIKE" ; |
330 | } |
331 | FINISH_BENCHMARK(StringSuffixLikeShort) |
332 | |
333 | DUCKDB_BENCHMARK(StringSuffixLikeLong, "[string]" ) |
334 | STRING_DATA_GEN_BODY(20) |
335 | string GetQuery() override { |
336 | return "SELECT s1 FROM strings WHERE s1 LIKE '%a'" ; |
337 | } |
338 | string BenchmarkInfo() override { |
339 | return "Long string for suffix with LIKE" ; |
340 | } |
341 | FINISH_BENCHMARK(StringSuffixLikeLong) |
342 | |