1#include "catch.hpp"
2#include "test_helpers.hpp"
3
4using namespace duckdb;
5using namespace std;
6
7TEST_CASE("Test aggregation/group by statements", "[aggregations]") {
8 unique_ptr<QueryResult> result;
9 DuckDB db(nullptr);
10 Connection con(db);
11 con.EnableQueryVerification();
12
13 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b INTEGER);"));
14 REQUIRE_NO_FAIL(con.Query("INSERT INTO test VALUES (11, 22), (13, 22), (12, 21)"));
15
16 // aggregates cannot be nested
17 REQUIRE_FAIL(con.Query("SELECT SUM(SUM(41)), COUNT(*);"));
18
19 // simple aggregates without group by
20 result = con.Query("SELECT SUM(a), COUNT(*), AVG(a) FROM test;");
21 REQUIRE(CHECK_COLUMN(result, 0, {36}));
22 REQUIRE(CHECK_COLUMN(result, 1, {3}));
23 REQUIRE(CHECK_COLUMN(result, 2, {12.0}));
24
25 result = con.Query("SELECT COUNT(*) FROM test;");
26 REQUIRE(CHECK_COLUMN(result, 0, {3}));
27
28 result = con.Query("SELECT SUM(a), COUNT(*) FROM test WHERE a = 11;");
29 REQUIRE(CHECK_COLUMN(result, 0, {11}));
30 REQUIRE(CHECK_COLUMN(result, 1, {1}));
31
32 result = con.Query("SELECT SUM(a), SUM(b), SUM(a) + SUM (b) FROM test;");
33 REQUIRE(CHECK_COLUMN(result, 0, {36}));
34 REQUIRE(CHECK_COLUMN(result, 1, {65}));
35 REQUIRE(CHECK_COLUMN(result, 2, {101}));
36
37 result = con.Query("SELECT SUM(a+2), SUM(a) + 2 * COUNT(*) FROM test;");
38 REQUIRE(CHECK_COLUMN(result, 0, {42}));
39 REQUIRE(CHECK_COLUMN(result, 1, {42}));
40
41 // aggregations with group by
42 result = con.Query("SELECT b, SUM(a), SUM(a+2), AVG(a) FROM test GROUP BY b ORDER BY b;");
43 REQUIRE(CHECK_COLUMN(result, 0, {21, 22}));
44 REQUIRE(CHECK_COLUMN(result, 1, {12, 24}));
45 REQUIRE(CHECK_COLUMN(result, 2, {14, 28}));
46 REQUIRE(CHECK_COLUMN(result, 3, {12, 12}));
47
48 // ORDER BY aggregation that does not occur in SELECT clause
49 result = con.Query("SELECT b, SUM(a) FROM test GROUP BY b ORDER BY COUNT(a);");
50 REQUIRE(CHECK_COLUMN(result, 0, {21, 22}));
51 REQUIRE(CHECK_COLUMN(result, 1, {12, 24}));
52
53 result = con.Query("SELECT b, SUM(a) FROM test GROUP BY b ORDER BY COUNT(a) DESC;");
54 REQUIRE(CHECK_COLUMN(result, 0, {22, 21}));
55 REQUIRE(CHECK_COLUMN(result, 1, {24, 12}));
56
57 result = con.Query("SELECT b, SUM(a), COUNT(*), SUM(a+2) FROM test GROUP "
58 "BY b ORDER BY b;");
59 REQUIRE(CHECK_COLUMN(result, 0, {21, 22}));
60 REQUIRE(CHECK_COLUMN(result, 1, {12, 24}));
61 REQUIRE(CHECK_COLUMN(result, 2, {1, 2}));
62 REQUIRE(CHECK_COLUMN(result, 3, {14, 28}));
63
64 // group by alias
65 result = con.Query("SELECT b % 2 AS f, SUM(a) FROM test GROUP BY f;");
66 REQUIRE(CHECK_COLUMN(result, 0, {0, 1}));
67 REQUIRE(CHECK_COLUMN(result, 1, {24, 12}));
68
69 // group by with filter
70 result = con.Query("SELECT b, SUM(a), COUNT(*), SUM(a+2) FROM test WHERE "
71 "a <= 12 GROUP "
72 "BY b ORDER BY b;");
73 REQUIRE(CHECK_COLUMN(result, 0, {21, 22}));
74 REQUIRE(CHECK_COLUMN(result, 1, {12, 11}));
75 REQUIRE(CHECK_COLUMN(result, 2, {1, 1}));
76 REQUIRE(CHECK_COLUMN(result, 3, {14, 13}));
77
78 // nested aggregate in group by
79 REQUIRE_FAIL(con.Query("SELECT b % 2 AS f, COUNT(SUM(a)) FROM test GROUP BY f;"));
80
81 con.Query("INSERT INTO test VALUES (12, 21), (12, 21), (12, 21)");
82
83 // group by with filter and multiple values per groups
84 result = con.Query("SELECT b, SUM(a), COUNT(*), SUM(a+2) FROM test WHERE "
85 "a <= 12 GROUP "
86 "BY b ORDER BY b;");
87 REQUIRE(CHECK_COLUMN(result, 0, {21, 22}));
88 REQUIRE(CHECK_COLUMN(result, 1, {12 * 4, 11}));
89 REQUIRE(CHECK_COLUMN(result, 2, {4, 1}));
90 REQUIRE(CHECK_COLUMN(result, 3, {12 * 4 + 2 * 4, 13}));
91
92 // group by with filter and multiple values per groups
93 REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER, j INTEGER);"));
94 REQUIRE_NO_FAIL(con.Query("INSERT INTO integers VALUES (3, 4), (3, 4), (2, 4);"));
95
96 // use GROUP BY column in math operator
97 result = con.Query("SELECT i, i + 10 FROM integers GROUP BY i ORDER BY i");
98 REQUIRE(CHECK_COLUMN(result, 0, {2, 3}));
99 REQUIRE(CHECK_COLUMN(result, 1, {12, 13}));
100
101 // using non-group column and non-aggregate should throw an error
102 REQUIRE_FAIL(con.Query("SELECT i, SUM(j), j FROM integers GROUP BY i ORDER BY i"));
103 // but it works if we wrap it in FIRST()
104 result = con.Query("SELECT i, SUM(j), FIRST(j) FROM integers GROUP BY i ORDER BY i");
105 REQUIRE(CHECK_COLUMN(result, 0, {2, 3}));
106 REQUIRE(CHECK_COLUMN(result, 1, {4, 8}));
107 REQUIRE(CHECK_COLUMN(result, 2, {4, 4}));
108
109 // group by constant alias
110 result = con.Query("SELECT 1 AS k, SUM(i) FROM integers GROUP BY k ORDER BY 2;");
111 REQUIRE(CHECK_COLUMN(result, 0, {1}));
112 REQUIRE(CHECK_COLUMN(result, 1, {8}));
113
114 // use an alias that is identical to a column name (should prioritize column name)
115 result = con.Query("SELECT 1 AS i, SUM(i) FROM integers GROUP BY i ORDER BY 2;");
116 REQUIRE(CHECK_COLUMN(result, 0, {1, 1}));
117 REQUIRE(CHECK_COLUMN(result, 1, {2, 6}));
118
119 // refer to the same alias twice
120 result = con.Query("SELECT i % 2 AS k, SUM(i) FROM integers GROUP BY k, k ORDER BY 1;");
121 REQUIRE(CHECK_COLUMN(result, 0, {0, 1}));
122 REQUIRE(CHECK_COLUMN(result, 1, {2, 6}));
123
124 REQUIRE_NO_FAIL(con.Query("DROP TABLE integers;"));
125 REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER);"));
126 REQUIRE_NO_FAIL(con.Query("INSERT INTO integers VALUES (1), (2), (3), (NULL);"));
127
128 // group by NULL
129 result = con.Query("SELECT i, SUM(i) FROM integers GROUP BY i ORDER BY 1;");
130 REQUIRE(CHECK_COLUMN(result, 0, {Value(), 1, 2, 3}));
131 REQUIRE(CHECK_COLUMN(result, 1, {Value(), 1, 2, 3}));
132
133 // column reference should have preference over alias reference in grouping
134 result = con.Query("SELECT i, i % 2 AS i, SUM(i) FROM integers GROUP BY i ORDER BY 1;");
135 REQUIRE(CHECK_COLUMN(result, 0, {Value(), 1, 2, 3}));
136 REQUIRE(CHECK_COLUMN(result, 1, {Value(), 1, 0, 1}));
137 REQUIRE(CHECK_COLUMN(result, 2, {Value(), 1, 2, 3}));
138
139 // aliases can only be referenced in the GROUP BY as the root column: operations not allowed
140 // CONTROVERSIAL: this query DOES work in SQLite
141 REQUIRE_FAIL(con.Query("SELECT 1 AS k, SUM(i) FROM integers GROUP BY k+1 ORDER BY 2;"));
142
143 // group by column refs should be recognized, even if one uses an explicit table specifier and the other does not
144 result = con.Query("SELECT test.b, SUM(a) FROM test GROUP BY b ORDER BY COUNT(a) DESC;");
145 REQUIRE(CHECK_COLUMN(result, 0, {21, 22}));
146 REQUIRE(CHECK_COLUMN(result, 1, {48, 24}));
147}
148
149TEST_CASE("Test aliases in group by/aggregation", "[aggregations]") {
150 unique_ptr<QueryResult> result;
151 DuckDB db(nullptr);
152 Connection con(db);
153 con.EnableQueryVerification();
154
155 REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER)"));
156 REQUIRE_NO_FAIL(con.Query("INSERT INTO integers VALUES (1), (2), (3), (NULL)"));
157
158 // use alias in HAVING clause
159 // CONTROVERSIAL: this query DOES NOT work in PostgreSQL
160 result = con.Query("SELECT i % 2 AS k, SUM(i) FROM integers WHERE i IS NOT NULL GROUP BY k HAVING k>0;");
161 REQUIRE(CHECK_COLUMN(result, 0, {1}));
162 REQUIRE(CHECK_COLUMN(result, 1, {4}));
163
164 // this is identical to this query
165 // CONTROVERSIAL: this query does not work in MonetDB
166 result = con.Query("SELECT i % 2 AS k, SUM(i) FROM integers WHERE i IS NOT NULL GROUP BY k HAVING i%2>0;");
167 REQUIRE(CHECK_COLUMN(result, 0, {1}));
168 REQUIRE(CHECK_COLUMN(result, 1, {4}));
169
170 // select groups by constant (similar to order by constant)
171 result = con.Query("SELECT i % 2 AS k, SUM(i) FROM integers WHERE i IS NOT NULL GROUP BY 1 HAVING i%2>0;");
172 REQUIRE(CHECK_COLUMN(result, 0, {1}));
173 REQUIRE(CHECK_COLUMN(result, 1, {4}));
174
175 // constant out of range
176 REQUIRE_FAIL(con.Query("SELECT i % 2 AS k, SUM(i) FROM integers WHERE i IS NOT NULL GROUP BY 42 HAVING i%2>0;"));
177
178 // entry in GROUP BY should refer to base column
179 // ...BUT the alias in ORDER BY should refer to the alias from the select list
180 // note that both Postgres and MonetDB reject this query because of ambiguity. SQLite accepts it though so we do
181 // too.
182 result = con.Query("SELECT i, i % 2 AS i, SUM(i) FROM integers GROUP BY i ORDER BY i, 3;");
183 REQUIRE(CHECK_COLUMN(result, 0, {Value(), 2, 1, 3}));
184 REQUIRE(CHECK_COLUMN(result, 1, {Value(), 0, 1, 1}));
185 REQUIRE(CHECK_COLUMN(result, 2, {Value(), 2, 1, 3}));
186
187 // changing the name of the alias makes it more explicit what should happen
188 result = con.Query("SELECT i, i % 2 AS k, SUM(i) FROM integers GROUP BY i ORDER BY k, 3;");
189 REQUIRE(CHECK_COLUMN(result, 0, {Value(), 2, 1, 3}));
190 REQUIRE(CHECK_COLUMN(result, 1, {Value(), 0, 1, 1}));
191 REQUIRE(CHECK_COLUMN(result, 2, {Value(), 2, 1, 3}));
192
193 // this now orders by the actual grouping column
194 result = con.Query("SELECT i, i % 2 AS k, SUM(i) FROM integers GROUP BY i ORDER BY i;");
195 REQUIRE(CHECK_COLUMN(result, 0, {Value(), 1, 2, 3}));
196 REQUIRE(CHECK_COLUMN(result, 1, {Value(), 1, 0, 1}));
197 REQUIRE(CHECK_COLUMN(result, 2, {Value(), 1, 2, 3}));
198
199 // cannot use GROUP BY column in an aggregation...
200 REQUIRE_FAIL(con.Query("SELECT i % 2 AS k, SUM(k) FROM integers GROUP BY k"));
201
202 // ...unless it is one of the base columns
203 result = con.Query("SELECT i, SUM(i) FROM integers GROUP BY i ORDER BY i");
204 REQUIRE(CHECK_COLUMN(result, 0, {Value(), 1, 2, 3}));
205 REQUIRE(CHECK_COLUMN(result, 1, {Value(), 1, 2, 3}));
206
207 // ORDER on a non-grouping column
208 // this query is refused by Postgres and MonetDB
209 // but SQLite resolves it by first pushing a "FIRST(i)" aggregate into the projection, and then ordering by that
210 // aggregate
211 REQUIRE_FAIL(con.Query("SELECT (10-i) AS k, SUM(i) FROM integers GROUP BY k ORDER BY i;"));
212
213 // we can manually get this behavior by pushing FIRST
214 result = con.Query("SELECT (10-i) AS k, SUM(i) FROM integers GROUP BY k ORDER BY FIRST(i);");
215 REQUIRE(CHECK_COLUMN(result, 0, {Value(), 9, 8, 7}));
216 REQUIRE(CHECK_COLUMN(result, 1, {Value(), 1, 2, 3}));
217}
218
219TEST_CASE("GROUP BY large strings", "[aggregations]") {
220 unique_ptr<QueryResult> result;
221 DuckDB db(nullptr);
222 Connection con(db);
223 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a VARCHAR, b INTEGER);"));
224 REQUIRE_NO_FAIL(con.Query("INSERT INTO test VALUES ('helloworld', 22), "
225 "('thisisalongstring', 22), ('helloworld', 21)"));
226
227 result = con.Query("SELECT a, SUM(b) FROM test GROUP BY a ORDER BY a");
228 REQUIRE(CHECK_COLUMN(result, 0, {"helloworld", "thisisalongstring"}));
229 REQUIRE(CHECK_COLUMN(result, 1, {43, 22}));
230}
231
232TEST_CASE("Group by multiple columns", "[aggregations]") {
233 unique_ptr<QueryResult> result;
234 DuckDB db(nullptr);
235 Connection con(db);
236
237 REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER, j INTEGER, k INTEGER);"));
238 REQUIRE_NO_FAIL(
239 con.Query("INSERT INTO integers VALUES (1, 1, 2), (1, 2, 2), (1, 1, 2), (2, 1, 2), (1, 2, 4), (1, 2, NULL);"));
240
241 result = con.Query("SELECT i, j, SUM(k), COUNT(*), COUNT(k) FROM integers GROUP BY i, j ORDER BY 1, 2");
242 REQUIRE(CHECK_COLUMN(result, 0, {1, 1, 2}));
243 REQUIRE(CHECK_COLUMN(result, 1, {1, 2, 1}));
244 REQUIRE(CHECK_COLUMN(result, 2, {4, 6, 2}));
245 REQUIRE(CHECK_COLUMN(result, 3, {2, 3, 1}));
246 REQUIRE(CHECK_COLUMN(result, 4, {2, 2, 1}));
247}
248
249TEST_CASE("Aggregate only COUNT STAR", "[aggregations]") {
250 unique_ptr<QueryResult> result;
251 DuckDB db(nullptr);
252 Connection con(db);
253
254 REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER, j INTEGER);"));
255 REQUIRE_NO_FAIL(con.Query("INSERT INTO integers VALUES (3, 4), (3, 4), (2, 4);"));
256
257 result = con.Query("SELECT i, COUNT(*) FROM integers GROUP BY i ORDER BY i");
258 REQUIRE(CHECK_COLUMN(result, 0, {2, 3}));
259 REQUIRE(CHECK_COLUMN(result, 1, {1, 2}));
260
261 // test COUNT without the *
262 result = con.Query("SELECT i, COUNT() FROM integers GROUP BY i ORDER BY i");
263 REQUIRE(CHECK_COLUMN(result, 0, {2, 3}));
264 REQUIRE(CHECK_COLUMN(result, 1, {1, 2}));
265}
266
267TEST_CASE("GROUP BY NULL value", "[aggregations]") {
268 unique_ptr<QueryResult> result;
269 DuckDB db(nullptr);
270 Connection con(db);
271
272 REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER, j INTEGER);"));
273 REQUIRE_NO_FAIL(con.Query("INSERT INTO integers VALUES (3, 4), (NULL, 4), (2, 4);"));
274
275 result = con.Query("SELECT i, SUM(j) FROM integers GROUP BY i ORDER BY i");
276 REQUIRE(CHECK_COLUMN(result, 0, {Value(), 2, 3}));
277 REQUIRE(CHECK_COLUMN(result, 1, {4, 4, 4}));
278}
279
280TEST_CASE("Aggregating from empty table", "[aggregations]") {
281 unique_ptr<QueryResult> result;
282 DuckDB db(nullptr);
283 Connection con(db);
284
285 REQUIRE_NO_FAIL(con.Query("CREATE TABLE emptyaggr(i INTEGER);"));
286
287 result = con.Query("SELECT COUNT(*) FROM emptyaggr");
288 REQUIRE(CHECK_COLUMN(result, 0, {0}));
289
290 result = con.Query("SELECT SUM(i), COUNT(i), COUNT(DISTINCT i), COUNT(*), AVG(i), "
291 "COUNT(*)+1, COUNT(i)+1, MIN(i), MIN(i+1), MIN(i)+1 FROM emptyaggr");
292 REQUIRE(CHECK_COLUMN(result, 0, {Value()}));
293 REQUIRE(CHECK_COLUMN(result, 1, {0}));
294 REQUIRE(CHECK_COLUMN(result, 2, {0}));
295 REQUIRE(CHECK_COLUMN(result, 3, {0}));
296 REQUIRE(CHECK_COLUMN(result, 4, {Value()}));
297 REQUIRE(CHECK_COLUMN(result, 5, {1}));
298 REQUIRE(CHECK_COLUMN(result, 6, {1}));
299 REQUIRE(CHECK_COLUMN(result, 7, {Value()}));
300 REQUIRE(CHECK_COLUMN(result, 8, {Value()}));
301 REQUIRE(CHECK_COLUMN(result, 9, {Value()}));
302}
303
304TEST_CASE("DISTINCT aggregations", "[aggregations]") {
305 unique_ptr<QueryResult> result;
306 DuckDB db(nullptr);
307 Connection con(db);
308
309 REQUIRE_NO_FAIL(con.Query("CREATE TABLE distinctagg(i INTEGER, j INTEGER);"));
310 REQUIRE_NO_FAIL(con.Query("INSERT INTO distinctagg VALUES (1,1),(1,1),(2,2), (1,2)"));
311
312 result = con.Query("SELECT COUNT(i), COUNT(DISTINCT i), SUM(i), "
313 "SUM(DISTINCT i) FROM distinctagg");
314
315 REQUIRE(CHECK_COLUMN(result, 0, {4}));
316 REQUIRE(CHECK_COLUMN(result, 1, {2}));
317 REQUIRE(CHECK_COLUMN(result, 2, {5}));
318 REQUIRE(CHECK_COLUMN(result, 3, {3}));
319
320 result = con.Query("SELECT COUNT(i), COUNT(DISTINCT i), SUM(i), SUM(DISTINCT i) "
321 "FROM distinctagg GROUP BY j ORDER BY j");
322
323 REQUIRE(CHECK_COLUMN(result, 0, {2, 2}));
324 REQUIRE(CHECK_COLUMN(result, 1, {1, 2}));
325 REQUIRE(CHECK_COLUMN(result, 2, {2, 3}));
326 REQUIRE(CHECK_COLUMN(result, 3, {1, 3}));
327}
328
329TEST_CASE("STDDEV aggregations", "[aggregations]") {
330 unique_ptr<QueryResult> result;
331 DuckDB db(nullptr);
332 Connection con(db);
333
334 REQUIRE_NO_FAIL(con.Query("create table stddev_test(val integer, grp integer)"));
335 REQUIRE_NO_FAIL(con.Query("insert into stddev_test values (42, 1), (43, "
336 "1), (42, 2), (1000, 2), (NULL, 1), (NULL, 3)"));
337
338 // stddev_samp
339 result = con.Query("select round(stddev_samp(val), 1) from stddev_test");
340 REQUIRE(CHECK_COLUMN(result, 0, {478.8}));
341
342 result = con.Query("select round(stddev_samp(val), 1) from stddev_test "
343 "where val is not null");
344 REQUIRE(CHECK_COLUMN(result, 0, {478.8}));
345
346 result = con.Query("select grp, sum(val), round(stddev_samp(val), 1), "
347 "min(val) from stddev_test group by grp order by grp");
348 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3}));
349 REQUIRE(CHECK_COLUMN(result, 1, {85, 1042, Value()}));
350 REQUIRE(CHECK_COLUMN(result, 2, {0.7, 677.4, Value()}));
351 REQUIRE(CHECK_COLUMN(result, 3, {42, 42, Value()}));
352
353 result = con.Query("select grp, sum(val), round(stddev_samp(val), 1), min(val) from "
354 "stddev_test where val is not null group by grp order by grp");
355 REQUIRE(CHECK_COLUMN(result, 0, {1, 2}));
356 REQUIRE(CHECK_COLUMN(result, 1, {85, 1042}));
357 REQUIRE(CHECK_COLUMN(result, 2, {0.7, 677.4}));
358 REQUIRE(CHECK_COLUMN(result, 3, {42, 42}));
359
360 // stddev_pop
361 result = con.Query("select round(stddev_pop(val), 1) from stddev_test");
362 REQUIRE(CHECK_COLUMN(result, 0, {414.7}));
363
364 result = con.Query("select round(stddev_pop(val), 1) from stddev_test "
365 "where val is not null");
366 REQUIRE(CHECK_COLUMN(result, 0, {414.7}));
367
368 result = con.Query("select grp, sum(val), round(stddev_pop(val), 1), "
369 "min(val) from stddev_test group by grp order by grp");
370 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3}));
371 REQUIRE(CHECK_COLUMN(result, 1, {85, 1042, Value()}));
372 REQUIRE(CHECK_COLUMN(result, 2, {0.5, 479.0, Value()}));
373 REQUIRE(CHECK_COLUMN(result, 3, {42, 42, Value()}));
374
375 result = con.Query("select grp, sum(val), round(stddev_pop(val), 1), min(val) from "
376 "stddev_test where val is not null group by grp order by grp");
377 REQUIRE(CHECK_COLUMN(result, 0, {1, 2}));
378 REQUIRE(CHECK_COLUMN(result, 1, {85, 1042}));
379 REQUIRE(CHECK_COLUMN(result, 2, {0.5, 479.0}));
380 REQUIRE(CHECK_COLUMN(result, 3, {42, 42}));
381
382 // var_samp
383 result = con.Query("select round(var_samp(val), 1) from stddev_test");
384 REQUIRE(CHECK_COLUMN(result, 0, {229281.6}));
385
386 result = con.Query("select round(var_samp(val), 1) from stddev_test "
387 "where val is not null");
388 REQUIRE(CHECK_COLUMN(result, 0, {229281.6}));
389
390 result = con.Query("select grp, sum(val), round(var_samp(val), 1), "
391 "min(val) from stddev_test group by grp order by grp");
392 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3}));
393 REQUIRE(CHECK_COLUMN(result, 1, {85, 1042, Value()}));
394 REQUIRE(CHECK_COLUMN(result, 2, {0.5, 458882.0, Value()}));
395 REQUIRE(CHECK_COLUMN(result, 3, {42, 42, Value()}));
396
397 result = con.Query("select grp, sum(val), round(var_samp(val), 1), min(val) from "
398 "stddev_test where val is not null group by grp order by grp");
399 REQUIRE(CHECK_COLUMN(result, 0, {1, 2}));
400 REQUIRE(CHECK_COLUMN(result, 1, {85, 1042}));
401 REQUIRE(CHECK_COLUMN(result, 2, {0.5, 458882.0}));
402 REQUIRE(CHECK_COLUMN(result, 3, {42, 42}));
403
404 // var_pop
405 result = con.Query("select round(var_pop(val), 1) from stddev_test");
406 REQUIRE(CHECK_COLUMN(result, 0, {171961.2}));
407
408 result = con.Query("select round(var_pop(val), 1) from stddev_test "
409 "where val is not null");
410 REQUIRE(CHECK_COLUMN(result, 0, {171961.2}));
411
412 result = con.Query("select grp, sum(val), round(var_pop(val), 2), "
413 "min(val) from stddev_test group by grp order by grp");
414 REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3}));
415 REQUIRE(CHECK_COLUMN(result, 1, {85, 1042, Value()}));
416 REQUIRE(CHECK_COLUMN(result, 2, {0.25, 229441.0, Value()}));
417 REQUIRE(CHECK_COLUMN(result, 3, {42, 42, Value()}));
418
419 result = con.Query("select grp, sum(val), round(var_pop(val), 2), min(val) from "
420 "stddev_test where val is not null group by grp order by grp");
421 REQUIRE(CHECK_COLUMN(result, 0, {1, 2}));
422 REQUIRE(CHECK_COLUMN(result, 1, {85, 1042}));
423 REQUIRE(CHECK_COLUMN(result, 2, {0.25, 229441.0}));
424 REQUIRE(CHECK_COLUMN(result, 3, {42, 42}));
425}
426
427TEST_CASE("Test aggregations on strings", "[aggregations]") {
428 unique_ptr<QueryResult> result;
429 DuckDB db(nullptr);
430 Connection con(db);
431 con.EnableQueryVerification();
432
433 result = con.Query("SELECT NULL as a, NULL as b, NULL as c, NULL as d, 1 as id UNION SELECT 'Кирилл' as a, "
434 "'Müller' as b, '我是谁' as c, 'ASCII' as d, 2 as id ORDER BY 1");
435 REQUIRE(CHECK_COLUMN(result, 0, {Value(), "Кирилл"}));
436 REQUIRE(CHECK_COLUMN(result, 1, {Value(), "Müller"}));
437 REQUIRE(CHECK_COLUMN(result, 2, {Value(), "我是谁"}));
438 REQUIRE(CHECK_COLUMN(result, 3, {Value(), "ASCII"}));
439 REQUIRE(CHECK_COLUMN(result, 4, {1, 2}));
440
441 REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, s VARCHAR);"));
442 REQUIRE_NO_FAIL(con.Query("INSERT INTO test VALUES (11, 'hello'), (12, 'world'), (11, NULL)"));
443
444 // scalar aggregation on string
445 result = con.Query("SELECT COUNT(*), COUNT(s) FROM test;");
446 REQUIRE(CHECK_COLUMN(result, 0, {3}));
447 REQUIRE(CHECK_COLUMN(result, 1, {2}));
448
449 // grouped aggregation on string
450 result = con.Query("SELECT a, COUNT(*), COUNT(s) FROM test GROUP BY a ORDER BY a;");
451 REQUIRE(CHECK_COLUMN(result, 0, {11, 12}));
452 REQUIRE(CHECK_COLUMN(result, 1, {2, 1}));
453 REQUIRE(CHECK_COLUMN(result, 2, {1, 1}));
454
455 // group by the strings
456 result = con.Query("SELECT s, SUM(a) FROM test GROUP BY s ORDER BY s;");
457 REQUIRE(CHECK_COLUMN(result, 0, {Value(), "hello", "world"}));
458 REQUIRE(CHECK_COLUMN(result, 1, {11, 11, 12}));
459
460 // distinct aggregations ons tring
461 REQUIRE_NO_FAIL(con.Query("INSERT INTO test VALUES (11, 'hello'), (12, 'world')"));
462
463 // scalar distinct
464 result = con.Query("SELECT COUNT(*), COUNT(s), COUNT(DISTINCT s) FROM test;");
465 REQUIRE(CHECK_COLUMN(result, 0, {5}));
466 REQUIRE(CHECK_COLUMN(result, 1, {4}));
467 REQUIRE(CHECK_COLUMN(result, 2, {2}));
468
469 // grouped distinct
470 result = con.Query("SELECT a, COUNT(*), COUNT(s), COUNT(DISTINCT s) FROM test GROUP BY a ORDER BY a;");
471 REQUIRE(CHECK_COLUMN(result, 0, {11, 12}));
472 REQUIRE(CHECK_COLUMN(result, 1, {3, 2}));
473 REQUIRE(CHECK_COLUMN(result, 2, {2, 2}));
474 REQUIRE(CHECK_COLUMN(result, 3, {1, 1}));
475
476 // now with WHERE clause
477 result = con.Query(
478 "SELECT a, COUNT(*), COUNT(s), COUNT(DISTINCT s) FROM test WHERE s IS NOT NULL GROUP BY a ORDER BY a;");
479 REQUIRE(CHECK_COLUMN(result, 0, {11, 12}));
480 REQUIRE(CHECK_COLUMN(result, 1, {2, 2}));
481 REQUIRE(CHECK_COLUMN(result, 2, {2, 2}));
482 REQUIRE(CHECK_COLUMN(result, 3, {1, 1}));
483}
484