| 1 | #include "catch.hpp" |
| 2 | #include "test_helpers.hpp" |
| 3 | |
| 4 | using namespace duckdb; |
| 5 | using namespace std; |
| 6 | |
| 7 | TEST_CASE("Test aggregation/group by statements" , "[aggregations]" ) { |
| 8 | unique_ptr<QueryResult> result; |
| 9 | DuckDB db(nullptr); |
| 10 | Connection con(db); |
| 11 | con.EnableQueryVerification(); |
| 12 | |
| 13 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, b INTEGER);" )); |
| 14 | REQUIRE_NO_FAIL(con.Query("INSERT INTO test VALUES (11, 22), (13, 22), (12, 21)" )); |
| 15 | |
| 16 | // aggregates cannot be nested |
| 17 | REQUIRE_FAIL(con.Query("SELECT SUM(SUM(41)), COUNT(*);" )); |
| 18 | |
| 19 | // simple aggregates without group by |
| 20 | result = con.Query("SELECT SUM(a), COUNT(*), AVG(a) FROM test;" ); |
| 21 | REQUIRE(CHECK_COLUMN(result, 0, {36})); |
| 22 | REQUIRE(CHECK_COLUMN(result, 1, {3})); |
| 23 | REQUIRE(CHECK_COLUMN(result, 2, {12.0})); |
| 24 | |
| 25 | result = con.Query("SELECT COUNT(*) FROM test;" ); |
| 26 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 27 | |
| 28 | result = con.Query("SELECT SUM(a), COUNT(*) FROM test WHERE a = 11;" ); |
| 29 | REQUIRE(CHECK_COLUMN(result, 0, {11})); |
| 30 | REQUIRE(CHECK_COLUMN(result, 1, {1})); |
| 31 | |
| 32 | result = con.Query("SELECT SUM(a), SUM(b), SUM(a) + SUM (b) FROM test;" ); |
| 33 | REQUIRE(CHECK_COLUMN(result, 0, {36})); |
| 34 | REQUIRE(CHECK_COLUMN(result, 1, {65})); |
| 35 | REQUIRE(CHECK_COLUMN(result, 2, {101})); |
| 36 | |
| 37 | result = con.Query("SELECT SUM(a+2), SUM(a) + 2 * COUNT(*) FROM test;" ); |
| 38 | REQUIRE(CHECK_COLUMN(result, 0, {42})); |
| 39 | REQUIRE(CHECK_COLUMN(result, 1, {42})); |
| 40 | |
| 41 | // aggregations with group by |
| 42 | result = con.Query("SELECT b, SUM(a), SUM(a+2), AVG(a) FROM test GROUP BY b ORDER BY b;" ); |
| 43 | REQUIRE(CHECK_COLUMN(result, 0, {21, 22})); |
| 44 | REQUIRE(CHECK_COLUMN(result, 1, {12, 24})); |
| 45 | REQUIRE(CHECK_COLUMN(result, 2, {14, 28})); |
| 46 | REQUIRE(CHECK_COLUMN(result, 3, {12, 12})); |
| 47 | |
| 48 | // ORDER BY aggregation that does not occur in SELECT clause |
| 49 | result = con.Query("SELECT b, SUM(a) FROM test GROUP BY b ORDER BY COUNT(a);" ); |
| 50 | REQUIRE(CHECK_COLUMN(result, 0, {21, 22})); |
| 51 | REQUIRE(CHECK_COLUMN(result, 1, {12, 24})); |
| 52 | |
| 53 | result = con.Query("SELECT b, SUM(a) FROM test GROUP BY b ORDER BY COUNT(a) DESC;" ); |
| 54 | REQUIRE(CHECK_COLUMN(result, 0, {22, 21})); |
| 55 | REQUIRE(CHECK_COLUMN(result, 1, {24, 12})); |
| 56 | |
| 57 | result = con.Query("SELECT b, SUM(a), COUNT(*), SUM(a+2) FROM test GROUP " |
| 58 | "BY b ORDER BY b;" ); |
| 59 | REQUIRE(CHECK_COLUMN(result, 0, {21, 22})); |
| 60 | REQUIRE(CHECK_COLUMN(result, 1, {12, 24})); |
| 61 | REQUIRE(CHECK_COLUMN(result, 2, {1, 2})); |
| 62 | REQUIRE(CHECK_COLUMN(result, 3, {14, 28})); |
| 63 | |
| 64 | // group by alias |
| 65 | result = con.Query("SELECT b % 2 AS f, SUM(a) FROM test GROUP BY f;" ); |
| 66 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1})); |
| 67 | REQUIRE(CHECK_COLUMN(result, 1, {24, 12})); |
| 68 | |
| 69 | // group by with filter |
| 70 | result = con.Query("SELECT b, SUM(a), COUNT(*), SUM(a+2) FROM test WHERE " |
| 71 | "a <= 12 GROUP " |
| 72 | "BY b ORDER BY b;" ); |
| 73 | REQUIRE(CHECK_COLUMN(result, 0, {21, 22})); |
| 74 | REQUIRE(CHECK_COLUMN(result, 1, {12, 11})); |
| 75 | REQUIRE(CHECK_COLUMN(result, 2, {1, 1})); |
| 76 | REQUIRE(CHECK_COLUMN(result, 3, {14, 13})); |
| 77 | |
| 78 | // nested aggregate in group by |
| 79 | REQUIRE_FAIL(con.Query("SELECT b % 2 AS f, COUNT(SUM(a)) FROM test GROUP BY f;" )); |
| 80 | |
| 81 | con.Query("INSERT INTO test VALUES (12, 21), (12, 21), (12, 21)" ); |
| 82 | |
| 83 | // group by with filter and multiple values per groups |
| 84 | result = con.Query("SELECT b, SUM(a), COUNT(*), SUM(a+2) FROM test WHERE " |
| 85 | "a <= 12 GROUP " |
| 86 | "BY b ORDER BY b;" ); |
| 87 | REQUIRE(CHECK_COLUMN(result, 0, {21, 22})); |
| 88 | REQUIRE(CHECK_COLUMN(result, 1, {12 * 4, 11})); |
| 89 | REQUIRE(CHECK_COLUMN(result, 2, {4, 1})); |
| 90 | REQUIRE(CHECK_COLUMN(result, 3, {12 * 4 + 2 * 4, 13})); |
| 91 | |
| 92 | // group by with filter and multiple values per groups |
| 93 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER, j INTEGER);" )); |
| 94 | REQUIRE_NO_FAIL(con.Query("INSERT INTO integers VALUES (3, 4), (3, 4), (2, 4);" )); |
| 95 | |
| 96 | // use GROUP BY column in math operator |
| 97 | result = con.Query("SELECT i, i + 10 FROM integers GROUP BY i ORDER BY i" ); |
| 98 | REQUIRE(CHECK_COLUMN(result, 0, {2, 3})); |
| 99 | REQUIRE(CHECK_COLUMN(result, 1, {12, 13})); |
| 100 | |
| 101 | // using non-group column and non-aggregate should throw an error |
| 102 | REQUIRE_FAIL(con.Query("SELECT i, SUM(j), j FROM integers GROUP BY i ORDER BY i" )); |
| 103 | // but it works if we wrap it in FIRST() |
| 104 | result = con.Query("SELECT i, SUM(j), FIRST(j) FROM integers GROUP BY i ORDER BY i" ); |
| 105 | REQUIRE(CHECK_COLUMN(result, 0, {2, 3})); |
| 106 | REQUIRE(CHECK_COLUMN(result, 1, {4, 8})); |
| 107 | REQUIRE(CHECK_COLUMN(result, 2, {4, 4})); |
| 108 | |
| 109 | // group by constant alias |
| 110 | result = con.Query("SELECT 1 AS k, SUM(i) FROM integers GROUP BY k ORDER BY 2;" ); |
| 111 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 112 | REQUIRE(CHECK_COLUMN(result, 1, {8})); |
| 113 | |
| 114 | // use an alias that is identical to a column name (should prioritize column name) |
| 115 | result = con.Query("SELECT 1 AS i, SUM(i) FROM integers GROUP BY i ORDER BY 2;" ); |
| 116 | REQUIRE(CHECK_COLUMN(result, 0, {1, 1})); |
| 117 | REQUIRE(CHECK_COLUMN(result, 1, {2, 6})); |
| 118 | |
| 119 | // refer to the same alias twice |
| 120 | result = con.Query("SELECT i % 2 AS k, SUM(i) FROM integers GROUP BY k, k ORDER BY 1;" ); |
| 121 | REQUIRE(CHECK_COLUMN(result, 0, {0, 1})); |
| 122 | REQUIRE(CHECK_COLUMN(result, 1, {2, 6})); |
| 123 | |
| 124 | REQUIRE_NO_FAIL(con.Query("DROP TABLE integers;" )); |
| 125 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER);" )); |
| 126 | REQUIRE_NO_FAIL(con.Query("INSERT INTO integers VALUES (1), (2), (3), (NULL);" )); |
| 127 | |
| 128 | // group by NULL |
| 129 | result = con.Query("SELECT i, SUM(i) FROM integers GROUP BY i ORDER BY 1;" ); |
| 130 | REQUIRE(CHECK_COLUMN(result, 0, {Value(), 1, 2, 3})); |
| 131 | REQUIRE(CHECK_COLUMN(result, 1, {Value(), 1, 2, 3})); |
| 132 | |
| 133 | // column reference should have preference over alias reference in grouping |
| 134 | result = con.Query("SELECT i, i % 2 AS i, SUM(i) FROM integers GROUP BY i ORDER BY 1;" ); |
| 135 | REQUIRE(CHECK_COLUMN(result, 0, {Value(), 1, 2, 3})); |
| 136 | REQUIRE(CHECK_COLUMN(result, 1, {Value(), 1, 0, 1})); |
| 137 | REQUIRE(CHECK_COLUMN(result, 2, {Value(), 1, 2, 3})); |
| 138 | |
| 139 | // aliases can only be referenced in the GROUP BY as the root column: operations not allowed |
| 140 | // CONTROVERSIAL: this query DOES work in SQLite |
| 141 | REQUIRE_FAIL(con.Query("SELECT 1 AS k, SUM(i) FROM integers GROUP BY k+1 ORDER BY 2;" )); |
| 142 | |
| 143 | // group by column refs should be recognized, even if one uses an explicit table specifier and the other does not |
| 144 | result = con.Query("SELECT test.b, SUM(a) FROM test GROUP BY b ORDER BY COUNT(a) DESC;" ); |
| 145 | REQUIRE(CHECK_COLUMN(result, 0, {21, 22})); |
| 146 | REQUIRE(CHECK_COLUMN(result, 1, {48, 24})); |
| 147 | } |
| 148 | |
| 149 | TEST_CASE("Test aliases in group by/aggregation" , "[aggregations]" ) { |
| 150 | unique_ptr<QueryResult> result; |
| 151 | DuckDB db(nullptr); |
| 152 | Connection con(db); |
| 153 | con.EnableQueryVerification(); |
| 154 | |
| 155 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER)" )); |
| 156 | REQUIRE_NO_FAIL(con.Query("INSERT INTO integers VALUES (1), (2), (3), (NULL)" )); |
| 157 | |
| 158 | // use alias in HAVING clause |
| 159 | // CONTROVERSIAL: this query DOES NOT work in PostgreSQL |
| 160 | result = con.Query("SELECT i % 2 AS k, SUM(i) FROM integers WHERE i IS NOT NULL GROUP BY k HAVING k>0;" ); |
| 161 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 162 | REQUIRE(CHECK_COLUMN(result, 1, {4})); |
| 163 | |
| 164 | // this is identical to this query |
| 165 | // CONTROVERSIAL: this query does not work in MonetDB |
| 166 | result = con.Query("SELECT i % 2 AS k, SUM(i) FROM integers WHERE i IS NOT NULL GROUP BY k HAVING i%2>0;" ); |
| 167 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 168 | REQUIRE(CHECK_COLUMN(result, 1, {4})); |
| 169 | |
| 170 | // select groups by constant (similar to order by constant) |
| 171 | result = con.Query("SELECT i % 2 AS k, SUM(i) FROM integers WHERE i IS NOT NULL GROUP BY 1 HAVING i%2>0;" ); |
| 172 | REQUIRE(CHECK_COLUMN(result, 0, {1})); |
| 173 | REQUIRE(CHECK_COLUMN(result, 1, {4})); |
| 174 | |
| 175 | // constant out of range |
| 176 | REQUIRE_FAIL(con.Query("SELECT i % 2 AS k, SUM(i) FROM integers WHERE i IS NOT NULL GROUP BY 42 HAVING i%2>0;" )); |
| 177 | |
| 178 | // entry in GROUP BY should refer to base column |
| 179 | // ...BUT the alias in ORDER BY should refer to the alias from the select list |
| 180 | // note that both Postgres and MonetDB reject this query because of ambiguity. SQLite accepts it though so we do |
| 181 | // too. |
| 182 | result = con.Query("SELECT i, i % 2 AS i, SUM(i) FROM integers GROUP BY i ORDER BY i, 3;" ); |
| 183 | REQUIRE(CHECK_COLUMN(result, 0, {Value(), 2, 1, 3})); |
| 184 | REQUIRE(CHECK_COLUMN(result, 1, {Value(), 0, 1, 1})); |
| 185 | REQUIRE(CHECK_COLUMN(result, 2, {Value(), 2, 1, 3})); |
| 186 | |
| 187 | // changing the name of the alias makes it more explicit what should happen |
| 188 | result = con.Query("SELECT i, i % 2 AS k, SUM(i) FROM integers GROUP BY i ORDER BY k, 3;" ); |
| 189 | REQUIRE(CHECK_COLUMN(result, 0, {Value(), 2, 1, 3})); |
| 190 | REQUIRE(CHECK_COLUMN(result, 1, {Value(), 0, 1, 1})); |
| 191 | REQUIRE(CHECK_COLUMN(result, 2, {Value(), 2, 1, 3})); |
| 192 | |
| 193 | // this now orders by the actual grouping column |
| 194 | result = con.Query("SELECT i, i % 2 AS k, SUM(i) FROM integers GROUP BY i ORDER BY i;" ); |
| 195 | REQUIRE(CHECK_COLUMN(result, 0, {Value(), 1, 2, 3})); |
| 196 | REQUIRE(CHECK_COLUMN(result, 1, {Value(), 1, 0, 1})); |
| 197 | REQUIRE(CHECK_COLUMN(result, 2, {Value(), 1, 2, 3})); |
| 198 | |
| 199 | // cannot use GROUP BY column in an aggregation... |
| 200 | REQUIRE_FAIL(con.Query("SELECT i % 2 AS k, SUM(k) FROM integers GROUP BY k" )); |
| 201 | |
| 202 | // ...unless it is one of the base columns |
| 203 | result = con.Query("SELECT i, SUM(i) FROM integers GROUP BY i ORDER BY i" ); |
| 204 | REQUIRE(CHECK_COLUMN(result, 0, {Value(), 1, 2, 3})); |
| 205 | REQUIRE(CHECK_COLUMN(result, 1, {Value(), 1, 2, 3})); |
| 206 | |
| 207 | // ORDER on a non-grouping column |
| 208 | // this query is refused by Postgres and MonetDB |
| 209 | // but SQLite resolves it by first pushing a "FIRST(i)" aggregate into the projection, and then ordering by that |
| 210 | // aggregate |
| 211 | REQUIRE_FAIL(con.Query("SELECT (10-i) AS k, SUM(i) FROM integers GROUP BY k ORDER BY i;" )); |
| 212 | |
| 213 | // we can manually get this behavior by pushing FIRST |
| 214 | result = con.Query("SELECT (10-i) AS k, SUM(i) FROM integers GROUP BY k ORDER BY FIRST(i);" ); |
| 215 | REQUIRE(CHECK_COLUMN(result, 0, {Value(), 9, 8, 7})); |
| 216 | REQUIRE(CHECK_COLUMN(result, 1, {Value(), 1, 2, 3})); |
| 217 | } |
| 218 | |
| 219 | TEST_CASE("GROUP BY large strings" , "[aggregations]" ) { |
| 220 | unique_ptr<QueryResult> result; |
| 221 | DuckDB db(nullptr); |
| 222 | Connection con(db); |
| 223 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a VARCHAR, b INTEGER);" )); |
| 224 | REQUIRE_NO_FAIL(con.Query("INSERT INTO test VALUES ('helloworld', 22), " |
| 225 | "('thisisalongstring', 22), ('helloworld', 21)" )); |
| 226 | |
| 227 | result = con.Query("SELECT a, SUM(b) FROM test GROUP BY a ORDER BY a" ); |
| 228 | REQUIRE(CHECK_COLUMN(result, 0, {"helloworld" , "thisisalongstring" })); |
| 229 | REQUIRE(CHECK_COLUMN(result, 1, {43, 22})); |
| 230 | } |
| 231 | |
| 232 | TEST_CASE("Group by multiple columns" , "[aggregations]" ) { |
| 233 | unique_ptr<QueryResult> result; |
| 234 | DuckDB db(nullptr); |
| 235 | Connection con(db); |
| 236 | |
| 237 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER, j INTEGER, k INTEGER);" )); |
| 238 | REQUIRE_NO_FAIL( |
| 239 | con.Query("INSERT INTO integers VALUES (1, 1, 2), (1, 2, 2), (1, 1, 2), (2, 1, 2), (1, 2, 4), (1, 2, NULL);" )); |
| 240 | |
| 241 | result = con.Query("SELECT i, j, SUM(k), COUNT(*), COUNT(k) FROM integers GROUP BY i, j ORDER BY 1, 2" ); |
| 242 | REQUIRE(CHECK_COLUMN(result, 0, {1, 1, 2})); |
| 243 | REQUIRE(CHECK_COLUMN(result, 1, {1, 2, 1})); |
| 244 | REQUIRE(CHECK_COLUMN(result, 2, {4, 6, 2})); |
| 245 | REQUIRE(CHECK_COLUMN(result, 3, {2, 3, 1})); |
| 246 | REQUIRE(CHECK_COLUMN(result, 4, {2, 2, 1})); |
| 247 | } |
| 248 | |
| 249 | TEST_CASE("Aggregate only COUNT STAR" , "[aggregations]" ) { |
| 250 | unique_ptr<QueryResult> result; |
| 251 | DuckDB db(nullptr); |
| 252 | Connection con(db); |
| 253 | |
| 254 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER, j INTEGER);" )); |
| 255 | REQUIRE_NO_FAIL(con.Query("INSERT INTO integers VALUES (3, 4), (3, 4), (2, 4);" )); |
| 256 | |
| 257 | result = con.Query("SELECT i, COUNT(*) FROM integers GROUP BY i ORDER BY i" ); |
| 258 | REQUIRE(CHECK_COLUMN(result, 0, {2, 3})); |
| 259 | REQUIRE(CHECK_COLUMN(result, 1, {1, 2})); |
| 260 | |
| 261 | // test COUNT without the * |
| 262 | result = con.Query("SELECT i, COUNT() FROM integers GROUP BY i ORDER BY i" ); |
| 263 | REQUIRE(CHECK_COLUMN(result, 0, {2, 3})); |
| 264 | REQUIRE(CHECK_COLUMN(result, 1, {1, 2})); |
| 265 | } |
| 266 | |
| 267 | TEST_CASE("GROUP BY NULL value" , "[aggregations]" ) { |
| 268 | unique_ptr<QueryResult> result; |
| 269 | DuckDB db(nullptr); |
| 270 | Connection con(db); |
| 271 | |
| 272 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE integers(i INTEGER, j INTEGER);" )); |
| 273 | REQUIRE_NO_FAIL(con.Query("INSERT INTO integers VALUES (3, 4), (NULL, 4), (2, 4);" )); |
| 274 | |
| 275 | result = con.Query("SELECT i, SUM(j) FROM integers GROUP BY i ORDER BY i" ); |
| 276 | REQUIRE(CHECK_COLUMN(result, 0, {Value(), 2, 3})); |
| 277 | REQUIRE(CHECK_COLUMN(result, 1, {4, 4, 4})); |
| 278 | } |
| 279 | |
| 280 | TEST_CASE("Aggregating from empty table" , "[aggregations]" ) { |
| 281 | unique_ptr<QueryResult> result; |
| 282 | DuckDB db(nullptr); |
| 283 | Connection con(db); |
| 284 | |
| 285 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE emptyaggr(i INTEGER);" )); |
| 286 | |
| 287 | result = con.Query("SELECT COUNT(*) FROM emptyaggr" ); |
| 288 | REQUIRE(CHECK_COLUMN(result, 0, {0})); |
| 289 | |
| 290 | result = con.Query("SELECT SUM(i), COUNT(i), COUNT(DISTINCT i), COUNT(*), AVG(i), " |
| 291 | "COUNT(*)+1, COUNT(i)+1, MIN(i), MIN(i+1), MIN(i)+1 FROM emptyaggr" ); |
| 292 | REQUIRE(CHECK_COLUMN(result, 0, {Value()})); |
| 293 | REQUIRE(CHECK_COLUMN(result, 1, {0})); |
| 294 | REQUIRE(CHECK_COLUMN(result, 2, {0})); |
| 295 | REQUIRE(CHECK_COLUMN(result, 3, {0})); |
| 296 | REQUIRE(CHECK_COLUMN(result, 4, {Value()})); |
| 297 | REQUIRE(CHECK_COLUMN(result, 5, {1})); |
| 298 | REQUIRE(CHECK_COLUMN(result, 6, {1})); |
| 299 | REQUIRE(CHECK_COLUMN(result, 7, {Value()})); |
| 300 | REQUIRE(CHECK_COLUMN(result, 8, {Value()})); |
| 301 | REQUIRE(CHECK_COLUMN(result, 9, {Value()})); |
| 302 | } |
| 303 | |
| 304 | TEST_CASE("DISTINCT aggregations" , "[aggregations]" ) { |
| 305 | unique_ptr<QueryResult> result; |
| 306 | DuckDB db(nullptr); |
| 307 | Connection con(db); |
| 308 | |
| 309 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE distinctagg(i INTEGER, j INTEGER);" )); |
| 310 | REQUIRE_NO_FAIL(con.Query("INSERT INTO distinctagg VALUES (1,1),(1,1),(2,2), (1,2)" )); |
| 311 | |
| 312 | result = con.Query("SELECT COUNT(i), COUNT(DISTINCT i), SUM(i), " |
| 313 | "SUM(DISTINCT i) FROM distinctagg" ); |
| 314 | |
| 315 | REQUIRE(CHECK_COLUMN(result, 0, {4})); |
| 316 | REQUIRE(CHECK_COLUMN(result, 1, {2})); |
| 317 | REQUIRE(CHECK_COLUMN(result, 2, {5})); |
| 318 | REQUIRE(CHECK_COLUMN(result, 3, {3})); |
| 319 | |
| 320 | result = con.Query("SELECT COUNT(i), COUNT(DISTINCT i), SUM(i), SUM(DISTINCT i) " |
| 321 | "FROM distinctagg GROUP BY j ORDER BY j" ); |
| 322 | |
| 323 | REQUIRE(CHECK_COLUMN(result, 0, {2, 2})); |
| 324 | REQUIRE(CHECK_COLUMN(result, 1, {1, 2})); |
| 325 | REQUIRE(CHECK_COLUMN(result, 2, {2, 3})); |
| 326 | REQUIRE(CHECK_COLUMN(result, 3, {1, 3})); |
| 327 | } |
| 328 | |
| 329 | TEST_CASE("STDDEV aggregations" , "[aggregations]" ) { |
| 330 | unique_ptr<QueryResult> result; |
| 331 | DuckDB db(nullptr); |
| 332 | Connection con(db); |
| 333 | |
| 334 | REQUIRE_NO_FAIL(con.Query("create table stddev_test(val integer, grp integer)" )); |
| 335 | REQUIRE_NO_FAIL(con.Query("insert into stddev_test values (42, 1), (43, " |
| 336 | "1), (42, 2), (1000, 2), (NULL, 1), (NULL, 3)" )); |
| 337 | |
| 338 | // stddev_samp |
| 339 | result = con.Query("select round(stddev_samp(val), 1) from stddev_test" ); |
| 340 | REQUIRE(CHECK_COLUMN(result, 0, {478.8})); |
| 341 | |
| 342 | result = con.Query("select round(stddev_samp(val), 1) from stddev_test " |
| 343 | "where val is not null" ); |
| 344 | REQUIRE(CHECK_COLUMN(result, 0, {478.8})); |
| 345 | |
| 346 | result = con.Query("select grp, sum(val), round(stddev_samp(val), 1), " |
| 347 | "min(val) from stddev_test group by grp order by grp" ); |
| 348 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3})); |
| 349 | REQUIRE(CHECK_COLUMN(result, 1, {85, 1042, Value()})); |
| 350 | REQUIRE(CHECK_COLUMN(result, 2, {0.7, 677.4, Value()})); |
| 351 | REQUIRE(CHECK_COLUMN(result, 3, {42, 42, Value()})); |
| 352 | |
| 353 | result = con.Query("select grp, sum(val), round(stddev_samp(val), 1), min(val) from " |
| 354 | "stddev_test where val is not null group by grp order by grp" ); |
| 355 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2})); |
| 356 | REQUIRE(CHECK_COLUMN(result, 1, {85, 1042})); |
| 357 | REQUIRE(CHECK_COLUMN(result, 2, {0.7, 677.4})); |
| 358 | REQUIRE(CHECK_COLUMN(result, 3, {42, 42})); |
| 359 | |
| 360 | // stddev_pop |
| 361 | result = con.Query("select round(stddev_pop(val), 1) from stddev_test" ); |
| 362 | REQUIRE(CHECK_COLUMN(result, 0, {414.7})); |
| 363 | |
| 364 | result = con.Query("select round(stddev_pop(val), 1) from stddev_test " |
| 365 | "where val is not null" ); |
| 366 | REQUIRE(CHECK_COLUMN(result, 0, {414.7})); |
| 367 | |
| 368 | result = con.Query("select grp, sum(val), round(stddev_pop(val), 1), " |
| 369 | "min(val) from stddev_test group by grp order by grp" ); |
| 370 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3})); |
| 371 | REQUIRE(CHECK_COLUMN(result, 1, {85, 1042, Value()})); |
| 372 | REQUIRE(CHECK_COLUMN(result, 2, {0.5, 479.0, Value()})); |
| 373 | REQUIRE(CHECK_COLUMN(result, 3, {42, 42, Value()})); |
| 374 | |
| 375 | result = con.Query("select grp, sum(val), round(stddev_pop(val), 1), min(val) from " |
| 376 | "stddev_test where val is not null group by grp order by grp" ); |
| 377 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2})); |
| 378 | REQUIRE(CHECK_COLUMN(result, 1, {85, 1042})); |
| 379 | REQUIRE(CHECK_COLUMN(result, 2, {0.5, 479.0})); |
| 380 | REQUIRE(CHECK_COLUMN(result, 3, {42, 42})); |
| 381 | |
| 382 | // var_samp |
| 383 | result = con.Query("select round(var_samp(val), 1) from stddev_test" ); |
| 384 | REQUIRE(CHECK_COLUMN(result, 0, {229281.6})); |
| 385 | |
| 386 | result = con.Query("select round(var_samp(val), 1) from stddev_test " |
| 387 | "where val is not null" ); |
| 388 | REQUIRE(CHECK_COLUMN(result, 0, {229281.6})); |
| 389 | |
| 390 | result = con.Query("select grp, sum(val), round(var_samp(val), 1), " |
| 391 | "min(val) from stddev_test group by grp order by grp" ); |
| 392 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3})); |
| 393 | REQUIRE(CHECK_COLUMN(result, 1, {85, 1042, Value()})); |
| 394 | REQUIRE(CHECK_COLUMN(result, 2, {0.5, 458882.0, Value()})); |
| 395 | REQUIRE(CHECK_COLUMN(result, 3, {42, 42, Value()})); |
| 396 | |
| 397 | result = con.Query("select grp, sum(val), round(var_samp(val), 1), min(val) from " |
| 398 | "stddev_test where val is not null group by grp order by grp" ); |
| 399 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2})); |
| 400 | REQUIRE(CHECK_COLUMN(result, 1, {85, 1042})); |
| 401 | REQUIRE(CHECK_COLUMN(result, 2, {0.5, 458882.0})); |
| 402 | REQUIRE(CHECK_COLUMN(result, 3, {42, 42})); |
| 403 | |
| 404 | // var_pop |
| 405 | result = con.Query("select round(var_pop(val), 1) from stddev_test" ); |
| 406 | REQUIRE(CHECK_COLUMN(result, 0, {171961.2})); |
| 407 | |
| 408 | result = con.Query("select round(var_pop(val), 1) from stddev_test " |
| 409 | "where val is not null" ); |
| 410 | REQUIRE(CHECK_COLUMN(result, 0, {171961.2})); |
| 411 | |
| 412 | result = con.Query("select grp, sum(val), round(var_pop(val), 2), " |
| 413 | "min(val) from stddev_test group by grp order by grp" ); |
| 414 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2, 3})); |
| 415 | REQUIRE(CHECK_COLUMN(result, 1, {85, 1042, Value()})); |
| 416 | REQUIRE(CHECK_COLUMN(result, 2, {0.25, 229441.0, Value()})); |
| 417 | REQUIRE(CHECK_COLUMN(result, 3, {42, 42, Value()})); |
| 418 | |
| 419 | result = con.Query("select grp, sum(val), round(var_pop(val), 2), min(val) from " |
| 420 | "stddev_test where val is not null group by grp order by grp" ); |
| 421 | REQUIRE(CHECK_COLUMN(result, 0, {1, 2})); |
| 422 | REQUIRE(CHECK_COLUMN(result, 1, {85, 1042})); |
| 423 | REQUIRE(CHECK_COLUMN(result, 2, {0.25, 229441.0})); |
| 424 | REQUIRE(CHECK_COLUMN(result, 3, {42, 42})); |
| 425 | } |
| 426 | |
| 427 | TEST_CASE("Test aggregations on strings" , "[aggregations]" ) { |
| 428 | unique_ptr<QueryResult> result; |
| 429 | DuckDB db(nullptr); |
| 430 | Connection con(db); |
| 431 | con.EnableQueryVerification(); |
| 432 | |
| 433 | result = con.Query("SELECT NULL as a, NULL as b, NULL as c, NULL as d, 1 as id UNION SELECT 'Кирилл' as a, " |
| 434 | "'Müller' as b, '我是谁' as c, 'ASCII' as d, 2 as id ORDER BY 1" ); |
| 435 | REQUIRE(CHECK_COLUMN(result, 0, {Value(), "Кирилл" })); |
| 436 | REQUIRE(CHECK_COLUMN(result, 1, {Value(), "Müller" })); |
| 437 | REQUIRE(CHECK_COLUMN(result, 2, {Value(), "我是谁" })); |
| 438 | REQUIRE(CHECK_COLUMN(result, 3, {Value(), "ASCII" })); |
| 439 | REQUIRE(CHECK_COLUMN(result, 4, {1, 2})); |
| 440 | |
| 441 | REQUIRE_NO_FAIL(con.Query("CREATE TABLE test (a INTEGER, s VARCHAR);" )); |
| 442 | REQUIRE_NO_FAIL(con.Query("INSERT INTO test VALUES (11, 'hello'), (12, 'world'), (11, NULL)" )); |
| 443 | |
| 444 | // scalar aggregation on string |
| 445 | result = con.Query("SELECT COUNT(*), COUNT(s) FROM test;" ); |
| 446 | REQUIRE(CHECK_COLUMN(result, 0, {3})); |
| 447 | REQUIRE(CHECK_COLUMN(result, 1, {2})); |
| 448 | |
| 449 | // grouped aggregation on string |
| 450 | result = con.Query("SELECT a, COUNT(*), COUNT(s) FROM test GROUP BY a ORDER BY a;" ); |
| 451 | REQUIRE(CHECK_COLUMN(result, 0, {11, 12})); |
| 452 | REQUIRE(CHECK_COLUMN(result, 1, {2, 1})); |
| 453 | REQUIRE(CHECK_COLUMN(result, 2, {1, 1})); |
| 454 | |
| 455 | // group by the strings |
| 456 | result = con.Query("SELECT s, SUM(a) FROM test GROUP BY s ORDER BY s;" ); |
| 457 | REQUIRE(CHECK_COLUMN(result, 0, {Value(), "hello" , "world" })); |
| 458 | REQUIRE(CHECK_COLUMN(result, 1, {11, 11, 12})); |
| 459 | |
| 460 | // distinct aggregations ons tring |
| 461 | REQUIRE_NO_FAIL(con.Query("INSERT INTO test VALUES (11, 'hello'), (12, 'world')" )); |
| 462 | |
| 463 | // scalar distinct |
| 464 | result = con.Query("SELECT COUNT(*), COUNT(s), COUNT(DISTINCT s) FROM test;" ); |
| 465 | REQUIRE(CHECK_COLUMN(result, 0, {5})); |
| 466 | REQUIRE(CHECK_COLUMN(result, 1, {4})); |
| 467 | REQUIRE(CHECK_COLUMN(result, 2, {2})); |
| 468 | |
| 469 | // grouped distinct |
| 470 | result = con.Query("SELECT a, COUNT(*), COUNT(s), COUNT(DISTINCT s) FROM test GROUP BY a ORDER BY a;" ); |
| 471 | REQUIRE(CHECK_COLUMN(result, 0, {11, 12})); |
| 472 | REQUIRE(CHECK_COLUMN(result, 1, {3, 2})); |
| 473 | REQUIRE(CHECK_COLUMN(result, 2, {2, 2})); |
| 474 | REQUIRE(CHECK_COLUMN(result, 3, {1, 1})); |
| 475 | |
| 476 | // now with WHERE clause |
| 477 | result = con.Query( |
| 478 | "SELECT a, COUNT(*), COUNT(s), COUNT(DISTINCT s) FROM test WHERE s IS NOT NULL GROUP BY a ORDER BY a;" ); |
| 479 | REQUIRE(CHECK_COLUMN(result, 0, {11, 12})); |
| 480 | REQUIRE(CHECK_COLUMN(result, 1, {2, 2})); |
| 481 | REQUIRE(CHECK_COLUMN(result, 2, {2, 2})); |
| 482 | REQUIRE(CHECK_COLUMN(result, 3, {1, 1})); |
| 483 | } |
| 484 | |