1 | #include "catch.hpp" |
---|---|
2 | #include "duckdb/common/types/hyperloglog.hpp" |
3 | |
4 | #include <vector> |
5 | |
6 | using namespace duckdb; |
7 | using namespace std; |
8 | |
9 | TEST_CASE("Test that hyperloglog works", "[hyperloglog]") { |
10 | HyperLogLog log; |
11 | // add a million elements of the same value |
12 | int x = 4; |
13 | for (size_t i = 0; i < 1000000; i++) { |
14 | log.Add((uint8_t *)&x, sizeof(int)); |
15 | } |
16 | REQUIRE(log.Count() == 1); |
17 | |
18 | // now add a million different values |
19 | HyperLogLog log2; |
20 | for (size_t i = 0; i < 1000000; i++) { |
21 | x = i; |
22 | log2.Add((uint8_t *)&x, sizeof(int)); |
23 | } |
24 | // the count is approximate, but should be pretty close to a million |
25 | size_t count = log2.Count(); |
26 | REQUIRE(count > 999000LL); |
27 | REQUIRE(count < 1001000LL); |
28 | |
29 | // now we can merge the HLLs |
30 | auto new_log = log.Merge(log2); |
31 | // the count should be pretty much the same |
32 | count = new_log->Count(); |
33 | REQUIRE(count > 999000LL); |
34 | REQUIRE(count < 1001000LL); |
35 | |
36 | // now test composability of the merge |
37 | // add everything to one big one |
38 | // add chunks to small ones and then merge them |
39 | // the result should be the same |
40 | HyperLogLog big; |
41 | HyperLogLog small[16]; |
42 | for (size_t i = 0; i < 1000000; i++) { |
43 | x = ((2 * i) + 3) % (i + 3 / 2); |
44 | big.Add((uint8_t *)&x, sizeof(int)); |
45 | small[i % 16].Add((uint8_t *)&x, sizeof(int)); |
46 | } |
47 | // now merge them into one big HyperLogLog |
48 | auto merged = HyperLogLog::Merge(small, 16); |
49 | // the result should be identical to the big one |
50 | REQUIRE(merged->Count() == big.Count()); |
51 | } |
52 |