| 1 | #include "catch.hpp" |
|---|---|
| 2 | #include "duckdb/common/types/hyperloglog.hpp" |
| 3 | |
| 4 | #include <vector> |
| 5 | |
| 6 | using namespace duckdb; |
| 7 | using namespace std; |
| 8 | |
| 9 | TEST_CASE("Test that hyperloglog works", "[hyperloglog]") { |
| 10 | HyperLogLog log; |
| 11 | // add a million elements of the same value |
| 12 | int x = 4; |
| 13 | for (size_t i = 0; i < 1000000; i++) { |
| 14 | log.Add((uint8_t *)&x, sizeof(int)); |
| 15 | } |
| 16 | REQUIRE(log.Count() == 1); |
| 17 | |
| 18 | // now add a million different values |
| 19 | HyperLogLog log2; |
| 20 | for (size_t i = 0; i < 1000000; i++) { |
| 21 | x = i; |
| 22 | log2.Add((uint8_t *)&x, sizeof(int)); |
| 23 | } |
| 24 | // the count is approximate, but should be pretty close to a million |
| 25 | size_t count = log2.Count(); |
| 26 | REQUIRE(count > 999000LL); |
| 27 | REQUIRE(count < 1001000LL); |
| 28 | |
| 29 | // now we can merge the HLLs |
| 30 | auto new_log = log.Merge(log2); |
| 31 | // the count should be pretty much the same |
| 32 | count = new_log->Count(); |
| 33 | REQUIRE(count > 999000LL); |
| 34 | REQUIRE(count < 1001000LL); |
| 35 | |
| 36 | // now test composability of the merge |
| 37 | // add everything to one big one |
| 38 | // add chunks to small ones and then merge them |
| 39 | // the result should be the same |
| 40 | HyperLogLog big; |
| 41 | HyperLogLog small[16]; |
| 42 | for (size_t i = 0; i < 1000000; i++) { |
| 43 | x = ((2 * i) + 3) % (i + 3 / 2); |
| 44 | big.Add((uint8_t *)&x, sizeof(int)); |
| 45 | small[i % 16].Add((uint8_t *)&x, sizeof(int)); |
| 46 | } |
| 47 | // now merge them into one big HyperLogLog |
| 48 | auto merged = HyperLogLog::Merge(small, 16); |
| 49 | // the result should be identical to the big one |
| 50 | REQUIRE(merged->Count() == big.Count()); |
| 51 | } |
| 52 |