| 1 | #include "catch.hpp" | 
|---|---|
| 2 | #include "duckdb/common/types/hyperloglog.hpp" | 
| 3 | |
| 4 | #include <vector> | 
| 5 | |
| 6 | using namespace duckdb; | 
| 7 | using namespace std; | 
| 8 | |
| 9 | TEST_CASE( "Test that hyperloglog works", "[hyperloglog]") { | 
| 10 | HyperLogLog log; | 
| 11 | // add a million elements of the same value | 
| 12 | int x = 4; | 
| 13 | for (size_t i = 0; i < 1000000; i++) { | 
| 14 | log.Add((uint8_t *)&x, sizeof(int)); | 
| 15 | } | 
| 16 | REQUIRE(log.Count() == 1); | 
| 17 | |
| 18 | // now add a million different values | 
| 19 | HyperLogLog log2; | 
| 20 | for (size_t i = 0; i < 1000000; i++) { | 
| 21 | x = i; | 
| 22 | log2.Add((uint8_t *)&x, sizeof(int)); | 
| 23 | } | 
| 24 | // the count is approximate, but should be pretty close to a million | 
| 25 | size_t count = log2.Count(); | 
| 26 | REQUIRE(count > 999000LL); | 
| 27 | REQUIRE(count < 1001000LL); | 
| 28 | |
| 29 | // now we can merge the HLLs | 
| 30 | auto new_log = log.Merge(log2); | 
| 31 | // the count should be pretty much the same | 
| 32 | count = new_log->Count(); | 
| 33 | REQUIRE(count > 999000LL); | 
| 34 | REQUIRE(count < 1001000LL); | 
| 35 | |
| 36 | // now test composability of the merge | 
| 37 | // add everything to one big one | 
| 38 | // add chunks to small ones and then merge them | 
| 39 | // the result should be the same | 
| 40 | HyperLogLog big; | 
| 41 | HyperLogLog small[16]; | 
| 42 | for (size_t i = 0; i < 1000000; i++) { | 
| 43 | x = ((2 * i) + 3) % (i + 3 / 2); | 
| 44 | big.Add((uint8_t *)&x, sizeof(int)); | 
| 45 | small[i % 16].Add((uint8_t *)&x, sizeof(int)); | 
| 46 | } | 
| 47 | // now merge them into one big HyperLogLog | 
| 48 | auto merged = HyperLogLog::Merge(small, 16); | 
| 49 | // the result should be identical to the big one | 
| 50 | REQUIRE(merged->Count() == big.Count()); | 
| 51 | } | 
| 52 | 
