1#include "catch.hpp"
2#include "duckdb/common/types/hyperloglog.hpp"
3
4#include <vector>
5
6using namespace duckdb;
7using namespace std;
8
9TEST_CASE("Test that hyperloglog works", "[hyperloglog]") {
10 HyperLogLog log;
11 // add a million elements of the same value
12 int x = 4;
13 for (size_t i = 0; i < 1000000; i++) {
14 log.Add((uint8_t *)&x, sizeof(int));
15 }
16 REQUIRE(log.Count() == 1);
17
18 // now add a million different values
19 HyperLogLog log2;
20 for (size_t i = 0; i < 1000000; i++) {
21 x = i;
22 log2.Add((uint8_t *)&x, sizeof(int));
23 }
24 // the count is approximate, but should be pretty close to a million
25 size_t count = log2.Count();
26 REQUIRE(count > 999000LL);
27 REQUIRE(count < 1001000LL);
28
29 // now we can merge the HLLs
30 auto new_log = log.Merge(log2);
31 // the count should be pretty much the same
32 count = new_log->Count();
33 REQUIRE(count > 999000LL);
34 REQUIRE(count < 1001000LL);
35
36 // now test composability of the merge
37 // add everything to one big one
38 // add chunks to small ones and then merge them
39 // the result should be the same
40 HyperLogLog big;
41 HyperLogLog small[16];
42 for (size_t i = 0; i < 1000000; i++) {
43 x = ((2 * i) + 3) % (i + 3 / 2);
44 big.Add((uint8_t *)&x, sizeof(int));
45 small[i % 16].Add((uint8_t *)&x, sizeof(int));
46 }
47 // now merge them into one big HyperLogLog
48 auto merged = HyperLogLog::Merge(small, 16);
49 // the result should be identical to the big one
50 REQUIRE(merged->Count() == big.Count());
51}
52