//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/storage/statistics/distinct_statistics.hpp
//
//
//===----------------------------------------------------------------------===//
8
9#pragma once
10
11#include "duckdb/common/atomic.hpp"
12#include "duckdb/common/types/hyperloglog.hpp"
13#include "duckdb/storage/statistics/base_statistics.hpp"
14
15namespace duckdb {
16class Serializer;
17class Deserializer;
18class Vector;
19
20class DistinctStatistics {
21public:
22 DistinctStatistics();
23 explicit DistinctStatistics(unique_ptr<HyperLogLog> log, idx_t sample_count, idx_t total_count);
24
25 //! The HLL of the table
26 unique_ptr<HyperLogLog> log;
27 //! How many values have been sampled into the HLL
28 atomic<idx_t> sample_count;
29 //! How many values have been inserted (before sampling)
30 atomic<idx_t> total_count;
31
32public:
33 void Merge(const DistinctStatistics &other);
34
35 unique_ptr<DistinctStatistics> Copy() const;
36
37 void Serialize(Serializer &serializer) const;
38 void Serialize(FieldWriter &writer) const;
39
40 static unique_ptr<DistinctStatistics> Deserialize(Deserializer &source);
41 static unique_ptr<DistinctStatistics> Deserialize(FieldReader &reader);
42
43 void Update(Vector &update, idx_t count, bool sample = true);
44 void Update(UnifiedVectorFormat &update_data, const LogicalType &ptype, idx_t count, bool sample = true);
45
46 string ToString() const;
47 idx_t GetCount() const;
48
49 static bool TypeIsSupported(const LogicalType &type);
50
51private:
52 //! For distinct statistics we sample the input to speed up insertions
53 static constexpr const double SAMPLE_RATE = 0.1;
54};
55
56} // namespace duckdb
57