//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/storage/statistics/base_statistics.hpp
//
//
//===----------------------------------------------------------------------===//
8
9#pragma once
10
11#include "duckdb/common/common.hpp"
12#include "duckdb/common/types.hpp"
13#include "duckdb/common/operator/comparison_operators.hpp"
14#include "duckdb/common/enums/expression_type.hpp"
15#include "duckdb/common/types/value.hpp"
16#include "duckdb/storage/statistics/numeric_stats.hpp"
17#include "duckdb/storage/statistics/string_stats.hpp"
18
19namespace duckdb {
// Forward declarations of types this header only names in function signatures
// (by reference), so their full definitions are not needed here.
// UnifiedVectorFormat is declared but not referenced in the visible part of this header.
struct SelectionVector;

class Serializer;
class Deserializer;
class FieldWriter;
class FieldReader;
class Vector;
struct UnifiedVectorFormat;
28
//! Validity information that can be applied to a set of statistics through BaseStatistics::Set(StatsInfo).
//! The numeric values are spelled out explicitly; keep them stable when adding new entries.
//! NOTE(review): the exact effect of each entry is defined by BaseStatistics::Set, which is not part of this
//! header - the per-entry notes below are derived from the enumerator names.
enum class StatsInfo : uint8_t {
	//! NULL values may be present
	CAN_HAVE_NULL_VALUES = 0,
	//! NULL values are known to be absent
	CANNOT_HAVE_NULL_VALUES = 1,
	//! Valid (non-NULL) values may be present
	CAN_HAVE_VALID_VALUES = 2,
	//! Valid (non-NULL) values are known to be absent
	CANNOT_HAVE_VALID_VALUES = 3,
	//! Both NULL and valid values may be present
	CAN_HAVE_NULL_AND_VALID_VALUES = 4
};
36
//! The kind of statistics, as reported by BaseStatistics::GetStatsType.
//! The enumerators rely on their implicit values (0..4), so their order must not change.
enum class StatisticsType : uint8_t {
	//! Numeric statistics
	NUMERIC_STATS,
	//! String statistics
	STRING_STATS,
	//! Statistics for LIST types
	LIST_STATS,
	//! Statistics for STRUCT types
	STRUCT_STATS,
	//! NOTE(review): presumably statistics without any type-specific data - confirm against GetStatsType
	BASE_STATS
};
38
39class BaseStatistics {
40 friend struct NumericStats;
41 friend struct StringStats;
42 friend struct StructStats;
43 friend struct ListStats;
44
45public:
46 DUCKDB_API ~BaseStatistics();
47 // disable copy constructors
48 BaseStatistics(const BaseStatistics &other) = delete;
49 BaseStatistics &operator=(const BaseStatistics &) = delete;
50 //! enable move constructors
51 DUCKDB_API BaseStatistics(BaseStatistics &&other) noexcept;
52 DUCKDB_API BaseStatistics &operator=(BaseStatistics &&) noexcept;
53
54public:
55 //! Creates a set of statistics for data that is unknown, i.e. "has_null" is true, "has_no_null" is true, etc
56 //! This can be used in case nothing is known about the data - or can be used as a baseline when only a few things
57 //! are known
58 static BaseStatistics CreateUnknown(LogicalType type);
59 //! Creates statistics for an empty database, i.e. "has_null" is false, "has_no_null" is false, etc
60 //! This is used when incrementally constructing statistics by constantly adding new values
61 static BaseStatistics CreateEmpty(LogicalType type);
62
63 DUCKDB_API StatisticsType GetStatsType() const;
64 DUCKDB_API static StatisticsType GetStatsType(const LogicalType &type);
65
66 DUCKDB_API bool CanHaveNull() const;
67 DUCKDB_API bool CanHaveNoNull() const;
68
69 void SetDistinctCount(idx_t distinct_count);
70
71 bool IsConstant() const;
72
73 const LogicalType &GetType() const {
74 return type;
75 }
76
77 void Set(StatsInfo info);
78 void CombineValidity(BaseStatistics &left, BaseStatistics &right);
79 void CopyValidity(BaseStatistics &stats);
80 inline void SetHasNull() {
81 has_null = true;
82 }
83 inline void SetHasNoNull() {
84 has_no_null = true;
85 }
86
87 void Merge(const BaseStatistics &other);
88
89 void Copy(const BaseStatistics &other);
90
91 BaseStatistics Copy() const;
92 unique_ptr<BaseStatistics> ToUnique() const;
93 void CopyBase(const BaseStatistics &orig);
94
95 void Serialize(Serializer &serializer) const;
96 void Serialize(FieldWriter &writer) const;
97
98 idx_t GetDistinctCount();
99
100 static BaseStatistics Deserialize(Deserializer &source, LogicalType type);
101
102 //! Verify that a vector does not violate the statistics
103 void Verify(Vector &vector, const SelectionVector &sel, idx_t count) const;
104 void Verify(Vector &vector, idx_t count) const;
105
106 string ToString() const;
107
108 static BaseStatistics FromConstant(const Value &input);
109
110private:
111 BaseStatistics();
112 explicit BaseStatistics(LogicalType type);
113
114 static void Construct(BaseStatistics &stats, LogicalType type);
115
116 void InitializeUnknown();
117 void InitializeEmpty();
118
119 static BaseStatistics CreateUnknownType(LogicalType type);
120 static BaseStatistics CreateEmptyType(LogicalType type);
121 static BaseStatistics DeserializeType(FieldReader &reader, LogicalType type);
122 static BaseStatistics FromConstantType(const Value &input);
123
124private:
125 //! The type of the logical segment
126 LogicalType type;
127 //! Whether or not the segment can contain NULL values
128 bool has_null;
129 //! Whether or not the segment can contain values that are not null
130 bool has_no_null;
131 // estimate that one may have even if distinct_stats==nullptr
132 idx_t distinct_count;
133 //! Numeric and String stats
134 union {
135 //! Numeric stats data, for numeric stats
136 NumericStatsData numeric_data;
137 //! String stats data, for string stats
138 StringStatsData string_data;
139 } stats_union;
140 //! Child stats (for LIST and STRUCT)
141 unsafe_unique_array<BaseStatistics> child_stats;
142};
143
144} // namespace duckdb
145