1 | //===----------------------------------------------------------------------===// |
2 | // DuckDB |
3 | // |
4 | // duckdb/storage/statistics/string_stats.hpp |
5 | // |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #pragma once |
10 | |
11 | #include "duckdb/common/common.hpp" |
12 | #include "duckdb/common/exception.hpp" |
13 | #include "duckdb/common/types/hugeint.hpp" |
14 | #include "duckdb/common/enums/filter_propagate_result.hpp" |
15 | #include "duckdb/common/enums/expression_type.hpp" |
16 | #include "duckdb/common/operator/comparison_operators.hpp" |
17 | |
18 | namespace duckdb { |
19 | class BaseStatistics; |
20 | class FieldWriter; |
21 | class FieldReader; |
22 | struct SelectionVector; |
23 | class Vector; |
24 | |
25 | struct StringStatsData { |
26 | constexpr static uint32_t MAX_STRING_MINMAX_SIZE = 8; |
27 | |
28 | //! The minimum value of the segment; held in MAX_STRING_MINMAX_SIZE bytes, so it is potentially truncated |
29 | data_t min[MAX_STRING_MINMAX_SIZE]; |
30 | //! The maximum value of the segment; held in MAX_STRING_MINMAX_SIZE bytes, so it is potentially truncated |
31 | data_t max[MAX_STRING_MINMAX_SIZE]; |
32 | //! True if the column can contain unicode characters |
33 | bool has_unicode; |
34 | //! True if the maximum string length is known (see max_string_length) |
35 | bool has_max_string_length; |
36 | //! The maximum string length in bytes; only known when has_max_string_length is true |
37 | uint32_t max_string_length; |
38 | }; |
39 | |
40 | struct StringStats { |
41 | //! Creates unknown statistics - i.e. "has_unicode" is true, "max_string_length" is unknown, "min" is \0, "max" is \xFF |
42 | DUCKDB_API static BaseStatistics CreateUnknown(LogicalType type); |
43 | //! Creates empty statistics - i.e. "has_unicode" is false, "max_string_length" is 0, "min" is \xFF, "max" is \x00 |
44 | DUCKDB_API static BaseStatistics CreateEmpty(LogicalType type); |
45 | //! Returns whether or not the statistics have a maximum string length defined |
46 | DUCKDB_API static bool HasMaxStringLength(const BaseStatistics &stats); |
47 | //! Returns the maximum string length; throws an exception if HasMaxStringLength() is false |
48 | DUCKDB_API static uint32_t MaxStringLength(const BaseStatistics &stats); |
49 | //! Returns whether or not the strings can contain unicode |
50 | DUCKDB_API static bool CanContainUnicode(const BaseStatistics &stats); |
51 | |
52 | //! Resets the max string length so that HasMaxStringLength() is false afterwards |
53 | DUCKDB_API static void ResetMaxStringLength(BaseStatistics &stats); |
54 | //! Presumably marks the statistics as containing unicode (going by the name) - FIXME: make this part of Set on statistics |
55 | DUCKDB_API static void SetContainsUnicode(BaseStatistics &stats); |
56 | |
57 | DUCKDB_API static void Serialize(const BaseStatistics &stats, FieldWriter &writer); |
58 | DUCKDB_API static BaseStatistics Deserialize(FieldReader &reader, LogicalType type); |
59 | |
60 | DUCKDB_API static string ToString(const BaseStatistics &stats); |
61 | |
62 | DUCKDB_API static FilterPropagateResult CheckZonemap(const BaseStatistics &stats, ExpressionType comparison_type, |
63 | const string &value); |
64 | |
65 | DUCKDB_API static void Update(BaseStatistics &stats, const string_t &value); |
66 | DUCKDB_API static void Merge(BaseStatistics &stats, const BaseStatistics &other); |
67 | DUCKDB_API static void Verify(const BaseStatistics &stats, Vector &vector, const SelectionVector &sel, idx_t count); |
68 | |
69 | private: |
70 | static StringStatsData &GetDataUnsafe(BaseStatistics &stats); |
71 | static const StringStatsData &GetDataUnsafe(const BaseStatistics &stats); |
72 | }; |
73 | |
74 | } // namespace duckdb |
75 | |