| 1 | //===----------------------------------------------------------------------===// |
| 2 | // DuckDB |
| 3 | // |
| 4 | // duckdb/optimizer/statistics_propagator.hpp |
| 5 | // |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #pragma once |
| 10 | |
| 11 | #include "duckdb/common/common.hpp" |
| 12 | #include "duckdb/common/enums/filter_propagate_result.hpp" |
| 13 | #include "duckdb/common/types/value.hpp" |
| 14 | #include "duckdb/planner/bound_tokens.hpp" |
| 15 | #include "duckdb/planner/column_binding_map.hpp" |
| 16 | #include "duckdb/planner/logical_tokens.hpp" |
| 17 | #include "duckdb/storage/statistics/base_statistics.hpp" |
| 18 | #include "duckdb/storage/statistics/node_statistics.hpp" |
| 19 | |
| 20 | namespace duckdb { |
| 21 | class ClientContext; |
| 22 | class LogicalOperator; |
| 23 | class TableFilter; |
| 24 | struct BoundOrderByNode; |
| 25 | |
| 26 | class StatisticsPropagator { |
| 27 | public: |
| 28 | explicit StatisticsPropagator(ClientContext &context); |
| 29 | |
| 30 | unique_ptr<NodeStatistics> PropagateStatistics(unique_ptr<LogicalOperator> &node_ptr); |
| 31 | |
| 32 | private: |
| 33 | //! Propagate statistics through an operator |
| 34 | unique_ptr<NodeStatistics> PropagateStatistics(LogicalOperator &node, unique_ptr<LogicalOperator> *node_ptr); |
| 35 | |
| 36 | unique_ptr<NodeStatistics> PropagateStatistics(LogicalFilter &op, unique_ptr<LogicalOperator> *node_ptr); |
| 37 | unique_ptr<NodeStatistics> PropagateStatistics(LogicalGet &op, unique_ptr<LogicalOperator> *node_ptr); |
| 38 | unique_ptr<NodeStatistics> PropagateStatistics(LogicalJoin &op, unique_ptr<LogicalOperator> *node_ptr); |
| 39 | unique_ptr<NodeStatistics> PropagateStatistics(LogicalPositionalJoin &op, unique_ptr<LogicalOperator> *node_ptr); |
| 40 | unique_ptr<NodeStatistics> PropagateStatistics(LogicalProjection &op, unique_ptr<LogicalOperator> *node_ptr); |
| 41 | void PropagateStatistics(LogicalComparisonJoin &op, unique_ptr<LogicalOperator> *node_ptr); |
| 42 | void PropagateStatistics(LogicalAnyJoin &op, unique_ptr<LogicalOperator> *node_ptr); |
| 43 | unique_ptr<NodeStatistics> PropagateStatistics(LogicalSetOperation &op, unique_ptr<LogicalOperator> *node_ptr); |
| 44 | unique_ptr<NodeStatistics> PropagateStatistics(LogicalAggregate &op, unique_ptr<LogicalOperator> *node_ptr); |
| 45 | unique_ptr<NodeStatistics> PropagateStatistics(LogicalCrossProduct &op, unique_ptr<LogicalOperator> *node_ptr); |
| 46 | unique_ptr<NodeStatistics> PropagateStatistics(LogicalLimit &op, unique_ptr<LogicalOperator> *node_ptr); |
| 47 | unique_ptr<NodeStatistics> PropagateStatistics(LogicalOrder &op, unique_ptr<LogicalOperator> *node_ptr); |
| 48 | unique_ptr<NodeStatistics> PropagateStatistics(LogicalWindow &op, unique_ptr<LogicalOperator> *node_ptr); |
| 49 | |
| 50 | unique_ptr<NodeStatistics> PropagateChildren(LogicalOperator &node, unique_ptr<LogicalOperator> *node_ptr); |
| 51 | |
| 52 | //! Return statistics from a constant value |
| 53 | unique_ptr<BaseStatistics> StatisticsFromValue(const Value &input); |
| 54 | //! Run a comparison with two sets of statistics, returns if the comparison will always returns true/false or not |
| 55 | FilterPropagateResult PropagateComparison(BaseStatistics &left, BaseStatistics &right, ExpressionType comparison); |
| 56 | |
| 57 | //! Update filter statistics from a filter with a constant |
| 58 | void UpdateFilterStatistics(BaseStatistics &input, ExpressionType comparison_type, const Value &constant); |
| 59 | //! Update statistics from a filter between two stats |
| 60 | void UpdateFilterStatistics(BaseStatistics &lstats, BaseStatistics &rstats, ExpressionType comparison_type); |
| 61 | //! Update filter statistics from a generic comparison |
| 62 | void UpdateFilterStatistics(Expression &left, Expression &right, ExpressionType comparison_type); |
| 63 | //! Update filter statistics from an expression |
| 64 | void UpdateFilterStatistics(Expression &condition); |
| 65 | //! Set the statistics of a specific column binding to not contain null values |
| 66 | void SetStatisticsNotNull(ColumnBinding binding); |
| 67 | |
| 68 | //! Run a comparison between the statistics and the table filter; returns the prune result |
| 69 | FilterPropagateResult PropagateTableFilter(BaseStatistics &stats, TableFilter &filter); |
| 70 | //! Update filter statistics from a TableFilter |
| 71 | void UpdateFilterStatistics(BaseStatistics &input, TableFilter &filter); |
| 72 | |
| 73 | //! Add cardinalities together (i.e. new max is stats.max + new_stats.max): used for union |
| 74 | void AddCardinalities(unique_ptr<NodeStatistics> &stats, NodeStatistics &new_stats); |
| 75 | //! Multiply the cardinalities together (i.e. new max cardinality is stats.max * new_stats.max): used for |
| 76 | //! joins/cross products |
| 77 | void MultiplyCardinalities(unique_ptr<NodeStatistics> &stats, NodeStatistics &new_stats); |
| 78 | |
| 79 | unique_ptr<BaseStatistics> PropagateExpression(unique_ptr<Expression> &expr); |
| 80 | unique_ptr<BaseStatistics> PropagateExpression(Expression &expr, unique_ptr<Expression> *expr_ptr); |
| 81 | |
| 82 | unique_ptr<BaseStatistics> PropagateExpression(BoundAggregateExpression &expr, unique_ptr<Expression> *expr_ptr); |
| 83 | unique_ptr<BaseStatistics> PropagateExpression(BoundBetweenExpression &expr, unique_ptr<Expression> *expr_ptr); |
| 84 | unique_ptr<BaseStatistics> PropagateExpression(BoundCaseExpression &expr, unique_ptr<Expression> *expr_ptr); |
| 85 | unique_ptr<BaseStatistics> PropagateExpression(BoundCastExpression &expr, unique_ptr<Expression> *expr_ptr); |
| 86 | unique_ptr<BaseStatistics> PropagateExpression(BoundConjunctionExpression &expr, unique_ptr<Expression> *expr_ptr); |
| 87 | unique_ptr<BaseStatistics> PropagateExpression(BoundFunctionExpression &expr, unique_ptr<Expression> *expr_ptr); |
| 88 | unique_ptr<BaseStatistics> PropagateExpression(BoundComparisonExpression &expr, unique_ptr<Expression> *expr_ptr); |
| 89 | unique_ptr<BaseStatistics> PropagateExpression(BoundConstantExpression &expr, unique_ptr<Expression> *expr_ptr); |
| 90 | unique_ptr<BaseStatistics> PropagateExpression(BoundColumnRefExpression &expr, unique_ptr<Expression> *expr_ptr); |
| 91 | unique_ptr<BaseStatistics> PropagateExpression(BoundOperatorExpression &expr, unique_ptr<Expression> *expr_ptr); |
| 92 | |
| 93 | void PropagateAndCompress(unique_ptr<Expression> &expr, unique_ptr<BaseStatistics> &stats); |
| 94 | |
| 95 | void ReplaceWithEmptyResult(unique_ptr<LogicalOperator> &node); |
| 96 | |
| 97 | bool ExpressionIsConstant(Expression &expr, const Value &val); |
| 98 | bool ExpressionIsConstantOrNull(Expression &expr, const Value &val); |
| 99 | |
| 100 | private: |
| 101 | ClientContext &context; |
| 102 | //! The map of ColumnBinding -> statistics for the various nodes |
| 103 | column_binding_map_t<unique_ptr<BaseStatistics>> statistics_map; |
| 104 | //! Node stats for the current node |
| 105 | unique_ptr<NodeStatistics> node_stats; |
| 106 | }; |
| 107 | |
| 108 | } // namespace duckdb |
| 109 | |