1#include "duckdb/optimizer/statistics_propagator.hpp"
2#include "duckdb/planner/expression/bound_comparison_expression.hpp"
3#include "duckdb/planner/expression/bound_constant_expression.hpp"
4#include "duckdb/optimizer/expression_rewriter.hpp"
5
6namespace duckdb {
7
8FilterPropagateResult StatisticsPropagator::PropagateComparison(BaseStatistics &lstats, BaseStatistics &rstats,
9 ExpressionType comparison) {
10 // only handle numerics for now
11 switch (lstats.GetType().InternalType()) {
12 case PhysicalType::BOOL:
13 case PhysicalType::UINT8:
14 case PhysicalType::UINT16:
15 case PhysicalType::UINT32:
16 case PhysicalType::UINT64:
17 case PhysicalType::INT8:
18 case PhysicalType::INT16:
19 case PhysicalType::INT32:
20 case PhysicalType::INT64:
21 case PhysicalType::INT128:
22 case PhysicalType::FLOAT:
23 case PhysicalType::DOUBLE:
24 break;
25 default:
26 return FilterPropagateResult::NO_PRUNING_POSSIBLE;
27 }
28 if (!NumericStats::HasMinMax(stats: lstats) || !NumericStats::HasMinMax(stats: rstats)) {
29 // no stats available: nothing to prune
30 return FilterPropagateResult::NO_PRUNING_POSSIBLE;
31 }
32 // the result of the propagation depend on whether or not either side has null values
33 // if there are null values present, we cannot say whether or not
34 bool has_null = lstats.CanHaveNull() || rstats.CanHaveNull();
35 switch (comparison) {
36 case ExpressionType::COMPARE_EQUAL:
37 // l = r, if l.min > r.max or r.min > l.max equality is not possible
38 if (NumericStats::Min(stats: lstats) > NumericStats::Max(stats: rstats) ||
39 NumericStats::Min(stats: rstats) > NumericStats::Max(stats: lstats)) {
40 return has_null ? FilterPropagateResult::FILTER_FALSE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_FALSE;
41 } else {
42 return FilterPropagateResult::NO_PRUNING_POSSIBLE;
43 }
44 case ExpressionType::COMPARE_GREATERTHAN:
45 // l > r
46 if (NumericStats::Min(stats: lstats) > NumericStats::Max(stats: rstats)) {
47 // if l.min > r.max, it is always true ONLY if neither side contains nulls
48 return has_null ? FilterPropagateResult::FILTER_TRUE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_TRUE;
49 }
50 // if r.min is bigger or equal to l.max, the filter is always false
51 if (NumericStats::Min(stats: rstats) >= NumericStats::Max(stats: lstats)) {
52 return has_null ? FilterPropagateResult::FILTER_FALSE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_FALSE;
53 }
54 return FilterPropagateResult::NO_PRUNING_POSSIBLE;
55 case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
56 // l >= r
57 if (NumericStats::Min(stats: lstats) >= NumericStats::Max(stats: rstats)) {
58 // if l.min >= r.max, it is always true ONLY if neither side contains nulls
59 return has_null ? FilterPropagateResult::FILTER_TRUE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_TRUE;
60 }
61 // if r.min > l.max, the filter is always false
62 if (NumericStats::Min(stats: rstats) > NumericStats::Max(stats: lstats)) {
63 return has_null ? FilterPropagateResult::FILTER_FALSE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_FALSE;
64 }
65 return FilterPropagateResult::NO_PRUNING_POSSIBLE;
66 case ExpressionType::COMPARE_LESSTHAN:
67 // l < r
68 if (NumericStats::Max(stats: lstats) < NumericStats::Min(stats: rstats)) {
69 // if l.max < r.min, it is always true ONLY if neither side contains nulls
70 return has_null ? FilterPropagateResult::FILTER_TRUE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_TRUE;
71 }
72 // if l.min >= rstats.max, the filter is always false
73 if (NumericStats::Min(stats: lstats) >= NumericStats::Max(stats: rstats)) {
74 return has_null ? FilterPropagateResult::FILTER_FALSE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_FALSE;
75 }
76 return FilterPropagateResult::NO_PRUNING_POSSIBLE;
77 case ExpressionType::COMPARE_LESSTHANOREQUALTO:
78 // l <= r
79 if (NumericStats::Max(stats: lstats) <= NumericStats::Min(stats: rstats)) {
80 // if l.max <= r.min, it is always true ONLY if neither side contains nulls
81 return has_null ? FilterPropagateResult::FILTER_TRUE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_TRUE;
82 }
83 // if l.min > rstats.max, the filter is always false
84 if (NumericStats::Min(stats: lstats) > NumericStats::Max(stats: rstats)) {
85 return has_null ? FilterPropagateResult::FILTER_FALSE_OR_NULL : FilterPropagateResult::FILTER_ALWAYS_FALSE;
86 }
87 return FilterPropagateResult::NO_PRUNING_POSSIBLE;
88 default:
89 return FilterPropagateResult::NO_PRUNING_POSSIBLE;
90 }
91}
92
93unique_ptr<BaseStatistics> StatisticsPropagator::PropagateExpression(BoundComparisonExpression &expr,
94 unique_ptr<Expression> *expr_ptr) {
95 auto left_stats = PropagateExpression(expr&: expr.left);
96 auto right_stats = PropagateExpression(expr&: expr.right);
97 if (!left_stats || !right_stats) {
98 return nullptr;
99 }
100 // propagate the statistics of the comparison operator
101 auto propagate_result = PropagateComparison(lstats&: *left_stats, rstats&: *right_stats, comparison: expr.type);
102 switch (propagate_result) {
103 case FilterPropagateResult::FILTER_ALWAYS_TRUE:
104 *expr_ptr = make_uniq<BoundConstantExpression>(args: Value::BOOLEAN(value: true));
105 return PropagateExpression(expr&: *expr_ptr);
106 case FilterPropagateResult::FILTER_ALWAYS_FALSE:
107 *expr_ptr = make_uniq<BoundConstantExpression>(args: Value::BOOLEAN(value: false));
108 return PropagateExpression(expr&: *expr_ptr);
109 case FilterPropagateResult::FILTER_TRUE_OR_NULL: {
110 vector<unique_ptr<Expression>> children;
111 children.push_back(x: std::move(expr.left));
112 children.push_back(x: std::move(expr.right));
113 *expr_ptr = ExpressionRewriter::ConstantOrNull(children: std::move(children), value: Value::BOOLEAN(value: true));
114 return nullptr;
115 }
116 case FilterPropagateResult::FILTER_FALSE_OR_NULL: {
117 vector<unique_ptr<Expression>> children;
118 children.push_back(x: std::move(expr.left));
119 children.push_back(x: std::move(expr.right));
120 *expr_ptr = ExpressionRewriter::ConstantOrNull(children: std::move(children), value: Value::BOOLEAN(value: false));
121 return nullptr;
122 }
123 default:
124 // FIXME: we can propagate nulls here, i.e. this expression will have nulls only if left and right has nulls
125 return nullptr;
126 }
127}
128
129} // namespace duckdb
130