1 | #include "duckdb/optimizer/statistics_propagator.hpp" |
---|---|
2 | #include "duckdb/planner/operator/logical_set_operation.hpp" |
3 | |
4 | namespace duckdb { |
5 | |
6 | void StatisticsPropagator::AddCardinalities(unique_ptr<NodeStatistics> &stats, NodeStatistics &new_stats) { |
7 | if (!stats->has_estimated_cardinality || !new_stats.has_estimated_cardinality || !stats->has_max_cardinality || |
8 | !new_stats.has_max_cardinality) { |
9 | stats = nullptr; |
10 | return; |
11 | } |
12 | stats->estimated_cardinality += new_stats.estimated_cardinality; |
13 | auto new_max = Hugeint::Add(lhs: stats->max_cardinality, rhs: new_stats.max_cardinality); |
14 | if (new_max < NumericLimits<int64_t>::Maximum()) { |
15 | int64_t result; |
16 | if (!Hugeint::TryCast<int64_t>(input: new_max, result)) { |
17 | throw InternalException("Overflow in cast in statistics propagation"); |
18 | } |
19 | D_ASSERT(result >= 0); |
20 | stats->max_cardinality = idx_t(result); |
21 | } else { |
22 | stats = nullptr; |
23 | } |
24 | } |
25 | |
26 | unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalSetOperation &setop, |
27 | unique_ptr<LogicalOperator> *node_ptr) { |
28 | // first propagate statistics in the child nodes |
29 | auto left_stats = PropagateStatistics(node_ptr&: setop.children[0]); |
30 | auto right_stats = PropagateStatistics(node_ptr&: setop.children[1]); |
31 | |
32 | // now fetch the column bindings on both sides |
33 | auto left_bindings = setop.children[0]->GetColumnBindings(); |
34 | auto right_bindings = setop.children[1]->GetColumnBindings(); |
35 | |
36 | D_ASSERT(left_bindings.size() == right_bindings.size()); |
37 | D_ASSERT(left_bindings.size() == setop.column_count); |
38 | for (idx_t i = 0; i < setop.column_count; i++) { |
39 | // for each column binding, we fetch the statistics from both the lhs and the rhs |
40 | auto left_entry = statistics_map.find(x: left_bindings[i]); |
41 | auto right_entry = statistics_map.find(x: right_bindings[i]); |
42 | if (left_entry == statistics_map.end() || right_entry == statistics_map.end()) { |
43 | // no statistics on one of the sides: can't propagate stats |
44 | continue; |
45 | } |
46 | unique_ptr<BaseStatistics> new_stats; |
47 | switch (setop.type) { |
48 | case LogicalOperatorType::LOGICAL_UNION: |
49 | // union: merge the stats of the LHS and RHS together |
50 | new_stats = left_entry->second->ToUnique(); |
51 | new_stats->Merge(other: *right_entry->second); |
52 | break; |
53 | case LogicalOperatorType::LOGICAL_EXCEPT: |
54 | // except: use the stats of the LHS |
55 | new_stats = left_entry->second->ToUnique(); |
56 | break; |
57 | case LogicalOperatorType::LOGICAL_INTERSECT: |
58 | // intersect: intersect the two stats |
59 | // FIXME: for now we just use the stats of the LHS, as this is correct |
60 | // however, the stats can be further refined to the minimal subset of the LHS and RHS |
61 | new_stats = left_entry->second->ToUnique(); |
62 | break; |
63 | default: |
64 | throw InternalException("Unsupported setop type"); |
65 | } |
66 | ColumnBinding binding(setop.table_index, i); |
67 | statistics_map[binding] = std::move(new_stats); |
68 | } |
69 | if (!left_stats || !right_stats) { |
70 | return nullptr; |
71 | } |
72 | if (setop.type == LogicalOperatorType::LOGICAL_UNION) { |
73 | AddCardinalities(stats&: left_stats, new_stats&: *right_stats); |
74 | } |
75 | return left_stats; |
76 | } |
77 | |
78 | } // namespace duckdb |
79 |