| 1 | #include "duckdb/optimizer/statistics_propagator.hpp" |
|---|---|
| 2 | #include "duckdb/planner/operator/logical_set_operation.hpp" |
| 3 | |
| 4 | namespace duckdb { |
| 5 | |
| 6 | void StatisticsPropagator::AddCardinalities(unique_ptr<NodeStatistics> &stats, NodeStatistics &new_stats) { |
| 7 | if (!stats->has_estimated_cardinality || !new_stats.has_estimated_cardinality || !stats->has_max_cardinality || |
| 8 | !new_stats.has_max_cardinality) { |
| 9 | stats = nullptr; |
| 10 | return; |
| 11 | } |
| 12 | stats->estimated_cardinality += new_stats.estimated_cardinality; |
| 13 | auto new_max = Hugeint::Add(lhs: stats->max_cardinality, rhs: new_stats.max_cardinality); |
| 14 | if (new_max < NumericLimits<int64_t>::Maximum()) { |
| 15 | int64_t result; |
| 16 | if (!Hugeint::TryCast<int64_t>(input: new_max, result)) { |
| 17 | throw InternalException("Overflow in cast in statistics propagation"); |
| 18 | } |
| 19 | D_ASSERT(result >= 0); |
| 20 | stats->max_cardinality = idx_t(result); |
| 21 | } else { |
| 22 | stats = nullptr; |
| 23 | } |
| 24 | } |
| 25 | |
| 26 | unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalSetOperation &setop, |
| 27 | unique_ptr<LogicalOperator> *node_ptr) { |
| 28 | // first propagate statistics in the child nodes |
| 29 | auto left_stats = PropagateStatistics(node_ptr&: setop.children[0]); |
| 30 | auto right_stats = PropagateStatistics(node_ptr&: setop.children[1]); |
| 31 | |
| 32 | // now fetch the column bindings on both sides |
| 33 | auto left_bindings = setop.children[0]->GetColumnBindings(); |
| 34 | auto right_bindings = setop.children[1]->GetColumnBindings(); |
| 35 | |
| 36 | D_ASSERT(left_bindings.size() == right_bindings.size()); |
| 37 | D_ASSERT(left_bindings.size() == setop.column_count); |
| 38 | for (idx_t i = 0; i < setop.column_count; i++) { |
| 39 | // for each column binding, we fetch the statistics from both the lhs and the rhs |
| 40 | auto left_entry = statistics_map.find(x: left_bindings[i]); |
| 41 | auto right_entry = statistics_map.find(x: right_bindings[i]); |
| 42 | if (left_entry == statistics_map.end() || right_entry == statistics_map.end()) { |
| 43 | // no statistics on one of the sides: can't propagate stats |
| 44 | continue; |
| 45 | } |
| 46 | unique_ptr<BaseStatistics> new_stats; |
| 47 | switch (setop.type) { |
| 48 | case LogicalOperatorType::LOGICAL_UNION: |
| 49 | // union: merge the stats of the LHS and RHS together |
| 50 | new_stats = left_entry->second->ToUnique(); |
| 51 | new_stats->Merge(other: *right_entry->second); |
| 52 | break; |
| 53 | case LogicalOperatorType::LOGICAL_EXCEPT: |
| 54 | // except: use the stats of the LHS |
| 55 | new_stats = left_entry->second->ToUnique(); |
| 56 | break; |
| 57 | case LogicalOperatorType::LOGICAL_INTERSECT: |
| 58 | // intersect: intersect the two stats |
| 59 | // FIXME: for now we just use the stats of the LHS, as this is correct |
| 60 | // however, the stats can be further refined to the minimal subset of the LHS and RHS |
| 61 | new_stats = left_entry->second->ToUnique(); |
| 62 | break; |
| 63 | default: |
| 64 | throw InternalException("Unsupported setop type"); |
| 65 | } |
| 66 | ColumnBinding binding(setop.table_index, i); |
| 67 | statistics_map[binding] = std::move(new_stats); |
| 68 | } |
| 69 | if (!left_stats || !right_stats) { |
| 70 | return nullptr; |
| 71 | } |
| 72 | if (setop.type == LogicalOperatorType::LOGICAL_UNION) { |
| 73 | AddCardinalities(stats&: left_stats, new_stats&: *right_stats); |
| 74 | } |
| 75 | return left_stats; |
| 76 | } |
| 77 | |
| 78 | } // namespace duckdb |
| 79 |