1#include "duckdb/optimizer/statistics_propagator.hpp"
2#include "duckdb/planner/operator/logical_set_operation.hpp"
3
4namespace duckdb {
5
6void StatisticsPropagator::AddCardinalities(unique_ptr<NodeStatistics> &stats, NodeStatistics &new_stats) {
7 if (!stats->has_estimated_cardinality || !new_stats.has_estimated_cardinality || !stats->has_max_cardinality ||
8 !new_stats.has_max_cardinality) {
9 stats = nullptr;
10 return;
11 }
12 stats->estimated_cardinality += new_stats.estimated_cardinality;
13 auto new_max = Hugeint::Add(lhs: stats->max_cardinality, rhs: new_stats.max_cardinality);
14 if (new_max < NumericLimits<int64_t>::Maximum()) {
15 int64_t result;
16 if (!Hugeint::TryCast<int64_t>(input: new_max, result)) {
17 throw InternalException("Overflow in cast in statistics propagation");
18 }
19 D_ASSERT(result >= 0);
20 stats->max_cardinality = idx_t(result);
21 } else {
22 stats = nullptr;
23 }
24}
25
26unique_ptr<NodeStatistics> StatisticsPropagator::PropagateStatistics(LogicalSetOperation &setop,
27 unique_ptr<LogicalOperator> *node_ptr) {
28 // first propagate statistics in the child nodes
29 auto left_stats = PropagateStatistics(node_ptr&: setop.children[0]);
30 auto right_stats = PropagateStatistics(node_ptr&: setop.children[1]);
31
32 // now fetch the column bindings on both sides
33 auto left_bindings = setop.children[0]->GetColumnBindings();
34 auto right_bindings = setop.children[1]->GetColumnBindings();
35
36 D_ASSERT(left_bindings.size() == right_bindings.size());
37 D_ASSERT(left_bindings.size() == setop.column_count);
38 for (idx_t i = 0; i < setop.column_count; i++) {
39 // for each column binding, we fetch the statistics from both the lhs and the rhs
40 auto left_entry = statistics_map.find(x: left_bindings[i]);
41 auto right_entry = statistics_map.find(x: right_bindings[i]);
42 if (left_entry == statistics_map.end() || right_entry == statistics_map.end()) {
43 // no statistics on one of the sides: can't propagate stats
44 continue;
45 }
46 unique_ptr<BaseStatistics> new_stats;
47 switch (setop.type) {
48 case LogicalOperatorType::LOGICAL_UNION:
49 // union: merge the stats of the LHS and RHS together
50 new_stats = left_entry->second->ToUnique();
51 new_stats->Merge(other: *right_entry->second);
52 break;
53 case LogicalOperatorType::LOGICAL_EXCEPT:
54 // except: use the stats of the LHS
55 new_stats = left_entry->second->ToUnique();
56 break;
57 case LogicalOperatorType::LOGICAL_INTERSECT:
58 // intersect: intersect the two stats
59 // FIXME: for now we just use the stats of the LHS, as this is correct
60 // however, the stats can be further refined to the minimal subset of the LHS and RHS
61 new_stats = left_entry->second->ToUnique();
62 break;
63 default:
64 throw InternalException("Unsupported setop type");
65 }
66 ColumnBinding binding(setop.table_index, i);
67 statistics_map[binding] = std::move(new_stats);
68 }
69 if (!left_stats || !right_stats) {
70 return nullptr;
71 }
72 if (setop.type == LogicalOperatorType::LOGICAL_UNION) {
73 AddCardinalities(stats&: left_stats, new_stats&: *right_stats);
74 }
75 return left_stats;
76}
77
78} // namespace duckdb
79