1#include "duckdb/optimizer/column_lifetime_optimizer.hpp"
2
3#include "duckdb/planner/expression/bound_columnref_expression.hpp"
4
5#include "duckdb/planner/operator/logical_comparison_join.hpp"
6#include "duckdb/planner/operator/logical_delim_join.hpp"
7#include "duckdb/planner/operator/logical_filter.hpp"
8
9namespace duckdb {
10
11void ColumnLifetimeAnalyzer::ExtractUnusedColumnBindings(vector<ColumnBinding> bindings,
12 column_binding_set_t &unused_bindings) {
13 for (idx_t i = 0; i < bindings.size(); i++) {
14 if (column_references.find(x: bindings[i]) == column_references.end()) {
15 unused_bindings.insert(x: bindings[i]);
16 }
17 }
18}
19
20void ColumnLifetimeAnalyzer::GenerateProjectionMap(vector<ColumnBinding> bindings,
21 column_binding_set_t &unused_bindings,
22 vector<idx_t> &projection_map) {
23 if (unused_bindings.empty()) {
24 return;
25 }
26 // now iterate over the result bindings of the child
27 for (idx_t i = 0; i < bindings.size(); i++) {
28 // if this binding does not belong to the unused bindings, add it to the projection map
29 if (unused_bindings.find(x: bindings[i]) == unused_bindings.end()) {
30 projection_map.push_back(x: i);
31 }
32 }
33 if (projection_map.size() == bindings.size()) {
34 projection_map.clear();
35 }
36}
37
38void ColumnLifetimeAnalyzer::StandardVisitOperator(LogicalOperator &op) {
39 LogicalOperatorVisitor::VisitOperatorExpressions(op);
40 if (op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN) {
41 // visit the duplicate eliminated columns on the LHS, if any
42 auto &delim_join = op.Cast<LogicalDelimJoin>();
43 for (auto &expr : delim_join.duplicate_eliminated_columns) {
44 VisitExpression(expression: &expr);
45 }
46 }
47 LogicalOperatorVisitor::VisitOperatorChildren(op);
48}
49
50void ColumnLifetimeAnalyzer::VisitOperator(LogicalOperator &op) {
51 switch (op.type) {
52 case LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY: {
53 // FIXME: groups that are not referenced can be removed from projection
54 // recurse into the children of the aggregate
55 ColumnLifetimeAnalyzer analyzer;
56 analyzer.VisitOperatorExpressions(op);
57 analyzer.VisitOperator(op&: *op.children[0]);
58 return;
59 }
60 case LogicalOperatorType::LOGICAL_ASOF_JOIN:
61 case LogicalOperatorType::LOGICAL_DELIM_JOIN:
62 case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: {
63 if (everything_referenced) {
64 break;
65 }
66 auto &comp_join = op.Cast<LogicalComparisonJoin>();
67 if (comp_join.join_type == JoinType::MARK || comp_join.join_type == JoinType::SEMI ||
68 comp_join.join_type == JoinType::ANTI) {
69 break;
70 }
71 // FIXME for now, we only push into the projection map for equality (hash) joins
72 // FIXME: add projection to LHS as well
73 bool has_equality = false;
74 for (auto &cond : comp_join.conditions) {
75 if (cond.comparison == ExpressionType::COMPARE_EQUAL) {
76 has_equality = true;
77 }
78 }
79 if (!has_equality) {
80 break;
81 }
82 // now, for each of the columns of the RHS, check which columns need to be projected
83 column_binding_set_t unused_bindings;
84 ExtractUnusedColumnBindings(bindings: op.children[1]->GetColumnBindings(), unused_bindings);
85
86 // now recurse into the filter and its children
87 StandardVisitOperator(op);
88
89 // then generate the projection map
90 GenerateProjectionMap(bindings: op.children[1]->GetColumnBindings(), unused_bindings, projection_map&: comp_join.right_projection_map);
91 return;
92 }
93 case LogicalOperatorType::LOGICAL_UNION:
94 case LogicalOperatorType::LOGICAL_EXCEPT:
95 case LogicalOperatorType::LOGICAL_INTERSECT:
96 // for set operations we don't remove anything, just recursively visit the children
97 // FIXME: for UNION we can remove unreferenced columns as long as everything_referenced is false (i.e. we
98 // encounter a UNION node that is not preceded by a DISTINCT)
99 for (auto &child : op.children) {
100 ColumnLifetimeAnalyzer analyzer(true);
101 analyzer.VisitOperator(op&: *child);
102 }
103 return;
104 case LogicalOperatorType::LOGICAL_PROJECTION: {
105 // then recurse into the children of this projection
106 ColumnLifetimeAnalyzer analyzer;
107 analyzer.VisitOperatorExpressions(op);
108 analyzer.VisitOperator(op&: *op.children[0]);
109 return;
110 }
111 case LogicalOperatorType::LOGICAL_DISTINCT: {
112 // distinct, all projected columns are used for the DISTINCT computation
113 // mark all columns as used and continue to the children
114 // FIXME: DISTINCT with expression list does not implicitly reference everything
115 everything_referenced = true;
116 break;
117 }
118 case LogicalOperatorType::LOGICAL_FILTER: {
119 auto &filter = op.Cast<LogicalFilter>();
120 if (everything_referenced) {
121 break;
122 }
123 // filter, figure out which columns are not needed after the filter
124 column_binding_set_t unused_bindings;
125 ExtractUnusedColumnBindings(bindings: op.children[0]->GetColumnBindings(), unused_bindings);
126
127 // now recurse into the filter and its children
128 StandardVisitOperator(op);
129
130 // then generate the projection map
131 GenerateProjectionMap(bindings: op.children[0]->GetColumnBindings(), unused_bindings, projection_map&: filter.projection_map);
132 return;
133 }
134 default:
135 break;
136 }
137 StandardVisitOperator(op);
138}
139
140unique_ptr<Expression> ColumnLifetimeAnalyzer::VisitReplace(BoundColumnRefExpression &expr,
141 unique_ptr<Expression> *expr_ptr) {
142 column_references.insert(x: expr.binding);
143 return nullptr;
144}
145
146unique_ptr<Expression> ColumnLifetimeAnalyzer::VisitReplace(BoundReferenceExpression &expr,
147 unique_ptr<Expression> *expr_ptr) {
148 // BoundReferenceExpression should not be used here yet, they only belong in the physical plan
149 throw InternalException("BoundReferenceExpression should not be used here yet!");
150}
151
152} // namespace duckdb
153