1#include "duckdb/optimizer/column_lifetime_optimizer.hpp"
2
3#include "duckdb/planner/expression/bound_columnref_expression.hpp"
4
5#include "duckdb/planner/operator/logical_comparison_join.hpp"
6#include "duckdb/planner/operator/logical_delim_join.hpp"
7#include "duckdb/planner/operator/logical_filter.hpp"
8
9using namespace duckdb;
10using namespace std;
11
12void ColumnLifetimeAnalyzer::ExtractUnusedColumnBindings(vector<ColumnBinding> bindings,
13 column_binding_set_t &unused_bindings) {
14 for (idx_t i = 0; i < bindings.size(); i++) {
15 if (column_references.find(bindings[i]) == column_references.end()) {
16 unused_bindings.insert(bindings[i]);
17 }
18 }
19}
20
21void ColumnLifetimeAnalyzer::GenerateProjectionMap(vector<ColumnBinding> bindings,
22 column_binding_set_t &unused_bindings,
23 vector<idx_t> &projection_map) {
24 if (unused_bindings.size() == 0) {
25 return;
26 }
27 // now iterate over the result bindings of the child
28 for (idx_t i = 0; i < bindings.size(); i++) {
29 // if this binding does not belong to the unused bindings, add it to the projection map
30 if (unused_bindings.find(bindings[i]) == unused_bindings.end()) {
31 projection_map.push_back(i);
32 }
33 }
34 if (projection_map.size() == bindings.size()) {
35 projection_map.clear();
36 }
37}
38
39void ColumnLifetimeAnalyzer::StandardVisitOperator(LogicalOperator &op) {
40 LogicalOperatorVisitor::VisitOperatorExpressions(op);
41 if (op.type == LogicalOperatorType::DELIM_JOIN) {
42 // visit the duplicate eliminated columns on the LHS, if any
43 auto &delim_join = (LogicalDelimJoin &)op;
44 for (auto &expr : delim_join.duplicate_eliminated_columns) {
45 VisitExpression(&expr);
46 }
47 }
48 LogicalOperatorVisitor::VisitOperatorChildren(op);
49}
50
51void ColumnLifetimeAnalyzer::VisitOperator(LogicalOperator &op) {
52 switch (op.type) {
53 case LogicalOperatorType::AGGREGATE_AND_GROUP_BY: {
54 // FIXME: groups that are not referenced can be removed from projection
55 // recurse into the children of the aggregate
56 ColumnLifetimeAnalyzer analyzer;
57 analyzer.VisitOperatorExpressions(op);
58 analyzer.VisitOperator(*op.children[0]);
59 return;
60 }
61 case LogicalOperatorType::DELIM_JOIN:
62 case LogicalOperatorType::COMPARISON_JOIN: {
63 if (everything_referenced) {
64 break;
65 }
66 auto &comp_join = (LogicalComparisonJoin &)op;
67 if (comp_join.join_type == JoinType::MARK || comp_join.join_type == JoinType::SEMI ||
68 comp_join.join_type == JoinType::ANTI) {
69 break;
70 }
71 // FIXME for now, we only push into the projection map for equality (hash) joins
72 // FIXME: add projection to LHS as well
73 bool has_equality = false;
74 for (auto &cond : comp_join.conditions) {
75 if (cond.comparison == ExpressionType::COMPARE_EQUAL) {
76 has_equality = true;
77 }
78 }
79 if (!has_equality) {
80 break;
81 }
82 // now, for each of the columns of the RHS, check which columns need to be projected
83 column_binding_set_t unused_bindings;
84 ExtractUnusedColumnBindings(op.children[1]->GetColumnBindings(), unused_bindings);
85
86 // now recurse into the filter and its children
87 StandardVisitOperator(op);
88
89 // then generate the projection map
90 GenerateProjectionMap(op.children[1]->GetColumnBindings(), unused_bindings, comp_join.right_projection_map);
91 return;
92 }
93 case LogicalOperatorType::UNION:
94 case LogicalOperatorType::EXCEPT:
95 case LogicalOperatorType::INTERSECT:
96 // for set operations we don't remove anything, just recursively visit the children
97 // FIXME: for UNION we can remove unreferenced columns as long as everything_referenced is false (i.e. we
98 // encounter a UNION node that is not preceded by a DISTINCT)
99 for (auto &child : op.children) {
100 ColumnLifetimeAnalyzer analyzer(true);
101 analyzer.VisitOperator(*child);
102 }
103 return;
104 case LogicalOperatorType::PROJECTION: {
105 // then recurse into the children of this projection
106 ColumnLifetimeAnalyzer analyzer;
107 analyzer.VisitOperatorExpressions(op);
108 analyzer.VisitOperator(*op.children[0]);
109 return;
110 }
111 case LogicalOperatorType::DISTINCT: {
112 // distinct, all projected columns are used for the DISTINCT computation
113 // mark all columns as used and continue to the children
114 // FIXME: DISTINCT with expression list does not implicitly reference everything
115 everything_referenced = true;
116 break;
117 }
118 case LogicalOperatorType::FILTER: {
119 auto &filter = (LogicalFilter &)op;
120 if (everything_referenced) {
121 break;
122 }
123 // filter, figure out which columns are not needed after the filter
124 column_binding_set_t unused_bindings;
125 ExtractUnusedColumnBindings(op.children[0]->GetColumnBindings(), unused_bindings);
126
127 // now recurse into the filter and its children
128 StandardVisitOperator(op);
129
130 // then generate the projection map
131 GenerateProjectionMap(op.children[0]->GetColumnBindings(), unused_bindings, filter.projection_map);
132 return;
133 }
134 default:
135 break;
136 }
137 StandardVisitOperator(op);
138}
139
140unique_ptr<Expression> ColumnLifetimeAnalyzer::VisitReplace(BoundColumnRefExpression &expr,
141 unique_ptr<Expression> *expr_ptr) {
142 column_references.insert(expr.binding);
143 return nullptr;
144}
145
146unique_ptr<Expression> ColumnLifetimeAnalyzer::VisitReplace(BoundReferenceExpression &expr,
147 unique_ptr<Expression> *expr_ptr) {
148 // BoundReferenceExpression should not be used here yet, they only belong in the physical plan
149 throw InternalException("BoundReferenceExpression should not be used here yet!");
150}
151