1#include "duckdb/optimizer/expression_heuristics.hpp"
2#include "duckdb/planner/expression/list.hpp"
3
4using namespace duckdb;
5using namespace std;
6
7unique_ptr<LogicalOperator> ExpressionHeuristics::Rewrite(unique_ptr<LogicalOperator> op) {
8 VisitOperator(*op);
9 return op;
10}
11
12void ExpressionHeuristics::VisitOperator(LogicalOperator &op) {
13 if (op.type == LogicalOperatorType::FILTER) {
14 // reorder all filter expressions
15 if (op.expressions.size() > 1) {
16 ReorderExpressions(op.expressions);
17 }
18 }
19
20 // traverse recursively through the operator tree
21 VisitOperatorChildren(op);
22 VisitOperatorExpressions(op);
23}
24
25unique_ptr<Expression> ExpressionHeuristics::VisitReplace(BoundConjunctionExpression &expr,
26 unique_ptr<Expression> *expr_ptr) {
27 ReorderExpressions(expr.children);
28 return nullptr;
29}
30
31void ExpressionHeuristics::ReorderExpressions(vector<unique_ptr<Expression>> &expressions) {
32
33 struct ExpressionCosts {
34 unique_ptr<Expression> expr;
35 idx_t cost;
36
37 bool operator==(const ExpressionCosts &p) const {
38 return cost == p.cost;
39 }
40 bool operator<(const ExpressionCosts &p) const {
41 return cost < p.cost;
42 }
43 };
44
45 vector<ExpressionCosts> expression_costs;
46 // iterate expressions, get cost for each one
47 for (idx_t i = 0; i < expressions.size(); i++) {
48 idx_t cost = Cost(*expressions[i]);
49 expression_costs.push_back({move(expressions[i]), cost});
50 }
51
52 // sort by cost and put back in place
53 sort(expression_costs.begin(), expression_costs.end());
54 for (idx_t i = 0; i < expression_costs.size(); i++) {
55 expressions[i] = move(expression_costs[i].expr);
56 }
57}
58
59idx_t ExpressionHeuristics::ExpressionCost(BoundBetweenExpression &expr) {
60 return Cost(*expr.input) + Cost(*expr.lower) + Cost(*expr.upper) + 10;
61}
62
63idx_t ExpressionHeuristics::ExpressionCost(BoundCaseExpression &expr) {
64 // CASE WHEN check THEN result_if_true ELSE result_if_false END
65 return Cost(*expr.check) + Cost(*expr.result_if_true) + Cost(*expr.result_if_false) + 5;
66}
67
68idx_t ExpressionHeuristics::ExpressionCost(BoundCastExpression &expr) {
69 // OPERATOR_CAST
70 // determine cast cost by comparing cast_expr.source_type and cast_expr_target_type
71 idx_t cast_cost = 0;
72 if (expr.target_type != expr.source_type) {
73 // if cast from or to varchar
74 // TODO: we might want to add more cases
75 if (expr.target_type == SQLType::VARCHAR || expr.source_type == SQLType::VARCHAR ||
76 expr.target_type == SQLType::BLOB || expr.source_type == SQLType::BLOB) {
77 cast_cost = 200;
78 } else {
79 cast_cost = 5;
80 }
81 }
82 return Cost(*expr.child) + cast_cost;
83}
84
85idx_t ExpressionHeuristics::ExpressionCost(BoundComparisonExpression &expr) {
86 // COMPARE_EQUAL, COMPARE_NOTEQUAL, COMPARE_GREATERTHAN, COMPARE_GREATERTHANOREQUALTO, COMPARE_LESSTHAN,
87 // COMPARE_LESSTHANOREQUALTO
88 return Cost(*expr.left) + 5 + Cost(*expr.right);
89}
90
91idx_t ExpressionHeuristics::ExpressionCost(BoundConjunctionExpression &expr) {
92 // CONJUNCTION_AND, CONJUNCTION_OR
93 idx_t cost = 5;
94 for (auto &child : expr.children) {
95 cost += Cost(*child);
96 }
97 return cost;
98}
99
100idx_t ExpressionHeuristics::ExpressionCost(BoundFunctionExpression &expr) {
101 idx_t cost_children = 0;
102 for (auto &child : expr.children) {
103 cost_children += Cost(*child);
104 }
105
106 auto cost_function = function_costs.find(expr.function.name);
107 if (cost_function != function_costs.end()) {
108 return cost_children + cost_function->second;
109 } else {
110 return cost_children + 1000;
111 }
112}
113
114idx_t ExpressionHeuristics::ExpressionCost(BoundOperatorExpression &expr, ExpressionType &expr_type) {
115 idx_t sum = 0;
116 for (auto &child : expr.children) {
117 sum += Cost(*child);
118 }
119
120 // OPERATOR_IS_NULL, OPERATOR_IS_NOT_NULL
121 if (expr_type == ExpressionType::OPERATOR_IS_NULL || expr_type == ExpressionType::OPERATOR_IS_NOT_NULL) {
122 return sum + 5;
123 } else if (expr_type == ExpressionType::COMPARE_IN || expr_type == ExpressionType::COMPARE_NOT_IN) {
124 // COMPARE_IN, COMPARE_NOT_IN
125 return sum + (expr.children.size() - 1) * 100;
126 } else if (expr_type == ExpressionType::OPERATOR_NOT) {
127 // OPERATOR_NOT
128 return sum + 10; // TODO: evaluate via measured runtimes
129 } else {
130 return sum + 1000;
131 }
132}
133
134idx_t ExpressionHeuristics::ExpressionCost(TypeId &return_type, idx_t multiplier) {
135 // TODO: ajust values according to benchmark results
136 switch (return_type) {
137 case TypeId::VARCHAR:
138 return 5 * multiplier;
139 case TypeId::FLOAT:
140 return 2 * multiplier;
141 case TypeId::DOUBLE:
142 return 2 * multiplier;
143 default:
144 return 1 * multiplier;
145 }
146}
147
148idx_t ExpressionHeuristics::Cost(Expression &expr) {
149 switch (expr.expression_class) {
150 case ExpressionClass::BOUND_CASE: {
151 auto &case_expr = (BoundCaseExpression &)expr;
152 return ExpressionCost(case_expr);
153 }
154 case ExpressionClass::BOUND_BETWEEN: {
155 auto &between_expr = (BoundBetweenExpression &)expr;
156 return ExpressionCost(between_expr);
157 }
158 case ExpressionClass::BOUND_CAST: {
159 auto &cast_expr = (BoundCastExpression &)expr;
160 return ExpressionCost(cast_expr);
161 }
162 case ExpressionClass::BOUND_COMPARISON: {
163 auto &comp_expr = (BoundComparisonExpression &)expr;
164 return ExpressionCost(comp_expr);
165 }
166 case ExpressionClass::BOUND_CONJUNCTION: {
167 auto &conj_expr = (BoundConjunctionExpression &)expr;
168 return ExpressionCost(conj_expr);
169 }
170 case ExpressionClass::BOUND_FUNCTION: {
171 auto &func_expr = (BoundFunctionExpression &)expr;
172 return ExpressionCost(func_expr);
173 }
174 case ExpressionClass::BOUND_OPERATOR: {
175 auto &op_expr = (BoundOperatorExpression &)expr;
176 return ExpressionCost(op_expr, expr.type);
177 }
178 case ExpressionClass::BOUND_COLUMN_REF: {
179 auto &col_expr = (BoundColumnRefExpression &)expr;
180 return ExpressionCost(col_expr.return_type, 8);
181 }
182 case ExpressionClass::BOUND_CONSTANT: {
183 auto &const_expr = (BoundConstantExpression &)expr;
184 return ExpressionCost(const_expr.return_type, 1);
185 }
186 case ExpressionClass::BOUND_PARAMETER: {
187 auto &const_expr = (BoundConstantExpression &)expr;
188 return ExpressionCost(const_expr.return_type, 1);
189 }
190 case ExpressionClass::BOUND_REF: {
191 auto &col_expr = (BoundColumnRefExpression &)expr;
192 return ExpressionCost(col_expr.return_type, 8);
193 }
194 default: { break; }
195 }
196
197 // return a very high value if nothing matches
198 return 1000;
199}
200