1#include "duckdb/optimizer/join_order_optimizer.hpp"
2
3#include "duckdb/planner/expression/list.hpp"
4#include "duckdb/planner/expression_iterator.hpp"
5#include "duckdb/planner/operator/list.hpp"
6
7using namespace duckdb;
8using namespace std;
9
10using JoinNode = JoinOrderOptimizer::JoinNode;
11
12//! Returns true if A and B are disjoint, false otherwise
13template <class T> static bool Disjoint(unordered_set<T> &a, unordered_set<T> &b) {
14 for (auto &entry : a) {
15 if (b.find(entry) != b.end()) {
16 return false;
17 }
18 }
19 return true;
20}
21
22//! Extract the set of relations referred to inside an expression
23bool JoinOrderOptimizer::ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings) {
24 if (expression.type == ExpressionType::BOUND_COLUMN_REF) {
25 auto &colref = (BoundColumnRefExpression &)expression;
26 assert(colref.depth == 0);
27 assert(colref.binding.table_index != INVALID_INDEX);
28 // map the base table index to the relation index used by the JoinOrderOptimizer
29 assert(relation_mapping.find(colref.binding.table_index) != relation_mapping.end());
30 bindings.insert(relation_mapping[colref.binding.table_index]);
31 }
32 if (expression.type == ExpressionType::BOUND_REF) {
33 // bound expression
34 bindings.clear();
35 return false;
36 }
37 assert(expression.type != ExpressionType::SUBQUERY);
38 bool can_reorder = true;
39 ExpressionIterator::EnumerateChildren(expression, [&](Expression &expr) {
40 if (!ExtractBindings(expr, bindings)) {
41 can_reorder = false;
42 return;
43 }
44 });
45 return can_reorder;
46}
47
48static unique_ptr<LogicalOperator> PushFilter(unique_ptr<LogicalOperator> node, unique_ptr<Expression> expr) {
49 // push an expression into a filter
50 // first check if we have any filter to push it into
51 if (node->type != LogicalOperatorType::FILTER) {
52 // we don't, we need to create one
53 auto filter = make_unique<LogicalFilter>();
54 filter->children.push_back(move(node));
55 node = move(filter);
56 }
57 // push the filter into the LogicalFilter
58 assert(node->type == LogicalOperatorType::FILTER);
59 auto filter = (LogicalFilter *)node.get();
60 filter->expressions.push_back(move(expr));
61 return node;
62}
63
64bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<LogicalOperator *> &filter_operators,
65 LogicalOperator *parent) {
66 LogicalOperator *op = &input_op;
67 while (op->children.size() == 1 &&
68 (op->type != LogicalOperatorType::PROJECTION && op->type != LogicalOperatorType::EXPRESSION_GET)) {
69 if (op->type == LogicalOperatorType::FILTER) {
70 // extract join conditions from filter
71 filter_operators.push_back(op);
72 }
73 if (op->type == LogicalOperatorType::AGGREGATE_AND_GROUP_BY || op->type == LogicalOperatorType::WINDOW) {
74 // don't push filters through projection or aggregate and group by
75 JoinOrderOptimizer optimizer;
76 op->children[0] = optimizer.Optimize(move(op->children[0]));
77 return false;
78 }
79 op = op->children[0].get();
80 }
81 bool non_reorderable_operation = false;
82 if (op->type == LogicalOperatorType::UNION || op->type == LogicalOperatorType::EXCEPT ||
83 op->type == LogicalOperatorType::INTERSECT || op->type == LogicalOperatorType::DELIM_JOIN ||
84 op->type == LogicalOperatorType::ANY_JOIN) {
85 // set operation, optimize separately in children
86 non_reorderable_operation = true;
87 }
88
89 if (op->type == LogicalOperatorType::COMPARISON_JOIN) {
90 LogicalJoin *join = (LogicalJoin *)op;
91 if (join->join_type == JoinType::INNER) {
92 // extract join conditions from inner join
93 filter_operators.push_back(op);
94 } else {
95 // non-inner join, not reordarable yet
96 non_reorderable_operation = true;
97 }
98 }
99 if (non_reorderable_operation) {
100 // we encountered a non-reordable operation (setop or non-inner join)
101 // we do not reorder non-inner joins yet, however we do want to expand the potential join graph around them
102 // non-inner joins are also tricky because we can't freely make conditions through them
103 // e.g. suppose we have (left LEFT OUTER JOIN right WHERE right IS NOT NULL), the join can generate
104 // new NULL values in the right side, so pushing this condition through the join leads to incorrect results
105 // for this reason, we just start a new JoinOptimizer pass in each of the children of the join
106 for (idx_t i = 0; i < op->children.size(); i++) {
107 JoinOrderOptimizer optimizer;
108 op->children[i] = optimizer.Optimize(move(op->children[i]));
109 }
110 // after this we want to treat this node as one "end node" (like e.g. a base relation)
111 // however the join refers to multiple base relations
112 // enumerate all base relations obtained from this join and add them to the relation mapping
113 // also, we have to resolve the join conditions for the joins here
114 // get the left and right bindings
115 unordered_set<idx_t> bindings;
116 LogicalJoin::GetTableReferences(*op, bindings);
117 // now create the relation that refers to all these bindings
118 auto relation = make_unique<SingleJoinRelation>(&input_op, parent);
119 for (idx_t it : bindings) {
120 relation_mapping[it] = relations.size();
121 }
122 relations.push_back(move(relation));
123 return true;
124 }
125 if (op->type == LogicalOperatorType::COMPARISON_JOIN || op->type == LogicalOperatorType::CROSS_PRODUCT) {
126 // inner join or cross product
127 bool can_reorder_left = ExtractJoinRelations(*op->children[0], filter_operators, op);
128 bool can_reorder_right = ExtractJoinRelations(*op->children[1], filter_operators, op);
129 return can_reorder_left && can_reorder_right;
130 } else if (op->type == LogicalOperatorType::GET) {
131 // base table scan, add to set of relations
132 auto get = (LogicalGet *)op;
133 auto relation = make_unique<SingleJoinRelation>(&input_op, parent);
134 relation_mapping[get->table_index] = relations.size();
135 relations.push_back(move(relation));
136 return true;
137 } else if (op->type == LogicalOperatorType::EXPRESSION_GET) {
138 // base table scan, add to set of relations
139 auto get = (LogicalExpressionGet *)op;
140 auto relation = make_unique<SingleJoinRelation>(&input_op, parent);
141 relation_mapping[get->table_index] = relations.size();
142 relations.push_back(move(relation));
143 return true;
144 } else if (op->type == LogicalOperatorType::TABLE_FUNCTION) {
145 // table function call, add to set of relations
146 auto table_function = (LogicalTableFunction *)op;
147 auto relation = make_unique<SingleJoinRelation>(&input_op, parent);
148 relation_mapping[table_function->table_index] = relations.size();
149 relations.push_back(move(relation));
150 return true;
151 } else if (op->type == LogicalOperatorType::PROJECTION) {
152 auto proj = (LogicalProjection *)op;
153 // we run the join order optimizer witin the subquery as well
154 JoinOrderOptimizer optimizer;
155 op->children[0] = optimizer.Optimize(move(op->children[0]));
156 // projection, add to the set of relations
157 auto relation = make_unique<SingleJoinRelation>(&input_op, parent);
158 relation_mapping[proj->table_index] = relations.size();
159 relations.push_back(move(relation));
160 return true;
161 }
162 return false;
163}
164
165//! Update the exclusion set with all entries in the subgraph
166static void UpdateExclusionSet(JoinRelationSet *node, unordered_set<idx_t> &exclusion_set) {
167 for (idx_t i = 0; i < node->count; i++) {
168 exclusion_set.insert(node->relations[i]);
169 }
170}
171
172//! Create a new JoinTree node by joining together two previous JoinTree nodes
173static unique_ptr<JoinNode> CreateJoinTree(JoinRelationSet *set, NeighborInfo *info, JoinNode *left, JoinNode *right) {
174 // for the hash join we want the right side (build side) to have the smallest cardinality
175 // also just a heuristic but for now...
176 // FIXME: we should probably actually benchmark that as well
177 // FIXME: should consider different join algorithms, should we pick a join algorithm here as well? (probably)
178 if (left->cardinality < right->cardinality) {
179 return CreateJoinTree(set, info, right, left);
180 }
181 // the expected cardinality is the max of the child cardinalities
182 // FIXME: we should obviously use better cardinality estimation here
183 // but for now we just assume foreign key joins only
184 idx_t expected_cardinality;
185 if (info->filters.size() == 0) {
186 // cross product
187 expected_cardinality = left->cardinality * right->cardinality;
188 } else {
189 // normal join, expect foreign key join
190 expected_cardinality = std::max(left->cardinality, right->cardinality);
191 }
192 // cost is expected_cardinality plus the cost of the previous plans
193 idx_t cost = expected_cardinality;
194 return make_unique<JoinNode>(set, info, left, right, expected_cardinality, cost);
195}
196
197JoinNode *JoinOrderOptimizer::EmitPair(JoinRelationSet *left, JoinRelationSet *right, NeighborInfo *info) {
198 // get the left and right join plans
199 auto &left_plan = plans[left];
200 auto &right_plan = plans[right];
201 auto new_set = set_manager.Union(left, right);
202 // create the join tree based on combining the two plans
203 auto new_plan = CreateJoinTree(new_set, info, left_plan.get(), right_plan.get());
204 // check if this plan is the optimal plan we found for this set of relations
205 auto entry = plans.find(new_set);
206 if (entry == plans.end() || new_plan->cost < entry->second->cost) {
207 // the plan is the optimal plan, move it into the dynamic programming tree
208 auto result = new_plan.get();
209 plans[new_set] = move(new_plan);
210 return result;
211 }
212 return entry->second.get();
213}
214
215bool JoinOrderOptimizer::TryEmitPair(JoinRelationSet *left, JoinRelationSet *right, NeighborInfo *info) {
216 pairs++;
217 if (pairs >= 2000) {
218 // when the amount of pairs gets too large we exit the dynamic programming and resort to a greedy algorithm
219 // FIXME: simple heuristic currently
220 // at 10K pairs stop searching exactly and switch to heuristic
221 return false;
222 }
223 EmitPair(left, right, info);
224 return true;
225}
226
227bool JoinOrderOptimizer::EmitCSG(JoinRelationSet *node) {
228 // create the exclusion set as everything inside the subgraph AND anything with members BELOW it
229 unordered_set<idx_t> exclusion_set;
230 for (idx_t i = 0; i < node->relations[0]; i++) {
231 exclusion_set.insert(i);
232 }
233 UpdateExclusionSet(node, exclusion_set);
234 // find the neighbors given this exclusion set
235 auto neighbors = query_graph.GetNeighbors(node, exclusion_set);
236 if (neighbors.size() == 0) {
237 return true;
238 }
239 // we iterate over the neighbors ordered by their first node
240 sort(neighbors.begin(), neighbors.end());
241 for (auto neighbor : neighbors) {
242 // since the GetNeighbors only returns the smallest element in a list, the entry might not be connected to
243 // (only!) this neighbor, hence we have to do a connectedness check before we can emit it
244 auto neighbor_relation = set_manager.GetJoinRelation(neighbor);
245 auto connection = query_graph.GetConnection(node, neighbor_relation);
246 if (connection) {
247 if (!TryEmitPair(node, neighbor_relation, connection)) {
248 return false;
249 }
250 }
251 if (!EnumerateCmpRecursive(node, neighbor_relation, exclusion_set)) {
252 return false;
253 }
254 }
255 return true;
256}
257
258bool JoinOrderOptimizer::EnumerateCmpRecursive(JoinRelationSet *left, JoinRelationSet *right,
259 unordered_set<idx_t> exclusion_set) {
260 // get the neighbors of the second relation under the exclusion set
261 auto neighbors = query_graph.GetNeighbors(right, exclusion_set);
262 if (neighbors.size() == 0) {
263 return true;
264 }
265 vector<JoinRelationSet *> union_sets;
266 union_sets.resize(neighbors.size());
267 for (idx_t i = 0; i < neighbors.size(); i++) {
268 auto neighbor = set_manager.GetJoinRelation(neighbors[i]);
269 // emit the combinations of this node and its neighbors
270 auto combined_set = set_manager.Union(right, neighbor);
271 if (plans.find(combined_set) != plans.end()) {
272 auto connection = query_graph.GetConnection(left, combined_set);
273 if (connection) {
274 if (!TryEmitPair(left, combined_set, connection)) {
275 return false;
276 }
277 }
278 }
279 union_sets[i] = combined_set;
280 }
281 // recursively enumerate the sets
282 for (idx_t i = 0; i < neighbors.size(); i++) {
283 // updated the set of excluded entries with this neighbor
284 unordered_set<idx_t> new_exclusion_set = exclusion_set;
285 new_exclusion_set.insert(neighbors[i]);
286 if (!EnumerateCmpRecursive(left, union_sets[i], new_exclusion_set)) {
287 return false;
288 }
289 }
290 return true;
291}
292
293bool JoinOrderOptimizer::EnumerateCSGRecursive(JoinRelationSet *node, unordered_set<idx_t> &exclusion_set) {
294 // find neighbors of S under the exlusion set
295 auto neighbors = query_graph.GetNeighbors(node, exclusion_set);
296 if (neighbors.size() == 0) {
297 return true;
298 }
299 // now first emit the connected subgraphs of the neighbors
300 vector<JoinRelationSet *> union_sets;
301 union_sets.resize(neighbors.size());
302 for (idx_t i = 0; i < neighbors.size(); i++) {
303 auto neighbor = set_manager.GetJoinRelation(neighbors[i]);
304 // emit the combinations of this node and its neighbors
305 auto new_set = set_manager.Union(node, neighbor);
306 if (plans.find(new_set) != plans.end()) {
307 if (!EmitCSG(new_set)) {
308 return false;
309 }
310 }
311 union_sets[i] = new_set;
312 }
313 // recursively enumerate the sets
314 for (idx_t i = 0; i < neighbors.size(); i++) {
315 // updated the set of excluded entries with this neighbor
316 unordered_set<idx_t> new_exclusion_set = exclusion_set;
317 new_exclusion_set.insert(neighbors[i]);
318 if (!EnumerateCSGRecursive(union_sets[i], new_exclusion_set)) {
319 return false;
320 }
321 }
322 return true;
323}
324
325bool JoinOrderOptimizer::SolveJoinOrderExactly() {
326 // now we perform the actual dynamic programming to compute the final result
327 // we enumerate over all the possible pairs in the neighborhood
328 for (idx_t i = relations.size(); i > 0; i--) {
329 // for every node in the set, we consider it as the start node once
330 auto start_node = set_manager.GetJoinRelation(i - 1);
331 // emit the start node
332 if (!EmitCSG(start_node)) {
333 return false;
334 }
335 // initialize the set of exclusion_set as all the nodes with a number below this
336 unordered_set<idx_t> exclusion_set;
337 for (idx_t j = 0; j < i - 1; j++) {
338 exclusion_set.insert(j);
339 }
340 // then we recursively search for neighbors that do not belong to the banned entries
341 if (!EnumerateCSGRecursive(start_node, exclusion_set)) {
342 return false;
343 }
344 }
345 return true;
346}
347
348void JoinOrderOptimizer::SolveJoinOrderApproximately() {
349 // at this point, we exited the dynamic programming but did not compute the final join order because it took too
350 // long instead, we use a greedy heuristic to obtain a join ordering now we use Greedy Operator Ordering to
351 // construct the result tree first we start out with all the base relations (the to-be-joined relations)
352 vector<JoinRelationSet *> T;
353 for (idx_t i = 0; i < relations.size(); i++) {
354 T.push_back(set_manager.GetJoinRelation(i));
355 }
356 while (T.size() > 1) {
357 // now in every step of the algorithm, we greedily pick the join between the to-be-joined relations that has the
358 // smallest cost. This is O(r^2) per step, and every step will reduce the total amount of relations to-be-joined
359 // by 1, so the total cost is O(r^3) in the amount of relations
360 idx_t best_left = 0, best_right = 0;
361 JoinNode *best_connection = nullptr;
362 for (idx_t i = 0; i < T.size(); i++) {
363 auto left = T[i];
364 for (idx_t j = i + 1; j < T.size(); j++) {
365 auto right = T[j];
366 // check if we can connect these two relations
367 auto connection = query_graph.GetConnection(left, right);
368 if (connection) {
369 // we can! check the cost of this connection
370 auto node = EmitPair(left, right, connection);
371 if (!best_connection || node->cost < best_connection->cost) {
372 // best pair found so far
373 best_connection = node;
374 best_left = i;
375 best_right = j;
376 }
377 }
378 }
379 }
380 if (!best_connection) {
381 // could not find a connection, but we were not done with finding a completed plan
382 // we have to add a cross product; we add it between the two smallest relations
383 JoinNode *smallest_plans[2] = {nullptr};
384 idx_t smallest_index[2];
385 for (idx_t i = 0; i < T.size(); i++) {
386 // get the plan for this relation
387 auto current_plan = plans[T[i]].get();
388 // check if the cardinality is smaller than the smallest two found so far
389 for (idx_t j = 0; j < 2; j++) {
390 if (!smallest_plans[j] || smallest_plans[j]->cardinality > current_plan->cardinality) {
391 smallest_plans[j] = current_plan;
392 smallest_index[j] = i;
393 break;
394 }
395 }
396 }
397 assert(smallest_plans[0] && smallest_plans[1]);
398 assert(smallest_index[0] != smallest_index[1]);
399 auto left = smallest_plans[0]->set, right = smallest_plans[1]->set;
400 // create a cross product edge (i.e. edge with empty filter) between these two sets in the query graph
401 query_graph.CreateEdge(left, right, nullptr);
402 // now emit the pair and continue with the algorithm
403 auto connection = query_graph.GetConnection(left, right);
404 assert(connection);
405
406 best_connection = EmitPair(left, right, connection);
407 best_left = smallest_index[0];
408 best_right = smallest_index[1];
409 // the code below assumes best_right > best_left
410 if (best_left > best_right) {
411 swap(best_left, best_right);
412 }
413 }
414 // now update the to-be-checked pairs
415 // remove left and right, and add the combination
416
417 // important to erase the biggest element first
418 // if we erase the smallest element first the index of the biggest element changes
419 assert(best_right > best_left);
420 T.erase(T.begin() + best_right);
421 T.erase(T.begin() + best_left);
422 T.push_back(best_connection->set);
423 }
424}
425
426void JoinOrderOptimizer::SolveJoinOrder() {
427 // first try to solve the join order exactly
428 if (!SolveJoinOrderExactly()) {
429 // otherwise, if that times out we resort to a greedy algorithm
430 SolveJoinOrderApproximately();
431 }
432}
433
434void JoinOrderOptimizer::GenerateCrossProducts() {
435 // generate a set of cross products to combine the currently available plans into a full join plan
436 // we create edges between every relation with a high cost
437 for (idx_t i = 0; i < relations.size(); i++) {
438 auto left = set_manager.GetJoinRelation(i);
439 for (idx_t j = 0; j < relations.size(); j++) {
440 if (i != j) {
441 auto right = set_manager.GetJoinRelation(j);
442 query_graph.CreateEdge(left, right, nullptr);
443 query_graph.CreateEdge(right, left, nullptr);
444 }
445 }
446 }
447}
448
449static unique_ptr<LogicalOperator> ExtractJoinRelation(SingleJoinRelation &rel) {
450 auto &children = rel.parent->children;
451 for (idx_t i = 0; i < children.size(); i++) {
452 if (children[i].get() == rel.op) {
453 // found it! take ownership of it from the parent
454 auto result = move(children[i]);
455 children.erase(children.begin() + i);
456 return result;
457 }
458 }
459 throw Exception("Could not find relation in parent node (?)");
460}
461
462pair<JoinRelationSet *, unique_ptr<LogicalOperator>>
463JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted_relations, JoinNode *node) {
464 JoinRelationSet *left_node = nullptr, *right_node = nullptr;
465 JoinRelationSet *result_relation;
466 unique_ptr<LogicalOperator> result_operator;
467 if (node->left && node->right) {
468 // generate the left and right children
469 auto left = GenerateJoins(extracted_relations, node->left);
470 auto right = GenerateJoins(extracted_relations, node->right);
471
472 if (node->info->filters.size() == 0) {
473 // no filters, create a cross product
474 auto join = make_unique<LogicalCrossProduct>();
475 join->children.push_back(move(left.second));
476 join->children.push_back(move(right.second));
477 result_operator = move(join);
478 } else {
479 // we have filters, create a join node
480 auto join = make_unique<LogicalComparisonJoin>(JoinType::INNER);
481 join->children.push_back(move(left.second));
482 join->children.push_back(move(right.second));
483 // set the join conditions from the join node
484 for (auto &f : node->info->filters) {
485 // extract the filter from the operator it originally belonged to
486 assert(filters[f->filter_index]);
487 auto condition = move(filters[f->filter_index]);
488 // now create the actual join condition
489 assert((JoinRelationSet::IsSubset(left.first, f->left_set) &&
490 JoinRelationSet::IsSubset(right.first, f->right_set)) ||
491 (JoinRelationSet::IsSubset(left.first, f->right_set) &&
492 JoinRelationSet::IsSubset(right.first, f->left_set)));
493 JoinCondition cond;
494 assert(condition->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON);
495 auto &comparison = (BoundComparisonExpression &)*condition;
496 // we need to figure out which side is which by looking at the relations available to us
497 bool invert = JoinRelationSet::IsSubset(left.first, f->left_set) ? false : true;
498 cond.left = !invert ? move(comparison.left) : move(comparison.right);
499 cond.right = !invert ? move(comparison.right) : move(comparison.left);
500 cond.comparison = condition->type;
501 if (invert) {
502 // reverse comparison expression if we reverse the order of the children
503 cond.comparison = FlipComparisionExpression(cond.comparison);
504 }
505 join->conditions.push_back(move(cond));
506 }
507 assert(join->conditions.size() > 0);
508 result_operator = move(join);
509 }
510 left_node = left.first;
511 right_node = right.first;
512 result_relation = set_manager.Union(left_node, right_node);
513 } else {
514 // base node, get the entry from the list of extracted relations
515 assert(node->set->count == 1);
516 assert(extracted_relations[node->set->relations[0]]);
517 result_relation = node->set;
518 result_operator = move(extracted_relations[node->set->relations[0]]);
519 }
520 // check if we should do a pushdown on this node
521 // basically, any remaining filter that is a subset of the current relation will no longer be used in joins
522 // hence we should push it here
523 for (idx_t i = 0; i < filter_infos.size(); i++) {
524 // check if the filter has already been extracted
525 auto info = filter_infos[i].get();
526 if (filters[info->filter_index]) {
527 // now check if the filter is a subset of the current relation
528 // note that infos with an empty relation set are a special case and we do not push them down
529 if (info->set->count > 0 && JoinRelationSet::IsSubset(result_relation, info->set)) {
530 auto filter = move(filters[info->filter_index]);
531 // if it is, we can push the filter
532 // we can push it either into a join or as a filter
533 // check if we are in a join or in a base table
534 if (!left_node || !info->left_set) {
535 // base table or non-comparison expression, push it as a filter
536 result_operator = PushFilter(move(result_operator), move(filter));
537 continue;
538 }
539 // the node below us is a join or cross product and the expression is a comparison
540 // check if the nodes can be split up into left/right
541 bool found_subset = false;
542 bool invert = false;
543 if (JoinRelationSet::IsSubset(left_node, info->left_set) &&
544 JoinRelationSet::IsSubset(right_node, info->right_set)) {
545 found_subset = true;
546 } else if (JoinRelationSet::IsSubset(right_node, info->left_set) &&
547 JoinRelationSet::IsSubset(left_node, info->right_set)) {
548 invert = true;
549 found_subset = true;
550 }
551 if (!found_subset) {
552 // could not be split up into left/right
553 result_operator = PushFilter(move(result_operator), move(filter));
554 continue;
555 }
556 // create the join condition
557 JoinCondition cond;
558 assert(filter->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON);
559 auto &comparison = (BoundComparisonExpression &)*filter;
560 // we need to figure out which side is which by looking at the relations available to us
561 cond.left = !invert ? move(comparison.left) : move(comparison.right);
562 cond.right = !invert ? move(comparison.right) : move(comparison.left);
563 cond.comparison = comparison.type;
564 if (invert) {
565 // reverse comparison expression if we reverse the order of the children
566 cond.comparison = FlipComparisionExpression(comparison.type);
567 }
568 // now find the join to push it into
569 auto node = result_operator.get();
570 if (node->type == LogicalOperatorType::FILTER) {
571 node = node->children[0].get();
572 }
573 if (node->type == LogicalOperatorType::CROSS_PRODUCT) {
574 // turn into comparison join
575 auto comp_join = make_unique<LogicalComparisonJoin>(JoinType::INNER);
576 comp_join->children.push_back(move(node->children[0]));
577 comp_join->children.push_back(move(node->children[1]));
578 comp_join->conditions.push_back(move(cond));
579 if (node == result_operator.get()) {
580 result_operator = move(comp_join);
581 } else {
582 assert(result_operator->type == LogicalOperatorType::FILTER);
583 result_operator->children[0] = move(comp_join);
584 }
585 } else {
586 assert(node->type == LogicalOperatorType::COMPARISON_JOIN);
587 auto &comp_join = (LogicalComparisonJoin &)*node;
588 comp_join.conditions.push_back(move(cond));
589 }
590 }
591 }
592 }
593 return make_pair(result_relation, move(result_operator));
594}
595
596unique_ptr<LogicalOperator> JoinOrderOptimizer::RewritePlan(unique_ptr<LogicalOperator> plan, JoinNode *node) {
597 // now we have to rewrite the plan
598 bool root_is_join = plan->children.size() > 1;
599
600 // first we will extract all relations from the main plan
601 vector<unique_ptr<LogicalOperator>> extracted_relations;
602 for (idx_t i = 0; i < relations.size(); i++) {
603 extracted_relations.push_back(ExtractJoinRelation(*relations[i]));
604 }
605 // now we generate the actual joins
606 auto join_tree = GenerateJoins(extracted_relations, node);
607 // perform the final pushdown of remaining filters
608 for (idx_t i = 0; i < filters.size(); i++) {
609 // check if the filter has already been extracted
610 if (filters[i]) {
611 // if not we need to push it
612 join_tree.second = PushFilter(move(join_tree.second), move(filters[i]));
613 }
614 }
615
616 // find the first join in the relation to know where to place this node
617 if (root_is_join) {
618 // first node is the join, return it immediately
619 return move(join_tree.second);
620 }
621 assert(plan->children.size() == 1);
622 // have to move up through the relations
623 auto op = plan.get();
624 auto parent = plan.get();
625 while (op->type != LogicalOperatorType::CROSS_PRODUCT && op->type != LogicalOperatorType::COMPARISON_JOIN) {
626 assert(op->children.size() == 1);
627 parent = op;
628 op = op->children[0].get();
629 }
630 // have to replace at this node
631 parent->children[0] = move(join_tree.second);
632 return plan;
633}
634
635// the join ordering is pretty much a straight implementation of the paper "Dynamic Programming Strikes Back" by Guido
636// Moerkotte and Thomas Neumannn, see that paper for additional info/documentation bonus slides:
637// https://db.in.tum.de/teaching/ws1415/queryopt/chapter3.pdf?lang=de
638// FIXME: incorporate cardinality estimation into the plans, possibly by pushing samples?
639unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOperator> plan) {
640 assert(filters.size() == 0 && relations.size() == 0); // assert that the JoinOrderOptimizer has not been used before
641 LogicalOperator *op = plan.get();
642 // now we optimize the current plan
643 // we skip past until we find the first projection, we do this because the HAVING clause inserts a Filter AFTER the
644 // group by and this filter cannot be reordered
645 // extract a list of all relations that have to be joined together
646 // and a list of all conditions that is applied to them
647 vector<LogicalOperator *> filter_operators;
648 if (!ExtractJoinRelations(*op, filter_operators)) {
649 // do not support reordering this type of plan
650 return plan;
651 }
652 if (relations.size() <= 1) {
653 // at most one relation, nothing to reorder
654 return plan;
655 }
656 // now that we know we are going to perform join ordering we actually extract the filters, eliminating duplicate
657 // filters in the process
658 expression_set_t filter_set;
659 for (auto &op : filter_operators) {
660 if (op->type == LogicalOperatorType::COMPARISON_JOIN) {
661 auto &join = (LogicalComparisonJoin &)*op;
662 assert(join.join_type == JoinType::INNER);
663 assert(join.expressions.size() == 0);
664 for (auto &cond : join.conditions) {
665 auto comparison =
666 make_unique<BoundComparisonExpression>(cond.comparison, move(cond.left), move(cond.right));
667 if (filter_set.find(comparison.get()) == filter_set.end()) {
668 filter_set.insert(comparison.get());
669 filters.push_back(move(comparison));
670 }
671 }
672 join.conditions.clear();
673 } else {
674 for (idx_t i = 0; i < op->expressions.size(); i++) {
675 if (filter_set.find(op->expressions[i].get()) == filter_set.end()) {
676 filter_set.insert(op->expressions[i].get());
677 filters.push_back(move(op->expressions[i]));
678 }
679 }
680 op->expressions.clear();
681 }
682 }
683 // create potential edges from the comparisons
684 for (idx_t i = 0; i < filters.size(); i++) {
685 auto &filter = filters[i];
686 auto info = make_unique<FilterInfo>();
687 auto filter_info = info.get();
688 filter_infos.push_back(move(info));
689 // first extract the relation set for the entire filter
690 unordered_set<idx_t> bindings;
691 ExtractBindings(*filter, bindings);
692 filter_info->set = set_manager.GetJoinRelation(bindings);
693 filter_info->filter_index = i;
694 // now check if it can be used as a join predicate
695 if (filter->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON) {
696 auto comparison = (BoundComparisonExpression *)filter.get();
697 // extract the bindings that are required for the left and right side of the comparison
698 unordered_set<idx_t> left_bindings, right_bindings;
699 ExtractBindings(*comparison->left, left_bindings);
700 ExtractBindings(*comparison->right, right_bindings);
701 if (left_bindings.size() > 0 && right_bindings.size() > 0) {
702 // both the left and the right side have bindings
703 // first create the relation sets, if they do not exist
704 filter_info->left_set = set_manager.GetJoinRelation(left_bindings);
705 filter_info->right_set = set_manager.GetJoinRelation(right_bindings);
706 // we can only create a meaningful edge if the sets are not exactly the same
707 if (filter_info->left_set != filter_info->right_set) {
708 // check if the sets are disjoint
709 if (Disjoint(left_bindings, right_bindings)) {
710 // they are disjoint, we only need to create one set of edges in the join graph
711 query_graph.CreateEdge(filter_info->left_set, filter_info->right_set, filter_info);
712 query_graph.CreateEdge(filter_info->right_set, filter_info->left_set, filter_info);
713 } else {
714 continue;
715 // the sets are not disjoint, we create two sets of edges
716 // auto left_difference = set_manager.Difference(filter_info->left_set, filter_info->right_set);
717 // auto right_difference = set_manager.Difference(filter_info->right_set,
718 // filter_info->left_set);
719 // // -> LEFT <-> RIGHT \ LEFT
720 // query_graph.CreateEdge(filter_info->left_set, right_difference, filter_info);
721 // query_graph.CreateEdge(right_difference, filter_info->left_set, filter_info);
722 // // -> RIGHT <-> LEFT \ RIGHT
723 // query_graph.CreateEdge(left_difference, filter_info->right_set, filter_info);
724 // query_graph.CreateEdge(filter_info->right_set, left_difference, filter_info);
725 }
726 continue;
727 }
728 }
729 }
730 }
731 // now use dynamic programming to figure out the optimal join order
732 // First we initialize each of the single-node plans with themselves and with their cardinalities these are the leaf
733 // nodes of the join tree NOTE: we can just use pointers to JoinRelationSet* here because the GetJoinRelation
734 // function ensures that a unique combination of relations will have a unique JoinRelationSet object.
735 for (idx_t i = 0; i < relations.size(); i++) {
736 auto &rel = *relations[i];
737 auto node = set_manager.GetJoinRelation(i);
738 plans[node] = make_unique<JoinNode>(node, rel.op->EstimateCardinality());
739 }
740 // now we perform the actual dynamic programming to compute the final result
741 SolveJoinOrder();
742 // now the optimal join path should have been found
743 // get it from the node
744 unordered_set<idx_t> bindings;
745 for (idx_t i = 0; i < relations.size(); i++) {
746 bindings.insert(i);
747 }
748 auto total_relation = set_manager.GetJoinRelation(bindings);
749 auto final_plan = plans.find(total_relation);
750 if (final_plan == plans.end()) {
751 // could not find the final plan
752 // this should only happen in case the sets are actually disjunct
753 // in this case we need to generate cross product to connect the disjoint sets
754 GenerateCrossProducts();
755 //! solve the join order again
756 SolveJoinOrder();
757 // now we can obtain the final plan!
758 final_plan = plans.find(total_relation);
759 assert(final_plan != plans.end());
760 }
761 // now perform the actual reordering
762 return RewritePlan(move(plan), final_plan->second.get());
763}
764