join_order_optimizer.cpp source code [DuckDB/src/optimizer/join_order_optimizer.cpp]

1	#include "duckdb/optimizer/join_order_optimizer.hpp"
2
3	#include "duckdb/planner/expression/list.hpp"
4	#include "duckdb/planner/expression_iterator.hpp"
5	#include "duckdb/planner/operator/list.hpp"
6
7	using namespace duckdb;
8	using namespace std;
9
10	using JoinNode = JoinOrderOptimizer::JoinNode;
11
12	//! Returns true if A and B are disjoint, false otherwise
13	template <class T> static bool Disjoint(unordered_set<T> &a, unordered_set<T> &b) {
14	for (auto &entry : a) {
15	if (b.find(entry) != b.end()) {
16	return false;
17	}
18	}
19	return true;
20	}
21
22	//! Extract the set of relations referred to inside an expression
23	bool JoinOrderOptimizer::ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings) {
24	if (expression.type == ExpressionType::BOUND_COLUMN_REF) {
25	auto &colref = (BoundColumnRefExpression &)expression;
26	assert(colref.depth == `0`);
27	assert(colref.binding.table_index != INVALID_INDEX);
28	// map the base table index to the relation index used by the JoinOrderOptimizer
29	assert(relation_mapping.find(colref.binding.table_index) != relation_mapping.end());
30	bindings.insert(relation_mapping [colref.binding.table_index]);
31	}
32	if (expression.type == ExpressionType::BOUND_REF) {
33	// bound expression
34	bindings.clear();
35	return false;
36	}
37	assert(expression.type != ExpressionType::SUBQUERY);
38	bool can_reorder = true;
39	ExpressionIterator::EnumerateChildren(expression, [&](Expression &expr) {
40	if (!ExtractBindings(expr, bindings)) {
41	can_reorder = false;
42	return;
43	}
44	});
45	return can_reorder;
46	}
47
48	static unique_ptr<LogicalOperator> PushFilter(unique_ptr<LogicalOperator> node, unique_ptr<Expression> expr) {
49	// push an expression into a filter
50	// first check if we have any filter to push it into
51	if (node ->type != LogicalOperatorType::FILTER) {
52	// we don't, we need to create one
53	auto filter = make_unique<LogicalFilter>();
54	filter ->children.push_back(move(node));
55	node = move(filter);
56	}
57	// push the filter into the LogicalFilter
58	assert(node->type == LogicalOperatorType::FILTER);
59	auto filter = (LogicalFilter *)node.get();
60	filter->expressions.push_back(move(expr));
61	return node;
62	}
63
64	bool JoinOrderOptimizer::ExtractJoinRelations(LogicalOperator &input_op, vector<LogicalOperator *> &filter_operators,
65	LogicalOperator *parent) {
66	LogicalOperator *op = &input_op;
67	while (op->children.size() == `1` &&
68	(op->type != LogicalOperatorType::PROJECTION && op->type != LogicalOperatorType::EXPRESSION_GET)) {
69	if (op->type == LogicalOperatorType::FILTER) {
70	// extract join conditions from filter
71	filter_operators.push_back(op);
72	}
73	if (op->type == LogicalOperatorType::AGGREGATE_AND_GROUP_BY \|\| op->type == LogicalOperatorType::WINDOW) {
74	// don't push filters through projection or aggregate and group by
75	JoinOrderOptimizer optimizer;
76	op->children [`0`] = optimizer.Optimize(move(op->children [`0`]));
77	return false;
78	}
79	op = op->children [`0`].get();
80	}
81	bool non_reorderable_operation = false;
82	if (op->type == LogicalOperatorType::UNION \|\| op->type == LogicalOperatorType::EXCEPT \|\|
83	op->type == LogicalOperatorType::INTERSECT \|\| op->type == LogicalOperatorType::DELIM_JOIN \|\|
84	op->type == LogicalOperatorType::ANY_JOIN) {
85	// set operation, optimize separately in children
86	non_reorderable_operation = true;
87	}
88
89	if (op->type == LogicalOperatorType::COMPARISON_JOIN) {
90	LogicalJoin join = (LogicalJoin )op;
91	if (join->join_type == JoinType::INNER) {
92	// extract join conditions from inner join
93	filter_operators.push_back(op);
94	} else {
95	// non-inner join, not reordarable yet
96	non_reorderable_operation = true;
97	}
98	}
99	if (non_reorderable_operation) {
100	// we encountered a non-reordable operation (setop or non-inner join)
101	// we do not reorder non-inner joins yet, however we do want to expand the potential join graph around them
102	// non-inner joins are also tricky because we can't freely make conditions through them
103	// e.g. suppose we have (left LEFT OUTER JOIN right WHERE right IS NOT NULL), the join can generate
104	// new NULL values in the right side, so pushing this condition through the join leads to incorrect results
105	// for this reason, we just start a new JoinOptimizer pass in each of the children of the join
106	for (idx_t i = `0`; i < op->children.size(); i++) {
107	JoinOrderOptimizer optimizer;
108	op->children [i] = optimizer.Optimize(move(op->children [i]));
109	}
110	// after this we want to treat this node as one "end node" (like e.g. a base relation)
111	// however the join refers to multiple base relations
112	// enumerate all base relations obtained from this join and add them to the relation mapping
113	// also, we have to resolve the join conditions for the joins here
114	// get the left and right bindings
115	unordered_set<idx_t> bindings;
116	LogicalJoin::GetTableReferences(*op, bindings);
117	// now create the relation that refers to all these bindings
118	auto relation = make_unique<SingleJoinRelation>(&input_op, parent);
119	for (idx_t it : bindings) {
120	relation_mapping [it] = relations.size();
121	}
122	relations.push_back(move(relation));
123	return true;
124	}
125	if (op->type == LogicalOperatorType::COMPARISON_JOIN \|\| op->type == LogicalOperatorType::CROSS_PRODUCT) {
126	// inner join or cross product
127	bool can_reorder_left = ExtractJoinRelations(*op->children [`0`], filter_operators, op);
128	bool can_reorder_right = ExtractJoinRelations(*op->children [`1`], filter_operators, op);
129	return can_reorder_left && can_reorder_right;
130	} else if (op->type == LogicalOperatorType::GET) {
131	// base table scan, add to set of relations
132	auto get = (LogicalGet *)op;
133	auto relation = make_unique<SingleJoinRelation>(&input_op, parent);
134	relation_mapping [get->table_index] = relations.size();
135	relations.push_back(move(relation));
136	return true;
137	} else if (op->type == LogicalOperatorType::EXPRESSION_GET) {
138	// base table scan, add to set of relations
139	auto get = (LogicalExpressionGet *)op;
140	auto relation = make_unique<SingleJoinRelation>(&input_op, parent);
141	relation_mapping [get->table_index] = relations.size();
142	relations.push_back(move(relation));
143	return true;
144	} else if (op->type == LogicalOperatorType::TABLE_FUNCTION) {
145	// table function call, add to set of relations
146	auto table_function = (LogicalTableFunction *)op;
147	auto relation = make_unique<SingleJoinRelation>(&input_op, parent);
148	relation_mapping [table_function->table_index] = relations.size();
149	relations.push_back(move(relation));
150	return true;
151	} else if (op->type == LogicalOperatorType::PROJECTION) {
152	auto proj = (LogicalProjection *)op;
153	// we run the join order optimizer witin the subquery as well
154	JoinOrderOptimizer optimizer;
155	op->children [`0`] = optimizer.Optimize(move(op->children [`0`]));
156	// projection, add to the set of relations
157	auto relation = make_unique<SingleJoinRelation>(&input_op, parent);
158	relation_mapping [proj->table_index] = relations.size();
159	relations.push_back(move(relation));
160	return true;
161	}
162	return false;
163	}
164
165	//! Update the exclusion set with all entries in the subgraph
166	static void UpdateExclusionSet(JoinRelationSet *node, unordered_set<idx_t> &exclusion_set) {
167	for (idx_t i = `0`; i < node->count; i++) {
168	exclusion_set.insert(node->relations [i]);
169	}
170	}
171
172	//! Create a new JoinTree node by joining together two previous JoinTree nodes
173	static unique_ptr<JoinNode> CreateJoinTree(JoinRelationSet set, NeighborInfo info, JoinNode left, JoinNode right) {
174	// for the hash join we want the right side (build side) to have the smallest cardinality
175	// also just a heuristic but for now...
176	// FIXME: we should probably actually benchmark that as well
177	// FIXME: should consider different join algorithms, should we pick a join algorithm here as well? (probably)
178	if (left->cardinality < right->cardinality) {
179	return CreateJoinTree(set, info, right, left);
180	}
181	// the expected cardinality is the max of the child cardinalities
182	// FIXME: we should obviously use better cardinality estimation here
183	// but for now we just assume foreign key joins only
184	idx_t expected_cardinality;
185	if (info->filters.size() == `0`) {
186	// cross product
187	expected_cardinality = left->cardinality * right->cardinality;
188	} else {
189	// normal join, expect foreign key join
190	expected_cardinality = std::max(left->cardinality, right->cardinality);
191	}
192	// cost is expected_cardinality plus the cost of the previous plans
193	idx_t cost = expected_cardinality;
194	return make_unique<JoinNode>(set, info, left, right, expected_cardinality, cost);
195	}
196
197	JoinNode JoinOrderOptimizer::EmitPair(JoinRelationSet left, JoinRelationSet right, NeighborInfo info) {
198	// get the left and right join plans
199	auto &left_plan = plans [left];
200	auto &right_plan = plans [right];
201	auto new_set = set_manager.Union(left, right);
202	// create the join tree based on combining the two plans
203	auto new_plan = CreateJoinTree(new_set, info, left_plan.get(), right_plan.get());
204	// check if this plan is the optimal plan we found for this set of relations
205	auto entry = plans.find(new_set);
206	if (entry == plans.end() \|\| new_plan ->cost < entry ->second ->cost) {
207	// the plan is the optimal plan, move it into the dynamic programming tree
208	auto result = new_plan.get();
209	plans [new_set] = move(new_plan);
210	return result;
211	}
212	return entry ->second.get();
213	}
214
215	bool JoinOrderOptimizer::TryEmitPair(JoinRelationSet left, JoinRelationSet right, NeighborInfo *info) {
216	pairs++;
217	if (pairs >= `2000`) {
218	// when the amount of pairs gets too large we exit the dynamic programming and resort to a greedy algorithm
219	// FIXME: simple heuristic currently
220	// at 10K pairs stop searching exactly and switch to heuristic
221	return false;
222	}
223	EmitPair(left, right, info);
224	return true;
225	}
226
227	bool JoinOrderOptimizer::EmitCSG(JoinRelationSet *node) {
228	// create the exclusion set as everything inside the subgraph AND anything with members BELOW it
229	unordered_set<idx_t> exclusion_set;
230	for (idx_t i = `0`; i < node->relations [`0`]; i++) {
231	exclusion_set.insert(i);
232	}
233	UpdateExclusionSet(node, exclusion_set);
234	// find the neighbors given this exclusion set
235	auto neighbors = query_graph.GetNeighbors(node, exclusion_set);
236	if (neighbors.size() == `0`) {
237	return true;
238	}
239	// we iterate over the neighbors ordered by their first node
240	sort(neighbors.begin(), neighbors.end());
241	for (auto neighbor : neighbors) {
242	// since the GetNeighbors only returns the smallest element in a list, the entry might not be connected to
243	// (only!) this neighbor, hence we have to do a connectedness check before we can emit it
244	auto neighbor_relation = set_manager.GetJoinRelation(neighbor);
245	auto connection = query_graph.GetConnection(node, neighbor_relation);
246	if (connection) {
247	if (!TryEmitPair(node, neighbor_relation, connection)) {
248	return false;
249	}
250	}
251	if (!EnumerateCmpRecursive(node, neighbor_relation, exclusion_set)) {
252	return false;
253	}
254	}
255	return true;
256	}
257
258	bool JoinOrderOptimizer::EnumerateCmpRecursive(JoinRelationSet left, JoinRelationSet right,
259	unordered_set<idx_t> exclusion_set) {
260	// get the neighbors of the second relation under the exclusion set
261	auto neighbors = query_graph.GetNeighbors(right, exclusion_set);
262	if (neighbors.size() == `0`) {
263	return true;
264	}
265	vector<JoinRelationSet *> union_sets;
266	union_sets.resize(neighbors.size());
267	for (idx_t i = `0`; i < neighbors.size(); i++) {
268	auto neighbor = set_manager.GetJoinRelation(neighbors [i]);
269	// emit the combinations of this node and its neighbors
270	auto combined_set = set_manager.Union(right, neighbor);
271	if (plans.find(combined_set) != plans.end()) {
272	auto connection = query_graph.GetConnection(left, combined_set);
273	if (connection) {
274	if (!TryEmitPair(left, combined_set, connection)) {
275	return false;
276	}
277	}
278	}
279	union_sets [i] = combined_set;
280	}
281	// recursively enumerate the sets
282	for (idx_t i = `0`; i < neighbors.size(); i++) {
283	// updated the set of excluded entries with this neighbor
284	unordered_set<idx_t> new_exclusion_set = exclusion_set;
285	new_exclusion_set.insert(neighbors [i]);
286	if (!EnumerateCmpRecursive(left, union_sets [i], new_exclusion_set)) {
287	return false;
288	}
289	}
290	return true;
291	}
292
293	bool JoinOrderOptimizer::EnumerateCSGRecursive(JoinRelationSet *node, unordered_set<idx_t> &exclusion_set) {
294	// find neighbors of S under the exlusion set
295	auto neighbors = query_graph.GetNeighbors(node, exclusion_set);
296	if (neighbors.size() == `0`) {
297	return true;
298	}
299	// now first emit the connected subgraphs of the neighbors
300	vector<JoinRelationSet *> union_sets;
301	union_sets.resize(neighbors.size());
302	for (idx_t i = `0`; i < neighbors.size(); i++) {
303	auto neighbor = set_manager.GetJoinRelation(neighbors [i]);
304	// emit the combinations of this node and its neighbors
305	auto new_set = set_manager.Union(node, neighbor);
306	if (plans.find(new_set) != plans.end()) {
307	if (!EmitCSG(new_set)) {
308	return false;
309	}
310	}
311	union_sets [i] = new_set;
312	}
313	// recursively enumerate the sets
314	for (idx_t i = `0`; i < neighbors.size(); i++) {
315	// updated the set of excluded entries with this neighbor
316	unordered_set<idx_t> new_exclusion_set = exclusion_set;
317	new_exclusion_set.insert(neighbors [i]);
318	if (!EnumerateCSGRecursive(union_sets [i], new_exclusion_set)) {
319	return false;
320	}
321	}
322	return true;
323	}
324
325	bool JoinOrderOptimizer::SolveJoinOrderExactly() {
326	// now we perform the actual dynamic programming to compute the final result
327	// we enumerate over all the possible pairs in the neighborhood
328	for (idx_t i = relations.size(); i > `0`; i--) {
329	// for every node in the set, we consider it as the start node once
330	auto start_node = set_manager.GetJoinRelation(i - `1`);
331	// emit the start node
332	if (!EmitCSG(start_node)) {
333	return false;
334	}
335	// initialize the set of exclusion_set as all the nodes with a number below this
336	unordered_set<idx_t> exclusion_set;
337	for (idx_t j = `0`; j < i - `1`; j++) {
338	exclusion_set.insert(j);
339	}
340	// then we recursively search for neighbors that do not belong to the banned entries
341	if (!EnumerateCSGRecursive(start_node, exclusion_set)) {
342	return false;
343	}
344	}
345	return true;
346	}
347
348	void JoinOrderOptimizer::SolveJoinOrderApproximately() {
349	// at this point, we exited the dynamic programming but did not compute the final join order because it took too
350	// long instead, we use a greedy heuristic to obtain a join ordering now we use Greedy Operator Ordering to
351	// construct the result tree first we start out with all the base relations (the to-be-joined relations)
352	vector<JoinRelationSet *> T;
353	for (idx_t i = `0`; i < relations.size(); i++) {
354	T.push_back(set_manager.GetJoinRelation(i));
355	}
356	while (T.size() > `1`) {
357	// now in every step of the algorithm, we greedily pick the join between the to-be-joined relations that has the
358	// smallest cost. This is O(r^2) per step, and every step will reduce the total amount of relations to-be-joined
359	// by 1, so the total cost is O(r^3) in the amount of relations
360	idx_t best_left = `0`, best_right = `0`;
361	JoinNode best_connection = nullptr*;
362	for (idx_t i = `0`; i < T.size(); i++) {
363	auto left = T [i];
364	for (idx_t j = i + `1`; j < T.size(); j++) {
365	auto right = T [j];
366	// check if we can connect these two relations
367	auto connection = query_graph.GetConnection(left, right);
368	if (connection) {
369	// we can! check the cost of this connection
370	auto node = EmitPair(left, right, connection);
371	if (!best_connection \|\| node->cost < best_connection->cost) {
372	// best pair found so far
373	best_connection = node;
374	best_left = i;
375	best_right = j;
376	}
377	}
378	}
379	}
380	if (!best_connection) {
381	// could not find a connection, but we were not done with finding a completed plan
382	// we have to add a cross product; we add it between the two smallest relations
383	JoinNode smallest_plans[`2`] = {nullptr*};
384	idx_t smallest_index[`2`];
385	for (idx_t i = `0`; i < T.size(); i++) {
386	// get the plan for this relation
387	auto current_plan = plans [T [i]].get();
388	// check if the cardinality is smaller than the smallest two found so far
389	for (idx_t j = `0`; j < `2`; j++) {
390	if (!smallest_plans[j] \|\| smallest_plans[j]->cardinality > current_plan->cardinality) {
391	smallest_plans[j] = current_plan;
392	smallest_index[j] = i;
393	break;
394	}
395	}
396	}
397	assert(smallest_plans[`0`] && smallest_plans[`1`]);
398	assert(smallest_index[`0`] != smallest_index[`1`]);
399	auto left = smallest_plans[`0`]->set, right = smallest_plans[`1`]->set;
400	// create a cross product edge (i.e. edge with empty filter) between these two sets in the query graph
401	query_graph.CreateEdge(left, right, nullptr);
402	// now emit the pair and continue with the algorithm
403	auto connection = query_graph.GetConnection(left, right);
404	assert(connection);
405
406	best_connection = EmitPair(left, right, connection);
407	best_left = smallest_index[`0`];
408	best_right = smallest_index[`1`];
409	// the code below assumes best_right > best_left
410	if (best_left > best_right) {
411	swap(best_left, best_right);
412	}
413	}
414	// now update the to-be-checked pairs
415	// remove left and right, and add the combination
416
417	// important to erase the biggest element first
418	// if we erase the smallest element first the index of the biggest element changes
419	assert(best_right > best_left);
420	T.erase(T.begin() + best_right);
421	T.erase(T.begin() + best_left);
422	T.push_back(best_connection->set);
423	}
424	}
425
426	void JoinOrderOptimizer::SolveJoinOrder() {
427	// first try to solve the join order exactly
428	if (!SolveJoinOrderExactly()) {
429	// otherwise, if that times out we resort to a greedy algorithm
430	SolveJoinOrderApproximately();
431	}
432	}
433
434	void JoinOrderOptimizer::GenerateCrossProducts() {
435	// generate a set of cross products to combine the currently available plans into a full join plan
436	// we create edges between every relation with a high cost
437	for (idx_t i = `0`; i < relations.size(); i++) {
438	auto left = set_manager.GetJoinRelation(i);
439	for (idx_t j = `0`; j < relations.size(); j++) {
440	if (i != j) {
441	auto right = set_manager.GetJoinRelation(j);
442	query_graph.CreateEdge(left, right, nullptr);
443	query_graph.CreateEdge(right, left, nullptr);
444	}
445	}
446	}
447	}
448
449	static unique_ptr<LogicalOperator> ExtractJoinRelation(SingleJoinRelation &rel) {
450	auto &children = rel.parent->children;
451	for (idx_t i = `0`; i < children.size(); i++) {
452	if (children [i].get() == rel.op) {
453	// found it! take ownership of it from the parent
454	auto result = move(children [i]);
455	children.erase(children.begin() + i);
456	return result;
457	}
458	}
459	throw Exception ("Could not find relation in parent node (?)");
460	}
461
462	pair<JoinRelationSet *, unique_ptr<LogicalOperator>>
463	JoinOrderOptimizer::GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted_relations, JoinNode *node) {
464	JoinRelationSet left_node = nullptr, right_node = nullptr;
465	JoinRelationSet *result_relation;
466	unique_ptr<LogicalOperator> result_operator;
467	if (node->left && node->right) {
468	// generate the left and right children
469	auto left = GenerateJoins(extracted_relations, node->left);
470	auto right = GenerateJoins(extracted_relations, node->right);
471
472	if (node->info->filters.size() == `0`) {
473	// no filters, create a cross product
474	auto join = make_unique<LogicalCrossProduct>();
475	join ->children.push_back(move(left.second));
476	join ->children.push_back(move(right.second));
477	result_operator = move(join);
478	} else {
479	// we have filters, create a join node
480	auto join = make_unique<LogicalComparisonJoin>(JoinType::INNER);
481	join ->children.push_back(move(left.second));
482	join ->children.push_back(move(right.second));
483	// set the join conditions from the join node
484	for (auto &f : node->info->filters) {
485	// extract the filter from the operator it originally belonged to
486	assert(filters[f->filter_index]);
487	auto condition = move(filters [f->filter_index]);
488	// now create the actual join condition
489	assert((JoinRelationSet::IsSubset(left.first, f->left_set) &&
490	JoinRelationSet::IsSubset(right.first, f->right_set)) \|\|
491	(JoinRelationSet::IsSubset(left.first, f->right_set) &&
492	JoinRelationSet::IsSubset(right.first, f->left_set)));
493	JoinCondition cond;
494	assert(condition->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON);
495	auto &comparison = (BoundComparisonExpression &)*condition;
496	// we need to figure out which side is which by looking at the relations available to us
497	bool invert = JoinRelationSet::IsSubset(left.first, f->left_set) ? false : true;
498	cond.left = !invert ? move(comparison.left) : move(comparison.right);
499	cond.right = !invert ? move(comparison.right) : move(comparison.left);
500	cond.comparison = condition ->type;
501	if (invert) {
502	// reverse comparison expression if we reverse the order of the children
503	cond.comparison = FlipComparisionExpression(cond.comparison);
504	}
505	join ->conditions.push_back(move(cond));
506	}
507	assert(join->conditions.size() > `0`);
508	result_operator = move(join);
509	}
510	left_node = left.first;
511	right_node = right.first;
512	result_relation = set_manager.Union(left_node, right_node);
513	} else {
514	// base node, get the entry from the list of extracted relations
515	assert(node->set->count == `1`);
516	assert(extracted_relations[node->set->relations[`0`]]);
517	result_relation = node->set;
518	result_operator = move(extracted_relations [node->set->relations [`0`]]);
519	}
520	// check if we should do a pushdown on this node
521	// basically, any remaining filter that is a subset of the current relation will no longer be used in joins
522	// hence we should push it here
523	for (idx_t i = `0`; i < filter_infos.size(); i++) {
524	// check if the filter has already been extracted
525	auto info = filter_infos [i].get();
526	if (filters [info->filter_index]) {
527	// now check if the filter is a subset of the current relation
528	// note that infos with an empty relation set are a special case and we do not push them down
529	if (info->set->count > `0` && JoinRelationSet::IsSubset(result_relation, info->set)) {
530	auto filter = move(filters [info->filter_index]);
531	// if it is, we can push the filter
532	// we can push it either into a join or as a filter
533	// check if we are in a join or in a base table
534	if (!left_node \|\| !info->left_set) {
535	// base table or non-comparison expression, push it as a filter
536	result_operator = PushFilter(move(result_operator), move(filter));
537	continue;
538	}
539	// the node below us is a join or cross product and the expression is a comparison
540	// check if the nodes can be split up into left/right
541	bool found_subset = false;
542	bool invert = false;
543	if (JoinRelationSet::IsSubset(left_node, info->left_set) &&
544	JoinRelationSet::IsSubset(right_node, info->right_set)) {
545	found_subset = true;
546	} else if (JoinRelationSet::IsSubset(right_node, info->left_set) &&
547	JoinRelationSet::IsSubset(left_node, info->right_set)) {
548	invert = true;
549	found_subset = true;
550	}
551	if (!found_subset) {
552	// could not be split up into left/right
553	result_operator = PushFilter(move(result_operator), move(filter));
554	continue;
555	}
556	// create the join condition
557	JoinCondition cond;
558	assert(filter->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON);
559	auto &comparison = (BoundComparisonExpression &)*filter;
560	// we need to figure out which side is which by looking at the relations available to us
561	cond.left = !invert ? move(comparison.left) : move(comparison.right);
562	cond.right = !invert ? move(comparison.right) : move(comparison.left);
563	cond.comparison = comparison.type;
564	if (invert) {
565	// reverse comparison expression if we reverse the order of the children
566	cond.comparison = FlipComparisionExpression(comparison.type);
567	}
568	// now find the join to push it into
569	auto node = result_operator.get();
570	if (node->type == LogicalOperatorType::FILTER) {
571	node = node->children [`0`].get();
572	}
573	if (node->type == LogicalOperatorType::CROSS_PRODUCT) {
574	// turn into comparison join
575	auto comp_join = make_unique<LogicalComparisonJoin>(JoinType::INNER);
576	comp_join ->children.push_back(move(node->children [`0`]));
577	comp_join ->children.push_back(move(node->children [`1`]));
578	comp_join ->conditions.push_back(move(cond));
579	if (node == result_operator.get()) {
580	result_operator = move(comp_join);
581	} else {
582	assert(result_operator->type == LogicalOperatorType::FILTER);
583	result_operator ->children [`0`] = move(comp_join);
584	}
585	} else {
586	assert(node->type == LogicalOperatorType::COMPARISON_JOIN);
587	auto &comp_join = (LogicalComparisonJoin &)*node;
588	comp_join.conditions.push_back(move(cond));
589	}
590	}
591	}
592	}
593	return make_pair(result_relation, move(result_operator));
594	}
595
596	unique_ptr<LogicalOperator> JoinOrderOptimizer::RewritePlan(unique_ptr<LogicalOperator> plan, JoinNode *node) {
597	// now we have to rewrite the plan
598	bool root_is_join = plan ->children.size() > `1`;
599
600	// first we will extract all relations from the main plan
601	vector<unique_ptr<LogicalOperator>> extracted_relations;
602	for (idx_t i = `0`; i < relations.size(); i++) {
603	extracted_relations.push_back(ExtractJoinRelation(*relations [i]));
604	}
605	// now we generate the actual joins
606	auto join_tree = GenerateJoins(extracted_relations, node);
607	// perform the final pushdown of remaining filters
608	for (idx_t i = `0`; i < filters.size(); i++) {
609	// check if the filter has already been extracted
610	if (filters [i]) {
611	// if not we need to push it
612	join_tree.second = PushFilter(move(join_tree.second), move(filters [i]));
613	}
614	}
615
616	// find the first join in the relation to know where to place this node
617	if (root_is_join) {
618	// first node is the join, return it immediately
619	return move(join_tree.second);
620	}
621	assert(plan->children.size() == `1`);
622	// have to move up through the relations
623	auto op = plan.get();
624	auto parent = plan.get();
625	while (op->type != LogicalOperatorType::CROSS_PRODUCT && op->type != LogicalOperatorType::COMPARISON_JOIN) {
626	assert(op->children.size() == `1`);
627	parent = op;
628	op = op->children [`0`].get();
629	}
630	// have to replace at this node
631	parent->children [`0`] = move(join_tree.second);
632	return plan;
633	}
634
635	// the join ordering is pretty much a straight implementation of the paper "Dynamic Programming Strikes Back" by Guido
636	// Moerkotte and Thomas Neumannn, see that paper for additional info/documentation bonus slides:
637	// https://db.in.tum.de/teaching/ws1415/queryopt/chapter3.pdf?lang=de
638	// FIXME: incorporate cardinality estimation into the plans, possibly by pushing samples?
639	unique_ptr<LogicalOperator> JoinOrderOptimizer::Optimize(unique_ptr<LogicalOperator> plan) {
640	assert(filters.size() == `0` && relations.size() == `0`); // assert that the JoinOrderOptimizer has not been used before
641	LogicalOperator *op = plan.get();
642	// now we optimize the current plan
643	// we skip past until we find the first projection, we do this because the HAVING clause inserts a Filter AFTER the
644	// group by and this filter cannot be reordered
645	// extract a list of all relations that have to be joined together
646	// and a list of all conditions that is applied to them
647	vector<LogicalOperator *> filter_operators;
648	if (!ExtractJoinRelations(*op, filter_operators)) {
649	// do not support reordering this type of plan
650	return plan;
651	}
652	if (relations.size() <= `1`) {
653	// at most one relation, nothing to reorder
654	return plan;
655	}
656	// now that we know we are going to perform join ordering we actually extract the filters, eliminating duplicate
657	// filters in the process
658	expression_set_t filter_set;
659	for (auto &op : filter_operators) {
660	if (op->type == LogicalOperatorType::COMPARISON_JOIN) {
661	auto &join = (LogicalComparisonJoin &)*op;
662	assert(join.join_type == JoinType::INNER);
663	assert(join.expressions.size() == `0`);
664	for (auto &cond : join.conditions) {
665	auto comparison =
666	make_unique<BoundComparisonExpression>(cond.comparison, move(cond.left), move(cond.right));
667	if (filter_set.find(comparison.get()) == filter_set.end()) {
668	filter_set.insert(comparison.get());
669	filters.push_back(move(comparison));
670	}
671	}
672	join.conditions.clear();
673	} else {
674	for (idx_t i = `0`; i < op->expressions.size(); i++) {
675	if (filter_set.find(op->expressions [i].get()) == filter_set.end()) {
676	filter_set.insert(op->expressions [i].get());
677	filters.push_back(move(op->expressions [i]));
678	}
679	}
680	op->expressions.clear();
681	}
682	}
683	// create potential edges from the comparisons
684	for (idx_t i = `0`; i < filters.size(); i++) {
685	auto &filter = filters [i];
686	auto info = make_unique<FilterInfo>();
687	auto filter_info = info.get();
688	filter_infos.push_back(move(info));
689	// first extract the relation set for the entire filter
690	unordered_set<idx_t> bindings;
691	ExtractBindings(*filter, bindings);
692	filter_info->set = set_manager.GetJoinRelation(bindings);
693	filter_info->filter_index = i;
694	// now check if it can be used as a join predicate
695	if (filter ->GetExpressionClass() == ExpressionClass::BOUND_COMPARISON) {
696	auto comparison = (BoundComparisonExpression *)filter.get();
697	// extract the bindings that are required for the left and right side of the comparison
698	unordered_set<idx_t> left_bindings, right_bindings;
699	ExtractBindings(*comparison->left, left_bindings);
700	ExtractBindings(*comparison->right, right_bindings);
701	if (left_bindings.size() > `0` && right_bindings.size() > `0`) {
702	// both the left and the right side have bindings
703	// first create the relation sets, if they do not exist
704	filter_info->left_set = set_manager.GetJoinRelation(left_bindings);
705	filter_info->right_set = set_manager.GetJoinRelation(right_bindings);
706	// we can only create a meaningful edge if the sets are not exactly the same
707	if (filter_info->left_set != filter_info->right_set) {
708	// check if the sets are disjoint
709	if (Disjoint(left_bindings, right_bindings)) {
710	// they are disjoint, we only need to create one set of edges in the join graph
711	query_graph.CreateEdge(filter_info->left_set, filter_info->right_set, filter_info);
712	query_graph.CreateEdge(filter_info->right_set, filter_info->left_set, filter_info);
713	} else {
714	continue;
715	// the sets are not disjoint, we create two sets of edges
716	// auto left_difference = set_manager.Difference(filter_info->left_set, filter_info->right_set);
717	// auto right_difference = set_manager.Difference(filter_info->right_set,
718	// filter_info->left_set);
719	// // -> LEFT <-> RIGHT \ LEFT
720	// query_graph.CreateEdge(filter_info->left_set, right_difference, filter_info);
721	// query_graph.CreateEdge(right_difference, filter_info->left_set, filter_info);
722	// // -> RIGHT <-> LEFT \ RIGHT
723	// query_graph.CreateEdge(left_difference, filter_info->right_set, filter_info);
724	// query_graph.CreateEdge(filter_info->right_set, left_difference, filter_info);
725	}
726	continue;
727	}
728	}
729	}
730	}
731	// now use dynamic programming to figure out the optimal join order
732	// First we initialize each of the single-node plans with themselves and with their cardinalities these are the leaf
733	// nodes of the join tree NOTE: we can just use pointers to JoinRelationSet here because the GetJoinRelation*
734	// function ensures that a unique combination of relations will have a unique JoinRelationSet object.
735	for (idx_t i = `0`; i < relations.size(); i++) {
736	auto &rel = *relations [i];
737	auto node = set_manager.GetJoinRelation(i);
738	plans [node] = make_unique<JoinNode>(node, rel.op->EstimateCardinality());
739	}
740	// now we perform the actual dynamic programming to compute the final result
741	SolveJoinOrder();
742	// now the optimal join path should have been found
743	// get it from the node
744	unordered_set<idx_t> bindings;
745	for (idx_t i = `0`; i < relations.size(); i++) {
746	bindings.insert(i);
747	}
748	auto total_relation = set_manager.GetJoinRelation(bindings);
749	auto final_plan = plans.find(total_relation);
750	if (final_plan == plans.end()) {
751	// could not find the final plan
752	// this should only happen in case the sets are actually disjunct
753	// in this case we need to generate cross product to connect the disjoint sets
754	GenerateCrossProducts();
755	//! solve the join order again
756	SolveJoinOrder();
757	// now we can obtain the final plan!
758	final_plan = plans.find(total_relation);
759	assert(final_plan != plans.end());
760	}
761	// now perform the actual reordering
762	return RewritePlan(move(plan), final_plan ->second.get());
763	}
764

Browse the source code of DuckDB/src/optimizer/join_order_optimizer.cpp