| 1 | #include "duckdb/planner/expression/bound_columnref_expression.hpp" | 
| 2 | #include "duckdb/planner/expression/bound_comparison_expression.hpp" | 
| 3 | #include "duckdb/planner/expression/bound_conjunction_expression.hpp" | 
| 4 | #include "duckdb/planner/expression/bound_constant_expression.hpp" | 
| 5 | #include "duckdb/planner/expression/bound_operator_expression.hpp" | 
| 6 | #include "duckdb/planner/expression/bound_subquery_expression.hpp" | 
| 7 | #include "duckdb/planner/expression_iterator.hpp" | 
| 8 | #include "duckdb/planner/binder.hpp" | 
| 9 | #include "duckdb/planner/operator/logical_any_join.hpp" | 
| 10 | #include "duckdb/planner/operator/logical_asof_join.hpp" | 
| 11 | #include "duckdb/planner/operator/logical_comparison_join.hpp" | 
| 12 | #include "duckdb/planner/operator/logical_cross_product.hpp" | 
| 13 | #include "duckdb/planner/operator/logical_filter.hpp" | 
| 14 | #include "duckdb/planner/operator/logical_positional_join.hpp" | 
| 15 | #include "duckdb/planner/tableref/bound_joinref.hpp" | 
| 16 | #include "duckdb/main/client_context.hpp" | 
| 17 | #include "duckdb/planner/expression_binder/lateral_binder.hpp" | 
| 18 |  | 
| 19 | namespace duckdb { | 
| 20 |  | 
| 21 | //! Create a JoinCondition from a comparison | 
| 22 | static bool CreateJoinCondition(Expression &expr, const unordered_set<idx_t> &left_bindings, | 
| 23 |                                 const unordered_set<idx_t> &right_bindings, vector<JoinCondition> &conditions) { | 
| 24 | 	// comparison | 
| 25 | 	auto &comparison = expr.Cast<BoundComparisonExpression>(); | 
| 26 | 	auto left_side = JoinSide::GetJoinSide(expression&: *comparison.left, left_bindings, right_bindings); | 
| 27 | 	auto right_side = JoinSide::GetJoinSide(expression&: *comparison.right, left_bindings, right_bindings); | 
| 28 | 	if (left_side != JoinSide::BOTH && right_side != JoinSide::BOTH) { | 
| 29 | 		// join condition can be divided in a left/right side | 
| 30 | 		JoinCondition condition; | 
| 31 | 		condition.comparison = expr.type; | 
| 32 | 		auto left = std::move(comparison.left); | 
| 33 | 		auto right = std::move(comparison.right); | 
| 34 | 		if (left_side == JoinSide::RIGHT) { | 
| 35 | 			// left = right, right = left, flip the comparison symbol and reverse sides | 
| 36 | 			swap(a&: left, b&: right); | 
| 37 | 			condition.comparison = FlipComparisonExpression(type: expr.type); | 
| 38 | 		} | 
| 39 | 		condition.left = std::move(left); | 
| 40 | 		condition.right = std::move(right); | 
| 41 | 		conditions.push_back(x: std::move(condition)); | 
| 42 | 		return true; | 
| 43 | 	} | 
| 44 | 	return false; | 
| 45 | } | 
| 46 |  | 
| 47 | void LogicalComparisonJoin::(JoinType type, unique_ptr<LogicalOperator> &left_child, | 
| 48 |                                                   unique_ptr<LogicalOperator> &right_child, | 
| 49 |                                                   const unordered_set<idx_t> &left_bindings, | 
| 50 |                                                   const unordered_set<idx_t> &right_bindings, | 
| 51 |                                                   vector<unique_ptr<Expression>> &expressions, | 
| 52 |                                                   vector<JoinCondition> &conditions, | 
| 53 |                                                   vector<unique_ptr<Expression>> &arbitrary_expressions) { | 
| 54 | 	for (auto &expr : expressions) { | 
| 55 | 		auto total_side = JoinSide::GetJoinSide(expression&: *expr, left_bindings, right_bindings); | 
| 56 | 		if (total_side != JoinSide::BOTH) { | 
| 57 | 			// join condition does not reference both sides, add it as filter under the join | 
| 58 | 			if (type == JoinType::LEFT && total_side == JoinSide::RIGHT) { | 
| 59 | 				// filter is on RHS and the join is a LEFT OUTER join, we can push it in the right child | 
| 60 | 				if (right_child->type != LogicalOperatorType::LOGICAL_FILTER) { | 
| 61 | 					// not a filter yet, push a new empty filter | 
| 62 | 					auto filter = make_uniq<LogicalFilter>(); | 
| 63 | 					filter->AddChild(child: std::move(right_child)); | 
| 64 | 					right_child = std::move(filter); | 
| 65 | 				} | 
| 66 | 				// push the expression into the filter | 
| 67 | 				auto &filter = right_child->Cast<LogicalFilter>(); | 
| 68 | 				filter.expressions.push_back(x: std::move(expr)); | 
| 69 | 				continue; | 
| 70 | 			} | 
| 71 | 		} else if ((expr->type >= ExpressionType::COMPARE_EQUAL && | 
| 72 | 		            expr->type <= ExpressionType::COMPARE_GREATERTHANOREQUALTO) || | 
| 73 | 		           expr->type == ExpressionType::COMPARE_DISTINCT_FROM || | 
| 74 | 		           expr->type == ExpressionType::COMPARE_NOT_DISTINCT_FROM) { | 
| 75 | 			// comparison, check if we can create a comparison JoinCondition | 
| 76 | 			if (CreateJoinCondition(expr&: *expr, left_bindings, right_bindings, conditions)) { | 
| 77 | 				// successfully created the join condition | 
| 78 | 				continue; | 
| 79 | 			} | 
| 80 | 		} | 
| 81 | 		arbitrary_expressions.push_back(x: std::move(expr)); | 
| 82 | 	} | 
| 83 | } | 
| 84 |  | 
| 85 | void LogicalComparisonJoin::(JoinType type, unique_ptr<LogicalOperator> &left_child, | 
| 86 |                                                   unique_ptr<LogicalOperator> &right_child, | 
| 87 |                                                   vector<unique_ptr<Expression>> &expressions, | 
| 88 |                                                   vector<JoinCondition> &conditions, | 
| 89 |                                                   vector<unique_ptr<Expression>> &arbitrary_expressions) { | 
| 90 | 	unordered_set<idx_t> left_bindings, right_bindings; | 
| 91 | 	LogicalJoin::GetTableReferences(op&: *left_child, bindings&: left_bindings); | 
| 92 | 	LogicalJoin::GetTableReferences(op&: *right_child, bindings&: right_bindings); | 
| 93 | 	return ExtractJoinConditions(type, left_child, right_child, left_bindings, right_bindings, expressions, conditions, | 
| 94 | 	                             arbitrary_expressions); | 
| 95 | } | 
| 96 |  | 
| 97 | void LogicalComparisonJoin::(JoinType type, unique_ptr<LogicalOperator> &left_child, | 
| 98 |                                                   unique_ptr<LogicalOperator> &right_child, | 
| 99 |                                                   unique_ptr<Expression> condition, vector<JoinCondition> &conditions, | 
| 100 |                                                   vector<unique_ptr<Expression>> &arbitrary_expressions) { | 
| 101 | 	// split the expressions by the AND clause | 
| 102 | 	vector<unique_ptr<Expression>> expressions; | 
| 103 | 	expressions.push_back(x: std::move(condition)); | 
| 104 | 	LogicalFilter::SplitPredicates(expressions); | 
| 105 | 	return ExtractJoinConditions(type, left_child, right_child, expressions, conditions, arbitrary_expressions); | 
| 106 | } | 
| 107 |  | 
| 108 | unique_ptr<LogicalOperator> LogicalComparisonJoin::CreateJoin(JoinType type, JoinRefType reftype, | 
| 109 |                                                               unique_ptr<LogicalOperator> left_child, | 
| 110 |                                                               unique_ptr<LogicalOperator> right_child, | 
| 111 |                                                               vector<JoinCondition> conditions, | 
| 112 |                                                               vector<unique_ptr<Expression>> arbitrary_expressions) { | 
| 113 | 	// Validate the conditions | 
| 114 | 	bool need_to_consider_arbitrary_expressions = true; | 
| 115 | 	switch (reftype) { | 
| 116 | 	case JoinRefType::ASOF: { | 
| 117 | 		need_to_consider_arbitrary_expressions = false; | 
| 118 | 		auto asof_idx = conditions.size(); | 
| 119 | 		for (size_t c = 0; c < conditions.size(); ++c) { | 
| 120 | 			auto &cond = conditions[c]; | 
| 121 | 			switch (cond.comparison) { | 
| 122 | 			case ExpressionType::COMPARE_EQUAL: | 
| 123 | 			case ExpressionType::COMPARE_NOT_DISTINCT_FROM: | 
| 124 | 				break; | 
| 125 | 			case ExpressionType::COMPARE_GREATERTHANOREQUALTO: | 
| 126 | 				if (asof_idx < conditions.size()) { | 
| 127 | 					throw BinderException("Multiple ASOF JOIN inequalities" ); | 
| 128 | 				} | 
| 129 | 				asof_idx = c; | 
| 130 | 				break; | 
| 131 | 			default: | 
| 132 | 				throw BinderException("Invalid ASOF JOIN comparison" ); | 
| 133 | 			} | 
| 134 | 		} | 
| 135 | 		if (asof_idx == conditions.size()) { | 
| 136 | 			throw BinderException("Missing ASOF JOIN inequality" ); | 
| 137 | 		} | 
| 138 | 		break; | 
| 139 | 	} | 
| 140 | 	default: | 
| 141 | 		break; | 
| 142 | 	} | 
| 143 |  | 
| 144 | 	if (type == JoinType::INNER && reftype == JoinRefType::REGULAR) { | 
| 145 | 		// for inner joins we can push arbitrary expressions as a filter | 
| 146 | 		// here we prefer to create a comparison join if possible | 
| 147 | 		// that way we can use the much faster hash join to process the main join | 
| 148 | 		// rather than doing a nested loop join to handle arbitrary expressions | 
| 149 |  | 
| 150 | 		// for left and full outer joins we HAVE to process all join conditions | 
| 151 | 		// because pushing a filter will lead to an incorrect result, as non-matching tuples cannot be filtered out | 
| 152 | 		need_to_consider_arbitrary_expressions = false; | 
| 153 | 	} | 
| 154 | 	if ((need_to_consider_arbitrary_expressions && !arbitrary_expressions.empty()) || conditions.empty()) { | 
| 155 | 		if (arbitrary_expressions.empty()) { | 
| 156 | 			// all conditions were pushed down, add TRUE predicate | 
| 157 | 			arbitrary_expressions.push_back(x: make_uniq<BoundConstantExpression>(args: Value::BOOLEAN(value: true))); | 
| 158 | 		} | 
| 159 | 		for (auto &condition : conditions) { | 
| 160 | 			arbitrary_expressions.push_back(x: JoinCondition::CreateExpression(cond: std::move(condition))); | 
| 161 | 		} | 
| 162 | 		// if we get here we could not create any JoinConditions | 
| 163 | 		// turn this into an arbitrary expression join | 
| 164 | 		auto any_join = make_uniq<LogicalAnyJoin>(args&: type); | 
| 165 | 		// create the condition | 
| 166 | 		any_join->children.push_back(x: std::move(left_child)); | 
| 167 | 		any_join->children.push_back(x: std::move(right_child)); | 
| 168 | 		// AND all the arbitrary expressions together | 
| 169 | 		// do the same with any remaining conditions | 
| 170 | 		any_join->condition = std::move(arbitrary_expressions[0]); | 
| 171 | 		for (idx_t i = 1; i < arbitrary_expressions.size(); i++) { | 
| 172 | 			any_join->condition = make_uniq<BoundConjunctionExpression>( | 
| 173 | 			    args: ExpressionType::CONJUNCTION_AND, args: std::move(any_join->condition), args: std::move(arbitrary_expressions[i])); | 
| 174 | 		} | 
| 175 | 		return std::move(any_join); | 
| 176 | 	} else { | 
| 177 | 		// we successfully converted expressions into JoinConditions | 
| 178 | 		// create a LogicalComparisonJoin | 
| 179 | 		unique_ptr<LogicalComparisonJoin> comp_join; | 
| 180 | 		if (reftype == JoinRefType::ASOF) { | 
| 181 | 			comp_join = make_uniq<LogicalAsOfJoin>(args&: type); | 
| 182 | 		} else { | 
| 183 | 			comp_join = make_uniq<LogicalComparisonJoin>(args&: type); | 
| 184 | 		} | 
| 185 | 		comp_join->conditions = std::move(conditions); | 
| 186 | 		comp_join->children.push_back(x: std::move(left_child)); | 
| 187 | 		comp_join->children.push_back(x: std::move(right_child)); | 
| 188 | 		if (!arbitrary_expressions.empty()) { | 
| 189 | 			// we have some arbitrary expressions as well | 
| 190 | 			// add them to a filter | 
| 191 | 			auto filter = make_uniq<LogicalFilter>(); | 
| 192 | 			for (auto &expr : arbitrary_expressions) { | 
| 193 | 				filter->expressions.push_back(x: std::move(expr)); | 
| 194 | 			} | 
| 195 | 			LogicalFilter::SplitPredicates(expressions&: filter->expressions); | 
| 196 | 			filter->children.push_back(x: std::move(comp_join)); | 
| 197 | 			return std::move(filter); | 
| 198 | 		} | 
| 199 | 		return std::move(comp_join); | 
| 200 | 	} | 
| 201 | } | 
| 202 |  | 
| 203 | static bool HasCorrelatedColumns(Expression &expression) { | 
| 204 | 	if (expression.type == ExpressionType::BOUND_COLUMN_REF) { | 
| 205 | 		auto &colref = expression.Cast<BoundColumnRefExpression>(); | 
| 206 | 		if (colref.depth > 0) { | 
| 207 | 			return true; | 
| 208 | 		} | 
| 209 | 	} | 
| 210 | 	bool has_correlated_columns = false; | 
| 211 | 	ExpressionIterator::EnumerateChildren(expression, callback: [&](Expression &child) { | 
| 212 | 		if (HasCorrelatedColumns(expression&: child)) { | 
| 213 | 			has_correlated_columns = true; | 
| 214 | 		} | 
| 215 | 	}); | 
| 216 | 	return has_correlated_columns; | 
| 217 | } | 
| 218 |  | 
| 219 | unique_ptr<LogicalOperator> LogicalComparisonJoin::CreateJoin(JoinType type, JoinRefType reftype, | 
| 220 |                                                               unique_ptr<LogicalOperator> left_child, | 
| 221 |                                                               unique_ptr<LogicalOperator> right_child, | 
| 222 |                                                               unique_ptr<Expression> condition) { | 
| 223 | 	vector<JoinCondition> conditions; | 
| 224 | 	vector<unique_ptr<Expression>> arbitrary_expressions; | 
| 225 | 	LogicalComparisonJoin::ExtractJoinConditions(type, left_child, right_child, condition: std::move(condition), conditions, | 
| 226 | 	                                             arbitrary_expressions); | 
| 227 | 	return LogicalComparisonJoin::CreateJoin(type, reftype, left_child: std::move(left_child), right_child: std::move(right_child), | 
| 228 | 	                                         conditions: std::move(conditions), arbitrary_expressions: std::move(arbitrary_expressions)); | 
| 229 | } | 
| 230 |  | 
| 231 | unique_ptr<LogicalOperator> Binder::CreatePlan(BoundJoinRef &ref) { | 
| 232 | 	auto left = CreatePlan(ref&: *ref.left); | 
| 233 | 	auto right = CreatePlan(ref&: *ref.right); | 
| 234 | 	if (!ref.lateral && !ref.correlated_columns.empty()) { | 
| 235 | 		// non-lateral join with correlated columns | 
| 236 | 		// this happens if there is a join (or cross product) in a correlated subquery | 
| 237 | 		// due to the lateral binder the expression depth of all correlated columns in the "ref.correlated_columns" set | 
| 238 | 		// is 1 too high | 
| 239 | 		// we reduce expression depth of all columns in the "ref.correlated_columns" set by 1 | 
| 240 | 		LateralBinder::ReduceExpressionDepth(op&: *right, info: ref.correlated_columns); | 
| 241 | 	} | 
| 242 | 	if (ref.type == JoinType::RIGHT && ref.ref_type != JoinRefType::ASOF && | 
| 243 | 	    ClientConfig::GetConfig(context).enable_optimizer) { | 
| 244 | 		// we turn any right outer joins into left outer joins for optimization purposes | 
| 245 | 		// they are the same but with sides flipped, so treating them the same simplifies life | 
| 246 | 		ref.type = JoinType::LEFT; | 
| 247 | 		std::swap(a&: left, b&: right); | 
| 248 | 	} | 
| 249 | 	if (ref.lateral) { | 
| 250 | 		// lateral join | 
| 251 | 		return PlanLateralJoin(left: std::move(left), right: std::move(right), correlated_columns&: ref.correlated_columns, join_type: ref.type, | 
| 252 | 		                       condition: std::move(ref.condition)); | 
| 253 | 	} | 
| 254 | 	switch (ref.ref_type) { | 
| 255 | 	case JoinRefType::CROSS: | 
| 256 | 		return LogicalCrossProduct::Create(left: std::move(left), right: std::move(right)); | 
| 257 | 	case JoinRefType::POSITIONAL: | 
| 258 | 		return LogicalPositionalJoin::Create(left: std::move(left), right: std::move(right)); | 
| 259 | 	default: | 
| 260 | 		break; | 
| 261 | 	} | 
| 262 | 	if (ref.type == JoinType::INNER && (ref.condition->HasSubquery() || HasCorrelatedColumns(expression&: *ref.condition)) && | 
| 263 | 	    ref.ref_type == JoinRefType::REGULAR) { | 
| 264 | 		// inner join, generate a cross product + filter | 
| 265 | 		// this will be later turned into a proper join by the join order optimizer | 
| 266 | 		auto root = LogicalCrossProduct::Create(left: std::move(left), right: std::move(right)); | 
| 267 |  | 
| 268 | 		auto filter = make_uniq<LogicalFilter>(args: std::move(ref.condition)); | 
| 269 | 		// visit the expressions in the filter | 
| 270 | 		for (auto &expression : filter->expressions) { | 
| 271 | 			PlanSubqueries(expr&: expression, root); | 
| 272 | 		} | 
| 273 | 		filter->AddChild(child: std::move(root)); | 
| 274 | 		return std::move(filter); | 
| 275 | 	} | 
| 276 |  | 
| 277 | 	// now create the join operator from the join condition | 
| 278 | 	auto result = LogicalComparisonJoin::CreateJoin(type: ref.type, reftype: ref.ref_type, left_child: std::move(left), right_child: std::move(right), | 
| 279 | 	                                                condition: std::move(ref.condition)); | 
| 280 |  | 
| 281 | 	optional_ptr<LogicalOperator> join; | 
| 282 | 	if (result->type == LogicalOperatorType::LOGICAL_FILTER) { | 
| 283 | 		join = result->children[0].get(); | 
| 284 | 	} else { | 
| 285 | 		join = result.get(); | 
| 286 | 	} | 
| 287 | 	for (auto &child : join->children) { | 
| 288 | 		if (child->type == LogicalOperatorType::LOGICAL_FILTER) { | 
| 289 | 			auto &filter = child->Cast<LogicalFilter>(); | 
| 290 | 			for (auto &expr : filter.expressions) { | 
| 291 | 				PlanSubqueries(expr, root&: filter.children[0]); | 
| 292 | 			} | 
| 293 | 		} | 
| 294 | 	} | 
| 295 |  | 
| 296 | 	// we visit the expressions depending on the type of join | 
| 297 | 	switch (join->type) { | 
| 298 | 	case LogicalOperatorType::LOGICAL_ASOF_JOIN: | 
| 299 | 	case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: { | 
| 300 | 		// comparison join | 
| 301 | 		// in this join we visit the expressions on the LHS with the LHS as root node | 
| 302 | 		// and the expressions on the RHS with the RHS as root node | 
| 303 | 		auto &comp_join = join->Cast<LogicalComparisonJoin>(); | 
| 304 | 		for (idx_t i = 0; i < comp_join.conditions.size(); i++) { | 
| 305 | 			PlanSubqueries(expr&: comp_join.conditions[i].left, root&: comp_join.children[0]); | 
| 306 | 			PlanSubqueries(expr&: comp_join.conditions[i].right, root&: comp_join.children[1]); | 
| 307 | 		} | 
| 308 | 		break; | 
| 309 | 	} | 
| 310 | 	case LogicalOperatorType::LOGICAL_ANY_JOIN: { | 
| 311 | 		auto &any_join = join->Cast<LogicalAnyJoin>(); | 
| 312 | 		// for the any join we just visit the condition | 
| 313 | 		if (any_join.condition->HasSubquery()) { | 
| 314 | 			throw NotImplementedException("Cannot perform non-inner join on subquery!" ); | 
| 315 | 		} | 
| 316 | 		break; | 
| 317 | 	} | 
| 318 | 	default: | 
| 319 | 		break; | 
| 320 | 	} | 
| 321 | 	return result; | 
| 322 | } | 
| 323 |  | 
| 324 | } // namespace duckdb | 
| 325 |  |