| 1 | #include "duckdb/planner/expression/bound_columnref_expression.hpp" |
| 2 | #include "duckdb/planner/expression/bound_comparison_expression.hpp" |
| 3 | #include "duckdb/planner/expression/bound_conjunction_expression.hpp" |
| 4 | #include "duckdb/planner/expression/bound_constant_expression.hpp" |
| 5 | #include "duckdb/planner/expression/bound_operator_expression.hpp" |
| 6 | #include "duckdb/planner/expression/bound_subquery_expression.hpp" |
| 7 | #include "duckdb/planner/expression_iterator.hpp" |
| 8 | #include "duckdb/planner/binder.hpp" |
| 9 | #include "duckdb/planner/operator/logical_any_join.hpp" |
| 10 | #include "duckdb/planner/operator/logical_asof_join.hpp" |
| 11 | #include "duckdb/planner/operator/logical_comparison_join.hpp" |
| 12 | #include "duckdb/planner/operator/logical_cross_product.hpp" |
| 13 | #include "duckdb/planner/operator/logical_filter.hpp" |
| 14 | #include "duckdb/planner/operator/logical_positional_join.hpp" |
| 15 | #include "duckdb/planner/tableref/bound_joinref.hpp" |
| 16 | #include "duckdb/main/client_context.hpp" |
| 17 | #include "duckdb/planner/expression_binder/lateral_binder.hpp" |
| 18 | |
| 19 | namespace duckdb { |
| 20 | |
| 21 | //! Create a JoinCondition from a comparison |
| 22 | static bool CreateJoinCondition(Expression &expr, const unordered_set<idx_t> &left_bindings, |
| 23 | const unordered_set<idx_t> &right_bindings, vector<JoinCondition> &conditions) { |
| 24 | // comparison |
| 25 | auto &comparison = expr.Cast<BoundComparisonExpression>(); |
| 26 | auto left_side = JoinSide::GetJoinSide(expression&: *comparison.left, left_bindings, right_bindings); |
| 27 | auto right_side = JoinSide::GetJoinSide(expression&: *comparison.right, left_bindings, right_bindings); |
| 28 | if (left_side != JoinSide::BOTH && right_side != JoinSide::BOTH) { |
| 29 | // join condition can be divided in a left/right side |
| 30 | JoinCondition condition; |
| 31 | condition.comparison = expr.type; |
| 32 | auto left = std::move(comparison.left); |
| 33 | auto right = std::move(comparison.right); |
| 34 | if (left_side == JoinSide::RIGHT) { |
| 35 | // left = right, right = left, flip the comparison symbol and reverse sides |
| 36 | swap(a&: left, b&: right); |
| 37 | condition.comparison = FlipComparisonExpression(type: expr.type); |
| 38 | } |
| 39 | condition.left = std::move(left); |
| 40 | condition.right = std::move(right); |
| 41 | conditions.push_back(x: std::move(condition)); |
| 42 | return true; |
| 43 | } |
| 44 | return false; |
| 45 | } |
| 46 | |
| 47 | void LogicalComparisonJoin::(JoinType type, unique_ptr<LogicalOperator> &left_child, |
| 48 | unique_ptr<LogicalOperator> &right_child, |
| 49 | const unordered_set<idx_t> &left_bindings, |
| 50 | const unordered_set<idx_t> &right_bindings, |
| 51 | vector<unique_ptr<Expression>> &expressions, |
| 52 | vector<JoinCondition> &conditions, |
| 53 | vector<unique_ptr<Expression>> &arbitrary_expressions) { |
| 54 | for (auto &expr : expressions) { |
| 55 | auto total_side = JoinSide::GetJoinSide(expression&: *expr, left_bindings, right_bindings); |
| 56 | if (total_side != JoinSide::BOTH) { |
| 57 | // join condition does not reference both sides, add it as filter under the join |
| 58 | if (type == JoinType::LEFT && total_side == JoinSide::RIGHT) { |
| 59 | // filter is on RHS and the join is a LEFT OUTER join, we can push it in the right child |
| 60 | if (right_child->type != LogicalOperatorType::LOGICAL_FILTER) { |
| 61 | // not a filter yet, push a new empty filter |
| 62 | auto filter = make_uniq<LogicalFilter>(); |
| 63 | filter->AddChild(child: std::move(right_child)); |
| 64 | right_child = std::move(filter); |
| 65 | } |
| 66 | // push the expression into the filter |
| 67 | auto &filter = right_child->Cast<LogicalFilter>(); |
| 68 | filter.expressions.push_back(x: std::move(expr)); |
| 69 | continue; |
| 70 | } |
| 71 | } else if ((expr->type >= ExpressionType::COMPARE_EQUAL && |
| 72 | expr->type <= ExpressionType::COMPARE_GREATERTHANOREQUALTO) || |
| 73 | expr->type == ExpressionType::COMPARE_DISTINCT_FROM || |
| 74 | expr->type == ExpressionType::COMPARE_NOT_DISTINCT_FROM) { |
| 75 | // comparison, check if we can create a comparison JoinCondition |
| 76 | if (CreateJoinCondition(expr&: *expr, left_bindings, right_bindings, conditions)) { |
| 77 | // successfully created the join condition |
| 78 | continue; |
| 79 | } |
| 80 | } |
| 81 | arbitrary_expressions.push_back(x: std::move(expr)); |
| 82 | } |
| 83 | } |
| 84 | |
| 85 | void LogicalComparisonJoin::(JoinType type, unique_ptr<LogicalOperator> &left_child, |
| 86 | unique_ptr<LogicalOperator> &right_child, |
| 87 | vector<unique_ptr<Expression>> &expressions, |
| 88 | vector<JoinCondition> &conditions, |
| 89 | vector<unique_ptr<Expression>> &arbitrary_expressions) { |
| 90 | unordered_set<idx_t> left_bindings, right_bindings; |
| 91 | LogicalJoin::GetTableReferences(op&: *left_child, bindings&: left_bindings); |
| 92 | LogicalJoin::GetTableReferences(op&: *right_child, bindings&: right_bindings); |
| 93 | return ExtractJoinConditions(type, left_child, right_child, left_bindings, right_bindings, expressions, conditions, |
| 94 | arbitrary_expressions); |
| 95 | } |
| 96 | |
| 97 | void LogicalComparisonJoin::(JoinType type, unique_ptr<LogicalOperator> &left_child, |
| 98 | unique_ptr<LogicalOperator> &right_child, |
| 99 | unique_ptr<Expression> condition, vector<JoinCondition> &conditions, |
| 100 | vector<unique_ptr<Expression>> &arbitrary_expressions) { |
| 101 | // split the expressions by the AND clause |
| 102 | vector<unique_ptr<Expression>> expressions; |
| 103 | expressions.push_back(x: std::move(condition)); |
| 104 | LogicalFilter::SplitPredicates(expressions); |
| 105 | return ExtractJoinConditions(type, left_child, right_child, expressions, conditions, arbitrary_expressions); |
| 106 | } |
| 107 | |
| 108 | unique_ptr<LogicalOperator> LogicalComparisonJoin::CreateJoin(JoinType type, JoinRefType reftype, |
| 109 | unique_ptr<LogicalOperator> left_child, |
| 110 | unique_ptr<LogicalOperator> right_child, |
| 111 | vector<JoinCondition> conditions, |
| 112 | vector<unique_ptr<Expression>> arbitrary_expressions) { |
| 113 | // Validate the conditions |
| 114 | bool need_to_consider_arbitrary_expressions = true; |
| 115 | switch (reftype) { |
| 116 | case JoinRefType::ASOF: { |
| 117 | need_to_consider_arbitrary_expressions = false; |
| 118 | auto asof_idx = conditions.size(); |
| 119 | for (size_t c = 0; c < conditions.size(); ++c) { |
| 120 | auto &cond = conditions[c]; |
| 121 | switch (cond.comparison) { |
| 122 | case ExpressionType::COMPARE_EQUAL: |
| 123 | case ExpressionType::COMPARE_NOT_DISTINCT_FROM: |
| 124 | break; |
| 125 | case ExpressionType::COMPARE_GREATERTHANOREQUALTO: |
| 126 | if (asof_idx < conditions.size()) { |
| 127 | throw BinderException("Multiple ASOF JOIN inequalities" ); |
| 128 | } |
| 129 | asof_idx = c; |
| 130 | break; |
| 131 | default: |
| 132 | throw BinderException("Invalid ASOF JOIN comparison" ); |
| 133 | } |
| 134 | } |
| 135 | if (asof_idx == conditions.size()) { |
| 136 | throw BinderException("Missing ASOF JOIN inequality" ); |
| 137 | } |
| 138 | break; |
| 139 | } |
| 140 | default: |
| 141 | break; |
| 142 | } |
| 143 | |
| 144 | if (type == JoinType::INNER && reftype == JoinRefType::REGULAR) { |
| 145 | // for inner joins we can push arbitrary expressions as a filter |
| 146 | // here we prefer to create a comparison join if possible |
| 147 | // that way we can use the much faster hash join to process the main join |
| 148 | // rather than doing a nested loop join to handle arbitrary expressions |
| 149 | |
| 150 | // for left and full outer joins we HAVE to process all join conditions |
| 151 | // because pushing a filter will lead to an incorrect result, as non-matching tuples cannot be filtered out |
| 152 | need_to_consider_arbitrary_expressions = false; |
| 153 | } |
| 154 | if ((need_to_consider_arbitrary_expressions && !arbitrary_expressions.empty()) || conditions.empty()) { |
| 155 | if (arbitrary_expressions.empty()) { |
| 156 | // all conditions were pushed down, add TRUE predicate |
| 157 | arbitrary_expressions.push_back(x: make_uniq<BoundConstantExpression>(args: Value::BOOLEAN(value: true))); |
| 158 | } |
| 159 | for (auto &condition : conditions) { |
| 160 | arbitrary_expressions.push_back(x: JoinCondition::CreateExpression(cond: std::move(condition))); |
| 161 | } |
| 162 | // if we get here we could not create any JoinConditions |
| 163 | // turn this into an arbitrary expression join |
| 164 | auto any_join = make_uniq<LogicalAnyJoin>(args&: type); |
| 165 | // create the condition |
| 166 | any_join->children.push_back(x: std::move(left_child)); |
| 167 | any_join->children.push_back(x: std::move(right_child)); |
| 168 | // AND all the arbitrary expressions together |
| 169 | // do the same with any remaining conditions |
| 170 | any_join->condition = std::move(arbitrary_expressions[0]); |
| 171 | for (idx_t i = 1; i < arbitrary_expressions.size(); i++) { |
| 172 | any_join->condition = make_uniq<BoundConjunctionExpression>( |
| 173 | args: ExpressionType::CONJUNCTION_AND, args: std::move(any_join->condition), args: std::move(arbitrary_expressions[i])); |
| 174 | } |
| 175 | return std::move(any_join); |
| 176 | } else { |
| 177 | // we successfully converted expressions into JoinConditions |
| 178 | // create a LogicalComparisonJoin |
| 179 | unique_ptr<LogicalComparisonJoin> comp_join; |
| 180 | if (reftype == JoinRefType::ASOF) { |
| 181 | comp_join = make_uniq<LogicalAsOfJoin>(args&: type); |
| 182 | } else { |
| 183 | comp_join = make_uniq<LogicalComparisonJoin>(args&: type); |
| 184 | } |
| 185 | comp_join->conditions = std::move(conditions); |
| 186 | comp_join->children.push_back(x: std::move(left_child)); |
| 187 | comp_join->children.push_back(x: std::move(right_child)); |
| 188 | if (!arbitrary_expressions.empty()) { |
| 189 | // we have some arbitrary expressions as well |
| 190 | // add them to a filter |
| 191 | auto filter = make_uniq<LogicalFilter>(); |
| 192 | for (auto &expr : arbitrary_expressions) { |
| 193 | filter->expressions.push_back(x: std::move(expr)); |
| 194 | } |
| 195 | LogicalFilter::SplitPredicates(expressions&: filter->expressions); |
| 196 | filter->children.push_back(x: std::move(comp_join)); |
| 197 | return std::move(filter); |
| 198 | } |
| 199 | return std::move(comp_join); |
| 200 | } |
| 201 | } |
| 202 | |
| 203 | static bool HasCorrelatedColumns(Expression &expression) { |
| 204 | if (expression.type == ExpressionType::BOUND_COLUMN_REF) { |
| 205 | auto &colref = expression.Cast<BoundColumnRefExpression>(); |
| 206 | if (colref.depth > 0) { |
| 207 | return true; |
| 208 | } |
| 209 | } |
| 210 | bool has_correlated_columns = false; |
| 211 | ExpressionIterator::EnumerateChildren(expression, callback: [&](Expression &child) { |
| 212 | if (HasCorrelatedColumns(expression&: child)) { |
| 213 | has_correlated_columns = true; |
| 214 | } |
| 215 | }); |
| 216 | return has_correlated_columns; |
| 217 | } |
| 218 | |
| 219 | unique_ptr<LogicalOperator> LogicalComparisonJoin::CreateJoin(JoinType type, JoinRefType reftype, |
| 220 | unique_ptr<LogicalOperator> left_child, |
| 221 | unique_ptr<LogicalOperator> right_child, |
| 222 | unique_ptr<Expression> condition) { |
| 223 | vector<JoinCondition> conditions; |
| 224 | vector<unique_ptr<Expression>> arbitrary_expressions; |
| 225 | LogicalComparisonJoin::ExtractJoinConditions(type, left_child, right_child, condition: std::move(condition), conditions, |
| 226 | arbitrary_expressions); |
| 227 | return LogicalComparisonJoin::CreateJoin(type, reftype, left_child: std::move(left_child), right_child: std::move(right_child), |
| 228 | conditions: std::move(conditions), arbitrary_expressions: std::move(arbitrary_expressions)); |
| 229 | } |
| 230 | |
| 231 | unique_ptr<LogicalOperator> Binder::CreatePlan(BoundJoinRef &ref) { |
| 232 | auto left = CreatePlan(ref&: *ref.left); |
| 233 | auto right = CreatePlan(ref&: *ref.right); |
| 234 | if (!ref.lateral && !ref.correlated_columns.empty()) { |
| 235 | // non-lateral join with correlated columns |
| 236 | // this happens if there is a join (or cross product) in a correlated subquery |
| 237 | // due to the lateral binder the expression depth of all correlated columns in the "ref.correlated_columns" set |
| 238 | // is 1 too high |
| 239 | // we reduce expression depth of all columns in the "ref.correlated_columns" set by 1 |
| 240 | LateralBinder::ReduceExpressionDepth(op&: *right, info: ref.correlated_columns); |
| 241 | } |
| 242 | if (ref.type == JoinType::RIGHT && ref.ref_type != JoinRefType::ASOF && |
| 243 | ClientConfig::GetConfig(context).enable_optimizer) { |
| 244 | // we turn any right outer joins into left outer joins for optimization purposes |
| 245 | // they are the same but with sides flipped, so treating them the same simplifies life |
| 246 | ref.type = JoinType::LEFT; |
| 247 | std::swap(a&: left, b&: right); |
| 248 | } |
| 249 | if (ref.lateral) { |
| 250 | // lateral join |
| 251 | return PlanLateralJoin(left: std::move(left), right: std::move(right), correlated_columns&: ref.correlated_columns, join_type: ref.type, |
| 252 | condition: std::move(ref.condition)); |
| 253 | } |
| 254 | switch (ref.ref_type) { |
| 255 | case JoinRefType::CROSS: |
| 256 | return LogicalCrossProduct::Create(left: std::move(left), right: std::move(right)); |
| 257 | case JoinRefType::POSITIONAL: |
| 258 | return LogicalPositionalJoin::Create(left: std::move(left), right: std::move(right)); |
| 259 | default: |
| 260 | break; |
| 261 | } |
| 262 | if (ref.type == JoinType::INNER && (ref.condition->HasSubquery() || HasCorrelatedColumns(expression&: *ref.condition)) && |
| 263 | ref.ref_type == JoinRefType::REGULAR) { |
| 264 | // inner join, generate a cross product + filter |
| 265 | // this will be later turned into a proper join by the join order optimizer |
| 266 | auto root = LogicalCrossProduct::Create(left: std::move(left), right: std::move(right)); |
| 267 | |
| 268 | auto filter = make_uniq<LogicalFilter>(args: std::move(ref.condition)); |
| 269 | // visit the expressions in the filter |
| 270 | for (auto &expression : filter->expressions) { |
| 271 | PlanSubqueries(expr&: expression, root); |
| 272 | } |
| 273 | filter->AddChild(child: std::move(root)); |
| 274 | return std::move(filter); |
| 275 | } |
| 276 | |
| 277 | // now create the join operator from the join condition |
| 278 | auto result = LogicalComparisonJoin::CreateJoin(type: ref.type, reftype: ref.ref_type, left_child: std::move(left), right_child: std::move(right), |
| 279 | condition: std::move(ref.condition)); |
| 280 | |
| 281 | optional_ptr<LogicalOperator> join; |
| 282 | if (result->type == LogicalOperatorType::LOGICAL_FILTER) { |
| 283 | join = result->children[0].get(); |
| 284 | } else { |
| 285 | join = result.get(); |
| 286 | } |
| 287 | for (auto &child : join->children) { |
| 288 | if (child->type == LogicalOperatorType::LOGICAL_FILTER) { |
| 289 | auto &filter = child->Cast<LogicalFilter>(); |
| 290 | for (auto &expr : filter.expressions) { |
| 291 | PlanSubqueries(expr, root&: filter.children[0]); |
| 292 | } |
| 293 | } |
| 294 | } |
| 295 | |
| 296 | // we visit the expressions depending on the type of join |
| 297 | switch (join->type) { |
| 298 | case LogicalOperatorType::LOGICAL_ASOF_JOIN: |
| 299 | case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: { |
| 300 | // comparison join |
| 301 | // in this join we visit the expressions on the LHS with the LHS as root node |
| 302 | // and the expressions on the RHS with the RHS as root node |
| 303 | auto &comp_join = join->Cast<LogicalComparisonJoin>(); |
| 304 | for (idx_t i = 0; i < comp_join.conditions.size(); i++) { |
| 305 | PlanSubqueries(expr&: comp_join.conditions[i].left, root&: comp_join.children[0]); |
| 306 | PlanSubqueries(expr&: comp_join.conditions[i].right, root&: comp_join.children[1]); |
| 307 | } |
| 308 | break; |
| 309 | } |
| 310 | case LogicalOperatorType::LOGICAL_ANY_JOIN: { |
| 311 | auto &any_join = join->Cast<LogicalAnyJoin>(); |
| 312 | // for the any join we just visit the condition |
| 313 | if (any_join.condition->HasSubquery()) { |
| 314 | throw NotImplementedException("Cannot perform non-inner join on subquery!" ); |
| 315 | } |
| 316 | break; |
| 317 | } |
| 318 | default: |
| 319 | break; |
| 320 | } |
| 321 | return result; |
| 322 | } |
| 323 | |
| 324 | } // namespace duckdb |
| 325 | |