1#include "duckdb/execution/aggregate_hashtable.hpp"
2#include "duckdb/execution/operator/join/physical_delim_join.hpp"
3#include "duckdb/execution/operator/join/physical_hash_join.hpp"
4#include "duckdb/execution/operator/projection/physical_projection.hpp"
5#include "duckdb/execution/physical_plan_generator.hpp"
6#include "duckdb/planner/operator/logical_delim_join.hpp"
7#include "duckdb/planner/expression/bound_aggregate_expression.hpp"
8#include "duckdb/planner/expression/bound_reference_expression.hpp"
9#include "duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp"
10
11namespace duckdb {
12
13static void GatherDelimScans(const PhysicalOperator &op, vector<const_reference<PhysicalOperator>> &delim_scans) {
14 if (op.type == PhysicalOperatorType::DELIM_SCAN) {
15 delim_scans.push_back(x: op);
16 }
17 for (auto &child : op.children) {
18 GatherDelimScans(op: *child, delim_scans);
19 }
20}
21
22unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalDelimJoin &op) {
23 // first create the underlying join
24 auto plan = CreatePlan(op&: op.Cast<LogicalComparisonJoin>());
25 // this should create a join, not a cross product
26 D_ASSERT(plan && plan->type != PhysicalOperatorType::CROSS_PRODUCT);
27 // duplicate eliminated join
28 // first gather the scans on the duplicate eliminated data set from the RHS
29 vector<const_reference<PhysicalOperator>> delim_scans;
30 GatherDelimScans(op: *plan->children[1], delim_scans);
31 if (delim_scans.empty()) {
32 // no duplicate eliminated scans in the RHS!
33 // in this case we don't need to create a delim join
34 // just push the normal join
35 return plan;
36 }
37 vector<LogicalType> delim_types;
38 vector<unique_ptr<Expression>> distinct_groups, distinct_expressions;
39 for (auto &delim_expr : op.duplicate_eliminated_columns) {
40 D_ASSERT(delim_expr->type == ExpressionType::BOUND_REF);
41 auto &bound_ref = delim_expr->Cast<BoundReferenceExpression>();
42 delim_types.push_back(x: bound_ref.return_type);
43 distinct_groups.push_back(x: make_uniq<BoundReferenceExpression>(args&: bound_ref.return_type, args&: bound_ref.index));
44 }
45 // now create the duplicate eliminated join
46 auto delim_join = make_uniq<PhysicalDelimJoin>(args&: op.types, args: std::move(plan), args&: delim_scans, args&: op.estimated_cardinality);
47 // we still have to create the DISTINCT clause that is used to generate the duplicate eliminated chunk
48 delim_join->distinct = make_uniq<PhysicalHashAggregate>(args&: context, args&: delim_types, args: std::move(distinct_expressions),
49 args: std::move(distinct_groups), args&: op.estimated_cardinality);
50 return std::move(delim_join);
51}
52
53} // namespace duckdb
54