1#include "duckdb/parser/expression/columnref_expression.hpp"
2#include "duckdb/parser/expression/constant_expression.hpp"
3#include "duckdb/parser/expression_map.hpp"
4#include "duckdb/parser/query_node/select_node.hpp"
5#include "duckdb/parser/query_node/set_operation_node.hpp"
6#include "duckdb/planner/binder.hpp"
7#include "duckdb/planner/expression/bound_columnref_expression.hpp"
8#include "duckdb/planner/expression/bound_constant_expression.hpp"
9#include "duckdb/planner/expression_binder/order_binder.hpp"
10#include "duckdb/planner/query_node/bound_select_node.hpp"
11#include "duckdb/planner/query_node/bound_set_operation_node.hpp"
12
13namespace duckdb {
14
15static void GatherAliases(BoundQueryNode &node, case_insensitive_map_t<idx_t> &aliases,
16 parsed_expression_map_t<idx_t> &expressions, const vector<idx_t> &reorder_idx) {
17 if (node.type == QueryNodeType::SET_OPERATION_NODE) {
18 // setop, recurse
19 auto &setop = node.Cast<BoundSetOperationNode>();
20
21 // create new reorder index
22 if (setop.setop_type == SetOperationType::UNION_BY_NAME) {
23 vector<idx_t> new_left_reorder_idx(setop.left_reorder_idx.size());
24 vector<idx_t> new_right_reorder_idx(setop.right_reorder_idx.size());
25 for (idx_t i = 0; i < setop.left_reorder_idx.size(); ++i) {
26 new_left_reorder_idx[i] = reorder_idx[setop.left_reorder_idx[i]];
27 }
28
29 for (idx_t i = 0; i < setop.right_reorder_idx.size(); ++i) {
30 new_right_reorder_idx[i] = reorder_idx[setop.right_reorder_idx[i]];
31 }
32
33 // use new reorder index
34 GatherAliases(node&: *setop.left, aliases, expressions, reorder_idx: new_left_reorder_idx);
35 GatherAliases(node&: *setop.right, aliases, expressions, reorder_idx: new_right_reorder_idx);
36 return;
37 }
38
39 GatherAliases(node&: *setop.left, aliases, expressions, reorder_idx);
40 GatherAliases(node&: *setop.right, aliases, expressions, reorder_idx);
41 } else {
42 // query node
43 D_ASSERT(node.type == QueryNodeType::SELECT_NODE);
44 auto &select = node.Cast<BoundSelectNode>();
45 // fill the alias lists
46 for (idx_t i = 0; i < select.names.size(); i++) {
47 auto &name = select.names[i];
48 auto &expr = select.original_expressions[i];
49 // first check if the alias is already in there
50 auto entry = aliases.find(x: name);
51
52 idx_t index = reorder_idx[i];
53
54 if (entry != aliases.end()) {
55 // the alias already exists
56 // check if there is a conflict
57
58 if (entry->second != index) {
59 // there is a conflict
60 // we place "-1" in the aliases map at this location
61 // "-1" signifies that there is an ambiguous reference
62 aliases[name] = DConstants::INVALID_INDEX;
63 }
64 } else {
65 // the alias is not in there yet, just assign it
66 aliases[name] = index;
67 }
68 // now check if the node is already in the set of expressions
69 auto expr_entry = expressions.find(x: *expr);
70 if (expr_entry != expressions.end()) {
71 // the node is in there
72 // repeat the same as with the alias: if there is an ambiguity we insert "-1"
73 if (expr_entry->second != index) {
74 expressions[*expr] = DConstants::INVALID_INDEX;
75 }
76 } else {
77 // not in there yet, just place it in there
78 expressions[*expr] = index;
79 }
80 }
81 }
82}
83
84static void BuildUnionByNameInfo(BoundSetOperationNode &result, bool can_contain_nulls) {
85 D_ASSERT(result.setop_type == SetOperationType::UNION_BY_NAME);
86 case_insensitive_map_t<idx_t> left_names_map;
87 case_insensitive_map_t<idx_t> right_names_map;
88
89 BoundQueryNode *left_node = result.left.get();
90 BoundQueryNode *right_node = result.right.get();
91
92 // Build a name_map to use to check if a name exists
93 // We throw a binder exception if two same name in the SELECT list
94 for (idx_t i = 0; i < left_node->names.size(); ++i) {
95 if (left_names_map.find(x: left_node->names[i]) != left_names_map.end()) {
96 throw BinderException("UNION(ALL) BY NAME operation doesn't support same name in SELECT list");
97 }
98 left_names_map[left_node->names[i]] = i;
99 }
100
101 for (idx_t i = 0; i < right_node->names.size(); ++i) {
102 if (right_names_map.find(x: right_node->names[i]) != right_names_map.end()) {
103 throw BinderException("UNION(ALL) BY NAME operation doesn't support same name in SELECT list");
104 }
105 if (left_names_map.find(x: right_node->names[i]) == left_names_map.end()) {
106 result.names.push_back(x: right_node->names[i]);
107 }
108 right_names_map[right_node->names[i]] = i;
109 }
110
111 idx_t new_size = result.names.size();
112 bool need_reorder = false;
113 vector<idx_t> left_reorder_idx(left_node->names.size());
114 vector<idx_t> right_reorder_idx(right_node->names.size());
115
116 // Construct return type and reorder_idxs
117 // reorder_idxs is used to gather correct alias_map
118 // and expression_map in GatherAlias(...)
119 for (idx_t i = 0; i < new_size; ++i) {
120 auto left_index = left_names_map.find(x: result.names[i]);
121 auto right_index = right_names_map.find(x: result.names[i]);
122 bool left_exist = left_index != left_names_map.end();
123 bool right_exist = right_index != right_names_map.end();
124 LogicalType result_type;
125 if (left_exist && right_exist) {
126 result_type = LogicalType::MaxLogicalType(left: left_node->types[left_index->second],
127 right: right_node->types[right_index->second]);
128 if (left_index->second != i || right_index->second != i) {
129 need_reorder = true;
130 }
131 left_reorder_idx[left_index->second] = i;
132 right_reorder_idx[right_index->second] = i;
133 } else if (left_exist) {
134 result_type = left_node->types[left_index->second];
135 need_reorder = true;
136 left_reorder_idx[left_index->second] = i;
137 } else {
138 D_ASSERT(right_exist);
139 result_type = right_node->types[right_index->second];
140 need_reorder = true;
141 right_reorder_idx[right_index->second] = i;
142 }
143
144 if (!can_contain_nulls) {
145 if (ExpressionBinder::ContainsNullType(type: result_type)) {
146 result_type = ExpressionBinder::ExchangeNullType(type: result_type);
147 }
148 }
149
150 result.types.push_back(x: result_type);
151 }
152
153 result.left_reorder_idx = std::move(left_reorder_idx);
154 result.right_reorder_idx = std::move(right_reorder_idx);
155
156 // If reorder is required, collect reorder expressions for push projection
157 // into the two child nodes of union node
158 if (need_reorder) {
159 for (idx_t i = 0; i < new_size; ++i) {
160 auto left_index = left_names_map.find(x: result.names[i]);
161 auto right_index = right_names_map.find(x: result.names[i]);
162 bool left_exist = left_index != left_names_map.end();
163 bool right_exist = right_index != right_names_map.end();
164 unique_ptr<Expression> left_reorder_expr;
165 unique_ptr<Expression> right_reorder_expr;
166 if (left_exist && right_exist) {
167 left_reorder_expr = make_uniq<BoundColumnRefExpression>(
168 args&: left_node->types[left_index->second], args: ColumnBinding(left_node->GetRootIndex(), left_index->second));
169 right_reorder_expr =
170 make_uniq<BoundColumnRefExpression>(args&: right_node->types[right_index->second],
171 args: ColumnBinding(right_node->GetRootIndex(), right_index->second));
172 } else if (left_exist) {
173 left_reorder_expr = make_uniq<BoundColumnRefExpression>(
174 args&: left_node->types[left_index->second], args: ColumnBinding(left_node->GetRootIndex(), left_index->second));
175 // create null value here
176 right_reorder_expr = make_uniq<BoundConstantExpression>(args: Value(result.types[i]));
177 } else {
178 D_ASSERT(right_exist);
179 left_reorder_expr = make_uniq<BoundConstantExpression>(args: Value(result.types[i]));
180 right_reorder_expr =
181 make_uniq<BoundColumnRefExpression>(args&: right_node->types[right_index->second],
182 args: ColumnBinding(right_node->GetRootIndex(), right_index->second));
183 }
184 result.left_reorder_exprs.push_back(x: std::move(left_reorder_expr));
185 result.right_reorder_exprs.push_back(x: std::move(right_reorder_expr));
186 }
187 }
188}
189
190unique_ptr<BoundQueryNode> Binder::BindNode(SetOperationNode &statement) {
191 auto result = make_uniq<BoundSetOperationNode>();
192 result->setop_type = statement.setop_type;
193
194 // first recursively visit the set operations
195 // both the left and right sides have an independent BindContext and Binder
196 D_ASSERT(statement.left);
197 D_ASSERT(statement.right);
198
199 result->setop_index = GenerateTableIndex();
200
201 result->left_binder = Binder::CreateBinder(context, parent: this);
202 result->left_binder->can_contain_nulls = true;
203 result->left = result->left_binder->BindNode(node&: *statement.left);
204 result->right_binder = Binder::CreateBinder(context, parent: this);
205 result->right_binder->can_contain_nulls = true;
206 result->right = result->right_binder->BindNode(node&: *statement.right);
207
208 result->names = result->left->names;
209
210 // move the correlated expressions from the child binders to this binder
211 MoveCorrelatedExpressions(other&: *result->left_binder);
212 MoveCorrelatedExpressions(other&: *result->right_binder);
213
214 // now both sides have been bound we can resolve types
215 if (result->setop_type != SetOperationType::UNION_BY_NAME &&
216 result->left->types.size() != result->right->types.size()) {
217 throw BinderException("Set operations can only apply to expressions with the "
218 "same number of result columns");
219 }
220
221 if (result->setop_type == SetOperationType::UNION_BY_NAME) {
222 BuildUnionByNameInfo(result&: *result, can_contain_nulls);
223
224 } else {
225 // figure out the types of the setop result by picking the max of both
226 for (idx_t i = 0; i < result->left->types.size(); i++) {
227 auto result_type = LogicalType::MaxLogicalType(left: result->left->types[i], right: result->right->types[i]);
228 if (!can_contain_nulls) {
229 if (ExpressionBinder::ContainsNullType(type: result_type)) {
230 result_type = ExpressionBinder::ExchangeNullType(type: result_type);
231 }
232 }
233 result->types.push_back(x: result_type);
234 }
235 }
236
237 if (!statement.modifiers.empty()) {
238 // handle the ORDER BY/DISTINCT clauses
239
240 // we recursively visit the children of this node to extract aliases and expressions that can be referenced
241 // in the ORDER BY
242 case_insensitive_map_t<idx_t> alias_map;
243 parsed_expression_map_t<idx_t> expression_map;
244
245 if (result->setop_type == SetOperationType::UNION_BY_NAME) {
246 GatherAliases(node&: *result->left, aliases&: alias_map, expressions&: expression_map, reorder_idx: result->left_reorder_idx);
247 GatherAliases(node&: *result->right, aliases&: alias_map, expressions&: expression_map, reorder_idx: result->right_reorder_idx);
248 } else {
249 vector<idx_t> reorder_idx;
250 for (idx_t i = 0; i < result->names.size(); i++) {
251 reorder_idx.push_back(x: i);
252 }
253 GatherAliases(node&: *result, aliases&: alias_map, expressions&: expression_map, reorder_idx);
254 }
255 // now we perform the actual resolution of the ORDER BY/DISTINCT expressions
256 OrderBinder order_binder({result->left_binder.get(), result->right_binder.get()}, result->setop_index,
257 alias_map, expression_map, result->names.size());
258 BindModifiers(order_binder, statement, result&: *result);
259 }
260
261 // finally bind the types of the ORDER/DISTINCT clause expressions
262 BindModifierTypes(result&: *result, sql_types: result->types, projection_index: result->setop_index);
263 return std::move(result);
264}
265
266} // namespace duckdb
267