1#include "duckdb/catalog/catalog.hpp"
2#include "duckdb/parser/expression/constant_expression.hpp"
3#include "duckdb/parser/statement/insert_statement.hpp"
4#include "duckdb/parser/query_node/select_node.hpp"
5#include "duckdb/parser/tableref/expressionlistref.hpp"
6#include "duckdb/planner/binder.hpp"
7#include "duckdb/planner/expression_binder/insert_binder.hpp"
8#include "duckdb/planner/operator/logical_insert.hpp"
9#include "duckdb/planner/operator/logical_get.hpp"
10#include "duckdb/common/string_util.hpp"
11#include "duckdb/function/table/table_scan.hpp"
12#include "duckdb/planner/operator/logical_dummy_scan.hpp"
13#include "duckdb/planner/operator/logical_projection.hpp"
14#include "duckdb/planner/expression_iterator.hpp"
15#include "duckdb/planner/expression_binder/returning_binder.hpp"
16#include "duckdb/planner/expression_binder/where_binder.hpp"
17#include "duckdb/planner/expression_binder/update_binder.hpp"
18#include "duckdb/planner/operator/logical_filter.hpp"
19#include "duckdb/parser/statement/update_statement.hpp"
20#include "duckdb/planner/expression/bound_default_expression.hpp"
21#include "duckdb/storage/data_table.hpp"
22#include "duckdb/catalog/catalog_entry/index_catalog_entry.hpp"
23#include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp"
24#include "duckdb/planner/bound_tableref.hpp"
25#include "duckdb/planner/tableref/bound_basetableref.hpp"
26#include "duckdb/planner/tableref/bound_dummytableref.hpp"
27#include "duckdb/parser/parsed_expression_iterator.hpp"
28#include "duckdb/storage/table_storage_info.hpp"
29
30namespace duckdb {
31
32static void CheckInsertColumnCountMismatch(int64_t expected_columns, int64_t result_columns, bool columns_provided,
33 const char *tname) {
34 if (result_columns != expected_columns) {
35 string msg = StringUtil::Format(fmt_str: !columns_provided ? "table %s has %lld columns but %lld values were supplied"
36 : "Column name/value mismatch for insert on %s: "
37 "expected %lld columns but %lld values were supplied",
38 params: tname, params: expected_columns, params: result_columns);
39 throw BinderException(msg);
40 }
41}
42
43unique_ptr<ParsedExpression> ExpandDefaultExpression(const ColumnDefinition &column) {
44 if (column.DefaultValue()) {
45 return column.DefaultValue()->Copy();
46 } else {
47 return make_uniq<ConstantExpression>(args: Value(column.Type()));
48 }
49}
50
51void ReplaceDefaultExpression(unique_ptr<ParsedExpression> &expr, const ColumnDefinition &column) {
52 D_ASSERT(expr->type == ExpressionType::VALUE_DEFAULT);
53 expr = ExpandDefaultExpression(column);
54}
55
56void QualifyColumnReferences(unique_ptr<ParsedExpression> &expr, const string &table_name) {
57 // To avoid ambiguity with 'excluded', we explicitly qualify all column references
58 if (expr->type == ExpressionType::COLUMN_REF) {
59 auto &column_ref = expr->Cast<ColumnRefExpression>();
60 if (column_ref.IsQualified()) {
61 return;
62 }
63 auto column_name = column_ref.GetColumnName();
64 expr = make_uniq<ColumnRefExpression>(args&: column_name, args: table_name);
65 }
66 ParsedExpressionIterator::EnumerateChildren(
67 expr&: *expr, callback: [&](unique_ptr<ParsedExpression> &child) { QualifyColumnReferences(expr&: child, table_name); });
68}
69
70// Replace binding.table_index with 'dest' if it's 'source'
71void ReplaceColumnBindings(Expression &expr, idx_t source, idx_t dest) {
72 if (expr.type == ExpressionType::BOUND_COLUMN_REF) {
73 auto &bound_columnref = expr.Cast<BoundColumnRefExpression>();
74 if (bound_columnref.binding.table_index == source) {
75 bound_columnref.binding.table_index = dest;
76 }
77 }
78 ExpressionIterator::EnumerateChildren(
79 expression&: expr, callback: [&](unique_ptr<Expression> &child) { ReplaceColumnBindings(expr&: *child, source, dest); });
80}
81
82void Binder::BindDoUpdateSetExpressions(const string &table_alias, LogicalInsert &insert, UpdateSetInfo &set_info,
83 TableCatalogEntry &table, TableStorageInfo &storage_info) {
84 D_ASSERT(insert.children.size() == 1);
85 D_ASSERT(insert.children[0]->type == LogicalOperatorType::LOGICAL_PROJECTION);
86
87 vector<column_t> logical_column_ids;
88 vector<string> column_names;
89 D_ASSERT(set_info.columns.size() == set_info.expressions.size());
90
91 for (idx_t i = 0; i < set_info.columns.size(); i++) {
92 auto &colname = set_info.columns[i];
93 auto &expr = set_info.expressions[i];
94 if (!table.ColumnExists(name: colname)) {
95 throw BinderException("Referenced update column %s not found in table!", colname);
96 }
97 auto &column = table.GetColumn(name: colname);
98 if (column.Generated()) {
99 throw BinderException("Cant update column \"%s\" because it is a generated column!", column.Name());
100 }
101 if (std::find(first: insert.set_columns.begin(), last: insert.set_columns.end(), val: column.Physical()) !=
102 insert.set_columns.end()) {
103 throw BinderException("Multiple assignments to same column \"%s\"", colname);
104 }
105 insert.set_columns.push_back(x: column.Physical());
106 logical_column_ids.push_back(x: column.Oid());
107 insert.set_types.push_back(x: column.Type());
108 column_names.push_back(x: colname);
109 if (expr->type == ExpressionType::VALUE_DEFAULT) {
110 expr = ExpandDefaultExpression(column);
111 }
112 UpdateBinder binder(*this, context);
113 binder.target_type = column.Type();
114
115 // Avoid ambiguity issues
116 QualifyColumnReferences(expr, table_name: table_alias);
117
118 auto bound_expr = binder.Bind(expr);
119 D_ASSERT(bound_expr);
120 if (bound_expr->expression_class == ExpressionClass::BOUND_SUBQUERY) {
121 throw BinderException("Expression in the DO UPDATE SET clause can not be a subquery");
122 }
123
124 insert.expressions.push_back(x: std::move(bound_expr));
125 }
126
127 // Figure out which columns are indexed on
128 unordered_set<column_t> indexed_columns;
129 for (auto &index : storage_info.index_info) {
130 for (auto &column_id : index.column_set) {
131 indexed_columns.insert(x: column_id);
132 }
133 }
134
135 // Verify that none of the columns that are targeted with a SET expression are indexed on
136 for (idx_t i = 0; i < logical_column_ids.size(); i++) {
137 auto &column = logical_column_ids[i];
138 if (indexed_columns.count(x: column)) {
139 throw BinderException("Can not assign to column '%s' because it has a UNIQUE/PRIMARY KEY constraint",
140 column_names[i]);
141 }
142 }
143}
144
145unique_ptr<UpdateSetInfo> CreateSetInfoForReplace(TableCatalogEntry &table, InsertStatement &insert,
146 TableStorageInfo &storage_info) {
147 auto set_info = make_uniq<UpdateSetInfo>();
148
149 auto &columns = set_info->columns;
150 // Figure out which columns are indexed on
151
152 unordered_set<column_t> indexed_columns;
153 for (auto &index : storage_info.index_info) {
154 for (auto &column_id : index.column_set) {
155 indexed_columns.insert(x: column_id);
156 }
157 }
158
159 auto &column_list = table.GetColumns();
160 if (insert.columns.empty()) {
161 for (auto &column : column_list.Physical()) {
162 auto &name = column.Name();
163 // FIXME: can these column names be aliased somehow?
164 if (indexed_columns.count(x: column.Oid())) {
165 continue;
166 }
167 columns.push_back(x: name);
168 }
169 } else {
170 // a list of columns was explicitly supplied, only update those
171 for (auto &name : insert.columns) {
172 auto &column = column_list.GetColumn(name);
173 if (indexed_columns.count(x: column.Oid())) {
174 continue;
175 }
176 columns.push_back(x: name);
177 }
178 }
179
180 // Create 'excluded' qualified column references of these columns
181 for (auto &column : columns) {
182 set_info->expressions.push_back(x: make_uniq<ColumnRefExpression>(args&: column, args: "excluded"));
183 }
184
185 return set_info;
186}
187
188void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &table, InsertStatement &stmt) {
189 if (!stmt.on_conflict_info) {
190 insert.action_type = OnConflictAction::THROW;
191 return;
192 }
193 D_ASSERT(stmt.table_ref->type == TableReferenceType::BASE_TABLE);
194
195 // visit the table reference
196 auto bound_table = Bind(ref&: *stmt.table_ref);
197 if (bound_table->type != TableReferenceType::BASE_TABLE) {
198 throw BinderException("Can only update base table!");
199 }
200
201 auto &table_ref = stmt.table_ref->Cast<BaseTableRef>();
202 const string &table_alias = !table_ref.alias.empty() ? table_ref.alias : table_ref.table_name;
203
204 auto &on_conflict = *stmt.on_conflict_info;
205 D_ASSERT(on_conflict.action_type != OnConflictAction::THROW);
206 insert.action_type = on_conflict.action_type;
207
208 // obtain the table storage info
209 auto storage_info = table.GetStorageInfo(context);
210
211 auto &columns = table.GetColumns();
212 if (!on_conflict.indexed_columns.empty()) {
213 // Bind the ON CONFLICT (<columns>)
214
215 // create a mapping of (list index) -> (column index)
216 case_insensitive_map_t<idx_t> specified_columns;
217 for (idx_t i = 0; i < on_conflict.indexed_columns.size(); i++) {
218 specified_columns[on_conflict.indexed_columns[i]] = i;
219 auto column_index = table.GetColumnIndex(name&: on_conflict.indexed_columns[i]);
220 if (column_index.index == COLUMN_IDENTIFIER_ROW_ID) {
221 throw BinderException("Cannot specify ROWID as ON CONFLICT target");
222 }
223 auto &col = columns.GetColumn(index: column_index);
224 if (col.Generated()) {
225 throw BinderException("Cannot specify a generated column as ON CONFLICT target");
226 }
227 }
228 for (auto &col : columns.Physical()) {
229 auto entry = specified_columns.find(x: col.Name());
230 if (entry != specified_columns.end()) {
231 // column was specified, set to the index
232 insert.on_conflict_filter.insert(x: col.Oid());
233 }
234 }
235 bool index_references_columns = false;
236 for (auto &index : storage_info.index_info) {
237 if (!index.is_unique) {
238 continue;
239 }
240 bool index_matches = insert.on_conflict_filter == index.column_set;
241 if (index_matches) {
242 index_references_columns = true;
243 break;
244 }
245 }
246 if (!index_references_columns) {
247 // Same as before, this is essentially a no-op, turning this into a DO THROW instead
248 // But since this makes no logical sense, it's probably better to throw an error
249 throw BinderException(
250 "The specified columns as conflict target are not referenced by a UNIQUE/PRIMARY KEY CONSTRAINT");
251 }
252 } else {
253 // When omitting the conflict target, the ON CONFLICT applies to every UNIQUE/PRIMARY KEY on the table
254
255 // We check if there are any constraints on the table, if there aren't we throw an error.
256 idx_t found_matching_indexes = 0;
257 for (auto &index : storage_info.index_info) {
258 if (!index.is_unique) {
259 continue;
260 }
261 // does this work with multi-column indexes?
262 auto &indexed_columns = index.column_set;
263 for (auto &column : table.GetColumns().Physical()) {
264 if (indexed_columns.count(x: column.Physical().index)) {
265 found_matching_indexes++;
266 }
267 }
268 }
269 if (!found_matching_indexes) {
270 throw BinderException(
271 "There are no UNIQUE/PRIMARY KEY Indexes that refer to this table, ON CONFLICT is a no-op");
272 }
273 if (insert.action_type != OnConflictAction::NOTHING && found_matching_indexes != 1) {
274 // When no conflict target is provided, and the action type is UPDATE,
275 // we only allow the operation when only a single Index exists
276 throw BinderException("Conflict target has to be provided for a DO UPDATE operation when the table has "
277 "multiple UNIQUE/PRIMARY KEY constraints");
278 }
279 }
280
281 // add the 'excluded' dummy table binding
282 AddTableName(table_name: "excluded");
283 // add a bind context entry for it
284 auto excluded_index = GenerateTableIndex();
285 insert.excluded_table_index = excluded_index;
286 auto table_column_names = columns.GetColumnNames();
287 auto table_column_types = columns.GetColumnTypes();
288 bind_context.AddGenericBinding(index: excluded_index, alias: "excluded", names: table_column_names, types: table_column_types);
289
290 if (on_conflict.condition) {
291 // Avoid ambiguity between <table_name> binding and 'excluded'
292 QualifyColumnReferences(expr&: on_conflict.condition, table_name: table_alias);
293 // Bind the ON CONFLICT ... WHERE clause
294 WhereBinder where_binder(*this, context);
295 auto condition = where_binder.Bind(expr&: on_conflict.condition);
296 if (condition && condition->expression_class == ExpressionClass::BOUND_SUBQUERY) {
297 throw BinderException("conflict_target WHERE clause can not be a subquery");
298 }
299 insert.on_conflict_condition = std::move(condition);
300 }
301
302 auto bindings = insert.children[0]->GetColumnBindings();
303 idx_t projection_index = DConstants::INVALID_INDEX;
304 vector<unique_ptr<LogicalOperator>> *insert_child_operators;
305 insert_child_operators = &insert.children;
306 while (projection_index == DConstants::INVALID_INDEX) {
307 if (insert_child_operators->empty()) {
308 // No further children to visit
309 break;
310 }
311 D_ASSERT(insert_child_operators->size() >= 1);
312 auto &current_child = (*insert_child_operators)[0];
313 auto table_indices = current_child->GetTableIndex();
314 if (table_indices.empty()) {
315 // This operator does not have a table index to refer to, we have to visit its children
316 insert_child_operators = &current_child->children;
317 continue;
318 }
319 projection_index = table_indices[0];
320 }
321 if (projection_index == DConstants::INVALID_INDEX) {
322 throw InternalException("Could not locate a table_index from the children of the insert");
323 }
324
325 string unused;
326 auto original_binding = bind_context.GetBinding(name: table_alias, out_error&: unused);
327 D_ASSERT(original_binding);
328
329 auto table_index = original_binding->index;
330
331 // Replace any column bindings to refer to the projection table_index, rather than the source table
332 if (insert.on_conflict_condition) {
333 ReplaceColumnBindings(expr&: *insert.on_conflict_condition, source: table_index, dest: projection_index);
334 }
335
336 if (insert.action_type == OnConflictAction::REPLACE) {
337 D_ASSERT(on_conflict.set_info == nullptr);
338 on_conflict.set_info = CreateSetInfoForReplace(table, insert&: stmt, storage_info);
339 insert.action_type = OnConflictAction::UPDATE;
340 }
341 if (on_conflict.set_info && on_conflict.set_info->columns.empty()) {
342 // if we are doing INSERT OR REPLACE on a table with no columns outside of the primary key column
343 // convert to INSERT OR IGNORE
344 insert.action_type = OnConflictAction::NOTHING;
345 }
346 if (insert.action_type == OnConflictAction::NOTHING) {
347 if (!insert.on_conflict_condition) {
348 return;
349 }
350 // Get the column_ids we need to fetch later on from the conflicting tuples
351 // of the original table, to execute the expressions
352 D_ASSERT(original_binding->binding_type == BindingType::TABLE);
353 auto &table_binding = original_binding->Cast<TableBinding>();
354 insert.columns_to_fetch = table_binding.GetBoundColumnIds();
355 return;
356 }
357
358 D_ASSERT(on_conflict.set_info);
359 auto &set_info = *on_conflict.set_info;
360 D_ASSERT(set_info.columns.size() == set_info.expressions.size());
361
362 if (set_info.condition) {
363 // Avoid ambiguity between <table_name> binding and 'excluded'
364 QualifyColumnReferences(expr&: set_info.condition, table_name: table_alias);
365 // Bind the SET ... WHERE clause
366 WhereBinder where_binder(*this, context);
367 auto condition = where_binder.Bind(expr&: set_info.condition);
368 if (condition && condition->expression_class == ExpressionClass::BOUND_SUBQUERY) {
369 throw BinderException("conflict_target WHERE clause can not be a subquery");
370 }
371 insert.do_update_condition = std::move(condition);
372 }
373
374 BindDoUpdateSetExpressions(table_alias, insert, set_info, table, storage_info);
375
376 // Get the column_ids we need to fetch later on from the conflicting tuples
377 // of the original table, to execute the expressions
378 D_ASSERT(original_binding->binding_type == BindingType::TABLE);
379 auto &table_binding = original_binding->Cast<TableBinding>();
380 insert.columns_to_fetch = table_binding.GetBoundColumnIds();
381
382 // Replace the column bindings to refer to the child operator
383 for (auto &expr : insert.expressions) {
384 // Change the non-excluded column references to refer to the projection index
385 ReplaceColumnBindings(expr&: *expr, source: table_index, dest: projection_index);
386 }
387 // Do the same for the (optional) DO UPDATE condition
388 if (insert.do_update_condition) {
389 ReplaceColumnBindings(expr&: *insert.do_update_condition, source: table_index, dest: projection_index);
390 }
391}
392
393BoundStatement Binder::Bind(InsertStatement &stmt) {
394 BoundStatement result;
395 result.names = {"Count"};
396 result.types = {LogicalType::BIGINT};
397
398 BindSchemaOrCatalog(catalog&: stmt.catalog, schema&: stmt.schema);
399 auto &table = Catalog::GetEntry<TableCatalogEntry>(context, catalog_name: stmt.catalog, schema_name: stmt.schema, name: stmt.table);
400 if (!table.temporary) {
401 // inserting into a non-temporary table: alters underlying database
402 properties.modified_databases.insert(x: table.catalog.GetName());
403 }
404
405 auto insert = make_uniq<LogicalInsert>(args&: table, args: GenerateTableIndex());
406 // Add CTEs as bindable
407 AddCTEMap(cte_map&: stmt.cte_map);
408
409 auto values_list = stmt.GetValuesList();
410
411 // bind the root select node (if any)
412 BoundStatement root_select;
413 if (stmt.column_order == InsertColumnOrder::INSERT_BY_NAME) {
414 if (values_list) {
415 throw BinderException("INSERT BY NAME can only be used when inserting from a SELECT statement");
416 }
417 if (!stmt.columns.empty()) {
418 throw BinderException("INSERT BY NAME cannot be combined with an explicit column list");
419 }
420 D_ASSERT(stmt.select_statement);
421 // INSERT BY NAME - generate the columns from the names of the SELECT statement
422 auto select_binder = Binder::CreateBinder(context, parent: this);
423 root_select = select_binder->Bind(stmt&: *stmt.select_statement);
424 MoveCorrelatedExpressions(other&: *select_binder);
425
426 stmt.columns = root_select.names;
427 }
428
429 vector<LogicalIndex> named_column_map;
430 if (!stmt.columns.empty() || stmt.default_values) {
431 // insertion statement specifies column list
432
433 // create a mapping of (list index) -> (column index)
434 case_insensitive_map_t<idx_t> column_name_map;
435 for (idx_t i = 0; i < stmt.columns.size(); i++) {
436 auto entry = column_name_map.insert(x: make_pair(x&: stmt.columns[i], y&: i));
437 if (!entry.second) {
438 throw BinderException("Duplicate column name \"%s\" in INSERT", stmt.columns[i]);
439 }
440 column_name_map[stmt.columns[i]] = i;
441 auto column_index = table.GetColumnIndex(name&: stmt.columns[i]);
442 if (column_index.index == COLUMN_IDENTIFIER_ROW_ID) {
443 throw BinderException("Cannot explicitly insert values into rowid column");
444 }
445 auto &col = table.GetColumn(idx: column_index);
446 if (col.Generated()) {
447 throw BinderException("Cannot insert into a generated column");
448 }
449 insert->expected_types.push_back(x: col.Type());
450 named_column_map.push_back(x: column_index);
451 }
452 for (auto &col : table.GetColumns().Physical()) {
453 auto entry = column_name_map.find(x: col.Name());
454 if (entry == column_name_map.end()) {
455 // column not specified, set index to DConstants::INVALID_INDEX
456 insert->column_index_map.push_back(element: DConstants::INVALID_INDEX);
457 } else {
458 // column was specified, set to the index
459 insert->column_index_map.push_back(element: entry->second);
460 }
461 }
462 } else {
463 // insert by position and no columns specified - insertion into all columns of the table
464 // intentionally don't populate 'column_index_map' as an indication of this
465 for (auto &col : table.GetColumns().Physical()) {
466 named_column_map.push_back(x: col.Logical());
467 insert->expected_types.push_back(x: col.Type());
468 }
469 }
470
471 // bind the default values
472 BindDefaultValues(columns: table.GetColumns(), bound_defaults&: insert->bound_defaults);
473 if (!stmt.select_statement && !stmt.default_values) {
474 result.plan = std::move(insert);
475 return result;
476 }
477 // Exclude the generated columns from this amount
478 idx_t expected_columns = stmt.columns.empty() ? table.GetColumns().PhysicalColumnCount() : stmt.columns.size();
479
480 // special case: check if we are inserting from a VALUES statement
481 if (values_list) {
482 auto &expr_list = values_list->Cast<ExpressionListRef>();
483 expr_list.expected_types.resize(new_size: expected_columns);
484 expr_list.expected_names.resize(new_size: expected_columns);
485
486 D_ASSERT(expr_list.values.size() > 0);
487 CheckInsertColumnCountMismatch(expected_columns, result_columns: expr_list.values[0].size(), columns_provided: !stmt.columns.empty(),
488 tname: table.name.c_str());
489
490 // VALUES list!
491 for (idx_t col_idx = 0; col_idx < expected_columns; col_idx++) {
492 D_ASSERT(named_column_map.size() >= col_idx);
493 auto &table_col_idx = named_column_map[col_idx];
494
495 // set the expected types as the types for the INSERT statement
496 auto &column = table.GetColumn(idx: table_col_idx);
497 expr_list.expected_types[col_idx] = column.Type();
498 expr_list.expected_names[col_idx] = column.Name();
499
500 // now replace any DEFAULT values with the corresponding default expression
501 for (idx_t list_idx = 0; list_idx < expr_list.values.size(); list_idx++) {
502 if (expr_list.values[list_idx][col_idx]->type == ExpressionType::VALUE_DEFAULT) {
503 // DEFAULT value! replace the entry
504 ReplaceDefaultExpression(expr&: expr_list.values[list_idx][col_idx], column);
505 }
506 }
507 }
508 }
509
510 // parse select statement and add to logical plan
511 unique_ptr<LogicalOperator> root;
512 if (stmt.select_statement) {
513 if (stmt.column_order == InsertColumnOrder::INSERT_BY_POSITION) {
514 auto select_binder = Binder::CreateBinder(context, parent: this);
515 root_select = select_binder->Bind(stmt&: *stmt.select_statement);
516 MoveCorrelatedExpressions(other&: *select_binder);
517 }
518 // inserting from a select - check if the column count matches
519 CheckInsertColumnCountMismatch(expected_columns, result_columns: root_select.types.size(), columns_provided: !stmt.columns.empty(),
520 tname: table.name.c_str());
521
522 root = CastLogicalOperatorToTypes(source_types&: root_select.types, target_types&: insert->expected_types, op: std::move(root_select.plan));
523 } else {
524 root = make_uniq<LogicalDummyScan>(args: GenerateTableIndex());
525 }
526 insert->AddChild(child: std::move(root));
527
528 BindOnConflictClause(insert&: *insert, table, stmt);
529
530 if (!stmt.returning_list.empty()) {
531 insert->return_chunk = true;
532 result.types.clear();
533 result.names.clear();
534 auto insert_table_index = GenerateTableIndex();
535 insert->table_index = insert_table_index;
536 unique_ptr<LogicalOperator> index_as_logicaloperator = std::move(insert);
537
538 return BindReturning(returning_list: std::move(stmt.returning_list), table, alias: stmt.table_ref ? stmt.table_ref->alias : string(),
539 update_table_index: insert_table_index, child_operator: std::move(index_as_logicaloperator), result: std::move(result));
540 }
541
542 D_ASSERT(result.types.size() == result.names.size());
543 result.plan = std::move(insert);
544 properties.allow_stream_result = false;
545 properties.return_type = StatementReturnType::CHANGED_ROWS;
546 return result;
547}
548
549} // namespace duckdb
550