/*-------------------------------------------------------------------------
 *
 * nodeModifyTable.c
 *	  routines to handle ModifyTable nodes.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeModifyTable.c
 *
 *-------------------------------------------------------------------------
 */
/* INTERFACE ROUTINES
 *		ExecInitModifyTable - initialize the ModifyTable node
 *		ExecModifyTable		- retrieve the next tuple from the node
 *		ExecEndModifyTable	- shut down the ModifyTable node
 *		ExecReScanModifyTable - rescan the ModifyTable node
 *
 *	 NOTES
 *		Each ModifyTable node contains a list of one or more subplans,
 *		much like an Append node.  There is one subplan per result relation.
 *		The key reason for this is that in an inherited UPDATE command, each
 *		result relation could have a different schema (more or different
 *		columns) requiring a different plan tree to produce it.  In an
 *		inherited DELETE, all the subplans should produce the same output
 *		rowtype, but we might still find that different plans are appropriate
 *		for different child relations.
 *
 *		If the query specifies RETURNING, then the ModifyTable returns a
 *		RETURNING tuple after completing each row insert, update, or delete.
 *		It must be called again to continue the operation.  Without RETURNING,
 *		we just loop within the node until all the work is done, then
 *		return NULL.  This avoids useless call/return overhead.
 */

#include "postgres.h"

#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/tableam.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "commands/trigger.h"
#include "executor/execPartition.h"
#include "executor/executor.h"
#include "executor/nodeModifyTable.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "rewrite/rewriteHandler.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/memutils.h"
#include "utils/rel.h"


static bool ExecOnConflictUpdate(ModifyTableState *mtstate,
								 ResultRelInfo *resultRelInfo,
								 ItemPointer conflictTid,
								 TupleTableSlot *planSlot,
								 TupleTableSlot *excludedSlot,
								 EState *estate,
								 bool canSetTag,
								 TupleTableSlot **returning);
static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate,
											   EState *estate,
											   PartitionTupleRouting *proute,
											   ResultRelInfo *targetRelInfo,
											   TupleTableSlot *slot);
static ResultRelInfo *getTargetResultRelInfo(ModifyTableState *node);
static void ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate);
static TupleConversionMap *tupconv_map_for_subplan(ModifyTableState *node,
												   int whichplan);

/*
 * Verify that the tuples to be produced by INSERT or UPDATE match the
 * target relation's rowtype
 *
 * We do this to guard against stale plans.  If plan invalidation is
 * functioning properly then we should never get a failure here, but better
 * safe than sorry.  Note that this is called after we have obtained lock
 * on the target rel, so the rowtype can't change underneath us.
 *
 * The plan output is represented by its targetlist, because that makes
 * handling the dropped-column case easier.
 */
static void
ExecCheckPlanOutput(Relation resultRel, List *targetList)
{
	TupleDesc	resultDesc = RelationGetDescr(resultRel);
	int			attno = 0;
	ListCell   *lc;

	foreach(lc, targetList)
	{
		TargetEntry *tle = (TargetEntry *) lfirst(lc);
		Form_pg_attribute attr;

		if (tle->resjunk)
			continue;			/* ignore junk tlist items */

		if (attno >= resultDesc->natts)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
					 errmsg("table row type and query-specified row type do not match"),
					 errdetail("Query has too many columns.")));
		attr = TupleDescAttr(resultDesc, attno);
		attno++;

		if (!attr->attisdropped)
		{
			/* Normal case: demand type match */
			if (exprType((Node *) tle->expr) != attr->atttypid)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("table row type and query-specified row type do not match"),
						 errdetail("Table has type %s at ordinal position %d, but query expects %s.",
								   format_type_be(attr->atttypid),
								   attno,
								   format_type_be(exprType((Node *) tle->expr)))));
		}
		else
		{
			/*
			 * For a dropped column, we can't check atttypid (it's likely 0).
			 * In any case the planner has most likely inserted an INT4 null.
			 * What we insist on is just *some* NULL constant.
			 */
			if (!IsA(tle->expr, Const) ||
				!((Const *) tle->expr)->constisnull)
				ereport(ERROR,
						(errcode(ERRCODE_DATATYPE_MISMATCH),
						 errmsg("table row type and query-specified row type do not match"),
						 errdetail("Query provides a value for a dropped column at ordinal position %d.",
								   attno)));
		}
	}
	if (attno != resultDesc->natts)
		ereport(ERROR,
				(errcode(ERRCODE_DATATYPE_MISMATCH),
				 errmsg("table row type and query-specified row type do not match"),
				 errdetail("Query has too few columns.")));
}

/*
 * ExecProcessReturning --- evaluate a RETURNING list
 *
 * resultRelInfo: current result rel
 * tupleSlot: slot holding tuple actually inserted/updated/deleted
 * planSlot: slot holding tuple returned by top subplan node
 *
 * Note: If tupleSlot is NULL, the FDW should have already provided econtext's
 * scan tuple.
 *
 * Returns a slot holding the result tuple
 */
static TupleTableSlot *
ExecProcessReturning(ResultRelInfo *resultRelInfo,
					 TupleTableSlot *tupleSlot,
					 TupleTableSlot *planSlot)
{
	ProjectionInfo *projectReturning = resultRelInfo->ri_projectReturning;
	ExprContext *econtext = projectReturning->pi_exprContext;

	/* Make tuple and any needed join variables available to ExecProject */
	if (tupleSlot)
		econtext->ecxt_scantuple = tupleSlot;
	econtext->ecxt_outertuple = planSlot;

	/*
	 * RETURNING expressions might reference the tableoid column, so
	 * reinitialize tts_tableOid before evaluating them.
	 */
	econtext->ecxt_scantuple->tts_tableOid =
		RelationGetRelid(resultRelInfo->ri_RelationDesc);

	/* Compute the RETURNING expressions */
	return ExecProject(projectReturning);
}

/*
 * ExecCheckTupleVisible -- verify tuple is visible
 *
 * It would not be consistent with the guarantees of the higher isolation
 * levels to proceed with avoiding insertion (taking speculative insertion's
 * alternative path) on the basis of another tuple that is not visible to the
 * MVCC snapshot.  Check for the need to raise a serialization failure, and
 * do so as necessary.
 */
static void
ExecCheckTupleVisible(EState *estate,
					  Relation rel,
					  TupleTableSlot *slot)
{
	if (!IsolationUsesXactSnapshot())
		return;

	if (!table_tuple_satisfies_snapshot(rel, slot, estate->es_snapshot))
	{
		Datum		xminDatum;
		TransactionId xmin;
		bool		isnull;

		xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
		Assert(!isnull);
		xmin = DatumGetTransactionId(xminDatum);

		/*
		 * We should not raise a serialization failure if the conflict is
		 * against a tuple inserted by our own transaction, even if it's not
		 * visible to our snapshot.  (This would happen, for example, if
		 * conflicting keys are proposed for insertion in a single command.)
		 */
		if (!TransactionIdIsCurrentTransactionId(xmin))
			ereport(ERROR,
					(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
					 errmsg("could not serialize access due to concurrent update")));
	}
}

/*
 * ExecCheckTIDVisible -- convenience variant of ExecCheckTupleVisible()
 */
static void
ExecCheckTIDVisible(EState *estate,
					ResultRelInfo *relinfo,
					ItemPointer tid,
					TupleTableSlot *tempSlot)
{
	Relation	rel = relinfo->ri_RelationDesc;

	/* Redundantly check isolation level */
	if (!IsolationUsesXactSnapshot())
		return;

	if (!table_tuple_fetch_row_version(rel, tid, SnapshotAny, tempSlot))
		elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
	ExecCheckTupleVisible(estate, rel, tempSlot);
	ExecClearTuple(tempSlot);
}

/*
 * Compute stored generated columns for a tuple
 */
void
ExecComputeStoredGenerated(EState *estate, TupleTableSlot *slot)
{
	ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
	Relation	rel = resultRelInfo->ri_RelationDesc;
	TupleDesc	tupdesc = RelationGetDescr(rel);
	int			natts = tupdesc->natts;
	MemoryContext oldContext;
	Datum	   *values;
	bool	   *nulls;

	Assert(tupdesc->constr && tupdesc->constr->has_generated_stored);

	/*
	 * If first time through for this result relation, build expression
	 * nodetrees for rel's stored generation expressions.  Keep them in the
	 * per-query memory context so they'll survive throughout the query.
	 */
	if (resultRelInfo->ri_GeneratedExprs == NULL)
	{
		oldContext = MemoryContextSwitchTo(estate->es_query_cxt);

		resultRelInfo->ri_GeneratedExprs =
			(ExprState **) palloc(natts * sizeof(ExprState *));

		for (int i = 0; i < natts; i++)
		{
			if (TupleDescAttr(tupdesc, i)->attgenerated == ATTRIBUTE_GENERATED_STORED)
			{
				Expr	   *expr;

				expr = (Expr *) build_column_default(rel, i + 1);
				if (expr == NULL)
					elog(ERROR, "no generation expression found for column number %d of table \"%s\"",
						 i + 1, RelationGetRelationName(rel));

				resultRelInfo->ri_GeneratedExprs[i] = ExecPrepareExpr(expr, estate);
			}
		}

		MemoryContextSwitchTo(oldContext);
	}

	oldContext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));

	values = palloc(sizeof(*values) * natts);
	nulls = palloc(sizeof(*nulls) * natts);

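	/* Deform the tuple fully so tts_values/tts_isnull are valid to copy. */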
	slot_getallattrs(slot);
	memcpy(nulls, slot->tts_isnull, sizeof(*nulls) * natts);

	for (int i = 0; i < natts; i++)
	{
		Form_pg_attribute attr = TupleDescAttr(tupdesc, i);

		if (attr->attgenerated == ATTRIBUTE_GENERATED_STORED)
		{
			ExprContext *econtext;
			Datum		val;
			bool		isnull;

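			/*
			 * Evaluate the generation expression with the new row as the
			 * scan tuple, in the short-lived per-tuple context.
			 */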
			econtext = GetPerTupleExprContext(estate);
			econtext->ecxt_scantuple = slot;

			val = ExecEvalExpr(resultRelInfo->ri_GeneratedExprs[i], econtext, &isnull);

			values[i] = val;
			nulls[i] = isnull;
		}
		else
		{
			if (!nulls[i])
				values[i] = datumCopy(slot->tts_values[i], attr->attbyval, attr->attlen);
		}
	}

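	/*
	 * Store the computed row back into the slot, and materialize it so the
	 * slot's contents no longer depend on the per-tuple memory context.
	 */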
	ExecClearTuple(slot);
	memcpy(slot->tts_values, values, sizeof(*values) * natts);
	memcpy(slot->tts_isnull, nulls, sizeof(*nulls) * natts);
	ExecStoreVirtualTuple(slot);
	ExecMaterializeSlot(slot);

	MemoryContextSwitchTo(oldContext);
}

/* ----------------------------------------------------------------
 *		ExecInsert
 *
 *		For INSERT, we have to insert the tuple into the target relation
 *		and insert appropriate tuples into the index relations.
 *
 *		Returns RETURNING result if any, otherwise NULL.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecInsert(ModifyTableState *mtstate,
		   TupleTableSlot *slot,
		   TupleTableSlot *planSlot,
		   EState *estate,
		   bool canSetTag)
{
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	List	   *recheckIndexes = NIL;
	TupleTableSlot *result = NULL;
	TransitionCaptureState *ar_insert_trig_tcs;
	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
	OnConflictAction onconflict = node->onConflictAction;

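	/*
	 * Make the slot's contents self-contained, since triggers and the table
	 * AM below may examine and store the tuple repeatedly.
	 */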
	ExecMaterializeSlot(slot);

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/*
	 * BEFORE ROW INSERT Triggers.
	 *
	 * Note: We fire BEFORE ROW TRIGGERS for every attempted insertion in an
	 * INSERT ... ON CONFLICT statement.  We cannot check for constraint
	 * violations before firing these triggers, because they can change the
	 * values to insert.  Also, they can run arbitrary user-defined code with
	 * side-effects that we can't cancel by just not inserting the tuple.
	 */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_insert_before_row)
	{
		if (!ExecBRInsertTriggers(estate, resultRelInfo, slot))
			return NULL;		/* "do nothing" */
	}

	/* INSTEAD OF ROW INSERT Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
	{
		if (!ExecIRInsertTriggers(estate, resultRelInfo, slot))
			return NULL;		/* "do nothing" */
	}
	else if (resultRelInfo->ri_FdwRoutine)
	{
		/*
		 * Compute stored generated columns
		 */
		if (resultRelationDesc->rd_att->constr &&
			resultRelationDesc->rd_att->constr->has_generated_stored)
			ExecComputeStoredGenerated(estate, slot);

		/*
		 * insert into foreign table: let the FDW do it
		 */
		slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
															   resultRelInfo,
															   slot,
															   planSlot);

		if (slot == NULL)		/* "do nothing" */
			return NULL;

		/*
		 * AFTER ROW Triggers or RETURNING expressions might reference the
		 * tableoid column, so (re-)initialize tts_tableOid before evaluating
		 * them.
		 */
		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
	}
	else
	{
		WCOKind		wco_kind;

		/*
		 * Constraints might reference the tableoid column, so (re-)initialize
		 * tts_tableOid before evaluating them.
		 */
		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);

		/*
		 * Compute stored generated columns
		 */
		if (resultRelationDesc->rd_att->constr &&
			resultRelationDesc->rd_att->constr->has_generated_stored)
			ExecComputeStoredGenerated(estate, slot);

		/*
		 * Check any RLS WITH CHECK policies.
		 *
		 * Normally we should check INSERT policies.  But if the insert is the
		 * result of a partition key update that moved the tuple to a new
		 * partition, we should instead check UPDATE policies, because we are
		 * executing policies defined on the target table, and not those
		 * defined on the child partitions.
		 */
		wco_kind = (mtstate->operation == CMD_UPDATE) ?
			WCO_RLS_UPDATE_CHECK : WCO_RLS_INSERT_CHECK;

		/*
		 * ExecWithCheckOptions() will skip any WCOs which are not of the kind
		 * we are looking for at this point.
		 */
		if (resultRelInfo->ri_WithCheckOptions != NIL)
			ExecWithCheckOptions(wco_kind, resultRelInfo, slot, estate);

		/*
		 * Check the constraints of the tuple.
		 */
		if (resultRelationDesc->rd_att->constr)
			ExecConstraints(resultRelInfo, slot, estate);

		/*
		 * Also check the tuple against the partition constraint, if there is
		 * one; except that if we got here via tuple-routing, the routing has
		 * already established that the tuple fits, so we need to recheck only
		 * if a BR trigger (which might have modified the tuple) is defined on
		 * the partition.
		 */
		if (resultRelInfo->ri_PartitionCheck &&
			(resultRelInfo->ri_PartitionRoot == NULL ||
			 (resultRelInfo->ri_TrigDesc &&
			  resultRelInfo->ri_TrigDesc->trig_insert_before_row)))
			ExecPartitionCheck(resultRelInfo, slot, estate, true);

		if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
		{
			/* Perform a speculative insertion. */
			uint32		specToken;
			ItemPointerData conflictTid;
			bool		specConflict;
			List	   *arbiterIndexes;

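			/* Arbiter indexes: the unique indexes chosen to detect conflicts. */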
			arbiterIndexes = resultRelInfo->ri_onConflictArbiterIndexes;

			/*
			 * Do a non-conclusive check for conflicts first.
			 *
			 * We're not holding any locks yet, so this doesn't guarantee that
			 * the later insert won't conflict.  But it avoids leaving behind
			 * a lot of canceled speculative insertions, if you run a lot of
			 * INSERT ON CONFLICT statements that do conflict.
			 *
			 * We loop back here if we find a conflict below, either during
			 * the pre-check, or when we re-check after inserting the tuple
			 * speculatively.
			 */
	vlock:
			specConflict = false;
			if (!ExecCheckIndexConstraints(slot, estate, &conflictTid,
										   arbiterIndexes))
			{
				/* committed conflict tuple found */
				if (onconflict == ONCONFLICT_UPDATE)
				{
					/*
					 * In case of ON CONFLICT DO UPDATE, execute the UPDATE
					 * part.  Be prepared to retry if the UPDATE fails because
					 * of another concurrent UPDATE/DELETE to the conflict
					 * tuple.
					 */
					TupleTableSlot *returning = NULL;

					if (ExecOnConflictUpdate(mtstate, resultRelInfo,
											 &conflictTid, planSlot, slot,
											 estate, canSetTag, &returning))
					{
						InstrCountTuples2(&mtstate->ps, 1);
						return returning;
					}
					else
						goto vlock;
				}
				else
				{
					/*
					 * In case of ON CONFLICT DO NOTHING, do nothing.  However,
					 * verify that the tuple is visible to the executor's MVCC
					 * snapshot at higher isolation levels.
					 *
					 * Using ExecGetReturningSlot() to store the tuple for the
					 * recheck isn't that pretty, but we can't trivially use
					 * the input slot, because it might not be of a compatible
					 * type.  As there's no conflicting usage of
					 * ExecGetReturningSlot() in the DO NOTHING case...
					 */
					Assert(onconflict == ONCONFLICT_NOTHING);
					ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid,
										ExecGetReturningSlot(estate, resultRelInfo));
					InstrCountTuples2(&mtstate->ps, 1);
					return NULL;
				}
			}

			/*
			 * Before we start insertion proper, acquire our "speculative
			 * insertion lock".  Others can use that to wait for us to decide
			 * if we're going to go ahead with the insertion, instead of
			 * waiting for the whole transaction to complete.
			 */
			specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());

			/* insert the tuple, with the speculative token */
			table_tuple_insert_speculative(resultRelationDesc, slot,
										   estate->es_output_cid,
										   0,
										   NULL,
										   specToken);

			/* insert index entries for tuple */
			recheckIndexes = ExecInsertIndexTuples(slot, estate, true,
												   &specConflict,
												   arbiterIndexes);

			/* adjust the tuple's state accordingly */
			table_tuple_complete_speculative(resultRelationDesc, slot,
											 specToken, !specConflict);

			/*
			 * Wake up anyone waiting for our decision.  They will re-check
			 * the tuple, see that it's no longer speculative, and wait on our
			 * XID as if this was a regularly inserted tuple all along.  Or if
			 * we killed the tuple, they will see it's dead, and proceed as if
			 * the tuple never existed.
			 */
			SpeculativeInsertionLockRelease(GetCurrentTransactionId());

			/*
			 * If there was a conflict, start from the beginning.  We'll do
			 * the pre-check again, which will now find the conflicting tuple
			 * (unless it aborts before we get there).
			 */
			if (specConflict)
			{
				list_free(recheckIndexes);
				goto vlock;
			}

			/* Since there was no insertion conflict, we're done */
		}
		else
		{
			/* insert the tuple normally */
			table_tuple_insert(resultRelationDesc, slot,
							   estate->es_output_cid,
							   0, NULL);

			/* insert index entries for tuple */
			if (resultRelInfo->ri_NumIndices > 0)
				recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL,
													   NIL);
		}
	}

	if (canSetTag)
	{
		(estate->es_processed)++;
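		/* Record the inserted tuple's TID for the currtid() functions. */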
		setLastTid(&slot->tts_tid);
	}

	/*
	 * If this insert is the result of a partition key update that moved the
	 * tuple to a new partition, put this row into the transition NEW TABLE,
	 * if there is one.  We need to do this separately for DELETE and INSERT
	 * because they happen on different tables.
	 */
	ar_insert_trig_tcs = mtstate->mt_transition_capture;
	if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
		&& mtstate->mt_transition_capture->tcs_update_new_table)
	{
		ExecARUpdateTriggers(estate, resultRelInfo, NULL,
							 NULL,
							 slot,
							 NULL,
							 mtstate->mt_transition_capture);

		/*
		 * We've already captured the NEW TABLE row, so make sure any AR
		 * INSERT trigger fired below doesn't capture it again.
		 */
		ar_insert_trig_tcs = NULL;
	}

	/* AFTER ROW INSERT Triggers */
	ExecARInsertTriggers(estate, resultRelInfo, slot, recheckIndexes,
						 ar_insert_trig_tcs);

	list_free(recheckIndexes);

	/*
	 * Check any WITH CHECK OPTION constraints from parent views.  We are
	 * required to do this after testing all constraints and uniqueness
	 * violations per the SQL spec, so we do it after actually inserting the
	 * record into the heap and all indexes.
	 *
	 * ExecWithCheckOptions will elog(ERROR) if a violation is found, so the
	 * tuple will never be seen, if it violates the WITH CHECK OPTION.
	 *
	 * ExecWithCheckOptions() will skip any WCOs which are not of the kind we
	 * are looking for at this point.
	 */
	if (resultRelInfo->ri_WithCheckOptions != NIL)
		ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);

	/* Process RETURNING if present */
	if (resultRelInfo->ri_projectReturning)
		result = ExecProcessReturning(resultRelInfo, slot, planSlot);

	return result;
}

/* ----------------------------------------------------------------
 *		ExecDelete
 *
 *		DELETE is like UPDATE, except that we delete the tuple and no
 *		index modifications are needed.
 *
 *		When deleting from a table, tupleid identifies the tuple to
 *		delete and oldtuple is NULL.  When deleting from a view,
 *		oldtuple is passed to the INSTEAD OF triggers and identifies
 *		what to delete, and tupleid is invalid.  When deleting from a
 *		foreign table, tupleid is invalid; the FDW has to figure out
 *		which row to delete using data from the planSlot.  oldtuple is
 *		passed to foreign table triggers; it is NULL when the foreign
 *		table has no relevant triggers.  We use tupleDeleted to indicate
 *		whether the tuple is actually deleted; callers can use it to
 *		decide whether to continue the operation.  When this DELETE is
 *		part of an UPDATE of a partition key, the slot returned by
 *		EvalPlanQual() is passed back using the output parameter epqslot.
 *
 *		Returns RETURNING result if any, otherwise NULL.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecDelete(ModifyTableState *mtstate,
		   ItemPointer tupleid,
		   HeapTuple oldtuple,
		   TupleTableSlot *planSlot,
		   EPQState *epqstate,
		   EState *estate,
		   bool processReturning,
		   bool canSetTag,
		   bool changingPart,
		   bool *tupleDeleted,
		   TupleTableSlot **epqreturnslot)
{
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	TM_Result	result;
	TM_FailureData tmfd;
	TupleTableSlot *slot = NULL;
	TransitionCaptureState *ar_delete_trig_tcs;

	if (tupleDeleted)
		*tupleDeleted = false;

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW DELETE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_delete_before_row)
	{
		bool		dodelete;

		dodelete = ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
										tupleid, oldtuple, epqreturnslot);

		if (!dodelete)			/* "do nothing" */
			return NULL;
	}

	/* INSTEAD OF ROW DELETE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_delete_instead_row)
	{
		bool		dodelete;

		Assert(oldtuple != NULL);
		dodelete = ExecIRDeleteTriggers(estate, resultRelInfo, oldtuple);

		if (!dodelete)			/* "do nothing" */
			return NULL;
	}
	else if (resultRelInfo->ri_FdwRoutine)
	{
		/*
		 * delete from foreign table: let the FDW do it
		 *
		 * We offer the returning slot as a place to store RETURNING data,
		 * although the FDW can return some other slot if it wants.
		 */
		slot = ExecGetReturningSlot(estate, resultRelInfo);
		slot = resultRelInfo->ri_FdwRoutine->ExecForeignDelete(estate,
															   resultRelInfo,
															   slot,
															   planSlot);

		if (slot == NULL)		/* "do nothing" */
			return NULL;

		/*
		 * RETURNING expressions might reference the tableoid column, so
		 * (re)initialize tts_tableOid before evaluating them.
		 */
		if (TTS_EMPTY(slot))
			ExecStoreAllNullTuple(slot);

		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
	}
	else
	{
		/*
		 * delete the tuple
		 *
		 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
		 * that the row to be deleted is visible to that snapshot, and throw a
		 * can't-serialize error if not.  This is a special-case behavior
		 * needed for referential integrity updates in transaction-snapshot
		 * mode transactions.
		 */
ldelete:;
		result = table_tuple_delete(resultRelationDesc, tupleid,
									estate->es_output_cid,
									estate->es_snapshot,
									estate->es_crosscheck_snapshot,
									true /* wait for commit */ ,
									&tmfd,
									changingPart);

		switch (result)
		{
			case TM_SelfModified:

				/*
				 * The target tuple was already updated or deleted by the
				 * current command, or by a later command in the current
				 * transaction.  The former case is possible in a join DELETE
				 * where multiple tuples join to the same target tuple.  This
				 * is somewhat questionable, but Postgres has always allowed
				 * it: we just ignore additional deletion attempts.
				 *
				 * The latter case arises if the tuple is modified by a
				 * command in a BEFORE trigger, or perhaps by a command in a
				 * volatile function used in the query.  In such situations we
				 * should not ignore the deletion, but it is equally unsafe to
				 * proceed.  We don't want to discard the original DELETE
				 * while keeping the triggered actions based on its deletion;
				 * and it would be no better to allow the original DELETE
				 * while discarding updates that it triggered.  The row update
				 * carries some information that might be important according
				 * to business rules; so throwing an error is the only safe
				 * course.
				 *
				 * If a trigger actually intends this type of interaction, it
				 * can re-execute the DELETE and then return NULL to cancel
				 * the outer delete.
				 */
				if (tmfd.cmax != estate->es_output_cid)
					ereport(ERROR,
							(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
							 errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
							 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));

				/* Else, already deleted by self; nothing to do */
				return NULL;

			case TM_Ok:
				break;

			case TM_Updated:
				{
					TupleTableSlot *inputslot;
					TupleTableSlot *epqslot;

					if (IsolationUsesXactSnapshot())
						ereport(ERROR,
								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
								 errmsg("could not serialize access due to concurrent update")));

					/*
					 * Already know that we're going to need to do EPQ, so
					 * fetch tuple directly into the right slot.
					 */
					EvalPlanQualBegin(epqstate);
					inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc,
												 resultRelInfo->ri_RangeTableIndex);

					result = table_tuple_lock(resultRelationDesc, tupleid,
											  estate->es_snapshot,
											  inputslot, estate->es_output_cid,
											  LockTupleExclusive, LockWaitBlock,
											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
											  &tmfd);

					switch (result)
					{
						case TM_Ok:
							Assert(tmfd.traversed);
							epqslot = EvalPlanQual(epqstate,
												   resultRelationDesc,
												   resultRelInfo->ri_RangeTableIndex,
												   inputslot);
							if (TupIsNull(epqslot))
								/* Tuple not passing quals anymore, exiting... */
								return NULL;

							/*
							 * If requested, skip delete and pass back the
							 * updated row.
							 */
							if (epqreturnslot)
							{
								*epqreturnslot = epqslot;
								return NULL;
							}
							else
								goto ldelete;

						case TM_SelfModified:

							/*
							 * This can be reached when following an update
							 * chain from a tuple updated by another session,
							 * reaching a tuple that was already updated in
							 * this transaction.  If previously updated by this
							 * command, ignore the delete, otherwise error
							 * out.
							 *
							 * See also TM_SelfModified response to
							 * table_tuple_delete() above.
							 */
							if (tmfd.cmax != estate->es_output_cid)
								ereport(ERROR,
										(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
										 errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
										 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
							return NULL;

						case TM_Deleted:
							/* tuple already deleted; nothing to do */
							return NULL;

						default:

							/*
							 * TM_Invisible should be impossible because we're
							 * waiting for updated row versions, and would
							 * already have errored out if the first version
							 * is invisible.
							 *
							 * TM_Updated should be impossible, because we're
							 * locking the latest version via
							 * TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
							 */
							elog(ERROR, "unexpected table_tuple_lock status: %u",
								 result);
							return NULL;
					}

					Assert(false);
					break;
				}

			case TM_Deleted:
				if (IsolationUsesXactSnapshot())
					ereport(ERROR,
							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
							 errmsg("could not serialize access due to concurrent delete")));
				/* tuple already deleted; nothing to do */
				return NULL;

			default:
				elog(ERROR, "unrecognized table_tuple_delete status: %u",
					 result);
				return NULL;
		}

		/*
		 * Note: Normally one would think that we have to delete index tuples
		 * associated with the heap tuple now...
		 *
		 * ... but in POSTGRES, we have no need to do this because VACUUM will
		 * take care of it later.  We can't delete index tuples immediately
		 * anyway, since the tuple is still visible to other transactions.
		 */
	}

	if (canSetTag)
		(estate->es_processed)++;

	/* Tell caller that the delete actually happened. */
	if (tupleDeleted)
		*tupleDeleted = true;

	/*
	 * If this delete is the result of a partition key update that moved the
	 * tuple to a new partition, put this row into the transition OLD TABLE,
	 * if there is one.  We need to do this separately for DELETE and INSERT
	 * because they happen on different tables.
	 */
	ar_delete_trig_tcs = mtstate->mt_transition_capture;
	if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
		&& mtstate->mt_transition_capture->tcs_update_old_table)
	{
		ExecARUpdateTriggers(estate, resultRelInfo,
							 tupleid,
							 oldtuple,
							 NULL,
							 NULL,
							 mtstate->mt_transition_capture);

		/*
		 * We've already captured the OLD TABLE row, so make sure any AR
		 * DELETE trigger fired below doesn't capture it again.
		 */
		ar_delete_trig_tcs = NULL;
	}

	/* AFTER ROW DELETE Triggers */
	ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
						 ar_delete_trig_tcs);

	/* Process RETURNING if present and if requested */
	if (processReturning && resultRelInfo->ri_projectReturning)
	{
		/*
		 * We have to put the target tuple into a slot, which means first we
		 * gotta fetch it.  We can use the trigger tuple slot.
		 */
		TupleTableSlot *rslot;

		if (resultRelInfo->ri_FdwRoutine)
		{
			/* FDW must have provided a slot containing the deleted row */
			Assert(!TupIsNull(slot));
		}
		else
		{
			slot = ExecGetReturningSlot(estate, resultRelInfo);
			if (oldtuple != NULL)
			{
				ExecForceStoreHeapTuple(oldtuple, slot, false);
			}
			else
			{
				if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid,
												   SnapshotAny, slot))
					elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
			}
		}

		rslot = ExecProcessReturning(resultRelInfo, slot, planSlot);

		/*
		 * Before releasing the target tuple again, make sure rslot has a
		 * local copy of any pass-by-reference values.
		 */
		ExecMaterializeSlot(rslot);

		ExecClearTuple(slot);

		return rslot;
	}

	return NULL;
}

/* ----------------------------------------------------------------
 *		ExecUpdate
 *
 *		note: we can't run UPDATE queries with transactions
 *		off because UPDATEs are actually INSERTs and our
 *		scan will mistakenly loop forever, updating the tuple
 *		it just inserted.  This should be fixed but until it
 *		is, we don't want to get stuck in an infinite loop
 *		which corrupts your database.
 *
 *		When updating a table, tupleid identifies the tuple to
 *		update and oldtuple is NULL.  When updating a view, oldtuple
 *		is passed to the INSTEAD OF triggers and identifies what to
 *		update, and tupleid is invalid.  When updating a foreign table,
 *		tupleid is invalid; the FDW has to figure out which row to
 *		update using data from the planSlot.  oldtuple is passed to
 *		foreign table triggers; it is NULL when the foreign table has
 *		no relevant triggers.
 *
 *		Returns RETURNING result if any, otherwise NULL.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecUpdate(ModifyTableState *mtstate,
		   ItemPointer tupleid,
		   HeapTuple oldtuple,
		   TupleTableSlot *slot,
		   TupleTableSlot *planSlot,
		   EPQState *epqstate,
		   EState *estate,
		   bool canSetTag)
{
	ResultRelInfo *resultRelInfo;
	Relation	resultRelationDesc;
	TM_Result	result;
	TM_FailureData tmfd;
	List	   *recheckIndexes = NIL;
	TupleConversionMap *saved_tcs_map = NULL;

	/*
	 * abort the operation if not running transactions
	 */
	if (IsBootstrapProcessingMode())
		elog(ERROR, "cannot UPDATE during bootstrap");

	ExecMaterializeSlot(slot);

	/*
	 * get information on the (current) result relation
	 */
	resultRelInfo = estate->es_result_relation_info;
	resultRelationDesc = resultRelInfo->ri_RelationDesc;

	/* BEFORE ROW UPDATE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_update_before_row)
	{
		if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
								  tupleid, oldtuple, slot))
			return NULL;		/* "do nothing" */
	}

	/* INSTEAD OF ROW UPDATE Triggers */
	if (resultRelInfo->ri_TrigDesc &&
		resultRelInfo->ri_TrigDesc->trig_update_instead_row)
	{
		if (!ExecIRUpdateTriggers(estate, resultRelInfo,
								  oldtuple, slot))
			return NULL;		/* "do nothing" */
	}
	else if (resultRelInfo->ri_FdwRoutine)
	{
		/*
		 * Compute stored generated columns
		 */
		if (resultRelationDesc->rd_att->constr &&
			resultRelationDesc->rd_att->constr->has_generated_stored)
			ExecComputeStoredGenerated(estate, slot);

		/*
		 * update in foreign table: let the FDW do it
		 */
		slot = resultRelInfo->ri_FdwRoutine->ExecForeignUpdate(estate,
															   resultRelInfo,
															   slot,
															   planSlot);

		if (slot == NULL)		/* "do nothing" */
			return NULL;

		/*
		 * AFTER ROW Triggers or RETURNING expressions might reference the
		 * tableoid column, so (re-)initialize tts_tableOid before evaluating
		 * them.
		 */
		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
	}
	else
	{
		LockTupleMode lockmode;
		bool		partition_constraint_failed;
		bool		update_indexes;

		/*
		 * Constraints might reference the tableoid column, so (re-)initialize
		 * tts_tableOid before evaluating them.
		 */
		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);

		/*
		 * Compute stored generated columns
		 */
		if (resultRelationDesc->rd_att->constr &&
			resultRelationDesc->rd_att->constr->has_generated_stored)
			ExecComputeStoredGenerated(estate, slot);

		/*
		 * Check any RLS UPDATE WITH CHECK policies
		 *
		 * If we generate a new candidate tuple after EvalPlanQual testing, we
		 * must loop back here and recheck any RLS policies and constraints.
		 * (We don't need to redo triggers, however.  If there are any BEFORE
		 * triggers then trigger.c will have done table_tuple_lock to lock the
		 * correct tuple, so there's no need to do them again.)
		 */
lreplace:;

		/* ensure slot is independent, consider e.g. EPQ */
		ExecMaterializeSlot(slot);

		/*
		 * If partition constraint fails, this row might get moved to another
		 * partition, in which case we should check the RLS CHECK policy just
		 * before inserting into the new partition, rather than doing it here.
		 * This is because a trigger on that partition might again change the
		 * row.  So skip the WCO checks if the partition constraint fails.
		 */
		partition_constraint_failed =
			resultRelInfo->ri_PartitionCheck &&
			!ExecPartitionCheck(resultRelInfo, slot, estate, false);

		if (!partition_constraint_failed &&
			resultRelInfo->ri_WithCheckOptions != NIL)
		{
			/*
			 * ExecWithCheckOptions() will skip any WCOs which are not of the
			 * kind we are looking for at this point.
			 */
			ExecWithCheckOptions(WCO_RLS_UPDATE_CHECK,
								 resultRelInfo, slot, estate);
		}

		/*
		 * If a partition check failed, try to move the row into the right
		 * partition.
		 */
		if (partition_constraint_failed)
		{
			bool		tuple_deleted;
			TupleTableSlot *ret_slot;
			TupleTableSlot *epqslot = NULL;
			PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
			int			map_index;
			TupleConversionMap *tupconv_map;

			/*
			 * Disallow an INSERT ON CONFLICT DO UPDATE that causes the
			 * original row to migrate to a different partition.  Maybe this
			 * can be implemented some day, but it seems a fringe feature with
			 * little redeeming value.
			 */
			if (((ModifyTable *) mtstate->ps.plan)->onConflictAction == ONCONFLICT_UPDATE)
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("invalid ON UPDATE specification"),
						 errdetail("The result tuple would appear in a different partition than the original tuple.")));

			/*
			 * When an UPDATE is run on a leaf partition, we will not have
			 * partition tuple routing set up.  In that case, fail with
			 * partition constraint violation error.
			 */
			if (proute == NULL)
				ExecPartitionCheckEmitError(resultRelInfo, slot, estate);

			/*
			 * Row movement, part 1.  Delete the tuple, but skip RETURNING
			 * processing.  We want to return rows from INSERT.
			 */
			ExecDelete(mtstate, tupleid, oldtuple, planSlot, epqstate,
					   estate, false, false /* canSetTag */ ,
					   true /* changingPart */ , &tuple_deleted, &epqslot);

			/*
			 * If for some reason the DELETE didn't happen (e.g. a trigger
			 * prevented it, or it was already deleted by self, or it was
			 * concurrently deleted by another transaction), then we should
			 * skip the insert as well; otherwise, an UPDATE could cause an
			 * increase in the total number of rows across all partitions,
			 * which is clearly wrong.
			 *
			 * For a normal UPDATE, the case where the tuple has been the
			 * subject of a concurrent UPDATE or DELETE would be handled by
			 * the EvalPlanQual machinery, but for an UPDATE that we've
			 * translated into a DELETE from this partition and an INSERT into
			 * some other partition, that's not available, because CTID chains
			 * can't span relation boundaries.  We mimic the semantics to a
			 * limited extent by skipping the INSERT if the DELETE fails to
			 * find a tuple.  This ensures that two concurrent attempts to
			 * UPDATE the same tuple at the same time can't turn one tuple
			 * into two, and that an UPDATE of a just-deleted tuple can't
			 * resurrect it.
			 */
			if (!tuple_deleted)
			{
				/*
				 * epqslot will be typically NULL.  But when ExecDelete()
				 * finds that another transaction has concurrently updated the
				 * same row, it re-fetches the row, skips the delete, and
				 * epqslot is set to the re-fetched tuple slot.  In that case,
				 * we need to do all the checks again.
				 */
				if (TupIsNull(epqslot))
					return NULL;
				else
				{
					slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
					goto lreplace;
				}
			}

			/*
			 * Updates set the transition capture map only when a new subplan
			 * is chosen.  But for inserts, it is set for each row.  So after
			 * INSERT, we need to revert back to the map created for UPDATE;
			 * otherwise the next UPDATE will incorrectly use the one created
			 * for INSERT.  So first save the one created for UPDATE.
			 */
			if (mtstate->mt_transition_capture)
				saved_tcs_map = mtstate->mt_transition_capture->tcs_map;

			/*
			 * resultRelInfo is one of the per-subplan resultRelInfos.  So we
			 * should convert the tuple into root's tuple descriptor, since
			 * ExecInsert() starts the search from root.  The tuple conversion
			 * map list is in the order of mtstate->resultRelInfo[], so to
			 * retrieve the one for this resultRel, we need to know the
			 * position of the resultRel in mtstate->resultRelInfo[].
			 */
			map_index = resultRelInfo - mtstate->resultRelInfo;
			Assert(map_index >= 0 && map_index < mtstate->mt_nplans);
			tupconv_map = tupconv_map_for_subplan(mtstate, map_index);
			if (tupconv_map != NULL)
				slot = execute_attr_map_slot(tupconv_map->attrMap,
											 slot,
											 mtstate->mt_root_tuple_slot);

			/*
			 * Prepare for tuple routing, making it look like we're inserting
			 * into the root.
			 */
			Assert(mtstate->rootResultRelInfo != NULL);
			slot = ExecPrepareTupleRouting(mtstate, estate, proute,
										   mtstate->rootResultRelInfo, slot);

			ret_slot = ExecInsert(mtstate, slot, planSlot,
								  estate, canSetTag);

			/* Revert ExecPrepareTupleRouting's node change. */
			estate->es_result_relation_info = resultRelInfo;
			if (mtstate->mt_transition_capture)
			{
				mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
				mtstate->mt_transition_capture->tcs_map = saved_tcs_map;
			}

			return ret_slot;
		}

		/*
		 * Check the constraints of the tuple.  We've already checked the
		 * partition constraint above; however, we must still ensure the tuple
		 * passes all other constraints, so we will call ExecConstraints() and
		 * have it validate all remaining checks.
		 */
		if (resultRelationDesc->rd_att->constr)
			ExecConstraints(resultRelInfo, slot, estate);

		/*
		 * replace the heap tuple
		 *
		 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
		 * that the row to be updated is visible to that snapshot, and throw a
		 * can't-serialize error if not.  This is a special-case behavior
		 * needed for referential integrity updates in transaction-snapshot
		 * mode transactions.
		 */
		result = table_tuple_update(resultRelationDesc, tupleid, slot,
									estate->es_output_cid,
									estate->es_snapshot,
									estate->es_crosscheck_snapshot,
									true /* wait for commit */ ,
									&tmfd, &lockmode, &update_indexes);

		switch (result)
		{
			case TM_SelfModified:

				/*
				 * The target tuple was already updated or deleted by the
				 * current command, or by a later command in the current
				 * transaction.  The former case is possible in a join UPDATE
				 * where multiple tuples join to the same target tuple.  This
				 * is pretty questionable, but Postgres has always allowed it:
				 * we just execute the first update action and ignore
				 * additional update attempts.
				 *
				 * The latter case arises if the tuple is modified by a
				 * command in a BEFORE trigger, or perhaps by a command in a
				 * volatile function used in the query.  In such situations we
				 * should not ignore the update, but it is equally unsafe to
				 * proceed.  We don't want to discard the original UPDATE
				 * while keeping the triggered actions based on it; and we
				 * have no principled way to merge this update with the
				 * previous ones.  So throwing an error is the only safe
				 * course.
				 *
				 * If a trigger actually intends this type of interaction, it
				 * can re-execute the UPDATE (assuming it can figure out how)
				 * and then return NULL to cancel the outer update.
				 */
				if (tmfd.cmax != estate->es_output_cid)
					ereport(ERROR,
							(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
							 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
							 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));

				/* Else, already updated by self; nothing to do */
				return NULL;

			case TM_Ok:
				break;

			case TM_Updated:
				{
					TupleTableSlot *inputslot;
					TupleTableSlot *epqslot;

					if (IsolationUsesXactSnapshot())
						ereport(ERROR,
								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
								 errmsg("could not serialize access due to concurrent update")));

					/*
					 * Already know that we're going to need to do EPQ, so
					 * fetch tuple directly into the right slot.
					 */
					inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc,
												 resultRelInfo->ri_RangeTableIndex);

					result = table_tuple_lock(resultRelationDesc, tupleid,
											  estate->es_snapshot,
											  inputslot, estate->es_output_cid,
											  lockmode, LockWaitBlock,
											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
											  &tmfd);

					switch (result)
					{
						case TM_Ok:
							Assert(tmfd.traversed);

							epqslot = EvalPlanQual(epqstate,
												   resultRelationDesc,
												   resultRelInfo->ri_RangeTableIndex,
												   inputslot);
							if (TupIsNull(epqslot))
								/* Tuple not passing quals anymore, exiting... */
								return NULL;

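							/* Strip junk columns (e.g. ctid) from the EPQ output before retrying. */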
							slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
							goto lreplace;

						case TM_Deleted:
							/* tuple already deleted; nothing to do */
							return NULL;

						case TM_SelfModified:

							/*
							 * This can be reached when following an update
							 * chain from a tuple updated by another session,
							 * reaching a tuple that was already updated in
							 * this transaction.  If previously modified by
							 * this command, ignore the redundant update,
							 * otherwise error out.
							 *
							 * See also TM_SelfModified response to
							 * table_tuple_update() above.
							 */
							if (tmfd.cmax != estate->es_output_cid)
								ereport(ERROR,
										(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
										 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
										 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
							return NULL;

						default:
							/* see table_tuple_lock call in ExecDelete() */
							elog(ERROR, "unexpected table_tuple_lock status: %u",
								 result);
							return NULL;
					}
				}

				break;

			case TM_Deleted:
				if (IsolationUsesXactSnapshot())
					ereport(ERROR,
							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
							 errmsg("could not serialize access due to concurrent delete")));
				/* tuple already deleted; nothing to do */
				return NULL;

			default:
				elog(ERROR, "unrecognized table_tuple_update status: %u",
					 result);
				return NULL;
		}

		/* insert index entries for tuple if necessary */
		if (resultRelInfo->ri_NumIndices > 0 && update_indexes)
			recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL, NIL);
	}

	if (canSetTag)
		(estate->es_processed)++;

	/* AFTER ROW UPDATE Triggers */
	ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, slot,
						 recheckIndexes,
						 mtstate->operation == CMD_INSERT ?
						 mtstate->mt_oc_transition_capture :
						 mtstate->mt_transition_capture);

	list_free(recheckIndexes);

	/*
	 * Check any WITH CHECK OPTION constraints from parent views.  We are
	 * required to do this after testing all constraints and uniqueness
	 * violations per the SQL spec, so we do it after actually updating the
	 * record in the heap and all indexes.
	 *
	 * ExecWithCheckOptions() will skip any WCOs which are not of the kind we
	 * are looking for at this point.
	 */
	if (resultRelInfo->ri_WithCheckOptions != NIL)
		ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);

	/* Process RETURNING if present */
	if (resultRelInfo->ri_projectReturning)
		return ExecProcessReturning(resultRelInfo, slot, planSlot);

	return NULL;
}

/*
 * ExecOnConflictUpdate --- execute UPDATE of INSERT ON CONFLICT DO UPDATE
 *
 * Try to lock tuple for update as part of speculative insertion.  If
 * a qual originating from ON CONFLICT DO UPDATE is satisfied, update
 * (but still lock row, even though it may not satisfy estate's
 * snapshot).
 *
 * Returns true if we're done (with or without an update), or false if
 * the caller must retry the INSERT from scratch.
 */
static bool
ExecOnConflictUpdate(ModifyTableState *mtstate,
					 ResultRelInfo *resultRelInfo,
					 ItemPointer conflictTid,
					 TupleTableSlot *planSlot,
					 TupleTableSlot *excludedSlot,
					 EState *estate,
					 bool canSetTag,
					 TupleTableSlot **returning)
{
	ExprContext *econtext = mtstate->ps.ps_ExprContext;
	Relation	relation = resultRelInfo->ri_RelationDesc;
	ExprState  *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
	TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing;
	TM_FailureData tmfd;
	LockTupleMode lockmode;
	TM_Result	test;
	Datum		xminDatum;
	TransactionId xmin;
	bool		isnull;

	/* Determine lock mode to use */
	lockmode = ExecUpdateLockMode(estate, resultRelInfo);

	/*
	 * Lock tuple for update.  Don't follow updates when tuple cannot be
	 * locked without doing so.  A row locking conflict here means our
	 * previous conclusion that the tuple is conclusively committed is not
	 * true anymore.
	 */
	test = table_tuple_lock(relation, conflictTid,
							estate->es_snapshot,
							existing, estate->es_output_cid,
							lockmode, LockWaitBlock, 0,
							&tmfd);
	switch (test)
	{
		case TM_Ok:
			/* success! */
			break;

		case TM_Invisible:

1536 | /* |
1537 | * This can occur when a just inserted tuple is updated again in |
1538 | * the same command. E.g. because multiple rows with the same |
1539 | * conflicting key values are inserted. |
1540 | * |
1541 | * This is somewhat similar to the ExecUpdate() TM_SelfModified |
1542 | * case. We do not want to proceed because it would lead to the |
1543 | * same row being updated a second time in some unspecified order, |
1544 | * and in contrast to plain UPDATEs there's no historical behavior |
1545 | * to break. |
1546 | * |
1547 | * It is the user's responsibility to prevent this situation from |
1548 | * occurring. These problems are why SQL-2003 similarly specifies |
1549 | * that for SQL MERGE, an exception must be raised in the event of |
1550 | * an attempt to update the same row twice. |
1551 | */ |
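| |
| /* |
| * Illustrative example (hypothetical table and column names): a |
| * command such as |
| *		INSERT INTO tab VALUES (1, 'a'), (1, 'b') |
| *		ON CONFLICT (key) DO UPDATE SET val = EXCLUDED.val |
| * would try to update the row inserted for the first VALUES item a |
| * second time, and so must raise the error below. |
| */ |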
1552 | xminDatum = slot_getsysattr(existing, |
1553 | MinTransactionIdAttributeNumber, |
1554 | &isnull); |
1555 | Assert(!isnull); |
1556 | xmin = DatumGetTransactionId(xminDatum); |
1557 | |
1558 | if (TransactionIdIsCurrentTransactionId(xmin)) |
1559 | ereport(ERROR, |
1560 | (errcode(ERRCODE_CARDINALITY_VIOLATION), |
1561 | errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time" ), |
1562 | errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values." ))); |
1563 | |
1564 | /* This shouldn't happen */ |
1565 | elog(ERROR, "attempted to lock invisible tuple" ); |
1566 | break; |
1567 | |
1568 | case TM_SelfModified: |
1569 | |
1570 | /* |
1571 | * This state should never be reached. As a dirty snapshot is used |
1572 | * to find conflicting tuples, speculative insertion wouldn't have |
1573 | * seen this row to conflict with. |
1574 | */ |
1575 | elog(ERROR, "unexpected self-updated tuple" ); |
1576 | break; |
1577 | |
1578 | case TM_Updated: |
1579 | if (IsolationUsesXactSnapshot()) |
1580 | ereport(ERROR, |
1581 | (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), |
1582 | errmsg("could not serialize access due to concurrent update" ))); |
1583 | |
1584 | /* |
1585 | * As long as we don't support an UPDATE of INSERT ON CONFLICT for |
1586 | * a partitioned table, we shouldn't reach a case where the tuple |
1587 | * to be locked has been moved to another partition due to a |
1588 | * concurrent update of the partition key. |
1589 | */ |
1590 | Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid)); |
1591 | |
1592 | /* |
1593 | * Tell caller to try again from the very start. |
1594 | * |
1595 | * It does not make sense to use the usual EvalPlanQual() style |
1596 | * loop here, as the new version of the row might not conflict |
1597 | * anymore, or the conflicting tuple has actually been deleted. |
1598 | */ |
1599 | ExecClearTuple(existing); |
1600 | return false; |
1601 | |
1602 | case TM_Deleted: |
1603 | if (IsolationUsesXactSnapshot()) |
1604 | ereport(ERROR, |
1605 | (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), |
1606 | errmsg("could not serialize access due to concurrent delete" ))); |
1607 | |
1608 | /* see TM_Updated case */ |
1609 | Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid)); |
1610 | ExecClearTuple(existing); |
1611 | return false; |
1612 | |
1613 | default: |
1614 | elog(ERROR, "unrecognized table_tuple_lock status: %u" , test); |
1615 | } |
1616 | |
1617 | /* Success, the tuple is locked. */ |
1618 | |
1619 | /* |
1620 | * Verify that the tuple is visible to our MVCC snapshot if the current |
1621 | * isolation level mandates that. |
1622 | * |
1623 | * It's not sufficient to rely on the check within ExecUpdate(), since |
1624 | * e.g. the ON CONFLICT ... WHERE clause may prevent us from reaching it. |
1625 | * |
1626 | * This means we only ever continue when a new command in the current |
1627 | * transaction could see the row, even though in READ COMMITTED mode the |
1628 | * tuple will not be visible according to the current statement's |
1629 | * snapshot. This is in line with the way UPDATE deals with newer tuple |
1630 | * versions. |
1631 | */ |
1632 | ExecCheckTupleVisible(estate, relation, existing); |
1633 | |
1634 | /* |
1635 | * Make tuple and any needed join variables available to ExecQual and |
1636 | * ExecProject. The EXCLUDED tuple is installed in ecxt_innertuple, while |
1637 | * the target's existing tuple is installed in the scantuple. EXCLUDED |
1638 | * has been made to reference INNER_VAR in setrefs.c, but there is no |
1639 | * other redirection. |
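| * |
| * Thus, as a hypothetical illustration, in SET val = EXCLUDED.val the |
| * EXCLUDED.val reference is evaluated against ecxt_innertuple, while a |
| * plain reference to one of the target's columns reads from the |
| * scantuple. |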
1640 | */ |
1641 | econtext->ecxt_scantuple = existing; |
1642 | econtext->ecxt_innertuple = excludedSlot; |
1643 | econtext->ecxt_outertuple = NULL; |
1644 | |
1645 | if (!ExecQual(onConflictSetWhere, econtext)) |
1646 | { |
1647 | ExecClearTuple(existing); /* see return below */ |
1648 | InstrCountFiltered1(&mtstate->ps, 1); |
1649 | return true; /* done with the tuple */ |
1650 | } |
1651 | |
1652 | if (resultRelInfo->ri_WithCheckOptions != NIL) |
1653 | { |
1654 | /* |
1655 | * Check target's existing tuple against UPDATE-applicable USING |
1656 | * security barrier quals (if any), enforced here as RLS checks/WCOs. |
1657 | * |
1658 | * The rewriter creates UPDATE RLS checks/WCOs for UPDATE security |
1659 | * quals, and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK, |
1660 | * but that's almost the extent of its special handling for ON |
1661 | * CONFLICT DO UPDATE. |
1662 | * |
1663 | * The rewriter will also have associated UPDATE applicable straight |
1664 | * RLS checks/WCOs for the benefit of the ExecUpdate() call that |
1665 | * follows. INSERTs and UPDATEs naturally have mutually exclusive WCO |
1666 | * kinds, so there is no danger of spurious over-enforcement in the |
1667 | * INSERT or UPDATE path. |
1668 | */ |
1669 | ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo, |
1670 | existing, |
1671 | mtstate->ps.state); |
1672 | } |
1673 | |
1674 | /* Project the new tuple version */ |
1675 | ExecProject(resultRelInfo->ri_onConflict->oc_ProjInfo); |
1676 | |
1677 | /* |
1678 | * Note that it is possible that the target tuple has been modified in |
1679 | * this session, after the above table_tuple_lock. We choose to not error |
1680 | * out in that case, in line with ExecUpdate's treatment of similar cases. |
1681 | * This can happen if an UPDATE is triggered from within ExecQual(), |
1682 | * ExecWithCheckOptions() or ExecProject() above, e.g. by selecting from a |
1683 | * wCTE in the ON CONFLICT's SET. |
1684 | */ |
1685 | |
1686 | /* Execute UPDATE with projection */ |
1687 | *returning = ExecUpdate(mtstate, conflictTid, NULL, |
1688 | resultRelInfo->ri_onConflict->oc_ProjSlot, |
1689 | planSlot, |
1690 | &mtstate->mt_epqstate, mtstate->ps.state, |
1691 | canSetTag); |
1692 | |
1693 | /* |
1694 | * Clear out existing tuple, as there might not be another conflict among |
1695 | * the next input rows. Don't want to hold resources till the end of the |
1696 | * query. |
1697 | */ |
1698 | ExecClearTuple(existing); |
1699 | return true; |
1700 | } |
1701 | |
1702 | |
1703 | /* |
1704 | * Process BEFORE EACH STATEMENT triggers |
1705 | */ |
1706 | static void |
1707 | fireBSTriggers(ModifyTableState *node) |
1708 | { |
1709 | ModifyTable *plan = (ModifyTable *) node->ps.plan; |
1710 | ResultRelInfo *resultRelInfo = node->resultRelInfo; |
1711 | |
1712 | /* |
1713 | * If the node modifies a partitioned table, we must fire its triggers. |
1714 | * Note that in that case, node->resultRelInfo points to the first leaf |
1715 | * partition, not the root table. |
1716 | */ |
1717 | if (node->rootResultRelInfo != NULL) |
1718 | resultRelInfo = node->rootResultRelInfo; |
1719 | |
1720 | switch (node->operation) |
1721 | { |
1722 | case CMD_INSERT: |
1723 | ExecBSInsertTriggers(node->ps.state, resultRelInfo); |
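| /* |
| * An INSERT ... ON CONFLICT DO UPDATE statement may also update |
| * rows, so fire BEFORE STATEMENT UPDATE triggers for it as well. |
| */ |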
1724 | if (plan->onConflictAction == ONCONFLICT_UPDATE) |
1725 | ExecBSUpdateTriggers(node->ps.state, |
1726 | resultRelInfo); |
1727 | break; |
1728 | case CMD_UPDATE: |
1729 | ExecBSUpdateTriggers(node->ps.state, resultRelInfo); |
1730 | break; |
1731 | case CMD_DELETE: |
1732 | ExecBSDeleteTriggers(node->ps.state, resultRelInfo); |
1733 | break; |
1734 | default: |
1735 | elog(ERROR, "unknown operation" ); |
1736 | break; |
1737 | } |
1738 | } |
1739 | |
1740 | /* |
1741 | * Return the target rel ResultRelInfo. |
1742 | * |
1743 | * This relation is the same as: |
1744 | * - the relation for which we will fire AFTER STATEMENT triggers. |
1745 | * - the relation into whose tuple format all captured transition tuples must |
1746 | * be converted. |
1747 | * - the root partitioned table. |
1748 | */ |
1749 | static ResultRelInfo * |
1750 | getTargetResultRelInfo(ModifyTableState *node) |
1751 | { |
1752 | /* |
1753 | * Note that if the node modifies a partitioned table, node->resultRelInfo |
1754 | * points to the first leaf partition, not the root table. |
1755 | */ |
1756 | if (node->rootResultRelInfo != NULL) |
1757 | return node->rootResultRelInfo; |
1758 | else |
1759 | return node->resultRelInfo; |
1760 | } |
1761 | |
1762 | /* |
1763 | * Process AFTER EACH STATEMENT triggers |
1764 | */ |
1765 | static void |
1766 | fireASTriggers(ModifyTableState *node) |
1767 | { |
1768 | ModifyTable *plan = (ModifyTable *) node->ps.plan; |
1769 | ResultRelInfo *resultRelInfo = getTargetResultRelInfo(node); |
1770 | |
1771 | switch (node->operation) |
1772 | { |
1773 | case CMD_INSERT: |
1774 | if (plan->onConflictAction == ONCONFLICT_UPDATE) |
1775 | ExecASUpdateTriggers(node->ps.state, |
1776 | resultRelInfo, |
1777 | node->mt_oc_transition_capture); |
1778 | ExecASInsertTriggers(node->ps.state, resultRelInfo, |
1779 | node->mt_transition_capture); |
1780 | break; |
1781 | case CMD_UPDATE: |
1782 | ExecASUpdateTriggers(node->ps.state, resultRelInfo, |
1783 | node->mt_transition_capture); |
1784 | break; |
1785 | case CMD_DELETE: |
1786 | ExecASDeleteTriggers(node->ps.state, resultRelInfo, |
1787 | node->mt_transition_capture); |
1788 | break; |
1789 | default: |
1790 | elog(ERROR, "unknown operation" ); |
1791 | break; |
1792 | } |
1793 | } |
1794 | |
1795 | /* |
1796 | * Set up the state needed for collecting transition tuples for AFTER |
1797 | * triggers. |
1798 | */ |
1799 | static void |
1800 | ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate) |
1801 | { |
1802 | ModifyTable *plan = (ModifyTable *) mtstate->ps.plan; |
1803 | ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate); |
1804 | |
1805 | /* Check for transition tables on the directly targeted relation. */ |
1806 | mtstate->mt_transition_capture = |
1807 | MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc, |
1808 | RelationGetRelid(targetRelInfo->ri_RelationDesc), |
1809 | mtstate->operation); |
1810 | if (plan->operation == CMD_INSERT && |
1811 | plan->onConflictAction == ONCONFLICT_UPDATE) |
1812 | mtstate->mt_oc_transition_capture = |
1813 | MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc, |
1814 | RelationGetRelid(targetRelInfo->ri_RelationDesc), |
1815 | CMD_UPDATE); |
1816 | |
1817 | /* |
1818 | * If we found that we need to collect transition tuples then we may also |
1819 | * need tuple conversion maps for any children that have TupleDescs that |
1820 | * aren't compatible with the tuplestores. (We can share these maps |
1821 | * between the regular and ON CONFLICT cases.) |
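| * |
| * As a hypothetical example, a child table whose columns were declared |
| * in a different order from the parent's needs such a map, whereas a |
| * child whose TupleDesc matches the parent's does not (the map is then |
| * NULL). |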
1822 | */ |
1823 | if (mtstate->mt_transition_capture != NULL || |
1824 | mtstate->mt_oc_transition_capture != NULL) |
1825 | { |
1826 | ExecSetupChildParentMapForSubplan(mtstate); |
1827 | |
1828 | /* |
1829 | * Install the conversion map for the first plan for UPDATE and DELETE |
1830 | * operations. It will be advanced each time we switch to the next |
1831 | * plan. (INSERT operations set it every time, so we need not update |
1832 | * mtstate->mt_oc_transition_capture here.) |
1833 | */ |
1834 | if (mtstate->mt_transition_capture && mtstate->operation != CMD_INSERT) |
1835 | mtstate->mt_transition_capture->tcs_map = |
1836 | tupconv_map_for_subplan(mtstate, 0); |
1837 | } |
1838 | } |
1839 | |
1840 | /* |
1841 | * ExecPrepareTupleRouting --- prepare for routing one tuple |
1842 | * |
1843 | * Determine the partition in which the tuple in slot is to be inserted, |
1844 | * and modify mtstate and estate to prepare for it. |
1845 | * |
1846 | * Caller must revert the estate changes after executing the insertion! |
1847 | * In mtstate, transition capture changes may also need to be reverted. |
1848 | * |
1849 | * Returns a slot holding the tuple of the partition rowtype. |
1850 | */ |
1851 | static TupleTableSlot * |
1852 | ExecPrepareTupleRouting(ModifyTableState *mtstate, |
1853 | EState *estate, |
1854 | PartitionTupleRouting *proute, |
1855 | ResultRelInfo *targetRelInfo, |
1856 | TupleTableSlot *slot) |
1857 | { |
1858 | ResultRelInfo *partrel; |
1859 | PartitionRoutingInfo *partrouteinfo; |
1860 | TupleConversionMap *map; |
1861 | |
1862 | /* |
1863 | * Lookup the target partition's ResultRelInfo. If ExecFindPartition does |
1864 | * not find a valid partition for the tuple in 'slot' then an error is |
1865 | * raised. An error may also be raised if the found partition is not a |
1866 | * valid target for INSERTs. This is required since a partitioned table |
1867 | * UPDATE to another partition becomes a DELETE+INSERT. |
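| * |
| * For instance (hypothetical names), an UPDATE that changes the |
| * partition key, such as |
| *		UPDATE parted SET part_key = 20 WHERE part_key = 5 |
| * deletes the row from its old partition and routes the new version |
| * through here as an INSERT into the destination partition. |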
1868 | */ |
1869 | partrel = ExecFindPartition(mtstate, targetRelInfo, proute, slot, estate); |
1870 | partrouteinfo = partrel->ri_PartitionInfo; |
1871 | Assert(partrouteinfo != NULL); |
1872 | |
1873 | /* |
1874 | * Make it look like we are inserting into the partition. |
1875 | */ |
1876 | estate->es_result_relation_info = partrel; |
1877 | |
1878 | /* |
1879 | * If we're capturing transition tuples, we might need to convert from the |
1880 | * partition rowtype to the root partitioned table's rowtype. |
1881 | */ |
1882 | if (mtstate->mt_transition_capture != NULL) |
1883 | { |
1884 | if (partrel->ri_TrigDesc && |
1885 | partrel->ri_TrigDesc->trig_insert_before_row) |
1886 | { |
1887 | /* |
1888 | * If there are any BEFORE triggers on the partition, we'll have |
1889 | * to be ready to convert their result back to tuplestore format. |
1890 | */ |
1891 | mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL; |
1892 | mtstate->mt_transition_capture->tcs_map = |
1893 | partrouteinfo->pi_PartitionToRootMap; |
1894 | } |
1895 | else |
1896 | { |
1897 | /* |
1898 | * Otherwise, just remember the original unconverted tuple, to |
1899 | * avoid a needless round trip conversion. |
1900 | */ |
1901 | mtstate->mt_transition_capture->tcs_original_insert_tuple = slot; |
1902 | mtstate->mt_transition_capture->tcs_map = NULL; |
1903 | } |
1904 | } |
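| /* |
| * Likewise, ON CONFLICT UPDATE transition tuples captured for this |
| * partition must be converted back to the root table's rowtype. |
| */ |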
1905 | if (mtstate->mt_oc_transition_capture != NULL) |
1906 | { |
1907 | mtstate->mt_oc_transition_capture->tcs_map = |
1908 | partrouteinfo->pi_PartitionToRootMap; |
1909 | } |
1910 | |
1911 | /* |
1912 | * Convert the tuple, if necessary. |
1913 | */ |
1914 | map = partrouteinfo->pi_RootToPartitionMap; |
1915 | if (map != NULL) |
1916 | { |
1917 | TupleTableSlot *new_slot = partrouteinfo->pi_PartitionTupleSlot; |
1918 | |
1919 | slot = execute_attr_map_slot(map->attrMap, slot, new_slot); |
1920 | } |
1921 | |
1922 | return slot; |
1923 | } |
1924 | |
1925 | /* |
1926 | * Initialize the child-to-root tuple conversion map array for UPDATE subplans. |
1927 | * |
1928 | * This map array is required to convert the tuple from the subplan result rel |
1929 | * to the target table descriptor. This requirement arises for two independent |
1930 | * scenarios: |
1931 | * 1. For update-tuple-routing. |
1932 | * 2. For capturing tuples in transition tables. |
1933 | */ |
1934 | static void |
1935 | ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate) |
1936 | { |
1937 | ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate); |
1938 | ResultRelInfo *resultRelInfos = mtstate->resultRelInfo; |
1939 | TupleDesc outdesc; |
1940 | int numResultRelInfos = mtstate->mt_nplans; |
1941 | int i; |
1942 | |
1943 | /* |
1944 | * Build array of conversion maps from each child's TupleDesc to the one |
1945 | * used in the target relation. The map pointers may be NULL when no |
1946 | * conversion is necessary, which is hopefully a common case. |
1947 | */ |
1948 | |
1949 | /* Get tuple descriptor of the target rel. */ |
1950 | outdesc = RelationGetDescr(targetRelInfo->ri_RelationDesc); |
1951 | |
1952 | mtstate->mt_per_subplan_tupconv_maps = (TupleConversionMap **) |
1953 | palloc(sizeof(TupleConversionMap *) * numResultRelInfos); |
1954 | |
1955 | for (i = 0; i < numResultRelInfos; ++i) |
1956 | { |
1957 | mtstate->mt_per_subplan_tupconv_maps[i] = |
1958 | convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc), |
1959 | outdesc, |
1960 | gettext_noop("could not convert row type")); |
1961 | } |
1962 | } |
1963 | |
1964 | /* |
1965 | * For a given subplan index, get the tuple conversion map. |
1966 | */ |
1967 | static TupleConversionMap * |
1968 | tupconv_map_for_subplan(ModifyTableState *mtstate, int whichplan) |
1969 | { |
1970 | /* If nobody else set the per-subplan array of maps, do so ourselves. */ |
1971 | if (mtstate->mt_per_subplan_tupconv_maps == NULL) |
1972 | ExecSetupChildParentMapForSubplan(mtstate); |
1973 | |
1974 | Assert(whichplan >= 0 && whichplan < mtstate->mt_nplans); |
1975 | return mtstate->mt_per_subplan_tupconv_maps[whichplan]; |
1976 | } |
1977 | |
1978 | /* ---------------------------------------------------------------- |
1979 | * ExecModifyTable |
1980 | * |
1981 | * Perform table modifications as required, and return RETURNING results |
1982 | * if needed. |
1983 | * ---------------------------------------------------------------- |
1984 | */ |
1985 | static TupleTableSlot * |
1986 | ExecModifyTable(PlanState *pstate) |
1987 | { |
1988 | ModifyTableState *node = castNode(ModifyTableState, pstate); |
1989 | PartitionTupleRouting *proute = node->mt_partition_tuple_routing; |
1990 | EState *estate = node->ps.state; |
1991 | CmdType operation = node->operation; |
1992 | ResultRelInfo *saved_resultRelInfo; |
1993 | ResultRelInfo *resultRelInfo; |
1994 | PlanState *subplanstate; |
1995 | JunkFilter *junkfilter; |
1996 | TupleTableSlot *slot; |
1997 | TupleTableSlot *planSlot; |
1998 | ItemPointer tupleid; |
1999 | ItemPointerData tuple_ctid; |
2000 | HeapTupleData oldtupdata; |
2001 | HeapTuple oldtuple; |
2002 | |
2003 | CHECK_FOR_INTERRUPTS(); |
2004 | |
2005 | /* |
2006 | * This should NOT get called during EvalPlanQual; we should have passed a |
2007 | * subplan tree to EvalPlanQual, instead. Use a runtime test not just |
2008 | * Assert because this condition is easy to miss in testing. (Note: |
2009 | * although ModifyTable should not get executed within an EvalPlanQual |
2010 | * operation, we do have to allow it to be initialized and shut down in |
2011 | * case it is within a CTE subplan. Hence this test must be here, not in |
2012 | * ExecInitModifyTable.) |
2013 | */ |
2014 | if (estate->es_epq_active != NULL) |
2015 | elog(ERROR, "ModifyTable should not be called during EvalPlanQual" ); |
2016 | |
2017 | /* |
2018 | * If we've already completed processing, don't try to do more. We need |
2019 | * this test because ExecPostprocessPlan might call us an extra time, and |
2020 | * our subplan's nodes aren't necessarily robust against being called |
2021 | * extra times. |
2022 | */ |
2023 | if (node->mt_done) |
2024 | return NULL; |
2025 | |
2026 | /* |
2027 | * On first call, fire BEFORE STATEMENT triggers before proceeding. |
2028 | */ |
2029 | if (node->fireBSTriggers) |
2030 | { |
2031 | fireBSTriggers(node); |
2032 | node->fireBSTriggers = false; |
2033 | } |
2034 | |
2035 | /* Preload local variables */ |
2036 | resultRelInfo = node->resultRelInfo + node->mt_whichplan; |
2037 | subplanstate = node->mt_plans[node->mt_whichplan]; |
2038 | junkfilter = resultRelInfo->ri_junkFilter; |
2039 | |
2040 | /* |
2041 | * es_result_relation_info must point to the currently active result |
2042 | * relation while we are within this ModifyTable node. Even though |
2043 | * ModifyTable nodes can't be nested statically, they can be nested |
2044 | * dynamically (since our subplan could include a reference to a modifying |
2045 | * CTE). So we have to save and restore the caller's value. |
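| * |
| * A hypothetical example of such dynamic nesting: |
| *		WITH moved AS (DELETE FROM src RETURNING *) |
| *		INSERT INTO dst SELECT * FROM moved; |
| * Here the outer INSERT's subplan reads the wCTE, whose DELETE runs |
| * its own ModifyTable node while ours is active. |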
2046 | */ |
2047 | saved_resultRelInfo = estate->es_result_relation_info; |
2048 | |
2049 | estate->es_result_relation_info = resultRelInfo; |
2050 | |
2051 | /* |
2052 | * Fetch rows from subplan(s), and execute the required table modification |
2053 | * for each row. |
2054 | */ |
2055 | for (;;) |
2056 | { |
2057 | /* |
2058 | * Reset the per-output-tuple exprcontext. This is needed because |
2059 | * triggers expect to use that context as workspace. It's a bit ugly |
2060 | * to do this below the top level of the plan, however. We might need |
2061 | * to rethink this later. |
2062 | */ |
2063 | ResetPerTupleExprContext(estate); |
2064 | |
2065 | /* |
2066 | * Reset per-tuple memory context used for processing on conflict and |
2067 | * returning clauses, to free any expression evaluation storage |
2068 | * allocated in the previous cycle. |
2069 | */ |
2070 | if (pstate->ps_ExprContext) |
2071 | ResetExprContext(pstate->ps_ExprContext); |
2072 | |
2073 | planSlot = ExecProcNode(subplanstate); |
2074 | |
2075 | if (TupIsNull(planSlot)) |
2076 | { |
2077 | /* advance to next subplan if any */ |
2078 | node->mt_whichplan++; |
2079 | if (node->mt_whichplan < node->mt_nplans) |
2080 | { |
2081 | resultRelInfo++; |
2082 | subplanstate = node->mt_plans[node->mt_whichplan]; |
2083 | junkfilter = resultRelInfo->ri_junkFilter; |
2084 | estate->es_result_relation_info = resultRelInfo; |
2085 | EvalPlanQualSetPlan(&node->mt_epqstate, subplanstate->plan, |
2086 | node->mt_arowmarks[node->mt_whichplan]); |
2087 | /* Prepare to convert transition tuples from this child. */ |
2088 | if (node->mt_transition_capture != NULL) |
2089 | { |
2090 | node->mt_transition_capture->tcs_map = |
2091 | tupconv_map_for_subplan(node, node->mt_whichplan); |
2092 | } |
2093 | if (node->mt_oc_transition_capture != NULL) |
2094 | { |
2095 | node->mt_oc_transition_capture->tcs_map = |
2096 | tupconv_map_for_subplan(node, node->mt_whichplan); |
2097 | } |
2098 | continue; |
2099 | } |
2100 | else |
2101 | break; |
2102 | } |
2103 | |
2104 | /* |
2105 | * Ensure input tuple is the right format for the target relation. |
2106 | */ |
2107 | if (node->mt_scans[node->mt_whichplan]->tts_ops != planSlot->tts_ops) |
2108 | { |
2109 | ExecCopySlot(node->mt_scans[node->mt_whichplan], planSlot); |
2110 | planSlot = node->mt_scans[node->mt_whichplan]; |
2111 | } |
2112 | |
2113 | /* |
2114 | * If resultRelInfo->ri_usesFdwDirectModify is true, all we need to do |
2115 | * here is compute the RETURNING expressions. |
2116 | */ |
2117 | if (resultRelInfo->ri_usesFdwDirectModify) |
2118 | { |
2119 | Assert(resultRelInfo->ri_projectReturning); |
2120 | |
2121 | /* |
2122 | * A scan slot containing the data that was actually inserted, |
2123 | * updated or deleted has already been made available to |
2124 | * ExecProcessReturning by IterateDirectModify, so no need to |
2125 | * provide it here. |
2126 | */ |
2127 | slot = ExecProcessReturning(resultRelInfo, NULL, planSlot); |
2128 | |
2129 | estate->es_result_relation_info = saved_resultRelInfo; |
2130 | return slot; |
2131 | } |
2132 | |
2133 | EvalPlanQualSetSlot(&node->mt_epqstate, planSlot); |
2134 | slot = planSlot; |
2135 | |
2136 | tupleid = NULL; |
2137 | oldtuple = NULL; |
2138 | if (junkfilter != NULL) |
2139 | { |
2140 | /* |
2141 | * extract the 'ctid' or 'wholerow' junk attribute. |
2142 | */ |
2143 | if (operation == CMD_UPDATE || operation == CMD_DELETE) |
2144 | { |
2145 | char relkind; |
2146 | Datum datum; |
2147 | bool isNull; |
2148 | |
2149 | relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind; |
2150 | if (relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW) |
2151 | { |
2152 | datum = ExecGetJunkAttribute(slot, |
2153 | junkfilter->jf_junkAttNo, |
2154 | &isNull); |
2155 | /* shouldn't ever get a null result... */ |
2156 | if (isNull) |
2157 | elog(ERROR, "ctid is NULL" ); |
2158 | |
2159 | tupleid = (ItemPointer) DatumGetPointer(datum); |
2160 | tuple_ctid = *tupleid; /* be sure we don't free ctid!! */ |
2161 | tupleid = &tuple_ctid; |
2162 | } |
2163 | |
2164 | /* |
2165 | * Use the wholerow attribute, when available, to reconstruct |
2166 | * the old relation tuple. |
2167 | * |
2168 | * Foreign table updates have a wholerow attribute when the |
2169 | * relation has a row-level trigger. Note that the wholerow |
2170 | * attribute does not carry system columns. Foreign table |
2171 | * triggers miss seeing those, except that we know enough here |
2172 | * to set t_tableOid. Quite separately from this, the FDW may |
2173 | * fetch its own junk attrs to identify the row. |
2174 | * |
2175 | * Other relevant relkinds, currently limited to views, always |
2176 | * have a wholerow attribute. |
2177 | */ |
2178 | else if (AttributeNumberIsValid(junkfilter->jf_junkAttNo)) |
2179 | { |
2180 | datum = ExecGetJunkAttribute(slot, |
2181 | junkfilter->jf_junkAttNo, |
2182 | &isNull); |
2183 | /* shouldn't ever get a null result... */ |
2184 | if (isNull) |
2185 | elog(ERROR, "wholerow is NULL" ); |
2186 | |
2187 | oldtupdata.t_data = DatumGetHeapTupleHeader(datum); |
2188 | oldtupdata.t_len = |
2189 | HeapTupleHeaderGetDatumLength(oldtupdata.t_data); |
2190 | ItemPointerSetInvalid(&(oldtupdata.t_self)); |
2191 | /* Historically, view triggers see invalid t_tableOid. */ |
2192 | oldtupdata.t_tableOid = |
2193 | (relkind == RELKIND_VIEW) ? InvalidOid : |
2194 | RelationGetRelid(resultRelInfo->ri_RelationDesc); |
2195 | |
2196 | oldtuple = &oldtupdata; |
2197 | } |
2198 | else |
2199 | Assert(relkind == RELKIND_FOREIGN_TABLE); |
2200 | } |
2201 | |
2202 | /* |
2203 | * apply the junkfilter if needed. |
2204 | */ |
2205 | if (operation != CMD_DELETE) |
2206 | slot = ExecFilterJunk(junkfilter, slot); |
2207 | } |
2208 | |
2209 | switch (operation) |
2210 | { |
2211 | case CMD_INSERT: |
2212 | /* Prepare for tuple routing if needed. */ |
2213 | if (proute) |
2214 | slot = ExecPrepareTupleRouting(node, estate, proute, |
2215 | resultRelInfo, slot); |
2216 | slot = ExecInsert(node, slot, planSlot, |
2217 | estate, node->canSetTag); |
2218 | /* Revert ExecPrepareTupleRouting's state change. */ |
2219 | if (proute) |
2220 | estate->es_result_relation_info = resultRelInfo; |
2221 | break; |
2222 | case CMD_UPDATE: |
2223 | slot = ExecUpdate(node, tupleid, oldtuple, slot, planSlot, |
2224 | &node->mt_epqstate, estate, node->canSetTag); |
2225 | break; |
2226 | case CMD_DELETE: |
2227 | slot = ExecDelete(node, tupleid, oldtuple, planSlot, |
2228 | &node->mt_epqstate, estate, |
2229 | true, node->canSetTag, |
2230 | false /* changingPart */, NULL, NULL); |
2231 | break; |
2232 | default: |
2233 | elog(ERROR, "unknown operation" ); |
2234 | break; |
2235 | } |
2236 | |
2237 | /* |
2238 | * If we got a RETURNING result, return it to caller. We'll continue |
2239 | * the work on next call. |
2240 | */ |
2241 | if (slot) |
2242 | { |
2243 | estate->es_result_relation_info = saved_resultRelInfo; |
2244 | return slot; |
2245 | } |
2246 | } |
2247 | |
2248 | /* Restore es_result_relation_info before exiting */ |
2249 | estate->es_result_relation_info = saved_resultRelInfo; |
2250 | |
2251 | /* |
2252 | * We're done, but fire AFTER STATEMENT triggers before exiting. |
2253 | */ |
2254 | fireASTriggers(node); |
2255 | |
2256 | node->mt_done = true; |
2257 | |
2258 | return NULL; |
2259 | } |
2260 | |
2261 | /* ---------------------------------------------------------------- |
2262 | * ExecInitModifyTable |
2263 | * ---------------------------------------------------------------- |
2264 | */ |
2265 | ModifyTableState * |
2266 | ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) |
2267 | { |
2268 | ModifyTableState *mtstate; |
2269 | CmdType operation = node->operation; |
2270 | int nplans = list_length(node->plans); |
2271 | ResultRelInfo *saved_resultRelInfo; |
2272 | ResultRelInfo *resultRelInfo; |
2273 | Plan *subplan; |
2274 | ListCell *l; |
2275 | int i; |
2276 | Relation rel; |
2277 | bool update_tuple_routing_needed = node->partColsUpdated; |
2278 | |
2279 | /* check for unsupported flags */ |
2280 | Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); |
2281 | |
2282 | /* |
2283 | * create state structure |
2284 | */ |
2285 | mtstate = makeNode(ModifyTableState); |
2286 | mtstate->ps.plan = (Plan *) node; |
2287 | mtstate->ps.state = estate; |
2288 | mtstate->ps.ExecProcNode = ExecModifyTable; |
2289 | |
2290 | mtstate->operation = operation; |
2291 | mtstate->canSetTag = node->canSetTag; |
2292 | mtstate->mt_done = false; |
2293 | |
2294 | mtstate->mt_plans = (PlanState **) palloc0(sizeof(PlanState *) * nplans); |
2295 | mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex; |
2296 | mtstate->mt_scans = (TupleTableSlot **) palloc0(sizeof(TupleTableSlot *) * nplans); |
2297 | |
2298 | /* If modifying a partitioned table, initialize the root table info */ |
2299 | if (node->rootResultRelIndex >= 0) |
2300 | mtstate->rootResultRelInfo = estate->es_root_result_relations + |
2301 | node->rootResultRelIndex; |
2302 | |
2303 | mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans); |
2304 | mtstate->mt_nplans = nplans; |
2305 | |
2306 | /* set up epqstate with dummy subplan data for the moment */ |
2307 | EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam); |
2308 | mtstate->fireBSTriggers = true; |
2309 | |
2310 | /* |
2311 | * call ExecInitNode on each of the plans to be executed and save the |
2312 | * results into the array "mt_plans". This is also a convenient place to |
2313 | * verify that the proposed target relations are valid and open their |
2314 | * indexes for insertion of new index entries. Note we *must* set |
2315 | * estate->es_result_relation_info correctly while we initialize each |
2316 | * sub-plan; external modules such as FDWs may depend on that (see |
2317 | * contrib/postgres_fdw/postgres_fdw.c: postgresBeginDirectModify() as one |
2318 | * example). |
2319 | */ |
2320 | saved_resultRelInfo = estate->es_result_relation_info; |
2321 | |
2322 | resultRelInfo = mtstate->resultRelInfo; |
2323 | i = 0; |
2324 | foreach(l, node->plans) |
2325 | { |
2326 | subplan = (Plan *) lfirst(l); |
2327 | |
2328 | /* Initialize the usesFdwDirectModify flag */ |
2329 | resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i, |
2330 | node->fdwDirectModifyPlans); |
2331 | |
2332 | /* |
2333 | * Verify result relation is a valid target for the current operation |
2334 | */ |
2335 | CheckValidResultRel(resultRelInfo, operation); |
2336 | |
2337 | /* |
2338 | * If there are indices on the result relation, open them and save |
2339 | * descriptors in the result relation info, so that we can add new |
2340 | * index entries for the tuples we add/update. We need not do this |
2341 | * for a DELETE, however, since deletion doesn't affect indexes. Also, |
2342 | * inside an EvalPlanQual operation, the indexes might be open |
2343 | * already, since we share the resultrel state with the original |
2344 | * query. |
2345 | */ |
2346 | if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex && |
2347 | operation != CMD_DELETE && |
2348 | resultRelInfo->ri_IndexRelationDescs == NULL) |
2349 | ExecOpenIndices(resultRelInfo, |
2350 | node->onConflictAction != ONCONFLICT_NONE); |
2351 | |
2352 | /* |
2353 | * If this is an UPDATE and a BEFORE UPDATE trigger is present, the |
2354 | * trigger itself might modify the partition-key values. So arrange |
2355 | * for tuple routing. |
2356 | */ |
2357 | if (resultRelInfo->ri_TrigDesc && |
2358 | resultRelInfo->ri_TrigDesc->trig_update_before_row && |
2359 | operation == CMD_UPDATE) |
2360 | update_tuple_routing_needed = true; |
2361 | |
2362 | /* Now init the plan for this result rel */ |
2363 | estate->es_result_relation_info = resultRelInfo; |
2364 | mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags); |
2365 | mtstate->mt_scans[i] = |
2366 | ExecInitExtraTupleSlot(mtstate->ps.state, ExecGetResultType(mtstate->mt_plans[i]), |
2367 | table_slot_callbacks(resultRelInfo->ri_RelationDesc)); |
2368 | |
2369 | /* Also let FDWs init themselves for foreign-table result rels */ |
2370 | if (!resultRelInfo->ri_usesFdwDirectModify && |
2371 | resultRelInfo->ri_FdwRoutine != NULL && |
2372 | resultRelInfo->ri_FdwRoutine->BeginForeignModify != NULL) |
2373 | { |
2374 | List *fdw_private = (List *) list_nth(node->fdwPrivLists, i); |
2375 | |
2376 | resultRelInfo->ri_FdwRoutine->BeginForeignModify(mtstate, |
2377 | resultRelInfo, |
2378 | fdw_private, |
2379 | i, |
2380 | eflags); |
2381 | } |
2382 | |
2383 | resultRelInfo++; |
2384 | i++; |
2385 | } |
2386 | |
2387 | estate->es_result_relation_info = saved_resultRelInfo; |
2388 | |
2389 | /* Get the target relation */ |
2390 | rel = (getTargetResultRelInfo(mtstate))->ri_RelationDesc; |
2391 | |
2392 | /* |
2393 | * If it's not a partitioned table after all, UPDATE tuple routing should |
2394 | * not be attempted. |
2395 | */ |
2396 | if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) |
2397 | update_tuple_routing_needed = false; |
2398 | |
2399 | /* |
2400 | * Build state for tuple routing if it's an INSERT or if it's an UPDATE |
2401 | * of the partition key. |
2402 | */ |
2403 | if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && |
2404 | (operation == CMD_INSERT || update_tuple_routing_needed)) |
2405 | mtstate->mt_partition_tuple_routing = |
2406 | ExecSetupPartitionTupleRouting(estate, mtstate, rel); |
2407 | |
2408 | /* |
2409 | * Build state for collecting transition tuples. This requires having a |
2410 | * valid trigger query context, so skip it in explain-only mode. |
2411 | */ |
2412 | if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY)) |
2413 | ExecSetupTransitionCaptureState(mtstate, estate); |
2414 | |
2415 | /* |
2416 | * Construct mapping from each of the per-subplan partition attnos to the |
2417 | * root attno. This is required when, during update row movement, the tuple |
2418 | * descriptor of a source partition does not match the root partitioned |
2419 | * table descriptor. In such a case we need to convert tuples to the root |
2420 | * tuple descriptor, because the search for destination partition starts |
2421 | * from the root. We'll also need a slot to store these converted tuples. |
2422 | * We can skip this setup if it's not a partition key update. |
2423 | */ |
2424 | if (update_tuple_routing_needed) |
2425 | { |
2426 | ExecSetupChildParentMapForSubplan(mtstate); |
2427 | mtstate->mt_root_tuple_slot = table_slot_create(rel, NULL); |
2428 | } |
2429 | |
2430 | /* |
2431 | * Initialize any WITH CHECK OPTION constraints if needed. |
2432 | */ |
2433 | resultRelInfo = mtstate->resultRelInfo; |
2434 | i = 0; |
2435 | foreach(l, node->withCheckOptionLists) |
2436 | { |
2437 | List *wcoList = (List *) lfirst(l); |
2438 | List *wcoExprs = NIL; |
2439 | ListCell *ll; |
2440 | |
2441 | foreach(ll, wcoList) |
2442 | { |
2443 | WithCheckOption *wco = (WithCheckOption *) lfirst(ll); |
2444 | ExprState *wcoExpr = ExecInitQual((List *) wco->qual, |
2445 | &mtstate->ps); |
2446 | |
2447 | wcoExprs = lappend(wcoExprs, wcoExpr); |
2448 | } |
2449 | |
2450 | resultRelInfo->ri_WithCheckOptions = wcoList; |
2451 | resultRelInfo->ri_WithCheckOptionExprs = wcoExprs; |
2452 | resultRelInfo++; |
2453 | i++; |
2454 | } |
2455 | |
2456 | /* |
2457 | * Initialize RETURNING projections if needed. |
2458 | */ |
2459 | if (node->returningLists) |
2460 | { |
2461 | TupleTableSlot *slot; |
2462 | ExprContext *econtext; |
2463 | |
2464 | /* |
2465 | * Initialize result tuple slot and assign its rowtype using the first |
2466 | * RETURNING list. We assume the rest will look the same. |
2467 | */ |
2468 | mtstate->ps.plan->targetlist = (List *) linitial(node->returningLists); |
2469 | |
2470 | /* Set up a slot for the output of the RETURNING projection(s) */ |
2471 | ExecInitResultTupleSlotTL(&mtstate->ps, &TTSOpsVirtual); |
2472 | slot = mtstate->ps.ps_ResultTupleSlot; |
2473 | |
2474 | /* Need an econtext too */ |
2475 | if (mtstate->ps.ps_ExprContext == NULL) |
2476 | ExecAssignExprContext(estate, &mtstate->ps); |
2477 | econtext = mtstate->ps.ps_ExprContext; |
2478 | |
2479 | /* |
2480 | * Build a projection for each result rel. |
2481 | */ |
2482 | resultRelInfo = mtstate->resultRelInfo; |
2483 | foreach(l, node->returningLists) |
2484 | { |
2485 | List *rlist = (List *) lfirst(l); |
2486 | |
2487 | resultRelInfo->ri_returningList = rlist; |
2488 | resultRelInfo->ri_projectReturning = |
2489 | ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps, |
2490 | resultRelInfo->ri_RelationDesc->rd_att); |
2491 | resultRelInfo++; |
2492 | } |
2493 | } |
2494 | else |
2495 | { |
2496 | /* |
2497 | * We still must construct a dummy result tuple type, because InitPlan |
2498 | * expects one (maybe should change that?). |
2499 | */ |
2500 | mtstate->ps.plan->targetlist = NIL; |
2501 | ExecInitResultTypeTL(&mtstate->ps); |
2502 | |
2503 | mtstate->ps.ps_ExprContext = NULL; |
2504 | } |
2505 | |
2506 | /* Set the list of arbiter indexes if needed for ON CONFLICT */ |
2507 | resultRelInfo = mtstate->resultRelInfo; |
2508 | if (node->onConflictAction != ONCONFLICT_NONE) |
2509 | resultRelInfo->ri_onConflictArbiterIndexes = node->arbiterIndexes; |
2510 | |
2511 | /* |
2512 | * If needed, initialize the target list, projection and qual for ON |
2513 | * CONFLICT DO UPDATE. |
2514 | */ |
2515 | if (node->onConflictAction == ONCONFLICT_UPDATE) |
2516 | { |
2517 | ExprContext *econtext; |
2518 | TupleDesc relationDesc; |
2519 | TupleDesc tupDesc; |
2520 | |
2521 | /* insert may only have one plan, inheritance is not expanded */ |
2522 | Assert(nplans == 1); |
2523 | |
2524 | /* already exists if created by RETURNING processing above */ |
2525 | if (mtstate->ps.ps_ExprContext == NULL) |
2526 | ExecAssignExprContext(estate, &mtstate->ps); |
2527 | |
2528 | econtext = mtstate->ps.ps_ExprContext; |
2529 | relationDesc = resultRelInfo->ri_RelationDesc->rd_att; |
2530 | |
2531 | /* carried forward solely for the benefit of explain */ |
2532 | mtstate->mt_excludedtlist = node->exclRelTlist; |
2533 | |
2534 | /* create state for DO UPDATE SET operation */ |
2535 | resultRelInfo->ri_onConflict = makeNode(OnConflictSetState); |
2536 | |
2537 | /* initialize slot for the existing tuple */ |
2538 | resultRelInfo->ri_onConflict->oc_Existing = |
2539 | table_slot_create(resultRelInfo->ri_RelationDesc, |
2540 | &mtstate->ps.state->es_tupleTable); |
2541 | |
2542 | /* |
2543 | * Create the tuple slot for the UPDATE SET projection. We want a slot |
2544 | * of the table's type here, because the slot will be used to insert |
2545 | * into the table, and for RETURNING processing - which may access |
2546 | * system attributes. |
2547 | */ |
2548 | tupDesc = ExecTypeFromTL((List *) node->onConflictSet); |
2549 | resultRelInfo->ri_onConflict->oc_ProjSlot = |
2550 | ExecInitExtraTupleSlot(mtstate->ps.state, tupDesc, |
2551 | table_slot_callbacks(resultRelInfo->ri_RelationDesc)); |
2552 | |
2553 | /* build UPDATE SET projection state */ |
2554 | resultRelInfo->ri_onConflict->oc_ProjInfo = |
2555 | ExecBuildProjectionInfo(node->onConflictSet, econtext, |
2556 | resultRelInfo->ri_onConflict->oc_ProjSlot, |
2557 | &mtstate->ps, |
2558 | relationDesc); |
2559 | |
2560 | /* initialize state to evaluate the WHERE clause, if any */ |
2561 | if (node->onConflictWhere) |
2562 | { |
2563 | ExprState *qualexpr; |
2564 | |
2565 | qualexpr = ExecInitQual((List *) node->onConflictWhere, |
2566 | &mtstate->ps); |
2567 | resultRelInfo->ri_onConflict->oc_WhereClause = qualexpr; |
2568 | } |
2569 | } |
2570 | |
2571 | /* |
2572 | * If we have any secondary relations in an UPDATE or DELETE, they need to |
2573 | * be treated like non-locked relations in SELECT FOR UPDATE, ie, the |
2574 | * EvalPlanQual mechanism needs to be told about them. Locate the |
2575 | * relevant ExecRowMarks. |
2576 | */ |
2577 | foreach(l, node->rowMarks) |
2578 | { |
2579 | PlanRowMark *rc = lfirst_node(PlanRowMark, l); |
2580 | ExecRowMark *erm; |
2581 | |
2582 | /* ignore "parent" rowmarks; they are irrelevant at runtime */ |
2583 | if (rc->isParent) |
2584 | continue; |
2585 | |
2586 | /* find ExecRowMark (same for all subplans) */ |
2587 | erm = ExecFindRowMark(estate, rc->rti, false); |
2588 | |
2589 | /* build ExecAuxRowMark for each subplan */ |
2590 | for (i = 0; i < nplans; i++) |
2591 | { |
2592 | ExecAuxRowMark *aerm; |
2593 | |
2594 | subplan = mtstate->mt_plans[i]->plan; |
2595 | aerm = ExecBuildAuxRowMark(erm, subplan->targetlist); |
2596 | mtstate->mt_arowmarks[i] = lappend(mtstate->mt_arowmarks[i], aerm); |
2597 | } |
2598 | } |
2599 | |
2600 | /* select first subplan */ |
2601 | mtstate->mt_whichplan = 0; |
2602 | subplan = (Plan *) linitial(node->plans); |
2603 | EvalPlanQualSetPlan(&mtstate->mt_epqstate, subplan, |
2604 | mtstate->mt_arowmarks[0]); |
2605 | |
2606 | /* |
2607 | * Initialize the junk filter(s) if needed. INSERT queries need a filter |
2608 | * if there are any junk attrs in the tlist. UPDATE and DELETE always |
2609 | * need a filter, since there's always at least one junk attribute present |
2610 | * --- no need to look first. Typically, this will be a 'ctid' or |
2611 | * 'wholerow' attribute, but in the case of a foreign data wrapper it |
2612 | * might be a set of junk attributes sufficient to identify the remote |
2613 | * row. |
2614 | * |
2615 | * If there are multiple result relations, each one needs its own junk |
2616 | * filter. Note multiple rels are only possible for UPDATE/DELETE, so we |
2617 | * can't be fooled by some needing a filter and some not. |
2618 | * |
2619 | * This section of code is also a convenient place to verify that the |
2620 | * output of an INSERT or UPDATE matches the target table(s). |
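| * |
| * As a concrete illustration (hypothetical table name): for |
| *		DELETE FROM foo WHERE ... |
| * on a plain heap table, the planner adds foo.ctid to the subplan's |
| * targetlist as a resjunk column, and the filter built below is what |
| * extracts it at runtime. |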
2621 | */ |
2622 | { |
2623 | bool junk_filter_needed = false; |
2624 | |
2625 | switch (operation) |
2626 | { |
2627 | case CMD_INSERT: |
2628 | foreach(l, subplan->targetlist) |
2629 | { |
2630 | TargetEntry *tle = (TargetEntry *) lfirst(l); |
2631 | |
2632 | if (tle->resjunk) |
2633 | { |
2634 | junk_filter_needed = true; |
2635 | break; |
2636 | } |
2637 | } |
2638 | break; |
2639 | case CMD_UPDATE: |
2640 | case CMD_DELETE: |
2641 | junk_filter_needed = true; |
2642 | break; |
2643 | default: |
2644 | elog(ERROR, "unknown operation" ); |
2645 | break; |
2646 | } |
2647 | |
2648 | if (junk_filter_needed) |
2649 | { |
2650 | resultRelInfo = mtstate->resultRelInfo; |
2651 | for (i = 0; i < nplans; i++) |
2652 | { |
2653 | JunkFilter *j; |
2654 | TupleTableSlot *junkresslot; |
2655 | |
2656 | subplan = mtstate->mt_plans[i]->plan; |
2657 | if (operation == CMD_INSERT || operation == CMD_UPDATE) |
2658 | ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc, |
2659 | subplan->targetlist); |
2660 | |
2661 | junkresslot = |
2662 | ExecInitExtraTupleSlot(estate, NULL, |
2663 | table_slot_callbacks(resultRelInfo->ri_RelationDesc)); |
2664 | j = ExecInitJunkFilter(subplan->targetlist, |
2665 | junkresslot); |
2666 | |
2667 | if (operation == CMD_UPDATE || operation == CMD_DELETE) |
2668 | { |
2669 | /* For UPDATE/DELETE, find the appropriate junk attr now */ |
2670 | char relkind; |
2671 | |
2672 | relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind; |
2673 | if (relkind == RELKIND_RELATION || |
2674 | relkind == RELKIND_MATVIEW || |
2675 | relkind == RELKIND_PARTITIONED_TABLE) |
2676 | { |
2677 | j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid" ); |
2678 | if (!AttributeNumberIsValid(j->jf_junkAttNo)) |
2679 | elog(ERROR, "could not find junk ctid column" ); |
2680 | } |
2681 | else if (relkind == RELKIND_FOREIGN_TABLE) |
2682 | { |
2683 | /* |
2684 | * When there is a row-level trigger, there should be |
2685 | * a wholerow attribute. |
2686 | */ |
2687 | j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow" ); |
2688 | } |
2689 | else |
2690 | { |
2691 | j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow" ); |
2692 | if (!AttributeNumberIsValid(j->jf_junkAttNo)) |
2693 | elog(ERROR, "could not find junk wholerow column" ); |
2694 | } |
2695 | } |
2696 | |
2697 | resultRelInfo->ri_junkFilter = j; |
2698 | resultRelInfo++; |
2699 | } |
2700 | } |
2701 | else |
2702 | { |
2703 | if (operation == CMD_INSERT) |
2704 | ExecCheckPlanOutput(mtstate->resultRelInfo->ri_RelationDesc, |
2705 | subplan->targetlist); |
2706 | } |
2707 | } |
2708 | |
2709 | /* |
2710 | * Lastly, if this is not the primary (canSetTag) ModifyTable node, add it |
2711 | * to estate->es_auxmodifytables so that it will be run to completion by |
2712 | * ExecPostprocessPlan. (It'd actually work fine to add the primary |
2713 | * ModifyTable node too, but there's no need.) Note the use of lcons not |
2714 | * lappend: we need later-initialized ModifyTable nodes to be shut down |
2715 | * before earlier ones. This ensures that we don't throw away RETURNING |
2716 | * rows that need to be seen by a later CTE subplan. |
2717 | */ |
2718 | if (!mtstate->canSetTag) |
2719 | estate->es_auxmodifytables = lcons(mtstate, |
2720 | estate->es_auxmodifytables); |
2721 | |
2722 | return mtstate; |
2723 | } |
2724 | |
2725 | /* ---------------------------------------------------------------- |
2726 | * ExecEndModifyTable |
2727 | * |
2728 | * Shuts down the plan. |
2729 | * |
2730 | * Returns nothing of interest. |
2731 | * ---------------------------------------------------------------- |
2732 | */ |
2733 | void |
2734 | ExecEndModifyTable(ModifyTableState *node) |
2735 | { |
2736 | int i; |
2737 | |
2738 | /* |
2739 | * Allow any FDWs to shut down |
2740 | */ |
2741 | for (i = 0; i < node->mt_nplans; i++) |
2742 | { |
2743 | ResultRelInfo *resultRelInfo = node->resultRelInfo + i; |
2744 | |
2745 | if (!resultRelInfo->ri_usesFdwDirectModify && |
2746 | resultRelInfo->ri_FdwRoutine != NULL && |
2747 | resultRelInfo->ri_FdwRoutine->EndForeignModify != NULL) |
2748 | resultRelInfo->ri_FdwRoutine->EndForeignModify(node->ps.state, |
2749 | resultRelInfo); |
2750 | } |
2751 | |
2752 | /* |
2753 | * Close all the partitioned tables, leaf partitions, and their indices |
2754 | * and release the slot used for tuple routing, if set. |
2755 | */ |
2756 | if (node->mt_partition_tuple_routing) |
2757 | { |
2758 | ExecCleanupTupleRouting(node, node->mt_partition_tuple_routing); |
2759 | |
2760 | if (node->mt_root_tuple_slot) |
2761 | ExecDropSingleTupleTableSlot(node->mt_root_tuple_slot); |
2762 | } |
2763 | |
2764 | /* |
2765 | * Free the exprcontext |
2766 | */ |
2767 | ExecFreeExprContext(&node->ps); |
2768 | |
2769 | /* |
2770 | * clean out the tuple table |
2771 | */ |
2772 | if (node->ps.ps_ResultTupleSlot) |
2773 | ExecClearTuple(node->ps.ps_ResultTupleSlot); |
2774 | |
2775 | /* |
2776 | * Terminate EPQ execution if active |
2777 | */ |
2778 | EvalPlanQualEnd(&node->mt_epqstate); |
2779 | |
2780 | /* |
2781 | * shut down subplans |
2782 | */ |
2783 | for (i = 0; i < node->mt_nplans; i++) |
2784 | ExecEndNode(node->mt_plans[i]); |
2785 | } |
2786 | |
2787 | void |
2788 | ExecReScanModifyTable(ModifyTableState *node) |
2789 | { |
2790 | /* |
2791 | * Currently, we don't need to support rescan on ModifyTable nodes. The |
2792 | * semantics of that would be a bit debatable anyway. |
2793 | */ |
2794 | elog(ERROR, "ExecReScanModifyTable is not implemented" ); |
2795 | } |
2796 | |