/*-------------------------------------------------------------------------
 *
 * nodeSetOp.c
 *	  Routines to handle INTERSECT and EXCEPT selection
 *
 * The input of a SetOp node consists of tuples from two relations,
 * which have been combined into one dataset, with a junk attribute added
 * that shows which relation each tuple came from.  In SETOP_SORTED mode,
 * the input has furthermore been sorted according to all the grouping
 * columns (ie, all the non-junk attributes).  The SetOp node scans each
 * group of identical tuples to determine how many came from each input
 * relation.  Then it is a simple matter to emit the output demanded by the
 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
 *
 * In SETOP_HASHED mode, the input is delivered in no particular order,
 * except that we know all the tuples from one input relation will come before
 * all the tuples of the other.  The planner guarantees that the first input
 * relation is the left-hand one for EXCEPT, and tries to make the smaller
 * input relation come first for INTERSECT.  We build a hash table in memory
 * with one entry for each group of identical tuples, and count the number of
 * tuples in the group from each relation.  After seeing all the input, we
 * scan the hashtable and generate the correct output using those counts.
 * We can avoid making hashtable entries for any tuples appearing only in the
 * second input relation, since they cannot result in any output.
 *
 * This node type is not used for UNION or UNION ALL, since those can be
 * implemented more cheaply (there's no need for the junk attribute to
 * identify the source relation).
 *
 * Note that SetOp does no qual checking or projection.  The delivered
 * output tuples are just copies of the first-to-arrive tuple in each
 * input group.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeSetOp.c
 *
 *-------------------------------------------------------------------------
 */
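
/*
 * Illustrative sketch (not part of the original header comment; the table
 * and column names are hypothetical, chosen only to show the shape of the
 * input this node expects).  For a query such as
 *
 *		SELECT a FROM t1 INTERSECT ALL SELECT a FROM t2
 *
 * the planner appends both inputs and adds the junk "flag" column: 0 for
 * rows from t1 (left), 1 for rows from t2 (right).  In SETOP_SORTED mode a
 * group of identical rows might arrive as (a=5, flag=0), (a=5, flag=0),
 * (a=5, flag=1), giving numLeft = 2 and numRight = 1 for that group; the
 * counting rules in set_output_count() then decide how many copies of the
 * row to emit.
 */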

#include "postgres.h"

#include "access/htup_details.h"
#include "executor/executor.h"
#include "executor/nodeSetOp.h"
#include "miscadmin.h"
#include "utils/memutils.h"


/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group.
 */
typedef struct SetOpStatePerGroupData
{
    long        numLeft;        /* number of left-input dups in group */
    long        numRight;       /* number of right-input dups in group */
} SetOpStatePerGroupData;


static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
static void setop_fill_hash_table(SetOpState *setopstate);
static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);


/*
 * Initialize state for a new group of input values.
 */
static inline void
initialize_counts(SetOpStatePerGroup pergroup)
{
    pergroup->numLeft = pergroup->numRight = 0;
}

/*
 * Advance the appropriate counter for one input tuple.
 */
static inline void
advance_counts(SetOpStatePerGroup pergroup, int flag)
{
    if (flag)
        pergroup->numRight++;
    else
        pergroup->numLeft++;
}

/*
 * Fetch the "flag" column from an input tuple.
 * This is an integer column with value 0 for left side, 1 for right side.
 */
static int
fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
{
    SetOp      *node = (SetOp *) setopstate->ps.plan;
    int         flag;
    bool        isNull;

    flag = DatumGetInt32(slot_getattr(inputslot,
                                      node->flagColIdx,
                                      &isNull));
    Assert(!isNull);
    Assert(flag == 0 || flag == 1);
    return flag;
}

/*
 * Initialize the hash table to empty.
 */
static void
build_hash_table(SetOpState *setopstate)
{
    SetOp      *node = (SetOp *) setopstate->ps.plan;
    ExprContext *econtext = setopstate->ps.ps_ExprContext;
    TupleDesc   desc = ExecGetResultType(outerPlanState(setopstate));

    Assert(node->strategy == SETOP_HASHED);
    Assert(node->numGroups > 0);

    setopstate->hashtable = BuildTupleHashTableExt(&setopstate->ps,
                                                   desc,
                                                   node->numCols,
                                                   node->dupColIdx,
                                                   setopstate->eqfuncoids,
                                                   setopstate->hashfunctions,
                                                   node->dupCollations,
                                                   node->numGroups,
                                                   0,
                                                   setopstate->ps.state->es_query_cxt,
                                                   setopstate->tableContext,
                                                   econtext->ecxt_per_tuple_memory,
                                                   false);
}

/*
 * We've completed processing a tuple group.  Decide how many copies (if any)
 * of its representative row to emit, and store the count into numOutput.
 * This logic is straight from the SQL92 specification.
 */
static void
set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
{
    SetOp      *plannode = (SetOp *) setopstate->ps.plan;

    switch (plannode->cmd)
    {
        case SETOPCMD_INTERSECT:
            if (pergroup->numLeft > 0 && pergroup->numRight > 0)
                setopstate->numOutput = 1;
            else
                setopstate->numOutput = 0;
            break;
        case SETOPCMD_INTERSECT_ALL:
            setopstate->numOutput =
                (pergroup->numLeft < pergroup->numRight) ?
                pergroup->numLeft : pergroup->numRight;
            break;
        case SETOPCMD_EXCEPT:
            if (pergroup->numLeft > 0 && pergroup->numRight == 0)
                setopstate->numOutput = 1;
            else
                setopstate->numOutput = 0;
            break;
        case SETOPCMD_EXCEPT_ALL:
            setopstate->numOutput =
                (pergroup->numLeft < pergroup->numRight) ?
                0 : (pergroup->numLeft - pergroup->numRight);
            break;
        default:
            elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
            break;
    }
}
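
/*
 * Worked example (a sketch added for clarity; the counts are made up): for a
 * group where pergroup->numLeft = 3 and pergroup->numRight = 2, the switch
 * above yields numOutput = 1 for INTERSECT, min(3, 2) = 2 for INTERSECT ALL,
 * 0 for EXCEPT (the group also appears in the right input), and 3 - 2 = 1
 * for EXCEPT ALL.
 */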


/* ----------------------------------------------------------------
 *		ExecSetOp
 * ----------------------------------------------------------------
 */
static TupleTableSlot *         /* return: a tuple or NULL */
ExecSetOp(PlanState *pstate)
{
    SetOpState *node = castNode(SetOpState, pstate);
    SetOp      *plannode = (SetOp *) node->ps.plan;
    TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;

    CHECK_FOR_INTERRUPTS();

    /*
     * If the previously-returned tuple needs to be returned more than once,
     * keep returning it.
     */
    if (node->numOutput > 0)
    {
        node->numOutput--;
        return resultTupleSlot;
    }

    /* Otherwise, we're done if we are out of groups */
    if (node->setop_done)
        return NULL;

    /* Fetch the next tuple group according to the correct strategy */
    if (plannode->strategy == SETOP_HASHED)
    {
        if (!node->table_filled)
            setop_fill_hash_table(node);
        return setop_retrieve_hash_table(node);
    }
    else
        return setop_retrieve_direct(node);
}

/*
 * ExecSetOp for non-hashed case
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
    PlanState  *outerPlan;
    SetOpStatePerGroup pergroup;
    TupleTableSlot *outerslot;
    TupleTableSlot *resultTupleSlot;
    ExprContext *econtext = setopstate->ps.ps_ExprContext;

    /*
     * get state info from node
     */
    outerPlan = outerPlanState(setopstate);
    pergroup = (SetOpStatePerGroup) setopstate->pergroup;
    resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

    /*
     * We loop retrieving groups until we find one we should return
     */
    while (!setopstate->setop_done)
    {
        /*
         * If we don't already have the first tuple of the new group, fetch it
         * from the outer plan.
         */
        if (setopstate->grp_firstTuple == NULL)
        {
            outerslot = ExecProcNode(outerPlan);
            if (!TupIsNull(outerslot))
            {
                /* Make a copy of the first input tuple */
                setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
            }
            else
            {
                /* outer plan produced no tuples at all */
                setopstate->setop_done = true;
                return NULL;
            }
        }

        /*
         * Store the copied first input tuple in the tuple table slot reserved
         * for it.  The tuple will be deleted when it is cleared from the
         * slot.
         */
        ExecStoreHeapTuple(setopstate->grp_firstTuple,
                           resultTupleSlot,
                           true);
        setopstate->grp_firstTuple = NULL;  /* don't keep two pointers */

        /* Initialize working state for a new input tuple group */
        initialize_counts(pergroup);

        /* Count the first input tuple */
        advance_counts(pergroup,
                       fetch_tuple_flag(setopstate, resultTupleSlot));

        /*
         * Scan the outer plan until we exhaust it or cross a group boundary.
         */
        for (;;)
        {
            outerslot = ExecProcNode(outerPlan);
            if (TupIsNull(outerslot))
            {
                /* no more outer-plan tuples available */
                setopstate->setop_done = true;
                break;
            }

            /*
             * Check whether we've crossed a group boundary.
             */
            econtext->ecxt_outertuple = resultTupleSlot;
            econtext->ecxt_innertuple = outerslot;

            if (!ExecQualAndReset(setopstate->eqfunction, econtext))
            {
                /*
                 * Save the first input tuple of the next group.
                 */
                setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
                break;
            }

            /* Still in same group, so count this tuple */
            advance_counts(pergroup,
                           fetch_tuple_flag(setopstate, outerslot));
        }

        /*
         * Done scanning input tuple group.  See if we should emit any copies
         * of result tuple, and if so return the first copy.
         */
        set_output_count(setopstate, pergroup);

        if (setopstate->numOutput > 0)
        {
            setopstate->numOutput--;
            return resultTupleSlot;
        }
    }

    /* No more groups */
    ExecClearTuple(resultTupleSlot);
    return NULL;
}

/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
    SetOp      *node = (SetOp *) setopstate->ps.plan;
    PlanState  *outerPlan;
    int         firstFlag;
    bool        in_first_rel PG_USED_FOR_ASSERTS_ONLY;
    ExprContext *econtext = setopstate->ps.ps_ExprContext;

    /*
     * get state info from node
     */
    outerPlan = outerPlanState(setopstate);
    firstFlag = node->firstFlag;
    /* verify planner didn't mess up */
    Assert(firstFlag == 0 ||
           (firstFlag == 1 &&
            (node->cmd == SETOPCMD_INTERSECT ||
             node->cmd == SETOPCMD_INTERSECT_ALL)));

    /*
     * Process each outer-plan tuple, and then fetch the next one, until we
     * exhaust the outer plan.
     */
    in_first_rel = true;
    for (;;)
    {
        TupleTableSlot *outerslot;
        int         flag;
        TupleHashEntryData *entry;
        bool        isnew;

        outerslot = ExecProcNode(outerPlan);
        if (TupIsNull(outerslot))
            break;

        /* Identify whether it's left or right input */
        flag = fetch_tuple_flag(setopstate, outerslot);

        if (flag == firstFlag)
        {
            /* (still) in first input relation */
            Assert(in_first_rel);

            /* Find or build hashtable entry for this tuple's group */
            entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
                                         &isnew);

            /* If new tuple group, initialize counts */
            if (isnew)
            {
                entry->additional = (SetOpStatePerGroup)
                    MemoryContextAlloc(setopstate->hashtable->tablecxt,
                                       sizeof(SetOpStatePerGroupData));
                initialize_counts((SetOpStatePerGroup) entry->additional);
            }

            /* Advance the counts */
            advance_counts((SetOpStatePerGroup) entry->additional, flag);
        }
        else
        {
            /* reached second relation */
            in_first_rel = false;

            /* For tuples not seen previously, do not make hashtable entry */
            entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
                                         NULL);

            /* Advance the counts if entry is already present */
            if (entry)
                advance_counts((SetOpStatePerGroup) entry->additional, flag);
        }

        /* Must reset expression context after each hashtable lookup */
        ResetExprContext(econtext);
    }

    setopstate->table_filled = true;
    /* Initialize to walk the hash table */
    ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}
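
/*
 * Example sketch (the values are hypothetical, not taken from the code
 * above): for INTERSECT ALL with the left input {5, 5, 7} delivered first
 * (so firstFlag == 0) and the right input {5, 9} delivered second, phase 1
 * leaves the hash table with counts {5: (numLeft=2, numRight=1),
 * 7: (numLeft=1, numRight=0)}; the right input's 9 finds no existing entry
 * and is simply ignored.  Phase 2 then emits min(2, 1) = 1 copy of 5 and no
 * copies of 7.
 */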

/*
 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
 */
static TupleTableSlot *
setop_retrieve_hash_table(SetOpState *setopstate)
{
    TupleHashEntryData *entry;
    TupleTableSlot *resultTupleSlot;

    /*
     * get state info from node
     */
    resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

    /*
     * We loop retrieving groups until we find one we should return
     */
    while (!setopstate->setop_done)
    {
        CHECK_FOR_INTERRUPTS();

        /*
         * Find the next entry in the hash table
         */
        entry = ScanTupleHashTable(setopstate->hashtable, &setopstate->hashiter);
        if (entry == NULL)
        {
            /* No more entries in hashtable, so done */
            setopstate->setop_done = true;
            return NULL;
        }

        /*
         * See if we should emit any copies of this tuple, and if so return
         * the first copy.
         */
        set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);

        if (setopstate->numOutput > 0)
        {
            setopstate->numOutput--;
            return ExecStoreMinimalTuple(entry->firstTuple,
                                         resultTupleSlot,
                                         false);
        }
    }

    /* No more groups */
    ExecClearTuple(resultTupleSlot);
    return NULL;
}

/* ----------------------------------------------------------------
 *		ExecInitSetOp
 *
 *		This initializes the setop node state structures and
 *		the node's subplan.
 * ----------------------------------------------------------------
 */
SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
    SetOpState *setopstate;
    TupleDesc   outerDesc;

    /* check for unsupported flags */
    Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

    /*
     * create state structure
     */
    setopstate = makeNode(SetOpState);
    setopstate->ps.plan = (Plan *) node;
    setopstate->ps.state = estate;
    setopstate->ps.ExecProcNode = ExecSetOp;

    setopstate->eqfuncoids = NULL;
    setopstate->hashfunctions = NULL;
    setopstate->setop_done = false;
    setopstate->numOutput = 0;
    setopstate->pergroup = NULL;
    setopstate->grp_firstTuple = NULL;
    setopstate->hashtable = NULL;
    setopstate->tableContext = NULL;

    /*
     * create expression context
     */
    ExecAssignExprContext(estate, &setopstate->ps);

    /*
     * If hashing, we also need a longer-lived context to store the hash
     * table.  The table can't just be kept in the per-query context because
     * we want to be able to throw it away in ExecReScanSetOp.
     */
    if (node->strategy == SETOP_HASHED)
        setopstate->tableContext =
            AllocSetContextCreate(CurrentMemoryContext,
                                  "SetOp hash table",
                                  ALLOCSET_DEFAULT_SIZES);

    /*
     * initialize child nodes
     *
     * If we are hashing then the child plan does not need to handle REWIND
     * efficiently; see ExecReScanSetOp.
     */
    if (node->strategy == SETOP_HASHED)
        eflags &= ~EXEC_FLAG_REWIND;
    outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
    outerDesc = ExecGetResultType(outerPlanState(setopstate));

    /*
     * Initialize result slot and type.  Setop nodes do no projections, so
     * initialize projection info for this node appropriately.
     */
    ExecInitResultTupleSlotTL(&setopstate->ps,
                              node->strategy == SETOP_HASHED ?
                              &TTSOpsMinimalTuple : &TTSOpsHeapTuple);
    setopstate->ps.ps_ProjInfo = NULL;

    /*
     * Precompute fmgr lookup data for inner loop.  We need both equality and
     * hashing functions to do it by hashing, but only equality if not
     * hashing.
     */
    if (node->strategy == SETOP_HASHED)
        execTuplesHashPrepare(node->numCols,
                              node->dupOperators,
                              &setopstate->eqfuncoids,
                              &setopstate->hashfunctions);
    else
        setopstate->eqfunction =
            execTuplesMatchPrepare(outerDesc,
                                   node->numCols,
                                   node->dupColIdx,
                                   node->dupOperators,
                                   node->dupCollations,
                                   &setopstate->ps);

    if (node->strategy == SETOP_HASHED)
    {
        build_hash_table(setopstate);
        setopstate->table_filled = false;
    }
    else
    {
        setopstate->pergroup =
            (SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
    }

    return setopstate;
}

/* ----------------------------------------------------------------
 *		ExecEndSetOp
 *
 *		This shuts down the subplan and frees resources allocated
 *		to this node.
 * ----------------------------------------------------------------
 */
void
ExecEndSetOp(SetOpState *node)
{
    /* clean up tuple table */
    ExecClearTuple(node->ps.ps_ResultTupleSlot);

    /* free subsidiary stuff including hashtable */
    if (node->tableContext)
        MemoryContextDelete(node->tableContext);
    ExecFreeExprContext(&node->ps);

    ExecEndNode(outerPlanState(node));
}


void
ExecReScanSetOp(SetOpState *node)
{
    ExecClearTuple(node->ps.ps_ResultTupleSlot);
    node->setop_done = false;
    node->numOutput = 0;

    if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
    {
        /*
         * In the hashed case, if we haven't yet built the hash table then we
         * can just return; nothing done yet, so nothing to undo.  If
         * subnode's chgParam is not NULL then it will be re-scanned by
         * ExecProcNode, else no reason to re-scan it at all.
         */
        if (!node->table_filled)
            return;

        /*
         * If we do have the hash table and the subplan does not have any
         * parameter changes, then we can just rescan the existing hash
         * table; no need to build it again.
         */
        if (node->ps.lefttree->chgParam == NULL)
        {
            ResetTupleHashIterator(node->hashtable, &node->hashiter);
            return;
        }
    }

    /* Release first tuple of group, if we have made a copy */
    if (node->grp_firstTuple != NULL)
    {
        heap_freetuple(node->grp_firstTuple);
        node->grp_firstTuple = NULL;
    }

    /* Release any hashtable storage */
    if (node->tableContext)
        MemoryContextResetAndDeleteChildren(node->tableContext);

    /* And rebuild empty hashtable if needed */
    if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
    {
        ResetTupleHashTable(node->hashtable);
        node->table_filled = false;
    }

    /*
     * if chgParam of subnode is not null then plan will be re-scanned by
     * first ExecProcNode.
     */
    if (node->ps.lefttree->chgParam == NULL)
        ExecReScan(node->ps.lefttree);
}