/*-------------------------------------------------------------------------
 *
 * nodeSetOp.c
 *	  Routines to handle INTERSECT and EXCEPT selection
 *
 * The input of a SetOp node consists of tuples from two relations,
 * which have been combined into one dataset, with a junk attribute added
 * that shows which relation each tuple came from.  In SETOP_SORTED mode,
 * the input has furthermore been sorted according to all the grouping
 * columns (ie, all the non-junk attributes).  The SetOp node scans each
 * group of identical tuples to determine how many came from each input
 * relation.  Then it is a simple matter to emit the output demanded by the
 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
 *
 * In SETOP_HASHED mode, the input is delivered in no particular order,
 * except that we know all the tuples from one input relation will come before
 * all the tuples of the other.  The planner guarantees that the first input
 * relation is the left-hand one for EXCEPT, and tries to make the smaller
 * input relation come first for INTERSECT.  We build a hash table in memory
 * with one entry for each group of identical tuples, and count the number of
 * tuples in the group from each relation.  After seeing all the input, we
 * scan the hashtable and generate the correct output using those counts.
 * We can avoid making hashtable entries for any tuples appearing only in the
 * second input relation, since they cannot result in any output.
 *
 * This node type is not used for UNION or UNION ALL, since those can be
 * implemented more cheaply (there's no need for the junk attribute to
 * identify the source relation).
 *
 * Note that SetOp does no qual checking or projection.  The delivered
 * output tuples are just copies of the first-to-arrive tuple in each
 * input group.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeSetOp.c
 *
 *-------------------------------------------------------------------------
 */
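
/*
 * Illustrative sketch (not part of the original header comment; the table
 * and column names are hypothetical, chosen only to show the shape of the
 * input this node expects).  For a query such as
 *
 *		SELECT a FROM t1 INTERSECT ALL SELECT a FROM t2
 *
 * the planner appends both inputs and adds the junk "flag" column: 0 for
 * rows from t1 (left), 1 for rows from t2 (right).  In SETOP_SORTED mode a
 * group of identical rows might arrive as (a=5, flag=0), (a=5, flag=0),
 * (a=5, flag=1), giving numLeft = 2 and numRight = 1 for that group; the
 * counting rules in set_output_count() then decide how many copies of the
 * row to emit.
 */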

#include "postgres.h"

#include "access/htup_details.h"
#include "executor/executor.h"
#include "executor/nodeSetOp.h"
#include "miscadmin.h"
#include "utils/memutils.h"


/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group.
 */
typedef struct SetOpStatePerGroupData
{
    long        numLeft;        /* number of left-input dups in group */
    long        numRight;       /* number of right-input dups in group */
} SetOpStatePerGroupData;


static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
static void setop_fill_hash_table(SetOpState *setopstate);
static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);


/*
 * Initialize state for a new group of input values.
 */
static inline void
initialize_counts(SetOpStatePerGroup pergroup)
{
    pergroup->numLeft = pergroup->numRight = 0;
}

/*
 * Advance the appropriate counter for one input tuple.
 */
static inline void
advance_counts(SetOpStatePerGroup pergroup, int flag)
{
    if (flag)
        pergroup->numRight++;
    else
        pergroup->numLeft++;
}

/*
 * Fetch the "flag" column from an input tuple.
 * This is an integer column with value 0 for left side, 1 for right side.
 */
static int
fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
{
    SetOp      *node = (SetOp *) setopstate->ps.plan;
    int         flag;
    bool        isNull;

    flag = DatumGetInt32(slot_getattr(inputslot,
                                      node->flagColIdx,
                                      &isNull));
    Assert(!isNull);
    Assert(flag == 0 || flag == 1);
    return flag;
}

/*
 * Initialize the hash table to empty.
 */
static void
build_hash_table(SetOpState *setopstate)
{
    SetOp      *node = (SetOp *) setopstate->ps.plan;
    ExprContext *econtext = setopstate->ps.ps_ExprContext;
    TupleDesc   desc = ExecGetResultType(outerPlanState(setopstate));

    Assert(node->strategy == SETOP_HASHED);
    Assert(node->numGroups > 0);

    setopstate->hashtable = BuildTupleHashTableExt(&setopstate->ps,
                                                   desc,
                                                   node->numCols,
                                                   node->dupColIdx,
                                                   setopstate->eqfuncoids,
                                                   setopstate->hashfunctions,
                                                   node->dupCollations,
                                                   node->numGroups,
                                                   0,
                                                   setopstate->ps.state->es_query_cxt,
                                                   setopstate->tableContext,
                                                   econtext->ecxt_per_tuple_memory,
                                                   false);
}

/*
 * We've completed processing a tuple group.  Decide how many copies (if any)
 * of its representative row to emit, and store the count into numOutput.
 * This logic is straight from the SQL92 specification.
 */
static void
set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
{
    SetOp      *plannode = (SetOp *) setopstate->ps.plan;

    switch (plannode->cmd)
    {
        case SETOPCMD_INTERSECT:
            if (pergroup->numLeft > 0 && pergroup->numRight > 0)
                setopstate->numOutput = 1;
            else
                setopstate->numOutput = 0;
            break;
        case SETOPCMD_INTERSECT_ALL:
            setopstate->numOutput =
                (pergroup->numLeft < pergroup->numRight) ?
                pergroup->numLeft : pergroup->numRight;
            break;
        case SETOPCMD_EXCEPT:
            if (pergroup->numLeft > 0 && pergroup->numRight == 0)
                setopstate->numOutput = 1;
            else
                setopstate->numOutput = 0;
            break;
        case SETOPCMD_EXCEPT_ALL:
            setopstate->numOutput =
                (pergroup->numLeft < pergroup->numRight) ?
                0 : (pergroup->numLeft - pergroup->numRight);
            break;
        default:
            elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
            break;
    }
}
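
/*
 * Worked example (a sketch added for clarity; the counts are made up): for a
 * group where pergroup->numLeft = 3 and pergroup->numRight = 2, the switch
 * above yields numOutput = 1 for INTERSECT, min(3, 2) = 2 for INTERSECT ALL,
 * 0 for EXCEPT (the group also appears in the right input), and 3 - 2 = 1
 * for EXCEPT ALL.
 */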


/* ----------------------------------------------------------------
 *		ExecSetOp
 * ----------------------------------------------------------------
 */
static TupleTableSlot *         /* return: a tuple or NULL */
ExecSetOp(PlanState *pstate)
{
    SetOpState *node = castNode(SetOpState, pstate);
    SetOp      *plannode = (SetOp *) node->ps.plan;
    TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;

    CHECK_FOR_INTERRUPTS();

    /*
     * If the previously-returned tuple needs to be returned more than once,
     * keep returning it.
     */
    if (node->numOutput > 0)
    {
        node->numOutput--;
        return resultTupleSlot;
    }

    /* Otherwise, we're done if we are out of groups */
    if (node->setop_done)
        return NULL;

    /* Fetch the next tuple group according to the correct strategy */
    if (plannode->strategy == SETOP_HASHED)
    {
        if (!node->table_filled)
            setop_fill_hash_table(node);
        return setop_retrieve_hash_table(node);
    }
    else
        return setop_retrieve_direct(node);
}

/*
 * ExecSetOp for non-hashed case
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
    PlanState  *outerPlan;
    SetOpStatePerGroup pergroup;
    TupleTableSlot *outerslot;
    TupleTableSlot *resultTupleSlot;
    ExprContext *econtext = setopstate->ps.ps_ExprContext;

    /*
     * get state info from node
     */
    outerPlan = outerPlanState(setopstate);
    pergroup = (SetOpStatePerGroup) setopstate->pergroup;
    resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

    /*
     * We loop retrieving groups until we find one we should return
     */
    while (!setopstate->setop_done)
    {
        /*
         * If we don't already have the first tuple of the new group, fetch it
         * from the outer plan.
         */
        if (setopstate->grp_firstTuple == NULL)
        {
            outerslot = ExecProcNode(outerPlan);
            if (!TupIsNull(outerslot))
            {
                /* Make a copy of the first input tuple */
                setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
            }
            else
            {
                /* outer plan produced no tuples at all */
                setopstate->setop_done = true;
                return NULL;
            }
        }

        /*
         * Store the copied first input tuple in the tuple table slot reserved
         * for it.  The tuple will be deleted when it is cleared from the
         * slot.
         */
        ExecStoreHeapTuple(setopstate->grp_firstTuple,
                           resultTupleSlot,
                           true);
        setopstate->grp_firstTuple = NULL;  /* don't keep two pointers */

        /* Initialize working state for a new input tuple group */
        initialize_counts(pergroup);

        /* Count the first input tuple */
        advance_counts(pergroup,
                       fetch_tuple_flag(setopstate, resultTupleSlot));

        /*
         * Scan the outer plan until we exhaust it or cross a group boundary.
         */
        for (;;)
        {
            outerslot = ExecProcNode(outerPlan);
            if (TupIsNull(outerslot))
            {
                /* no more outer-plan tuples available */
                setopstate->setop_done = true;
                break;
            }

            /*
             * Check whether we've crossed a group boundary.
             */
            econtext->ecxt_outertuple = resultTupleSlot;
            econtext->ecxt_innertuple = outerslot;

            if (!ExecQualAndReset(setopstate->eqfunction, econtext))
            {
                /*
                 * Save the first input tuple of the next group.
                 */
                setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
                break;
            }

            /* Still in same group, so count this tuple */
            advance_counts(pergroup,
                           fetch_tuple_flag(setopstate, outerslot));
        }

        /*
         * Done scanning input tuple group.  See if we should emit any copies
         * of result tuple, and if so return the first copy.
         */
        set_output_count(setopstate, pergroup);

        if (setopstate->numOutput > 0)
        {
            setopstate->numOutput--;
            return resultTupleSlot;
        }
    }

    /* No more groups */
    ExecClearTuple(resultTupleSlot);
    return NULL;
}

/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
    SetOp      *node = (SetOp *) setopstate->ps.plan;
    PlanState  *outerPlan;
    int         firstFlag;
    bool        in_first_rel PG_USED_FOR_ASSERTS_ONLY;
    ExprContext *econtext = setopstate->ps.ps_ExprContext;

    /*
     * get state info from node
     */
    outerPlan = outerPlanState(setopstate);
    firstFlag = node->firstFlag;
    /* verify planner didn't mess up */
    Assert(firstFlag == 0 ||
           (firstFlag == 1 &&
            (node->cmd == SETOPCMD_INTERSECT ||
             node->cmd == SETOPCMD_INTERSECT_ALL)));

    /*
     * Process each outer-plan tuple, and then fetch the next one, until we
     * exhaust the outer plan.
     */
    in_first_rel = true;
    for (;;)
    {
        TupleTableSlot *outerslot;
        int         flag;
        TupleHashEntryData *entry;
        bool        isnew;

        outerslot = ExecProcNode(outerPlan);
        if (TupIsNull(outerslot))
            break;

        /* Identify whether it's left or right input */
        flag = fetch_tuple_flag(setopstate, outerslot);

        if (flag == firstFlag)
        {
            /* (still) in first input relation */
            Assert(in_first_rel);

            /* Find or build hashtable entry for this tuple's group */
            entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
                                         &isnew);

            /* If new tuple group, initialize counts */
            if (isnew)
            {
                entry->additional = (SetOpStatePerGroup)
                    MemoryContextAlloc(setopstate->hashtable->tablecxt,
                                       sizeof(SetOpStatePerGroupData));
                initialize_counts((SetOpStatePerGroup) entry->additional);
            }

            /* Advance the counts */
            advance_counts((SetOpStatePerGroup) entry->additional, flag);
        }
        else
        {
            /* reached second relation */
            in_first_rel = false;

            /* For tuples not seen previously, do not make hashtable entry */
            entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
                                         NULL);

            /* Advance the counts if entry is already present */
            if (entry)
                advance_counts((SetOpStatePerGroup) entry->additional, flag);
        }

        /* Must reset expression context after each hashtable lookup */
        ResetExprContext(econtext);
    }

    setopstate->table_filled = true;
    /* Initialize to walk the hash table */
    ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}
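
/*
 * Example sketch (the values are hypothetical, not taken from the code
 * above): for INTERSECT ALL with the left input {5, 5, 7} delivered first
 * (so firstFlag == 0) and the right input {5, 9} delivered second, phase 1
 * leaves the hash table with counts {5: (numLeft=2, numRight=1),
 * 7: (numLeft=1, numRight=0)}; the right input's 9 finds no existing entry
 * and is simply ignored.  Phase 2 then emits min(2, 1) = 1 copy of 5 and no
 * copies of 7.
 */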

/*
 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
 */
static TupleTableSlot *
setop_retrieve_hash_table(SetOpState *setopstate)
{
    TupleHashEntryData *entry;
    TupleTableSlot *resultTupleSlot;

    /*
     * get state info from node
     */
    resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

    /*
     * We loop retrieving groups until we find one we should return
     */
    while (!setopstate->setop_done)
    {
        CHECK_FOR_INTERRUPTS();

        /*
         * Find the next entry in the hash table
         */
        entry = ScanTupleHashTable(setopstate->hashtable, &setopstate->hashiter);
        if (entry == NULL)
        {
            /* No more entries in hashtable, so done */
            setopstate->setop_done = true;
            return NULL;
        }

        /*
         * See if we should emit any copies of this tuple, and if so return
         * the first copy.
         */
        set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);

        if (setopstate->numOutput > 0)
        {
            setopstate->numOutput--;
            return ExecStoreMinimalTuple(entry->firstTuple,
                                         resultTupleSlot,
                                         false);
        }
    }

    /* No more groups */
    ExecClearTuple(resultTupleSlot);
    return NULL;
}

/* ----------------------------------------------------------------
 *		ExecInitSetOp
 *
 *		This initializes the setop node state structures and
 *		the node's subplan.
 * ----------------------------------------------------------------
 */
SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
    SetOpState *setopstate;
    TupleDesc   outerDesc;

    /* check for unsupported flags */
    Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

    /*
     * create state structure
     */
    setopstate = makeNode(SetOpState);
    setopstate->ps.plan = (Plan *) node;
    setopstate->ps.state = estate;
    setopstate->ps.ExecProcNode = ExecSetOp;

    setopstate->eqfuncoids = NULL;
    setopstate->hashfunctions = NULL;
    setopstate->setop_done = false;
    setopstate->numOutput = 0;
    setopstate->pergroup = NULL;
    setopstate->grp_firstTuple = NULL;
    setopstate->hashtable = NULL;
    setopstate->tableContext = NULL;

    /*
     * create expression context
     */
    ExecAssignExprContext(estate, &setopstate->ps);

    /*
     * If hashing, we also need a longer-lived context to store the hash
     * table.  The table can't just be kept in the per-query context because
     * we want to be able to throw it away in ExecReScanSetOp.
     */
    if (node->strategy == SETOP_HASHED)
        setopstate->tableContext =
            AllocSetContextCreate(CurrentMemoryContext,
                                  "SetOp hash table",
                                  ALLOCSET_DEFAULT_SIZES);

    /*
     * initialize child nodes
     *
     * If we are hashing then the child plan does not need to handle REWIND
     * efficiently; see ExecReScanSetOp.
     */
    if (node->strategy == SETOP_HASHED)
        eflags &= ~EXEC_FLAG_REWIND;
    outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
    outerDesc = ExecGetResultType(outerPlanState(setopstate));

    /*
     * Initialize result slot and type.  Setop nodes do no projections, so
     * initialize projection info for this node appropriately.
     */
    ExecInitResultTupleSlotTL(&setopstate->ps,
                              node->strategy == SETOP_HASHED ?
                              &TTSOpsMinimalTuple : &TTSOpsHeapTuple);
    setopstate->ps.ps_ProjInfo = NULL;

    /*
     * Precompute fmgr lookup data for inner loop.  We need both equality and
     * hashing functions to do it by hashing, but only equality if not
     * hashing.
     */
    if (node->strategy == SETOP_HASHED)
        execTuplesHashPrepare(node->numCols,
                              node->dupOperators,
                              &setopstate->eqfuncoids,
                              &setopstate->hashfunctions);
    else
        setopstate->eqfunction =
            execTuplesMatchPrepare(outerDesc,
                                   node->numCols,
                                   node->dupColIdx,
                                   node->dupOperators,
                                   node->dupCollations,
                                   &setopstate->ps);

    if (node->strategy == SETOP_HASHED)
    {
        build_hash_table(setopstate);
        setopstate->table_filled = false;
    }
    else
    {
        setopstate->pergroup =
            (SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
    }

    return setopstate;
}

/* ----------------------------------------------------------------
 *		ExecEndSetOp
 *
 *		This shuts down the subplan and frees resources allocated
 *		to this node.
 * ----------------------------------------------------------------
 */
void
ExecEndSetOp(SetOpState *node)
{
    /* clean up tuple table */
    ExecClearTuple(node->ps.ps_ResultTupleSlot);

    /* free subsidiary stuff including hashtable */
    if (node->tableContext)
        MemoryContextDelete(node->tableContext);
    ExecFreeExprContext(&node->ps);

    ExecEndNode(outerPlanState(node));
}


void
ExecReScanSetOp(SetOpState *node)
{
    ExecClearTuple(node->ps.ps_ResultTupleSlot);
    node->setop_done = false;
    node->numOutput = 0;

    if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
    {
        /*
         * In the hashed case, if we haven't yet built the hash table then we
         * can just return; nothing done yet, so nothing to undo.  If
         * subnode's chgParam is not NULL then it will be re-scanned by
         * ExecProcNode, else no reason to re-scan it at all.
         */
        if (!node->table_filled)
            return;

        /*
         * If we do have the hash table and the subplan does not have any
         * parameter changes, then we can just rescan the existing hash
         * table; no need to build it again.
         */
        if (node->ps.lefttree->chgParam == NULL)
        {
            ResetTupleHashIterator(node->hashtable, &node->hashiter);
            return;
        }
    }

    /* Release first tuple of group, if we have made a copy */
    if (node->grp_firstTuple != NULL)
    {
        heap_freetuple(node->grp_firstTuple);
        node->grp_firstTuple = NULL;
    }

    /* Release any hashtable storage */
    if (node->tableContext)
        MemoryContextResetAndDeleteChildren(node->tableContext);

    /* And rebuild empty hashtable if needed */
    if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
    {
        ResetTupleHashTable(node->hashtable);
        node->table_filled = false;
    }

    /*
     * if chgParam of subnode is not null then plan will be re-scanned by
     * first ExecProcNode.
     */
    if (node->ps.lefttree->chgParam == NULL)
        ExecReScan(node->ps.lefttree);
}