1/*-------------------------------------------------------------------------
2 *
3 * nodeAppend.c
4 * routines to handle append nodes.
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/executor/nodeAppend.c
12 *
13 *-------------------------------------------------------------------------
14 */
15/* INTERFACE ROUTINES
16 * ExecInitAppend - initialize the append node
17 * ExecAppend - retrieve the next tuple from the node
18 * ExecEndAppend - shut down the append node
19 * ExecReScanAppend - rescan the append node
20 *
21 * NOTES
22 * Each append node contains a list of one or more subplans which
23 * must be iteratively processed (forwards or backwards).
24 * Tuples are retrieved by executing the 'whichplan'th subplan
25 * until the subplan stops returning tuples, at which point that
26 * plan is shut down and the next started up.
27 *
28 * Append nodes don't make use of their left and right
29 * subtrees, rather they maintain a list of subplans so
30 * a typical append node looks like this in the plan tree:
31 *
32 * ...
33 * /
34 * Append -------+------+------+--- nil
35 * / \ | | |
36 * nil nil ... ... ...
37 * subplans
38 *
39 * Append nodes are currently used for unions, and to support
40 * inheritance queries, where several relations need to be scanned.
41 * For example, in our standard person/student/employee/student-emp
42 * example, where student and employee inherit from person
43 * and student-emp inherits from student and employee, the
44 * query:
45 *
46 * select name from person
47 *
48 * generates the plan:
49 *
50 * |
51 * Append -------+-------+--------+--------+
52 * / \ | | | |
53 * nil nil Scan Scan Scan Scan
54 * | | | |
55 * person employee student student-emp
56 */
57
58#include "postgres.h"
59
60#include "executor/execdebug.h"
61#include "executor/execPartition.h"
62#include "executor/nodeAppend.h"
63#include "miscadmin.h"
64
/*
 * Shared state for parallel-aware Append, living in dynamic shared memory.
 * Sized with a flexible array member: one pa_finished flag per subplan
 * (see ExecAppendEstimate / ExecAppendInitializeDSM).
 */
struct ParallelAppendState
{
	LWLock		pa_lock;		/* mutual exclusion to choose next subplan */
	int			pa_next_plan;	/* next plan to choose by any worker */

	/*
	 * pa_finished[i] should be true if no more workers should select subplan
	 * i.  for a non-partial plan, this should be set to true as soon as a
	 * worker selects the plan; for a partial plan, it remains false until
	 * some worker executes the plan to completion.
	 */
	bool		pa_finished[FLEXIBLE_ARRAY_MEMBER];
};

/* as_whichplan value: no subplan chosen yet; choose_next_subplan picks one */
#define INVALID_SUBPLAN_INDEX		-1
/* as_whichplan value: run-time pruning eliminated every subplan */
#define NO_MATCHING_SUBPLANS		-2

static TupleTableSlot *ExecAppend(PlanState *pstate);
static bool choose_next_subplan_locally(AppendState *node);
static bool choose_next_subplan_for_leader(AppendState *node);
static bool choose_next_subplan_for_worker(AppendState *node);
static void mark_invalid_subplans_as_finished(AppendState *node);
88
/* ----------------------------------------------------------------
 *		ExecInitAppend
 *
 *		Begin all of the subscans of the append node.
 *
 *		(This is potentially wasteful, since the entire result of the
 *		append node may not be scanned, but this way all of the
 *		structures get allocated in the executor's top level memory
 *		block instead of that of the call to ExecAppend.)
 *
 *		Returns a fully-initialized AppendState.  Only the subplans that
 *		survive initial partition pruning (if any is configured) get a
 *		PlanState; as_nplans counts just those surviving subplans.
 * ----------------------------------------------------------------
 */
AppendState *
ExecInitAppend(Append *node, EState *estate, int eflags)
{
	AppendState *appendstate = makeNode(AppendState);
	PlanState **appendplanstates;
	Bitmapset  *validsubplans;
	int			nplans;
	int			firstvalid;
	int			i,
				j;
	ListCell   *lc;

	/* check for unsupported flags */
	Assert(!(eflags & EXEC_FLAG_MARK));

	/*
	 * create new AppendState for our append node
	 */
	appendstate->ps.plan = (Plan *) node;
	appendstate->ps.state = estate;
	appendstate->ps.ExecProcNode = ExecAppend;

	/* Let choose_next_subplan_* function handle setting the first subplan */
	appendstate->as_whichplan = INVALID_SUBPLAN_INDEX;

	/* If run-time partition pruning is enabled, then set that up now */
	if (node->part_prune_info != NULL)
	{
		PartitionPruneState *prunestate;

		/* We may need an expression context to evaluate partition exprs */
		ExecAssignExprContext(estate, &appendstate->ps);

		/* Create the working data structure for pruning. */
		prunestate = ExecCreatePartitionPruneState(&appendstate->ps,
												   node->part_prune_info);
		appendstate->as_prune_state = prunestate;

		/* Perform an initial partition prune, if required. */
		if (prunestate->do_initial_prune)
		{
			/* Determine which subplans survive initial pruning */
			validsubplans = ExecFindInitialMatchingSubPlans(prunestate,
															list_length(node->appendplans));

			/*
			 * The case where no subplans survive pruning must be handled
			 * specially.  The problem here is that code in explain.c requires
			 * an Append to have at least one subplan in order for it to
			 * properly determine the Vars in that subplan's targetlist.  We
			 * sidestep this issue by just initializing the first subplan and
			 * setting as_whichplan to NO_MATCHING_SUBPLANS to indicate that
			 * we don't really need to scan any subnodes.
			 */
			if (bms_is_empty(validsubplans))
			{
				appendstate->as_whichplan = NO_MATCHING_SUBPLANS;

				/* Mark the first as valid so that it's initialized below */
				validsubplans = bms_make_singleton(0);
			}

			nplans = bms_num_members(validsubplans);
		}
		else
		{
			/* We'll need to initialize all subplans */
			nplans = list_length(node->appendplans);
			Assert(nplans > 0);
			validsubplans = bms_add_range(NULL, 0, nplans - 1);
		}

		/*
		 * If no runtime pruning is required, we can fill as_valid_subplans
		 * immediately, preventing later calls to ExecFindMatchingSubPlans.
		 * Otherwise as_valid_subplans stays NULL and is computed lazily by
		 * the choose_next_subplan_* functions.
		 */
		if (!prunestate->do_exec_prune)
		{
			Assert(nplans > 0);
			appendstate->as_valid_subplans = bms_add_range(NULL, 0, nplans - 1);
		}
	}
	else
	{
		nplans = list_length(node->appendplans);

		/*
		 * When run-time partition pruning is not enabled we can just mark all
		 * subplans as valid; they must also all be initialized.
		 */
		Assert(nplans > 0);
		appendstate->as_valid_subplans = validsubplans =
			bms_add_range(NULL, 0, nplans - 1);
		appendstate->as_prune_state = NULL;
	}

	/*
	 * Initialize result tuple type and slot.
	 */
	ExecInitResultTupleSlotTL(&appendstate->ps, &TTSOpsVirtual);

	/* node returns slots from each of its subnodes, therefore not fixed */
	appendstate->ps.resultopsset = true;
	appendstate->ps.resultopsfixed = false;

	appendplanstates = (PlanState **) palloc(nplans *
											 sizeof(PlanState *));

	/*
	 * call ExecInitNode on each of the valid plans to be executed and save
	 * the results into the appendplanstates array.
	 *
	 * While at it, find out the first valid partial plan.
	 *
	 * Note: i indexes node->appendplans; j indexes the (possibly smaller)
	 * appendplanstates array of surviving subplans.
	 */
	j = i = 0;
	firstvalid = nplans;
	foreach(lc, node->appendplans)
	{
		if (bms_is_member(i, validsubplans))
		{
			Plan	   *initNode = (Plan *) lfirst(lc);

			/*
			 * Record the lowest appendplans index which is a valid partial
			 * plan.
			 */
			if (i >= node->first_partial_plan && j < firstvalid)
				firstvalid = j;

			appendplanstates[j++] = ExecInitNode(initNode, estate, eflags);
		}
		i++;
	}

	appendstate->as_first_partial_plan = firstvalid;
	appendstate->appendplans = appendplanstates;
	appendstate->as_nplans = nplans;

	/*
	 * Miscellaneous initialization
	 */

	appendstate->ps.ps_ProjInfo = NULL;

	/* For parallel query, this will be overridden later. */
	appendstate->choose_next_subplan = choose_next_subplan_locally;

	return appendstate;
}
249
250/* ----------------------------------------------------------------
251 * ExecAppend
252 *
253 * Handles iteration over multiple subplans.
254 * ----------------------------------------------------------------
255 */
256static TupleTableSlot *
257ExecAppend(PlanState *pstate)
258{
259 AppendState *node = castNode(AppendState, pstate);
260
261 if (node->as_whichplan < 0)
262 {
263 /*
264 * If no subplan has been chosen, we must choose one before
265 * proceeding.
266 */
267 if (node->as_whichplan == INVALID_SUBPLAN_INDEX &&
268 !node->choose_next_subplan(node))
269 return ExecClearTuple(node->ps.ps_ResultTupleSlot);
270
271 /* Nothing to do if there are no matching subplans */
272 else if (node->as_whichplan == NO_MATCHING_SUBPLANS)
273 return ExecClearTuple(node->ps.ps_ResultTupleSlot);
274 }
275
276 for (;;)
277 {
278 PlanState *subnode;
279 TupleTableSlot *result;
280
281 CHECK_FOR_INTERRUPTS();
282
283 /*
284 * figure out which subplan we are currently processing
285 */
286 Assert(node->as_whichplan >= 0 && node->as_whichplan < node->as_nplans);
287 subnode = node->appendplans[node->as_whichplan];
288
289 /*
290 * get a tuple from the subplan
291 */
292 result = ExecProcNode(subnode);
293
294 if (!TupIsNull(result))
295 {
296 /*
297 * If the subplan gave us something then return it as-is. We do
298 * NOT make use of the result slot that was set up in
299 * ExecInitAppend; there's no need for it.
300 */
301 return result;
302 }
303
304 /* choose new subplan; if none, we're done */
305 if (!node->choose_next_subplan(node))
306 return ExecClearTuple(node->ps.ps_ResultTupleSlot);
307 }
308}
309
310/* ----------------------------------------------------------------
311 * ExecEndAppend
312 *
313 * Shuts down the subscans of the append node.
314 *
315 * Returns nothing of interest.
316 * ----------------------------------------------------------------
317 */
318void
319ExecEndAppend(AppendState *node)
320{
321 PlanState **appendplans;
322 int nplans;
323 int i;
324
325 /*
326 * get information from the node
327 */
328 appendplans = node->appendplans;
329 nplans = node->as_nplans;
330
331 /*
332 * shut down each of the subscans
333 */
334 for (i = 0; i < nplans; i++)
335 ExecEndNode(appendplans[i]);
336}
337
338void
339ExecReScanAppend(AppendState *node)
340{
341 int i;
342
343 /*
344 * If any PARAM_EXEC Params used in pruning expressions have changed, then
345 * we'd better unset the valid subplans so that they are reselected for
346 * the new parameter values.
347 */
348 if (node->as_prune_state &&
349 bms_overlap(node->ps.chgParam,
350 node->as_prune_state->execparamids))
351 {
352 bms_free(node->as_valid_subplans);
353 node->as_valid_subplans = NULL;
354 }
355
356 for (i = 0; i < node->as_nplans; i++)
357 {
358 PlanState *subnode = node->appendplans[i];
359
360 /*
361 * ExecReScan doesn't know about my subplans, so I have to do
362 * changed-parameter signaling myself.
363 */
364 if (node->ps.chgParam != NULL)
365 UpdateChangedParamSet(subnode, node->ps.chgParam);
366
367 /*
368 * If chgParam of subnode is not null then plan will be re-scanned by
369 * first ExecProcNode.
370 */
371 if (subnode->chgParam == NULL)
372 ExecReScan(subnode);
373 }
374
375 /* Let choose_next_subplan_* function handle setting the first subplan */
376 node->as_whichplan = INVALID_SUBPLAN_INDEX;
377}
378
379/* ----------------------------------------------------------------
380 * Parallel Append Support
381 * ----------------------------------------------------------------
382 */
383
384/* ----------------------------------------------------------------
385 * ExecAppendEstimate
386 *
387 * Compute the amount of space we'll need in the parallel
388 * query DSM, and inform pcxt->estimator about our needs.
389 * ----------------------------------------------------------------
390 */
391void
392ExecAppendEstimate(AppendState *node,
393 ParallelContext *pcxt)
394{
395 node->pstate_len =
396 add_size(offsetof(ParallelAppendState, pa_finished),
397 sizeof(bool) * node->as_nplans);
398
399 shm_toc_estimate_chunk(&pcxt->estimator, node->pstate_len);
400 shm_toc_estimate_keys(&pcxt->estimator, 1);
401}
402
403
404/* ----------------------------------------------------------------
405 * ExecAppendInitializeDSM
406 *
407 * Set up shared state for Parallel Append.
408 * ----------------------------------------------------------------
409 */
410void
411ExecAppendInitializeDSM(AppendState *node,
412 ParallelContext *pcxt)
413{
414 ParallelAppendState *pstate;
415
416 pstate = shm_toc_allocate(pcxt->toc, node->pstate_len);
417 memset(pstate, 0, node->pstate_len);
418 LWLockInitialize(&pstate->pa_lock, LWTRANCHE_PARALLEL_APPEND);
419 shm_toc_insert(pcxt->toc, node->ps.plan->plan_node_id, pstate);
420
421 node->as_pstate = pstate;
422 node->choose_next_subplan = choose_next_subplan_for_leader;
423}
424
425/* ----------------------------------------------------------------
426 * ExecAppendReInitializeDSM
427 *
428 * Reset shared state before beginning a fresh scan.
429 * ----------------------------------------------------------------
430 */
431void
432ExecAppendReInitializeDSM(AppendState *node, ParallelContext *pcxt)
433{
434 ParallelAppendState *pstate = node->as_pstate;
435
436 pstate->pa_next_plan = 0;
437 memset(pstate->pa_finished, 0, sizeof(bool) * node->as_nplans);
438}
439
440/* ----------------------------------------------------------------
441 * ExecAppendInitializeWorker
442 *
443 * Copy relevant information from TOC into planstate, and initialize
444 * whatever is required to choose and execute the optimal subplan.
445 * ----------------------------------------------------------------
446 */
447void
448ExecAppendInitializeWorker(AppendState *node, ParallelWorkerContext *pwcxt)
449{
450 node->as_pstate = shm_toc_lookup(pwcxt->toc, node->ps.plan->plan_node_id, false);
451 node->choose_next_subplan = choose_next_subplan_for_worker;
452}
453
454/* ----------------------------------------------------------------
455 * choose_next_subplan_locally
456 *
457 * Choose next subplan for a non-parallel-aware Append,
458 * returning false if there are no more.
459 * ----------------------------------------------------------------
460 */
461static bool
462choose_next_subplan_locally(AppendState *node)
463{
464 int whichplan = node->as_whichplan;
465 int nextplan;
466
467 /* We should never be called when there are no subplans */
468 Assert(whichplan != NO_MATCHING_SUBPLANS);
469
470 /*
471 * If first call then have the bms member function choose the first valid
472 * subplan by initializing whichplan to -1. If there happen to be no
473 * valid subplans then the bms member function will handle that by
474 * returning a negative number which will allow us to exit returning a
475 * false value.
476 */
477 if (whichplan == INVALID_SUBPLAN_INDEX)
478 {
479 if (node->as_valid_subplans == NULL)
480 node->as_valid_subplans =
481 ExecFindMatchingSubPlans(node->as_prune_state);
482
483 whichplan = -1;
484 }
485
486 /* Ensure whichplan is within the expected range */
487 Assert(whichplan >= -1 && whichplan <= node->as_nplans);
488
489 if (ScanDirectionIsForward(node->ps.state->es_direction))
490 nextplan = bms_next_member(node->as_valid_subplans, whichplan);
491 else
492 nextplan = bms_prev_member(node->as_valid_subplans, whichplan);
493
494 if (nextplan < 0)
495 return false;
496
497 node->as_whichplan = nextplan;
498
499 return true;
500}
501
/* ----------------------------------------------------------------
 *		choose_next_subplan_for_leader
 *
 *      Try to pick a plan which doesn't commit us to doing much
 *      work locally, so that as much work as possible is done in
 *      the workers.  Cheapest subplans are at the end.
 *
 *      Returns false when every subplan is finished; otherwise sets
 *      node->as_whichplan and returns true.  All inspection of the
 *      shared pa_finished[] / pa_next_plan state happens under
 *      pa_lock.
 * ----------------------------------------------------------------
 */
static bool
choose_next_subplan_for_leader(AppendState *node)
{
	ParallelAppendState *pstate = node->as_pstate;

	/* Backward scan is not supported by parallel-aware plans */
	Assert(ScanDirectionIsForward(node->ps.state->es_direction));

	/* We should never be called when there are no subplans */
	Assert(node->as_whichplan != NO_MATCHING_SUBPLANS);

	LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);

	if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
	{
		/* Mark just-completed subplan as finished. */
		node->as_pstate->pa_finished[node->as_whichplan] = true;
	}
	else
	{
		/* First call: start with last subplan and scan downward. */
		node->as_whichplan = node->as_nplans - 1;

		/*
		 * If we've yet to determine the valid subplans then do so now.  If
		 * run-time pruning is disabled then the valid subplans will always be
		 * set to all subplans.
		 */
		if (node->as_valid_subplans == NULL)
		{
			node->as_valid_subplans =
				ExecFindMatchingSubPlans(node->as_prune_state);

			/*
			 * Mark each invalid plan as finished to allow the loop below to
			 * select the first valid subplan.
			 */
			mark_invalid_subplans_as_finished(node);
		}
	}

	/* Loop until we find a subplan to execute. */
	while (pstate->pa_finished[node->as_whichplan])
	{
		if (node->as_whichplan == 0)
		{
			/* Scanned down past all subplans: tell everyone we're done. */
			pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
			node->as_whichplan = INVALID_SUBPLAN_INDEX;
			LWLockRelease(&pstate->pa_lock);
			return false;
		}

		/*
		 * We needn't pay attention to as_valid_subplans here as all invalid
		 * plans have been marked as finished.
		 */
		node->as_whichplan--;
	}

	/* If non-partial, immediately mark as finished. */
	if (node->as_whichplan < node->as_first_partial_plan)
		node->as_pstate->pa_finished[node->as_whichplan] = true;

	LWLockRelease(&pstate->pa_lock);

	return true;
}
577
/* ----------------------------------------------------------------
 *		choose_next_subplan_for_worker
 *
 *		Choose next subplan for a parallel-aware Append, returning
 *		false if there are no more.
 *
 *		We start from the first plan and advance through the list;
 *		when we get back to the end, we loop back to the first
 *		partial plan.  This assigns the non-partial plans first in
 *		order of descending cost and then spreads out the workers
 *		as evenly as possible across the remaining partial plans.
 *
 *		All reads and writes of the shared pa_next_plan /
 *		pa_finished[] state happen while holding pa_lock.
 * ----------------------------------------------------------------
 */
static bool
choose_next_subplan_for_worker(AppendState *node)
{
	ParallelAppendState *pstate = node->as_pstate;

	/* Backward scan is not supported by parallel-aware plans */
	Assert(ScanDirectionIsForward(node->ps.state->es_direction));

	/* We should never be called when there are no subplans */
	Assert(node->as_whichplan != NO_MATCHING_SUBPLANS);

	LWLockAcquire(&pstate->pa_lock, LW_EXCLUSIVE);

	/* Mark just-completed subplan as finished. */
	if (node->as_whichplan != INVALID_SUBPLAN_INDEX)
		node->as_pstate->pa_finished[node->as_whichplan] = true;

	/*
	 * If we've yet to determine the valid subplans then do so now.  If
	 * run-time pruning is disabled then the valid subplans will always be set
	 * to all subplans.  (This can only be the first call for this worker,
	 * since as_whichplan was INVALID_SUBPLAN_INDEX.)
	 */
	else if (node->as_valid_subplans == NULL)
	{
		node->as_valid_subplans =
			ExecFindMatchingSubPlans(node->as_prune_state);
		mark_invalid_subplans_as_finished(node);
	}

	/* If all the plans are already done, we have nothing to do */
	if (pstate->pa_next_plan == INVALID_SUBPLAN_INDEX)
	{
		LWLockRelease(&pstate->pa_lock);
		return false;
	}

	/* Save the plan from which we are starting the search. */
	node->as_whichplan = pstate->pa_next_plan;

	/*
	 * Loop until we find a valid subplan to execute.  The search advances
	 * pa_next_plan through as_valid_subplans, wrapping around to the first
	 * valid partial plan; arriving back at the starting plan
	 * (node->as_whichplan) means everything is finished.
	 */
	while (pstate->pa_finished[pstate->pa_next_plan])
	{
		int			nextplan;

		nextplan = bms_next_member(node->as_valid_subplans,
								   pstate->pa_next_plan);
		if (nextplan >= 0)
		{
			/* Advance to the next valid plan. */
			pstate->pa_next_plan = nextplan;
		}
		else if (node->as_whichplan > node->as_first_partial_plan)
		{
			/*
			 * Try looping back to the first valid partial plan, if there is
			 * one.  If there isn't, arrange to bail out below.
			 */
			nextplan = bms_next_member(node->as_valid_subplans,
									   node->as_first_partial_plan - 1);
			pstate->pa_next_plan =
				nextplan < 0 ? node->as_whichplan : nextplan;
		}
		else
		{
			/*
			 * At last plan, and either there are no partial plans or we've
			 * tried them all.  Arrange to bail out.
			 */
			pstate->pa_next_plan = node->as_whichplan;
		}

		if (pstate->pa_next_plan == node->as_whichplan)
		{
			/* We've tried everything! */
			pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
			LWLockRelease(&pstate->pa_lock);
			return false;
		}
	}

	/* Pick the plan we found, and advance pa_next_plan one more time. */
	node->as_whichplan = pstate->pa_next_plan;
	pstate->pa_next_plan = bms_next_member(node->as_valid_subplans,
										   pstate->pa_next_plan);

	/*
	 * If there are no more valid plans then try setting the next plan to the
	 * first valid partial plan.
	 */
	if (pstate->pa_next_plan < 0)
	{
		int			nextplan = bms_next_member(node->as_valid_subplans,
											   node->as_first_partial_plan - 1);

		if (nextplan >= 0)
			pstate->pa_next_plan = nextplan;
		else
		{
			/*
			 * There are no valid partial plans, and we already chose the last
			 * non-partial plan; so flag that there's nothing more for our
			 * fellow workers to do.
			 */
			pstate->pa_next_plan = INVALID_SUBPLAN_INDEX;
		}
	}

	/* If non-partial, immediately mark as finished. */
	if (node->as_whichplan < node->as_first_partial_plan)
		node->as_pstate->pa_finished[node->as_whichplan] = true;

	LWLockRelease(&pstate->pa_lock);

	return true;
}
706
707/*
708 * mark_invalid_subplans_as_finished
709 * Marks the ParallelAppendState's pa_finished as true for each invalid
710 * subplan.
711 *
712 * This function should only be called for parallel Append with run-time
713 * pruning enabled.
714 */
715static void
716mark_invalid_subplans_as_finished(AppendState *node)
717{
718 int i;
719
720 /* Only valid to call this while in parallel Append mode */
721 Assert(node->as_pstate);
722
723 /* Shouldn't have been called when run-time pruning is not enabled */
724 Assert(node->as_prune_state);
725
726 /* Nothing to do if all plans are valid */
727 if (bms_num_members(node->as_valid_subplans) == node->as_nplans)
728 return;
729
730 /* Mark all non-valid plans as finished */
731 for (i = 0; i < node->as_nplans; i++)
732 {
733 if (!bms_is_member(i, node->as_valid_subplans))
734 node->as_pstate->pa_finished[i] = true;
735 }
736}
737