/*-------------------------------------------------------------------------
 *
 * execPartition.c
 *    Support routines for partitioning.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    src/backend/executor/execPartition.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/table.h"
#include "access/tableam.h"
#include "catalog/partition.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_type.h"
#include "executor/execPartition.h"
#include "executor/executor.h"
#include "foreign/fdwapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "partitioning/partbounds.h"
#include "partitioning/partdesc.h"
#include "partitioning/partprune.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
#include "utils/rel.h"
#include "utils/rls.h"
#include "utils/ruleutils.h"


/*-----------------------
 * PartitionTupleRouting - Encapsulates all information required to
 * route a tuple inserted into a partitioned table to one of its leaf
 * partitions.
 *
 * partition_root
 *    The partitioned table that's the target of the command.
 *
 * partition_dispatch_info
 *    Array of 'max_dispatch' elements containing a pointer to a
 *    PartitionDispatch object for every partitioned table touched by tuple
 *    routing.  The entry for the target partitioned table is *always*
 *    present in the 0th element of this array.  See comment for
 *    PartitionDispatchData->indexes for details on how this array is
 *    indexed.
 *
 * num_dispatch
 *    The current number of items stored in the 'partition_dispatch_info'
 *    array.  Also serves as the index of the next free array element for
 *    new PartitionDispatch objects that need to be stored.
 *
 * max_dispatch
 *    The current allocated size of the 'partition_dispatch_info' array.
 *
 * partitions
 *    Array of 'max_partitions' elements containing a pointer to a
 *    ResultRelInfo for every leaf partition touched by tuple routing.
 *    Some of these are pointers to ResultRelInfos which are borrowed out of
 *    'subplan_resultrel_htab'.  The remainder have been built especially
 *    for tuple routing.  See comment for PartitionDispatchData->indexes for
 *    details on how this array is indexed.
 *
 * num_partitions
 *    The current number of items stored in the 'partitions' array.  Also
 *    serves as the index of the next free array element for new
 *    ResultRelInfo objects that need to be stored.
 *
 * max_partitions
 *    The current allocated size of the 'partitions' array.
 *
 * subplan_resultrel_htab
 *    Hash table to store subplan ResultRelInfos by Oid.  This is used to
 *    cache ResultRelInfos from subplans of an UPDATE ModifyTable node;
 *    NULL in other cases.  Some of these may be useful for tuple routing
 *    to save having to build duplicates.
 *
 * memcxt
 *    Memory context used to allocate subsidiary structs.
 *-----------------------
 */
struct PartitionTupleRouting
{
    Relation    partition_root;
    PartitionDispatch *partition_dispatch_info;
    int         num_dispatch;
    int         max_dispatch;
    ResultRelInfo **partitions;
    int         num_partitions;
    int         max_partitions;
    HTAB       *subplan_resultrel_htab;
    MemoryContext memcxt;
};

/*-----------------------
 * PartitionDispatch - information about one partitioned table in a partition
 * hierarchy required to route a tuple to any of its partitions.  A
 * PartitionDispatch is always encapsulated inside a PartitionTupleRouting
 * struct and stored inside its 'partition_dispatch_info' array.
 *
 * reldesc
 *    Relation descriptor of the table
 *
 * key
 *    Partition key information of the table
 *
 * keystate
 *    Execution state required for expressions in the partition key
 *
 * partdesc
 *    Partition descriptor of the table
 *
 * tupslot
 *    A standalone TupleTableSlot initialized with this table's tuple
 *    descriptor, or NULL if no tuple conversion from the parent's rowtype
 *    is required.
 *
 * tupmap
 *    TupleConversionMap to convert from the parent's rowtype to this table's
 *    rowtype (when extracting the partition key of a tuple just before
 *    routing it through this table).  A NULL value is stored if no tuple
 *    conversion is required.
 *
 * indexes
 *    Array of partdesc->nparts elements.  For leaf partitions the index
 *    corresponds to the partition's ResultRelInfo in the encapsulating
 *    PartitionTupleRouting's partitions array.  For partitioned partitions,
 *    the index corresponds to the PartitionDispatch for it in its
 *    partition_dispatch_info array.  -1 indicates we've not yet allocated
 *    anything in PartitionTupleRouting for the partition.
 *-----------------------
 */
typedef struct PartitionDispatchData
{
    Relation    reldesc;
    PartitionKey key;
    List       *keystate;       /* list of ExprState */
    PartitionDesc partdesc;
    TupleTableSlot *tupslot;
    AttrNumber *tupmap;
    int         indexes[FLEXIBLE_ARRAY_MEMBER];
} PartitionDispatchData;
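
/*
 * An illustrative example (hypothetical table names): suppose the target
 * table A has a leaf partition A1 and a sub-partitioned table A2, which in
 * turn has a leaf partition A21.  Once tuple routing has touched all of
 * them, we would have
 *
 *    partition_dispatch_info = [PD for A, PD for A2]
 *    partitions              = [ResultRelInfo for A1, ResultRelInfo for A21]
 *
 * where A's PD->indexes entry for A1 holds 0 (an index into 'partitions')
 * and its entry for A2 holds 1 (an index into 'partition_dispatch_info'),
 * while A2's PD->indexes entry for A21 holds 1 (into 'partitions').
 * Entries for partitions not yet touched remain -1.
 */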

/* struct to hold result relations coming from UPDATE subplans */
typedef struct SubplanResultRelHashElem
{
    Oid         relid;          /* hash key -- must be first */
    ResultRelInfo *rri;
} SubplanResultRelHashElem;


static void ExecHashSubPlanResultRelsByOid(ModifyTableState *mtstate,
                                           PartitionTupleRouting *proute);
static ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate,
                                            EState *estate,
                                            PartitionTupleRouting *proute,
                                            PartitionDispatch dispatch,
                                            ResultRelInfo *rootResultRelInfo,
                                            int partidx);
static void ExecInitRoutingInfo(ModifyTableState *mtstate,
                                EState *estate,
                                PartitionTupleRouting *proute,
                                PartitionDispatch dispatch,
                                ResultRelInfo *partRelInfo,
                                int partidx);
static PartitionDispatch ExecInitPartitionDispatchInfo(EState *estate,
                                                       PartitionTupleRouting *proute,
                                                       Oid partoid,
                                                       PartitionDispatch parent_pd,
                                                       int partidx);
static void FormPartitionKeyDatum(PartitionDispatch pd,
                                  TupleTableSlot *slot,
                                  EState *estate,
                                  Datum *values,
                                  bool *isnull);
static int  get_partition_for_tuple(PartitionDispatch pd, Datum *values,
                                    bool *isnull);
static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
                                                  Datum *values,
                                                  bool *isnull,
                                                  int maxfieldlen);
static List *adjust_partition_tlist(List *tlist, TupleConversionMap *map);
static void ExecInitPruningContext(PartitionPruneContext *context,
                                   List *pruning_steps,
                                   PartitionDesc partdesc,
                                   PartitionKey partkey,
                                   PlanState *planstate);
static void find_matching_subplans_recurse(PartitionPruningData *prunedata,
                                           PartitionedRelPruningData *pprune,
                                           bool initial_prune,
                                           Bitmapset **validsubplans);


/*
 * ExecSetupPartitionTupleRouting - sets up information needed during
 * tuple routing for partitioned tables, encapsulates it in
 * PartitionTupleRouting, and returns it.
 *
 * Callers must use the returned PartitionTupleRouting during calls to
 * ExecFindPartition().  The actual ResultRelInfo for a partition is only
 * allocated when the partition is found for the first time.
 *
 * The current memory context is used to allocate this struct and all
 * subsidiary structs that will be allocated from it later on.  Typically
 * it should be estate->es_query_cxt.
 */
PartitionTupleRouting *
ExecSetupPartitionTupleRouting(EState *estate, ModifyTableState *mtstate,
                               Relation rel)
{
    PartitionTupleRouting *proute;
    ModifyTable *node = mtstate ? (ModifyTable *) mtstate->ps.plan : NULL;

    /*
     * Here we attempt to expend as little effort as possible in setting up
     * the PartitionTupleRouting.  Each partition's ResultRelInfo is built on
     * demand, only when we actually need to route a tuple to that partition.
     * The reason for this is that a common case is for INSERT to insert a
     * single tuple into a partitioned table and this must be fast.
     */
    proute = (PartitionTupleRouting *) palloc0(sizeof(PartitionTupleRouting));
    proute->partition_root = rel;
    proute->memcxt = CurrentMemoryContext;
    /* Rest of members initialized by zeroing */

    /*
     * Initialize this table's PartitionDispatch object.  Here we pass in the
     * parent as NULL as we don't need to care about any parent of the target
     * partitioned table.
     */
    ExecInitPartitionDispatchInfo(estate, proute, RelationGetRelid(rel),
                                  NULL, 0);

    /*
     * If performing an UPDATE with tuple routing, we can reuse partition
     * sub-plan result rels.  We build a hash table to map the OIDs of
     * partitions present in mtstate->resultRelInfo to their ResultRelInfos.
     * Every time a tuple is routed to a partition that we've yet to set the
     * ResultRelInfo for, before we go to the trouble of making one, we check
     * for a pre-made one in the hash table.
     */
    if (node && node->operation == CMD_UPDATE)
        ExecHashSubPlanResultRelsByOid(mtstate, proute);

    return proute;
}
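
/*
 * A sketch of typical usage by a caller such as ExecInitModifyTable() or
 * COPY (the local variable names here are hypothetical):
 *
 *    proute = ExecSetupPartitionTupleRouting(estate, mtstate, rel);
 *    ...
 *    (for each tuple to be routed:)
 *    rri = ExecFindPartition(mtstate, rootResultRelInfo, proute,
 *                            slot, estate);
 *    ...
 *    ExecCleanupTupleRouting(mtstate, proute);
 */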

/*
 * ExecFindPartition -- Return the ResultRelInfo for the leaf partition that
 * the tuple contained in *slot should belong to.
 *
 * If the partition's ResultRelInfo does not yet exist in 'proute' then we set
 * one up or reuse one from mtstate's resultRelInfo array.  When reusing a
 * ResultRelInfo from the mtstate we verify that the relation is a valid
 * target for INSERTs and then set up a PartitionRoutingInfo for it.
 *
 * rootResultRelInfo is the relation named in the query.
 *
 * estate must be non-NULL; we'll need it to compute any expressions in the
 * partition keys.  Also, its per-tuple contexts are used as evaluation
 * scratch space.
 *
 * If no leaf partition is found, this routine errors out with the appropriate
 * error message.  An error may also be raised if the found target partition
 * is not a valid target for an INSERT.
 */
ResultRelInfo *
ExecFindPartition(ModifyTableState *mtstate,
                  ResultRelInfo *rootResultRelInfo,
                  PartitionTupleRouting *proute,
                  TupleTableSlot *slot, EState *estate)
{
    PartitionDispatch *pd = proute->partition_dispatch_info;
    Datum       values[PARTITION_MAX_KEYS];
    bool        isnull[PARTITION_MAX_KEYS];
    Relation    rel;
    PartitionDispatch dispatch;
    PartitionDesc partdesc;
    ExprContext *ecxt = GetPerTupleExprContext(estate);
    TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple;
    TupleTableSlot *myslot = NULL;
    MemoryContext oldcxt;

    /* use per-tuple context here to avoid leaking memory */
    oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));

    /*
     * First check the root table's partition constraint, if any.  No point
     * in routing the tuple if it doesn't belong in the root table itself.
     */
    if (rootResultRelInfo->ri_PartitionCheck)
        ExecPartitionCheck(rootResultRelInfo, slot, estate, true);

    /* start with the root partitioned table */
    dispatch = pd[0];
    while (true)
    {
        AttrNumber *map = dispatch->tupmap;
        int         partidx = -1;

        CHECK_FOR_INTERRUPTS();

        rel = dispatch->reldesc;
        partdesc = dispatch->partdesc;

        /*
         * Convert the tuple to this parent's layout, if different from the
         * current relation.
         */
        myslot = dispatch->tupslot;
        if (myslot != NULL)
        {
            Assert(map != NULL);
            slot = execute_attr_map_slot(map, slot, myslot);
        }

        /*
         * Extract the partition key from the tuple.  The expression
         * evaluation machinery that FormPartitionKeyDatum() invokes expects
         * ecxt_scantuple to point to the correct tuple slot.  The slot might
         * have changed from what was used for the parent table if the table
         * of the current partitioning level has a different tuple descriptor
         * from the parent.  So update ecxt_scantuple accordingly.
         */
        ecxt->ecxt_scantuple = slot;
        FormPartitionKeyDatum(dispatch, slot, estate, values, isnull);

        /*
         * If this partitioned table has no partitions or no partition for
         * these values, error out.
         */
        if (partdesc->nparts == 0 ||
            (partidx = get_partition_for_tuple(dispatch, values, isnull)) < 0)
        {
            char       *val_desc;

            val_desc = ExecBuildSlotPartitionKeyDescription(rel,
                                                            values, isnull, 64);
            Assert(OidIsValid(RelationGetRelid(rel)));
            ereport(ERROR,
                    (errcode(ERRCODE_CHECK_VIOLATION),
                     errmsg("no partition of relation \"%s\" found for row",
                            RelationGetRelationName(rel)),
                     val_desc ?
                     errdetail("Partition key of the failing row contains %s.",
                               val_desc) : 0));
        }

        if (partdesc->is_leaf[partidx])
        {
            ResultRelInfo *rri;

            /*
             * Look to see if we've already got a ResultRelInfo for this
             * partition.
             */
            if (likely(dispatch->indexes[partidx] >= 0))
            {
                /* ResultRelInfo already built */
                Assert(dispatch->indexes[partidx] < proute->num_partitions);
                rri = proute->partitions[dispatch->indexes[partidx]];
            }
            else
            {
                bool        found = false;

                /*
                 * We have not yet set up a ResultRelInfo for this partition,
                 * but if we have a subplan hash table, we might have one
                 * there.  If not, we'll have to create one.
                 */
                if (proute->subplan_resultrel_htab)
                {
                    Oid         partoid = partdesc->oids[partidx];
                    SubplanResultRelHashElem *elem;

                    elem = hash_search(proute->subplan_resultrel_htab,
                                       &partoid, HASH_FIND, NULL);
                    if (elem)
                    {
                        found = true;
                        rri = elem->rri;

                        /* Verify this ResultRelInfo allows INSERTs */
                        CheckValidResultRel(rri, CMD_INSERT);

                        /* Set up the PartitionRoutingInfo for it */
                        ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
                                            rri, partidx);
                    }
                }

                /* We need to create a new one. */
                if (!found)
                    rri = ExecInitPartitionInfo(mtstate, estate, proute,
                                                dispatch,
                                                rootResultRelInfo, partidx);
            }

            /* Release the tuple in the lowest parent's dedicated slot. */
            if (slot == myslot)
                ExecClearTuple(myslot);

            MemoryContextSwitchTo(oldcxt);
            ecxt->ecxt_scantuple = ecxt_scantuple_old;
            return rri;
        }
        else
        {
            /*
             * Partition is a sub-partitioned table; get the PartitionDispatch
             */
            if (likely(dispatch->indexes[partidx] >= 0))
            {
                /* Already built. */
                Assert(dispatch->indexes[partidx] < proute->num_dispatch);

                /*
                 * Move down to the next partition level and search again
                 * until we find a leaf partition that matches this tuple
                 */
                dispatch = pd[dispatch->indexes[partidx]];
            }
            else
            {
                /* Not yet built.  Do that now. */
                PartitionDispatch subdispatch;

                /*
                 * Create the new PartitionDispatch.  We pass the current one
                 * in as the parent PartitionDispatch
                 */
                subdispatch = ExecInitPartitionDispatchInfo(mtstate->ps.state,
                                                            proute,
                                                            partdesc->oids[partidx],
                                                            dispatch, partidx);
                Assert(dispatch->indexes[partidx] >= 0 &&
                       dispatch->indexes[partidx] < proute->num_dispatch);
                dispatch = subdispatch;
            }
        }
    }
}
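
/*
 * For example, ExecFindPartition() descends as follows (an illustrative
 * case): if the root is partitioned by RANGE (a) and its partition for
 * a >= 100 is itself partitioned by LIST (b), then a tuple with
 * (a, b) = (150, 'x') is routed through pd[0] to the sub-partitioned
 * table's PartitionDispatch, and from there to the leaf partition that
 * accepts b = 'x', whose ResultRelInfo is returned.
 */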

/*
 * ExecHashSubPlanResultRelsByOid
 *    Build a hash table to allow fast lookups of subplan ResultRelInfos by
 *    partition Oid.  We also populate the subplan ResultRelInfo with an
 *    ri_PartitionRoot.
 */
static void
ExecHashSubPlanResultRelsByOid(ModifyTableState *mtstate,
                               PartitionTupleRouting *proute)
{
    HASHCTL     ctl;
    HTAB       *htab;
    int         i;

    memset(&ctl, 0, sizeof(ctl));
    ctl.keysize = sizeof(Oid);
    ctl.entrysize = sizeof(SubplanResultRelHashElem);
    ctl.hcxt = CurrentMemoryContext;

    htab = hash_create("PartitionTupleRouting table", mtstate->mt_nplans,
                       &ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
    proute->subplan_resultrel_htab = htab;

    /* Hash all subplans by their Oid */
    for (i = 0; i < mtstate->mt_nplans; i++)
    {
        ResultRelInfo *rri = &mtstate->resultRelInfo[i];
        bool        found;
        Oid         partoid = RelationGetRelid(rri->ri_RelationDesc);
        SubplanResultRelHashElem *elem;

        elem = (SubplanResultRelHashElem *)
            hash_search(htab, &partoid, HASH_ENTER, &found);
        Assert(!found);
        elem->rri = rri;

        /*
         * This is required in order to convert the partition's tuple to be
         * compatible with the root partitioned table's tuple descriptor.
         * When generating the per-subplan result rels, this was not set.
         */
        rri->ri_PartitionRoot = proute->partition_root;
    }
}

/*
 * ExecInitPartitionInfo
 *    Lock the partition and initialize ResultRelInfo.  Also setup other
 *    information for the partition and store it in the next empty slot in
 *    the proute->partitions array.
 *
 * Returns the ResultRelInfo
 */
static ResultRelInfo *
ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
                      PartitionTupleRouting *proute,
                      PartitionDispatch dispatch,
                      ResultRelInfo *rootResultRelInfo,
                      int partidx)
{
    ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
    Relation    rootrel = rootResultRelInfo->ri_RelationDesc,
                partrel;
    Relation    firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
    ResultRelInfo *leaf_part_rri;
    MemoryContext oldcxt;
    AttrNumber *part_attnos = NULL;
    bool        found_whole_row;

    oldcxt = MemoryContextSwitchTo(proute->memcxt);

    partrel = table_open(dispatch->partdesc->oids[partidx], RowExclusiveLock);

    leaf_part_rri = makeNode(ResultRelInfo);
    InitResultRelInfo(leaf_part_rri,
                      partrel,
                      node ? node->rootRelation : 1,
                      rootrel,
                      estate->es_instrument);

    /*
     * Verify result relation is a valid target for an INSERT.  An UPDATE of
     * a partition-key becomes a DELETE+INSERT operation, so this check is
     * still required when the operation is CMD_UPDATE.
     */
    CheckValidResultRel(leaf_part_rri, CMD_INSERT);

    /*
     * Open partition indices.  The user may have asked to check for
     * conflicts within this leaf partition and do "nothing" instead of
     * throwing an error.  Be prepared in that case by initializing the index
     * information needed by ExecInsert() to perform speculative insertions.
     */
    if (partrel->rd_rel->relhasindex &&
        leaf_part_rri->ri_IndexRelationDescs == NULL)
        ExecOpenIndices(leaf_part_rri,
                        (node != NULL &&
                         node->onConflictAction != ONCONFLICT_NONE));

    /*
     * Build WITH CHECK OPTION constraints for the partition.  Note that we
     * didn't build the withCheckOptionList for partitions within the
     * planner, but simple translation of varattnos will suffice.  This only
     * occurs for the INSERT case or in the case of UPDATE tuple routing
     * where we didn't find a result rel to reuse in
     * ExecSetupPartitionTupleRouting().
     */
    if (node && node->withCheckOptionLists != NIL)
    {
        List       *wcoList;
        List       *wcoExprs = NIL;
        ListCell   *ll;
        int         firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;

        /*
         * In the case of INSERT on a partitioned table, there is only one
         * plan.  Likewise, there is only one WCO list, not one per
         * partition.  For UPDATE, there are as many WCO lists as there are
         * plans.
         */
        Assert((node->operation == CMD_INSERT &&
                list_length(node->withCheckOptionLists) == 1 &&
                list_length(node->plans) == 1) ||
               (node->operation == CMD_UPDATE &&
                list_length(node->withCheckOptionLists) ==
                list_length(node->plans)));

        /*
         * Use the WCO list of the first plan as a reference to calculate
         * attno's for the WCO list of this partition.  In the INSERT case,
         * that refers to the root partitioned table, whereas in the UPDATE
         * tuple routing case, that refers to the first partition in the
         * mtstate->resultRelInfo array.  In any case, both that relation and
         * this partition should have the same columns, so we should be able
         * to map attributes successfully.
         */
        wcoList = linitial(node->withCheckOptionLists);

        /*
         * Convert Vars in it to contain this partition's attribute numbers.
         */
        part_attnos =
            convert_tuples_by_name_map(RelationGetDescr(partrel),
                                       RelationGetDescr(firstResultRel),
                                       gettext_noop("could not convert row type"));
        wcoList = (List *)
            map_variable_attnos((Node *) wcoList,
                                firstVarno, 0,
                                part_attnos,
                                RelationGetDescr(firstResultRel)->natts,
                                RelationGetForm(partrel)->reltype,
                                &found_whole_row);
        /* We ignore the value of found_whole_row. */

        foreach(ll, wcoList)
        {
            WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll));
            ExprState  *wcoExpr = ExecInitQual(castNode(List, wco->qual),
                                               &mtstate->ps);

            wcoExprs = lappend(wcoExprs, wcoExpr);
        }

        leaf_part_rri->ri_WithCheckOptions = wcoList;
        leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
    }

    /*
     * Build the RETURNING projection for the partition.  Note that we didn't
     * build the returningList for partitions within the planner, but simple
     * translation of varattnos will suffice.  This only occurs for the
     * INSERT case or in the case of UPDATE tuple routing where we didn't
     * find a result rel to reuse in ExecSetupPartitionTupleRouting().
     */
    if (node && node->returningLists != NIL)
    {
        TupleTableSlot *slot;
        ExprContext *econtext;
        List       *returningList;
        int         firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;

        /* See the comment above for WCO lists. */
        Assert((node->operation == CMD_INSERT &&
                list_length(node->returningLists) == 1 &&
                list_length(node->plans) == 1) ||
               (node->operation == CMD_UPDATE &&
                list_length(node->returningLists) ==
                list_length(node->plans)));

        /*
         * Use the RETURNING list of the first plan as a reference to
         * calculate attno's for the RETURNING list of this partition.  See
         * the comment above for WCO lists for more details on why this is
         * okay.
         */
        returningList = linitial(node->returningLists);

        /*
         * Convert Vars in it to contain this partition's attribute numbers.
         */
        if (part_attnos == NULL)
            part_attnos =
                convert_tuples_by_name_map(RelationGetDescr(partrel),
                                           RelationGetDescr(firstResultRel),
                                           gettext_noop("could not convert row type"));
        returningList = (List *)
            map_variable_attnos((Node *) returningList,
                                firstVarno, 0,
                                part_attnos,
                                RelationGetDescr(firstResultRel)->natts,
                                RelationGetForm(partrel)->reltype,
                                &found_whole_row);
        /* We ignore the value of found_whole_row. */

        leaf_part_rri->ri_returningList = returningList;

        /*
         * Initialize the projection itself.
         *
         * Use the slot and the expression context that would have been set
         * up in ExecInitModifyTable() for projection's output.
         */
        Assert(mtstate->ps.ps_ResultTupleSlot != NULL);
        slot = mtstate->ps.ps_ResultTupleSlot;
        Assert(mtstate->ps.ps_ExprContext != NULL);
        econtext = mtstate->ps.ps_ExprContext;
        leaf_part_rri->ri_projectReturning =
            ExecBuildProjectionInfo(returningList, econtext, slot,
                                    &mtstate->ps, RelationGetDescr(partrel));
    }

    /* Set up information needed for routing tuples to the partition. */
    ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
                        leaf_part_rri, partidx);

    /*
     * If there is an ON CONFLICT clause, initialize state for it.
     */
    if (node && node->onConflictAction != ONCONFLICT_NONE)
    {
        int         firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
        TupleDesc   partrelDesc = RelationGetDescr(partrel);
        ExprContext *econtext = mtstate->ps.ps_ExprContext;
        ListCell   *lc;
        List       *arbiterIndexes = NIL;

        /*
         * If there is a list of arbiter indexes, map it to a list of indexes
         * in the partition.  We do that by scanning the partition's index
         * list and searching for ancestry relationships to each index in the
         * ancestor table.
         */
        if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) > 0)
        {
            List       *childIdxs;

            childIdxs = RelationGetIndexList(leaf_part_rri->ri_RelationDesc);

            foreach(lc, childIdxs)
            {
                Oid         childIdx = lfirst_oid(lc);
                List       *ancestors;
                ListCell   *lc2;

                ancestors = get_partition_ancestors(childIdx);
                foreach(lc2, rootResultRelInfo->ri_onConflictArbiterIndexes)
                {
                    if (list_member_oid(ancestors, lfirst_oid(lc2)))
                        arbiterIndexes = lappend_oid(arbiterIndexes, childIdx);
                }
                list_free(ancestors);
            }
        }

        /*
         * If the resulting lists are of unequal length, something is wrong.
         * (This shouldn't happen, since arbiter index selection should not
         * pick up an invalid index.)
         */
        if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) !=
            list_length(arbiterIndexes))
            elog(ERROR, "invalid arbiter index list");
        leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;

        /*
         * In the DO UPDATE case, we have some more state to initialize.
         */
        if (node->onConflictAction == ONCONFLICT_UPDATE)
        {
            TupleConversionMap *map;

            map = leaf_part_rri->ri_PartitionInfo->pi_RootToPartitionMap;

            Assert(node->onConflictSet != NIL);
            Assert(rootResultRelInfo->ri_onConflict != NULL);

            leaf_part_rri->ri_onConflict = makeNode(OnConflictSetState);

            /*
             * Need a separate existing slot for each partition, as the
             * partition could be of a different AM, even if the tuple
             * descriptors match.
             */
            leaf_part_rri->ri_onConflict->oc_Existing =
                table_slot_create(leaf_part_rri->ri_RelationDesc,
                                  &mtstate->ps.state->es_tupleTable);

            /*
             * If the partition's tuple descriptor matches exactly the root
             * parent (the common case), we can re-use most of the parent's
             * ON CONFLICT SET state, skipping a bunch of work.  Otherwise,
             * we need to create state specific to this partition.
             */
            if (map == NULL)
            {
                /*
                 * It's safe to reuse these from the partition root, as we
                 * only process one tuple at a time (therefore we won't
                 * overwrite needed data in slots), and the results of
                 * projections are independent of the underlying storage.
                 * Projections and where clauses themselves don't store state
                 * / are independent of the underlying storage.
                 */
                leaf_part_rri->ri_onConflict->oc_ProjSlot =
                    rootResultRelInfo->ri_onConflict->oc_ProjSlot;
                leaf_part_rri->ri_onConflict->oc_ProjInfo =
                    rootResultRelInfo->ri_onConflict->oc_ProjInfo;
                leaf_part_rri->ri_onConflict->oc_WhereClause =
                    rootResultRelInfo->ri_onConflict->oc_WhereClause;
            }
            else
            {
                List       *onconflset;
                TupleDesc   tupDesc;
                bool        found_whole_row;

                /*
                 * Translate expressions in onConflictSet to account for
                 * different attribute numbers.  For that, map partition
                 * varattnos twice: first to catch the EXCLUDED
                 * pseudo-relation (INNER_VAR), and second to handle the main
                 * target relation (firstVarno).
                 */
                onconflset = (List *) copyObject((Node *) node->onConflictSet);
                if (part_attnos == NULL)
                    part_attnos =
                        convert_tuples_by_name_map(RelationGetDescr(partrel),
                                                   RelationGetDescr(firstResultRel),
                                                   gettext_noop("could not convert row type"));
                onconflset = (List *)
                    map_variable_attnos((Node *) onconflset,
                                        INNER_VAR, 0,
                                        part_attnos,
                                        RelationGetDescr(firstResultRel)->natts,
                                        RelationGetForm(partrel)->reltype,
                                        &found_whole_row);
                /* We ignore the value of found_whole_row. */
                onconflset = (List *)
                    map_variable_attnos((Node *) onconflset,
                                        firstVarno, 0,
                                        part_attnos,
                                        RelationGetDescr(firstResultRel)->natts,
                                        RelationGetForm(partrel)->reltype,
                                        &found_whole_row);
                /* We ignore the value of found_whole_row. */

                /* Finally, adjust this tlist to match the partition. */
                onconflset = adjust_partition_tlist(onconflset, map);

                /* create the tuple slot for the UPDATE SET projection */
                tupDesc = ExecTypeFromTL(onconflset);
                leaf_part_rri->ri_onConflict->oc_ProjSlot =
                    ExecInitExtraTupleSlot(mtstate->ps.state, tupDesc,
                                           &TTSOpsVirtual);

                /* build UPDATE SET projection state */
                leaf_part_rri->ri_onConflict->oc_ProjInfo =
                    ExecBuildProjectionInfo(onconflset, econtext,
                                            leaf_part_rri->ri_onConflict->oc_ProjSlot,
                                            &mtstate->ps, partrelDesc);

                /*
                 * If there is a WHERE clause, initialize state where it will
                 * be evaluated, mapping the attribute numbers appropriately.
                 * As with onConflictSet, we need to map partition varattnos
                 * to the partition's tupdesc.
                 */
                if (node->onConflictWhere)
                {
                    List       *clause;

                    clause = copyObject((List *) node->onConflictWhere);
                    clause = (List *)
                        map_variable_attnos((Node *) clause,
                                            INNER_VAR, 0,
                                            part_attnos,
                                            RelationGetDescr(firstResultRel)->natts,
                                            RelationGetForm(partrel)->reltype,
                                            &found_whole_row);
                    /* We ignore the value of found_whole_row. */
                    clause = (List *)
                        map_variable_attnos((Node *) clause,
                                            firstVarno, 0,
                                            part_attnos,
                                            RelationGetDescr(firstResultRel)->natts,
                                            RelationGetForm(partrel)->reltype,
                                            &found_whole_row);
                    /* We ignore the value of found_whole_row. */
                    leaf_part_rri->ri_onConflict->oc_WhereClause =
                        ExecInitQual((List *) clause, &mtstate->ps);
                }
            }
        }
    }

    /*
     * Since we've just initialized this ResultRelInfo, it's not in any list
     * attached to the estate as yet.  Add it, so that it can be found later.
     *
     * Note that the entries in this list appear in no predetermined order,
     * because partition result rels are initialized as and when they're
     * needed.
     */
    MemoryContextSwitchTo(estate->es_query_cxt);
    estate->es_tuple_routing_result_relations =
        lappend(estate->es_tuple_routing_result_relations,
                leaf_part_rri);

    MemoryContextSwitchTo(oldcxt);

    return leaf_part_rri;
}

/*
 * ExecInitRoutingInfo
 *    Set up information needed for translating tuples between root
 *    partitioned table format and partition format, and keep track of it
 *    in PartitionTupleRouting.
 */
static void
ExecInitRoutingInfo(ModifyTableState *mtstate,
                    EState *estate,
                    PartitionTupleRouting *proute,
                    PartitionDispatch dispatch,
                    ResultRelInfo *partRelInfo,
                    int partidx)
{
    MemoryContext oldcxt;
    PartitionRoutingInfo *partrouteinfo;
    int         rri_index;

    oldcxt = MemoryContextSwitchTo(proute->memcxt);

    partrouteinfo = palloc(sizeof(PartitionRoutingInfo));

    /*
     * Set up a tuple conversion map to convert a tuple routed to the
     * partition from the parent's type to the partition's.
     */
    partrouteinfo->pi_RootToPartitionMap =
        convert_tuples_by_name(RelationGetDescr(partRelInfo->ri_PartitionRoot),
                               RelationGetDescr(partRelInfo->ri_RelationDesc),
                               gettext_noop("could not convert row type"));

    /*
     * If a partition has a different rowtype than the root parent,
     * initialize a slot dedicated to storing this partition's tuples.  The
     * slot is used for various operations that are applied to tuples after
     * routing, such as checking constraints.
     */
    if (partrouteinfo->pi_RootToPartitionMap != NULL)
    {
        Relation    partrel = partRelInfo->ri_RelationDesc;

        /*
         * Initialize the slot itself setting its descriptor to this
         * partition's TupleDesc; TupleDesc reference will be released at the
         * end of the command.
         */
        partrouteinfo->pi_PartitionTupleSlot =
            table_slot_create(partrel, &estate->es_tupleTable);
    }
    else
        partrouteinfo->pi_PartitionTupleSlot = NULL;

    /*
     * Also, if transition capture is required, store a map to convert tuples
     * from partition's rowtype to the root partition table's.
     */
    if (mtstate &&
        (mtstate->mt_transition_capture || mtstate->mt_oc_transition_capture))
    {
        partrouteinfo->pi_PartitionToRootMap =
            convert_tuples_by_name(RelationGetDescr(partRelInfo->ri_RelationDesc),
                                   RelationGetDescr(partRelInfo->ri_PartitionRoot),
                                   gettext_noop("could not convert row type"));
    }
    else
        partrouteinfo->pi_PartitionToRootMap = NULL;

    /*
     * If the partition is a foreign table, let the FDW init itself for
     * routing tuples to the partition.
     */
    if (partRelInfo->ri_FdwRoutine != NULL &&
        partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
        partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo);

    partRelInfo->ri_PartitionInfo = partrouteinfo;
    partRelInfo->ri_CopyMultiInsertBuffer = NULL;

    /*
     * Keep track of it in the PartitionTupleRouting->partitions array.
     */
    Assert(dispatch->indexes[partidx] == -1);

    rri_index = proute->num_partitions++;

    /* Allocate or enlarge the array, as needed */
    if (proute->num_partitions >= proute->max_partitions)
    {
        if (proute->max_partitions == 0)
        {
            proute->max_partitions = 8;
            proute->partitions = (ResultRelInfo **)
                palloc(sizeof(ResultRelInfo *) * proute->max_partitions);
        }
        else
        {
            proute->max_partitions *= 2;
            proute->partitions = (ResultRelInfo **)
                repalloc(proute->partitions, sizeof(ResultRelInfo *) *
                         proute->max_partitions);
        }
    }

    proute->partitions[rri_index] = partRelInfo;
    dispatch->indexes[partidx] = rri_index;

    MemoryContextSwitchTo(oldcxt);
}

/*
 * ExecInitPartitionDispatchInfo
 *    Lock the partitioned table (if not locked already) and initialize
 *    PartitionDispatch for a partitioned table and store it in the next
 *    available slot in the proute->partition_dispatch_info array.  Also,
 *    record the index into this array in the parent_pd->indexes[] array in
 *    the partidx element so that we can properly retrieve the newly created
 *    PartitionDispatch later.
 */
static PartitionDispatch
ExecInitPartitionDispatchInfo(EState *estate,
                              PartitionTupleRouting *proute, Oid partoid,
                              PartitionDispatch parent_pd, int partidx)
{
    Relation    rel;
    PartitionDesc partdesc;
    PartitionDispatch pd;
    int         dispatchidx;
    MemoryContext oldcxt;

    if (estate->es_partition_directory == NULL)
        estate->es_partition_directory =
            CreatePartitionDirectory(estate->es_query_cxt);

    oldcxt = MemoryContextSwitchTo(proute->memcxt);

    /*
     * Only sub-partitioned tables need to be locked here.  The root
     * partitioned table will already have been locked as it's referenced in
     * the query's rtable.
     */
    if (partoid != RelationGetRelid(proute->partition_root))
        rel = table_open(partoid, RowExclusiveLock);
    else
        rel = proute->partition_root;
    partdesc = PartitionDirectoryLookup(estate->es_partition_directory, rel);

    pd = (PartitionDispatch) palloc(offsetof(PartitionDispatchData, indexes) +
                                    partdesc->nparts * sizeof(int));
    pd->reldesc = rel;
    pd->key = RelationGetPartitionKey(rel);
    pd->keystate = NIL;
    pd->partdesc = partdesc;
    if (parent_pd != NULL)
    {
        TupleDesc   tupdesc = RelationGetDescr(rel);

        /*
         * For sub-partitioned tables where the column order differs from its
         * direct parent partitioned table, we must store a tuple table slot
         * initialized with its tuple descriptor and a tuple conversion map
         * to convert a tuple from its parent's rowtype to its own.  This is
         * to make sure that we are looking at the correct row using the
         * correct tuple descriptor when computing its partition key for
         * tuple routing.
         */
        pd->tupmap = convert_tuples_by_name_map_if_req(RelationGetDescr(parent_pd->reldesc),
                                                       tupdesc,
                                                       gettext_noop("could not convert row type"));
        pd->tupslot = pd->tupmap ?
            MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual) : NULL;
    }
    else
    {
        /* Not required for the root partitioned table */
        pd->tupmap = NULL;
        pd->tupslot = NULL;
    }

    /*
     * Initialize with -1 to signify that the corresponding partition's
     * ResultRelInfo or PartitionDispatch has not been created yet.
     */
    memset(pd->indexes, -1, sizeof(int) * partdesc->nparts);

    /* Track in PartitionTupleRouting for later use */
    dispatchidx = proute->num_dispatch++;

    /* Allocate or enlarge the array, as needed */
    if (proute->num_dispatch >= proute->max_dispatch)
    {
        if (proute->max_dispatch == 0)
        {
            proute->max_dispatch = 4;
            proute->partition_dispatch_info = (PartitionDispatch *)
                palloc(sizeof(PartitionDispatch) * proute->max_dispatch);
        }
        else
        {
            proute->max_dispatch *= 2;
            proute->partition_dispatch_info = (PartitionDispatch *)
                repalloc(proute->partition_dispatch_info,
                         sizeof(PartitionDispatch) * proute->max_dispatch);
        }
    }
    proute->partition_dispatch_info[dispatchidx] = pd;

    /*
     * Finally, if setting up a PartitionDispatch for a sub-partitioned
     * table, install a downlink in the parent to allow quick descent.
     */
    if (parent_pd)
    {
        Assert(parent_pd->indexes[partidx] == -1);
        parent_pd->indexes[partidx] = dispatchidx;
    }

    MemoryContextSwitchTo(oldcxt);

    return pd;
}

/*
 * ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
 * routing.
 *
 * Close all the partitioned tables, leaf partitions, and their indices.
 */
void
ExecCleanupTupleRouting(ModifyTableState *mtstate,
                        PartitionTupleRouting *proute)
{
    HTAB       *htab = proute->subplan_resultrel_htab;
    int         i;

    /*
     * Remember, proute->partition_dispatch_info[0] corresponds to the root
     * partitioned table, which we must not try to close, because it is the
     * main target table of the query that will be closed by callers such as
     * ExecEndPlan() or DoCopy().  Also, tupslot is NULL for the root
     * partitioned table.
     */
    for (i = 1; i < proute->num_dispatch; i++)
    {
        PartitionDispatch pd = proute->partition_dispatch_info[i];

        table_close(pd->reldesc, NoLock);

        if (pd->tupslot)
            ExecDropSingleTupleTableSlot(pd->tupslot);
    }

    for (i = 0; i < proute->num_partitions; i++)
    {
        ResultRelInfo *resultRelInfo = proute->partitions[i];

        /* Allow any FDWs to shut down */
        if (resultRelInfo->ri_FdwRoutine != NULL &&
            resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
            resultRelInfo->ri_FdwRoutine->EndForeignInsert(mtstate->ps.state,
                                                           resultRelInfo);

        /*
         * Check if this result rel is one belonging to the node's subplans,
         * if so, let ExecEndPlan() clean it up.
         */
        if (htab)
        {
            Oid         partoid;
            bool        found;

            partoid = RelationGetRelid(resultRelInfo->ri_RelationDesc);

            (void) hash_search(htab, &partoid, HASH_FIND, &found);
            if (found)
                continue;
        }

        ExecCloseIndices(resultRelInfo);
        table_close(resultRelInfo->ri_RelationDesc, NoLock);
    }
}

/* ----------------
 * FormPartitionKeyDatum
 *    Construct values[] and isnull[] arrays for the partition key
 *    of a tuple.
 *
 * pd       Partition dispatch object of the partitioned table
 * slot     Heap tuple from which to extract partition key
 * estate   executor state for evaluating any partition key
 *          expressions (must be non-NULL)
 * values   Array of partition key Datums (output area)
 * isnull   Array of is-null indicators (output area)
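 *
 * For example (an illustrative case): for a table created with
 * PARTITION BY RANGE ((a + b)), partattrs[0] is 0 (no plain column), so the
 * value is not fetched from the tuple directly; instead the expression
 * a + b is evaluated using the ExprState list kept in pd->keystate.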
 *
 * the ecxt_scantuple slot of estate's per-tuple expr context must point to
 * the heap tuple passed in.
 * ----------------
 */
static void
FormPartitionKeyDatum(PartitionDispatch pd,
                      TupleTableSlot *slot,
                      EState *estate,
                      Datum *values,
                      bool *isnull)
{
    ListCell   *partexpr_item;
    int         i;

    if (pd->key->partexprs != NIL && pd->keystate == NIL)
    {
        /* Check caller has set up context correctly */
        Assert(estate != NULL &&
               GetPerTupleExprContext(estate)->ecxt_scantuple == slot);

        /* First time through, set up expression evaluation state */
        pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
    }

    partexpr_item = list_head(pd->keystate);
    for (i = 0; i < pd->key->partnatts; i++)
    {
        AttrNumber  keycol = pd->key->partattrs[i];
        Datum       datum;
        bool        isNull;

        if (keycol != 0)
        {
            /* Plain column; get the value directly from the heap tuple */
            datum = slot_getattr(slot, keycol, &isNull);
        }
        else
        {
            /* Expression; need to evaluate it */
            if (partexpr_item == NULL)
                elog(ERROR, "wrong number of partition key expressions");
            datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
                                              GetPerTupleExprContext(estate),
                                              &isNull);
            partexpr_item = lnext(partexpr_item);
        }
        values[i] = datum;
        isnull[i] = isNull;
    }

    if (partexpr_item != NULL)
        elog(ERROR, "wrong number of partition key expressions");
}

/*
 * get_partition_for_tuple
 *    Finds the partition of the given relation that accepts the partition
 *    key specified in 'values' and 'isnull'
 *
 * Return value is index of the partition (>= 0 and < partdesc->nparts) if one
 * found or -1 if none found.
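 *
 * For example (an illustrative sketch): for a table partitioned BY HASH
 * with all partitions at modulus 4, the row's hash is computed with
 * compute_partition_hash_value() and boundinfo->indexes[rowHash % 4]
 * yields the partition index, or -1 if no partition has that remainder.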
 */
static int
get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)
{
    int         bound_offset;
    int         part_index = -1;
    PartitionKey key = pd->key;
    PartitionDesc partdesc = pd->partdesc;
    PartitionBoundInfo boundinfo = partdesc->boundinfo;

    /* Route as appropriate based on partitioning strategy. */
    switch (key->strategy)
    {
        case PARTITION_STRATEGY_HASH:
            {
                int         greatest_modulus;
                uint64      rowHash;

                greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
                rowHash = compute_partition_hash_value(key->partnatts,
                                                       key->partsupfunc,
                                                       key->partcollation,
                                                       values, isnull);

                part_index = boundinfo->indexes[rowHash % greatest_modulus];
            }
            break;

        case PARTITION_STRATEGY_LIST:
            if (isnull[0])
            {
                if (partition_bound_accepts_nulls(boundinfo))
                    part_index = boundinfo->null_index;
            }
            else
            {
                bool        equal = false;

                bound_offset = partition_list_bsearch(key->partsupfunc,
                                                      key->partcollation,
                                                      boundinfo,
                                                      values[0], &equal);
                if (bound_offset >= 0 && equal)
                    part_index = boundinfo->indexes[bound_offset];
            }
            break;

        case PARTITION_STRATEGY_RANGE:
            {
                bool        equal = false,
                            range_partkey_has_null = false;
                int         i;

                /*
                 * No range includes NULL, so this will be accepted by the
                 * default partition if there is one, and otherwise rejected.
                 */
                for (i = 0; i < key->partnatts; i++)
                {
                    if (isnull[i])
                    {
                        range_partkey_has_null = true;
                        break;
                    }
                }

                if (!range_partkey_has_null)
                {
                    bound_offset = partition_range_datum_bsearch(key->partsupfunc,
                                                                 key->partcollation,
                                                                 boundinfo,
                                                                 key->partnatts,
                                                                 values,
                                                                 &equal);

                    /*
                     * The bound at bound_offset is less than or equal to the
                     * tuple value, so the bound at offset+1 is the upper
                     * bound of the partition we're looking for, if there
                     * actually exists one.
                     */
                    part_index = boundinfo->indexes[bound_offset + 1];
                }
            }
            break;

        default:
            elog(ERROR, "unexpected partition strategy: %d",
                 (int) key->strategy);
    }

    /*
     * part_index < 0 means we failed to find a partition of this parent.
     * Use the default partition, if there is one.
     */
    if (part_index < 0)
        part_index = boundinfo->default_index;

    return part_index;
}

/*
 * ExecBuildSlotPartitionKeyDescription
 *
 * This works very much like BuildIndexValueDescription() and is currently
 * used for building error messages when ExecFindPartition() fails to find
 * partition for a row.
 */
static char *
ExecBuildSlotPartitionKeyDescription(Relation rel,
                                     Datum *values,
                                     bool *isnull,
                                     int maxfieldlen)
{
    StringInfoData buf;
    PartitionKey key = RelationGetPartitionKey(rel);
    int         partnatts = get_partition_natts(key);
    int         i;
    Oid         relid = RelationGetRelid(rel);
    AclResult   aclresult;

    if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
        return NULL;

    /* If the user has table-level access, just go build the description. */
    aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
    if (aclresult != ACLCHECK_OK)
    {
        /*
         * Step through the columns of the partition key and make sure the
         * user has SELECT rights on all of them.
         */
        for (i = 0; i < partnatts; i++)
        {
            AttrNumber  attnum = get_partition_col_attnum(key, i);

            /*
             * If this partition key column is an expression, we return no
             * detail rather than try to figure out what column(s) the
             * expression includes and if the user has SELECT rights on them.
             */
            if (attnum == InvalidAttrNumber ||
                pg_attribute_aclcheck(relid, attnum, GetUserId(),
                                      ACL_SELECT) != ACLCHECK_OK)
                return NULL;
        }
    }

    initStringInfo(&buf);
    appendStringInfo(&buf, "(%s) = (",
                     pg_get_partkeydef_columns(relid, true));

    for (i = 0; i < partnatts; i++)
    {
        char       *val;
        int         vallen;

        if (isnull[i])
            val = "null";
        else
        {
            Oid         foutoid;
            bool        typisvarlena;

            getTypeOutputInfo(get_partition_col_typid(key, i),
                              &foutoid, &typisvarlena);
            val = OidOutputFunctionCall(foutoid, values[i]);
        }

        if (i > 0)
            appendStringInfoString(&buf, ", ");

        /* truncate if needed */
        vallen = strlen(val);
        if (vallen <= maxfieldlen)
            appendStringInfoString(&buf, val);
        else
        {
            vallen = pg_mbcliplen(val, vallen, maxfieldlen);
            appendBinaryStringInfo(&buf, val, vallen);
            appendStringInfoString(&buf, "...");
        }
    }

    appendStringInfoChar(&buf, ')');

    return buf.data;
}

/*
 * adjust_partition_tlist
 *    Adjust the targetlist entries for a given partition to account for
 *    attribute differences between parent and the partition
 *
 * The expressions have already been fixed, but here we fix the list to make
 * target resnos match the partition's attribute numbers.  This results in a
 * copy of the original target list in which the entries appear in resno
 * order, including both the existing entries (that may have their resno
 * changed in-place) and the newly added entries for columns that don't exist
 * in the parent.
 *
 * Scribbles on the input tlist, so callers must make sure to make a copy
 * before passing it to us.
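 *
 * For example (hypothetical): if the parent has columns (a, b) and the
 * partition was created with columns (b, a), then for partition attno 1
 * (column b) attrMap yields parent attno 2, so the parent's tlist entry
 * for b is reused here with its resno changed to 1.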
 */
static List *
adjust_partition_tlist(List *tlist, TupleConversionMap *map)
{
    List       *new_tlist = NIL;
    TupleDesc   tupdesc = map->outdesc;
    AttrNumber *attrMap = map->attrMap;
    AttrNumber  attrno;

    for (attrno = 1; attrno <= tupdesc->natts; attrno++)
    {
        Form_pg_attribute att_tup = TupleDescAttr(tupdesc, attrno - 1);
        TargetEntry *tle;

        if (attrMap[attrno - 1] != InvalidAttrNumber)
        {
            Assert(!att_tup->attisdropped);

            /*
             * Use the corresponding entry from the parent's tlist, adjusting
             * the resno to match the partition's attno.
             */
            tle = (TargetEntry *) list_nth(tlist, attrMap[attrno - 1] - 1);
            tle->resno = attrno;
        }
        else
        {
            Const      *expr;

            /*
             * For a dropped attribute in the partition, generate a dummy
             * entry with resno matching the partition's attno.
             */
            Assert(att_tup->attisdropped);
            expr = makeConst(INT4OID,
                             -1,
                             InvalidOid,
                             sizeof(int32),
                             (Datum) 0,
                             true,      /* isnull */
                             true /* byval */ );
            tle = makeTargetEntry((Expr *) expr,
                                  attrno,
                                  pstrdup(NameStr(att_tup->attname)),
                                  false);
        }

        new_tlist = lappend(new_tlist, tle);
    }

    return new_tlist;
}

/*-------------------------------------------------------------------------
 * Run-Time Partition Pruning Support.
 *
 * The following series of functions exist to support the removal of unneeded
 * subplans for queries against partitioned tables.  The supporting functions
 * here are designed to work with any plan type which supports an arbitrary
 * number of subplans, e.g. Append, MergeAppend.
 *
 * When pruning involves comparison of a partition key to a constant, it's
 * done by the planner.  However, if we have a comparison to a non-constant
 * but not volatile expression, that presents an opportunity for run-time
 * pruning by the executor, allowing irrelevant partitions to be skipped
 * dynamically.
 *
 * We must distinguish expressions containing PARAM_EXEC Params from
 * expressions that don't contain those.  Even though a PARAM_EXEC Param is
 * considered to be a stable expression, it can change value from one plan
 * node scan to the next during query execution.  Stable comparison
 * expressions that don't involve such Params allow partition pruning to be
 * done once during executor startup.  Expressions that do involve such
 * Params require us to prune separately for each scan of the parent plan
 * node.
 *
 * Note that pruning away unneeded subplans during executor startup has the
 * added benefit of not having to initialize the unneeded subplans at all.
 *
 *
 * Functions:
 *
 * ExecCreatePartitionPruneState:
 *    Creates the PartitionPruneState required by each of the two pruning
 *    functions.  Details stored include how to map the partition index
 *    returned by the partition pruning code into subplan indexes.
 *
 * ExecFindInitialMatchingSubPlans:
 *    Returns indexes of matching subplans.  Partition pruning is attempted
 *    without any evaluation of expressions containing PARAM_EXEC Params.
 *    This function must be called during executor startup for the parent
 *    plan before the subplans themselves are initialized.  Subplans which
 *    are found not to match by this function must be removed from the
 *    plan's list of subplans during execution, as this function performs a
 *    remap of the partition index to subplan index map and the newly
 *    created map provides indexes only for subplans which remain after
 *    calling this function.
 *
 * ExecFindMatchingSubPlans:
 *    Returns indexes of matching subplans after evaluating all available
 *    expressions.  This function can only be called during execution and
 *    must be called again each time the value of a Param listed in
 *    PartitionPruneState's 'execparamids' changes.
 *-------------------------------------------------------------------------
 */
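
/*
 * For example (an illustrative sketch, not code from this file): with a
 * generic plan for
 *
 *     PREPARE q AS SELECT * FROM parttab WHERE partkey = $1;
 *
 * the comparison value is known before the plan tree is initialized, so
 * non-matching subplans can be pruned once at executor startup via
 * ExecFindInitialMatchingSubPlans().  By contrast, if parttab is scanned on
 * the inner side of a nested loop with partkey compared to a value from the
 * outer side, that value is a PARAM_EXEC Param which may change at every
 * rescan, so ExecFindMatchingSubPlans() must be consulted again whenever
 * such a Param changes.
 */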

/*
 * ExecCreatePartitionPruneState
 *    Build the data structure required for calling
 *    ExecFindInitialMatchingSubPlans and ExecFindMatchingSubPlans.
 *
 * 'planstate' is the parent plan node's execution state.
 *
 * 'partitionpruneinfo' is a PartitionPruneInfo as generated by
 * make_partition_pruneinfo.  Here we build a PartitionPruneState containing
 * a PartitionPruningData for each partitioning hierarchy (i.e., each sublist
 * of partitionpruneinfo->prune_infos), each of which contains a
 * PartitionedRelPruningData for each PartitionedRelPruneInfo appearing in
 * that sublist.  This two-level system is needed to keep from confusing the
 * different hierarchies when a UNION ALL contains multiple partitioned
 * tables as children.  The data stored in each PartitionedRelPruningData can
 * be re-used each time we re-evaluate which partitions match the pruning
 * steps provided in each PartitionedRelPruneInfo.
 */
PartitionPruneState *
ExecCreatePartitionPruneState(PlanState *planstate,
                              PartitionPruneInfo *partitionpruneinfo)
{
    EState     *estate = planstate->state;
    PartitionPruneState *prunestate;
    int         n_part_hierarchies;
    ListCell   *lc;
    int         i;

    if (estate->es_partition_directory == NULL)
        estate->es_partition_directory =
            CreatePartitionDirectory(estate->es_query_cxt);

    n_part_hierarchies = list_length(partitionpruneinfo->prune_infos);
    Assert(n_part_hierarchies > 0);

    /*
     * Allocate the data structure
     */
    prunestate = (PartitionPruneState *)
        palloc(offsetof(PartitionPruneState, partprunedata) +
               sizeof(PartitionPruningData *) * n_part_hierarchies);

    prunestate->execparamids = NULL;
    /* other_subplans can change at runtime, so we need our own copy */
    prunestate->other_subplans = bms_copy(partitionpruneinfo->other_subplans);
    prunestate->do_initial_prune = false;   /* may be set below */
    prunestate->do_exec_prune = false;      /* may be set below */
    prunestate->num_partprunedata = n_part_hierarchies;

    /*
     * Create a short-term memory context which we'll use when making calls
     * to the partition pruning functions.  This avoids possible memory
     * leaks, since the pruning functions call comparison functions that
     * aren't under our control.
     */
    prunestate->prune_context =
        AllocSetContextCreate(CurrentMemoryContext,
                              "Partition Prune",
                              ALLOCSET_DEFAULT_SIZES);

    i = 0;
    foreach(lc, partitionpruneinfo->prune_infos)
    {
        List       *partrelpruneinfos = lfirst_node(List, lc);
        int         npartrelpruneinfos = list_length(partrelpruneinfos);
        PartitionPruningData *prunedata;
        ListCell   *lc2;
        int         j;

        prunedata = (PartitionPruningData *)
            palloc(offsetof(PartitionPruningData, partrelprunedata) +
                   npartrelpruneinfos * sizeof(PartitionedRelPruningData));
        prunestate->partprunedata[i] = prunedata;
        prunedata->num_partrelprunedata = npartrelpruneinfos;

        j = 0;
        foreach(lc2, partrelpruneinfos)
        {
            PartitionedRelPruneInfo *pinfo = lfirst_node(PartitionedRelPruneInfo, lc2);
            PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
            Relation    partrel;
            PartitionDesc partdesc;
            PartitionKey partkey;

            /*
             * We can rely on the copies of the partitioned table's partition
             * key and partition descriptor appearing in its relcache entry,
             * because that entry will be held open and locked for the
             * duration of this executor run.
             */
            partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex);
            partkey = RelationGetPartitionKey(partrel);
            partdesc = PartitionDirectoryLookup(estate->es_partition_directory,
                                                partrel);

            /*
             * Initialize the subplan_map and subpart_map.  Since detaching a
             * partition requires AccessExclusiveLock, no partitions can have
             * disappeared, nor can the bounds for any partition have
             * changed.  However, new partitions may have been added.
             */
            Assert(partdesc->nparts >= pinfo->nparts);
            pprune->nparts = partdesc->nparts;
            pprune->subplan_map = palloc(sizeof(int) * partdesc->nparts);
            if (partdesc->nparts == pinfo->nparts)
            {
                /*
                 * There are no new partitions, so this is simple.  We can
                 * simply point to the subpart_map from the plan, but we must
                 * copy the subplan_map since we may change it later.
                 */
                pprune->subpart_map = pinfo->subpart_map;
                memcpy(pprune->subplan_map, pinfo->subplan_map,
                       sizeof(int) * pinfo->nparts);

                /*
                 * Double-check that the list of unpruned relations has not
                 * changed.  (Pruned partitions are not in relid_map[].)
                 */
#ifdef USE_ASSERT_CHECKING
                for (int k = 0; k < pinfo->nparts; k++)
                {
                    Assert(partdesc->oids[k] == pinfo->relid_map[k] ||
                           pinfo->subplan_map[k] == -1);
                }
#endif
            }
            else
            {
                int         pd_idx = 0;
                int         pp_idx;

                /*
                 * Some new partitions have appeared since plan time, and
                 * those are reflected in our PartitionDesc but were not
                 * present in the one used to construct subplan_map and
                 * subpart_map.  So we must construct new and longer arrays
                 * where the partitions that were originally present map to
                 * the same place, and any added indexes map to -1, as if the
                 * new partitions had been pruned.
                 */
1683 pprune->subpart_map = palloc(sizeof(int) * partdesc->nparts);
1684 for (pp_idx = 0; pp_idx < partdesc->nparts; ++pp_idx)
1685 {
1686 if (pinfo->relid_map[pd_idx] != partdesc->oids[pp_idx])
1687 {
1688 pprune->subplan_map[pp_idx] = -1;
1689 pprune->subpart_map[pp_idx] = -1;
1690 }
1691 else
1692 {
1693 pprune->subplan_map[pp_idx] =
1694 pinfo->subplan_map[pd_idx];
1695 pprune->subpart_map[pp_idx] =
1696 pinfo->subpart_map[pd_idx++];
1697 }
1698 }
1699 Assert(pd_idx == pinfo->nparts);
1700 }
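
            /*
             * As a purely hypothetical illustration of the above: if the
             * plan was built against partitions (A, C) and a partition B
             * was attached afterwards, partdesc->oids is now (A, B, C)
             * while relid_map is (A, C).  The loop keeps A's and C's
             * plan-time map entries and sets B's to -1, exactly as if B
             * had been pruned.
             */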

            /* present_parts is also subject to later modification */
            pprune->present_parts = bms_copy(pinfo->present_parts);

            /*
             * Initialize pruning contexts as needed.
             */
            pprune->initial_pruning_steps = pinfo->initial_pruning_steps;
            if (pinfo->initial_pruning_steps)
            {
                ExecInitPruningContext(&pprune->initial_context,
                                       pinfo->initial_pruning_steps,
                                       partdesc, partkey, planstate);
                /* Record whether initial pruning is needed at any level */
                prunestate->do_initial_prune = true;
            }
            pprune->exec_pruning_steps = pinfo->exec_pruning_steps;
            if (pinfo->exec_pruning_steps)
            {
                ExecInitPruningContext(&pprune->exec_context,
                                       pinfo->exec_pruning_steps,
                                       partdesc, partkey, planstate);
                /* Record whether exec pruning is needed at any level */
                prunestate->do_exec_prune = true;
            }

            /*
             * Accumulate the IDs of all PARAM_EXEC Params affecting the
             * partitioning decisions at this plan node.
             */
            prunestate->execparamids = bms_add_members(prunestate->execparamids,
                                                       pinfo->execparamids);
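
            /*
             * (Callers such as nodeAppend.c consult execparamids to decide
             * whether changed Params require re-running exec pruning during
             * a rescan.)
             */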

            j++;
        }
        i++;
    }

    return prunestate;
}

/*
 * Initialize a PartitionPruneContext for the given list of pruning steps.
 */
static void
ExecInitPruningContext(PartitionPruneContext *context,
                       List *pruning_steps,
                       PartitionDesc partdesc,
                       PartitionKey partkey,
                       PlanState *planstate)
{
    int         n_steps;
    int         partnatts;
    ListCell   *lc;

    n_steps = list_length(pruning_steps);

    context->strategy = partkey->strategy;
    context->partnatts = partnatts = partkey->partnatts;
    context->nparts = partdesc->nparts;
    context->boundinfo = partdesc->boundinfo;
    context->partcollation = partkey->partcollation;
    context->partsupfunc = partkey->partsupfunc;

    /* We'll look up type-specific support functions as needed */
    context->stepcmpfuncs = (FmgrInfo *)
        palloc0(sizeof(FmgrInfo) * n_steps * partnatts);

    context->ppccontext = CurrentMemoryContext;
    context->planstate = planstate;

    /* Initialize expression state for each expression we need */
    context->exprstates = (ExprState **)
        palloc0(sizeof(ExprState *) * n_steps * partnatts);
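
    /*
     * exprstates is in effect a 2-D array indexed by (step_id, keyno):
     * PruneCxtStateIdx computes step_id * partnatts + keyno.  Entries for
     * Const expressions, and for step kinds other than
     * PartitionPruneStepOp, are simply left NULL.
     */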
    foreach(lc, pruning_steps)
    {
        PartitionPruneStepOp *step = (PartitionPruneStepOp *) lfirst(lc);
        ListCell   *lc2;
        int         keyno;

        /* not needed for other step kinds */
        if (!IsA(step, PartitionPruneStepOp))
            continue;

        Assert(list_length(step->exprs) <= partnatts);

        keyno = 0;
        foreach(lc2, step->exprs)
        {
            Expr       *expr = (Expr *) lfirst(lc2);

            /* not needed for Consts */
            if (!IsA(expr, Const))
            {
                int         stateidx = PruneCxtStateIdx(partnatts,
                                                        step->step.step_id,
                                                        keyno);

                context->exprstates[stateidx] =
                    ExecInitExpr(expr, context->planstate);
            }
            keyno++;
        }
    }
}

/*
 * ExecFindInitialMatchingSubPlans
 *      Identify the set of subplans that cannot be eliminated by initial
 *      pruning, disregarding any pruning constraints involving PARAM_EXEC
 *      Params.
 *
 * If additional pruning passes will be required (because of PARAM_EXEC
 * Params), we must also update the translation data that allows conversion
 * of partition indexes into subplan indexes to account for the unneeded
 * subplans having been removed.
 *
 * Must only be called once per 'prunestate', and only if initial pruning
 * is required.
 *
 * 'nsubplans' must be passed as the total number of unpruned subplans.
 */
Bitmapset *
ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans)
{
    Bitmapset  *result = NULL;
    MemoryContext oldcontext;
    int         i;

    /* Caller error if we get here without do_initial_prune */
    Assert(prunestate->do_initial_prune);

    /*
     * Switch to a temp context to avoid leaking memory in the executor's
     * query-lifespan memory context.
     */
    oldcontext = MemoryContextSwitchTo(prunestate->prune_context);

    /*
     * For each hierarchy, do the pruning tests, and add nondeletable
     * subplans' indexes to "result".
     */
    for (i = 0; i < prunestate->num_partprunedata; i++)
    {
        PartitionPruningData *prunedata;
        PartitionedRelPruningData *pprune;

        prunedata = prunestate->partprunedata[i];
        pprune = &prunedata->partrelprunedata[0];

        /* Perform pruning without using PARAM_EXEC Params */
        find_matching_subplans_recurse(prunedata, pprune, true, &result);

        /* Expression eval may have used space in node's ps_ExprContext too */
        if (pprune->initial_pruning_steps)
            ResetExprContext(pprune->initial_context.planstate->ps_ExprContext);
    }

    /* Add in any subplans that partition pruning didn't account for */
    result = bms_add_members(result, prunestate->other_subplans);

    MemoryContextSwitchTo(oldcontext);

    /* Copy result out of the temp context before we reset it */
    result = bms_copy(result);

    MemoryContextReset(prunestate->prune_context);

    /*
     * If exec-time pruning is required and we pruned subplans above, then we
     * must re-sequence the subplan indexes so that ExecFindMatchingSubPlans
     * properly returns the indexes from the subplans which will remain after
     * execution of this function.
     *
     * We can safely skip this when !do_exec_prune, even though that leaves
     * invalid data in prunestate, because that data won't be consulted again
     * (cf initial Assert in ExecFindMatchingSubPlans).
     */
    if (prunestate->do_exec_prune && bms_num_members(result) < nsubplans)
    {
        int        *new_subplan_indexes;
        Bitmapset  *new_other_subplans;
        int         i;
        int         newidx;

        /*
         * First we must build a temporary array which maps old subplan
         * indexes to new ones.  For convenience of initialization, we use
         * 1-based indexes in this array and leave pruned items as 0.
         */
        new_subplan_indexes = (int *) palloc0(sizeof(int) * nsubplans);
        newidx = 1;
        i = -1;
        while ((i = bms_next_member(result, i)) >= 0)
        {
            Assert(i < nsubplans);
            new_subplan_indexes[i] = newidx++;
        }
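
        /*
         * For example (hypothetical values): with nsubplans = 4 and
         * result = {0, 2}, new_subplan_indexes ends up as {1, 0, 2, 0};
         * i.e., old subplan 0 keeps index 0, old subplan 2 becomes index 1,
         * and old subplans 1 and 3 are marked pruned.
         */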

        /*
         * Now we can update each PartitionedRelPruneInfo's subplan_map with
         * new subplan indexes.  We must also recompute its present_parts
         * bitmap.
         */
        for (i = 0; i < prunestate->num_partprunedata; i++)
        {
            PartitionPruningData *prunedata = prunestate->partprunedata[i];
            int         j;

            /*
             * Within each hierarchy, we perform this loop in back-to-front
             * order so that we determine present_parts for the lowest-level
             * partitioned tables first.  This way we can tell whether a
             * sub-partitioned table's partitions were entirely pruned so we
             * can exclude it from the current level's present_parts.
             */
            for (j = prunedata->num_partrelprunedata - 1; j >= 0; j--)
            {
                PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j];
                int         nparts = pprune->nparts;
                int         k;

                /* We just rebuild present_parts from scratch */
                bms_free(pprune->present_parts);
                pprune->present_parts = NULL;

                for (k = 0; k < nparts; k++)
                {
                    int         oldidx = pprune->subplan_map[k];
                    int         subidx;

                    /*
                     * If this partition existed as a subplan then change the
                     * old subplan index to the new subplan index.  The new
                     * index may become -1 if the partition was pruned above,
                     * or it may just be lower than before because subplans
                     * earlier in the list were removed (pruned entries are 0
                     * in new_subplan_indexes, so the subtraction below
                     * yields -1).  If it's a subpartition, add it to
                     * present_parts unless it's entirely pruned.
                     */
                    if (oldidx >= 0)
                    {
                        Assert(oldidx < nsubplans);
                        pprune->subplan_map[k] = new_subplan_indexes[oldidx] - 1;

                        if (new_subplan_indexes[oldidx] > 0)
                            pprune->present_parts =
                                bms_add_member(pprune->present_parts, k);
                    }
                    else if ((subidx = pprune->subpart_map[k]) >= 0)
                    {
                        PartitionedRelPruningData *subprune;

                        subprune = &prunedata->partrelprunedata[subidx];

                        if (!bms_is_empty(subprune->present_parts))
                            pprune->present_parts =
                                bms_add_member(pprune->present_parts, k);
                    }
                }
            }
        }

        /*
         * We must also recompute the other_subplans set, since indexes in it
         * may change.
         */
        new_other_subplans = NULL;
        i = -1;
        while ((i = bms_next_member(prunestate->other_subplans, i)) >= 0)
            new_other_subplans = bms_add_member(new_other_subplans,
                                                new_subplan_indexes[i] - 1);

        bms_free(prunestate->other_subplans);
        prunestate->other_subplans = new_other_subplans;

        pfree(new_subplan_indexes);
    }

    return result;
}

/*
 * ExecFindMatchingSubPlans
 *      Determine which subplans match the pruning steps detailed in
 *      'prunestate' for the current comparison expression values.
 *
 * Here we assume we may evaluate PARAM_EXEC Params.
 */
Bitmapset *
ExecFindMatchingSubPlans(PartitionPruneState *prunestate)
{
    Bitmapset  *result = NULL;
    MemoryContext oldcontext;
    int         i;

    /*
     * If !do_exec_prune, we've got problems because
     * ExecFindInitialMatchingSubPlans will not have bothered to update
     * prunestate for whatever pruning it did.
     */
    Assert(prunestate->do_exec_prune);

    /*
     * Switch to a temp context to avoid leaking memory in the executor's
     * query-lifespan memory context.
     */
    oldcontext = MemoryContextSwitchTo(prunestate->prune_context);

    /*
     * For each hierarchy, do the pruning tests, and add nondeletable
     * subplans' indexes to "result".
     */
    for (i = 0; i < prunestate->num_partprunedata; i++)
    {
        PartitionPruningData *prunedata;
        PartitionedRelPruningData *pprune;

        prunedata = prunestate->partprunedata[i];
        pprune = &prunedata->partrelprunedata[0];

        find_matching_subplans_recurse(prunedata, pprune, false, &result);

        /* Expression eval may have used space in node's ps_ExprContext too */
        if (pprune->exec_pruning_steps)
            ResetExprContext(pprune->exec_context.planstate->ps_ExprContext);
    }

    /* Add in any subplans that partition pruning didn't account for */
    result = bms_add_members(result, prunestate->other_subplans);

    MemoryContextSwitchTo(oldcontext);

    /* Copy result out of the temp context before we reset it */
    result = bms_copy(result);

    MemoryContextReset(prunestate->prune_context);

    return result;
}

/*
 * find_matching_subplans_recurse
 *      Recursive worker function for ExecFindMatchingSubPlans and
 *      ExecFindInitialMatchingSubPlans
 *
 * Adds valid (non-prunable) subplan IDs to *validsubplans
 */
static void
find_matching_subplans_recurse(PartitionPruningData *prunedata,
                               PartitionedRelPruningData *pprune,
                               bool initial_prune,
                               Bitmapset **validsubplans)
{
    Bitmapset  *partset;
    int         i;

    /* Guard against stack overflow due to overly deep partition hierarchy. */
    check_stack_depth();

    /* Only prune if pruning would be useful at this level. */
    if (initial_prune && pprune->initial_pruning_steps)
    {
        partset = get_matching_partitions(&pprune->initial_context,
                                          pprune->initial_pruning_steps);
    }
    else if (!initial_prune && pprune->exec_pruning_steps)
    {
        partset = get_matching_partitions(&pprune->exec_context,
                                          pprune->exec_pruning_steps);
    }
    else
    {
        /*
         * If no pruning is to be done, just include all partitions at this
         * level.
         */
        partset = pprune->present_parts;
    }

    /* Translate partset into subplan indexes */
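    /*
     * (A partset member maps either to a leaf subplan, when
     * subplan_map[i] >= 0, or via subpart_map[i] to a lower-level
     * PartitionedRelPruningData into which we recurse.)
     */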
    i = -1;
    while ((i = bms_next_member(partset, i)) >= 0)
    {
        if (pprune->subplan_map[i] >= 0)
            *validsubplans = bms_add_member(*validsubplans,
                                            pprune->subplan_map[i]);
        else
        {
            int         partidx = pprune->subpart_map[i];

            if (partidx >= 0)
                find_matching_subplans_recurse(prunedata,
                                               &prunedata->partrelprunedata[partidx],
                                               initial_prune, validsubplans);
            else
            {
                /*
                 * We get here if the planner already pruned all the sub-
                 * partitions for this partition.  Silently ignore this
                 * partition in this case.  The end result is the same: we
                 * would have pruned all partitions just the same, but we
                 * don't have any pruning steps to execute to verify this.
                 */
            }
        }
    }
}