/*-------------------------------------------------------------------------
 *
 * execPartition.c
 *      Support routines for partitioning.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *      src/backend/executor/execPartition.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/table.h"
#include "access/tableam.h"
#include "catalog/partition.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_type.h"
#include "executor/execPartition.h"
#include "executor/executor.h"
#include "foreign/fdwapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "partitioning/partbounds.h"
#include "partitioning/partdesc.h"
#include "partitioning/partprune.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
#include "utils/rel.h"
#include "utils/rls.h"
#include "utils/ruleutils.h"


/*-----------------------
 * PartitionTupleRouting - Encapsulates all information required to
 * route a tuple inserted into a partitioned table to one of its leaf
 * partitions.
 *
 * partition_root
 *      The partitioned table that's the target of the command.
 *
 * partition_dispatch_info
 *      Array of 'max_dispatch' elements containing a pointer to a
 *      PartitionDispatch object for every partitioned table touched by tuple
 *      routing.  The entry for the target partitioned table is *always*
 *      present in the 0th element of this array.  See comment for
 *      PartitionDispatchData->indexes for details on how this array is
 *      indexed.
 *
 * num_dispatch
 *      The current number of items stored in the 'partition_dispatch_info'
 *      array.  Also serves as the index of the next free array element for
 *      new PartitionDispatch objects that need to be stored.
 *
 * max_dispatch
 *      The current allocated size of the 'partition_dispatch_info' array.
 *
 * partitions
 *      Array of 'max_partitions' elements containing a pointer to a
 *      ResultRelInfo for every leaf partition touched by tuple routing.
 *      Some of these are pointers to ResultRelInfos which are borrowed out of
 *      'subplan_resultrel_htab'.  The remainder have been built especially
 *      for tuple routing.  See comment for PartitionDispatchData->indexes for
 *      details on how this array is indexed.
 *
 * num_partitions
 *      The current number of items stored in the 'partitions' array.  Also
 *      serves as the index of the next free array element for new
 *      ResultRelInfo objects that need to be stored.
 *
 * max_partitions
 *      The current allocated size of the 'partitions' array.
 *
 * subplan_resultrel_htab
 *      Hash table to store subplan ResultRelInfos by Oid.  This is used to
 *      cache ResultRelInfos from subplans of an UPDATE ModifyTable node;
 *      NULL in other cases.  Some of these may be useful for tuple routing
 *      to save having to build duplicates.
 *
 * memcxt
 *      Memory context used to allocate subsidiary structs.
 *-----------------------
 */
struct PartitionTupleRouting
{
    Relation    partition_root;
    PartitionDispatch *partition_dispatch_info;
    int         num_dispatch;
    int         max_dispatch;
    ResultRelInfo **partitions;
    int         num_partitions;
    int         max_partitions;
    HTAB       *subplan_resultrel_htab;
    MemoryContext memcxt;
};

/*-----------------------
 * PartitionDispatch - information about one partitioned table in a partition
 * hierarchy required to route a tuple to any of its partitions.  A
 * PartitionDispatch is always encapsulated inside a PartitionTupleRouting
 * struct and stored inside its 'partition_dispatch_info' array.
 *
 * reldesc
 *      Relation descriptor of the table
 *
 * key
 *      Partition key information of the table
 *
 * keystate
 *      Execution state required for expressions in the partition key
 *
 * partdesc
 *      Partition descriptor of the table
 *
 * tupslot
 *      A standalone TupleTableSlot initialized with this table's tuple
 *      descriptor, or NULL if no tuple conversion from the parent's rowtype
 *      is required.
 *
 * tupmap
 *      TupleConversionMap to convert from the parent's rowtype to this table's
 *      rowtype (when extracting the partition key of a tuple just before
 *      routing it through this table).  A NULL value is stored if no tuple
 *      conversion is required.
 *
 * indexes
 *      Array of partdesc->nparts elements.  For leaf partitions the index
 *      corresponds to the partition's ResultRelInfo in the encapsulating
 *      PartitionTupleRouting's partitions array.  For partitioned partitions,
 *      the index corresponds to the PartitionDispatch for it in its
 *      partition_dispatch_info array.  -1 indicates we've not yet allocated
 *      anything in PartitionTupleRouting for the partition.
 *-----------------------
 */
typedef struct PartitionDispatchData
{
    Relation    reldesc;
    PartitionKey key;
    List       *keystate;       /* list of ExprState */
    PartitionDesc partdesc;
    TupleTableSlot *tupslot;
    AttrNumber *tupmap;
    int         indexes[FLEXIBLE_ARRAY_MEMBER];
} PartitionDispatchData;
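
/*
 * To illustrate the 'indexes' convention above with a hypothetical hierarchy
 * (example values only, not taken from any real catalog): root table "p" has
 * partitions "p1" (a leaf) and "p2" (itself partitioned), and "p2" has the
 * leaf partition "p21".  Once every table has been touched by routing, one
 * plausible state is:
 *
 *      pd[0] ("p"):  indexes = {0, 1}  -- "p1" is proute->partitions[0],
 *                                         "p2" is pd[1]
 *      pd[1] ("p2"): indexes = {1}     -- "p21" is proute->partitions[1]
 *
 * Whether a given element refers to proute->partitions or to
 * proute->partition_dispatch_info is determined by partdesc->is_leaf, as
 * ExecFindPartition() below demonstrates.
 */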

/* struct to hold result relations coming from UPDATE subplans */
typedef struct SubplanResultRelHashElem
{
    Oid         relid;          /* hash key -- must be first */
    ResultRelInfo *rri;
} SubplanResultRelHashElem;


static void ExecHashSubPlanResultRelsByOid(ModifyTableState *mtstate,
                                           PartitionTupleRouting *proute);
static ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate,
                                            EState *estate, PartitionTupleRouting *proute,
                                            PartitionDispatch dispatch,
                                            ResultRelInfo *rootResultRelInfo,
                                            int partidx);
static void ExecInitRoutingInfo(ModifyTableState *mtstate,
                                EState *estate,
                                PartitionTupleRouting *proute,
                                PartitionDispatch dispatch,
                                ResultRelInfo *partRelInfo,
                                int partidx);
static PartitionDispatch ExecInitPartitionDispatchInfo(EState *estate,
                                                       PartitionTupleRouting *proute,
                                                       Oid partoid, PartitionDispatch parent_pd, int partidx);
static void FormPartitionKeyDatum(PartitionDispatch pd,
                                  TupleTableSlot *slot,
                                  EState *estate,
                                  Datum *values,
                                  bool *isnull);
static int  get_partition_for_tuple(PartitionDispatch pd, Datum *values,
                                    bool *isnull);
static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
                                                  Datum *values,
                                                  bool *isnull,
                                                  int maxfieldlen);
static List *adjust_partition_tlist(List *tlist, TupleConversionMap *map);
static void ExecInitPruningContext(PartitionPruneContext *context,
                                   List *pruning_steps,
                                   PartitionDesc partdesc,
                                   PartitionKey partkey,
                                   PlanState *planstate);
static void find_matching_subplans_recurse(PartitionPruningData *prunedata,
                                           PartitionedRelPruningData *pprune,
                                           bool initial_prune,
                                           Bitmapset **validsubplans);


/*
 * ExecSetupPartitionTupleRouting - sets up information needed during
 * tuple routing for partitioned tables, encapsulates it in
 * PartitionTupleRouting, and returns it.
 *
 * Callers must use the returned PartitionTupleRouting during calls to
 * ExecFindPartition().  The actual ResultRelInfo for a partition is only
 * allocated when the partition is found for the first time.
 *
 * The current memory context is used to allocate this struct and all
 * subsidiary structs that will be allocated from it later on.  Typically
 * it should be estate->es_query_cxt.
 */
PartitionTupleRouting *
ExecSetupPartitionTupleRouting(EState *estate, ModifyTableState *mtstate,
                               Relation rel)
{
    PartitionTupleRouting *proute;
    ModifyTable *node = mtstate ? (ModifyTable *) mtstate->ps.plan : NULL;

    /*
     * Here we attempt to expend as little effort as possible in setting up
     * the PartitionTupleRouting.  Each partition's ResultRelInfo is built on
     * demand, only when we actually need to route a tuple to that partition.
     * The reason for this is that a common case is for INSERT to insert a
     * single tuple into a partitioned table and this must be fast.
     */
    proute = (PartitionTupleRouting *) palloc0(sizeof(PartitionTupleRouting));
    proute->partition_root = rel;
    proute->memcxt = CurrentMemoryContext;
    /* Rest of members initialized by zeroing */

    /*
     * Initialize this table's PartitionDispatch object.  Here we pass in the
     * parent as NULL as we don't need to care about any parent of the target
     * partitioned table.
     */
    ExecInitPartitionDispatchInfo(estate, proute, RelationGetRelid(rel),
                                  NULL, 0);

    /*
     * If performing an UPDATE with tuple routing, we can reuse partition
     * sub-plan result rels.  We build a hash table to map the OIDs of
     * partitions present in mtstate->resultRelInfo to their ResultRelInfos.
     * Every time a tuple is routed to a partition that we've yet to set the
     * ResultRelInfo for, before we go to the trouble of making one, we check
     * for a pre-made one in the hash table.
     */
    if (node && node->operation == CMD_UPDATE)
        ExecHashSubPlanResultRelsByOid(mtstate, proute);

    return proute;
}
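
/*
 * A minimal sketch of the expected call sequence, assuming a caller that
 * already has an EState, a ModifyTableState and the root relation open (the
 * names below are placeholders, not code from this file):
 *
 *      proute = ExecSetupPartitionTupleRouting(estate, mtstate, rootrel);
 *      for each tuple placed in 'slot':
 *          rri = ExecFindPartition(mtstate, rootResultRelInfo, proute,
 *                                  slot, estate);
 *          ... insert the tuple using 'rri' ...
 *      ExecCleanupTupleRouting(mtstate, proute);
 */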

/*
 * ExecFindPartition -- Return the ResultRelInfo for the leaf partition that
 * the tuple contained in *slot should belong to.
 *
 * If the partition's ResultRelInfo does not yet exist in 'proute' then we set
 * one up or reuse one from mtstate's resultRelInfo array.  When reusing a
 * ResultRelInfo from the mtstate we verify that the relation is a valid
 * target for INSERTs and then set up a PartitionRoutingInfo for it.
 *
 * rootResultRelInfo is the relation named in the query.
 *
 * estate must be non-NULL; we'll need it to compute any expressions in the
 * partition keys.  Also, its per-tuple contexts are used as evaluation
 * scratch space.
 *
 * If no leaf partition is found, this routine errors out with the appropriate
 * error message.  An error may also be raised if the found target partition
 * is not a valid target for an INSERT.
 */
ResultRelInfo *
ExecFindPartition(ModifyTableState *mtstate,
                  ResultRelInfo *rootResultRelInfo,
                  PartitionTupleRouting *proute,
                  TupleTableSlot *slot, EState *estate)
{
    PartitionDispatch *pd = proute->partition_dispatch_info;
    Datum       values[PARTITION_MAX_KEYS];
    bool        isnull[PARTITION_MAX_KEYS];
    Relation    rel;
    PartitionDispatch dispatch;
    PartitionDesc partdesc;
    ExprContext *ecxt = GetPerTupleExprContext(estate);
    TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple;
    TupleTableSlot *myslot = NULL;
    MemoryContext oldcxt;

    /* use per-tuple context here to avoid leaking memory */
    oldcxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));

    /*
     * First check the root table's partition constraint, if any.  No point in
     * routing the tuple if it doesn't belong in the root table itself.
     */
    if (rootResultRelInfo->ri_PartitionCheck)
        ExecPartitionCheck(rootResultRelInfo, slot, estate, true);

    /* start with the root partitioned table */
    dispatch = pd[0];
    while (true)
    {
        AttrNumber *map = dispatch->tupmap;
        int         partidx = -1;

        CHECK_FOR_INTERRUPTS();

        rel = dispatch->reldesc;
        partdesc = dispatch->partdesc;

        /*
         * Convert the tuple to this parent's layout, if different from the
         * current relation.
         */
        myslot = dispatch->tupslot;
        if (myslot != NULL)
        {
            Assert(map != NULL);
            slot = execute_attr_map_slot(map, slot, myslot);
        }

        /*
         * Extract partition key from tuple.  Expression evaluation machinery
         * that FormPartitionKeyDatum() invokes expects ecxt_scantuple to
         * point to the correct tuple slot.  The slot might have changed from
         * what was used for the parent table if the table of the current
         * partitioning level has a different tuple descriptor from the parent.
         * So update ecxt_scantuple accordingly.
         */
        ecxt->ecxt_scantuple = slot;
        FormPartitionKeyDatum(dispatch, slot, estate, values, isnull);

        /*
         * If this partitioned table has no partitions or no partition for
         * these values, error out.
         */
        if (partdesc->nparts == 0 ||
            (partidx = get_partition_for_tuple(dispatch, values, isnull)) < 0)
        {
            char       *val_desc;

            val_desc = ExecBuildSlotPartitionKeyDescription(rel,
                                                            values, isnull, 64);
            Assert(OidIsValid(RelationGetRelid(rel)));
            ereport(ERROR,
                    (errcode(ERRCODE_CHECK_VIOLATION),
                     errmsg("no partition of relation \"%s\" found for row",
                            RelationGetRelationName(rel)),
                     val_desc ?
                     errdetail("Partition key of the failing row contains %s.",
                               val_desc) : 0));
        }

        if (partdesc->is_leaf[partidx])
        {
            ResultRelInfo *rri;

            /*
             * Look to see if we've already got a ResultRelInfo for this
             * partition.
             */
            if (likely(dispatch->indexes[partidx] >= 0))
            {
                /* ResultRelInfo already built */
                Assert(dispatch->indexes[partidx] < proute->num_partitions);
                rri = proute->partitions[dispatch->indexes[partidx]];
            }
            else
            {
                bool        found = false;

                /*
                 * We have not yet set up a ResultRelInfo for this partition,
                 * but if we have a subplan hash table, we might have one
                 * there.  If not, we'll have to create one.
                 */
                if (proute->subplan_resultrel_htab)
                {
                    Oid         partoid = partdesc->oids[partidx];
                    SubplanResultRelHashElem *elem;

                    elem = hash_search(proute->subplan_resultrel_htab,
                                       &partoid, HASH_FIND, NULL);
                    if (elem)
                    {
                        found = true;
                        rri = elem->rri;

                        /* Verify this ResultRelInfo allows INSERTs */
                        CheckValidResultRel(rri, CMD_INSERT);

                        /* Set up the PartitionRoutingInfo for it */
                        ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
                                            rri, partidx);
                    }
                }

                /* We need to create a new one. */
                if (!found)
                    rri = ExecInitPartitionInfo(mtstate, estate, proute,
                                                dispatch,
                                                rootResultRelInfo, partidx);
            }

            /* Release the tuple in the lowest parent's dedicated slot. */
            if (slot == myslot)
                ExecClearTuple(myslot);

            MemoryContextSwitchTo(oldcxt);
            ecxt->ecxt_scantuple = ecxt_scantuple_old;
            return rri;
        }
        else
        {
            /*
             * Partition is a sub-partitioned table; get the PartitionDispatch
             */
            if (likely(dispatch->indexes[partidx] >= 0))
            {
                /* Already built. */
                Assert(dispatch->indexes[partidx] < proute->num_dispatch);

                /*
                 * Move down to the next partition level and search again
                 * until we find a leaf partition that matches this tuple
                 */
                dispatch = pd[dispatch->indexes[partidx]];
            }
            else
            {
                /* Not yet built.  Do that now. */
                PartitionDispatch subdispatch;

                /*
                 * Create the new PartitionDispatch.  We pass the current one
                 * in as the parent PartitionDispatch
                 */
                subdispatch = ExecInitPartitionDispatchInfo(mtstate->ps.state,
                                                            proute,
                                                            partdesc->oids[partidx],
                                                            dispatch, partidx);
                Assert(dispatch->indexes[partidx] >= 0 &&
                       dispatch->indexes[partidx] < proute->num_dispatch);
                dispatch = subdispatch;
            }
        }
    }
}

/*
 * ExecHashSubPlanResultRelsByOid
 *      Build a hash table to allow fast lookups of subplan ResultRelInfos by
 *      partition Oid.  We also populate the subplan ResultRelInfo with an
 *      ri_PartitionRoot.
 */
static void
ExecHashSubPlanResultRelsByOid(ModifyTableState *mtstate,
                               PartitionTupleRouting *proute)
{
    HASHCTL     ctl;
    HTAB       *htab;
    int         i;

    memset(&ctl, 0, sizeof(ctl));
    ctl.keysize = sizeof(Oid);
    ctl.entrysize = sizeof(SubplanResultRelHashElem);
    ctl.hcxt = CurrentMemoryContext;

    htab = hash_create("PartitionTupleRouting table", mtstate->mt_nplans,
                       &ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
    proute->subplan_resultrel_htab = htab;

    /* Hash all subplans by their Oid */
    for (i = 0; i < mtstate->mt_nplans; i++)
    {
        ResultRelInfo *rri = &mtstate->resultRelInfo[i];
        bool        found;
        Oid         partoid = RelationGetRelid(rri->ri_RelationDesc);
        SubplanResultRelHashElem *elem;

        elem = (SubplanResultRelHashElem *)
            hash_search(htab, &partoid, HASH_ENTER, &found);
        Assert(!found);
        elem->rri = rri;

        /*
         * This is required in order to convert the partition's tuple to be
         * compatible with the root partitioned table's tuple descriptor.  When
         * generating the per-subplan result rels, this was not set.
         */
        rri->ri_PartitionRoot = proute->partition_root;
    }
}

/*
 * ExecInitPartitionInfo
 *      Lock the partition and initialize ResultRelInfo.  Also setup other
 *      information for the partition and store it in the next empty slot in
 *      the proute->partitions array.
 *
 * Returns the ResultRelInfo
 */
static ResultRelInfo *
ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate,
                      PartitionTupleRouting *proute,
                      PartitionDispatch dispatch,
                      ResultRelInfo *rootResultRelInfo,
                      int partidx)
{
    ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
    Relation    rootrel = rootResultRelInfo->ri_RelationDesc,
                partrel;
    Relation    firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
    ResultRelInfo *leaf_part_rri;
    MemoryContext oldcxt;
    AttrNumber *part_attnos = NULL;
    bool        found_whole_row;

    oldcxt = MemoryContextSwitchTo(proute->memcxt);

    partrel = table_open(dispatch->partdesc->oids[partidx], RowExclusiveLock);

    leaf_part_rri = makeNode(ResultRelInfo);
    InitResultRelInfo(leaf_part_rri,
                      partrel,
                      node ? node->rootRelation : 1,
                      rootrel,
                      estate->es_instrument);

    /*
     * Verify result relation is a valid target for an INSERT.  An UPDATE of a
     * partition-key becomes a DELETE+INSERT operation, so this check is still
     * required when the operation is CMD_UPDATE.
     */
    CheckValidResultRel(leaf_part_rri, CMD_INSERT);

    /*
     * Open partition indices.  The user may have asked to check for conflicts
     * within this leaf partition and do "nothing" instead of throwing an
     * error.  Be prepared in that case by initializing the index information
     * needed by ExecInsert() to perform speculative insertions.
     */
    if (partrel->rd_rel->relhasindex &&
        leaf_part_rri->ri_IndexRelationDescs == NULL)
        ExecOpenIndices(leaf_part_rri,
                        (node != NULL &&
                         node->onConflictAction != ONCONFLICT_NONE));

    /*
     * Build WITH CHECK OPTION constraints for the partition.  Note that we
     * didn't build the withCheckOptionList for partitions within the planner,
     * but simple translation of varattnos will suffice.  This only occurs for
     * the INSERT case or in the case of UPDATE tuple routing where we didn't
     * find a result rel to reuse in ExecSetupPartitionTupleRouting().
     */
    if (node && node->withCheckOptionLists != NIL)
    {
        List       *wcoList;
        List       *wcoExprs = NIL;
        ListCell   *ll;
        int         firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;

        /*
         * In the case of INSERT on a partitioned table, there is only one
         * plan.  Likewise, there is only one WCO list, not one per partition.
         * For UPDATE, there are as many WCO lists as there are plans.
         */
        Assert((node->operation == CMD_INSERT &&
                list_length(node->withCheckOptionLists) == 1 &&
                list_length(node->plans) == 1) ||
               (node->operation == CMD_UPDATE &&
                list_length(node->withCheckOptionLists) ==
                list_length(node->plans)));

        /*
         * Use the WCO list of the first plan as a reference to calculate
         * attno's for the WCO list of this partition.  In the INSERT case,
         * that refers to the root partitioned table, whereas in the UPDATE
         * tuple routing case, that refers to the first partition in the
         * mtstate->resultRelInfo array.  In any case, both that relation and
         * this partition should have the same columns, so we should be able
         * to map attributes successfully.
         */
        wcoList = linitial(node->withCheckOptionLists);

        /*
         * Convert Vars in it to contain this partition's attribute numbers.
         */
        part_attnos =
            convert_tuples_by_name_map(RelationGetDescr(partrel),
                                       RelationGetDescr(firstResultRel),
                                       gettext_noop("could not convert row type"));
        wcoList = (List *)
            map_variable_attnos((Node *) wcoList,
                                firstVarno, 0,
                                part_attnos,
                                RelationGetDescr(firstResultRel)->natts,
                                RelationGetForm(partrel)->reltype,
                                &found_whole_row);
        /* We ignore the value of found_whole_row. */

        foreach(ll, wcoList)
        {
            WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll));
            ExprState  *wcoExpr = ExecInitQual(castNode(List, wco->qual),
                                               &mtstate->ps);

            wcoExprs = lappend(wcoExprs, wcoExpr);
        }

        leaf_part_rri->ri_WithCheckOptions = wcoList;
        leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
    }

    /*
     * Build the RETURNING projection for the partition.  Note that we didn't
     * build the returningList for partitions within the planner, but simple
     * translation of varattnos will suffice.  This only occurs for the INSERT
     * case or in the case of UPDATE tuple routing where we didn't find a
     * result rel to reuse in ExecSetupPartitionTupleRouting().
     */
    if (node && node->returningLists != NIL)
    {
        TupleTableSlot *slot;
        ExprContext *econtext;
        List       *returningList;
        int         firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;

        /* See the comment above for WCO lists. */
        Assert((node->operation == CMD_INSERT &&
                list_length(node->returningLists) == 1 &&
                list_length(node->plans) == 1) ||
               (node->operation == CMD_UPDATE &&
                list_length(node->returningLists) ==
                list_length(node->plans)));

        /*
         * Use the RETURNING list of the first plan as a reference to
         * calculate attno's for the RETURNING list of this partition.  See
         * the comment above for WCO lists for more details on why this is
         * okay.
         */
        returningList = linitial(node->returningLists);

        /*
         * Convert Vars in it to contain this partition's attribute numbers.
         */
        if (part_attnos == NULL)
            part_attnos =
                convert_tuples_by_name_map(RelationGetDescr(partrel),
                                           RelationGetDescr(firstResultRel),
                                           gettext_noop("could not convert row type"));
        returningList = (List *)
            map_variable_attnos((Node *) returningList,
                                firstVarno, 0,
                                part_attnos,
                                RelationGetDescr(firstResultRel)->natts,
                                RelationGetForm(partrel)->reltype,
                                &found_whole_row);
        /* We ignore the value of found_whole_row. */

        leaf_part_rri->ri_returningList = returningList;

        /*
         * Initialize the projection itself.
         *
         * Use the slot and the expression context that would have been set up
         * in ExecInitModifyTable() for projection's output.
         */
        Assert(mtstate->ps.ps_ResultTupleSlot != NULL);
        slot = mtstate->ps.ps_ResultTupleSlot;
        Assert(mtstate->ps.ps_ExprContext != NULL);
        econtext = mtstate->ps.ps_ExprContext;
        leaf_part_rri->ri_projectReturning =
            ExecBuildProjectionInfo(returningList, econtext, slot,
                                    &mtstate->ps, RelationGetDescr(partrel));
    }

    /* Set up information needed for routing tuples to the partition. */
    ExecInitRoutingInfo(mtstate, estate, proute, dispatch,
                        leaf_part_rri, partidx);

    /*
     * If there is an ON CONFLICT clause, initialize state for it.
     */
    if (node && node->onConflictAction != ONCONFLICT_NONE)
    {
        int         firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
        TupleDesc   partrelDesc = RelationGetDescr(partrel);
        ExprContext *econtext = mtstate->ps.ps_ExprContext;
        ListCell   *lc;
        List       *arbiterIndexes = NIL;

        /*
         * If there is a list of arbiter indexes, map it to a list of indexes
         * in the partition.  We do that by scanning the partition's index
         * list and searching for ancestry relationships to each index in the
         * ancestor table.
         */
        if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) > 0)
        {
            List       *childIdxs;

            childIdxs = RelationGetIndexList(leaf_part_rri->ri_RelationDesc);

            foreach(lc, childIdxs)
            {
                Oid         childIdx = lfirst_oid(lc);
                List       *ancestors;
                ListCell   *lc2;

                ancestors = get_partition_ancestors(childIdx);
                foreach(lc2, rootResultRelInfo->ri_onConflictArbiterIndexes)
                {
                    if (list_member_oid(ancestors, lfirst_oid(lc2)))
                        arbiterIndexes = lappend_oid(arbiterIndexes, childIdx);
                }
                list_free(ancestors);
            }
        }

        /*
         * If the resulting lists are of unequal length, something is wrong.
         * (This shouldn't happen, since arbiter index selection should not
         * pick up an invalid index.)
         */
        if (list_length(rootResultRelInfo->ri_onConflictArbiterIndexes) !=
            list_length(arbiterIndexes))
            elog(ERROR, "invalid arbiter index list");
        leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;

        /*
         * In the DO UPDATE case, we have some more state to initialize.
         */
        if (node->onConflictAction == ONCONFLICT_UPDATE)
        {
            TupleConversionMap *map;

            map = leaf_part_rri->ri_PartitionInfo->pi_RootToPartitionMap;

            Assert(node->onConflictSet != NIL);
            Assert(rootResultRelInfo->ri_onConflict != NULL);

            leaf_part_rri->ri_onConflict = makeNode(OnConflictSetState);

            /*
             * Need a separate existing slot for each partition, as the
             * partition could be of a different AM, even if the tuple
             * descriptors match.
             */
            leaf_part_rri->ri_onConflict->oc_Existing =
                table_slot_create(leaf_part_rri->ri_RelationDesc,
                                  &mtstate->ps.state->es_tupleTable);

            /*
             * If the partition's tuple descriptor matches exactly the root
             * parent (the common case), we can re-use most of the parent's ON
             * CONFLICT SET state, skipping a bunch of work.  Otherwise, we
             * need to create state specific to this partition.
             */
            if (map == NULL)
            {
                /*
                 * It's safe to reuse these from the partition root, as we
                 * only process one tuple at a time (therefore we won't
                 * overwrite needed data in slots), and the results of
                 * projections are independent of the underlying storage.
                 * Projections and where clauses themselves don't store state
                 * / are independent of the underlying storage.
                 */
                leaf_part_rri->ri_onConflict->oc_ProjSlot =
                    rootResultRelInfo->ri_onConflict->oc_ProjSlot;
                leaf_part_rri->ri_onConflict->oc_ProjInfo =
                    rootResultRelInfo->ri_onConflict->oc_ProjInfo;
                leaf_part_rri->ri_onConflict->oc_WhereClause =
                    rootResultRelInfo->ri_onConflict->oc_WhereClause;
            }
            else
            {
                List       *onconflset;
                TupleDesc   tupDesc;
                bool        found_whole_row;

                /*
                 * Translate expressions in onConflictSet to account for
                 * different attribute numbers.  For that, map partition
                 * varattnos twice: first to catch the EXCLUDED
                 * pseudo-relation (INNER_VAR), and second to handle the main
                 * target relation (firstVarno).
                 */
                onconflset = (List *) copyObject((Node *) node->onConflictSet);
                if (part_attnos == NULL)
                    part_attnos =
                        convert_tuples_by_name_map(RelationGetDescr(partrel),
                                                   RelationGetDescr(firstResultRel),
                                                   gettext_noop("could not convert row type"));
                onconflset = (List *)
                    map_variable_attnos((Node *) onconflset,
                                        INNER_VAR, 0,
                                        part_attnos,
                                        RelationGetDescr(firstResultRel)->natts,
                                        RelationGetForm(partrel)->reltype,
                                        &found_whole_row);
                /* We ignore the value of found_whole_row. */
                onconflset = (List *)
                    map_variable_attnos((Node *) onconflset,
                                        firstVarno, 0,
                                        part_attnos,
                                        RelationGetDescr(firstResultRel)->natts,
                                        RelationGetForm(partrel)->reltype,
                                        &found_whole_row);
                /* We ignore the value of found_whole_row. */

                /* Finally, adjust this tlist to match the partition. */
                onconflset = adjust_partition_tlist(onconflset, map);

                /* create the tuple slot for the UPDATE SET projection */
                tupDesc = ExecTypeFromTL(onconflset);
                leaf_part_rri->ri_onConflict->oc_ProjSlot =
                    ExecInitExtraTupleSlot(mtstate->ps.state, tupDesc,
                                           &TTSOpsVirtual);

                /* build UPDATE SET projection state */
                leaf_part_rri->ri_onConflict->oc_ProjInfo =
                    ExecBuildProjectionInfo(onconflset, econtext,
                                            leaf_part_rri->ri_onConflict->oc_ProjSlot,
                                            &mtstate->ps, partrelDesc);

                /*
                 * If there is a WHERE clause, initialize state where it will
                 * be evaluated, mapping the attribute numbers appropriately.
                 * As with onConflictSet, we need to map partition varattnos
                 * to the partition's tupdesc.
                 */
                if (node->onConflictWhere)
                {
                    List       *clause;

                    clause = copyObject((List *) node->onConflictWhere);
                    clause = (List *)
                        map_variable_attnos((Node *) clause,
                                            INNER_VAR, 0,
                                            part_attnos,
                                            RelationGetDescr(firstResultRel)->natts,
                                            RelationGetForm(partrel)->reltype,
                                            &found_whole_row);
                    /* We ignore the value of found_whole_row. */
                    clause = (List *)
                        map_variable_attnos((Node *) clause,
                                            firstVarno, 0,
                                            part_attnos,
                                            RelationGetDescr(firstResultRel)->natts,
                                            RelationGetForm(partrel)->reltype,
                                            &found_whole_row);
                    /* We ignore the value of found_whole_row. */
                    leaf_part_rri->ri_onConflict->oc_WhereClause =
                        ExecInitQual((List *) clause, &mtstate->ps);
                }
            }
        }
    }

    /*
     * Since we've just initialized this ResultRelInfo, it's not in any list
     * attached to the estate as yet.  Add it, so that it can be found later.
     *
     * Note that the entries in this list appear in no predetermined order,
     * because partition result rels are initialized as and when they're
     * needed.
     */
    MemoryContextSwitchTo(estate->es_query_cxt);
    estate->es_tuple_routing_result_relations =
        lappend(estate->es_tuple_routing_result_relations,
                leaf_part_rri);

    MemoryContextSwitchTo(oldcxt);

    return leaf_part_rri;
}
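
/*
 * To make the varattno translation above concrete, consider a hypothetical
 * pair of tables (column names and attnos invented for illustration): the
 * parent has columns (a, b, c) with attnos (1, 2, 3), while the partition,
 * having had a column dropped and re-added, stores them as (a, c, b) with
 * attnos (1, 2, 3).  convert_tuples_by_name_map() then returns an array
 * indexed by parent attno:
 *
 *      part_attnos = {1, 3, 2}     -- parent "b" (attno 2) maps to partition
 *                                     attno 3, parent "c" (attno 3) to 2
 *
 * and map_variable_attnos() rewrites each matching Var's varattno through
 * that array, so expressions copied from the parent end up referencing the
 * partition's columns correctly.
 */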

/*
 * ExecInitRoutingInfo
 *      Set up information needed for translating tuples between root
 *      partitioned table format and partition format, and keep track of it
 *      in PartitionTupleRouting.
 */
static void
ExecInitRoutingInfo(ModifyTableState *mtstate,
                    EState *estate,
                    PartitionTupleRouting *proute,
                    PartitionDispatch dispatch,
                    ResultRelInfo *partRelInfo,
                    int partidx)
{
    MemoryContext oldcxt;
    PartitionRoutingInfo *partrouteinfo;
    int         rri_index;

    oldcxt = MemoryContextSwitchTo(proute->memcxt);

    partrouteinfo = palloc(sizeof(PartitionRoutingInfo));

    /*
     * Set up a tuple conversion map to convert a tuple routed to the
     * partition from the parent's type to the partition's.
     */
    partrouteinfo->pi_RootToPartitionMap =
        convert_tuples_by_name(RelationGetDescr(partRelInfo->ri_PartitionRoot),
                               RelationGetDescr(partRelInfo->ri_RelationDesc),
                               gettext_noop("could not convert row type"));

    /*
     * If a partition has a different rowtype than the root parent, initialize
     * a slot dedicated to storing this partition's tuples.  The slot is used
     * for various operations that are applied to tuples after routing, such
     * as checking constraints.
     */
    if (partrouteinfo->pi_RootToPartitionMap != NULL)
    {
        Relation    partrel = partRelInfo->ri_RelationDesc;

        /*
         * Initialize the slot itself, setting its descriptor to this
         * partition's TupleDesc; the TupleDesc reference will be released at
         * the end of the command.
         */
        partrouteinfo->pi_PartitionTupleSlot =
            table_slot_create(partrel, &estate->es_tupleTable);
    }
    else
        partrouteinfo->pi_PartitionTupleSlot = NULL;

    /*
     * Also, if transition capture is required, store a map to convert tuples
     * from partition's rowtype to the root partition table's.
     */
    if (mtstate &&
        (mtstate->mt_transition_capture || mtstate->mt_oc_transition_capture))
    {
        partrouteinfo->pi_PartitionToRootMap =
            convert_tuples_by_name(RelationGetDescr(partRelInfo->ri_RelationDesc),
                                   RelationGetDescr(partRelInfo->ri_PartitionRoot),
                                   gettext_noop("could not convert row type"));
    }
    else
        partrouteinfo->pi_PartitionToRootMap = NULL;

    /*
     * If the partition is a foreign table, let the FDW init itself for
     * routing tuples to the partition.
     */
    if (partRelInfo->ri_FdwRoutine != NULL &&
        partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL)
        partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo);

    partRelInfo->ri_PartitionInfo = partrouteinfo;
    partRelInfo->ri_CopyMultiInsertBuffer = NULL;

    /*
     * Keep track of it in the PartitionTupleRouting->partitions array.
     */
    Assert(dispatch->indexes[partidx] == -1);

    rri_index = proute->num_partitions++;

    /* Allocate or enlarge the array, as needed */
    if (proute->num_partitions >= proute->max_partitions)
    {
        if (proute->max_partitions == 0)
        {
            proute->max_partitions = 8;
            proute->partitions = (ResultRelInfo **)
                palloc(sizeof(ResultRelInfo *) * proute->max_partitions);
        }
        else
        {
            proute->max_partitions *= 2;
            proute->partitions = (ResultRelInfo **)
                repalloc(proute->partitions, sizeof(ResultRelInfo *) *
                         proute->max_partitions);
        }
    }

    proute->partitions[rri_index] = partRelInfo;
    dispatch->indexes[partidx] = rri_index;

    MemoryContextSwitchTo(oldcxt);
}

/*
 * ExecInitPartitionDispatchInfo
 *      Lock the partitioned table (if not locked already) and initialize
 *      PartitionDispatch for a partitioned table and store it in the next
 *      available slot in the proute->partition_dispatch_info array.  Also,
 *      record the index into this array in the parent_pd->indexes[] array in
 *      the partidx element so that we can properly retrieve the newly created
 *      PartitionDispatch later.
 */
static PartitionDispatch
ExecInitPartitionDispatchInfo(EState *estate,
                              PartitionTupleRouting *proute, Oid partoid,
                              PartitionDispatch parent_pd, int partidx)
{
    Relation    rel;
    PartitionDesc partdesc;
    PartitionDispatch pd;
    int         dispatchidx;
    MemoryContext oldcxt;

    if (estate->es_partition_directory == NULL)
        estate->es_partition_directory =
            CreatePartitionDirectory(estate->es_query_cxt);

    oldcxt = MemoryContextSwitchTo(proute->memcxt);

    /*
     * Only sub-partitioned tables need to be locked here.  The root
     * partitioned table will already have been locked as it's referenced in
     * the query's rtable.
     */
    if (partoid != RelationGetRelid(proute->partition_root))
        rel = table_open(partoid, RowExclusiveLock);
    else
        rel = proute->partition_root;
    partdesc = PartitionDirectoryLookup(estate->es_partition_directory, rel);

    pd = (PartitionDispatch) palloc(offsetof(PartitionDispatchData, indexes) +
                                    partdesc->nparts * sizeof(int));
    pd->reldesc = rel;
    pd->key = RelationGetPartitionKey(rel);
    pd->keystate = NIL;
    pd->partdesc = partdesc;
    if (parent_pd != NULL)
    {
        TupleDesc   tupdesc = RelationGetDescr(rel);

        /*
         * For sub-partitioned tables where the column order differs from its
         * direct parent partitioned table, we must store a tuple table slot
         * initialized with its tuple descriptor and a tuple conversion map to
         * convert a tuple from its parent's rowtype to its own.  This is to
         * make sure that we are looking at the correct row using the correct
         * tuple descriptor when computing its partition key for tuple
         * routing.
         */
        pd->tupmap = convert_tuples_by_name_map_if_req(RelationGetDescr(parent_pd->reldesc),
                                                       tupdesc,
                                                       gettext_noop("could not convert row type"));
        pd->tupslot = pd->tupmap ?
            MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual) : NULL;
    }
    else
    {
        /* Not required for the root partitioned table */
        pd->tupmap = NULL;
        pd->tupslot = NULL;
    }

    /*
     * Initialize with -1 to signify that the corresponding partition's
     * ResultRelInfo or PartitionDispatch has not been created yet.
     */
    memset(pd->indexes, -1, sizeof(int) * partdesc->nparts);

    /* Track in PartitionTupleRouting for later use */
    dispatchidx = proute->num_dispatch++;

    /* Allocate or enlarge the array, as needed */
    if (proute->num_dispatch >= proute->max_dispatch)
    {
        if (proute->max_dispatch == 0)
        {
            proute->max_dispatch = 4;
            proute->partition_dispatch_info = (PartitionDispatch *)
                palloc(sizeof(PartitionDispatch) * proute->max_dispatch);
        }
        else
        {
            proute->max_dispatch *= 2;
            proute->partition_dispatch_info = (PartitionDispatch *)
                repalloc(proute->partition_dispatch_info,
                         sizeof(PartitionDispatch) * proute->max_dispatch);
        }
    }
    proute->partition_dispatch_info[dispatchidx] = pd;

    /*
     * Finally, if setting up a PartitionDispatch for a sub-partitioned table,
     * install a downlink in the parent to allow quick descent.
     */
    if (parent_pd)
    {
        Assert(parent_pd->indexes[partidx] == -1);
        parent_pd->indexes[partidx] = dispatchidx;
    }

    MemoryContextSwitchTo(oldcxt);

    return pd;
}

/*
 * ExecCleanupTupleRouting -- Clean up objects allocated for partition tuple
 * routing.
 *
 * Close all the partitioned tables, leaf partitions, and their indices.
 */
void
ExecCleanupTupleRouting(ModifyTableState *mtstate,
                        PartitionTupleRouting *proute)
{
    HTAB       *htab = proute->subplan_resultrel_htab;
    int         i;

    /*
     * Remember, proute->partition_dispatch_info[0] corresponds to the root
     * partitioned table, which we must not try to close, because it is the
     * main target table of the query that will be closed by callers such as
     * ExecEndPlan() or DoCopy().  Also, tupslot is NULL for the root
     * partitioned table.
     */
    for (i = 1; i < proute->num_dispatch; i++)
    {
        PartitionDispatch pd = proute->partition_dispatch_info[i];

        table_close(pd->reldesc, NoLock);

        if (pd->tupslot)
            ExecDropSingleTupleTableSlot(pd->tupslot);
    }

    for (i = 0; i < proute->num_partitions; i++)
    {
        ResultRelInfo *resultRelInfo = proute->partitions[i];

        /* Allow any FDWs to shut down */
        if (resultRelInfo->ri_FdwRoutine != NULL &&
            resultRelInfo->ri_FdwRoutine->EndForeignInsert != NULL)
            resultRelInfo->ri_FdwRoutine->EndForeignInsert(mtstate->ps.state,
                                                           resultRelInfo);

        /*
         * Check if this result rel is one belonging to the node's subplans,
         * if so, let ExecEndPlan() clean it up.
         */
        if (htab)
        {
            Oid         partoid;
            bool        found;

            partoid = RelationGetRelid(resultRelInfo->ri_RelationDesc);

            (void) hash_search(htab, &partoid, HASH_FIND, &found);
            if (found)
                continue;
        }

        ExecCloseIndices(resultRelInfo);
        table_close(resultRelInfo->ri_RelationDesc, NoLock);
    }
}

/* ----------------
 *      FormPartitionKeyDatum
 *          Construct values[] and isnull[] arrays for the partition key
 *          of a tuple.
 *
 *  pd      Partition dispatch object of the partitioned table
 *  slot    Heap tuple from which to extract partition key
 *  estate  executor state for evaluating any partition key
 *          expressions (must be non-NULL)
 *  values  Array of partition key Datums (output area)
 *  isnull  Array of is-null indicators (output area)
 *
 * the ecxt_scantuple slot of estate's per-tuple expr context must point to
 * the heap tuple passed in.
 * ----------------
 */
static void
FormPartitionKeyDatum(PartitionDispatch pd,
                      TupleTableSlot *slot,
                      EState *estate,
                      Datum *values,
                      bool *isnull)
{
    ListCell   *partexpr_item;
    int         i;

    if (pd->key->partexprs != NIL && pd->keystate == NIL)
    {
        /* Check caller has set up context correctly */
        Assert(estate != NULL &&
               GetPerTupleExprContext(estate)->ecxt_scantuple == slot);

        /* First time through, set up expression evaluation state */
        pd->keystate = ExecPrepareExprList(pd->key->partexprs, estate);
    }

    partexpr_item = list_head(pd->keystate);
    for (i = 0; i < pd->key->partnatts; i++)
    {
        AttrNumber  keycol = pd->key->partattrs[i];
        Datum       datum;
        bool        isNull;

        if (keycol != 0)
        {
            /* Plain column; get the value directly from the heap tuple */
            datum = slot_getattr(slot, keycol, &isNull);
        }
        else
        {
            /* Expression; need to evaluate it */
            if (partexpr_item == NULL)
                elog(ERROR, "wrong number of partition key expressions");
            datum = ExecEvalExprSwitchContext((ExprState *) lfirst(partexpr_item),
                                              GetPerTupleExprContext(estate),
                                              &isNull);
            partexpr_item = lnext(partexpr_item);
        }
        values[i] = datum;
        isnull[i] = isNull;
    }

    if (partexpr_item != NULL)
        elog(ERROR, "wrong number of partition key expressions");
}
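
/*
 * As an illustration (hypothetical table, not taken from this file): for a
 * table partitioned BY RANGE (a, (b + 1)), pd->key->partattrs is {1, 0},
 * assuming "a" has attno 1 and with 0 marking the expression column.  The
 * loop above therefore fetches values[0] directly from the slot with
 * slot_getattr(), while values[1] comes from evaluating the ExprState built
 * for "b + 1" against ecxt_scantuple.
 */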

/*
 * get_partition_for_tuple
 *      Finds partition of relation which accepts the partition key specified
 *      in values and isnull
 *
 * Return value is index of the partition (>= 0 and < partdesc->nparts) if one
 * found or -1 if none found.
 */
static int
get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)
{
    int         bound_offset;
    int         part_index = -1;
    PartitionKey key = pd->key;
    PartitionDesc partdesc = pd->partdesc;
    PartitionBoundInfo boundinfo = partdesc->boundinfo;

    /* Route as appropriate based on partitioning strategy. */
    switch (key->strategy)
    {
        case PARTITION_STRATEGY_HASH:
            {
                int         greatest_modulus;
                uint64      rowHash;

                greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
                rowHash = compute_partition_hash_value(key->partnatts,
                                                       key->partsupfunc,
                                                       key->partcollation,
                                                       values, isnull);

                part_index = boundinfo->indexes[rowHash % greatest_modulus];
            }
            break;

        case PARTITION_STRATEGY_LIST:
            if (isnull[0])
            {
                if (partition_bound_accepts_nulls(boundinfo))
                    part_index = boundinfo->null_index;
            }
            else
            {
                bool        equal = false;

                bound_offset = partition_list_bsearch(key->partsupfunc,
                                                      key->partcollation,
                                                      boundinfo,
                                                      values[0], &equal);
                if (bound_offset >= 0 && equal)
                    part_index = boundinfo->indexes[bound_offset];
            }
            break;

        case PARTITION_STRATEGY_RANGE:
            {
                bool        equal = false,
                            range_partkey_has_null = false;
                int         i;

                /*
                 * No range includes NULL, so this will be accepted by the
                 * default partition if there is one, and otherwise rejected.
                 */
                for (i = 0; i < key->partnatts; i++)
                {
                    if (isnull[i])
                    {
                        range_partkey_has_null = true;
                        break;
                    }
                }

                if (!range_partkey_has_null)
                {
                    bound_offset = partition_range_datum_bsearch(key->partsupfunc,
                                                                 key->partcollation,
                                                                 boundinfo,
                                                                 key->partnatts,
                                                                 values,
                                                                 &equal);

                    /*
                     * The bound at bound_offset is less than or equal to the
                     * tuple value, so the bound at offset+1 is the upper
                     * bound of the partition we're looking for, if there
                     * actually exists one.
                     */
                    part_index = boundinfo->indexes[bound_offset + 1];
                }
            }
            break;

        default:
            elog(ERROR, "unexpected partition strategy: %d",
                 (int) key->strategy);
    }

    /*
     * part_index < 0 means we failed to find a partition of this parent.  Use
     * the default partition, if there is one.
     */
    if (part_index < 0)
        part_index = boundinfo->default_index;

    return part_index;
}
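
/*
 * A worked example of the RANGE branch above, with invented bounds: given
 * partitions p1 FOR VALUES FROM (0) TO (10) and p2 FROM (10) TO (20), the
 * sorted bound datums are {0, 10, 20}.  Routing the value 15,
 * partition_range_datum_bsearch() returns the offset of the bound 10 (the
 * greatest bound <= 15), and boundinfo->indexes[offset + 1] yields p2.  For
 * the value 25, indexes[offset + 1] is -1, so the fallback below applies
 * default_index, which is itself -1 when no DEFAULT partition exists.
 */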

/*
 * ExecBuildSlotPartitionKeyDescription
 *
 * This works very much like BuildIndexValueDescription() and is currently
 * used for building error messages when ExecFindPartition() fails to find
 * a partition for a row.
 */
static char *
ExecBuildSlotPartitionKeyDescription(Relation rel,
                                     Datum *values,
                                     bool *isnull,
                                     int maxfieldlen)
{
    StringInfoData buf;
    PartitionKey key = RelationGetPartitionKey(rel);
    int         partnatts = get_partition_natts(key);
    int         i;
    Oid         relid = RelationGetRelid(rel);
    AclResult   aclresult;

    if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED)
        return NULL;

    /* If the user has table-level access, just go build the description. */
    aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_SELECT);
    if (aclresult != ACLCHECK_OK)
    {
        /*
         * Step through the columns of the partition key and make sure the
         * user has SELECT rights on all of them.
         */
        for (i = 0; i < partnatts; i++)
        {
            AttrNumber  attnum = get_partition_col_attnum(key, i);

            /*
             * If this partition key column is an expression, we return no
             * detail rather than try to figure out what column(s) the
             * expression includes and if the user has SELECT rights on them.
             */
            if (attnum == InvalidAttrNumber ||
                pg_attribute_aclcheck(relid, attnum, GetUserId(),
                                      ACL_SELECT) != ACLCHECK_OK)
                return NULL;
        }
    }

    initStringInfo(&buf);
    appendStringInfo(&buf, "(%s) = (",
                     pg_get_partkeydef_columns(relid, true));

    for (i = 0; i < partnatts; i++)
    {
        char       *val;
        int         vallen;

        if (isnull[i])
            val = "null";
        else
        {
            Oid         foutoid;
            bool        typisvarlena;

            getTypeOutputInfo(get_partition_col_typid(key, i),
                              &foutoid, &typisvarlena);
            val = OidOutputFunctionCall(foutoid, values[i]);
        }

        if (i > 0)
            appendStringInfoString(&buf, ", ");

        /* truncate if needed */
        vallen = strlen(val);
        if (vallen <= maxfieldlen)
            appendStringInfoString(&buf, val);
        else
        {
            vallen = pg_mbcliplen(val, vallen, maxfieldlen);
            appendBinaryStringInfo(&buf, val, vallen);
            appendStringInfoString(&buf, "...");
        }
    }

    appendStringInfoChar(&buf, ')');

    return buf.data;
}
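
/*
 * For instance (values invented for illustration), a failing row in a table
 * partitioned BY LIST (status) might yield the errdetail fragment
 *
 *      (status) = (unknown)
 *
 * with each value truncated to 'maxfieldlen' bytes; the caller above passes
 * 64.
 */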

/*
 * adjust_partition_tlist
 *      Adjust the targetlist entries for a given partition to account for
 *      attribute differences between parent and the partition
 *
 * The expressions have already been fixed, but here we fix the list to make
 * target resnos match the partition's attribute numbers.  This results in a
 * copy of the original target list in which the entries appear in resno
 * order, including both the existing entries (that may have their resno
 * changed in-place) and the newly added entries for columns that don't exist
 * in the parent.
 *
 * Scribbles on the input tlist, so callers must make sure to make a copy
 * before passing it to us.
 */
static List *
adjust_partition_tlist(List *tlist, TupleConversionMap *map)
{
    List       *new_tlist = NIL;
    TupleDesc   tupdesc = map->outdesc;
    AttrNumber *attrMap = map->attrMap;
    AttrNumber  attrno;

    for (attrno = 1; attrno <= tupdesc->natts; attrno++)
    {
        Form_pg_attribute att_tup = TupleDescAttr(tupdesc, attrno - 1);
        TargetEntry *tle;

        if (attrMap[attrno - 1] != InvalidAttrNumber)
        {
            Assert(!att_tup->attisdropped);

            /*
             * Use the corresponding entry from the parent's tlist, adjusting
             * the resno to match the partition's attno.
             */
            tle = (TargetEntry *) list_nth(tlist, attrMap[attrno - 1] - 1);
            tle->resno = attrno;
        }
        else
        {
            Const      *expr;

            /*
             * For a dropped attribute in the partition, generate a dummy
             * entry with resno matching the partition's attno.
             */
            Assert(att_tup->attisdropped);
            expr = makeConst(INT4OID,
                             -1,
                             InvalidOid,
                             sizeof(int32),
                             (Datum) 0,
                             true,  /* isnull */
                             true /* byval */ );
            tle = makeTargetEntry((Expr *) expr,
                                  attrno,
                                  pstrdup(NameStr(att_tup->attname)),
                                  false);
        }

        new_tlist = lappend(new_tlist, tle);
    }

    return new_tlist;
}
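
/*
 * A sketch of the effect, using invented attribute numbers: if the partition
 * has three attributes of which attno 2 is dropped, attrMap might be
 * {1, InvalidAttrNumber, 2}.  The loop above then emits the parent's first
 * tlist entry with resno 1, a dummy NULL constant with resno 2 for the
 * dropped column, and the parent's second entry with resno 3, producing a
 * tlist in partition attribute order.
 */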

/*-------------------------------------------------------------------------
 * Run-Time Partition Pruning Support.
 *
 * The following series of functions exist to support the removal of unneeded
 * subplans for queries against partitioned tables.  The supporting functions
 * here are designed to work with any plan type which supports an arbitrary
 * number of subplans, e.g. Append, MergeAppend.
 *
 * When pruning involves comparison of a partition key to a constant, it's
 * done by the planner.  However, if we have a comparison to a non-constant
 * but not volatile expression, that presents an opportunity for run-time
 * pruning by the executor, allowing irrelevant partitions to be skipped
 * dynamically.
 *
 * We must distinguish expressions containing PARAM_EXEC Params from
 * expressions that don't contain those.  Even though a PARAM_EXEC Param is
 * considered to be a stable expression, it can change value from one plan
 * node scan to the next during query execution.  Stable comparison
 * expressions that don't involve such Params allow partition pruning to be
 * done once during executor startup.  Expressions that do involve such Params
 * require us to prune separately for each scan of the parent plan node.
 *
 * Note that pruning away unneeded subplans during executor startup has the
 * added benefit of not having to initialize the unneeded subplans at all.
 *
 *
 * Functions:
 *
 * ExecCreatePartitionPruneState:
 *      Creates the PartitionPruneState required by each of the two pruning
 *      functions.  Details stored include how to map the partition index
 *      returned by the partition pruning code into subplan indexes.
 *
 * ExecFindInitialMatchingSubPlans:
 *      Returns indexes of matching subplans.  Partition pruning is attempted
 *      without any evaluation of expressions containing PARAM_EXEC Params.
 *      This function must be called during executor startup for the parent
 *      plan before the subplans themselves are initialized.  Subplans which
 *      are found not to match by this function must be removed from the
 *      plan's list of subplans during execution, as this function performs a
 *      remap of the partition index to subplan index map and the newly
 *      created map provides indexes only for subplans which remain after
 *      calling this function.
 *
 * ExecFindMatchingSubPlans:
 *      Returns indexes of matching subplans after evaluating all available
 *      expressions.  This function can only be called during execution and
 *      must be called again each time the value of a Param listed in
 *      PartitionPruneState's 'execparamids' changes.
 *-------------------------------------------------------------------------
 */
1541 | |
1542 | /* |
1543 | * ExecCreatePartitionPruneState |
1544 | * Build the data structure required for calling |
1545 | * ExecFindInitialMatchingSubPlans and ExecFindMatchingSubPlans. |
1546 | * |
1547 | * 'planstate' is the parent plan node's execution state. |
1548 | * |
1549 | * 'partitionpruneinfo' is a PartitionPruneInfo as generated by |
1550 | * make_partition_pruneinfo. Here we build a PartitionPruneState containing a |
1551 | * PartitionPruningData for each partitioning hierarchy (i.e., each sublist of |
1552 | * partitionpruneinfo->prune_infos), each of which contains a |
1553 | * PartitionedRelPruningData for each PartitionedRelPruneInfo appearing in |
1554 | * that sublist. This two-level system is needed to keep from confusing the |
1555 | * different hierarchies when a UNION ALL contains multiple partitioned tables |
1556 | * as children. The data stored in each PartitionedRelPruningData can be |
1557 | * re-used each time we re-evaluate which partitions match the pruning steps |
1558 | * provided in each PartitionedRelPruneInfo. |
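*
* As a hypothetical illustration, for a UNION ALL over two partitioned
* tables pt1 (one of whose children, pt1_a, is itself partitioned) and
* pt2, the structure built here is:
*
*	PartitionPruneState
*	  partprunedata[0] -> PartitionPruningData for pt1's hierarchy
*	                        partrelprunedata[0]: pt1
*	                        partrelprunedata[1]: pt1_a
*	  partprunedata[1] -> PartitionPruningData for pt2's hierarchy
*	                        partrelprunedata[0]: pt2
*
* (pt1, pt1_a, and pt2 are invented names.)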
1559 | */ |
1560 | PartitionPruneState * |
1561 | ExecCreatePartitionPruneState(PlanState *planstate, |
1562 | PartitionPruneInfo *partitionpruneinfo) |
1563 | { |
1564 | EState *estate = planstate->state; |
1565 | PartitionPruneState *prunestate; |
1566 | int n_part_hierarchies; |
1567 | ListCell *lc; |
1568 | int i; |
1569 | |
1570 | if (estate->es_partition_directory == NULL) |
1571 | estate->es_partition_directory = |
1572 | CreatePartitionDirectory(estate->es_query_cxt); |
1573 | |
1574 | n_part_hierarchies = list_length(partitionpruneinfo->prune_infos); |
1575 | Assert(n_part_hierarchies > 0); |
1576 | |
1577 | /* |
1578 | * Allocate the data structure |
1579 | */ |
1580 | prunestate = (PartitionPruneState *) |
1581 | palloc(offsetof(PartitionPruneState, partprunedata) + |
1582 | sizeof(PartitionPruningData *) * n_part_hierarchies); |
1583 | |
1584 | prunestate->execparamids = NULL; |
1585 | /* other_subplans can change at runtime, so we need our own copy */ |
1586 | prunestate->other_subplans = bms_copy(partitionpruneinfo->other_subplans); |
1587 | prunestate->do_initial_prune = false; /* may be set below */ |
1588 | prunestate->do_exec_prune = false; /* may be set below */ |
1589 | prunestate->num_partprunedata = n_part_hierarchies; |
1590 | |
1591 | /* |
1592 | * Create a short-term memory context which we'll use when making calls to |
1593 | * the partition pruning functions. This avoids possible memory leaks, |
1594 | * since the pruning functions call comparison functions that aren't under |
1595 | * our control. |
1596 | */ |
1597 | prunestate->prune_context = |
1598 | AllocSetContextCreate(CurrentMemoryContext, |
1599 | "Partition Prune" , |
1600 | ALLOCSET_DEFAULT_SIZES); |
1601 | |
1602 | i = 0; |
1603 | foreach(lc, partitionpruneinfo->prune_infos) |
1604 | { |
1605 | List *partrelpruneinfos = lfirst_node(List, lc); |
1606 | int npartrelpruneinfos = list_length(partrelpruneinfos); |
1607 | PartitionPruningData *prunedata; |
1608 | ListCell *lc2; |
1609 | int j; |
1610 | |
1611 | prunedata = (PartitionPruningData *) |
1612 | palloc(offsetof(PartitionPruningData, partrelprunedata) + |
1613 | npartrelpruneinfos * sizeof(PartitionedRelPruningData)); |
1614 | prunestate->partprunedata[i] = prunedata; |
1615 | prunedata->num_partrelprunedata = npartrelpruneinfos; |
1616 | |
1617 | j = 0; |
1618 | foreach(lc2, partrelpruneinfos) |
1619 | { |
1620 | PartitionedRelPruneInfo *pinfo = lfirst_node(PartitionedRelPruneInfo, lc2); |
1621 | PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j]; |
1622 | Relation partrel; |
1623 | PartitionDesc partdesc; |
1624 | PartitionKey partkey; |
1625 | |
1626 | /* |
1627 | * We can rely on the copies of the partitioned table's partition |
1628 | * key and partition descriptor appearing in its relcache entry, |
1629 | * because that entry will be held open and locked for the |
1630 | * duration of this executor run. |
1631 | */ |
1632 | partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex); |
1633 | partkey = RelationGetPartitionKey(partrel); |
1634 | partdesc = PartitionDirectoryLookup(estate->es_partition_directory, |
1635 | partrel); |
1636 | |
1637 | /* |
1638 | * Initialize the subplan_map and subpart_map. Since detaching a |
1639 | * partition requires AccessExclusiveLock, no partitions can have |
1640 | * disappeared, nor can the bounds for any partition have changed. |
1641 | * However, new partitions may have been added. |
1642 | */ |
1643 | Assert(partdesc->nparts >= pinfo->nparts); |
1644 | pprune->nparts = partdesc->nparts; |
1645 | pprune->subplan_map = palloc(sizeof(int) * partdesc->nparts); |
1646 | if (partdesc->nparts == pinfo->nparts) |
1647 | { |
1648 | /* |
1649 | * There are no new partitions, so this is simple. We can |
1650 | * simply point to the subpart_map from the plan, but we must |
1651 | * copy the subplan_map since we may change it later. |
1652 | */ |
1653 | pprune->subpart_map = pinfo->subpart_map; |
1654 | memcpy(pprune->subplan_map, pinfo->subplan_map, |
1655 | sizeof(int) * pinfo->nparts); |
1656 | |
1657 | /* |
1658 | * Double-check that the list of unpruned relations has not |
1659 | * changed. (Pruned partitions are not in relid_map[].) |
1660 | */ |
1661 | #ifdef USE_ASSERT_CHECKING |
1662 | for (int k = 0; k < pinfo->nparts; k++) |
1663 | { |
1664 | Assert(partdesc->oids[k] == pinfo->relid_map[k] || |
1665 | pinfo->subplan_map[k] == -1); |
1666 | } |
1667 | #endif |
1668 | } |
1669 | else |
1670 | { |
1671 | int pd_idx = 0; |
1672 | int pp_idx; |
1673 | |
1674 | /* |
1675 | * Some new partitions have appeared since plan time, and |
1676 | * those are reflected in our PartitionDesc but were not |
1677 | * present in the one used to construct subplan_map and |
1678 | * subpart_map. So we must construct new and longer arrays |
1679 | * where the partitions that were originally present map to |
1680 | * the same place, and any added indexes map to -1, as if the |
1681 | * new partitions had been pruned. |
1682 | */ |
pprune->subpart_map = palloc(sizeof(int) * partdesc->nparts);
for (pp_idx = 0; pp_idx < partdesc->nparts; ++pp_idx)
{
/*
* The bounds check on pd_idx guards against reading past the
* end of relid_map[] once every plan-time partition has been
* matched; any remaining partdesc entries must be new
* partitions.
*/
if (pd_idx < pinfo->nparts &&
pinfo->relid_map[pd_idx] == partdesc->oids[pp_idx])
{
pprune->subplan_map[pp_idx] =
pinfo->subplan_map[pd_idx];
pprune->subpart_map[pp_idx] =
pinfo->subpart_map[pd_idx++];
}
else
{
pprune->subplan_map[pp_idx] = -1;
pprune->subpart_map[pp_idx] = -1;
}
}
1699 | Assert(pd_idx == pinfo->nparts); |
1700 | } |
1701 | |
1702 | /* present_parts is also subject to later modification */ |
1703 | pprune->present_parts = bms_copy(pinfo->present_parts); |
1704 | |
1705 | /* |
1706 | * Initialize pruning contexts as needed. |
1707 | */ |
1708 | pprune->initial_pruning_steps = pinfo->initial_pruning_steps; |
1709 | if (pinfo->initial_pruning_steps) |
1710 | { |
1711 | ExecInitPruningContext(&pprune->initial_context, |
1712 | pinfo->initial_pruning_steps, |
1713 | partdesc, partkey, planstate); |
1714 | /* Record whether initial pruning is needed at any level */ |
1715 | prunestate->do_initial_prune = true; |
1716 | } |
1717 | pprune->exec_pruning_steps = pinfo->exec_pruning_steps; |
1718 | if (pinfo->exec_pruning_steps) |
1719 | { |
1720 | ExecInitPruningContext(&pprune->exec_context, |
1721 | pinfo->exec_pruning_steps, |
1722 | partdesc, partkey, planstate); |
1723 | /* Record whether exec pruning is needed at any level */ |
1724 | prunestate->do_exec_prune = true; |
1725 | } |
1726 | |
1727 | /* |
1728 | * Accumulate the IDs of all PARAM_EXEC Params affecting the |
1729 | * partitioning decisions at this plan node. |
1730 | */ |
1731 | prunestate->execparamids = bms_add_members(prunestate->execparamids, |
1732 | pinfo->execparamids); |
1733 | |
1734 | j++; |
1735 | } |
1736 | i++; |
1737 | } |
1738 | |
1739 | return prunestate; |
1740 | } |
1741 | |
1742 | /* |
1743 | * Initialize a PartitionPruneContext for the given list of pruning steps. |
1744 | */ |
1745 | static void |
1746 | ExecInitPruningContext(PartitionPruneContext *context, |
1747 | List *pruning_steps, |
1748 | PartitionDesc partdesc, |
1749 | PartitionKey partkey, |
1750 | PlanState *planstate) |
1751 | { |
1752 | int n_steps; |
1753 | int partnatts; |
1754 | ListCell *lc; |
1755 | |
1756 | n_steps = list_length(pruning_steps); |
1757 | |
1758 | context->strategy = partkey->strategy; |
1759 | context->partnatts = partnatts = partkey->partnatts; |
1760 | context->nparts = partdesc->nparts; |
1761 | context->boundinfo = partdesc->boundinfo; |
1762 | context->partcollation = partkey->partcollation; |
1763 | context->partsupfunc = partkey->partsupfunc; |
1764 | |
1765 | /* We'll look up type-specific support functions as needed */ |
1766 | context->stepcmpfuncs = (FmgrInfo *) |
1767 | palloc0(sizeof(FmgrInfo) * n_steps * partnatts); |
1768 | |
1769 | context->ppccontext = CurrentMemoryContext; |
1770 | context->planstate = planstate; |
1771 | |
1772 | /* Initialize expression state for each expression we need */ |
1773 | context->exprstates = (ExprState **) |
1774 | palloc0(sizeof(ExprState *) * n_steps * partnatts); |
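
/*
* Note: stepcmpfuncs and exprstates are logically 2-D arrays indexed
* by (step_id, keyno); PruneCxtStateIdx flattens that to the offset
* step_id * partnatts + keyno.
*/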
1775 | foreach(lc, pruning_steps) |
1776 | { |
1777 | PartitionPruneStepOp *step = (PartitionPruneStepOp *) lfirst(lc); |
1778 | ListCell *lc2; |
1779 | int keyno; |
1780 | |
1781 | /* not needed for other step kinds */ |
1782 | if (!IsA(step, PartitionPruneStepOp)) |
1783 | continue; |
1784 | |
1785 | Assert(list_length(step->exprs) <= partnatts); |
1786 | |
1787 | keyno = 0; |
1788 | foreach(lc2, step->exprs) |
1789 | { |
1790 | Expr *expr = (Expr *) lfirst(lc2); |
1791 | |
1792 | /* not needed for Consts */ |
1793 | if (!IsA(expr, Const)) |
1794 | { |
1795 | int stateidx = PruneCxtStateIdx(partnatts, |
1796 | step->step.step_id, |
1797 | keyno); |
1798 | |
1799 | context->exprstates[stateidx] = |
1800 | ExecInitExpr(expr, context->planstate); |
1801 | } |
1802 | keyno++; |
1803 | } |
1804 | } |
1805 | } |
1806 | |
1807 | /* |
1808 | * ExecFindInitialMatchingSubPlans |
1809 | * Identify the set of subplans that cannot be eliminated by initial |
1810 | * pruning, disregarding any pruning constraints involving PARAM_EXEC |
1811 | * Params. |
1812 | * |
1813 | * If additional pruning passes will be required (because of PARAM_EXEC |
1814 | * Params), we must also update the translation data that allows conversion |
1815 | * of partition indexes into subplan indexes to account for the unneeded |
1816 | * subplans having been removed. |
1817 | * |
1818 | * Must only be called once per 'prunestate', and only if initial pruning |
1819 | * is required. |
1820 | * |
1821 | * 'nsubplans' must be passed as the total number of unpruned subplans. |
1822 | */ |
1823 | Bitmapset * |
1824 | ExecFindInitialMatchingSubPlans(PartitionPruneState *prunestate, int nsubplans) |
1825 | { |
1826 | Bitmapset *result = NULL; |
1827 | MemoryContext oldcontext; |
1828 | int i; |
1829 | |
1830 | /* Caller error if we get here without do_initial_prune */ |
1831 | Assert(prunestate->do_initial_prune); |
1832 | |
1833 | /* |
1834 | * Switch to a temp context to avoid leaking memory in the executor's |
1835 | * query-lifespan memory context. |
1836 | */ |
1837 | oldcontext = MemoryContextSwitchTo(prunestate->prune_context); |
1838 | |
1839 | /* |
1840 | * For each hierarchy, do the pruning tests, and add nondeletable |
1841 | * subplans' indexes to "result". |
1842 | */ |
1843 | for (i = 0; i < prunestate->num_partprunedata; i++) |
1844 | { |
1845 | PartitionPruningData *prunedata; |
1846 | PartitionedRelPruningData *pprune; |
1847 | |
1848 | prunedata = prunestate->partprunedata[i]; |
1849 | pprune = &prunedata->partrelprunedata[0]; |
1850 | |
1851 | /* Perform pruning without using PARAM_EXEC Params */ |
1852 | find_matching_subplans_recurse(prunedata, pprune, true, &result); |
1853 | |
1854 | /* Expression eval may have used space in node's ps_ExprContext too */ |
1855 | if (pprune->initial_pruning_steps) |
1856 | ResetExprContext(pprune->initial_context.planstate->ps_ExprContext); |
1857 | } |
1858 | |
1859 | /* Add in any subplans that partition pruning didn't account for */ |
1860 | result = bms_add_members(result, prunestate->other_subplans); |
1861 | |
1862 | MemoryContextSwitchTo(oldcontext); |
1863 | |
1864 | /* Copy result out of the temp context before we reset it */ |
1865 | result = bms_copy(result); |
1866 | |
1867 | MemoryContextReset(prunestate->prune_context); |
1868 | |
1869 | /* |
1870 | * If exec-time pruning is required and we pruned subplans above, then we |
1871 | * must re-sequence the subplan indexes so that ExecFindMatchingSubPlans |
1872 | * properly returns the indexes from the subplans which will remain after |
1873 | * execution of this function. |
1874 | * |
1875 | * We can safely skip this when !do_exec_prune, even though that leaves |
1876 | * invalid data in prunestate, because that data won't be consulted again |
1877 | * (cf initial Assert in ExecFindMatchingSubPlans). |
1878 | */ |
1879 | if (prunestate->do_exec_prune && bms_num_members(result) < nsubplans) |
1880 | { |
1881 | int *new_subplan_indexes; |
1882 | Bitmapset *new_other_subplans; |
1883 | int i; |
1884 | int newidx; |
1885 | |
1886 | /* |
1887 | * First we must build a temporary array which maps old subplan |
1888 | * indexes to new ones. For convenience of initialization, we use |
1889 | * 1-based indexes in this array and leave pruned items as 0. |
1890 | */ |
1891 | new_subplan_indexes = (int *) palloc0(sizeof(int) * nsubplans); |
1892 | newidx = 1; |
1893 | i = -1; |
1894 | while ((i = bms_next_member(result, i)) >= 0) |
1895 | { |
1896 | Assert(i < nsubplans); |
1897 | new_subplan_indexes[i] = newidx++; |
1898 | } |
1899 | |
1900 | /* |
1901 | * Now we can update each PartitionedRelPruneInfo's subplan_map with |
1902 | * new subplan indexes. We must also recompute its present_parts |
1903 | * bitmap. |
1904 | */ |
1905 | for (i = 0; i < prunestate->num_partprunedata; i++) |
1906 | { |
1907 | PartitionPruningData *prunedata = prunestate->partprunedata[i]; |
1908 | int j; |
1909 | |
1910 | /* |
1911 | * Within each hierarchy, we perform this loop in back-to-front |
1912 | * order so that we determine present_parts for the lowest-level |
1913 | * partitioned tables first. This way we can tell whether a |
1914 | * sub-partitioned table's partitions were entirely pruned so we |
1915 | * can exclude it from the current level's present_parts. |
1916 | */ |
1917 | for (j = prunedata->num_partrelprunedata - 1; j >= 0; j--) |
1918 | { |
1919 | PartitionedRelPruningData *pprune = &prunedata->partrelprunedata[j]; |
1920 | int nparts = pprune->nparts; |
1921 | int k; |
1922 | |
1923 | /* We just rebuild present_parts from scratch */ |
1924 | bms_free(pprune->present_parts); |
1925 | pprune->present_parts = NULL; |
1926 | |
1927 | for (k = 0; k < nparts; k++) |
1928 | { |
1929 | int oldidx = pprune->subplan_map[k]; |
1930 | int subidx; |
1931 | |
1932 | /* |
1933 | * If this partition existed as a subplan then change the |
1934 | * old subplan index to the new subplan index. The new |
1935 | * index may become -1 if the partition was pruned above, |
1936 | * or it may just come earlier in the subplan list due to |
1937 | * some subplans being removed earlier in the list. If |
1938 | * it's a subpartition, add it to present_parts unless |
1939 | * it's entirely pruned. |
1940 | */ |
1941 | if (oldidx >= 0) |
1942 | { |
1943 | Assert(oldidx < nsubplans); |
1944 | pprune->subplan_map[k] = new_subplan_indexes[oldidx] - 1; |
1945 | |
1946 | if (new_subplan_indexes[oldidx] > 0) |
1947 | pprune->present_parts = |
1948 | bms_add_member(pprune->present_parts, k); |
1949 | } |
1950 | else if ((subidx = pprune->subpart_map[k]) >= 0) |
1951 | { |
1952 | PartitionedRelPruningData *subprune; |
1953 | |
1954 | subprune = &prunedata->partrelprunedata[subidx]; |
1955 | |
1956 | if (!bms_is_empty(subprune->present_parts)) |
1957 | pprune->present_parts = |
1958 | bms_add_member(pprune->present_parts, k); |
1959 | } |
1960 | } |
1961 | } |
1962 | } |
1963 | |
1964 | /* |
1965 | * We must also recompute the other_subplans set, since indexes in it |
1966 | * may change. |
1967 | */ |
1968 | new_other_subplans = NULL; |
1969 | i = -1; |
1970 | while ((i = bms_next_member(prunestate->other_subplans, i)) >= 0) |
1971 | new_other_subplans = bms_add_member(new_other_subplans, |
1972 | new_subplan_indexes[i] - 1); |
1973 | |
1974 | bms_free(prunestate->other_subplans); |
1975 | prunestate->other_subplans = new_other_subplans; |
1976 | |
1977 | pfree(new_subplan_indexes); |
1978 | } |
1979 | |
1980 | return result; |
1981 | } |
1982 | |
1983 | /* |
1984 | * ExecFindMatchingSubPlans |
1985 | * Determine which subplans match the pruning steps detailed in |
1986 | * 'prunestate' for the current comparison expression values. |
1987 | * |
1988 | * Here we assume we may evaluate PARAM_EXEC Params. |
1989 | */ |
1990 | Bitmapset * |
1991 | ExecFindMatchingSubPlans(PartitionPruneState *prunestate) |
1992 | { |
1993 | Bitmapset *result = NULL; |
1994 | MemoryContext oldcontext; |
1995 | int i; |
1996 | |
1997 | /* |
1998 | * If !do_exec_prune, we've got problems because |
1999 | * ExecFindInitialMatchingSubPlans will not have bothered to update |
2000 | * prunestate for whatever pruning it did. |
2001 | */ |
2002 | Assert(prunestate->do_exec_prune); |
2003 | |
2004 | /* |
2005 | * Switch to a temp context to avoid leaking memory in the executor's |
2006 | * query-lifespan memory context. |
2007 | */ |
2008 | oldcontext = MemoryContextSwitchTo(prunestate->prune_context); |
2009 | |
2010 | /* |
2011 | * For each hierarchy, do the pruning tests, and add nondeletable |
2012 | * subplans' indexes to "result". |
2013 | */ |
2014 | for (i = 0; i < prunestate->num_partprunedata; i++) |
2015 | { |
2016 | PartitionPruningData *prunedata; |
2017 | PartitionedRelPruningData *pprune; |
2018 | |
2019 | prunedata = prunestate->partprunedata[i]; |
2020 | pprune = &prunedata->partrelprunedata[0]; |
2021 | |
2022 | find_matching_subplans_recurse(prunedata, pprune, false, &result); |
2023 | |
2024 | /* Expression eval may have used space in node's ps_ExprContext too */ |
2025 | if (pprune->exec_pruning_steps) |
2026 | ResetExprContext(pprune->exec_context.planstate->ps_ExprContext); |
2027 | } |
2028 | |
2029 | /* Add in any subplans that partition pruning didn't account for */ |
2030 | result = bms_add_members(result, prunestate->other_subplans); |
2031 | |
2032 | MemoryContextSwitchTo(oldcontext); |
2033 | |
2034 | /* Copy result out of the temp context before we reset it */ |
2035 | result = bms_copy(result); |
2036 | |
2037 | MemoryContextReset(prunestate->prune_context); |
2038 | |
2039 | return result; |
2040 | } |
2041 | |
2042 | /* |
2043 | * find_matching_subplans_recurse |
2044 | * Recursive worker function for ExecFindMatchingSubPlans and |
2045 | * ExecFindInitialMatchingSubPlans |
2046 | * |
2047 | * Adds valid (non-prunable) subplan IDs to *validsubplans |
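*
* For each partition index i returned by the pruning code, subplan_map[i]
* >= 0 identifies the subplan scanning that partition directly, while
* subpart_map[i] >= 0 identifies a sub-partitioned child table whose own
* PartitionedRelPruningData is recursed into.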
2048 | */ |
2049 | static void |
2050 | find_matching_subplans_recurse(PartitionPruningData *prunedata, |
2051 | PartitionedRelPruningData *pprune, |
2052 | bool initial_prune, |
2053 | Bitmapset **validsubplans) |
2054 | { |
2055 | Bitmapset *partset; |
2056 | int i; |
2057 | |
2058 | /* Guard against stack overflow due to overly deep partition hierarchy. */ |
2059 | check_stack_depth(); |
2060 | |
2061 | /* Only prune if pruning would be useful at this level. */ |
2062 | if (initial_prune && pprune->initial_pruning_steps) |
2063 | { |
2064 | partset = get_matching_partitions(&pprune->initial_context, |
2065 | pprune->initial_pruning_steps); |
2066 | } |
2067 | else if (!initial_prune && pprune->exec_pruning_steps) |
2068 | { |
2069 | partset = get_matching_partitions(&pprune->exec_context, |
2070 | pprune->exec_pruning_steps); |
2071 | } |
2072 | else |
2073 | { |
2074 | /* |
2075 | * If no pruning is to be done, just include all partitions at this |
2076 | * level. |
2077 | */ |
2078 | partset = pprune->present_parts; |
2079 | } |
2080 | |
2081 | /* Translate partset into subplan indexes */ |
2082 | i = -1; |
2083 | while ((i = bms_next_member(partset, i)) >= 0) |
2084 | { |
2085 | if (pprune->subplan_map[i] >= 0) |
2086 | *validsubplans = bms_add_member(*validsubplans, |
2087 | pprune->subplan_map[i]); |
2088 | else |
2089 | { |
2090 | int partidx = pprune->subpart_map[i]; |
2091 | |
2092 | if (partidx >= 0) |
2093 | find_matching_subplans_recurse(prunedata, |
2094 | &prunedata->partrelprunedata[partidx], |
2095 | initial_prune, validsubplans); |
2096 | else |
2097 | { |
2098 | /* |
2099 | * We get here if the planner already pruned all the sub- |
2100 | * partitions for this partition. Silently ignore this |
2101 | * partition in this case. The end result is the same: we |
2102 | * would have pruned all partitions just the same, but we |
2103 | * don't have any pruning steps to execute to verify this. |
2104 | */ |
2105 | } |
2106 | } |
2107 | } |
2108 | } |
2109 | |