1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * execAmi.c |
4 | * miscellaneous executor access method routines |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * Portions Copyright (c) 1994, Regents of the University of California |
8 | * |
9 | * src/backend/executor/execAmi.c |
10 | * |
11 | *------------------------------------------------------------------------- |
12 | */ |
13 | #include "postgres.h" |
14 | |
15 | #include "access/amapi.h" |
16 | #include "access/htup_details.h" |
17 | #include "executor/execdebug.h" |
18 | #include "executor/nodeAgg.h" |
19 | #include "executor/nodeAppend.h" |
20 | #include "executor/nodeBitmapAnd.h" |
21 | #include "executor/nodeBitmapHeapscan.h" |
22 | #include "executor/nodeBitmapIndexscan.h" |
23 | #include "executor/nodeBitmapOr.h" |
24 | #include "executor/nodeCtescan.h" |
25 | #include "executor/nodeCustom.h" |
26 | #include "executor/nodeForeignscan.h" |
27 | #include "executor/nodeFunctionscan.h" |
28 | #include "executor/nodeGather.h" |
29 | #include "executor/nodeGatherMerge.h" |
30 | #include "executor/nodeGroup.h" |
31 | #include "executor/nodeGroup.h" |
32 | #include "executor/nodeHash.h" |
33 | #include "executor/nodeHashjoin.h" |
34 | #include "executor/nodeIndexonlyscan.h" |
35 | #include "executor/nodeIndexscan.h" |
36 | #include "executor/nodeLimit.h" |
37 | #include "executor/nodeLockRows.h" |
38 | #include "executor/nodeMaterial.h" |
39 | #include "executor/nodeMergeAppend.h" |
40 | #include "executor/nodeMergejoin.h" |
41 | #include "executor/nodeModifyTable.h" |
42 | #include "executor/nodeNamedtuplestorescan.h" |
43 | #include "executor/nodeNestloop.h" |
44 | #include "executor/nodeProjectSet.h" |
45 | #include "executor/nodeRecursiveunion.h" |
46 | #include "executor/nodeResult.h" |
47 | #include "executor/nodeSamplescan.h" |
48 | #include "executor/nodeSeqscan.h" |
49 | #include "executor/nodeSetOp.h" |
50 | #include "executor/nodeSort.h" |
51 | #include "executor/nodeSubplan.h" |
52 | #include "executor/nodeSubqueryscan.h" |
53 | #include "executor/nodeTableFuncscan.h" |
54 | #include "executor/nodeTidscan.h" |
55 | #include "executor/nodeUnique.h" |
56 | #include "executor/nodeValuesscan.h" |
57 | #include "executor/nodeWindowAgg.h" |
58 | #include "executor/nodeWorktablescan.h" |
59 | #include "nodes/extensible.h" |
60 | #include "nodes/nodeFuncs.h" |
61 | #include "nodes/pathnodes.h" |
62 | #include "utils/rel.h" |
63 | #include "utils/syscache.h" |
64 | |
65 | |
66 | static bool IndexSupportsBackwardScan(Oid indexid); |
67 | |
68 | |
69 | /* |
70 | * ExecReScan |
71 | * Reset a plan node so that its output can be re-scanned. |
72 | * |
73 | * Note that if the plan node has parameters that have changed value, |
74 | * the output might be different from last time. |
75 | */ |
76 | void |
77 | ExecReScan(PlanState *node) |
78 | { |
79 | /* If collecting timing stats, update them */ |
80 | if (node->instrument) |
81 | InstrEndLoop(node->instrument); |
82 | |
83 | /* |
84 | * If we have changed parameters, propagate that info. |
85 | * |
86 | * Note: ExecReScanSetParamPlan() can add bits to node->chgParam, |
87 | * corresponding to the output param(s) that the InitPlan will update. |
88 | * Since we make only one pass over the list, that means that an InitPlan |
89 | * can depend on the output param(s) of a sibling InitPlan only if that |
90 | * sibling appears earlier in the list. This is workable for now given |
91 | * the limited ways in which one InitPlan could depend on another, but |
92 | * eventually we might need to work harder (or else make the planner |
93 | * enlarge the extParam/allParam sets to include the params of depended-on |
94 | * InitPlans). |
95 | */ |
96 | if (node->chgParam != NULL) |
97 | { |
98 | ListCell *l; |
99 | |
100 | foreach(l, node->initPlan) |
101 | { |
102 | SubPlanState *sstate = (SubPlanState *) lfirst(l); |
103 | PlanState *splan = sstate->planstate; |
104 | |
105 | if (splan->plan->extParam != NULL) /* don't care about child |
106 | * local Params */ |
107 | UpdateChangedParamSet(splan, node->chgParam); |
108 | if (splan->chgParam != NULL) |
109 | ExecReScanSetParamPlan(sstate, node); |
110 | } |
111 | foreach(l, node->subPlan) |
112 | { |
113 | SubPlanState *sstate = (SubPlanState *) lfirst(l); |
114 | PlanState *splan = sstate->planstate; |
115 | |
116 | if (splan->plan->extParam != NULL) |
117 | UpdateChangedParamSet(splan, node->chgParam); |
118 | } |
119 | /* Well. Now set chgParam for left/right trees. */ |
120 | if (node->lefttree != NULL) |
121 | UpdateChangedParamSet(node->lefttree, node->chgParam); |
122 | if (node->righttree != NULL) |
123 | UpdateChangedParamSet(node->righttree, node->chgParam); |
124 | } |
125 | |
126 | /* Call expression callbacks */ |
127 | if (node->ps_ExprContext) |
128 | ReScanExprContext(node->ps_ExprContext); |
129 | |
130 | /* And do node-type-specific processing */ |
131 | switch (nodeTag(node)) |
132 | { |
133 | case T_ResultState: |
134 | ExecReScanResult((ResultState *) node); |
135 | break; |
136 | |
137 | case T_ProjectSetState: |
138 | ExecReScanProjectSet((ProjectSetState *) node); |
139 | break; |
140 | |
141 | case T_ModifyTableState: |
142 | ExecReScanModifyTable((ModifyTableState *) node); |
143 | break; |
144 | |
145 | case T_AppendState: |
146 | ExecReScanAppend((AppendState *) node); |
147 | break; |
148 | |
149 | case T_MergeAppendState: |
150 | ExecReScanMergeAppend((MergeAppendState *) node); |
151 | break; |
152 | |
153 | case T_RecursiveUnionState: |
154 | ExecReScanRecursiveUnion((RecursiveUnionState *) node); |
155 | break; |
156 | |
157 | case T_BitmapAndState: |
158 | ExecReScanBitmapAnd((BitmapAndState *) node); |
159 | break; |
160 | |
161 | case T_BitmapOrState: |
162 | ExecReScanBitmapOr((BitmapOrState *) node); |
163 | break; |
164 | |
165 | case T_SeqScanState: |
166 | ExecReScanSeqScan((SeqScanState *) node); |
167 | break; |
168 | |
169 | case T_SampleScanState: |
170 | ExecReScanSampleScan((SampleScanState *) node); |
171 | break; |
172 | |
173 | case T_GatherState: |
174 | ExecReScanGather((GatherState *) node); |
175 | break; |
176 | |
177 | case T_GatherMergeState: |
178 | ExecReScanGatherMerge((GatherMergeState *) node); |
179 | break; |
180 | |
181 | case T_IndexScanState: |
182 | ExecReScanIndexScan((IndexScanState *) node); |
183 | break; |
184 | |
185 | case T_IndexOnlyScanState: |
186 | ExecReScanIndexOnlyScan((IndexOnlyScanState *) node); |
187 | break; |
188 | |
189 | case T_BitmapIndexScanState: |
190 | ExecReScanBitmapIndexScan((BitmapIndexScanState *) node); |
191 | break; |
192 | |
193 | case T_BitmapHeapScanState: |
194 | ExecReScanBitmapHeapScan((BitmapHeapScanState *) node); |
195 | break; |
196 | |
197 | case T_TidScanState: |
198 | ExecReScanTidScan((TidScanState *) node); |
199 | break; |
200 | |
201 | case T_SubqueryScanState: |
202 | ExecReScanSubqueryScan((SubqueryScanState *) node); |
203 | break; |
204 | |
205 | case T_FunctionScanState: |
206 | ExecReScanFunctionScan((FunctionScanState *) node); |
207 | break; |
208 | |
209 | case T_TableFuncScanState: |
210 | ExecReScanTableFuncScan((TableFuncScanState *) node); |
211 | break; |
212 | |
213 | case T_ValuesScanState: |
214 | ExecReScanValuesScan((ValuesScanState *) node); |
215 | break; |
216 | |
217 | case T_CteScanState: |
218 | ExecReScanCteScan((CteScanState *) node); |
219 | break; |
220 | |
221 | case T_NamedTuplestoreScanState: |
222 | ExecReScanNamedTuplestoreScan((NamedTuplestoreScanState *) node); |
223 | break; |
224 | |
225 | case T_WorkTableScanState: |
226 | ExecReScanWorkTableScan((WorkTableScanState *) node); |
227 | break; |
228 | |
229 | case T_ForeignScanState: |
230 | ExecReScanForeignScan((ForeignScanState *) node); |
231 | break; |
232 | |
233 | case T_CustomScanState: |
234 | ExecReScanCustomScan((CustomScanState *) node); |
235 | break; |
236 | |
237 | case T_NestLoopState: |
238 | ExecReScanNestLoop((NestLoopState *) node); |
239 | break; |
240 | |
241 | case T_MergeJoinState: |
242 | ExecReScanMergeJoin((MergeJoinState *) node); |
243 | break; |
244 | |
245 | case T_HashJoinState: |
246 | ExecReScanHashJoin((HashJoinState *) node); |
247 | break; |
248 | |
249 | case T_MaterialState: |
250 | ExecReScanMaterial((MaterialState *) node); |
251 | break; |
252 | |
253 | case T_SortState: |
254 | ExecReScanSort((SortState *) node); |
255 | break; |
256 | |
257 | case T_GroupState: |
258 | ExecReScanGroup((GroupState *) node); |
259 | break; |
260 | |
261 | case T_AggState: |
262 | ExecReScanAgg((AggState *) node); |
263 | break; |
264 | |
265 | case T_WindowAggState: |
266 | ExecReScanWindowAgg((WindowAggState *) node); |
267 | break; |
268 | |
269 | case T_UniqueState: |
270 | ExecReScanUnique((UniqueState *) node); |
271 | break; |
272 | |
273 | case T_HashState: |
274 | ExecReScanHash((HashState *) node); |
275 | break; |
276 | |
277 | case T_SetOpState: |
278 | ExecReScanSetOp((SetOpState *) node); |
279 | break; |
280 | |
281 | case T_LockRowsState: |
282 | ExecReScanLockRows((LockRowsState *) node); |
283 | break; |
284 | |
285 | case T_LimitState: |
286 | ExecReScanLimit((LimitState *) node); |
287 | break; |
288 | |
289 | default: |
290 | elog(ERROR, "unrecognized node type: %d" , (int) nodeTag(node)); |
291 | break; |
292 | } |
293 | |
294 | if (node->chgParam != NULL) |
295 | { |
296 | bms_free(node->chgParam); |
297 | node->chgParam = NULL; |
298 | } |
299 | } |
300 | |
301 | /* |
302 | * ExecMarkPos |
303 | * |
304 | * Marks the current scan position. |
305 | * |
306 | * NOTE: mark/restore capability is currently needed only for plan nodes |
307 | * that are the immediate inner child of a MergeJoin node. Since MergeJoin |
308 | * requires sorted input, there is never any need to support mark/restore in |
309 | * node types that cannot produce sorted output. There are some cases in |
310 | * which a node can pass through sorted data from its child; if we don't |
311 | * implement mark/restore for such a node type, the planner compensates by |
312 | * inserting a Material node above that node. |
313 | */ |
314 | void |
315 | ExecMarkPos(PlanState *node) |
316 | { |
317 | switch (nodeTag(node)) |
318 | { |
319 | case T_IndexScanState: |
320 | ExecIndexMarkPos((IndexScanState *) node); |
321 | break; |
322 | |
323 | case T_IndexOnlyScanState: |
324 | ExecIndexOnlyMarkPos((IndexOnlyScanState *) node); |
325 | break; |
326 | |
327 | case T_CustomScanState: |
328 | ExecCustomMarkPos((CustomScanState *) node); |
329 | break; |
330 | |
331 | case T_MaterialState: |
332 | ExecMaterialMarkPos((MaterialState *) node); |
333 | break; |
334 | |
335 | case T_SortState: |
336 | ExecSortMarkPos((SortState *) node); |
337 | break; |
338 | |
339 | case T_ResultState: |
340 | ExecResultMarkPos((ResultState *) node); |
341 | break; |
342 | |
343 | default: |
344 | /* don't make hard error unless caller asks to restore... */ |
345 | elog(DEBUG2, "unrecognized node type: %d" , (int) nodeTag(node)); |
346 | break; |
347 | } |
348 | } |
349 | |
350 | /* |
351 | * ExecRestrPos |
352 | * |
353 | * restores the scan position previously saved with ExecMarkPos() |
354 | * |
355 | * NOTE: the semantics of this are that the first ExecProcNode following |
356 | * the restore operation will yield the same tuple as the first one following |
357 | * the mark operation. It is unspecified what happens to the plan node's |
358 | * result TupleTableSlot. (In most cases the result slot is unchanged by |
359 | * a restore, but the node may choose to clear it or to load it with the |
360 | * restored-to tuple.) Hence the caller should discard any previously |
361 | * returned TupleTableSlot after doing a restore. |
362 | */ |
363 | void |
364 | ExecRestrPos(PlanState *node) |
365 | { |
366 | switch (nodeTag(node)) |
367 | { |
368 | case T_IndexScanState: |
369 | ExecIndexRestrPos((IndexScanState *) node); |
370 | break; |
371 | |
372 | case T_IndexOnlyScanState: |
373 | ExecIndexOnlyRestrPos((IndexOnlyScanState *) node); |
374 | break; |
375 | |
376 | case T_CustomScanState: |
377 | ExecCustomRestrPos((CustomScanState *) node); |
378 | break; |
379 | |
380 | case T_MaterialState: |
381 | ExecMaterialRestrPos((MaterialState *) node); |
382 | break; |
383 | |
384 | case T_SortState: |
385 | ExecSortRestrPos((SortState *) node); |
386 | break; |
387 | |
388 | case T_ResultState: |
389 | ExecResultRestrPos((ResultState *) node); |
390 | break; |
391 | |
392 | default: |
393 | elog(ERROR, "unrecognized node type: %d" , (int) nodeTag(node)); |
394 | break; |
395 | } |
396 | } |
397 | |
398 | /* |
399 | * ExecSupportsMarkRestore - does a Path support mark/restore? |
400 | * |
401 | * This is used during planning and so must accept a Path, not a Plan. |
402 | * We keep it here to be adjacent to the routines above, which also must |
403 | * know which plan types support mark/restore. |
404 | */ |
405 | bool |
406 | ExecSupportsMarkRestore(Path *pathnode) |
407 | { |
408 | /* |
409 | * For consistency with the routines above, we do not examine the nodeTag |
410 | * but rather the pathtype, which is the Plan node type the Path would |
411 | * produce. |
412 | */ |
413 | switch (pathnode->pathtype) |
414 | { |
415 | case T_IndexScan: |
416 | case T_IndexOnlyScan: |
417 | case T_Material: |
418 | case T_Sort: |
419 | return true; |
420 | |
421 | case T_CustomScan: |
422 | { |
423 | CustomPath *customPath = castNode(CustomPath, pathnode); |
424 | |
425 | if (customPath->flags & CUSTOMPATH_SUPPORT_MARK_RESTORE) |
426 | return true; |
427 | return false; |
428 | } |
429 | case T_Result: |
430 | |
431 | /* |
432 | * Result supports mark/restore iff it has a child plan that does. |
433 | * |
434 | * We have to be careful here because there is more than one Path |
435 | * type that can produce a Result plan node. |
436 | */ |
437 | if (IsA(pathnode, ProjectionPath)) |
438 | return ExecSupportsMarkRestore(((ProjectionPath *) pathnode)->subpath); |
439 | else if (IsA(pathnode, MinMaxAggPath)) |
440 | return false; /* childless Result */ |
441 | else if (IsA(pathnode, GroupResultPath)) |
442 | return false; /* childless Result */ |
443 | else |
444 | { |
445 | /* Simple RTE_RESULT base relation */ |
446 | Assert(IsA(pathnode, Path)); |
447 | return false; /* childless Result */ |
448 | } |
449 | |
450 | case T_Append: |
451 | { |
452 | AppendPath *appendPath = castNode(AppendPath, pathnode); |
453 | |
454 | /* |
455 | * If there's exactly one child, then there will be no Append |
456 | * in the final plan, so we can handle mark/restore if the |
457 | * child plan node can. |
458 | */ |
459 | if (list_length(appendPath->subpaths) == 1) |
460 | return ExecSupportsMarkRestore((Path *) linitial(appendPath->subpaths)); |
461 | /* Otherwise, Append can't handle it */ |
462 | return false; |
463 | } |
464 | |
465 | case T_MergeAppend: |
466 | { |
467 | MergeAppendPath *mapath = castNode(MergeAppendPath, pathnode); |
468 | |
469 | /* |
470 | * Like the Append case above, single-subpath MergeAppends |
471 | * won't be in the final plan, so just return the child's |
472 | * mark/restore ability. |
473 | */ |
474 | if (list_length(mapath->subpaths) == 1) |
475 | return ExecSupportsMarkRestore((Path *) linitial(mapath->subpaths)); |
476 | /* Otherwise, MergeAppend can't handle it */ |
477 | return false; |
478 | } |
479 | |
480 | default: |
481 | break; |
482 | } |
483 | |
484 | return false; |
485 | } |
486 | |
487 | /* |
488 | * ExecSupportsBackwardScan - does a plan type support backwards scanning? |
489 | * |
490 | * Ideally, all plan types would support backwards scan, but that seems |
491 | * unlikely to happen soon. In some cases, a plan node passes the backwards |
492 | * scan down to its children, and so supports backwards scan only if its |
493 | * children do. Therefore, this routine must be passed a complete plan tree. |
494 | */ |
495 | bool |
496 | ExecSupportsBackwardScan(Plan *node) |
497 | { |
498 | if (node == NULL) |
499 | return false; |
500 | |
501 | /* |
502 | * Parallel-aware nodes return a subset of the tuples in each worker, and |
503 | * in general we can't expect to have enough bookkeeping state to know |
504 | * which ones we returned in this worker as opposed to some other worker. |
505 | */ |
506 | if (node->parallel_aware) |
507 | return false; |
508 | |
509 | switch (nodeTag(node)) |
510 | { |
511 | case T_Result: |
512 | if (outerPlan(node) != NULL) |
513 | return ExecSupportsBackwardScan(outerPlan(node)); |
514 | else |
515 | return false; |
516 | |
517 | case T_Append: |
518 | { |
519 | ListCell *l; |
520 | |
521 | foreach(l, ((Append *) node)->appendplans) |
522 | { |
523 | if (!ExecSupportsBackwardScan((Plan *) lfirst(l))) |
524 | return false; |
525 | } |
526 | /* need not check tlist because Append doesn't evaluate it */ |
527 | return true; |
528 | } |
529 | |
530 | case T_SampleScan: |
531 | /* Simplify life for tablesample methods by disallowing this */ |
532 | return false; |
533 | |
534 | case T_Gather: |
535 | return false; |
536 | |
537 | case T_IndexScan: |
538 | return IndexSupportsBackwardScan(((IndexScan *) node)->indexid); |
539 | |
540 | case T_IndexOnlyScan: |
541 | return IndexSupportsBackwardScan(((IndexOnlyScan *) node)->indexid); |
542 | |
543 | case T_SubqueryScan: |
544 | return ExecSupportsBackwardScan(((SubqueryScan *) node)->subplan); |
545 | |
546 | case T_CustomScan: |
547 | { |
548 | uint32 flags = ((CustomScan *) node)->flags; |
549 | |
550 | if (flags & CUSTOMPATH_SUPPORT_BACKWARD_SCAN) |
551 | return true; |
552 | } |
553 | return false; |
554 | |
555 | case T_SeqScan: |
556 | case T_TidScan: |
557 | case T_FunctionScan: |
558 | case T_ValuesScan: |
559 | case T_CteScan: |
560 | case T_Material: |
561 | case T_Sort: |
562 | return true; |
563 | |
564 | case T_LockRows: |
565 | case T_Limit: |
566 | return ExecSupportsBackwardScan(outerPlan(node)); |
567 | |
568 | default: |
569 | return false; |
570 | } |
571 | } |
572 | |
573 | /* |
574 | * An IndexScan or IndexOnlyScan node supports backward scan only if the |
575 | * index's AM does. |
576 | */ |
577 | static bool |
578 | IndexSupportsBackwardScan(Oid indexid) |
579 | { |
580 | bool result; |
581 | HeapTuple ht_idxrel; |
582 | Form_pg_class idxrelrec; |
583 | IndexAmRoutine *amroutine; |
584 | |
585 | /* Fetch the pg_class tuple of the index relation */ |
586 | ht_idxrel = SearchSysCache1(RELOID, ObjectIdGetDatum(indexid)); |
587 | if (!HeapTupleIsValid(ht_idxrel)) |
588 | elog(ERROR, "cache lookup failed for relation %u" , indexid); |
589 | idxrelrec = (Form_pg_class) GETSTRUCT(ht_idxrel); |
590 | |
591 | /* Fetch the index AM's API struct */ |
592 | amroutine = GetIndexAmRoutineByAmId(idxrelrec->relam, false); |
593 | |
594 | result = amroutine->amcanbackward; |
595 | |
596 | pfree(amroutine); |
597 | ReleaseSysCache(ht_idxrel); |
598 | |
599 | return result; |
600 | } |
601 | |
602 | /* |
603 | * ExecMaterializesOutput - does a plan type materialize its output? |
604 | * |
605 | * Returns true if the plan node type is one that automatically materializes |
606 | * its output (typically by keeping it in a tuplestore). For such plans, |
607 | * a rescan without any parameter change will have zero startup cost and |
608 | * very low per-tuple cost. |
609 | */ |
610 | bool |
611 | ExecMaterializesOutput(NodeTag plantype) |
612 | { |
613 | switch (plantype) |
614 | { |
615 | case T_Material: |
616 | case T_FunctionScan: |
617 | case T_TableFuncScan: |
618 | case T_CteScan: |
619 | case T_NamedTuplestoreScan: |
620 | case T_WorkTableScan: |
621 | case T_Sort: |
622 | return true; |
623 | |
624 | default: |
625 | break; |
626 | } |
627 | |
628 | return false; |
629 | } |
630 | |