1/*-------------------------------------------------------------------------
2 *
3 * nodeTidscan.c
4 * Routines to support direct tid scans of relations
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/executor/nodeTidscan.c
12 *
13 *-------------------------------------------------------------------------
14 */
15/*
16 * INTERFACE ROUTINES
17 *
18 * ExecTidScan scans a relation using tids
19 * ExecInitTidScan creates and initializes state info.
20 * ExecReScanTidScan rescans the tid relation.
21 * ExecEndTidScan releases all storage.
22 */
23#include "postgres.h"
24
25#include "access/sysattr.h"
26#include "access/tableam.h"
27#include "catalog/pg_type.h"
28#include "executor/execdebug.h"
29#include "executor/nodeTidscan.h"
30#include "miscadmin.h"
31#include "nodes/nodeFuncs.h"
32#include "storage/bufmgr.h"
33#include "utils/array.h"
34#include "utils/rel.h"
35
36
37#define IsCTIDVar(node) \
38 ((node) != NULL && \
39 IsA((node), Var) && \
40 ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \
41 ((Var *) (node))->varlevelsup == 0)
42
43/* one element in tss_tidexprs */
44typedef struct TidExpr
45{
46 ExprState *exprstate; /* ExprState for a TID-yielding subexpr */
47 bool isarray; /* if true, it yields tid[] not just tid */
48 CurrentOfExpr *cexpr; /* alternatively, we can have CURRENT OF */
49} TidExpr;
50
51static void TidExprListCreate(TidScanState *tidstate);
52static void TidListEval(TidScanState *tidstate);
53static int itemptr_comparator(const void *a, const void *b);
54static TupleTableSlot *TidNext(TidScanState *node);
55
56
57/*
58 * Extract the qual subexpressions that yield TIDs to search for,
59 * and compile them into ExprStates if they're ordinary expressions.
60 *
61 * CURRENT OF is a special case that we can't compile usefully;
62 * just drop it into the TidExpr list as-is.
63 */
64static void
65TidExprListCreate(TidScanState *tidstate)
66{
67 TidScan *node = (TidScan *) tidstate->ss.ps.plan;
68 ListCell *l;
69
70 tidstate->tss_tidexprs = NIL;
71 tidstate->tss_isCurrentOf = false;
72
73 foreach(l, node->tidquals)
74 {
75 Expr *expr = (Expr *) lfirst(l);
76 TidExpr *tidexpr = (TidExpr *) palloc0(sizeof(TidExpr));
77
78 if (is_opclause(expr))
79 {
80 Node *arg1;
81 Node *arg2;
82
83 arg1 = get_leftop(expr);
84 arg2 = get_rightop(expr);
85 if (IsCTIDVar(arg1))
86 tidexpr->exprstate = ExecInitExpr((Expr *) arg2,
87 &tidstate->ss.ps);
88 else if (IsCTIDVar(arg2))
89 tidexpr->exprstate = ExecInitExpr((Expr *) arg1,
90 &tidstate->ss.ps);
91 else
92 elog(ERROR, "could not identify CTID variable");
93 tidexpr->isarray = false;
94 }
95 else if (expr && IsA(expr, ScalarArrayOpExpr))
96 {
97 ScalarArrayOpExpr *saex = (ScalarArrayOpExpr *) expr;
98
99 Assert(IsCTIDVar(linitial(saex->args)));
100 tidexpr->exprstate = ExecInitExpr(lsecond(saex->args),
101 &tidstate->ss.ps);
102 tidexpr->isarray = true;
103 }
104 else if (expr && IsA(expr, CurrentOfExpr))
105 {
106 CurrentOfExpr *cexpr = (CurrentOfExpr *) expr;
107
108 tidexpr->cexpr = cexpr;
109 tidstate->tss_isCurrentOf = true;
110 }
111 else
112 elog(ERROR, "could not identify CTID expression");
113
114 tidstate->tss_tidexprs = lappend(tidstate->tss_tidexprs, tidexpr);
115 }
116
117 /* CurrentOfExpr could never appear OR'd with something else */
118 Assert(list_length(tidstate->tss_tidexprs) == 1 ||
119 !tidstate->tss_isCurrentOf);
120}
121
122/*
123 * Compute the list of TIDs to be visited, by evaluating the expressions
124 * for them.
125 *
126 * (The result is actually an array, not a list.)
127 */
128static void
129TidListEval(TidScanState *tidstate)
130{
131 ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
132 TableScanDesc scan;
133 ItemPointerData *tidList;
134 int numAllocTids;
135 int numTids;
136 ListCell *l;
137
138 /*
139 * Start scan on-demand - initializing a scan isn't free (e.g. heap stats
140 * the size of the table), so it makes sense to delay that until needed -
141 * the node might never get executed.
142 */
143 if (tidstate->ss.ss_currentScanDesc == NULL)
144 tidstate->ss.ss_currentScanDesc =
145 table_beginscan(tidstate->ss.ss_currentRelation,
146 tidstate->ss.ps.state->es_snapshot,
147 0, NULL);
148 scan = tidstate->ss.ss_currentScanDesc;
149
150 /*
151 * We initialize the array with enough slots for the case that all quals
152 * are simple OpExprs or CurrentOfExprs. If there are any
153 * ScalarArrayOpExprs, we may have to enlarge the array.
154 */
155 numAllocTids = list_length(tidstate->tss_tidexprs);
156 tidList = (ItemPointerData *)
157 palloc(numAllocTids * sizeof(ItemPointerData));
158 numTids = 0;
159
160 foreach(l, tidstate->tss_tidexprs)
161 {
162 TidExpr *tidexpr = (TidExpr *) lfirst(l);
163 ItemPointer itemptr;
164 bool isNull;
165
166 if (tidexpr->exprstate && !tidexpr->isarray)
167 {
168 itemptr = (ItemPointer)
169 DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate,
170 econtext,
171 &isNull));
172 if (isNull)
173 continue;
174
175 /*
176 * We silently discard any TIDs that the AM considers invalid
177 * (E.g. for heap, they could be out of range at the time of scan
178 * start. Since we hold at least AccessShareLock on the table, it
179 * won't be possible for someone to truncate away the blocks we
180 * intend to visit.).
181 */
182 if (!table_tuple_tid_valid(scan, itemptr))
183 continue;
184
185 if (numTids >= numAllocTids)
186 {
187 numAllocTids *= 2;
188 tidList = (ItemPointerData *)
189 repalloc(tidList,
190 numAllocTids * sizeof(ItemPointerData));
191 }
192 tidList[numTids++] = *itemptr;
193 }
194 else if (tidexpr->exprstate && tidexpr->isarray)
195 {
196 Datum arraydatum;
197 ArrayType *itemarray;
198 Datum *ipdatums;
199 bool *ipnulls;
200 int ndatums;
201 int i;
202
203 arraydatum = ExecEvalExprSwitchContext(tidexpr->exprstate,
204 econtext,
205 &isNull);
206 if (isNull)
207 continue;
208 itemarray = DatumGetArrayTypeP(arraydatum);
209 deconstruct_array(itemarray,
210 TIDOID, sizeof(ItemPointerData), false, 's',
211 &ipdatums, &ipnulls, &ndatums);
212 if (numTids + ndatums > numAllocTids)
213 {
214 numAllocTids = numTids + ndatums;
215 tidList = (ItemPointerData *)
216 repalloc(tidList,
217 numAllocTids * sizeof(ItemPointerData));
218 }
219 for (i = 0; i < ndatums; i++)
220 {
221 if (ipnulls[i])
222 continue;
223
224 itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
225
226 if (!table_tuple_tid_valid(scan, itemptr))
227 continue;
228
229 tidList[numTids++] = *itemptr;
230 }
231 pfree(ipdatums);
232 pfree(ipnulls);
233 }
234 else
235 {
236 ItemPointerData cursor_tid;
237
238 Assert(tidexpr->cexpr);
239 if (execCurrentOf(tidexpr->cexpr, econtext,
240 RelationGetRelid(tidstate->ss.ss_currentRelation),
241 &cursor_tid))
242 {
243 if (numTids >= numAllocTids)
244 {
245 numAllocTids *= 2;
246 tidList = (ItemPointerData *)
247 repalloc(tidList,
248 numAllocTids * sizeof(ItemPointerData));
249 }
250 tidList[numTids++] = cursor_tid;
251 }
252 }
253 }
254
255 /*
256 * Sort the array of TIDs into order, and eliminate duplicates.
257 * Eliminating duplicates is necessary since we want OR semantics across
258 * the list. Sorting makes it easier to detect duplicates, and as a bonus
259 * ensures that we will visit the heap in the most efficient way.
260 */
261 if (numTids > 1)
262 {
263 int lastTid;
264 int i;
265
266 /* CurrentOfExpr could never appear OR'd with something else */
267 Assert(!tidstate->tss_isCurrentOf);
268
269 qsort((void *) tidList, numTids, sizeof(ItemPointerData),
270 itemptr_comparator);
271 lastTid = 0;
272 for (i = 1; i < numTids; i++)
273 {
274 if (!ItemPointerEquals(&tidList[lastTid], &tidList[i]))
275 tidList[++lastTid] = tidList[i];
276 }
277 numTids = lastTid + 1;
278 }
279
280 tidstate->tss_TidList = tidList;
281 tidstate->tss_NumTids = numTids;
282 tidstate->tss_TidPtr = -1;
283}
284
285/*
286 * qsort comparator for ItemPointerData items
287 */
288static int
289itemptr_comparator(const void *a, const void *b)
290{
291 const ItemPointerData *ipa = (const ItemPointerData *) a;
292 const ItemPointerData *ipb = (const ItemPointerData *) b;
293 BlockNumber ba = ItemPointerGetBlockNumber(ipa);
294 BlockNumber bb = ItemPointerGetBlockNumber(ipb);
295 OffsetNumber oa = ItemPointerGetOffsetNumber(ipa);
296 OffsetNumber ob = ItemPointerGetOffsetNumber(ipb);
297
298 if (ba < bb)
299 return -1;
300 if (ba > bb)
301 return 1;
302 if (oa < ob)
303 return -1;
304 if (oa > ob)
305 return 1;
306 return 0;
307}
308
309/* ----------------------------------------------------------------
310 * TidNext
311 *
312 * Retrieve a tuple from the TidScan node's currentRelation
313 * using the tids in the TidScanState information.
314 *
315 * ----------------------------------------------------------------
316 */
317static TupleTableSlot *
318TidNext(TidScanState *node)
319{
320 EState *estate;
321 ScanDirection direction;
322 Snapshot snapshot;
323 TableScanDesc scan;
324 Relation heapRelation;
325 TupleTableSlot *slot;
326 ItemPointerData *tidList;
327 int numTids;
328 bool bBackward;
329
330 /*
331 * extract necessary information from tid scan node
332 */
333 estate = node->ss.ps.state;
334 direction = estate->es_direction;
335 snapshot = estate->es_snapshot;
336 heapRelation = node->ss.ss_currentRelation;
337 slot = node->ss.ss_ScanTupleSlot;
338
339 /*
340 * First time through, compute the list of TIDs to be visited
341 */
342 if (node->tss_TidList == NULL)
343 TidListEval(node);
344
345 scan = node->ss.ss_currentScanDesc;
346 tidList = node->tss_TidList;
347 numTids = node->tss_NumTids;
348
349 /*
350 * Initialize or advance scan position, depending on direction.
351 */
352 bBackward = ScanDirectionIsBackward(direction);
353 if (bBackward)
354 {
355 if (node->tss_TidPtr < 0)
356 {
357 /* initialize for backward scan */
358 node->tss_TidPtr = numTids - 1;
359 }
360 else
361 node->tss_TidPtr--;
362 }
363 else
364 {
365 if (node->tss_TidPtr < 0)
366 {
367 /* initialize for forward scan */
368 node->tss_TidPtr = 0;
369 }
370 else
371 node->tss_TidPtr++;
372 }
373
374 while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids)
375 {
376 ItemPointerData tid = tidList[node->tss_TidPtr];
377
378 /*
379 * For WHERE CURRENT OF, the tuple retrieved from the cursor might
380 * since have been updated; if so, we should fetch the version that is
381 * current according to our snapshot.
382 */
383 if (node->tss_isCurrentOf)
384 table_tuple_get_latest_tid(scan, &tid);
385
386 if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot))
387 return slot;
388
389 /* Bad TID or failed snapshot qual; try next */
390 if (bBackward)
391 node->tss_TidPtr--;
392 else
393 node->tss_TidPtr++;
394
395 CHECK_FOR_INTERRUPTS();
396 }
397
398 /*
399 * if we get here it means the tid scan failed so we are at the end of the
400 * scan..
401 */
402 return ExecClearTuple(slot);
403}
404
405/*
406 * TidRecheck -- access method routine to recheck a tuple in EvalPlanQual
407 */
408static bool
409TidRecheck(TidScanState *node, TupleTableSlot *slot)
410{
411 /*
412 * XXX shouldn't we check here to make sure tuple matches TID list? In
413 * runtime-key case this is not certain, is it? However, in the WHERE
414 * CURRENT OF case it might not match anyway ...
415 */
416 return true;
417}
418
419
420/* ----------------------------------------------------------------
421 * ExecTidScan(node)
422 *
423 * Scans the relation using tids and returns
424 * the next qualifying tuple in the direction specified.
425 * We call the ExecScan() routine and pass it the appropriate
426 * access method functions.
427 *
428 * Conditions:
429 * -- the "cursor" maintained by the AMI is positioned at the tuple
430 * returned previously.
431 *
432 * Initial States:
433 * -- the relation indicated is opened for scanning so that the
434 * "cursor" is positioned before the first qualifying tuple.
435 * -- tidPtr is -1.
436 * ----------------------------------------------------------------
437 */
438static TupleTableSlot *
439ExecTidScan(PlanState *pstate)
440{
441 TidScanState *node = castNode(TidScanState, pstate);
442
443 return ExecScan(&node->ss,
444 (ExecScanAccessMtd) TidNext,
445 (ExecScanRecheckMtd) TidRecheck);
446}
447
448/* ----------------------------------------------------------------
449 * ExecReScanTidScan(node)
450 * ----------------------------------------------------------------
451 */
452void
453ExecReScanTidScan(TidScanState *node)
454{
455 if (node->tss_TidList)
456 pfree(node->tss_TidList);
457 node->tss_TidList = NULL;
458 node->tss_NumTids = 0;
459 node->tss_TidPtr = -1;
460
461 /* not really necessary, but seems good form */
462 if (node->ss.ss_currentScanDesc)
463 table_rescan(node->ss.ss_currentScanDesc, NULL);
464
465 ExecScanReScan(&node->ss);
466}
467
468/* ----------------------------------------------------------------
469 * ExecEndTidScan
470 *
471 * Releases any storage allocated through C routines.
472 * Returns nothing.
473 * ----------------------------------------------------------------
474 */
475void
476ExecEndTidScan(TidScanState *node)
477{
478 if (node->ss.ss_currentScanDesc)
479 table_endscan(node->ss.ss_currentScanDesc);
480
481 /*
482 * Free the exprcontext
483 */
484 ExecFreeExprContext(&node->ss.ps);
485
486 /*
487 * clear out tuple table slots
488 */
489 if (node->ss.ps.ps_ResultTupleSlot)
490 ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
491 ExecClearTuple(node->ss.ss_ScanTupleSlot);
492}
493
494/* ----------------------------------------------------------------
495 * ExecInitTidScan
496 *
497 * Initializes the tid scan's state information, creates
498 * scan keys, and opens the base and tid relations.
499 *
500 * Parameters:
501 * node: TidNode node produced by the planner.
502 * estate: the execution state initialized in InitPlan.
503 * ----------------------------------------------------------------
504 */
505TidScanState *
506ExecInitTidScan(TidScan *node, EState *estate, int eflags)
507{
508 TidScanState *tidstate;
509 Relation currentRelation;
510
511 /*
512 * create state structure
513 */
514 tidstate = makeNode(TidScanState);
515 tidstate->ss.ps.plan = (Plan *) node;
516 tidstate->ss.ps.state = estate;
517 tidstate->ss.ps.ExecProcNode = ExecTidScan;
518
519 /*
520 * Miscellaneous initialization
521 *
522 * create expression context for node
523 */
524 ExecAssignExprContext(estate, &tidstate->ss.ps);
525
526 /*
527 * mark tid list as not computed yet
528 */
529 tidstate->tss_TidList = NULL;
530 tidstate->tss_NumTids = 0;
531 tidstate->tss_TidPtr = -1;
532
533 /*
534 * open the scan relation
535 */
536 currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
537
538 tidstate->ss.ss_currentRelation = currentRelation;
539 tidstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */
540
541 /*
542 * get the scan type from the relation descriptor.
543 */
544 ExecInitScanTupleSlot(estate, &tidstate->ss,
545 RelationGetDescr(currentRelation),
546 table_slot_callbacks(currentRelation));
547
548 /*
549 * Initialize result type and projection.
550 */
551 ExecInitResultTypeTL(&tidstate->ss.ps);
552 ExecAssignScanProjectionInfo(&tidstate->ss);
553
554 /*
555 * initialize child expressions
556 */
557 tidstate->ss.ps.qual =
558 ExecInitQual(node->scan.plan.qual, (PlanState *) tidstate);
559
560 TidExprListCreate(tidstate);
561
562 /*
563 * all done.
564 */
565 return tidstate;
566}
567