1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * nodeTidscan.c |
4 | * Routines to support direct tid scans of relations |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * Portions Copyright (c) 1994, Regents of the University of California |
8 | * |
9 | * |
10 | * IDENTIFICATION |
11 | * src/backend/executor/nodeTidscan.c |
12 | * |
13 | *------------------------------------------------------------------------- |
14 | */ |
15 | /* |
16 | * INTERFACE ROUTINES |
17 | * |
18 | * ExecTidScan scans a relation using tids |
19 | * ExecInitTidScan creates and initializes state info. |
20 | * ExecReScanTidScan rescans the tid relation. |
21 | * ExecEndTidScan releases all storage. |
22 | */ |
23 | #include "postgres.h" |
24 | |
25 | #include "access/sysattr.h" |
26 | #include "access/tableam.h" |
27 | #include "catalog/pg_type.h" |
28 | #include "executor/execdebug.h" |
29 | #include "executor/nodeTidscan.h" |
30 | #include "miscadmin.h" |
31 | #include "nodes/nodeFuncs.h" |
32 | #include "storage/bufmgr.h" |
33 | #include "utils/array.h" |
34 | #include "utils/rel.h" |
35 | |
36 | |
37 | #define IsCTIDVar(node) \ |
38 | ((node) != NULL && \ |
39 | IsA((node), Var) && \ |
40 | ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \ |
41 | ((Var *) (node))->varlevelsup == 0) |
42 | |
43 | /* one element in tss_tidexprs */ |
44 | typedef struct TidExpr |
45 | { |
46 | ExprState *exprstate; /* ExprState for a TID-yielding subexpr */ |
47 | bool isarray; /* if true, it yields tid[] not just tid */ |
48 | CurrentOfExpr *cexpr; /* alternatively, we can have CURRENT OF */ |
49 | } TidExpr; |
50 | |
51 | static void TidExprListCreate(TidScanState *tidstate); |
52 | static void TidListEval(TidScanState *tidstate); |
53 | static int itemptr_comparator(const void *a, const void *b); |
54 | static TupleTableSlot *TidNext(TidScanState *node); |
55 | |
56 | |
57 | /* |
58 | * Extract the qual subexpressions that yield TIDs to search for, |
59 | * and compile them into ExprStates if they're ordinary expressions. |
60 | * |
61 | * CURRENT OF is a special case that we can't compile usefully; |
62 | * just drop it into the TidExpr list as-is. |
63 | */ |
64 | static void |
65 | TidExprListCreate(TidScanState *tidstate) |
66 | { |
67 | TidScan *node = (TidScan *) tidstate->ss.ps.plan; |
68 | ListCell *l; |
69 | |
70 | tidstate->tss_tidexprs = NIL; |
71 | tidstate->tss_isCurrentOf = false; |
72 | |
73 | foreach(l, node->tidquals) |
74 | { |
75 | Expr *expr = (Expr *) lfirst(l); |
76 | TidExpr *tidexpr = (TidExpr *) palloc0(sizeof(TidExpr)); |
77 | |
78 | if (is_opclause(expr)) |
79 | { |
80 | Node *arg1; |
81 | Node *arg2; |
82 | |
83 | arg1 = get_leftop(expr); |
84 | arg2 = get_rightop(expr); |
85 | if (IsCTIDVar(arg1)) |
86 | tidexpr->exprstate = ExecInitExpr((Expr *) arg2, |
87 | &tidstate->ss.ps); |
88 | else if (IsCTIDVar(arg2)) |
89 | tidexpr->exprstate = ExecInitExpr((Expr *) arg1, |
90 | &tidstate->ss.ps); |
91 | else |
92 | elog(ERROR, "could not identify CTID variable" ); |
93 | tidexpr->isarray = false; |
94 | } |
95 | else if (expr && IsA(expr, ScalarArrayOpExpr)) |
96 | { |
97 | ScalarArrayOpExpr *saex = (ScalarArrayOpExpr *) expr; |
98 | |
99 | Assert(IsCTIDVar(linitial(saex->args))); |
100 | tidexpr->exprstate = ExecInitExpr(lsecond(saex->args), |
101 | &tidstate->ss.ps); |
102 | tidexpr->isarray = true; |
103 | } |
104 | else if (expr && IsA(expr, CurrentOfExpr)) |
105 | { |
106 | CurrentOfExpr *cexpr = (CurrentOfExpr *) expr; |
107 | |
108 | tidexpr->cexpr = cexpr; |
109 | tidstate->tss_isCurrentOf = true; |
110 | } |
111 | else |
112 | elog(ERROR, "could not identify CTID expression" ); |
113 | |
114 | tidstate->tss_tidexprs = lappend(tidstate->tss_tidexprs, tidexpr); |
115 | } |
116 | |
117 | /* CurrentOfExpr could never appear OR'd with something else */ |
118 | Assert(list_length(tidstate->tss_tidexprs) == 1 || |
119 | !tidstate->tss_isCurrentOf); |
120 | } |
121 | |
122 | /* |
123 | * Compute the list of TIDs to be visited, by evaluating the expressions |
124 | * for them. |
125 | * |
126 | * (The result is actually an array, not a list.) |
127 | */ |
128 | static void |
129 | TidListEval(TidScanState *tidstate) |
130 | { |
131 | ExprContext *econtext = tidstate->ss.ps.ps_ExprContext; |
132 | TableScanDesc scan; |
133 | ItemPointerData *tidList; |
134 | int numAllocTids; |
135 | int numTids; |
136 | ListCell *l; |
137 | |
138 | /* |
139 | * Start scan on-demand - initializing a scan isn't free (e.g. heap stats |
140 | * the size of the table), so it makes sense to delay that until needed - |
141 | * the node might never get executed. |
142 | */ |
143 | if (tidstate->ss.ss_currentScanDesc == NULL) |
144 | tidstate->ss.ss_currentScanDesc = |
145 | table_beginscan(tidstate->ss.ss_currentRelation, |
146 | tidstate->ss.ps.state->es_snapshot, |
147 | 0, NULL); |
148 | scan = tidstate->ss.ss_currentScanDesc; |
149 | |
150 | /* |
151 | * We initialize the array with enough slots for the case that all quals |
152 | * are simple OpExprs or CurrentOfExprs. If there are any |
153 | * ScalarArrayOpExprs, we may have to enlarge the array. |
154 | */ |
155 | numAllocTids = list_length(tidstate->tss_tidexprs); |
156 | tidList = (ItemPointerData *) |
157 | palloc(numAllocTids * sizeof(ItemPointerData)); |
158 | numTids = 0; |
159 | |
160 | foreach(l, tidstate->tss_tidexprs) |
161 | { |
162 | TidExpr *tidexpr = (TidExpr *) lfirst(l); |
163 | ItemPointer itemptr; |
164 | bool isNull; |
165 | |
166 | if (tidexpr->exprstate && !tidexpr->isarray) |
167 | { |
168 | itemptr = (ItemPointer) |
169 | DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate, |
170 | econtext, |
171 | &isNull)); |
172 | if (isNull) |
173 | continue; |
174 | |
175 | /* |
176 | * We silently discard any TIDs that the AM considers invalid |
177 | * (E.g. for heap, they could be out of range at the time of scan |
178 | * start. Since we hold at least AccessShareLock on the table, it |
179 | * won't be possible for someone to truncate away the blocks we |
180 | * intend to visit.). |
181 | */ |
182 | if (!table_tuple_tid_valid(scan, itemptr)) |
183 | continue; |
184 | |
185 | if (numTids >= numAllocTids) |
186 | { |
187 | numAllocTids *= 2; |
188 | tidList = (ItemPointerData *) |
189 | repalloc(tidList, |
190 | numAllocTids * sizeof(ItemPointerData)); |
191 | } |
192 | tidList[numTids++] = *itemptr; |
193 | } |
194 | else if (tidexpr->exprstate && tidexpr->isarray) |
195 | { |
196 | Datum arraydatum; |
197 | ArrayType *itemarray; |
198 | Datum *ipdatums; |
199 | bool *ipnulls; |
200 | int ndatums; |
201 | int i; |
202 | |
203 | arraydatum = ExecEvalExprSwitchContext(tidexpr->exprstate, |
204 | econtext, |
205 | &isNull); |
206 | if (isNull) |
207 | continue; |
208 | itemarray = DatumGetArrayTypeP(arraydatum); |
209 | deconstruct_array(itemarray, |
210 | TIDOID, sizeof(ItemPointerData), false, 's', |
211 | &ipdatums, &ipnulls, &ndatums); |
212 | if (numTids + ndatums > numAllocTids) |
213 | { |
214 | numAllocTids = numTids + ndatums; |
215 | tidList = (ItemPointerData *) |
216 | repalloc(tidList, |
217 | numAllocTids * sizeof(ItemPointerData)); |
218 | } |
219 | for (i = 0; i < ndatums; i++) |
220 | { |
221 | if (ipnulls[i]) |
222 | continue; |
223 | |
224 | itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]); |
225 | |
226 | if (!table_tuple_tid_valid(scan, itemptr)) |
227 | continue; |
228 | |
229 | tidList[numTids++] = *itemptr; |
230 | } |
231 | pfree(ipdatums); |
232 | pfree(ipnulls); |
233 | } |
234 | else |
235 | { |
236 | ItemPointerData cursor_tid; |
237 | |
238 | Assert(tidexpr->cexpr); |
239 | if (execCurrentOf(tidexpr->cexpr, econtext, |
240 | RelationGetRelid(tidstate->ss.ss_currentRelation), |
241 | &cursor_tid)) |
242 | { |
243 | if (numTids >= numAllocTids) |
244 | { |
245 | numAllocTids *= 2; |
246 | tidList = (ItemPointerData *) |
247 | repalloc(tidList, |
248 | numAllocTids * sizeof(ItemPointerData)); |
249 | } |
250 | tidList[numTids++] = cursor_tid; |
251 | } |
252 | } |
253 | } |
254 | |
255 | /* |
256 | * Sort the array of TIDs into order, and eliminate duplicates. |
257 | * Eliminating duplicates is necessary since we want OR semantics across |
258 | * the list. Sorting makes it easier to detect duplicates, and as a bonus |
259 | * ensures that we will visit the heap in the most efficient way. |
260 | */ |
261 | if (numTids > 1) |
262 | { |
263 | int lastTid; |
264 | int i; |
265 | |
266 | /* CurrentOfExpr could never appear OR'd with something else */ |
267 | Assert(!tidstate->tss_isCurrentOf); |
268 | |
269 | qsort((void *) tidList, numTids, sizeof(ItemPointerData), |
270 | itemptr_comparator); |
271 | lastTid = 0; |
272 | for (i = 1; i < numTids; i++) |
273 | { |
274 | if (!ItemPointerEquals(&tidList[lastTid], &tidList[i])) |
275 | tidList[++lastTid] = tidList[i]; |
276 | } |
277 | numTids = lastTid + 1; |
278 | } |
279 | |
280 | tidstate->tss_TidList = tidList; |
281 | tidstate->tss_NumTids = numTids; |
282 | tidstate->tss_TidPtr = -1; |
283 | } |
284 | |
285 | /* |
286 | * qsort comparator for ItemPointerData items |
287 | */ |
288 | static int |
289 | itemptr_comparator(const void *a, const void *b) |
290 | { |
291 | const ItemPointerData *ipa = (const ItemPointerData *) a; |
292 | const ItemPointerData *ipb = (const ItemPointerData *) b; |
293 | BlockNumber ba = ItemPointerGetBlockNumber(ipa); |
294 | BlockNumber bb = ItemPointerGetBlockNumber(ipb); |
295 | OffsetNumber oa = ItemPointerGetOffsetNumber(ipa); |
296 | OffsetNumber ob = ItemPointerGetOffsetNumber(ipb); |
297 | |
298 | if (ba < bb) |
299 | return -1; |
300 | if (ba > bb) |
301 | return 1; |
302 | if (oa < ob) |
303 | return -1; |
304 | if (oa > ob) |
305 | return 1; |
306 | return 0; |
307 | } |
308 | |
309 | /* ---------------------------------------------------------------- |
310 | * TidNext |
311 | * |
312 | * Retrieve a tuple from the TidScan node's currentRelation |
313 | * using the tids in the TidScanState information. |
314 | * |
315 | * ---------------------------------------------------------------- |
316 | */ |
317 | static TupleTableSlot * |
318 | TidNext(TidScanState *node) |
319 | { |
320 | EState *estate; |
321 | ScanDirection direction; |
322 | Snapshot snapshot; |
323 | TableScanDesc scan; |
324 | Relation heapRelation; |
325 | TupleTableSlot *slot; |
326 | ItemPointerData *tidList; |
327 | int numTids; |
328 | bool bBackward; |
329 | |
330 | /* |
331 | * extract necessary information from tid scan node |
332 | */ |
333 | estate = node->ss.ps.state; |
334 | direction = estate->es_direction; |
335 | snapshot = estate->es_snapshot; |
336 | heapRelation = node->ss.ss_currentRelation; |
337 | slot = node->ss.ss_ScanTupleSlot; |
338 | |
339 | /* |
340 | * First time through, compute the list of TIDs to be visited |
341 | */ |
342 | if (node->tss_TidList == NULL) |
343 | TidListEval(node); |
344 | |
345 | scan = node->ss.ss_currentScanDesc; |
346 | tidList = node->tss_TidList; |
347 | numTids = node->tss_NumTids; |
348 | |
349 | /* |
350 | * Initialize or advance scan position, depending on direction. |
351 | */ |
352 | bBackward = ScanDirectionIsBackward(direction); |
353 | if (bBackward) |
354 | { |
355 | if (node->tss_TidPtr < 0) |
356 | { |
357 | /* initialize for backward scan */ |
358 | node->tss_TidPtr = numTids - 1; |
359 | } |
360 | else |
361 | node->tss_TidPtr--; |
362 | } |
363 | else |
364 | { |
365 | if (node->tss_TidPtr < 0) |
366 | { |
367 | /* initialize for forward scan */ |
368 | node->tss_TidPtr = 0; |
369 | } |
370 | else |
371 | node->tss_TidPtr++; |
372 | } |
373 | |
374 | while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids) |
375 | { |
376 | ItemPointerData tid = tidList[node->tss_TidPtr]; |
377 | |
378 | /* |
379 | * For WHERE CURRENT OF, the tuple retrieved from the cursor might |
380 | * since have been updated; if so, we should fetch the version that is |
381 | * current according to our snapshot. |
382 | */ |
383 | if (node->tss_isCurrentOf) |
384 | table_tuple_get_latest_tid(scan, &tid); |
385 | |
386 | if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot)) |
387 | return slot; |
388 | |
389 | /* Bad TID or failed snapshot qual; try next */ |
390 | if (bBackward) |
391 | node->tss_TidPtr--; |
392 | else |
393 | node->tss_TidPtr++; |
394 | |
395 | CHECK_FOR_INTERRUPTS(); |
396 | } |
397 | |
398 | /* |
399 | * if we get here it means the tid scan failed so we are at the end of the |
400 | * scan.. |
401 | */ |
402 | return ExecClearTuple(slot); |
403 | } |
404 | |
405 | /* |
406 | * TidRecheck -- access method routine to recheck a tuple in EvalPlanQual |
407 | */ |
408 | static bool |
409 | TidRecheck(TidScanState *node, TupleTableSlot *slot) |
410 | { |
411 | /* |
412 | * XXX shouldn't we check here to make sure tuple matches TID list? In |
413 | * runtime-key case this is not certain, is it? However, in the WHERE |
414 | * CURRENT OF case it might not match anyway ... |
415 | */ |
416 | return true; |
417 | } |
418 | |
419 | |
420 | /* ---------------------------------------------------------------- |
421 | * ExecTidScan(node) |
422 | * |
423 | * Scans the relation using tids and returns |
424 | * the next qualifying tuple in the direction specified. |
425 | * We call the ExecScan() routine and pass it the appropriate |
426 | * access method functions. |
427 | * |
428 | * Conditions: |
429 | * -- the "cursor" maintained by the AMI is positioned at the tuple |
430 | * returned previously. |
431 | * |
432 | * Initial States: |
433 | * -- the relation indicated is opened for scanning so that the |
434 | * "cursor" is positioned before the first qualifying tuple. |
435 | * -- tidPtr is -1. |
436 | * ---------------------------------------------------------------- |
437 | */ |
438 | static TupleTableSlot * |
439 | ExecTidScan(PlanState *pstate) |
440 | { |
441 | TidScanState *node = castNode(TidScanState, pstate); |
442 | |
443 | return ExecScan(&node->ss, |
444 | (ExecScanAccessMtd) TidNext, |
445 | (ExecScanRecheckMtd) TidRecheck); |
446 | } |
447 | |
448 | /* ---------------------------------------------------------------- |
449 | * ExecReScanTidScan(node) |
450 | * ---------------------------------------------------------------- |
451 | */ |
452 | void |
453 | ExecReScanTidScan(TidScanState *node) |
454 | { |
455 | if (node->tss_TidList) |
456 | pfree(node->tss_TidList); |
457 | node->tss_TidList = NULL; |
458 | node->tss_NumTids = 0; |
459 | node->tss_TidPtr = -1; |
460 | |
461 | /* not really necessary, but seems good form */ |
462 | if (node->ss.ss_currentScanDesc) |
463 | table_rescan(node->ss.ss_currentScanDesc, NULL); |
464 | |
465 | ExecScanReScan(&node->ss); |
466 | } |
467 | |
468 | /* ---------------------------------------------------------------- |
469 | * ExecEndTidScan |
470 | * |
471 | * Releases any storage allocated through C routines. |
472 | * Returns nothing. |
473 | * ---------------------------------------------------------------- |
474 | */ |
475 | void |
476 | ExecEndTidScan(TidScanState *node) |
477 | { |
478 | if (node->ss.ss_currentScanDesc) |
479 | table_endscan(node->ss.ss_currentScanDesc); |
480 | |
481 | /* |
482 | * Free the exprcontext |
483 | */ |
484 | ExecFreeExprContext(&node->ss.ps); |
485 | |
486 | /* |
487 | * clear out tuple table slots |
488 | */ |
489 | if (node->ss.ps.ps_ResultTupleSlot) |
490 | ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); |
491 | ExecClearTuple(node->ss.ss_ScanTupleSlot); |
492 | } |
493 | |
494 | /* ---------------------------------------------------------------- |
495 | * ExecInitTidScan |
496 | * |
497 | * Initializes the tid scan's state information, creates |
498 | * scan keys, and opens the base and tid relations. |
499 | * |
500 | * Parameters: |
501 | * node: TidNode node produced by the planner. |
502 | * estate: the execution state initialized in InitPlan. |
503 | * ---------------------------------------------------------------- |
504 | */ |
505 | TidScanState * |
506 | ExecInitTidScan(TidScan *node, EState *estate, int eflags) |
507 | { |
508 | TidScanState *tidstate; |
509 | Relation currentRelation; |
510 | |
511 | /* |
512 | * create state structure |
513 | */ |
514 | tidstate = makeNode(TidScanState); |
515 | tidstate->ss.ps.plan = (Plan *) node; |
516 | tidstate->ss.ps.state = estate; |
517 | tidstate->ss.ps.ExecProcNode = ExecTidScan; |
518 | |
519 | /* |
520 | * Miscellaneous initialization |
521 | * |
522 | * create expression context for node |
523 | */ |
524 | ExecAssignExprContext(estate, &tidstate->ss.ps); |
525 | |
526 | /* |
527 | * mark tid list as not computed yet |
528 | */ |
529 | tidstate->tss_TidList = NULL; |
530 | tidstate->tss_NumTids = 0; |
531 | tidstate->tss_TidPtr = -1; |
532 | |
533 | /* |
534 | * open the scan relation |
535 | */ |
536 | currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags); |
537 | |
538 | tidstate->ss.ss_currentRelation = currentRelation; |
539 | tidstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */ |
540 | |
541 | /* |
542 | * get the scan type from the relation descriptor. |
543 | */ |
544 | ExecInitScanTupleSlot(estate, &tidstate->ss, |
545 | RelationGetDescr(currentRelation), |
546 | table_slot_callbacks(currentRelation)); |
547 | |
548 | /* |
549 | * Initialize result type and projection. |
550 | */ |
551 | ExecInitResultTypeTL(&tidstate->ss.ps); |
552 | ExecAssignScanProjectionInfo(&tidstate->ss); |
553 | |
554 | /* |
555 | * initialize child expressions |
556 | */ |
557 | tidstate->ss.ps.qual = |
558 | ExecInitQual(node->scan.plan.qual, (PlanState *) tidstate); |
559 | |
560 | TidExprListCreate(tidstate); |
561 | |
562 | /* |
563 | * all done. |
564 | */ |
565 | return tidstate; |
566 | } |
567 | |