| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * nodeTidscan.c |
| 4 | * Routines to support direct tid scans of relations |
| 5 | * |
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 7 | * Portions Copyright (c) 1994, Regents of the University of California |
| 8 | * |
| 9 | * |
| 10 | * IDENTIFICATION |
| 11 | * src/backend/executor/nodeTidscan.c |
| 12 | * |
| 13 | *------------------------------------------------------------------------- |
| 14 | */ |
| 15 | /* |
| 16 | * INTERFACE ROUTINES |
| 17 | * |
| 18 | * ExecTidScan scans a relation using tids |
| 19 | * ExecInitTidScan creates and initializes state info. |
| 20 | * ExecReScanTidScan rescans the tid relation. |
| 21 | * ExecEndTidScan releases all storage. |
| 22 | */ |
| 23 | #include "postgres.h" |
| 24 | |
| 25 | #include "access/sysattr.h" |
| 26 | #include "access/tableam.h" |
| 27 | #include "catalog/pg_type.h" |
| 28 | #include "executor/execdebug.h" |
| 29 | #include "executor/nodeTidscan.h" |
| 30 | #include "miscadmin.h" |
| 31 | #include "nodes/nodeFuncs.h" |
| 32 | #include "storage/bufmgr.h" |
| 33 | #include "utils/array.h" |
| 34 | #include "utils/rel.h" |
| 35 | |
| 36 | |
| 37 | #define IsCTIDVar(node) \ |
| 38 | ((node) != NULL && \ |
| 39 | IsA((node), Var) && \ |
| 40 | ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \ |
| 41 | ((Var *) (node))->varlevelsup == 0) |
| 42 | |
| 43 | /* one element in tss_tidexprs */ |
| 44 | typedef struct TidExpr |
| 45 | { |
| 46 | ExprState *exprstate; /* ExprState for a TID-yielding subexpr */ |
| 47 | bool isarray; /* if true, it yields tid[] not just tid */ |
| 48 | CurrentOfExpr *cexpr; /* alternatively, we can have CURRENT OF */ |
| 49 | } TidExpr; |
| 50 | |
| 51 | static void TidExprListCreate(TidScanState *tidstate); |
| 52 | static void TidListEval(TidScanState *tidstate); |
| 53 | static int itemptr_comparator(const void *a, const void *b); |
| 54 | static TupleTableSlot *TidNext(TidScanState *node); |
| 55 | |
| 56 | |
| 57 | /* |
| 58 | * Extract the qual subexpressions that yield TIDs to search for, |
| 59 | * and compile them into ExprStates if they're ordinary expressions. |
| 60 | * |
| 61 | * CURRENT OF is a special case that we can't compile usefully; |
| 62 | * just drop it into the TidExpr list as-is. |
| 63 | */ |
| 64 | static void |
| 65 | TidExprListCreate(TidScanState *tidstate) |
| 66 | { |
| 67 | TidScan *node = (TidScan *) tidstate->ss.ps.plan; |
| 68 | ListCell *l; |
| 69 | |
| 70 | tidstate->tss_tidexprs = NIL; |
| 71 | tidstate->tss_isCurrentOf = false; |
| 72 | |
| 73 | foreach(l, node->tidquals) |
| 74 | { |
| 75 | Expr *expr = (Expr *) lfirst(l); |
| 76 | TidExpr *tidexpr = (TidExpr *) palloc0(sizeof(TidExpr)); |
| 77 | |
| 78 | if (is_opclause(expr)) |
| 79 | { |
| 80 | Node *arg1; |
| 81 | Node *arg2; |
| 82 | |
| 83 | arg1 = get_leftop(expr); |
| 84 | arg2 = get_rightop(expr); |
| 85 | if (IsCTIDVar(arg1)) |
| 86 | tidexpr->exprstate = ExecInitExpr((Expr *) arg2, |
| 87 | &tidstate->ss.ps); |
| 88 | else if (IsCTIDVar(arg2)) |
| 89 | tidexpr->exprstate = ExecInitExpr((Expr *) arg1, |
| 90 | &tidstate->ss.ps); |
| 91 | else |
| 92 | elog(ERROR, "could not identify CTID variable" ); |
| 93 | tidexpr->isarray = false; |
| 94 | } |
| 95 | else if (expr && IsA(expr, ScalarArrayOpExpr)) |
| 96 | { |
| 97 | ScalarArrayOpExpr *saex = (ScalarArrayOpExpr *) expr; |
| 98 | |
| 99 | Assert(IsCTIDVar(linitial(saex->args))); |
| 100 | tidexpr->exprstate = ExecInitExpr(lsecond(saex->args), |
| 101 | &tidstate->ss.ps); |
| 102 | tidexpr->isarray = true; |
| 103 | } |
| 104 | else if (expr && IsA(expr, CurrentOfExpr)) |
| 105 | { |
| 106 | CurrentOfExpr *cexpr = (CurrentOfExpr *) expr; |
| 107 | |
| 108 | tidexpr->cexpr = cexpr; |
| 109 | tidstate->tss_isCurrentOf = true; |
| 110 | } |
| 111 | else |
| 112 | elog(ERROR, "could not identify CTID expression" ); |
| 113 | |
| 114 | tidstate->tss_tidexprs = lappend(tidstate->tss_tidexprs, tidexpr); |
| 115 | } |
| 116 | |
| 117 | /* CurrentOfExpr could never appear OR'd with something else */ |
| 118 | Assert(list_length(tidstate->tss_tidexprs) == 1 || |
| 119 | !tidstate->tss_isCurrentOf); |
| 120 | } |
| 121 | |
| 122 | /* |
| 123 | * Compute the list of TIDs to be visited, by evaluating the expressions |
| 124 | * for them. |
| 125 | * |
| 126 | * (The result is actually an array, not a list.) |
| 127 | */ |
| 128 | static void |
| 129 | TidListEval(TidScanState *tidstate) |
| 130 | { |
| 131 | ExprContext *econtext = tidstate->ss.ps.ps_ExprContext; |
| 132 | TableScanDesc scan; |
| 133 | ItemPointerData *tidList; |
| 134 | int numAllocTids; |
| 135 | int numTids; |
| 136 | ListCell *l; |
| 137 | |
| 138 | /* |
| 139 | * Start scan on-demand - initializing a scan isn't free (e.g. heap stats |
| 140 | * the size of the table), so it makes sense to delay that until needed - |
| 141 | * the node might never get executed. |
| 142 | */ |
| 143 | if (tidstate->ss.ss_currentScanDesc == NULL) |
| 144 | tidstate->ss.ss_currentScanDesc = |
| 145 | table_beginscan(tidstate->ss.ss_currentRelation, |
| 146 | tidstate->ss.ps.state->es_snapshot, |
| 147 | 0, NULL); |
| 148 | scan = tidstate->ss.ss_currentScanDesc; |
| 149 | |
| 150 | /* |
| 151 | * We initialize the array with enough slots for the case that all quals |
| 152 | * are simple OpExprs or CurrentOfExprs. If there are any |
| 153 | * ScalarArrayOpExprs, we may have to enlarge the array. |
| 154 | */ |
| 155 | numAllocTids = list_length(tidstate->tss_tidexprs); |
| 156 | tidList = (ItemPointerData *) |
| 157 | palloc(numAllocTids * sizeof(ItemPointerData)); |
| 158 | numTids = 0; |
| 159 | |
| 160 | foreach(l, tidstate->tss_tidexprs) |
| 161 | { |
| 162 | TidExpr *tidexpr = (TidExpr *) lfirst(l); |
| 163 | ItemPointer itemptr; |
| 164 | bool isNull; |
| 165 | |
| 166 | if (tidexpr->exprstate && !tidexpr->isarray) |
| 167 | { |
| 168 | itemptr = (ItemPointer) |
| 169 | DatumGetPointer(ExecEvalExprSwitchContext(tidexpr->exprstate, |
| 170 | econtext, |
| 171 | &isNull)); |
| 172 | if (isNull) |
| 173 | continue; |
| 174 | |
| 175 | /* |
| 176 | * We silently discard any TIDs that the AM considers invalid |
| 177 | * (E.g. for heap, they could be out of range at the time of scan |
| 178 | * start. Since we hold at least AccessShareLock on the table, it |
| 179 | * won't be possible for someone to truncate away the blocks we |
| 180 | * intend to visit.). |
| 181 | */ |
| 182 | if (!table_tuple_tid_valid(scan, itemptr)) |
| 183 | continue; |
| 184 | |
| 185 | if (numTids >= numAllocTids) |
| 186 | { |
| 187 | numAllocTids *= 2; |
| 188 | tidList = (ItemPointerData *) |
| 189 | repalloc(tidList, |
| 190 | numAllocTids * sizeof(ItemPointerData)); |
| 191 | } |
| 192 | tidList[numTids++] = *itemptr; |
| 193 | } |
| 194 | else if (tidexpr->exprstate && tidexpr->isarray) |
| 195 | { |
| 196 | Datum arraydatum; |
| 197 | ArrayType *itemarray; |
| 198 | Datum *ipdatums; |
| 199 | bool *ipnulls; |
| 200 | int ndatums; |
| 201 | int i; |
| 202 | |
| 203 | arraydatum = ExecEvalExprSwitchContext(tidexpr->exprstate, |
| 204 | econtext, |
| 205 | &isNull); |
| 206 | if (isNull) |
| 207 | continue; |
| 208 | itemarray = DatumGetArrayTypeP(arraydatum); |
| 209 | deconstruct_array(itemarray, |
| 210 | TIDOID, sizeof(ItemPointerData), false, 's', |
| 211 | &ipdatums, &ipnulls, &ndatums); |
| 212 | if (numTids + ndatums > numAllocTids) |
| 213 | { |
| 214 | numAllocTids = numTids + ndatums; |
| 215 | tidList = (ItemPointerData *) |
| 216 | repalloc(tidList, |
| 217 | numAllocTids * sizeof(ItemPointerData)); |
| 218 | } |
| 219 | for (i = 0; i < ndatums; i++) |
| 220 | { |
| 221 | if (ipnulls[i]) |
| 222 | continue; |
| 223 | |
| 224 | itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]); |
| 225 | |
| 226 | if (!table_tuple_tid_valid(scan, itemptr)) |
| 227 | continue; |
| 228 | |
| 229 | tidList[numTids++] = *itemptr; |
| 230 | } |
| 231 | pfree(ipdatums); |
| 232 | pfree(ipnulls); |
| 233 | } |
| 234 | else |
| 235 | { |
| 236 | ItemPointerData cursor_tid; |
| 237 | |
| 238 | Assert(tidexpr->cexpr); |
| 239 | if (execCurrentOf(tidexpr->cexpr, econtext, |
| 240 | RelationGetRelid(tidstate->ss.ss_currentRelation), |
| 241 | &cursor_tid)) |
| 242 | { |
| 243 | if (numTids >= numAllocTids) |
| 244 | { |
| 245 | numAllocTids *= 2; |
| 246 | tidList = (ItemPointerData *) |
| 247 | repalloc(tidList, |
| 248 | numAllocTids * sizeof(ItemPointerData)); |
| 249 | } |
| 250 | tidList[numTids++] = cursor_tid; |
| 251 | } |
| 252 | } |
| 253 | } |
| 254 | |
| 255 | /* |
| 256 | * Sort the array of TIDs into order, and eliminate duplicates. |
| 257 | * Eliminating duplicates is necessary since we want OR semantics across |
| 258 | * the list. Sorting makes it easier to detect duplicates, and as a bonus |
| 259 | * ensures that we will visit the heap in the most efficient way. |
| 260 | */ |
| 261 | if (numTids > 1) |
| 262 | { |
| 263 | int lastTid; |
| 264 | int i; |
| 265 | |
| 266 | /* CurrentOfExpr could never appear OR'd with something else */ |
| 267 | Assert(!tidstate->tss_isCurrentOf); |
| 268 | |
| 269 | qsort((void *) tidList, numTids, sizeof(ItemPointerData), |
| 270 | itemptr_comparator); |
| 271 | lastTid = 0; |
| 272 | for (i = 1; i < numTids; i++) |
| 273 | { |
| 274 | if (!ItemPointerEquals(&tidList[lastTid], &tidList[i])) |
| 275 | tidList[++lastTid] = tidList[i]; |
| 276 | } |
| 277 | numTids = lastTid + 1; |
| 278 | } |
| 279 | |
| 280 | tidstate->tss_TidList = tidList; |
| 281 | tidstate->tss_NumTids = numTids; |
| 282 | tidstate->tss_TidPtr = -1; |
| 283 | } |
| 284 | |
| 285 | /* |
| 286 | * qsort comparator for ItemPointerData items |
| 287 | */ |
| 288 | static int |
| 289 | itemptr_comparator(const void *a, const void *b) |
| 290 | { |
| 291 | const ItemPointerData *ipa = (const ItemPointerData *) a; |
| 292 | const ItemPointerData *ipb = (const ItemPointerData *) b; |
| 293 | BlockNumber ba = ItemPointerGetBlockNumber(ipa); |
| 294 | BlockNumber bb = ItemPointerGetBlockNumber(ipb); |
| 295 | OffsetNumber oa = ItemPointerGetOffsetNumber(ipa); |
| 296 | OffsetNumber ob = ItemPointerGetOffsetNumber(ipb); |
| 297 | |
| 298 | if (ba < bb) |
| 299 | return -1; |
| 300 | if (ba > bb) |
| 301 | return 1; |
| 302 | if (oa < ob) |
| 303 | return -1; |
| 304 | if (oa > ob) |
| 305 | return 1; |
| 306 | return 0; |
| 307 | } |
| 308 | |
| 309 | /* ---------------------------------------------------------------- |
| 310 | * TidNext |
| 311 | * |
| 312 | * Retrieve a tuple from the TidScan node's currentRelation |
| 313 | * using the tids in the TidScanState information. |
| 314 | * |
| 315 | * ---------------------------------------------------------------- |
| 316 | */ |
| 317 | static TupleTableSlot * |
| 318 | TidNext(TidScanState *node) |
| 319 | { |
| 320 | EState *estate; |
| 321 | ScanDirection direction; |
| 322 | Snapshot snapshot; |
| 323 | TableScanDesc scan; |
| 324 | Relation heapRelation; |
| 325 | TupleTableSlot *slot; |
| 326 | ItemPointerData *tidList; |
| 327 | int numTids; |
| 328 | bool bBackward; |
| 329 | |
| 330 | /* |
| 331 | * extract necessary information from tid scan node |
| 332 | */ |
| 333 | estate = node->ss.ps.state; |
| 334 | direction = estate->es_direction; |
| 335 | snapshot = estate->es_snapshot; |
| 336 | heapRelation = node->ss.ss_currentRelation; |
| 337 | slot = node->ss.ss_ScanTupleSlot; |
| 338 | |
| 339 | /* |
| 340 | * First time through, compute the list of TIDs to be visited |
| 341 | */ |
| 342 | if (node->tss_TidList == NULL) |
| 343 | TidListEval(node); |
| 344 | |
| 345 | scan = node->ss.ss_currentScanDesc; |
| 346 | tidList = node->tss_TidList; |
| 347 | numTids = node->tss_NumTids; |
| 348 | |
| 349 | /* |
| 350 | * Initialize or advance scan position, depending on direction. |
| 351 | */ |
| 352 | bBackward = ScanDirectionIsBackward(direction); |
| 353 | if (bBackward) |
| 354 | { |
| 355 | if (node->tss_TidPtr < 0) |
| 356 | { |
| 357 | /* initialize for backward scan */ |
| 358 | node->tss_TidPtr = numTids - 1; |
| 359 | } |
| 360 | else |
| 361 | node->tss_TidPtr--; |
| 362 | } |
| 363 | else |
| 364 | { |
| 365 | if (node->tss_TidPtr < 0) |
| 366 | { |
| 367 | /* initialize for forward scan */ |
| 368 | node->tss_TidPtr = 0; |
| 369 | } |
| 370 | else |
| 371 | node->tss_TidPtr++; |
| 372 | } |
| 373 | |
| 374 | while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids) |
| 375 | { |
| 376 | ItemPointerData tid = tidList[node->tss_TidPtr]; |
| 377 | |
| 378 | /* |
| 379 | * For WHERE CURRENT OF, the tuple retrieved from the cursor might |
| 380 | * since have been updated; if so, we should fetch the version that is |
| 381 | * current according to our snapshot. |
| 382 | */ |
| 383 | if (node->tss_isCurrentOf) |
| 384 | table_tuple_get_latest_tid(scan, &tid); |
| 385 | |
| 386 | if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot)) |
| 387 | return slot; |
| 388 | |
| 389 | /* Bad TID or failed snapshot qual; try next */ |
| 390 | if (bBackward) |
| 391 | node->tss_TidPtr--; |
| 392 | else |
| 393 | node->tss_TidPtr++; |
| 394 | |
| 395 | CHECK_FOR_INTERRUPTS(); |
| 396 | } |
| 397 | |
| 398 | /* |
| 399 | * if we get here it means the tid scan failed so we are at the end of the |
| 400 | * scan.. |
| 401 | */ |
| 402 | return ExecClearTuple(slot); |
| 403 | } |
| 404 | |
| 405 | /* |
| 406 | * TidRecheck -- access method routine to recheck a tuple in EvalPlanQual |
| 407 | */ |
| 408 | static bool |
| 409 | TidRecheck(TidScanState *node, TupleTableSlot *slot) |
| 410 | { |
| 411 | /* |
| 412 | * XXX shouldn't we check here to make sure tuple matches TID list? In |
| 413 | * runtime-key case this is not certain, is it? However, in the WHERE |
| 414 | * CURRENT OF case it might not match anyway ... |
| 415 | */ |
| 416 | return true; |
| 417 | } |
| 418 | |
| 419 | |
| 420 | /* ---------------------------------------------------------------- |
| 421 | * ExecTidScan(node) |
| 422 | * |
| 423 | * Scans the relation using tids and returns |
| 424 | * the next qualifying tuple in the direction specified. |
| 425 | * We call the ExecScan() routine and pass it the appropriate |
| 426 | * access method functions. |
| 427 | * |
| 428 | * Conditions: |
| 429 | * -- the "cursor" maintained by the AMI is positioned at the tuple |
| 430 | * returned previously. |
| 431 | * |
| 432 | * Initial States: |
| 433 | * -- the relation indicated is opened for scanning so that the |
| 434 | * "cursor" is positioned before the first qualifying tuple. |
| 435 | * -- tidPtr is -1. |
| 436 | * ---------------------------------------------------------------- |
| 437 | */ |
| 438 | static TupleTableSlot * |
| 439 | ExecTidScan(PlanState *pstate) |
| 440 | { |
| 441 | TidScanState *node = castNode(TidScanState, pstate); |
| 442 | |
| 443 | return ExecScan(&node->ss, |
| 444 | (ExecScanAccessMtd) TidNext, |
| 445 | (ExecScanRecheckMtd) TidRecheck); |
| 446 | } |
| 447 | |
| 448 | /* ---------------------------------------------------------------- |
| 449 | * ExecReScanTidScan(node) |
| 450 | * ---------------------------------------------------------------- |
| 451 | */ |
| 452 | void |
| 453 | ExecReScanTidScan(TidScanState *node) |
| 454 | { |
| 455 | if (node->tss_TidList) |
| 456 | pfree(node->tss_TidList); |
| 457 | node->tss_TidList = NULL; |
| 458 | node->tss_NumTids = 0; |
| 459 | node->tss_TidPtr = -1; |
| 460 | |
| 461 | /* not really necessary, but seems good form */ |
| 462 | if (node->ss.ss_currentScanDesc) |
| 463 | table_rescan(node->ss.ss_currentScanDesc, NULL); |
| 464 | |
| 465 | ExecScanReScan(&node->ss); |
| 466 | } |
| 467 | |
| 468 | /* ---------------------------------------------------------------- |
| 469 | * ExecEndTidScan |
| 470 | * |
| 471 | * Releases any storage allocated through C routines. |
| 472 | * Returns nothing. |
| 473 | * ---------------------------------------------------------------- |
| 474 | */ |
| 475 | void |
| 476 | ExecEndTidScan(TidScanState *node) |
| 477 | { |
| 478 | if (node->ss.ss_currentScanDesc) |
| 479 | table_endscan(node->ss.ss_currentScanDesc); |
| 480 | |
| 481 | /* |
| 482 | * Free the exprcontext |
| 483 | */ |
| 484 | ExecFreeExprContext(&node->ss.ps); |
| 485 | |
| 486 | /* |
| 487 | * clear out tuple table slots |
| 488 | */ |
| 489 | if (node->ss.ps.ps_ResultTupleSlot) |
| 490 | ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); |
| 491 | ExecClearTuple(node->ss.ss_ScanTupleSlot); |
| 492 | } |
| 493 | |
| 494 | /* ---------------------------------------------------------------- |
| 495 | * ExecInitTidScan |
| 496 | * |
| 497 | * Initializes the tid scan's state information, creates |
| 498 | * scan keys, and opens the base and tid relations. |
| 499 | * |
| 500 | * Parameters: |
| 501 | * node: TidNode node produced by the planner. |
| 502 | * estate: the execution state initialized in InitPlan. |
| 503 | * ---------------------------------------------------------------- |
| 504 | */ |
| 505 | TidScanState * |
| 506 | ExecInitTidScan(TidScan *node, EState *estate, int eflags) |
| 507 | { |
| 508 | TidScanState *tidstate; |
| 509 | Relation currentRelation; |
| 510 | |
| 511 | /* |
| 512 | * create state structure |
| 513 | */ |
| 514 | tidstate = makeNode(TidScanState); |
| 515 | tidstate->ss.ps.plan = (Plan *) node; |
| 516 | tidstate->ss.ps.state = estate; |
| 517 | tidstate->ss.ps.ExecProcNode = ExecTidScan; |
| 518 | |
| 519 | /* |
| 520 | * Miscellaneous initialization |
| 521 | * |
| 522 | * create expression context for node |
| 523 | */ |
| 524 | ExecAssignExprContext(estate, &tidstate->ss.ps); |
| 525 | |
| 526 | /* |
| 527 | * mark tid list as not computed yet |
| 528 | */ |
| 529 | tidstate->tss_TidList = NULL; |
| 530 | tidstate->tss_NumTids = 0; |
| 531 | tidstate->tss_TidPtr = -1; |
| 532 | |
| 533 | /* |
| 534 | * open the scan relation |
| 535 | */ |
| 536 | currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags); |
| 537 | |
| 538 | tidstate->ss.ss_currentRelation = currentRelation; |
| 539 | tidstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */ |
| 540 | |
| 541 | /* |
| 542 | * get the scan type from the relation descriptor. |
| 543 | */ |
| 544 | ExecInitScanTupleSlot(estate, &tidstate->ss, |
| 545 | RelationGetDescr(currentRelation), |
| 546 | table_slot_callbacks(currentRelation)); |
| 547 | |
| 548 | /* |
| 549 | * Initialize result type and projection. |
| 550 | */ |
| 551 | ExecInitResultTypeTL(&tidstate->ss.ps); |
| 552 | ExecAssignScanProjectionInfo(&tidstate->ss); |
| 553 | |
| 554 | /* |
| 555 | * initialize child expressions |
| 556 | */ |
| 557 | tidstate->ss.ps.qual = |
| 558 | ExecInitQual(node->scan.plan.qual, (PlanState *) tidstate); |
| 559 | |
| 560 | TidExprListCreate(tidstate); |
| 561 | |
| 562 | /* |
| 563 | * all done. |
| 564 | */ |
| 565 | return tidstate; |
| 566 | } |
| 567 | |