1/*-------------------------------------------------------------------------
2 *
3 * nodeBitmapHeapscan.c
4 * Routines to support bitmapped scans of relations
5 *
6 * NOTE: it is critical that this plan type only be used with MVCC-compliant
7 * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8 * special snapshots). The reason is that since index and heap scans are
9 * decoupled, there can be no assurance that the index tuple prompting a
10 * visit to a particular heap TID still exists when the visit is made.
11 * Therefore the tuple might not exist anymore either (which is OK because
12 * heap_fetch will cope) --- but worse, the tuple slot could have been
13 * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14 * certain to fail the time qual and so it will not be mistakenly returned,
15 * but with anything else we might return a tuple that doesn't meet the
16 * required index qual conditions.
17 *
18 *
19 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
20 * Portions Copyright (c) 1994, Regents of the University of California
21 *
22 *
23 * IDENTIFICATION
24 * src/backend/executor/nodeBitmapHeapscan.c
25 *
26 *-------------------------------------------------------------------------
27 */
28/*
29 * INTERFACE ROUTINES
30 * ExecBitmapHeapScan scans a relation using bitmap info
31 * ExecBitmapHeapNext workhorse for above
32 * ExecInitBitmapHeapScan creates and initializes state info.
33 * ExecReScanBitmapHeapScan prepares to rescan the plan.
34 * ExecEndBitmapHeapScan releases all storage.
35 */
36#include "postgres.h"
37
38#include <math.h>
39
40#include "access/relscan.h"
41#include "access/tableam.h"
42#include "access/transam.h"
43#include "access/visibilitymap.h"
44#include "executor/execdebug.h"
45#include "executor/nodeBitmapHeapscan.h"
46#include "miscadmin.h"
47#include "pgstat.h"
48#include "storage/bufmgr.h"
49#include "storage/predicate.h"
50#include "utils/memutils.h"
51#include "utils/rel.h"
52#include "utils/spccache.h"
53#include "utils/snapmgr.h"
54
55
/* forward declarations of routines local to this file */
static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
												TBMIterateResult *tbmres);
static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
static inline void BitmapPrefetch(BitmapHeapScanState *node,
								  TableScanDesc scan);
static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
64
65
66/* ----------------------------------------------------------------
67 * BitmapHeapNext
68 *
69 * Retrieve next tuple from the BitmapHeapScan node's currentRelation
70 * ----------------------------------------------------------------
71 */
72static TupleTableSlot *
73BitmapHeapNext(BitmapHeapScanState *node)
74{
75 ExprContext *econtext;
76 TableScanDesc scan;
77 TIDBitmap *tbm;
78 TBMIterator *tbmiterator = NULL;
79 TBMSharedIterator *shared_tbmiterator = NULL;
80 TBMIterateResult *tbmres;
81 TupleTableSlot *slot;
82 ParallelBitmapHeapState *pstate = node->pstate;
83 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
84
85 /*
86 * extract necessary information from index scan node
87 */
88 econtext = node->ss.ps.ps_ExprContext;
89 slot = node->ss.ss_ScanTupleSlot;
90 scan = node->ss.ss_currentScanDesc;
91 tbm = node->tbm;
92 if (pstate == NULL)
93 tbmiterator = node->tbmiterator;
94 else
95 shared_tbmiterator = node->shared_tbmiterator;
96 tbmres = node->tbmres;
97
98 /*
99 * If we haven't yet performed the underlying index scan, do it, and begin
100 * the iteration over the bitmap.
101 *
102 * For prefetching, we use *two* iterators, one for the pages we are
103 * actually scanning and another that runs ahead of the first for
104 * prefetching. node->prefetch_pages tracks exactly how many pages ahead
105 * the prefetch iterator is. Also, node->prefetch_target tracks the
106 * desired prefetch distance, which starts small and increases up to the
107 * node->prefetch_maximum. This is to avoid doing a lot of prefetching in
108 * a scan that stops after a few tuples because of a LIMIT.
109 */
110 if (!node->initialized)
111 {
112 if (!pstate)
113 {
114 tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
115
116 if (!tbm || !IsA(tbm, TIDBitmap))
117 elog(ERROR, "unrecognized result from subplan");
118
119 node->tbm = tbm;
120 node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
121 node->tbmres = tbmres = NULL;
122
123#ifdef USE_PREFETCH
124 if (node->prefetch_maximum > 0)
125 {
126 node->prefetch_iterator = tbm_begin_iterate(tbm);
127 node->prefetch_pages = 0;
128 node->prefetch_target = -1;
129 }
130#endif /* USE_PREFETCH */
131 }
132 else
133 {
134 /*
135 * The leader will immediately come out of the function, but
136 * others will be blocked until leader populates the TBM and wakes
137 * them up.
138 */
139 if (BitmapShouldInitializeSharedState(pstate))
140 {
141 tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
142 if (!tbm || !IsA(tbm, TIDBitmap))
143 elog(ERROR, "unrecognized result from subplan");
144
145 node->tbm = tbm;
146
147 /*
148 * Prepare to iterate over the TBM. This will return the
149 * dsa_pointer of the iterator state which will be used by
150 * multiple processes to iterate jointly.
151 */
152 pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
153#ifdef USE_PREFETCH
154 if (node->prefetch_maximum > 0)
155 {
156 pstate->prefetch_iterator =
157 tbm_prepare_shared_iterate(tbm);
158
159 /*
160 * We don't need the mutex here as we haven't yet woke up
161 * others.
162 */
163 pstate->prefetch_pages = 0;
164 pstate->prefetch_target = -1;
165 }
166#endif
167
168 /* We have initialized the shared state so wake up others. */
169 BitmapDoneInitializingSharedState(pstate);
170 }
171
172 /* Allocate a private iterator and attach the shared state to it */
173 node->shared_tbmiterator = shared_tbmiterator =
174 tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
175 node->tbmres = tbmres = NULL;
176
177#ifdef USE_PREFETCH
178 if (node->prefetch_maximum > 0)
179 {
180 node->shared_prefetch_iterator =
181 tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
182 }
183#endif /* USE_PREFETCH */
184 }
185 node->initialized = true;
186 }
187
188 for (;;)
189 {
190 bool skip_fetch;
191
192 CHECK_FOR_INTERRUPTS();
193
194 /*
195 * Get next page of results if needed
196 */
197 if (tbmres == NULL)
198 {
199 if (!pstate)
200 node->tbmres = tbmres = tbm_iterate(tbmiterator);
201 else
202 node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
203 if (tbmres == NULL)
204 {
205 /* no more entries in the bitmap */
206 break;
207 }
208
209 BitmapAdjustPrefetchIterator(node, tbmres);
210
211 /*
212 * We can skip fetching the heap page if we don't need any fields
213 * from the heap, and the bitmap entries don't need rechecking,
214 * and all tuples on the page are visible to our transaction.
215 *
216 * XXX: It's a layering violation that we do these checks above
217 * tableam, they should probably moved below it at some point.
218 */
219 skip_fetch = (node->can_skip_fetch &&
220 !tbmres->recheck &&
221 VM_ALL_VISIBLE(node->ss.ss_currentRelation,
222 tbmres->blockno,
223 &node->vmbuffer));
224
225 if (skip_fetch)
226 {
227 /* can't be lossy in the skip_fetch case */
228 Assert(tbmres->ntuples >= 0);
229
230 /*
231 * The number of tuples on this page is put into
232 * node->return_empty_tuples.
233 */
234 node->return_empty_tuples = tbmres->ntuples;
235 }
236 else if (!table_scan_bitmap_next_block(scan, tbmres))
237 {
238 /* AM doesn't think this block is valid, skip */
239 continue;
240 }
241
242 if (tbmres->ntuples >= 0)
243 node->exact_pages++;
244 else
245 node->lossy_pages++;
246
247 /* Adjust the prefetch target */
248 BitmapAdjustPrefetchTarget(node);
249 }
250 else
251 {
252 /*
253 * Continuing in previously obtained page.
254 */
255
256#ifdef USE_PREFETCH
257
258 /*
259 * Try to prefetch at least a few pages even before we get to the
260 * second page if we don't stop reading after the first tuple.
261 */
262 if (!pstate)
263 {
264 if (node->prefetch_target < node->prefetch_maximum)
265 node->prefetch_target++;
266 }
267 else if (pstate->prefetch_target < node->prefetch_maximum)
268 {
269 /* take spinlock while updating shared state */
270 SpinLockAcquire(&pstate->mutex);
271 if (pstate->prefetch_target < node->prefetch_maximum)
272 pstate->prefetch_target++;
273 SpinLockRelease(&pstate->mutex);
274 }
275#endif /* USE_PREFETCH */
276 }
277
278 /*
279 * We issue prefetch requests *after* fetching the current page to try
280 * to avoid having prefetching interfere with the main I/O. Also, this
281 * should happen only when we have determined there is still something
282 * to do on the current page, else we may uselessly prefetch the same
283 * page we are just about to request for real.
284 *
285 * XXX: It's a layering violation that we do these checks above
286 * tableam, they should probably moved below it at some point.
287 */
288 BitmapPrefetch(node, scan);
289
290 if (node->return_empty_tuples > 0)
291 {
292 /*
293 * If we don't have to fetch the tuple, just return nulls.
294 */
295 ExecStoreAllNullTuple(slot);
296
297 if (--node->return_empty_tuples == 0)
298 {
299 /* no more tuples to return in the next round */
300 node->tbmres = tbmres = NULL;
301 }
302 }
303 else
304 {
305 /*
306 * Attempt to fetch tuple from AM.
307 */
308 if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
309 {
310 /* nothing more to look at on this page */
311 node->tbmres = tbmres = NULL;
312 continue;
313 }
314
315 /*
316 * If we are using lossy info, we have to recheck the qual
317 * conditions at every tuple.
318 */
319 if (tbmres->recheck)
320 {
321 econtext->ecxt_scantuple = slot;
322 if (!ExecQualAndReset(node->bitmapqualorig, econtext))
323 {
324 /* Fails recheck, so drop it and loop back for another */
325 InstrCountFiltered2(node, 1);
326 ExecClearTuple(slot);
327 continue;
328 }
329 }
330 }
331
332 /* OK to return this tuple */
333 return slot;
334 }
335
336 /*
337 * if we get here it means we are at the end of the scan..
338 */
339 return ExecClearTuple(slot);
340}
341
342/*
343 * BitmapDoneInitializingSharedState - Shared state is initialized
344 *
345 * By this time the leader has already populated the TBM and initialized the
346 * shared state so wake up other processes.
347 */
348static inline void
349BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
350{
351 SpinLockAcquire(&pstate->mutex);
352 pstate->state = BM_FINISHED;
353 SpinLockRelease(&pstate->mutex);
354 ConditionVariableBroadcast(&pstate->cv);
355}
356
357/*
358 * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
359 */
360static inline void
361BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
362 TBMIterateResult *tbmres)
363{
364#ifdef USE_PREFETCH
365 ParallelBitmapHeapState *pstate = node->pstate;
366
367 if (pstate == NULL)
368 {
369 TBMIterator *prefetch_iterator = node->prefetch_iterator;
370
371 if (node->prefetch_pages > 0)
372 {
373 /* The main iterator has closed the distance by one page */
374 node->prefetch_pages--;
375 }
376 else if (prefetch_iterator)
377 {
378 /* Do not let the prefetch iterator get behind the main one */
379 TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
380
381 if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
382 elog(ERROR, "prefetch and main iterators are out of sync");
383 }
384 return;
385 }
386
387 if (node->prefetch_maximum > 0)
388 {
389 TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
390
391 SpinLockAcquire(&pstate->mutex);
392 if (pstate->prefetch_pages > 0)
393 {
394 pstate->prefetch_pages--;
395 SpinLockRelease(&pstate->mutex);
396 }
397 else
398 {
399 /* Release the mutex before iterating */
400 SpinLockRelease(&pstate->mutex);
401
402 /*
403 * In case of shared mode, we can not ensure that the current
404 * blockno of the main iterator and that of the prefetch iterator
405 * are same. It's possible that whatever blockno we are
406 * prefetching will be processed by another process. Therefore,
407 * we don't validate the blockno here as we do in non-parallel
408 * case.
409 */
410 if (prefetch_iterator)
411 tbm_shared_iterate(prefetch_iterator);
412 }
413 }
414#endif /* USE_PREFETCH */
415}
416
417/*
418 * BitmapAdjustPrefetchTarget - Adjust the prefetch target
419 *
420 * Increase prefetch target if it's not yet at the max. Note that
421 * we will increase it to zero after fetching the very first
422 * page/tuple, then to one after the second tuple is fetched, then
423 * it doubles as later pages are fetched.
424 */
425static inline void
426BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
427{
428#ifdef USE_PREFETCH
429 ParallelBitmapHeapState *pstate = node->pstate;
430
431 if (pstate == NULL)
432 {
433 if (node->prefetch_target >= node->prefetch_maximum)
434 /* don't increase any further */ ;
435 else if (node->prefetch_target >= node->prefetch_maximum / 2)
436 node->prefetch_target = node->prefetch_maximum;
437 else if (node->prefetch_target > 0)
438 node->prefetch_target *= 2;
439 else
440 node->prefetch_target++;
441 return;
442 }
443
444 /* Do an unlocked check first to save spinlock acquisitions. */
445 if (pstate->prefetch_target < node->prefetch_maximum)
446 {
447 SpinLockAcquire(&pstate->mutex);
448 if (pstate->prefetch_target >= node->prefetch_maximum)
449 /* don't increase any further */ ;
450 else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
451 pstate->prefetch_target = node->prefetch_maximum;
452 else if (pstate->prefetch_target > 0)
453 pstate->prefetch_target *= 2;
454 else
455 pstate->prefetch_target++;
456 SpinLockRelease(&pstate->mutex);
457 }
458#endif /* USE_PREFETCH */
459}
460
461/*
462 * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
463 */
464static inline void
465BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
466{
467#ifdef USE_PREFETCH
468 ParallelBitmapHeapState *pstate = node->pstate;
469
470 if (pstate == NULL)
471 {
472 TBMIterator *prefetch_iterator = node->prefetch_iterator;
473
474 if (prefetch_iterator)
475 {
476 while (node->prefetch_pages < node->prefetch_target)
477 {
478 TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
479 bool skip_fetch;
480
481 if (tbmpre == NULL)
482 {
483 /* No more pages to prefetch */
484 tbm_end_iterate(prefetch_iterator);
485 node->prefetch_iterator = NULL;
486 break;
487 }
488 node->prefetch_pages++;
489
490 /*
491 * If we expect not to have to actually read this heap page,
492 * skip this prefetch call, but continue to run the prefetch
493 * logic normally. (Would it be better not to increment
494 * prefetch_pages?)
495 *
496 * This depends on the assumption that the index AM will
497 * report the same recheck flag for this future heap page as
498 * it did for the current heap page; which is not a certainty
499 * but is true in many cases.
500 */
501 skip_fetch = (node->can_skip_fetch &&
502 (node->tbmres ? !node->tbmres->recheck : false) &&
503 VM_ALL_VISIBLE(node->ss.ss_currentRelation,
504 tbmpre->blockno,
505 &node->pvmbuffer));
506
507 if (!skip_fetch)
508 PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
509 }
510 }
511
512 return;
513 }
514
515 if (pstate->prefetch_pages < pstate->prefetch_target)
516 {
517 TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;
518
519 if (prefetch_iterator)
520 {
521 while (1)
522 {
523 TBMIterateResult *tbmpre;
524 bool do_prefetch = false;
525 bool skip_fetch;
526
527 /*
528 * Recheck under the mutex. If some other process has already
529 * done enough prefetching then we need not to do anything.
530 */
531 SpinLockAcquire(&pstate->mutex);
532 if (pstate->prefetch_pages < pstate->prefetch_target)
533 {
534 pstate->prefetch_pages++;
535 do_prefetch = true;
536 }
537 SpinLockRelease(&pstate->mutex);
538
539 if (!do_prefetch)
540 return;
541
542 tbmpre = tbm_shared_iterate(prefetch_iterator);
543 if (tbmpre == NULL)
544 {
545 /* No more pages to prefetch */
546 tbm_end_shared_iterate(prefetch_iterator);
547 node->shared_prefetch_iterator = NULL;
548 break;
549 }
550
551 /* As above, skip prefetch if we expect not to need page */
552 skip_fetch = (node->can_skip_fetch &&
553 (node->tbmres ? !node->tbmres->recheck : false) &&
554 VM_ALL_VISIBLE(node->ss.ss_currentRelation,
555 tbmpre->blockno,
556 &node->pvmbuffer));
557
558 if (!skip_fetch)
559 PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
560 }
561 }
562 }
563#endif /* USE_PREFETCH */
564}
565
566/*
567 * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
568 */
569static bool
570BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
571{
572 ExprContext *econtext;
573
574 /*
575 * extract necessary information from index scan node
576 */
577 econtext = node->ss.ps.ps_ExprContext;
578
579 /* Does the tuple meet the original qual conditions? */
580 econtext->ecxt_scantuple = slot;
581 return ExecQualAndReset(node->bitmapqualorig, econtext);
582}
583
584/* ----------------------------------------------------------------
585 * ExecBitmapHeapScan(node)
586 * ----------------------------------------------------------------
587 */
588static TupleTableSlot *
589ExecBitmapHeapScan(PlanState *pstate)
590{
591 BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate);
592
593 return ExecScan(&node->ss,
594 (ExecScanAccessMtd) BitmapHeapNext,
595 (ExecScanRecheckMtd) BitmapHeapRecheck);
596}
597
598/* ----------------------------------------------------------------
599 * ExecReScanBitmapHeapScan(node)
600 * ----------------------------------------------------------------
601 */
602void
603ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
604{
605 PlanState *outerPlan = outerPlanState(node);
606
607 /* rescan to release any page pin */
608 table_rescan(node->ss.ss_currentScanDesc, NULL);
609
610 /* release bitmaps and buffers if any */
611 if (node->tbmiterator)
612 tbm_end_iterate(node->tbmiterator);
613 if (node->prefetch_iterator)
614 tbm_end_iterate(node->prefetch_iterator);
615 if (node->shared_tbmiterator)
616 tbm_end_shared_iterate(node->shared_tbmiterator);
617 if (node->shared_prefetch_iterator)
618 tbm_end_shared_iterate(node->shared_prefetch_iterator);
619 if (node->tbm)
620 tbm_free(node->tbm);
621 if (node->vmbuffer != InvalidBuffer)
622 ReleaseBuffer(node->vmbuffer);
623 if (node->pvmbuffer != InvalidBuffer)
624 ReleaseBuffer(node->pvmbuffer);
625 node->tbm = NULL;
626 node->tbmiterator = NULL;
627 node->tbmres = NULL;
628 node->prefetch_iterator = NULL;
629 node->initialized = false;
630 node->shared_tbmiterator = NULL;
631 node->shared_prefetch_iterator = NULL;
632 node->vmbuffer = InvalidBuffer;
633 node->pvmbuffer = InvalidBuffer;
634
635 ExecScanReScan(&node->ss);
636
637 /*
638 * if chgParam of subnode is not null then plan will be re-scanned by
639 * first ExecProcNode.
640 */
641 if (outerPlan->chgParam == NULL)
642 ExecReScan(outerPlan);
643}
644
645/* ----------------------------------------------------------------
646 * ExecEndBitmapHeapScan
647 * ----------------------------------------------------------------
648 */
649void
650ExecEndBitmapHeapScan(BitmapHeapScanState *node)
651{
652 TableScanDesc scanDesc;
653
654 /*
655 * extract information from the node
656 */
657 scanDesc = node->ss.ss_currentScanDesc;
658
659 /*
660 * Free the exprcontext
661 */
662 ExecFreeExprContext(&node->ss.ps);
663
664 /*
665 * clear out tuple table slots
666 */
667 if (node->ss.ps.ps_ResultTupleSlot)
668 ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
669 ExecClearTuple(node->ss.ss_ScanTupleSlot);
670
671 /*
672 * close down subplans
673 */
674 ExecEndNode(outerPlanState(node));
675
676 /*
677 * release bitmaps and buffers if any
678 */
679 if (node->tbmiterator)
680 tbm_end_iterate(node->tbmiterator);
681 if (node->prefetch_iterator)
682 tbm_end_iterate(node->prefetch_iterator);
683 if (node->tbm)
684 tbm_free(node->tbm);
685 if (node->shared_tbmiterator)
686 tbm_end_shared_iterate(node->shared_tbmiterator);
687 if (node->shared_prefetch_iterator)
688 tbm_end_shared_iterate(node->shared_prefetch_iterator);
689 if (node->vmbuffer != InvalidBuffer)
690 ReleaseBuffer(node->vmbuffer);
691 if (node->pvmbuffer != InvalidBuffer)
692 ReleaseBuffer(node->pvmbuffer);
693
694 /*
695 * close heap scan
696 */
697 table_endscan(scanDesc);
698}
699
700/* ----------------------------------------------------------------
701 * ExecInitBitmapHeapScan
702 *
703 * Initializes the scan's state information.
704 * ----------------------------------------------------------------
705 */
706BitmapHeapScanState *
707ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
708{
709 BitmapHeapScanState *scanstate;
710 Relation currentRelation;
711 int io_concurrency;
712
713 /* check for unsupported flags */
714 Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
715
716 /*
717 * Assert caller didn't ask for an unsafe snapshot --- see comments at
718 * head of file.
719 */
720 Assert(IsMVCCSnapshot(estate->es_snapshot));
721
722 /*
723 * create state structure
724 */
725 scanstate = makeNode(BitmapHeapScanState);
726 scanstate->ss.ps.plan = (Plan *) node;
727 scanstate->ss.ps.state = estate;
728 scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
729
730 scanstate->tbm = NULL;
731 scanstate->tbmiterator = NULL;
732 scanstate->tbmres = NULL;
733 scanstate->return_empty_tuples = 0;
734 scanstate->vmbuffer = InvalidBuffer;
735 scanstate->pvmbuffer = InvalidBuffer;
736 scanstate->exact_pages = 0;
737 scanstate->lossy_pages = 0;
738 scanstate->prefetch_iterator = NULL;
739 scanstate->prefetch_pages = 0;
740 scanstate->prefetch_target = 0;
741 /* may be updated below */
742 scanstate->prefetch_maximum = target_prefetch_pages;
743 scanstate->pscan_len = 0;
744 scanstate->initialized = false;
745 scanstate->shared_tbmiterator = NULL;
746 scanstate->shared_prefetch_iterator = NULL;
747 scanstate->pstate = NULL;
748
749 /*
750 * We can potentially skip fetching heap pages if we do not need any
751 * columns of the table, either for checking non-indexable quals or for
752 * returning data. This test is a bit simplistic, as it checks the
753 * stronger condition that there's no qual or return tlist at all. But in
754 * most cases it's probably not worth working harder than that.
755 */
756 scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
757 node->scan.plan.targetlist == NIL);
758
759 /*
760 * Miscellaneous initialization
761 *
762 * create expression context for node
763 */
764 ExecAssignExprContext(estate, &scanstate->ss.ps);
765
766 /*
767 * open the scan relation
768 */
769 currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
770
771 /*
772 * initialize child nodes
773 */
774 outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
775
776 /*
777 * get the scan type from the relation descriptor.
778 */
779 ExecInitScanTupleSlot(estate, &scanstate->ss,
780 RelationGetDescr(currentRelation),
781 table_slot_callbacks(currentRelation));
782
783 /*
784 * Initialize result type and projection.
785 */
786 ExecInitResultTypeTL(&scanstate->ss.ps);
787 ExecAssignScanProjectionInfo(&scanstate->ss);
788
789 /*
790 * initialize child expressions
791 */
792 scanstate->ss.ps.qual =
793 ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
794 scanstate->bitmapqualorig =
795 ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
796
797 /*
798 * Determine the maximum for prefetch_target. If the tablespace has a
799 * specific IO concurrency set, use that to compute the corresponding
800 * maximum value; otherwise, we already initialized to the value computed
801 * by the GUC machinery.
802 */
803 io_concurrency =
804 get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
805 if (io_concurrency != effective_io_concurrency)
806 {
807 double maximum;
808
809 if (ComputeIoConcurrency(io_concurrency, &maximum))
810 scanstate->prefetch_maximum = rint(maximum);
811 }
812
813 scanstate->ss.ss_currentRelation = currentRelation;
814
815 scanstate->ss.ss_currentScanDesc = table_beginscan_bm(currentRelation,
816 estate->es_snapshot,
817 0,
818 NULL);
819
820 /*
821 * all done.
822 */
823 return scanstate;
824}
825
826/*----------------
827 * BitmapShouldInitializeSharedState
828 *
829 * The first process to come here and see the state to the BM_INITIAL
830 * will become the leader for the parallel bitmap scan and will be
831 * responsible for populating the TIDBitmap. The other processes will
832 * be blocked by the condition variable until the leader wakes them up.
833 * ---------------
834 */
835static bool
836BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
837{
838 SharedBitmapState state;
839
840 while (1)
841 {
842 SpinLockAcquire(&pstate->mutex);
843 state = pstate->state;
844 if (pstate->state == BM_INITIAL)
845 pstate->state = BM_INPROGRESS;
846 SpinLockRelease(&pstate->mutex);
847
848 /* Exit if bitmap is done, or if we're the leader. */
849 if (state != BM_INPROGRESS)
850 break;
851
852 /* Wait for the leader to wake us up. */
853 ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
854 }
855
856 ConditionVariableCancelSleep();
857
858 return (state == BM_INITIAL);
859}
860
861/* ----------------------------------------------------------------
862 * ExecBitmapHeapEstimate
863 *
864 * Compute the amount of space we'll need in the parallel
865 * query DSM, and inform pcxt->estimator about our needs.
866 * ----------------------------------------------------------------
867 */
868void
869ExecBitmapHeapEstimate(BitmapHeapScanState *node,
870 ParallelContext *pcxt)
871{
872 EState *estate = node->ss.ps.state;
873
874 node->pscan_len = add_size(offsetof(ParallelBitmapHeapState,
875 phs_snapshot_data),
876 EstimateSnapshotSpace(estate->es_snapshot));
877
878 shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len);
879 shm_toc_estimate_keys(&pcxt->estimator, 1);
880}
881
882/* ----------------------------------------------------------------
883 * ExecBitmapHeapInitializeDSM
884 *
885 * Set up a parallel bitmap heap scan descriptor.
886 * ----------------------------------------------------------------
887 */
888void
889ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
890 ParallelContext *pcxt)
891{
892 ParallelBitmapHeapState *pstate;
893 EState *estate = node->ss.ps.state;
894 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
895
896 /* If there's no DSA, there are no workers; initialize nothing. */
897 if (dsa == NULL)
898 return;
899
900 pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);
901
902 pstate->tbmiterator = 0;
903 pstate->prefetch_iterator = 0;
904
905 /* Initialize the mutex */
906 SpinLockInit(&pstate->mutex);
907 pstate->prefetch_pages = 0;
908 pstate->prefetch_target = 0;
909 pstate->state = BM_INITIAL;
910
911 ConditionVariableInit(&pstate->cv);
912 SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data);
913
914 shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
915 node->pstate = pstate;
916}
917
918/* ----------------------------------------------------------------
919 * ExecBitmapHeapReInitializeDSM
920 *
921 * Reset shared state before beginning a fresh scan.
922 * ----------------------------------------------------------------
923 */
924void
925ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
926 ParallelContext *pcxt)
927{
928 ParallelBitmapHeapState *pstate = node->pstate;
929 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
930
931 /* If there's no DSA, there are no workers; do nothing. */
932 if (dsa == NULL)
933 return;
934
935 pstate->state = BM_INITIAL;
936
937 if (DsaPointerIsValid(pstate->tbmiterator))
938 tbm_free_shared_area(dsa, pstate->tbmiterator);
939
940 if (DsaPointerIsValid(pstate->prefetch_iterator))
941 tbm_free_shared_area(dsa, pstate->prefetch_iterator);
942
943 pstate->tbmiterator = InvalidDsaPointer;
944 pstate->prefetch_iterator = InvalidDsaPointer;
945}
946
947/* ----------------------------------------------------------------
948 * ExecBitmapHeapInitializeWorker
949 *
950 * Copy relevant information from TOC into planstate.
951 * ----------------------------------------------------------------
952 */
953void
954ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
955 ParallelWorkerContext *pwcxt)
956{
957 ParallelBitmapHeapState *pstate;
958 Snapshot snapshot;
959
960 Assert(node->ss.ps.state->es_query_dsa != NULL);
961
962 pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
963 node->pstate = pstate;
964
965 snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
966 table_scan_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
967}
968