1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * nodeBitmapHeapscan.c |
4 | * Routines to support bitmapped scans of relations |
5 | * |
6 | * NOTE: it is critical that this plan type only be used with MVCC-compliant |
7 | * snapshots (ie, regular snapshots, not SnapshotAny or one of the other |
8 | * special snapshots). The reason is that since index and heap scans are |
9 | * decoupled, there can be no assurance that the index tuple prompting a |
10 | * visit to a particular heap TID still exists when the visit is made. |
11 | * Therefore the tuple might not exist anymore either (which is OK because |
12 | * heap_fetch will cope) --- but worse, the tuple slot could have been |
13 | * re-used for a newer tuple. With an MVCC snapshot the newer tuple is |
14 | * certain to fail the time qual and so it will not be mistakenly returned, |
15 | * but with anything else we might return a tuple that doesn't meet the |
16 | * required index qual conditions. |
17 | * |
18 | * |
19 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
20 | * Portions Copyright (c) 1994, Regents of the University of California |
21 | * |
22 | * |
23 | * IDENTIFICATION |
24 | * src/backend/executor/nodeBitmapHeapscan.c |
25 | * |
26 | *------------------------------------------------------------------------- |
27 | */ |
28 | /* |
29 | * INTERFACE ROUTINES |
30 | * ExecBitmapHeapScan scans a relation using bitmap info |
 *		BitmapHeapNext				workhorse for above
32 | * ExecInitBitmapHeapScan creates and initializes state info. |
33 | * ExecReScanBitmapHeapScan prepares to rescan the plan. |
34 | * ExecEndBitmapHeapScan releases all storage. |
35 | */ |
36 | #include "postgres.h" |
37 | |
38 | #include <math.h> |
39 | |
40 | #include "access/relscan.h" |
41 | #include "access/tableam.h" |
42 | #include "access/transam.h" |
43 | #include "access/visibilitymap.h" |
44 | #include "executor/execdebug.h" |
45 | #include "executor/nodeBitmapHeapscan.h" |
46 | #include "miscadmin.h" |
47 | #include "pgstat.h" |
48 | #include "storage/bufmgr.h" |
49 | #include "storage/predicate.h" |
50 | #include "utils/memutils.h" |
51 | #include "utils/rel.h" |
#include "utils/snapmgr.h"
#include "utils/spccache.h"
54 | |
55 | |
56 | static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node); |
57 | static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate); |
58 | static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, |
59 | TBMIterateResult *tbmres); |
60 | static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node); |
61 | static inline void BitmapPrefetch(BitmapHeapScanState *node, |
62 | TableScanDesc scan); |
63 | static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate); |
64 | |
65 | |
66 | /* ---------------------------------------------------------------- |
67 | * BitmapHeapNext |
68 | * |
69 | * Retrieve next tuple from the BitmapHeapScan node's currentRelation |
70 | * ---------------------------------------------------------------- |
71 | */ |
72 | static TupleTableSlot * |
73 | BitmapHeapNext(BitmapHeapScanState *node) |
74 | { |
75 | ExprContext *econtext; |
76 | TableScanDesc scan; |
77 | TIDBitmap *tbm; |
78 | TBMIterator *tbmiterator = NULL; |
79 | TBMSharedIterator *shared_tbmiterator = NULL; |
80 | TBMIterateResult *tbmres; |
81 | TupleTableSlot *slot; |
82 | ParallelBitmapHeapState *pstate = node->pstate; |
83 | dsa_area *dsa = node->ss.ps.state->es_query_dsa; |
84 | |
85 | /* |
	 * extract necessary information from the bitmap heap scan node
87 | */ |
88 | econtext = node->ss.ps.ps_ExprContext; |
89 | slot = node->ss.ss_ScanTupleSlot; |
90 | scan = node->ss.ss_currentScanDesc; |
91 | tbm = node->tbm; |
92 | if (pstate == NULL) |
93 | tbmiterator = node->tbmiterator; |
94 | else |
95 | shared_tbmiterator = node->shared_tbmiterator; |
96 | tbmres = node->tbmres; |
97 | |
98 | /* |
99 | * If we haven't yet performed the underlying index scan, do it, and begin |
100 | * the iteration over the bitmap. |
101 | * |
102 | * For prefetching, we use *two* iterators, one for the pages we are |
103 | * actually scanning and another that runs ahead of the first for |
104 | * prefetching. node->prefetch_pages tracks exactly how many pages ahead |
105 | * the prefetch iterator is. Also, node->prefetch_target tracks the |
106 | * desired prefetch distance, which starts small and increases up to the |
107 | * node->prefetch_maximum. This is to avoid doing a lot of prefetching in |
108 | * a scan that stops after a few tuples because of a LIMIT. |
109 | */ |
110 | if (!node->initialized) |
111 | { |
112 | if (!pstate) |
113 | { |
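			/*
			 * Serial case: run the bitmap subplan to build the bitmap
			 * locally, then set up a private iterator over it.
			 */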
114 | tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node)); |
115 | |
116 | if (!tbm || !IsA(tbm, TIDBitmap)) |
				elog(ERROR, "unrecognized result from subplan");
118 | |
119 | node->tbm = tbm; |
120 | node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm); |
121 | node->tbmres = tbmres = NULL; |
122 | |
123 | #ifdef USE_PREFETCH |
124 | if (node->prefetch_maximum > 0) |
125 | { |
126 | node->prefetch_iterator = tbm_begin_iterate(tbm); |
127 | node->prefetch_pages = 0; |
128 | node->prefetch_target = -1; |
129 | } |
130 | #endif /* USE_PREFETCH */ |
131 | } |
132 | else |
133 | { |
134 | /* |
135 | * The leader will immediately come out of the function, but |
			 * others will be blocked until the leader populates the TBM and
			 * wakes them up.
138 | */ |
139 | if (BitmapShouldInitializeSharedState(pstate)) |
140 | { |
141 | tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node)); |
142 | if (!tbm || !IsA(tbm, TIDBitmap)) |
					elog(ERROR, "unrecognized result from subplan");
144 | |
145 | node->tbm = tbm; |
146 | |
147 | /* |
148 | * Prepare to iterate over the TBM. This will return the |
149 | * dsa_pointer of the iterator state which will be used by |
150 | * multiple processes to iterate jointly. |
151 | */ |
152 | pstate->tbmiterator = tbm_prepare_shared_iterate(tbm); |
153 | #ifdef USE_PREFETCH |
154 | if (node->prefetch_maximum > 0) |
155 | { |
156 | pstate->prefetch_iterator = |
157 | tbm_prepare_shared_iterate(tbm); |
158 | |
159 | /* |
					 * We don't need the mutex here as we haven't yet woken
					 * up others.
162 | */ |
163 | pstate->prefetch_pages = 0; |
164 | pstate->prefetch_target = -1; |
165 | } |
166 | #endif |
167 | |
168 | /* We have initialized the shared state so wake up others. */ |
169 | BitmapDoneInitializingSharedState(pstate); |
170 | } |
171 | |
172 | /* Allocate a private iterator and attach the shared state to it */ |
173 | node->shared_tbmiterator = shared_tbmiterator = |
174 | tbm_attach_shared_iterate(dsa, pstate->tbmiterator); |
175 | node->tbmres = tbmres = NULL; |
176 | |
177 | #ifdef USE_PREFETCH |
178 | if (node->prefetch_maximum > 0) |
179 | { |
180 | node->shared_prefetch_iterator = |
181 | tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator); |
182 | } |
183 | #endif /* USE_PREFETCH */ |
184 | } |
185 | node->initialized = true; |
186 | } |
187 | |
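	/*
	 * The main loop: fetch the next page of the bitmap when the current one
	 * is exhausted, then return the tuples from it one at a time.
	 */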
188 | for (;;) |
189 | { |
190 | bool skip_fetch; |
191 | |
192 | CHECK_FOR_INTERRUPTS(); |
193 | |
194 | /* |
195 | * Get next page of results if needed |
196 | */ |
197 | if (tbmres == NULL) |
198 | { |
199 | if (!pstate) |
200 | node->tbmres = tbmres = tbm_iterate(tbmiterator); |
201 | else |
202 | node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator); |
203 | if (tbmres == NULL) |
204 | { |
205 | /* no more entries in the bitmap */ |
206 | break; |
207 | } |
208 | |
209 | BitmapAdjustPrefetchIterator(node, tbmres); |
210 | |
211 | /* |
212 | * We can skip fetching the heap page if we don't need any fields |
213 | * from the heap, and the bitmap entries don't need rechecking, |
214 | * and all tuples on the page are visible to our transaction. |
215 | * |
			 * XXX: It's a layering violation that we do these checks above
			 * tableam; they should probably be moved below it at some point.
218 | */ |
219 | skip_fetch = (node->can_skip_fetch && |
220 | !tbmres->recheck && |
221 | VM_ALL_VISIBLE(node->ss.ss_currentRelation, |
222 | tbmres->blockno, |
223 | &node->vmbuffer)); |
224 | |
225 | if (skip_fetch) |
226 | { |
227 | /* can't be lossy in the skip_fetch case */ |
228 | Assert(tbmres->ntuples >= 0); |
229 | |
230 | /* |
231 | * The number of tuples on this page is put into |
232 | * node->return_empty_tuples. |
233 | */ |
234 | node->return_empty_tuples = tbmres->ntuples; |
235 | } |
236 | else if (!table_scan_bitmap_next_block(scan, tbmres)) |
237 | { |
238 | /* AM doesn't think this block is valid, skip */ |
239 | continue; |
240 | } |
241 | |
242 | if (tbmres->ntuples >= 0) |
243 | node->exact_pages++; |
244 | else |
245 | node->lossy_pages++; |
246 | |
247 | /* Adjust the prefetch target */ |
248 | BitmapAdjustPrefetchTarget(node); |
249 | } |
250 | else |
251 | { |
252 | /* |
			 * Continuing in the previously obtained page.
254 | */ |
255 | |
256 | #ifdef USE_PREFETCH |
257 | |
258 | /* |
259 | * Try to prefetch at least a few pages even before we get to the |
260 | * second page if we don't stop reading after the first tuple. |
261 | */ |
262 | if (!pstate) |
263 | { |
264 | if (node->prefetch_target < node->prefetch_maximum) |
265 | node->prefetch_target++; |
266 | } |
267 | else if (pstate->prefetch_target < node->prefetch_maximum) |
268 | { |
269 | /* take spinlock while updating shared state */ |
270 | SpinLockAcquire(&pstate->mutex); |
271 | if (pstate->prefetch_target < node->prefetch_maximum) |
272 | pstate->prefetch_target++; |
273 | SpinLockRelease(&pstate->mutex); |
274 | } |
275 | #endif /* USE_PREFETCH */ |
276 | } |
277 | |
278 | /* |
279 | * We issue prefetch requests *after* fetching the current page to try |
280 | * to avoid having prefetching interfere with the main I/O. Also, this |
281 | * should happen only when we have determined there is still something |
282 | * to do on the current page, else we may uselessly prefetch the same |
283 | * page we are just about to request for real. |
284 | * |
		 * XXX: It's a layering violation that we do these checks above
		 * tableam; they should probably be moved below it at some point.
287 | */ |
288 | BitmapPrefetch(node, scan); |
289 | |
290 | if (node->return_empty_tuples > 0) |
291 | { |
292 | /* |
293 | * If we don't have to fetch the tuple, just return nulls. |
294 | */ |
295 | ExecStoreAllNullTuple(slot); |
296 | |
297 | if (--node->return_empty_tuples == 0) |
298 | { |
299 | /* no more tuples to return in the next round */ |
300 | node->tbmres = tbmres = NULL; |
301 | } |
302 | } |
303 | else |
304 | { |
305 | /* |
306 | * Attempt to fetch tuple from AM. |
307 | */ |
308 | if (!table_scan_bitmap_next_tuple(scan, tbmres, slot)) |
309 | { |
310 | /* nothing more to look at on this page */ |
311 | node->tbmres = tbmres = NULL; |
312 | continue; |
313 | } |
314 | |
315 | /* |
316 | * If we are using lossy info, we have to recheck the qual |
317 | * conditions at every tuple. |
318 | */ |
319 | if (tbmres->recheck) |
320 | { |
321 | econtext->ecxt_scantuple = slot; |
322 | if (!ExecQualAndReset(node->bitmapqualorig, econtext)) |
323 | { |
324 | /* Fails recheck, so drop it and loop back for another */ |
325 | InstrCountFiltered2(node, 1); |
326 | ExecClearTuple(slot); |
327 | continue; |
328 | } |
329 | } |
330 | } |
331 | |
332 | /* OK to return this tuple */ |
333 | return slot; |
334 | } |
335 | |
336 | /* |
	 * If we get here it means we are at the end of the scan.
338 | */ |
339 | return ExecClearTuple(slot); |
340 | } |
341 | |
342 | /* |
343 | * BitmapDoneInitializingSharedState - Shared state is initialized |
344 | * |
345 | * By this time the leader has already populated the TBM and initialized the |
 * shared state, so wake up other processes.
347 | */ |
348 | static inline void |
349 | BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate) |
350 | { |
351 | SpinLockAcquire(&pstate->mutex); |
352 | pstate->state = BM_FINISHED; |
353 | SpinLockRelease(&pstate->mutex); |
354 | ConditionVariableBroadcast(&pstate->cv); |
355 | } |
356 | |
357 | /* |
358 | * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator |
359 | */ |
360 | static inline void |
361 | BitmapAdjustPrefetchIterator(BitmapHeapScanState *node, |
362 | TBMIterateResult *tbmres) |
363 | { |
364 | #ifdef USE_PREFETCH |
365 | ParallelBitmapHeapState *pstate = node->pstate; |
366 | |
367 | if (pstate == NULL) |
368 | { |
369 | TBMIterator *prefetch_iterator = node->prefetch_iterator; |
370 | |
371 | if (node->prefetch_pages > 0) |
372 | { |
373 | /* The main iterator has closed the distance by one page */ |
374 | node->prefetch_pages--; |
375 | } |
376 | else if (prefetch_iterator) |
377 | { |
378 | /* Do not let the prefetch iterator get behind the main one */ |
379 | TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator); |
380 | |
381 | if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno) |
				elog(ERROR, "prefetch and main iterators are out of sync");
383 | } |
384 | return; |
385 | } |
386 | |
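	/* Parallel case: adjust the shared prefetch state under the mutex. */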
387 | if (node->prefetch_maximum > 0) |
388 | { |
389 | TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator; |
390 | |
391 | SpinLockAcquire(&pstate->mutex); |
392 | if (pstate->prefetch_pages > 0) |
393 | { |
394 | pstate->prefetch_pages--; |
395 | SpinLockRelease(&pstate->mutex); |
396 | } |
397 | else |
398 | { |
399 | /* Release the mutex before iterating */ |
400 | SpinLockRelease(&pstate->mutex); |
401 | |
402 | /* |
			 * In shared mode, we cannot ensure that the current blockno of
			 * the main iterator and that of the prefetch iterator are the
			 * same.  It's possible that whatever blockno we are prefetching
			 * will be processed by another process.  Therefore, we don't
			 * validate the blockno here as we do in the non-parallel case.
409 | */ |
410 | if (prefetch_iterator) |
411 | tbm_shared_iterate(prefetch_iterator); |
412 | } |
413 | } |
414 | #endif /* USE_PREFETCH */ |
415 | } |
416 | |
417 | /* |
418 | * BitmapAdjustPrefetchTarget - Adjust the prefetch target |
419 | * |
420 | * Increase prefetch target if it's not yet at the max. Note that |
421 | * we will increase it to zero after fetching the very first |
422 | * page/tuple, then to one after the second tuple is fetched, then |
423 | * it doubles as later pages are fetched. |
424 | */ |
425 | static inline void |
426 | BitmapAdjustPrefetchTarget(BitmapHeapScanState *node) |
427 | { |
428 | #ifdef USE_PREFETCH |
429 | ParallelBitmapHeapState *pstate = node->pstate; |
430 | |
431 | if (pstate == NULL) |
432 | { |
433 | if (node->prefetch_target >= node->prefetch_maximum) |
434 | /* don't increase any further */ ; |
435 | else if (node->prefetch_target >= node->prefetch_maximum / 2) |
436 | node->prefetch_target = node->prefetch_maximum; |
437 | else if (node->prefetch_target > 0) |
438 | node->prefetch_target *= 2; |
439 | else |
440 | node->prefetch_target++; |
441 | return; |
442 | } |
443 | |
444 | /* Do an unlocked check first to save spinlock acquisitions. */ |
445 | if (pstate->prefetch_target < node->prefetch_maximum) |
446 | { |
447 | SpinLockAcquire(&pstate->mutex); |
448 | if (pstate->prefetch_target >= node->prefetch_maximum) |
449 | /* don't increase any further */ ; |
450 | else if (pstate->prefetch_target >= node->prefetch_maximum / 2) |
451 | pstate->prefetch_target = node->prefetch_maximum; |
452 | else if (pstate->prefetch_target > 0) |
453 | pstate->prefetch_target *= 2; |
454 | else |
455 | pstate->prefetch_target++; |
456 | SpinLockRelease(&pstate->mutex); |
457 | } |
458 | #endif /* USE_PREFETCH */ |
459 | } |
460 | |
461 | /* |
 * BitmapPrefetch - Prefetch pages if prefetch_pages is behind prefetch_target
463 | */ |
464 | static inline void |
465 | BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan) |
466 | { |
467 | #ifdef USE_PREFETCH |
468 | ParallelBitmapHeapState *pstate = node->pstate; |
469 | |
470 | if (pstate == NULL) |
471 | { |
472 | TBMIterator *prefetch_iterator = node->prefetch_iterator; |
473 | |
474 | if (prefetch_iterator) |
475 | { |
476 | while (node->prefetch_pages < node->prefetch_target) |
477 | { |
478 | TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator); |
479 | bool skip_fetch; |
480 | |
481 | if (tbmpre == NULL) |
482 | { |
483 | /* No more pages to prefetch */ |
484 | tbm_end_iterate(prefetch_iterator); |
485 | node->prefetch_iterator = NULL; |
486 | break; |
487 | } |
488 | node->prefetch_pages++; |
489 | |
490 | /* |
491 | * If we expect not to have to actually read this heap page, |
492 | * skip this prefetch call, but continue to run the prefetch |
493 | * logic normally. (Would it be better not to increment |
494 | * prefetch_pages?) |
495 | * |
496 | * This depends on the assumption that the index AM will |
497 | * report the same recheck flag for this future heap page as |
				 * it did for the current heap page, which is not a certainty
499 | * but is true in many cases. |
500 | */ |
501 | skip_fetch = (node->can_skip_fetch && |
502 | (node->tbmres ? !node->tbmres->recheck : false) && |
503 | VM_ALL_VISIBLE(node->ss.ss_currentRelation, |
504 | tbmpre->blockno, |
505 | &node->pvmbuffer)); |
506 | |
507 | if (!skip_fetch) |
508 | PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno); |
509 | } |
510 | } |
511 | |
512 | return; |
513 | } |
514 | |
515 | if (pstate->prefetch_pages < pstate->prefetch_target) |
516 | { |
517 | TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator; |
518 | |
519 | if (prefetch_iterator) |
520 | { |
521 | while (1) |
522 | { |
523 | TBMIterateResult *tbmpre; |
524 | bool do_prefetch = false; |
525 | bool skip_fetch; |
526 | |
527 | /* |
528 | * Recheck under the mutex. If some other process has already |
					 * done enough prefetching then we need not do anything.
530 | */ |
531 | SpinLockAcquire(&pstate->mutex); |
532 | if (pstate->prefetch_pages < pstate->prefetch_target) |
533 | { |
534 | pstate->prefetch_pages++; |
535 | do_prefetch = true; |
536 | } |
537 | SpinLockRelease(&pstate->mutex); |
538 | |
539 | if (!do_prefetch) |
540 | return; |
541 | |
542 | tbmpre = tbm_shared_iterate(prefetch_iterator); |
543 | if (tbmpre == NULL) |
544 | { |
545 | /* No more pages to prefetch */ |
546 | tbm_end_shared_iterate(prefetch_iterator); |
547 | node->shared_prefetch_iterator = NULL; |
548 | break; |
549 | } |
550 | |
				/* As above, skip prefetch if we expect not to need the page */
552 | skip_fetch = (node->can_skip_fetch && |
553 | (node->tbmres ? !node->tbmres->recheck : false) && |
554 | VM_ALL_VISIBLE(node->ss.ss_currentRelation, |
555 | tbmpre->blockno, |
556 | &node->pvmbuffer)); |
557 | |
558 | if (!skip_fetch) |
559 | PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno); |
560 | } |
561 | } |
562 | } |
563 | #endif /* USE_PREFETCH */ |
564 | } |
565 | |
566 | /* |
567 | * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual |
568 | */ |
569 | static bool |
570 | BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot) |
571 | { |
572 | ExprContext *econtext; |
573 | |
574 | /* |
	 * extract necessary information from the bitmap heap scan node
576 | */ |
577 | econtext = node->ss.ps.ps_ExprContext; |
578 | |
579 | /* Does the tuple meet the original qual conditions? */ |
580 | econtext->ecxt_scantuple = slot; |
581 | return ExecQualAndReset(node->bitmapqualorig, econtext); |
582 | } |
583 | |
584 | /* ---------------------------------------------------------------- |
585 | * ExecBitmapHeapScan(node) |
586 | * ---------------------------------------------------------------- |
587 | */ |
588 | static TupleTableSlot * |
589 | ExecBitmapHeapScan(PlanState *pstate) |
590 | { |
591 | BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate); |
592 | |
593 | return ExecScan(&node->ss, |
594 | (ExecScanAccessMtd) BitmapHeapNext, |
595 | (ExecScanRecheckMtd) BitmapHeapRecheck); |
596 | } |
597 | |
598 | /* ---------------------------------------------------------------- |
599 | * ExecReScanBitmapHeapScan(node) |
600 | * ---------------------------------------------------------------- |
601 | */ |
602 | void |
603 | ExecReScanBitmapHeapScan(BitmapHeapScanState *node) |
604 | { |
605 | PlanState *outerPlan = outerPlanState(node); |
606 | |
607 | /* rescan to release any page pin */ |
608 | table_rescan(node->ss.ss_currentScanDesc, NULL); |
609 | |
610 | /* release bitmaps and buffers if any */ |
611 | if (node->tbmiterator) |
612 | tbm_end_iterate(node->tbmiterator); |
613 | if (node->prefetch_iterator) |
614 | tbm_end_iterate(node->prefetch_iterator); |
615 | if (node->shared_tbmiterator) |
616 | tbm_end_shared_iterate(node->shared_tbmiterator); |
617 | if (node->shared_prefetch_iterator) |
618 | tbm_end_shared_iterate(node->shared_prefetch_iterator); |
619 | if (node->tbm) |
620 | tbm_free(node->tbm); |
621 | if (node->vmbuffer != InvalidBuffer) |
622 | ReleaseBuffer(node->vmbuffer); |
623 | if (node->pvmbuffer != InvalidBuffer) |
624 | ReleaseBuffer(node->pvmbuffer); |
625 | node->tbm = NULL; |
626 | node->tbmiterator = NULL; |
627 | node->tbmres = NULL; |
628 | node->prefetch_iterator = NULL; |
629 | node->initialized = false; |
630 | node->shared_tbmiterator = NULL; |
631 | node->shared_prefetch_iterator = NULL; |
632 | node->vmbuffer = InvalidBuffer; |
633 | node->pvmbuffer = InvalidBuffer; |
634 | |
635 | ExecScanReScan(&node->ss); |
636 | |
637 | /* |
	 * If chgParam of subnode is not null then the plan will be re-scanned by
	 * the first ExecProcNode.
640 | */ |
641 | if (outerPlan->chgParam == NULL) |
642 | ExecReScan(outerPlan); |
643 | } |
644 | |
645 | /* ---------------------------------------------------------------- |
646 | * ExecEndBitmapHeapScan |
647 | * ---------------------------------------------------------------- |
648 | */ |
649 | void |
650 | ExecEndBitmapHeapScan(BitmapHeapScanState *node) |
651 | { |
652 | TableScanDesc scanDesc; |
653 | |
654 | /* |
655 | * extract information from the node |
656 | */ |
657 | scanDesc = node->ss.ss_currentScanDesc; |
658 | |
659 | /* |
660 | * Free the exprcontext |
661 | */ |
662 | ExecFreeExprContext(&node->ss.ps); |
663 | |
664 | /* |
665 | * clear out tuple table slots |
666 | */ |
667 | if (node->ss.ps.ps_ResultTupleSlot) |
668 | ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); |
669 | ExecClearTuple(node->ss.ss_ScanTupleSlot); |
670 | |
671 | /* |
672 | * close down subplans |
673 | */ |
674 | ExecEndNode(outerPlanState(node)); |
675 | |
676 | /* |
677 | * release bitmaps and buffers if any |
678 | */ |
679 | if (node->tbmiterator) |
680 | tbm_end_iterate(node->tbmiterator); |
681 | if (node->prefetch_iterator) |
682 | tbm_end_iterate(node->prefetch_iterator); |
683 | if (node->tbm) |
684 | tbm_free(node->tbm); |
685 | if (node->shared_tbmiterator) |
686 | tbm_end_shared_iterate(node->shared_tbmiterator); |
687 | if (node->shared_prefetch_iterator) |
688 | tbm_end_shared_iterate(node->shared_prefetch_iterator); |
689 | if (node->vmbuffer != InvalidBuffer) |
690 | ReleaseBuffer(node->vmbuffer); |
691 | if (node->pvmbuffer != InvalidBuffer) |
692 | ReleaseBuffer(node->pvmbuffer); |
693 | |
694 | /* |
695 | * close heap scan |
696 | */ |
697 | table_endscan(scanDesc); |
698 | } |
699 | |
700 | /* ---------------------------------------------------------------- |
701 | * ExecInitBitmapHeapScan |
702 | * |
703 | * Initializes the scan's state information. |
704 | * ---------------------------------------------------------------- |
705 | */ |
706 | BitmapHeapScanState * |
707 | ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) |
708 | { |
709 | BitmapHeapScanState *scanstate; |
710 | Relation currentRelation; |
711 | int io_concurrency; |
712 | |
713 | /* check for unsupported flags */ |
714 | Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); |
715 | |
716 | /* |
717 | * Assert caller didn't ask for an unsafe snapshot --- see comments at |
718 | * head of file. |
719 | */ |
720 | Assert(IsMVCCSnapshot(estate->es_snapshot)); |
721 | |
722 | /* |
723 | * create state structure |
724 | */ |
725 | scanstate = makeNode(BitmapHeapScanState); |
726 | scanstate->ss.ps.plan = (Plan *) node; |
727 | scanstate->ss.ps.state = estate; |
728 | scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan; |
729 | |
730 | scanstate->tbm = NULL; |
731 | scanstate->tbmiterator = NULL; |
732 | scanstate->tbmres = NULL; |
733 | scanstate->return_empty_tuples = 0; |
734 | scanstate->vmbuffer = InvalidBuffer; |
735 | scanstate->pvmbuffer = InvalidBuffer; |
736 | scanstate->exact_pages = 0; |
737 | scanstate->lossy_pages = 0; |
738 | scanstate->prefetch_iterator = NULL; |
739 | scanstate->prefetch_pages = 0; |
740 | scanstate->prefetch_target = 0; |
741 | /* may be updated below */ |
742 | scanstate->prefetch_maximum = target_prefetch_pages; |
743 | scanstate->pscan_len = 0; |
744 | scanstate->initialized = false; |
745 | scanstate->shared_tbmiterator = NULL; |
746 | scanstate->shared_prefetch_iterator = NULL; |
747 | scanstate->pstate = NULL; |
748 | |
749 | /* |
750 | * We can potentially skip fetching heap pages if we do not need any |
751 | * columns of the table, either for checking non-indexable quals or for |
752 | * returning data. This test is a bit simplistic, as it checks the |
753 | * stronger condition that there's no qual or return tlist at all. But in |
754 | * most cases it's probably not worth working harder than that. |
755 | */ |
756 | scanstate->can_skip_fetch = (node->scan.plan.qual == NIL && |
757 | node->scan.plan.targetlist == NIL); |
758 | |
759 | /* |
760 | * Miscellaneous initialization |
761 | * |
762 | * create expression context for node |
763 | */ |
764 | ExecAssignExprContext(estate, &scanstate->ss.ps); |
765 | |
766 | /* |
767 | * open the scan relation |
768 | */ |
769 | currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags); |
770 | |
771 | /* |
772 | * initialize child nodes |
773 | */ |
774 | outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags); |
775 | |
776 | /* |
777 | * get the scan type from the relation descriptor. |
778 | */ |
779 | ExecInitScanTupleSlot(estate, &scanstate->ss, |
780 | RelationGetDescr(currentRelation), |
781 | table_slot_callbacks(currentRelation)); |
782 | |
783 | /* |
784 | * Initialize result type and projection. |
785 | */ |
786 | ExecInitResultTypeTL(&scanstate->ss.ps); |
787 | ExecAssignScanProjectionInfo(&scanstate->ss); |
788 | |
789 | /* |
790 | * initialize child expressions |
791 | */ |
792 | scanstate->ss.ps.qual = |
793 | ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate); |
794 | scanstate->bitmapqualorig = |
795 | ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate); |
796 | |
797 | /* |
798 | * Determine the maximum for prefetch_target. If the tablespace has a |
799 | * specific IO concurrency set, use that to compute the corresponding |
800 | * maximum value; otherwise, we already initialized to the value computed |
801 | * by the GUC machinery. |
802 | */ |
803 | io_concurrency = |
804 | get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace); |
805 | if (io_concurrency != effective_io_concurrency) |
806 | { |
807 | double maximum; |
808 | |
809 | if (ComputeIoConcurrency(io_concurrency, &maximum)) |
810 | scanstate->prefetch_maximum = rint(maximum); |
811 | } |
812 | |
813 | scanstate->ss.ss_currentRelation = currentRelation; |
814 | |
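	/*
	 * Begin the underlying table scan.  Scan keys are never used by bitmap
	 * scans, so pass zero keys and a NULL key array.
	 */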
815 | scanstate->ss.ss_currentScanDesc = table_beginscan_bm(currentRelation, |
816 | estate->es_snapshot, |
817 | 0, |
818 | NULL); |
819 | |
820 | /* |
821 | * all done. |
822 | */ |
823 | return scanstate; |
824 | } |
825 | |
826 | /*---------------- |
827 | * BitmapShouldInitializeSharedState |
828 | * |
 *	The first process to come here and see the state as BM_INITIAL
830 | * will become the leader for the parallel bitmap scan and will be |
831 | * responsible for populating the TIDBitmap. The other processes will |
832 | * be blocked by the condition variable until the leader wakes them up. |
833 | * --------------- |
834 | */ |
835 | static bool |
836 | BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate) |
837 | { |
838 | SharedBitmapState state; |
839 | |
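	/*
	 * Loop until the state is no longer BM_INPROGRESS: either we claimed
	 * leadership by flipping BM_INITIAL to BM_INPROGRESS ourselves, or the
	 * leader finished building the bitmap and set BM_FINISHED.
	 */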
840 | while (1) |
841 | { |
842 | SpinLockAcquire(&pstate->mutex); |
843 | state = pstate->state; |
844 | if (pstate->state == BM_INITIAL) |
845 | pstate->state = BM_INPROGRESS; |
846 | SpinLockRelease(&pstate->mutex); |
847 | |
848 | /* Exit if bitmap is done, or if we're the leader. */ |
849 | if (state != BM_INPROGRESS) |
850 | break; |
851 | |
852 | /* Wait for the leader to wake us up. */ |
853 | ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN); |
854 | } |
855 | |
856 | ConditionVariableCancelSleep(); |
857 | |
858 | return (state == BM_INITIAL); |
859 | } |
860 | |
861 | /* ---------------------------------------------------------------- |
862 | * ExecBitmapHeapEstimate |
863 | * |
864 | * Compute the amount of space we'll need in the parallel |
865 | * query DSM, and inform pcxt->estimator about our needs. |
866 | * ---------------------------------------------------------------- |
867 | */ |
868 | void |
869 | ExecBitmapHeapEstimate(BitmapHeapScanState *node, |
870 | ParallelContext *pcxt) |
871 | { |
872 | EState *estate = node->ss.ps.state; |
873 | |
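	/*
	 * The shared state is a ParallelBitmapHeapState whose trailing flexible
	 * array (phs_snapshot_data) holds the serialized snapshot.
	 */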
874 | node->pscan_len = add_size(offsetof(ParallelBitmapHeapState, |
875 | phs_snapshot_data), |
876 | EstimateSnapshotSpace(estate->es_snapshot)); |
877 | |
878 | shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len); |
879 | shm_toc_estimate_keys(&pcxt->estimator, 1); |
880 | } |
881 | |
882 | /* ---------------------------------------------------------------- |
883 | * ExecBitmapHeapInitializeDSM |
884 | * |
885 | * Set up a parallel bitmap heap scan descriptor. |
886 | * ---------------------------------------------------------------- |
887 | */ |
888 | void |
889 | ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node, |
890 | ParallelContext *pcxt) |
891 | { |
892 | ParallelBitmapHeapState *pstate; |
893 | EState *estate = node->ss.ps.state; |
894 | dsa_area *dsa = node->ss.ps.state->es_query_dsa; |
895 | |
896 | /* If there's no DSA, there are no workers; initialize nothing. */ |
897 | if (dsa == NULL) |
898 | return; |
899 | |
900 | pstate = shm_toc_allocate(pcxt->toc, node->pscan_len); |
901 | |
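	/* 0 is InvalidDsaPointer; the shared iterators are created on demand */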
902 | pstate->tbmiterator = 0; |
903 | pstate->prefetch_iterator = 0; |
904 | |
905 | /* Initialize the mutex */ |
906 | SpinLockInit(&pstate->mutex); |
907 | pstate->prefetch_pages = 0; |
908 | pstate->prefetch_target = 0; |
909 | pstate->state = BM_INITIAL; |
910 | |
911 | ConditionVariableInit(&pstate->cv); |
912 | SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data); |
913 | |
914 | shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate); |
915 | node->pstate = pstate; |
916 | } |
917 | |
918 | /* ---------------------------------------------------------------- |
919 | * ExecBitmapHeapReInitializeDSM |
920 | * |
921 | * Reset shared state before beginning a fresh scan. |
922 | * ---------------------------------------------------------------- |
923 | */ |
924 | void |
925 | ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node, |
926 | ParallelContext *pcxt) |
927 | { |
928 | ParallelBitmapHeapState *pstate = node->pstate; |
929 | dsa_area *dsa = node->ss.ps.state->es_query_dsa; |
930 | |
931 | /* If there's no DSA, there are no workers; do nothing. */ |
932 | if (dsa == NULL) |
933 | return; |
934 | |
935 | pstate->state = BM_INITIAL; |
936 | |
937 | if (DsaPointerIsValid(pstate->tbmiterator)) |
938 | tbm_free_shared_area(dsa, pstate->tbmiterator); |
939 | |
940 | if (DsaPointerIsValid(pstate->prefetch_iterator)) |
941 | tbm_free_shared_area(dsa, pstate->prefetch_iterator); |
942 | |
943 | pstate->tbmiterator = InvalidDsaPointer; |
944 | pstate->prefetch_iterator = InvalidDsaPointer; |
945 | } |
946 | |
947 | /* ---------------------------------------------------------------- |
948 | * ExecBitmapHeapInitializeWorker |
949 | * |
950 | * Copy relevant information from TOC into planstate. |
951 | * ---------------------------------------------------------------- |
952 | */ |
953 | void |
954 | ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node, |
955 | ParallelWorkerContext *pwcxt) |
956 | { |
957 | ParallelBitmapHeapState *pstate; |
958 | Snapshot snapshot; |
959 | |
960 | Assert(node->ss.ps.state->es_query_dsa != NULL); |
961 | |
962 | pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); |
963 | node->pstate = pstate; |
964 | |
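	/* Adopt the snapshot the leader serialized into the shared state. */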
965 | snapshot = RestoreSnapshot(pstate->phs_snapshot_data); |
966 | table_scan_update_snapshot(node->ss.ss_currentScanDesc, snapshot); |
967 | } |
968 | |