/*-------------------------------------------------------------------------
 *
 * hashsearch.c
 *	  search code for postgres hash tables
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/access/hash/hashsearch.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/hash.h"
#include "access/relscan.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "utils/rel.h"
#include "storage/predicate.h"

static bool _hash_readpage(IndexScanDesc scan, Buffer *bufP,
						   ScanDirection dir);
static int	_hash_load_qualified_items(IndexScanDesc scan, Page page,
									   OffsetNumber offnum, ScanDirection dir);
static inline void _hash_saveitem(HashScanOpaque so, int itemIndex,
								  OffsetNumber offnum, IndexTuple itup);
static void _hash_readnext(IndexScanDesc scan, Buffer *bufp,
						   Page *pagep, HashPageOpaque *opaquep);
/*
 *	_hash_next() -- Get the next item in a scan.
 *
 *		On entry, so->currPos describes the current page, which may
 *		be pinned but not locked, and so->currPos.itemIndex identifies
 *		which item was previously returned.
 *
 *		On successful exit, scan->xs_heaptid is set to the TID of the
 *		next heap tuple, and so->currPos is updated as needed.
 *
 *		On failure exit (no more tuples), we return false with pin
 *		held on bucket page but no pins or locks held on overflow
 *		page.
 */
bool
_hash_next(IndexScanDesc scan, ScanDirection dir)
{
	Relation	rel = scan->indexRelation;
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	HashScanPosItem *currItem;
	BlockNumber blkno;
	Buffer		buf;
	bool		end_of_scan = false;

	/*
	 * Advance to the next tuple on the current page; or if done, try to read
	 * data from the next or previous page based on the scan direction.
	 * Before moving to the next or previous page make sure that we deal with
	 * all the killed items.
	 */
	if (ScanDirectionIsForward(dir))
	{
		if (++so->currPos.itemIndex > so->currPos.lastItem)
		{
			if (so->numKilled > 0)
				_hash_kill_items(scan);

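			/* Try to advance to the next page of the bucket chain, if any */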
			blkno = so->currPos.nextPage;
			if (BlockNumberIsValid(blkno))
			{
				buf = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
				TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));
				if (!_hash_readpage(scan, &buf, dir))
					end_of_scan = true;
			}
			else
				end_of_scan = true;
		}
	}
	else
	{
		if (--so->currPos.itemIndex < so->currPos.firstItem)
		{
			if (so->numKilled > 0)
				_hash_kill_items(scan);

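			/* Try to back up to the previous page of the bucket chain, if any */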
			blkno = so->currPos.prevPage;
			if (BlockNumberIsValid(blkno))
			{
				buf = _hash_getbuf(rel, blkno, HASH_READ,
								   LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
				TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));

				/*
				 * We always maintain the pin on the bucket page for the
				 * whole scan operation, so release the additional pin we
				 * acquired here.
				 */
				if (buf == so->hashso_bucket_buf ||
					buf == so->hashso_split_bucket_buf)
					_hash_dropbuf(rel, buf);

				if (!_hash_readpage(scan, &buf, dir))
					end_of_scan = true;
			}
			else
				end_of_scan = true;
		}
	}

	if (end_of_scan)
	{
		_hash_dropscanbuf(rel, so);
		HashScanPosInvalidate(so->currPos);
		return false;
	}

	/* OK, itemIndex says what to return */
	currItem = &so->currPos.items[so->currPos.itemIndex];
	scan->xs_heaptid = currItem->heapTid;

	return true;
}

/*
 * Advance to the next page in a bucket, if any.  If we are scanning the
 * bucket being populated during a split operation, then after the last page
 * of that bucket this function advances to the bucket being split.
 */
static void
_hash_readnext(IndexScanDesc scan,
			   Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
{
	BlockNumber blkno;
	Relation	rel = scan->indexRelation;
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	bool		block_found = false;

	blkno = (*opaquep)->hasho_nextblkno;

	/*
	 * Retain the pin on the primary bucket page till the end of the scan.
	 * See the comments in _hash_first for the reason we retain this pin.
	 */
	if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
		LockBuffer(*bufp, BUFFER_LOCK_UNLOCK);
	else
		_hash_relbuf(rel, *bufp);

	*bufp = InvalidBuffer;
	/* check for interrupts while we're not holding any buffer lock */
	CHECK_FOR_INTERRUPTS();
	if (BlockNumberIsValid(blkno))
	{
		*bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
		block_found = true;
	}
	else if (so->hashso_buc_populated && !so->hashso_buc_split)
	{
		/*
		 * End of bucket; scan the bucket being split if there was a split in
		 * progress at the start of the scan.
		 */
		*bufp = so->hashso_split_bucket_buf;

		/*
		 * The buffer for the bucket being split must be valid, as we
		 * acquired a pin on it before the start of the scan and retain it
		 * till the end of the scan.
		 */
		Assert(BufferIsValid(*bufp));

		LockBuffer(*bufp, BUFFER_LOCK_SHARE);
		PredicateLockPage(rel, BufferGetBlockNumber(*bufp), scan->xs_snapshot);

		/*
		 * Setting hashso_buc_split to true indicates that we are now
		 * scanning the bucket being split.
		 */
		so->hashso_buc_split = true;

		block_found = true;
	}

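	/*
	 * If we advanced to a new page, return its page pointer and opaque area
	 * to the caller.
	 */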
	if (block_found)
	{
		*pagep = BufferGetPage(*bufp);
		TestForOldSnapshot(scan->xs_snapshot, rel, *pagep);
		*opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
	}
}

/*
 * Advance to the previous page in a bucket, if any.  If the scan started
 * during a split operation, then after the first page of the bucket being
 * split this function advances to the bucket being populated.
 */
static void
_hash_readprev(IndexScanDesc scan,
			   Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
{
	BlockNumber blkno;
	Relation	rel = scan->indexRelation;
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	bool		haveprevblk;

	blkno = (*opaquep)->hasho_prevblkno;

	/*
	 * Retain the pin on the primary bucket page till the end of the scan.
	 * See the comments in _hash_first for the reason we retain this pin.
	 */
	if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
	{
		LockBuffer(*bufp, BUFFER_LOCK_UNLOCK);
		haveprevblk = false;
	}
	else
	{
		_hash_relbuf(rel, *bufp);
		haveprevblk = true;
	}

	*bufp = InvalidBuffer;
	/* check for interrupts while we're not holding any buffer lock */
	CHECK_FOR_INTERRUPTS();

	if (haveprevblk)
	{
		Assert(BlockNumberIsValid(blkno));
		*bufp = _hash_getbuf(rel, blkno, HASH_READ,
							 LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
		*pagep = BufferGetPage(*bufp);
		TestForOldSnapshot(scan->xs_snapshot, rel, *pagep);
		*opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);

		/*
		 * We always maintain the pin on the bucket page for the whole scan
		 * operation, so release the additional pin we acquired here.
		 */
		if (*bufp == so->hashso_bucket_buf || *bufp == so->hashso_split_bucket_buf)
			_hash_dropbuf(rel, *bufp);
	}
	else if (so->hashso_buc_populated && so->hashso_buc_split)
	{
		/*
		 * End of bucket; scan the bucket being populated if there was a
		 * split in progress at the start of the scan.
		 */
		*bufp = so->hashso_bucket_buf;

		/*
		 * The buffer for the bucket being populated must be valid, as we
		 * acquired a pin on it before the start of the scan and retain it
		 * till the end of the scan.
		 */
		Assert(BufferIsValid(*bufp));

		LockBuffer(*bufp, BUFFER_LOCK_SHARE);
		*pagep = BufferGetPage(*bufp);
		*opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);

		/* move to the end of bucket chain */
		while (BlockNumberIsValid((*opaquep)->hasho_nextblkno))
			_hash_readnext(scan, bufp, pagep, opaquep);

		/*
		 * Setting hashso_buc_split to false indicates that we are now
		 * scanning the bucket being populated.
		 */
		so->hashso_buc_split = false;
	}
}

/*
 *	_hash_first() -- Find the first item in a scan.
 *
 *		We find the first item (or, if backward scan, the last item) in the
 *		index that satisfies the qualification associated with the scan
 *		descriptor.
 *
 *		On successful exit, if the page containing the current index tuple
 *		is an overflow page, both pin and lock are released; if it is a
 *		bucket page, it is pinned but not locked.  Data about the matching
 *		tuple(s) on the page has been loaded into so->currPos, and
 *		scan->xs_heaptid is set to the heap TID of the current tuple.
 *
 *		On failure exit (no more tuples), we return false, with pin held on
 *		bucket page but no pins or locks held on overflow page.
 */
bool
_hash_first(IndexScanDesc scan, ScanDirection dir)
{
	Relation	rel = scan->indexRelation;
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	ScanKey		cur;
	uint32		hashkey;
	Bucket		bucket;
	Buffer		buf;
	Page		page;
	HashPageOpaque opaque;
	HashScanPosItem *currItem;

	pgstat_count_index_scan(rel);

	/*
	 * We do not support hash scans with no index qualification, because we
	 * would have to read the whole index rather than just one bucket.  That
	 * creates a whole raft of problems, since we haven't got a practical way
	 * to lock all the buckets against splits or compactions.
	 */
	if (scan->numberOfKeys < 1)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("hash indexes do not support whole-index scans")));

	/* There may be more than one index qual, but we hash only the first */
	cur = &scan->keyData[0];

	/* We support only single-column hash indexes */
	Assert(cur->sk_attno == 1);
	/* And there's only one operator strategy, too */
	Assert(cur->sk_strategy == HTEqualStrategyNumber);

	/*
	 * If the constant in the index qual is NULL, assume it cannot match any
	 * items in the index.
	 */
	if (cur->sk_flags & SK_ISNULL)
		return false;

	/*
	 * Okay to compute the hash key.  We want to do this before acquiring any
	 * locks, in case a user-defined hash function happens to be slow.
	 *
	 * If the scankey operator is not a cross-type comparison, we can use the
	 * cached hash function; otherwise we have to look it up in the catalogs.
	 *
	 * We support the convention that sk_subtype == InvalidOid means the
	 * opclass input type; this is a hack to simplify life for ScanKeyInit().
	 */
	if (cur->sk_subtype == rel->rd_opcintype[0] ||
		cur->sk_subtype == InvalidOid)
		hashkey = _hash_datum2hashkey(rel, cur->sk_argument);
	else
		hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
										   cur->sk_subtype);

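	/* Remember the hash value; it is used below to locate and qualify tuples */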
	so->hashso_sk_hash = hashkey;

	buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_READ, NULL);
	PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
	page = BufferGetPage(buf);
	TestForOldSnapshot(scan->xs_snapshot, rel, page);
	opaque = (HashPageOpaque) PageGetSpecialPointer(page);
	bucket = opaque->hasho_bucket;

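	/* Remember the primary bucket buffer; we keep its pin for the whole scan */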
	so->hashso_bucket_buf = buf;

	/*
	 * If a bucket split is in progress, then while scanning the bucket being
	 * populated, we need to skip tuples that were copied from the bucket
	 * being split.  We also need to maintain a pin on the bucket being split
	 * to ensure that split-cleanup work done by vacuum doesn't remove tuples
	 * from it till this scan is done.  We need to maintain a pin on the
	 * bucket being populated to ensure that vacuum doesn't squeeze that
	 * bucket till this scan is complete; otherwise, the ordering of tuples
	 * can't be maintained during forward and backward scans.  Here, we have
	 * to be cautious about locking order: first, acquire the lock on the
	 * bucket being split; then, release the lock on it but not the pin;
	 * then, acquire a lock on the bucket being populated and re-verify
	 * whether the bucket split is still in progress.  Acquiring the lock on
	 * the bucket being split first ensures that the vacuum waits for this
	 * scan to finish.
	 */
	if (H_BUCKET_BEING_POPULATED(opaque))
	{
		BlockNumber old_blkno;
		Buffer		old_buf;

		old_blkno = _hash_get_oldblock_from_newbucket(rel, bucket);

		/*
		 * Release the lock on the new bucket and re-acquire it after
		 * acquiring the lock on the old bucket.
		 */
		LockBuffer(buf, BUFFER_LOCK_UNLOCK);

		old_buf = _hash_getbuf(rel, old_blkno, HASH_READ, LH_BUCKET_PAGE);
		TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(old_buf));

		/*
		 * Remember the split bucket buffer so as to use it later for
		 * scanning.
		 */
		so->hashso_split_bucket_buf = old_buf;
		LockBuffer(old_buf, BUFFER_LOCK_UNLOCK);

		LockBuffer(buf, BUFFER_LOCK_SHARE);
		page = BufferGetPage(buf);
		opaque = (HashPageOpaque) PageGetSpecialPointer(page);
		Assert(opaque->hasho_bucket == bucket);

		if (H_BUCKET_BEING_POPULATED(opaque))
			so->hashso_buc_populated = true;
		else
		{
			_hash_dropbuf(rel, so->hashso_split_bucket_buf);
			so->hashso_split_bucket_buf = InvalidBuffer;
		}
	}

	/* If a backwards scan is requested, move to the end of the chain */
	if (ScanDirectionIsBackward(dir))
	{
		/*
		 * Backward scans that start during a split need to start from the
		 * end of the bucket being split.
		 */
		while (BlockNumberIsValid(opaque->hasho_nextblkno) ||
			   (so->hashso_buc_populated && !so->hashso_buc_split))
			_hash_readnext(scan, &buf, &page, &opaque);
	}

	/* remember which buffer we have pinned, if any */
	Assert(BufferIsInvalid(so->currPos.buf));
	so->currPos.buf = buf;

	/* Now find all the tuples satisfying the qualification from a page */
	if (!_hash_readpage(scan, &buf, dir))
		return false;

	/* OK, itemIndex says what to return */
	currItem = &so->currPos.items[so->currPos.itemIndex];
	scan->xs_heaptid = currItem->heapTid;

	/* if we're here, _hash_readpage found at least one matching tuple */
	return true;
}

/*
 *	_hash_readpage() -- Load data from current index page into so->currPos
 *
 *		We scan all the items in the current index page and save the ones
 *		that satisfy the qualification into so->currPos.  If no matching
 *		items are found in the current page, we move to the next or previous
 *		page in the bucket chain, as indicated by the scan direction.
 *
 *		Return true if any matching items are found, else false.
 */
static bool
_hash_readpage(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
{
	Relation	rel = scan->indexRelation;
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	Buffer		buf;
	Page		page;
	HashPageOpaque opaque;
	OffsetNumber offnum;
	uint16		itemIndex;

	buf = *bufP;
	Assert(BufferIsValid(buf));
	_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
	page = BufferGetPage(buf);
	opaque = (HashPageOpaque) PageGetSpecialPointer(page);

	so->currPos.buf = buf;
	so->currPos.currPage = BufferGetBlockNumber(buf);

	if (ScanDirectionIsForward(dir))
	{
		BlockNumber prev_blkno = InvalidBlockNumber;

		for (;;)
		{
			/* new page, locate starting position by binary search */
			offnum = _hash_binsearch(page, so->hashso_sk_hash);

			itemIndex = _hash_load_qualified_items(scan, page, offnum, dir);

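			/*
			 * itemIndex is the number of qualifying tuples saved into
			 * so->currPos.items[]; zero means nothing on this page matched
			 * the scan key.
			 */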
			if (itemIndex != 0)
				break;

			/*
			 * Could not find any matching tuples on the current page; move
			 * to the next page.  Before leaving the current page, deal with
			 * any killed items.
			 */
			if (so->numKilled > 0)
				_hash_kill_items(scan);

			/*
			 * If this is a primary bucket page, hasho_prevblkno is not a
			 * real block number.
			 */
			if (so->currPos.buf == so->hashso_bucket_buf ||
				so->currPos.buf == so->hashso_split_bucket_buf)
				prev_blkno = InvalidBlockNumber;
			else
				prev_blkno = opaque->hasho_prevblkno;

			_hash_readnext(scan, &buf, &page, &opaque);
			if (BufferIsValid(buf))
			{
				so->currPos.buf = buf;
				so->currPos.currPage = BufferGetBlockNumber(buf);
			}
			else
			{
				/*
				 * Remember next and previous block numbers for scrollable
				 * cursors to know the start position and return false
				 * indicating that no more matching tuples were found.  Also,
				 * don't reset currPage or lsn, because we expect
				 * _hash_kill_items to be called for the old page after this
				 * function returns.
				 */
				so->currPos.prevPage = prev_blkno;
				so->currPos.nextPage = InvalidBlockNumber;
				so->currPos.buf = buf;
				return false;
			}
		}

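		/*
		 * The matching tuples occupy items[0] through items[itemIndex - 1];
		 * return them starting from the first array slot.
		 */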
		so->currPos.firstItem = 0;
		so->currPos.lastItem = itemIndex - 1;
		so->currPos.itemIndex = 0;
	}
	else
	{
		BlockNumber next_blkno = InvalidBlockNumber;

		for (;;)
		{
			/* new page, locate starting position by binary search */
			offnum = _hash_binsearch_last(page, so->hashso_sk_hash);

			itemIndex = _hash_load_qualified_items(scan, page, offnum, dir);

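			/*
			 * For a backward scan, items[] is filled from the top of the
			 * array downward, so an itemIndex still equal to
			 * MaxIndexTuplesPerPage means nothing on this page matched the
			 * scan key.
			 */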
			if (itemIndex != MaxIndexTuplesPerPage)
				break;

			/*
			 * Could not find any matching tuples on the current page; move
			 * to the previous page.  Before leaving the current page, deal
			 * with any killed items.
			 */
			if (so->numKilled > 0)
				_hash_kill_items(scan);

			if (so->currPos.buf == so->hashso_bucket_buf ||
				so->currPos.buf == so->hashso_split_bucket_buf)
				next_blkno = opaque->hasho_nextblkno;

			_hash_readprev(scan, &buf, &page, &opaque);
			if (BufferIsValid(buf))
			{
				so->currPos.buf = buf;
				so->currPos.currPage = BufferGetBlockNumber(buf);
			}
			else
			{
				/*
				 * Remember next and previous block numbers for scrollable
				 * cursors to know the start position and return false
				 * indicating that no more matching tuples were found.  Also,
				 * don't reset currPage or lsn, because we expect
				 * _hash_kill_items to be called for the old page after this
				 * function returns.
				 */
				so->currPos.prevPage = InvalidBlockNumber;
				so->currPos.nextPage = next_blkno;
				so->currPos.buf = buf;
				return false;
			}
		}

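		/*
		 * The matching tuples occupy items[itemIndex] through
		 * items[MaxIndexTuplesPerPage - 1]; return them starting from the
		 * last array slot so that the scan proceeds in descending order.
		 */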
		so->currPos.firstItem = itemIndex;
		so->currPos.lastItem = MaxIndexTuplesPerPage - 1;
		so->currPos.itemIndex = MaxIndexTuplesPerPage - 1;
	}

	if (so->currPos.buf == so->hashso_bucket_buf ||
		so->currPos.buf == so->hashso_split_bucket_buf)
	{
		so->currPos.prevPage = InvalidBlockNumber;
		so->currPos.nextPage = opaque->hasho_nextblkno;
		LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
	}
	else
	{
		so->currPos.prevPage = opaque->hasho_prevblkno;
		so->currPos.nextPage = opaque->hasho_nextblkno;
		_hash_relbuf(rel, so->currPos.buf);
		so->currPos.buf = InvalidBuffer;
	}

	Assert(so->currPos.firstItem <= so->currPos.lastItem);
	return true;
}

/*
 * Load all the qualified items from the current index page
 * into so->currPos.  Helper function for _hash_readpage.
 */
static int
_hash_load_qualified_items(IndexScanDesc scan, Page page,
						   OffsetNumber offnum, ScanDirection dir)
{
	HashScanOpaque so = (HashScanOpaque) scan->opaque;
	IndexTuple	itup;
	int			itemIndex;
	OffsetNumber maxoff;

	maxoff = PageGetMaxOffsetNumber(page);

	if (ScanDirectionIsForward(dir))
	{
		/* load items[] in ascending order */
		itemIndex = 0;

		while (offnum <= maxoff)
		{
			Assert(offnum >= FirstOffsetNumber);
			itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));

			/*
			 * Skip tuples that were moved by a split operation if this scan
			 * started while the split was in progress.  Also, skip tuples
			 * that are marked as dead.
			 */
			if ((so->hashso_buc_populated && !so->hashso_buc_split &&
				 (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) ||
				(scan->ignore_killed_tuples &&
				 (ItemIdIsDead(PageGetItemId(page, offnum)))))
			{
				offnum = OffsetNumberNext(offnum);	/* move forward */
				continue;
			}

			if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup) &&
				_hash_checkqual(scan, itup))
			{
				/* tuple is qualified, so remember it */
				_hash_saveitem(so, itemIndex, offnum, itup);
				itemIndex++;
			}
			else
			{
				/*
				 * No more matching tuples exist on this page, so exit the
				 * loop.
				 */
				break;
			}

			offnum = OffsetNumberNext(offnum);
		}

		Assert(itemIndex <= MaxIndexTuplesPerPage);
		return itemIndex;
	}
	else
	{
		/* load items[] in descending order */
		itemIndex = MaxIndexTuplesPerPage;

		while (offnum >= FirstOffsetNumber)
		{
			Assert(offnum <= maxoff);
			itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));

			/*
			 * Skip tuples that were moved by a split operation if this scan
			 * started while the split was in progress.  Also, skip tuples
			 * that are marked as dead.
			 */
			if ((so->hashso_buc_populated && !so->hashso_buc_split &&
				 (itup->t_info & INDEX_MOVED_BY_SPLIT_MASK)) ||
				(scan->ignore_killed_tuples &&
				 (ItemIdIsDead(PageGetItemId(page, offnum)))))
			{
				offnum = OffsetNumberPrev(offnum);	/* move back */
				continue;
			}

			if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup) &&
				_hash_checkqual(scan, itup))
			{
				itemIndex--;
				/* tuple is qualified, so remember it */
				_hash_saveitem(so, itemIndex, offnum, itup);
			}
			else
			{
				/*
				 * No more matching tuples exist on this page, so exit the
				 * loop.
				 */
				break;
			}

			offnum = OffsetNumberPrev(offnum);
		}

		Assert(itemIndex >= 0);
		return itemIndex;
	}
}

/* Save an index item into so->currPos.items[itemIndex] */
static inline void
_hash_saveitem(HashScanOpaque so, int itemIndex,
			   OffsetNumber offnum, IndexTuple itup)
{
	HashScanPosItem *currItem = &so->currPos.items[itemIndex];

	currItem->heapTid = itup->t_tid;
	currItem->indexOffset = offnum;
}