1/*-------------------------------------------------------------------------
2 *
3 * gistbuildbuffers.c
4 * node buffer management functions for GiST buffering build algorithm.
5 *
6 *
7 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * IDENTIFICATION
11 * src/backend/access/gist/gistbuildbuffers.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include "access/genam.h"
18#include "access/gist_private.h"
19#include "catalog/index.h"
20#include "miscadmin.h"
21#include "storage/buffile.h"
22#include "storage/bufmgr.h"
23#include "utils/memutils.h"
24#include "utils/rel.h"
25
26static GISTNodeBufferPage *gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb);
27static void gistAddLoadedBuffer(GISTBuildBuffers *gfbb,
28 GISTNodeBuffer *nodeBuffer);
29static void gistLoadNodeBuffer(GISTBuildBuffers *gfbb,
30 GISTNodeBuffer *nodeBuffer);
31static void gistUnloadNodeBuffer(GISTBuildBuffers *gfbb,
32 GISTNodeBuffer *nodeBuffer);
33static void gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer,
34 IndexTuple item);
35static void gistGetItupFromPage(GISTNodeBufferPage *pageBuffer,
36 IndexTuple *item);
37static long gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb);
38static void gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum);
39
40static void ReadTempFileBlock(BufFile *file, long blknum, void *ptr);
41static void WriteTempFileBlock(BufFile *file, long blknum, void *ptr);
42
43
44/*
45 * Initialize GiST build buffers.
46 */
47GISTBuildBuffers *
48gistInitBuildBuffers(int pagesPerBuffer, int levelStep, int maxLevel)
49{
50 GISTBuildBuffers *gfbb;
51 HASHCTL hashCtl;
52
53 gfbb = palloc(sizeof(GISTBuildBuffers));
54 gfbb->pagesPerBuffer = pagesPerBuffer;
55 gfbb->levelStep = levelStep;
56
57 /*
58 * Create a temporary file to hold buffer pages that are swapped out of
59 * memory.
60 */
61 gfbb->pfile = BufFileCreateTemp(false);
62 gfbb->nFileBlocks = 0;
63
64 /* Initialize free page management. */
65 gfbb->nFreeBlocks = 0;
66 gfbb->freeBlocksLen = 32;
67 gfbb->freeBlocks = (long *) palloc(gfbb->freeBlocksLen * sizeof(long));
68
69 /*
70 * Current memory context will be used for all in-memory data structures
71 * of buffers which are persistent during buffering build.
72 */
73 gfbb->context = CurrentMemoryContext;
74
75 /*
76 * nodeBuffersTab hash is association between index blocks and it's
77 * buffers.
78 */
79 memset(&hashCtl, 0, sizeof(hashCtl));
80 hashCtl.keysize = sizeof(BlockNumber);
81 hashCtl.entrysize = sizeof(GISTNodeBuffer);
82 hashCtl.hcxt = CurrentMemoryContext;
83 gfbb->nodeBuffersTab = hash_create("gistbuildbuffers",
84 1024,
85 &hashCtl,
86 HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
87
88 gfbb->bufferEmptyingQueue = NIL;
89
90 /*
91 * Per-level node buffers lists for final buffers emptying process. Node
92 * buffers are inserted here when they are created.
93 */
94 gfbb->buffersOnLevelsLen = 1;
95 gfbb->buffersOnLevels = (List **) palloc(sizeof(List *) *
96 gfbb->buffersOnLevelsLen);
97 gfbb->buffersOnLevels[0] = NIL;
98
99 /*
100 * Block numbers of node buffers which last pages are currently loaded
101 * into main memory.
102 */
103 gfbb->loadedBuffersLen = 32;
104 gfbb->loadedBuffers = (GISTNodeBuffer **) palloc(gfbb->loadedBuffersLen *
105 sizeof(GISTNodeBuffer *));
106 gfbb->loadedBuffersCount = 0;
107
108 gfbb->rootlevel = maxLevel;
109
110 return gfbb;
111}
112
113/*
114 * Returns a node buffer for given block. The buffer is created if it
115 * doesn't exist yet.
116 */
117GISTNodeBuffer *
118gistGetNodeBuffer(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
119 BlockNumber nodeBlocknum, int level)
120{
121 GISTNodeBuffer *nodeBuffer;
122 bool found;
123
124 /* Find node buffer in hash table */
125 nodeBuffer = (GISTNodeBuffer *) hash_search(gfbb->nodeBuffersTab,
126 (const void *) &nodeBlocknum,
127 HASH_ENTER,
128 &found);
129 if (!found)
130 {
131 /*
132 * Node buffer wasn't found. Initialize the new buffer as empty.
133 */
134 MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
135
136 /* nodeBuffer->nodeBlocknum is the hash key and was filled in already */
137 nodeBuffer->blocksCount = 0;
138 nodeBuffer->pageBlocknum = InvalidBlockNumber;
139 nodeBuffer->pageBuffer = NULL;
140 nodeBuffer->queuedForEmptying = false;
141 nodeBuffer->isTemp = false;
142 nodeBuffer->level = level;
143
144 /*
145 * Add this buffer to the list of buffers on this level. Enlarge
146 * buffersOnLevels array if needed.
147 */
148 if (level >= gfbb->buffersOnLevelsLen)
149 {
150 int i;
151
152 gfbb->buffersOnLevels =
153 (List **) repalloc(gfbb->buffersOnLevels,
154 (level + 1) * sizeof(List *));
155
156 /* initialize the enlarged portion */
157 for (i = gfbb->buffersOnLevelsLen; i <= level; i++)
158 gfbb->buffersOnLevels[i] = NIL;
159 gfbb->buffersOnLevelsLen = level + 1;
160 }
161
162 /*
163 * Prepend the new buffer to the list of buffers on this level. It's
164 * not arbitrary that the new buffer is put to the beginning of the
165 * list: in the final emptying phase we loop through all buffers at
166 * each level, and flush them. If a page is split during the emptying,
167 * it's more efficient to flush the new splitted pages first, before
168 * moving on to pre-existing pages on the level. The buffers just
169 * created during the page split are likely still in cache, so
170 * flushing them immediately is more efficient than putting them to
171 * the end of the queue.
172 */
173 gfbb->buffersOnLevels[level] = lcons(nodeBuffer,
174 gfbb->buffersOnLevels[level]);
175
176 MemoryContextSwitchTo(oldcxt);
177 }
178
179 return nodeBuffer;
180}
181
182/*
183 * Allocate memory for a buffer page.
184 */
185static GISTNodeBufferPage *
186gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb)
187{
188 GISTNodeBufferPage *pageBuffer;
189
190 pageBuffer = (GISTNodeBufferPage *) MemoryContextAllocZero(gfbb->context,
191 BLCKSZ);
192 pageBuffer->prev = InvalidBlockNumber;
193
194 /* Set page free space */
195 PAGE_FREE_SPACE(pageBuffer) = BLCKSZ - BUFFER_PAGE_DATA_OFFSET;
196 return pageBuffer;
197}
198
199/*
200 * Add specified buffer into loadedBuffers array.
201 */
202static void
203gistAddLoadedBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
204{
205 /* Never add a temporary buffer to the array */
206 if (nodeBuffer->isTemp)
207 return;
208
209 /* Enlarge the array if needed */
210 if (gfbb->loadedBuffersCount >= gfbb->loadedBuffersLen)
211 {
212 gfbb->loadedBuffersLen *= 2;
213 gfbb->loadedBuffers = (GISTNodeBuffer **)
214 repalloc(gfbb->loadedBuffers,
215 gfbb->loadedBuffersLen * sizeof(GISTNodeBuffer *));
216 }
217
218 gfbb->loadedBuffers[gfbb->loadedBuffersCount] = nodeBuffer;
219 gfbb->loadedBuffersCount++;
220}
221
222/*
223 * Load last page of node buffer into main memory.
224 */
225static void
226gistLoadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
227{
228 /* Check if we really should load something */
229 if (!nodeBuffer->pageBuffer && nodeBuffer->blocksCount > 0)
230 {
231 /* Allocate memory for page */
232 nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
233
234 /* Read block from temporary file */
235 ReadTempFileBlock(gfbb->pfile, nodeBuffer->pageBlocknum,
236 nodeBuffer->pageBuffer);
237
238 /* Mark file block as free */
239 gistBuffersReleaseBlock(gfbb, nodeBuffer->pageBlocknum);
240
241 /* Mark node buffer as loaded */
242 gistAddLoadedBuffer(gfbb, nodeBuffer);
243 nodeBuffer->pageBlocknum = InvalidBlockNumber;
244 }
245}
246
247/*
248 * Write last page of node buffer to the disk.
249 */
250static void
251gistUnloadNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer)
252{
253 /* Check if we have something to write */
254 if (nodeBuffer->pageBuffer)
255 {
256 BlockNumber blkno;
257
258 /* Get free file block */
259 blkno = gistBuffersGetFreeBlock(gfbb);
260
261 /* Write block to the temporary file */
262 WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
263
264 /* Free memory of that page */
265 pfree(nodeBuffer->pageBuffer);
266 nodeBuffer->pageBuffer = NULL;
267
268 /* Save block number */
269 nodeBuffer->pageBlocknum = blkno;
270 }
271}
272
273/*
274 * Write last pages of all node buffers to the disk.
275 */
276void
277gistUnloadNodeBuffers(GISTBuildBuffers *gfbb)
278{
279 int i;
280
281 /* Unload all the buffers that have a page loaded in memory. */
282 for (i = 0; i < gfbb->loadedBuffersCount; i++)
283 gistUnloadNodeBuffer(gfbb, gfbb->loadedBuffers[i]);
284
285 /* Now there are no node buffers with loaded last page */
286 gfbb->loadedBuffersCount = 0;
287}
288
289/*
290 * Add index tuple to buffer page.
291 */
292static void
293gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer, IndexTuple itup)
294{
295 Size itupsz = IndexTupleSize(itup);
296 char *ptr;
297
298 /* There should be enough of space. */
299 Assert(PAGE_FREE_SPACE(pageBuffer) >= MAXALIGN(itupsz));
300
301 /* Reduce free space value of page to reserve a spot for the tuple. */
302 PAGE_FREE_SPACE(pageBuffer) -= MAXALIGN(itupsz);
303
304 /* Get pointer to the spot we reserved (ie. end of free space). */
305 ptr = (char *) pageBuffer + BUFFER_PAGE_DATA_OFFSET
306 + PAGE_FREE_SPACE(pageBuffer);
307
308 /* Copy the index tuple there. */
309 memcpy(ptr, itup, itupsz);
310}
311
312/*
313 * Get last item from buffer page and remove it from page.
314 */
315static void
316gistGetItupFromPage(GISTNodeBufferPage *pageBuffer, IndexTuple *itup)
317{
318 IndexTuple ptr;
319 Size itupsz;
320
321 Assert(!PAGE_IS_EMPTY(pageBuffer)); /* Page shouldn't be empty */
322
323 /* Get pointer to last index tuple */
324 ptr = (IndexTuple) ((char *) pageBuffer
325 + BUFFER_PAGE_DATA_OFFSET
326 + PAGE_FREE_SPACE(pageBuffer));
327 itupsz = IndexTupleSize(ptr);
328
329 /* Make a copy of the tuple */
330 *itup = (IndexTuple) palloc(itupsz);
331 memcpy(*itup, ptr, itupsz);
332
333 /* Mark the space used by the tuple as free */
334 PAGE_FREE_SPACE(pageBuffer) += MAXALIGN(itupsz);
335}
336
337/*
338 * Push an index tuple to node buffer.
339 */
340void
341gistPushItupToNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer,
342 IndexTuple itup)
343{
344 /*
345 * Most part of memory operations will be in buffering build persistent
346 * context. So, let's switch to it.
347 */
348 MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
349
350 /*
351 * If the buffer is currently empty, create the first page.
352 */
353 if (nodeBuffer->blocksCount == 0)
354 {
355 nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
356 nodeBuffer->blocksCount = 1;
357 gistAddLoadedBuffer(gfbb, nodeBuffer);
358 }
359
360 /* Load last page of node buffer if it wasn't in memory already */
361 if (!nodeBuffer->pageBuffer)
362 gistLoadNodeBuffer(gfbb, nodeBuffer);
363
364 /*
365 * Check if there is enough space on the last page for the tuple.
366 */
367 if (PAGE_NO_SPACE(nodeBuffer->pageBuffer, itup))
368 {
369 /*
370 * Nope. Swap previous block to disk and allocate a new one.
371 */
372 BlockNumber blkno;
373
374 /* Write filled page to the disk */
375 blkno = gistBuffersGetFreeBlock(gfbb);
376 WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
377
378 /*
379 * Reset the in-memory page as empty, and link the previous block to
380 * the new page by storing its block number in the prev-link.
381 */
382 PAGE_FREE_SPACE(nodeBuffer->pageBuffer) =
383 BLCKSZ - MAXALIGN(offsetof(GISTNodeBufferPage, tupledata));
384 nodeBuffer->pageBuffer->prev = blkno;
385
386 /* We've just added one more page */
387 nodeBuffer->blocksCount++;
388 }
389
390 gistPlaceItupToPage(nodeBuffer->pageBuffer, itup);
391
392 /*
393 * If the buffer just overflowed, add it to the emptying queue.
394 */
395 if (BUFFER_HALF_FILLED(nodeBuffer, gfbb) && !nodeBuffer->queuedForEmptying)
396 {
397 gfbb->bufferEmptyingQueue = lcons(nodeBuffer,
398 gfbb->bufferEmptyingQueue);
399 nodeBuffer->queuedForEmptying = true;
400 }
401
402 /* Restore memory context */
403 MemoryContextSwitchTo(oldcxt);
404}
405
406/*
407 * Removes one index tuple from node buffer. Returns true if success and false
408 * if node buffer is empty.
409 */
410bool
411gistPopItupFromNodeBuffer(GISTBuildBuffers *gfbb, GISTNodeBuffer *nodeBuffer,
412 IndexTuple *itup)
413{
414 /*
415 * If node buffer is empty then return false.
416 */
417 if (nodeBuffer->blocksCount <= 0)
418 return false;
419
420 /* Load last page of node buffer if needed */
421 if (!nodeBuffer->pageBuffer)
422 gistLoadNodeBuffer(gfbb, nodeBuffer);
423
424 /*
425 * Get index tuple from last non-empty page.
426 */
427 gistGetItupFromPage(nodeBuffer->pageBuffer, itup);
428
429 /*
430 * If we just removed the last tuple from the page, fetch previous page on
431 * this node buffer (if any).
432 */
433 if (PAGE_IS_EMPTY(nodeBuffer->pageBuffer))
434 {
435 BlockNumber prevblkno;
436
437 /*
438 * blocksCount includes the page in pageBuffer, so decrease it now.
439 */
440 nodeBuffer->blocksCount--;
441
442 /*
443 * If there's more pages, fetch previous one.
444 */
445 prevblkno = nodeBuffer->pageBuffer->prev;
446 if (prevblkno != InvalidBlockNumber)
447 {
448 /* There is a previous page. Fetch it. */
449 Assert(nodeBuffer->blocksCount > 0);
450 ReadTempFileBlock(gfbb->pfile, prevblkno, nodeBuffer->pageBuffer);
451
452 /*
453 * Now that we've read the block in memory, we can release its
454 * on-disk block for reuse.
455 */
456 gistBuffersReleaseBlock(gfbb, prevblkno);
457 }
458 else
459 {
460 /* No more pages. Free memory. */
461 Assert(nodeBuffer->blocksCount == 0);
462 pfree(nodeBuffer->pageBuffer);
463 nodeBuffer->pageBuffer = NULL;
464 }
465 }
466 return true;
467}
468
469/*
470 * Select a currently unused block for writing to.
471 */
472static long
473gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb)
474{
475 /*
476 * If there are multiple free blocks, we select the one appearing last in
477 * freeBlocks[]. If there are none, assign the next block at the end of
478 * the file (causing the file to be extended).
479 */
480 if (gfbb->nFreeBlocks > 0)
481 return gfbb->freeBlocks[--gfbb->nFreeBlocks];
482 else
483 return gfbb->nFileBlocks++;
484}
485
486/*
487 * Return a block# to the freelist.
488 */
489static void
490gistBuffersReleaseBlock(GISTBuildBuffers *gfbb, long blocknum)
491{
492 int ndx;
493
494 /* Enlarge freeBlocks array if full. */
495 if (gfbb->nFreeBlocks >= gfbb->freeBlocksLen)
496 {
497 gfbb->freeBlocksLen *= 2;
498 gfbb->freeBlocks = (long *) repalloc(gfbb->freeBlocks,
499 gfbb->freeBlocksLen *
500 sizeof(long));
501 }
502
503 /* Add blocknum to array */
504 ndx = gfbb->nFreeBlocks++;
505 gfbb->freeBlocks[ndx] = blocknum;
506}
507
508/*
509 * Free buffering build data structure.
510 */
511void
512gistFreeBuildBuffers(GISTBuildBuffers *gfbb)
513{
514 /* Close buffers file. */
515 BufFileClose(gfbb->pfile);
516
517 /* All other things will be freed on memory context release */
518}
519
520/*
521 * Data structure representing information about node buffer for index tuples
522 * relocation from splitted node buffer.
523 */
524typedef struct
525{
526 GISTENTRY entry[INDEX_MAX_KEYS];
527 bool isnull[INDEX_MAX_KEYS];
528 GISTPageSplitInfo *splitinfo;
529 GISTNodeBuffer *nodeBuffer;
530} RelocationBufferInfo;
531
532/*
533 * At page split, distribute tuples from the buffer of the split page to
534 * new buffers for the created page halves. This also adjusts the downlinks
535 * in 'splitinfo' to include the tuples in the buffers.
536 */
537void
538gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb, GISTSTATE *giststate,
539 Relation r, int level,
540 Buffer buffer, List *splitinfo)
541{
542 RelocationBufferInfo *relocationBuffersInfos;
543 bool found;
544 GISTNodeBuffer *nodeBuffer;
545 BlockNumber blocknum;
546 IndexTuple itup;
547 int splitPagesCount = 0,
548 i;
549 GISTENTRY entry[INDEX_MAX_KEYS];
550 bool isnull[INDEX_MAX_KEYS];
551 GISTNodeBuffer oldBuf;
552 ListCell *lc;
553
554 /* If the splitted page doesn't have buffers, we have nothing to do. */
555 if (!LEVEL_HAS_BUFFERS(level, gfbb))
556 return;
557
558 /*
559 * Get the node buffer of the splitted page.
560 */
561 blocknum = BufferGetBlockNumber(buffer);
562 nodeBuffer = hash_search(gfbb->nodeBuffersTab, &blocknum,
563 HASH_FIND, &found);
564 if (!found)
565 {
566 /* The page has no buffer, so we have nothing to do. */
567 return;
568 }
569
570 /*
571 * Make a copy of the old buffer, as we're going reuse it as the buffer
572 * for the new left page, which is on the same block as the old page.
573 * That's not true for the root page, but that's fine because we never
574 * have a buffer on the root page anyway. The original algorithm as
575 * described by Arge et al did, but it's of no use, as you might as well
576 * read the tuples straight from the heap instead of the root buffer.
577 */
578 Assert(blocknum != GIST_ROOT_BLKNO);
579 memcpy(&oldBuf, nodeBuffer, sizeof(GISTNodeBuffer));
580 oldBuf.isTemp = true;
581
582 /* Reset the old buffer, used for the new left page from now on */
583 nodeBuffer->blocksCount = 0;
584 nodeBuffer->pageBuffer = NULL;
585 nodeBuffer->pageBlocknum = InvalidBlockNumber;
586
587 /*
588 * Allocate memory for information about relocation buffers.
589 */
590 splitPagesCount = list_length(splitinfo);
591 relocationBuffersInfos =
592 (RelocationBufferInfo *) palloc(sizeof(RelocationBufferInfo) *
593 splitPagesCount);
594
595 /*
596 * Fill relocation buffers information for node buffers of pages produced
597 * by split.
598 */
599 i = 0;
600 foreach(lc, splitinfo)
601 {
602 GISTPageSplitInfo *si = (GISTPageSplitInfo *) lfirst(lc);
603 GISTNodeBuffer *newNodeBuffer;
604
605 /* Decompress parent index tuple of node buffer page. */
606 gistDeCompressAtt(giststate, r,
607 si->downlink, NULL, (OffsetNumber) 0,
608 relocationBuffersInfos[i].entry,
609 relocationBuffersInfos[i].isnull);
610
611 /*
612 * Create a node buffer for the page. The leftmost half is on the same
613 * block as the old page before split, so for the leftmost half this
614 * will return the original buffer. The tuples on the original buffer
615 * were relinked to the temporary buffer, so the original one is now
616 * empty.
617 */
618 newNodeBuffer = gistGetNodeBuffer(gfbb, giststate, BufferGetBlockNumber(si->buf), level);
619
620 relocationBuffersInfos[i].nodeBuffer = newNodeBuffer;
621 relocationBuffersInfos[i].splitinfo = si;
622
623 i++;
624 }
625
626 /*
627 * Loop through all index tuples in the buffer of the page being split,
628 * moving them to buffers for the new pages. We try to move each tuple to
629 * the page that will result in the lowest penalty for the leading column
630 * or, in the case of a tie, the lowest penalty for the earliest column
631 * that is not tied.
632 *
633 * The page searching logic is very similar to gistchoose().
634 */
635 while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup))
636 {
637 float best_penalty[INDEX_MAX_KEYS];
638 int i,
639 which;
640 IndexTuple newtup;
641 RelocationBufferInfo *targetBufferInfo;
642
643 gistDeCompressAtt(giststate, r,
644 itup, NULL, (OffsetNumber) 0, entry, isnull);
645
646 /* default to using first page (shouldn't matter) */
647 which = 0;
648
649 /*
650 * best_penalty[j] is the best penalty we have seen so far for column
651 * j, or -1 when we haven't yet examined column j. Array entries to
652 * the right of the first -1 are undefined.
653 */
654 best_penalty[0] = -1;
655
656 /*
657 * Loop over possible target pages, looking for one to move this tuple
658 * to.
659 */
660 for (i = 0; i < splitPagesCount; i++)
661 {
662 RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i];
663 bool zero_penalty;
664 int j;
665
666 zero_penalty = true;
667
668 /* Loop over index attributes. */
669 for (j = 0; j < r->rd_att->natts; j++)
670 {
671 float usize;
672
673 /* Compute penalty for this column. */
674 usize = gistpenalty(giststate, j,
675 &splitPageInfo->entry[j],
676 splitPageInfo->isnull[j],
677 &entry[j], isnull[j]);
678 if (usize > 0)
679 zero_penalty = false;
680
681 if (best_penalty[j] < 0 || usize < best_penalty[j])
682 {
683 /*
684 * New best penalty for column. Tentatively select this
685 * page as the target, and record the best penalty. Then
686 * reset the next column's penalty to "unknown" (and
687 * indirectly, the same for all the ones to its right).
688 * This will force us to adopt this page's penalty values
689 * as the best for all the remaining columns during
690 * subsequent loop iterations.
691 */
692 which = i;
693 best_penalty[j] = usize;
694
695 if (j < r->rd_att->natts - 1)
696 best_penalty[j + 1] = -1;
697 }
698 else if (best_penalty[j] == usize)
699 {
700 /*
701 * The current page is exactly as good for this column as
702 * the best page seen so far. The next iteration of this
703 * loop will compare the next column.
704 */
705 }
706 else
707 {
708 /*
709 * The current page is worse for this column than the best
710 * page seen so far. Skip the remaining columns and move
711 * on to the next page, if any.
712 */
713 zero_penalty = false; /* so outer loop won't exit */
714 break;
715 }
716 }
717
718 /*
719 * If we find a page with zero penalty for all columns, there's no
720 * need to examine remaining pages; just break out of the loop and
721 * return it.
722 */
723 if (zero_penalty)
724 break;
725 }
726
727 /* OK, "which" is the page index to push the tuple to */
728 targetBufferInfo = &relocationBuffersInfos[which];
729
730 /* Push item to selected node buffer */
731 gistPushItupToNodeBuffer(gfbb, targetBufferInfo->nodeBuffer, itup);
732
733 /* Adjust the downlink for this page, if needed. */
734 newtup = gistgetadjusted(r, targetBufferInfo->splitinfo->downlink,
735 itup, giststate);
736 if (newtup)
737 {
738 gistDeCompressAtt(giststate, r,
739 newtup, NULL, (OffsetNumber) 0,
740 targetBufferInfo->entry,
741 targetBufferInfo->isnull);
742
743 targetBufferInfo->splitinfo->downlink = newtup;
744 }
745 }
746
747 pfree(relocationBuffersInfos);
748}
749
750
751/*
752 * Wrappers around BufFile operations. The main difference is that these
753 * wrappers report errors with ereport(), so that the callers don't need
754 * to check the return code.
755 */
756
757static void
758ReadTempFileBlock(BufFile *file, long blknum, void *ptr)
759{
760 if (BufFileSeekBlock(file, blknum) != 0)
761 elog(ERROR, "could not seek temporary file: %m");
762 if (BufFileRead(file, ptr, BLCKSZ) != BLCKSZ)
763 elog(ERROR, "could not read temporary file: %m");
764}
765
766static void
767WriteTempFileBlock(BufFile *file, long blknum, void *ptr)
768{
769 if (BufFileSeekBlock(file, blknum) != 0)
770 elog(ERROR, "could not seek temporary file: %m");
771 if (BufFileWrite(file, ptr, BLCKSZ) != BLCKSZ)
772 {
773 /*
774 * the other errors in Read/WriteTempFileBlock shouldn't happen, but
775 * an error at write can easily happen if you run out of disk space.
776 */
777 ereport(ERROR,
778 (errcode_for_file_access(),
779 errmsg("could not write block %ld of temporary file: %m",
780 blknum)));
781 }
782}
783