gistbuildbuffers.c source code [PostgreSQL/src/backend/access/gist/gistbuildbuffers.c]

1	/-------------------------------------------------------------------------*
2	*
3	* gistbuildbuffers.c
4	* node buffer management functions for GiST buffering build algorithm.
5	*
6	*
7	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
8	* Portions Copyright (c) 1994, Regents of the University of California
9	*
10	* IDENTIFICATION
11	* src/backend/access/gist/gistbuildbuffers.c
12	*
13	*-------------------------------------------------------------------------
14	*/
15	#include "postgres.h"
16
17	#include "access/genam.h"
18	#include "access/gist_private.h"
19	#include "catalog/index.h"
20	#include "miscadmin.h"
21	#include "storage/buffile.h"
22	#include "storage/bufmgr.h"
23	#include "utils/memutils.h"
24	#include "utils/rel.h"
25
26	static GISTNodeBufferPage gistAllocateNewPageBuffer(GISTBuildBuffers gfbb);
27	static void gistAddLoadedBuffer(GISTBuildBuffers *gfbb,
28	GISTNodeBuffer *nodeBuffer);
29	static void gistLoadNodeBuffer(GISTBuildBuffers *gfbb,
30	GISTNodeBuffer *nodeBuffer);
31	static void gistUnloadNodeBuffer(GISTBuildBuffers *gfbb,
32	GISTNodeBuffer *nodeBuffer);
33	static void gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer,
34	IndexTuple item);
35	static void gistGetItupFromPage(GISTNodeBufferPage *pageBuffer,
36	IndexTuple *item);
37	static long gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb);
38	static void gistBuffersReleaseBlock(GISTBuildBuffers gfbb, long* blocknum);
39
40	static void ReadTempFileBlock(BufFile file, long* blknum, void *ptr);
41	static void WriteTempFileBlock(BufFile file, long* blknum, void *ptr);
42
43
44	/*
45	* Initialize GiST build buffers.
46	*/
47	GISTBuildBuffers *
48	gistInitBuildBuffers(int pagesPerBuffer, int levelStep, int maxLevel)
49	{
50	GISTBuildBuffers *gfbb;
51	HASHCTL hashCtl;
52
53	gfbb = palloc(sizeof(GISTBuildBuffers));
54	gfbb->pagesPerBuffer = pagesPerBuffer;
55	gfbb->levelStep = levelStep;
56
57	/*
58	* Create a temporary file to hold buffer pages that are swapped out of
59	* memory.
60	*/
61	gfbb->pfile = BufFileCreateTemp(false);
62	gfbb->nFileBlocks = `0`;
63
64	/ Initialize free page management. /
65	gfbb->nFreeBlocks = `0`;
66	gfbb->freeBlocksLen = `32`;
67	gfbb->freeBlocks = (long ) palloc(gfbb->freeBlocksLen sizeof(long));
68
69	/*
70	* Current memory context will be used for all in-memory data structures
71	* of buffers which are persistent during buffering build.
72	*/
73	gfbb->context = CurrentMemoryContext;
74
75	/*
76	* nodeBuffersTab hash is association between index blocks and it's
77	* buffers.
78	*/
79	memset(&hashCtl, `0`, sizeof(hashCtl));
80	hashCtl.keysize = sizeof(BlockNumber);
81	hashCtl.entrysize = sizeof(GISTNodeBuffer);
82	hashCtl.hcxt = CurrentMemoryContext;
83	gfbb->nodeBuffersTab = hash_create("gistbuildbuffers",
84	`1024`,
85	&hashCtl,
86	HASH_ELEM \| HASH_BLOBS \| HASH_CONTEXT);
87
88	gfbb->bufferEmptyingQueue = NIL;
89
90	/*
91	* Per-level node buffers lists for final buffers emptying process. Node
92	* buffers are inserted here when they are created.
93	*/
94	gfbb->buffersOnLevelsLen = `1`;
95	gfbb->buffersOnLevels = (List ) palloc(sizeof*(List ) *
96	gfbb->buffersOnLevelsLen);
97	gfbb->buffersOnLevels[`0`] = NIL;
98
99	/*
100	* Block numbers of node buffers which last pages are currently loaded
101	* into main memory.
102	*/
103	gfbb->loadedBuffersLen = `32`;
104	gfbb->loadedBuffers = (GISTNodeBuffer *) palloc(gfbb->loadedBuffersLen
105	sizeof(GISTNodeBuffer *));
106	gfbb->loadedBuffersCount = `0`;
107
108	gfbb->rootlevel = maxLevel;
109
110	return gfbb;
111	}
112
113	/*
114	* Returns a node buffer for given block. The buffer is created if it
115	* doesn't exist yet.
116	*/
117	GISTNodeBuffer *
118	gistGetNodeBuffer(GISTBuildBuffers gfbb, GISTSTATE giststate,
119	BlockNumber nodeBlocknum, int level)
120	{
121	GISTNodeBuffer *nodeBuffer;
122	bool found;
123
124	/ Find node buffer in hash table /
125	nodeBuffer = (GISTNodeBuffer *) hash_search(gfbb->nodeBuffersTab,
126	(const void *) &nodeBlocknum,
127	HASH_ENTER,
128	&found);
129	if (!found)
130	{
131	/*
132	* Node buffer wasn't found. Initialize the new buffer as empty.
133	*/
134	MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
135
136	/ nodeBuffer->nodeBlocknum is the hash key and was filled in already /
137	nodeBuffer->blocksCount = `0`;
138	nodeBuffer->pageBlocknum = InvalidBlockNumber;
139	nodeBuffer->pageBuffer = NULL;
140	nodeBuffer->queuedForEmptying = false;
141	nodeBuffer->isTemp = false;
142	nodeBuffer->level = level;
143
144	/*
145	* Add this buffer to the list of buffers on this level. Enlarge
146	* buffersOnLevels array if needed.
147	*/
148	if (level >= gfbb->buffersOnLevelsLen)
149	{
150	int i;
151
152	gfbb->buffersOnLevels =
153	(List **) repalloc(gfbb->buffersOnLevels,
154	(level + `1`) * sizeof(List *));
155
156	/ initialize the enlarged portion /
157	for (i = gfbb->buffersOnLevelsLen; i <= level; i++)
158	gfbb->buffersOnLevels[i] = NIL;
159	gfbb->buffersOnLevelsLen = level + `1`;
160	}
161
162	/*
163	* Prepend the new buffer to the list of buffers on this level. It's
164	* not arbitrary that the new buffer is put to the beginning of the
165	* list: in the final emptying phase we loop through all buffers at
166	* each level, and flush them. If a page is split during the emptying,
167	* it's more efficient to flush the new splitted pages first, before
168	* moving on to pre-existing pages on the level. The buffers just
169	* created during the page split are likely still in cache, so
170	* flushing them immediately is more efficient than putting them to
171	* the end of the queue.
172	*/
173	gfbb->buffersOnLevels[level] = lcons(nodeBuffer,
174	gfbb->buffersOnLevels[level]);
175
176	MemoryContextSwitchTo(oldcxt);
177	}
178
179	return nodeBuffer;
180	}
181
182	/*
183	* Allocate memory for a buffer page.
184	*/
185	static GISTNodeBufferPage *
186	gistAllocateNewPageBuffer(GISTBuildBuffers *gfbb)
187	{
188	GISTNodeBufferPage *pageBuffer;
189
190	pageBuffer = (GISTNodeBufferPage *) MemoryContextAllocZero(gfbb->context,
191	BLCKSZ);
192	pageBuffer->prev = InvalidBlockNumber;
193
194	/ Set page free space /
195	PAGE_FREE_SPACE(pageBuffer) = BLCKSZ - BUFFER_PAGE_DATA_OFFSET;
196	return pageBuffer;
197	}
198
199	/*
200	* Add specified buffer into loadedBuffers array.
201	*/
202	static void
203	gistAddLoadedBuffer(GISTBuildBuffers gfbb, GISTNodeBuffer nodeBuffer)
204	{
205	/ Never add a temporary buffer to the array /
206	if (nodeBuffer->isTemp)
207	return;
208
209	/ Enlarge the array if needed /
210	if (gfbb->loadedBuffersCount >= gfbb->loadedBuffersLen)
211	{
212	gfbb->loadedBuffersLen *= `2`;
213	gfbb->loadedBuffers = (GISTNodeBuffer **)
214	repalloc(gfbb->loadedBuffers,
215	gfbb->loadedBuffersLen * sizeof(GISTNodeBuffer *));
216	}
217
218	gfbb->loadedBuffers[gfbb->loadedBuffersCount] = nodeBuffer;
219	gfbb->loadedBuffersCount++;
220	}
221
222	/*
223	* Load last page of node buffer into main memory.
224	*/
225	static void
226	gistLoadNodeBuffer(GISTBuildBuffers gfbb, GISTNodeBuffer nodeBuffer)
227	{
228	/ Check if we really should load something /
229	if (!nodeBuffer->pageBuffer && nodeBuffer->blocksCount > `0`)
230	{
231	/ Allocate memory for page /
232	nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
233
234	/ Read block from temporary file /
235	ReadTempFileBlock(gfbb->pfile, nodeBuffer->pageBlocknum,
236	nodeBuffer->pageBuffer);
237
238	/ Mark file block as free /
239	gistBuffersReleaseBlock(gfbb, nodeBuffer->pageBlocknum);
240
241	/ Mark node buffer as loaded /
242	gistAddLoadedBuffer(gfbb, nodeBuffer);
243	nodeBuffer->pageBlocknum = InvalidBlockNumber;
244	}
245	}
246
247	/*
248	* Write last page of node buffer to the disk.
249	*/
250	static void
251	gistUnloadNodeBuffer(GISTBuildBuffers gfbb, GISTNodeBuffer nodeBuffer)
252	{
253	/ Check if we have something to write /
254	if (nodeBuffer->pageBuffer)
255	{
256	BlockNumber blkno;
257
258	/ Get free file block /
259	blkno = gistBuffersGetFreeBlock(gfbb);
260
261	/ Write block to the temporary file /
262	WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
263
264	/ Free memory of that page /
265	pfree(nodeBuffer->pageBuffer);
266	nodeBuffer->pageBuffer = NULL;
267
268	/ Save block number /
269	nodeBuffer->pageBlocknum = blkno;
270	}
271	}
272
273	/*
274	* Write last pages of all node buffers to the disk.
275	*/
276	void
277	gistUnloadNodeBuffers(GISTBuildBuffers *gfbb)
278	{
279	int i;
280
281	/ Unload all the buffers that have a page loaded in memory. /
282	for (i = `0`; i < gfbb->loadedBuffersCount; i++)
283	gistUnloadNodeBuffer(gfbb, gfbb->loadedBuffers[i]);
284
285	/ Now there are no node buffers with loaded last page /
286	gfbb->loadedBuffersCount = `0`;
287	}
288
289	/*
290	* Add index tuple to buffer page.
291	*/
292	static void
293	gistPlaceItupToPage(GISTNodeBufferPage *pageBuffer, IndexTuple itup)
294	{
295	Size itupsz = IndexTupleSize(itup);
296	char *ptr;
297
298	/ There should be enough of space. /
299	Assert(PAGE_FREE_SPACE(pageBuffer) >= MAXALIGN(itupsz));
300
301	/ Reduce free space value of page to reserve a spot for the tuple. /
302	PAGE_FREE_SPACE(pageBuffer) -= MAXALIGN(itupsz);
303
304	/ Get pointer to the spot we reserved (ie. end of free space). /
305	ptr = (char *) pageBuffer + BUFFER_PAGE_DATA_OFFSET
306	+ PAGE_FREE_SPACE(pageBuffer);
307
308	/ Copy the index tuple there. /
309	memcpy(ptr, itup, itupsz);
310	}
311
312	/*
313	* Get last item from buffer page and remove it from page.
314	*/
315	static void
316	gistGetItupFromPage(GISTNodeBufferPage pageBuffer, IndexTuple itup)
317	{
318	IndexTuple ptr;
319	Size itupsz;
320
321	Assert(!PAGE_IS_EMPTY(pageBuffer)); / Page shouldn't be empty /
322
323	/ Get pointer to last index tuple /
324	ptr = (IndexTuple) ((char *) pageBuffer
325	+ BUFFER_PAGE_DATA_OFFSET
326	+ PAGE_FREE_SPACE(pageBuffer));
327	itupsz = IndexTupleSize(ptr);
328
329	/ Make a copy of the tuple /
330	*itup = (IndexTuple) palloc(itupsz);
331	memcpy(*itup, ptr, itupsz);
332
333	/ Mark the space used by the tuple as free /
334	PAGE_FREE_SPACE(pageBuffer) += MAXALIGN(itupsz);
335	}
336
337	/*
338	* Push an index tuple to node buffer.
339	*/
340	void
341	gistPushItupToNodeBuffer(GISTBuildBuffers gfbb, GISTNodeBuffer nodeBuffer,
342	IndexTuple itup)
343	{
344	/*
345	* Most part of memory operations will be in buffering build persistent
346	* context. So, let's switch to it.
347	*/
348	MemoryContext oldcxt = MemoryContextSwitchTo(gfbb->context);
349
350	/*
351	* If the buffer is currently empty, create the first page.
352	*/
353	if (nodeBuffer->blocksCount == `0`)
354	{
355	nodeBuffer->pageBuffer = gistAllocateNewPageBuffer(gfbb);
356	nodeBuffer->blocksCount = `1`;
357	gistAddLoadedBuffer(gfbb, nodeBuffer);
358	}
359
360	/ Load last page of node buffer if it wasn't in memory already /
361	if (!nodeBuffer->pageBuffer)
362	gistLoadNodeBuffer(gfbb, nodeBuffer);
363
364	/*
365	* Check if there is enough space on the last page for the tuple.
366	*/
367	if (PAGE_NO_SPACE(nodeBuffer->pageBuffer, itup))
368	{
369	/*
370	* Nope. Swap previous block to disk and allocate a new one.
371	*/
372	BlockNumber blkno;
373
374	/ Write filled page to the disk /
375	blkno = gistBuffersGetFreeBlock(gfbb);
376	WriteTempFileBlock(gfbb->pfile, blkno, nodeBuffer->pageBuffer);
377
378	/*
379	* Reset the in-memory page as empty, and link the previous block to
380	* the new page by storing its block number in the prev-link.
381	*/
382	PAGE_FREE_SPACE(nodeBuffer->pageBuffer) =
383	BLCKSZ - MAXALIGN(offsetof(GISTNodeBufferPage, tupledata));
384	nodeBuffer->pageBuffer->prev = blkno;
385
386	/ We've just added one more page /
387	nodeBuffer->blocksCount++;
388	}
389
390	gistPlaceItupToPage(nodeBuffer->pageBuffer, itup);
391
392	/*
393	* If the buffer just overflowed, add it to the emptying queue.
394	*/
395	if (BUFFER_HALF_FILLED(nodeBuffer, gfbb) && !nodeBuffer->queuedForEmptying)
396	{
397	gfbb->bufferEmptyingQueue = lcons(nodeBuffer,
398	gfbb->bufferEmptyingQueue);
399	nodeBuffer->queuedForEmptying = true;
400	}
401
402	/ Restore memory context /
403	MemoryContextSwitchTo(oldcxt);
404	}
405
406	/*
407	* Removes one index tuple from node buffer. Returns true if success and false
408	* if node buffer is empty.
409	*/
410	bool
411	gistPopItupFromNodeBuffer(GISTBuildBuffers gfbb, GISTNodeBuffer nodeBuffer,
412	IndexTuple *itup)
413	{
414	/*
415	* If node buffer is empty then return false.
416	*/
417	if (nodeBuffer->blocksCount <= `0`)
418	return false;
419
420	/ Load last page of node buffer if needed /
421	if (!nodeBuffer->pageBuffer)
422	gistLoadNodeBuffer(gfbb, nodeBuffer);
423
424	/*
425	* Get index tuple from last non-empty page.
426	*/
427	gistGetItupFromPage(nodeBuffer->pageBuffer, itup);
428
429	/*
430	* If we just removed the last tuple from the page, fetch previous page on
431	* this node buffer (if any).
432	*/
433	if (PAGE_IS_EMPTY(nodeBuffer->pageBuffer))
434	{
435	BlockNumber prevblkno;
436
437	/*
438	* blocksCount includes the page in pageBuffer, so decrease it now.
439	*/
440	nodeBuffer->blocksCount--;
441
442	/*
443	* If there's more pages, fetch previous one.
444	*/
445	prevblkno = nodeBuffer->pageBuffer->prev;
446	if (prevblkno != InvalidBlockNumber)
447	{
448	/ There is a previous page. Fetch it. /
449	Assert(nodeBuffer->blocksCount > `0`);
450	ReadTempFileBlock(gfbb->pfile, prevblkno, nodeBuffer->pageBuffer);
451
452	/*
453	* Now that we've read the block in memory, we can release its
454	* on-disk block for reuse.
455	*/
456	gistBuffersReleaseBlock(gfbb, prevblkno);
457	}
458	else
459	{
460	/ No more pages. Free memory. /
461	Assert(nodeBuffer->blocksCount == `0`);
462	pfree(nodeBuffer->pageBuffer);
463	nodeBuffer->pageBuffer = NULL;
464	}
465	}
466	return true;
467	}
468
469	/*
470	* Select a currently unused block for writing to.
471	*/
472	static long
473	gistBuffersGetFreeBlock(GISTBuildBuffers *gfbb)
474	{
475	/*
476	* If there are multiple free blocks, we select the one appearing last in
477	* freeBlocks[]. If there are none, assign the next block at the end of
478	* the file (causing the file to be extended).
479	*/
480	if (gfbb->nFreeBlocks > `0`)
481	return gfbb->freeBlocks[--gfbb->nFreeBlocks];
482	else
483	return gfbb->nFileBlocks++;
484	}
485
486	/*
487	* Return a block# to the freelist.
488	*/
489	static void
490	gistBuffersReleaseBlock(GISTBuildBuffers gfbb, long* blocknum)
491	{
492	int ndx;
493
494	/ Enlarge freeBlocks array if full. /
495	if (gfbb->nFreeBlocks >= gfbb->freeBlocksLen)
496	{
497	gfbb->freeBlocksLen *= `2`;
498	gfbb->freeBlocks = (long *) repalloc(gfbb->freeBlocks,
499	gfbb->freeBlocksLen *
500	sizeof(long));
501	}
502
503	/ Add blocknum to array /
504	ndx = gfbb->nFreeBlocks++;
505	gfbb->freeBlocks[ndx] = blocknum;
506	}
507
508	/*
509	* Free buffering build data structure.
510	*/
511	void
512	gistFreeBuildBuffers(GISTBuildBuffers *gfbb)
513	{
514	/ Close buffers file. /
515	BufFileClose(gfbb->pfile);
516
517	/ All other things will be freed on memory context release /
518	}
519
520	/*
521	* Data structure representing information about node buffer for index tuples
522	* relocation from splitted node buffer.
523	*/
524	typedef struct
525	{
526	GISTENTRY entry[INDEX_MAX_KEYS];
527	bool isnull[INDEX_MAX_KEYS];
528	GISTPageSplitInfo *splitinfo;
529	GISTNodeBuffer *nodeBuffer;
530	} RelocationBufferInfo;
531
532	/*
533	* At page split, distribute tuples from the buffer of the split page to
534	* new buffers for the created page halves. This also adjusts the downlinks
535	* in 'splitinfo' to include the tuples in the buffers.
536	*/
537	void
538	gistRelocateBuildBuffersOnSplit(GISTBuildBuffers gfbb, GISTSTATE giststate,
539	Relation r, int level,
540	Buffer buffer, List *splitinfo)
541	{
542	RelocationBufferInfo *relocationBuffersInfos;
543	bool found;
544	GISTNodeBuffer *nodeBuffer;
545	BlockNumber blocknum;
546	IndexTuple itup;
547	int splitPagesCount = `0`,
548	i;
549	GISTENTRY entry[INDEX_MAX_KEYS];
550	bool isnull[INDEX_MAX_KEYS];
551	GISTNodeBuffer oldBuf;
552	ListCell *lc;
553
554	/ If the splitted page doesn't have buffers, we have nothing to do. /
555	if (!LEVEL_HAS_BUFFERS(level, gfbb))
556	return;
557
558	/*
559	* Get the node buffer of the splitted page.
560	*/
561	blocknum = BufferGetBlockNumber(buffer);
562	nodeBuffer = hash_search(gfbb->nodeBuffersTab, &blocknum,
563	HASH_FIND, &found);
564	if (!found)
565	{
566	/ The page has no buffer, so we have nothing to do. /
567	return;
568	}
569
570	/*
571	* Make a copy of the old buffer, as we're going reuse it as the buffer
572	* for the new left page, which is on the same block as the old page.
573	* That's not true for the root page, but that's fine because we never
574	* have a buffer on the root page anyway. The original algorithm as
575	* described by Arge et al did, but it's of no use, as you might as well
576	* read the tuples straight from the heap instead of the root buffer.
577	*/
578	Assert(blocknum != GIST_ROOT_BLKNO);
579	memcpy(&oldBuf, nodeBuffer, sizeof(GISTNodeBuffer));
580	oldBuf.isTemp = true;
581
582	/ Reset the old buffer, used for the new left page from now on /
583	nodeBuffer->blocksCount = `0`;
584	nodeBuffer->pageBuffer = NULL;
585	nodeBuffer->pageBlocknum = InvalidBlockNumber;
586
587	/*
588	* Allocate memory for information about relocation buffers.
589	*/
590	splitPagesCount = list_length(splitinfo);
591	relocationBuffersInfos =
592	(RelocationBufferInfo ) palloc(sizeof(RelocationBufferInfo)
593	splitPagesCount);
594
595	/*
596	* Fill relocation buffers information for node buffers of pages produced
597	* by split.
598	*/
599	i = `0`;
600	foreach(lc, splitinfo)
601	{
602	GISTPageSplitInfo si = (GISTPageSplitInfo ) lfirst(lc);
603	GISTNodeBuffer *newNodeBuffer;
604
605	/ Decompress parent index tuple of node buffer page. /
606	gistDeCompressAtt(giststate, r,
607	si->downlink, NULL, (OffsetNumber) `0`,
608	relocationBuffersInfos[i].entry,
609	relocationBuffersInfos[i].isnull);
610
611	/*
612	* Create a node buffer for the page. The leftmost half is on the same
613	* block as the old page before split, so for the leftmost half this
614	* will return the original buffer. The tuples on the original buffer
615	* were relinked to the temporary buffer, so the original one is now
616	* empty.
617	*/
618	newNodeBuffer = gistGetNodeBuffer(gfbb, giststate, BufferGetBlockNumber(si->buf), level);
619
620	relocationBuffersInfos[i].nodeBuffer = newNodeBuffer;
621	relocationBuffersInfos[i].splitinfo = si;
622
623	i++;
624	}
625
626	/*
627	* Loop through all index tuples in the buffer of the page being split,
628	* moving them to buffers for the new pages. We try to move each tuple to
629	* the page that will result in the lowest penalty for the leading column
630	* or, in the case of a tie, the lowest penalty for the earliest column
631	* that is not tied.
632	*
633	* The page searching logic is very similar to gistchoose().
634	*/
635	while (gistPopItupFromNodeBuffer(gfbb, &oldBuf, &itup))
636	{
637	float best_penalty[INDEX_MAX_KEYS];
638	int i,
639	which;
640	IndexTuple newtup;
641	RelocationBufferInfo *targetBufferInfo;
642
643	gistDeCompressAtt(giststate, r,
644	itup, NULL, (OffsetNumber) `0`, entry, isnull);
645
646	/ default to using first page (shouldn't matter) /
647	which = `0`;
648
649	/*
650	* best_penalty[j] is the best penalty we have seen so far for column
651	* j, or -1 when we haven't yet examined column j. Array entries to
652	* the right of the first -1 are undefined.
653	*/
654	best_penalty[`0`] = -`1`;
655
656	/*
657	* Loop over possible target pages, looking for one to move this tuple
658	* to.
659	*/
660	for (i = `0`; i < splitPagesCount; i++)
661	{
662	RelocationBufferInfo *splitPageInfo = &relocationBuffersInfos[i];
663	bool zero_penalty;
664	int j;
665
666	zero_penalty = true;
667
668	/ Loop over index attributes. /
669	for (j = `0`; j < r->rd_att->natts; j++)
670	{
671	float usize;
672
673	/ Compute penalty for this column. /
674	usize = gistpenalty(giststate, j,
675	&splitPageInfo->entry[j],
676	splitPageInfo->isnull[j],
677	&entry[j], isnull[j]);
678	if (usize > `0`)
679	zero_penalty = false;
680
681	if (best_penalty[j] < `0` \|\| usize < best_penalty[j])
682	{
683	/*
684	* New best penalty for column. Tentatively select this
685	* page as the target, and record the best penalty. Then
686	* reset the next column's penalty to "unknown" (and
687	* indirectly, the same for all the ones to its right).
688	* This will force us to adopt this page's penalty values
689	* as the best for all the remaining columns during
690	* subsequent loop iterations.
691	*/
692	which = i;
693	best_penalty[j] = usize;
694
695	if (j < r->rd_att->natts - `1`)
696	best_penalty[j + `1`] = -`1`;
697	}
698	else if (best_penalty[j] == usize)
699	{
700	/*
701	* The current page is exactly as good for this column as
702	* the best page seen so far. The next iteration of this
703	* loop will compare the next column.
704	*/
705	}
706	else
707	{
708	/*
709	* The current page is worse for this column than the best
710	* page seen so far. Skip the remaining columns and move
711	* on to the next page, if any.
712	*/
713	zero_penalty = false; / so outer loop won't exit /
714	break;
715	}
716	}
717
718	/*
719	* If we find a page with zero penalty for all columns, there's no
720	* need to examine remaining pages; just break out of the loop and
721	* return it.
722	*/
723	if (zero_penalty)
724	break;
725	}
726
727	/ OK, "which" is the page index to push the tuple to /
728	targetBufferInfo = &relocationBuffersInfos[which];
729
730	/ Push item to selected node buffer /
731	gistPushItupToNodeBuffer(gfbb, targetBufferInfo->nodeBuffer, itup);
732
733	/ Adjust the downlink for this page, if needed. /
734	newtup = gistgetadjusted(r, targetBufferInfo->splitinfo->downlink,
735	itup, giststate);
736	if (newtup)
737	{
738	gistDeCompressAtt(giststate, r,
739	newtup, NULL, (OffsetNumber) `0`,
740	targetBufferInfo->entry,
741	targetBufferInfo->isnull);
742
743	targetBufferInfo->splitinfo->downlink = newtup;
744	}
745	}
746
747	pfree(relocationBuffersInfos);
748	}
749
750
751	/*
752	* Wrappers around BufFile operations. The main difference is that these
753	* wrappers report errors with ereport(), so that the callers don't need
754	* to check the return code.
755	*/
756
757	static void
758	ReadTempFileBlock(BufFile file, long* blknum, void *ptr)
759	{
760	if (BufFileSeekBlock(file, blknum) != `0`)
761	elog(ERROR, "could not seek temporary file: %m");
762	if (BufFileRead(file, ptr, BLCKSZ) != BLCKSZ)
763	elog(ERROR, "could not read temporary file: %m");
764	}
765
766	static void
767	WriteTempFileBlock(BufFile file, long* blknum, void *ptr)
768	{
769	if (BufFileSeekBlock(file, blknum) != `0`)
770	elog(ERROR, "could not seek temporary file: %m");
771	if (BufFileWrite(file, ptr, BLCKSZ) != BLCKSZ)
772	{
773	/*
774	* the other errors in Read/WriteTempFileBlock shouldn't happen, but
775	* an error at write can easily happen if you run out of disk space.
776	*/
777	ereport(ERROR,
778	(errcode_for_file_access(),
779	errmsg("could not write block %ld of temporary file: %m",
780	blknum)));
781	}
782	}
783

Browse the source code of PostgreSQL/src/backend/access/gist/gistbuildbuffers.c