1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * gistutil.c |
4 | * utilities routines for the postgres GiST index access method. |
5 | * |
6 | * |
7 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
8 | * Portions Copyright (c) 1994, Regents of the University of California |
9 | * |
10 | * IDENTIFICATION |
11 | * src/backend/access/gist/gistutil.c |
12 | *------------------------------------------------------------------------- |
13 | */ |
14 | #include "postgres.h" |
15 | |
16 | #include <math.h> |
17 | |
18 | #include "access/gist_private.h" |
19 | #include "access/htup_details.h" |
20 | #include "access/reloptions.h" |
21 | #include "catalog/pg_opclass.h" |
22 | #include "storage/indexfsm.h" |
23 | #include "storage/lmgr.h" |
24 | #include "utils/float.h" |
25 | #include "utils/syscache.h" |
26 | #include "utils/snapmgr.h" |
27 | #include "utils/lsyscache.h" |
28 | |
29 | |
30 | /* |
31 | * Write itup vector to page, has no control of free space. |
32 | */ |
33 | void |
34 | gistfillbuffer(Page page, IndexTuple *itup, int len, OffsetNumber off) |
35 | { |
36 | OffsetNumber l = InvalidOffsetNumber; |
37 | int i; |
38 | |
39 | if (off == InvalidOffsetNumber) |
40 | off = (PageIsEmpty(page)) ? FirstOffsetNumber : |
41 | OffsetNumberNext(PageGetMaxOffsetNumber(page)); |
42 | |
43 | for (i = 0; i < len; i++) |
44 | { |
45 | Size sz = IndexTupleSize(itup[i]); |
46 | |
47 | l = PageAddItem(page, (Item) itup[i], sz, off, false, false); |
48 | if (l == InvalidOffsetNumber) |
49 | elog(ERROR, "failed to add item to GiST index page, item %d out of %d, size %d bytes" , |
50 | i, len, (int) sz); |
51 | off++; |
52 | } |
53 | } |
54 | |
55 | /* |
56 | * Check space for itup vector on page |
57 | */ |
58 | bool |
59 | gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace) |
60 | { |
61 | unsigned int size = freespace, |
62 | deleted = 0; |
63 | int i; |
64 | |
65 | for (i = 0; i < len; i++) |
66 | size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData); |
67 | |
68 | if (todelete != InvalidOffsetNumber) |
69 | { |
70 | IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, todelete)); |
71 | |
72 | deleted = IndexTupleSize(itup) + sizeof(ItemIdData); |
73 | } |
74 | |
75 | return (PageGetFreeSpace(page) + deleted < size); |
76 | } |
77 | |
78 | bool |
79 | gistfitpage(IndexTuple *itvec, int len) |
80 | { |
81 | int i; |
82 | Size size = 0; |
83 | |
84 | for (i = 0; i < len; i++) |
85 | size += IndexTupleSize(itvec[i]) + sizeof(ItemIdData); |
86 | |
87 | /* TODO: Consider fillfactor */ |
88 | return (size <= GiSTPageSize); |
89 | } |
90 | |
91 | /* |
92 | * Read buffer into itup vector |
93 | */ |
94 | IndexTuple * |
95 | (Page page, int *len /* out */ ) |
96 | { |
97 | OffsetNumber i, |
98 | maxoff; |
99 | IndexTuple *itvec; |
100 | |
101 | maxoff = PageGetMaxOffsetNumber(page); |
102 | *len = maxoff; |
103 | itvec = palloc(sizeof(IndexTuple) * maxoff); |
104 | for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) |
105 | itvec[i - FirstOffsetNumber] = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); |
106 | |
107 | return itvec; |
108 | } |
109 | |
110 | /* |
111 | * join two vectors into one |
112 | */ |
113 | IndexTuple * |
114 | gistjoinvector(IndexTuple *itvec, int *len, IndexTuple *additvec, int addlen) |
115 | { |
116 | itvec = (IndexTuple *) repalloc((void *) itvec, sizeof(IndexTuple) * ((*len) + addlen)); |
117 | memmove(&itvec[*len], additvec, sizeof(IndexTuple) * addlen); |
118 | *len += addlen; |
119 | return itvec; |
120 | } |
121 | |
122 | /* |
123 | * make plain IndexTupleVector |
124 | */ |
125 | |
126 | IndexTupleData * |
127 | gistfillitupvec(IndexTuple *vec, int veclen, int *memlen) |
128 | { |
129 | char *ptr, |
130 | *ret; |
131 | int i; |
132 | |
133 | *memlen = 0; |
134 | |
135 | for (i = 0; i < veclen; i++) |
136 | *memlen += IndexTupleSize(vec[i]); |
137 | |
138 | ptr = ret = palloc(*memlen); |
139 | |
140 | for (i = 0; i < veclen; i++) |
141 | { |
142 | memcpy(ptr, vec[i], IndexTupleSize(vec[i])); |
143 | ptr += IndexTupleSize(vec[i]); |
144 | } |
145 | |
146 | return (IndexTupleData *) ret; |
147 | } |
148 | |
149 | /* |
150 | * Make unions of keys in IndexTuple vector (one union datum per index column). |
151 | * Union Datums are returned into the attr/isnull arrays. |
152 | * Resulting Datums aren't compressed. |
153 | */ |
154 | void |
155 | gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len, |
156 | Datum *attr, bool *isnull) |
157 | { |
158 | int i; |
159 | GistEntryVector *evec; |
160 | int attrsize; |
161 | |
162 | evec = (GistEntryVector *) palloc((len + 2) * sizeof(GISTENTRY) + GEVHDRSZ); |
163 | |
164 | for (i = 0; i < giststate->nonLeafTupdesc->natts; i++) |
165 | { |
166 | int j; |
167 | |
168 | /* Collect non-null datums for this column */ |
169 | evec->n = 0; |
170 | for (j = 0; j < len; j++) |
171 | { |
172 | Datum datum; |
173 | bool IsNull; |
174 | |
175 | datum = index_getattr(itvec[j], i + 1, giststate->leafTupdesc, |
176 | &IsNull); |
177 | if (IsNull) |
178 | continue; |
179 | |
180 | gistdentryinit(giststate, i, |
181 | evec->vector + evec->n, |
182 | datum, |
183 | NULL, NULL, (OffsetNumber) 0, |
184 | false, IsNull); |
185 | evec->n++; |
186 | } |
187 | |
188 | /* If this column was all NULLs, the union is NULL */ |
189 | if (evec->n == 0) |
190 | { |
191 | attr[i] = (Datum) 0; |
192 | isnull[i] = true; |
193 | } |
194 | else |
195 | { |
196 | if (evec->n == 1) |
197 | { |
198 | /* unionFn may expect at least two inputs */ |
199 | evec->n = 2; |
200 | evec->vector[1] = evec->vector[0]; |
201 | } |
202 | |
203 | /* Make union and store in attr array */ |
204 | attr[i] = FunctionCall2Coll(&giststate->unionFn[i], |
205 | giststate->supportCollation[i], |
206 | PointerGetDatum(evec), |
207 | PointerGetDatum(&attrsize)); |
208 | |
209 | isnull[i] = false; |
210 | } |
211 | } |
212 | } |
213 | |
214 | /* |
215 | * Return an IndexTuple containing the result of applying the "union" |
216 | * method to the specified IndexTuple vector. |
217 | */ |
218 | IndexTuple |
219 | gistunion(Relation r, IndexTuple *itvec, int len, GISTSTATE *giststate) |
220 | { |
221 | Datum attr[INDEX_MAX_KEYS]; |
222 | bool isnull[INDEX_MAX_KEYS]; |
223 | |
224 | gistMakeUnionItVec(giststate, itvec, len, attr, isnull); |
225 | |
226 | return gistFormTuple(giststate, r, attr, isnull, false); |
227 | } |
228 | |
229 | /* |
230 | * makes union of two key |
231 | */ |
232 | void |
233 | gistMakeUnionKey(GISTSTATE *giststate, int attno, |
234 | GISTENTRY *entry1, bool isnull1, |
235 | GISTENTRY *entry2, bool isnull2, |
236 | Datum *dst, bool *dstisnull) |
237 | { |
238 | /* we need a GistEntryVector with room for exactly 2 elements */ |
239 | union |
240 | { |
241 | GistEntryVector gev; |
242 | char padding[2 * sizeof(GISTENTRY) + GEVHDRSZ]; |
243 | } storage; |
244 | GistEntryVector *evec = &storage.gev; |
245 | int dstsize; |
246 | |
247 | evec->n = 2; |
248 | |
249 | if (isnull1 && isnull2) |
250 | { |
251 | *dstisnull = true; |
252 | *dst = (Datum) 0; |
253 | } |
254 | else |
255 | { |
256 | if (isnull1 == false && isnull2 == false) |
257 | { |
258 | evec->vector[0] = *entry1; |
259 | evec->vector[1] = *entry2; |
260 | } |
261 | else if (isnull1 == false) |
262 | { |
263 | evec->vector[0] = *entry1; |
264 | evec->vector[1] = *entry1; |
265 | } |
266 | else |
267 | { |
268 | evec->vector[0] = *entry2; |
269 | evec->vector[1] = *entry2; |
270 | } |
271 | |
272 | *dstisnull = false; |
273 | *dst = FunctionCall2Coll(&giststate->unionFn[attno], |
274 | giststate->supportCollation[attno], |
275 | PointerGetDatum(evec), |
276 | PointerGetDatum(&dstsize)); |
277 | } |
278 | } |
279 | |
280 | bool |
281 | gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b) |
282 | { |
283 | bool result; |
284 | |
285 | FunctionCall3Coll(&giststate->equalFn[attno], |
286 | giststate->supportCollation[attno], |
287 | a, b, |
288 | PointerGetDatum(&result)); |
289 | return result; |
290 | } |
291 | |
292 | /* |
293 | * Decompress all keys in tuple |
294 | */ |
295 | void |
296 | gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p, |
297 | OffsetNumber o, GISTENTRY *attdata, bool *isnull) |
298 | { |
299 | int i; |
300 | |
301 | for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++) |
302 | { |
303 | Datum datum; |
304 | |
305 | datum = index_getattr(tuple, i + 1, giststate->leafTupdesc, &isnull[i]); |
306 | gistdentryinit(giststate, i, &attdata[i], |
307 | datum, r, p, o, |
308 | false, isnull[i]); |
309 | } |
310 | } |
311 | |
312 | /* |
313 | * Forms union of oldtup and addtup, if union == oldtup then return NULL |
314 | */ |
315 | IndexTuple |
316 | gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *giststate) |
317 | { |
318 | bool neednew = false; |
319 | GISTENTRY oldentries[INDEX_MAX_KEYS], |
320 | addentries[INDEX_MAX_KEYS]; |
321 | bool oldisnull[INDEX_MAX_KEYS], |
322 | addisnull[INDEX_MAX_KEYS]; |
323 | Datum attr[INDEX_MAX_KEYS]; |
324 | bool isnull[INDEX_MAX_KEYS]; |
325 | IndexTuple newtup = NULL; |
326 | int i; |
327 | |
328 | gistDeCompressAtt(giststate, r, oldtup, NULL, |
329 | (OffsetNumber) 0, oldentries, oldisnull); |
330 | |
331 | gistDeCompressAtt(giststate, r, addtup, NULL, |
332 | (OffsetNumber) 0, addentries, addisnull); |
333 | |
334 | for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++) |
335 | { |
336 | gistMakeUnionKey(giststate, i, |
337 | oldentries + i, oldisnull[i], |
338 | addentries + i, addisnull[i], |
339 | attr + i, isnull + i); |
340 | |
341 | if (neednew) |
342 | /* we already need new key, so we can skip check */ |
343 | continue; |
344 | |
345 | if (isnull[i]) |
346 | /* union of key may be NULL if and only if both keys are NULL */ |
347 | continue; |
348 | |
349 | if (!addisnull[i]) |
350 | { |
351 | if (oldisnull[i] || |
352 | !gistKeyIsEQ(giststate, i, oldentries[i].key, attr[i])) |
353 | neednew = true; |
354 | } |
355 | } |
356 | |
357 | if (neednew) |
358 | { |
359 | /* need to update key */ |
360 | newtup = gistFormTuple(giststate, r, attr, isnull, false); |
361 | newtup->t_tid = oldtup->t_tid; |
362 | } |
363 | |
364 | return newtup; |
365 | } |
366 | |
367 | /* |
368 | * Search an upper index page for the entry with lowest penalty for insertion |
369 | * of the new index key contained in "it". |
370 | * |
371 | * Returns the index of the page entry to insert into. |
372 | */ |
373 | OffsetNumber |
374 | gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */ |
375 | GISTSTATE *giststate) |
376 | { |
377 | OffsetNumber result; |
378 | OffsetNumber maxoff; |
379 | OffsetNumber i; |
380 | float best_penalty[INDEX_MAX_KEYS]; |
381 | GISTENTRY entry, |
382 | identry[INDEX_MAX_KEYS]; |
383 | bool isnull[INDEX_MAX_KEYS]; |
384 | int keep_current_best; |
385 | |
386 | Assert(!GistPageIsLeaf(p)); |
387 | |
388 | gistDeCompressAtt(giststate, r, |
389 | it, NULL, (OffsetNumber) 0, |
390 | identry, isnull); |
391 | |
392 | /* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */ |
393 | result = FirstOffsetNumber; |
394 | |
395 | /* |
396 | * The index may have multiple columns, and there's a penalty value for |
397 | * each column. The penalty associated with a column that appears earlier |
398 | * in the index definition is strictly more important than the penalty of |
399 | * a column that appears later in the index definition. |
400 | * |
401 | * best_penalty[j] is the best penalty we have seen so far for column j, |
402 | * or -1 when we haven't yet examined column j. Array entries to the |
403 | * right of the first -1 are undefined. |
404 | */ |
405 | best_penalty[0] = -1; |
406 | |
407 | /* |
408 | * If we find a tuple that's exactly as good as the currently best one, we |
409 | * could use either one. When inserting a lot of tuples with the same or |
410 | * similar keys, it's preferable to descend down the same path when |
411 | * possible, as that's more cache-friendly. On the other hand, if all |
412 | * inserts land on the same leaf page after a split, we're never going to |
413 | * insert anything to the other half of the split, and will end up using |
414 | * only 50% of the available space. Distributing the inserts evenly would |
415 | * lead to better space usage, but that hurts cache-locality during |
416 | * insertion. To get the best of both worlds, when we find a tuple that's |
417 | * exactly as good as the previous best, choose randomly whether to stick |
418 | * to the old best, or use the new one. Once we decide to stick to the |
419 | * old best, we keep sticking to it for any subsequent equally good tuples |
420 | * we might find. This favors tuples with low offsets, but still allows |
421 | * some inserts to go to other equally-good subtrees. |
422 | * |
423 | * keep_current_best is -1 if we haven't yet had to make a random choice |
424 | * whether to keep the current best tuple. If we have done so, and |
425 | * decided to keep it, keep_current_best is 1; if we've decided to |
426 | * replace, keep_current_best is 0. (This state will be reset to -1 as |
427 | * soon as we've made the replacement, but sometimes we make the choice in |
428 | * advance of actually finding a replacement best tuple.) |
429 | */ |
430 | keep_current_best = -1; |
431 | |
432 | /* |
433 | * Loop over tuples on page. |
434 | */ |
435 | maxoff = PageGetMaxOffsetNumber(p); |
436 | Assert(maxoff >= FirstOffsetNumber); |
437 | |
438 | for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) |
439 | { |
440 | IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i)); |
441 | bool zero_penalty; |
442 | int j; |
443 | |
444 | zero_penalty = true; |
445 | |
446 | /* Loop over index attributes. */ |
447 | for (j = 0; j < IndexRelationGetNumberOfKeyAttributes(r); j++) |
448 | { |
449 | Datum datum; |
450 | float usize; |
451 | bool IsNull; |
452 | |
453 | /* Compute penalty for this column. */ |
454 | datum = index_getattr(itup, j + 1, giststate->leafTupdesc, |
455 | &IsNull); |
456 | gistdentryinit(giststate, j, &entry, datum, r, p, i, |
457 | false, IsNull); |
458 | usize = gistpenalty(giststate, j, &entry, IsNull, |
459 | &identry[j], isnull[j]); |
460 | if (usize > 0) |
461 | zero_penalty = false; |
462 | |
463 | if (best_penalty[j] < 0 || usize < best_penalty[j]) |
464 | { |
465 | /* |
466 | * New best penalty for column. Tentatively select this tuple |
467 | * as the target, and record the best penalty. Then reset the |
468 | * next column's penalty to "unknown" (and indirectly, the |
469 | * same for all the ones to its right). This will force us to |
470 | * adopt this tuple's penalty values as the best for all the |
471 | * remaining columns during subsequent loop iterations. |
472 | */ |
473 | result = i; |
474 | best_penalty[j] = usize; |
475 | |
476 | if (j < IndexRelationGetNumberOfKeyAttributes(r) - 1) |
477 | best_penalty[j + 1] = -1; |
478 | |
479 | /* we have new best, so reset keep-it decision */ |
480 | keep_current_best = -1; |
481 | } |
482 | else if (best_penalty[j] == usize) |
483 | { |
484 | /* |
485 | * The current tuple is exactly as good for this column as the |
486 | * best tuple seen so far. The next iteration of this loop |
487 | * will compare the next column. |
488 | */ |
489 | } |
490 | else |
491 | { |
492 | /* |
493 | * The current tuple is worse for this column than the best |
494 | * tuple seen so far. Skip the remaining columns and move on |
495 | * to the next tuple, if any. |
496 | */ |
497 | zero_penalty = false; /* so outer loop won't exit */ |
498 | break; |
499 | } |
500 | } |
501 | |
502 | /* |
503 | * If we looped past the last column, and did not update "result", |
504 | * then this tuple is exactly as good as the prior best tuple. |
505 | */ |
506 | if (j == IndexRelationGetNumberOfKeyAttributes(r) && result != i) |
507 | { |
508 | if (keep_current_best == -1) |
509 | { |
510 | /* we didn't make the random choice yet for this old best */ |
511 | keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0; |
512 | } |
513 | if (keep_current_best == 0) |
514 | { |
515 | /* we choose to use the new tuple */ |
516 | result = i; |
517 | /* choose again if there are even more exactly-as-good ones */ |
518 | keep_current_best = -1; |
519 | } |
520 | } |
521 | |
522 | /* |
523 | * If we find a tuple with zero penalty for all columns, and we've |
524 | * decided we don't want to search for another tuple with equal |
525 | * penalty, there's no need to examine remaining tuples; just break |
526 | * out of the loop and return it. |
527 | */ |
528 | if (zero_penalty) |
529 | { |
530 | if (keep_current_best == -1) |
531 | { |
532 | /* we didn't make the random choice yet for this old best */ |
533 | keep_current_best = (random() <= (MAX_RANDOM_VALUE / 2)) ? 1 : 0; |
534 | } |
535 | if (keep_current_best == 1) |
536 | break; |
537 | } |
538 | } |
539 | |
540 | return result; |
541 | } |
542 | |
543 | /* |
544 | * initialize a GiST entry with a decompressed version of key |
545 | */ |
546 | void |
547 | gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, |
548 | Datum k, Relation r, Page pg, OffsetNumber o, |
549 | bool l, bool isNull) |
550 | { |
551 | if (!isNull) |
552 | { |
553 | GISTENTRY *dep; |
554 | |
555 | gistentryinit(*e, k, r, pg, o, l); |
556 | |
557 | /* there may not be a decompress function in opclass */ |
558 | if (!OidIsValid(giststate->decompressFn[nkey].fn_oid)) |
559 | return; |
560 | |
561 | dep = (GISTENTRY *) |
562 | DatumGetPointer(FunctionCall1Coll(&giststate->decompressFn[nkey], |
563 | giststate->supportCollation[nkey], |
564 | PointerGetDatum(e))); |
565 | /* decompressFn may just return the given pointer */ |
566 | if (dep != e) |
567 | gistentryinit(*e, dep->key, dep->rel, dep->page, dep->offset, |
568 | dep->leafkey); |
569 | } |
570 | else |
571 | gistentryinit(*e, (Datum) 0, r, pg, o, l); |
572 | } |
573 | |
/*
 * Form an index tuple from per-column datums, compressing the key columns
 * via the opclass "compress" functions where provided.
 *
 * "isleaf" selects the leaf vs. non-leaf tuple descriptor and is also
 * passed to the compress functions as the leafkey flag.  For leaf tuples,
 * INCLUDE columns (beyond the key columns) are stored as-is.
 */
IndexTuple
gistFormTuple(GISTSTATE *giststate, Relation r,
			  Datum attdata[], bool isnull[], bool isleaf)
{
	Datum		compatt[INDEX_MAX_KEYS];
	int			i;
	IndexTuple	res;

	/*
	 * Call the compress method on each attribute.
	 */
	for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++)
	{
		if (isnull[i])
			compatt[i] = (Datum) 0;
		else
		{
			GISTENTRY	centry;
			GISTENTRY  *cep;

			gistentryinit(centry, attdata[i], r, NULL, (OffsetNumber) 0,
						  isleaf);
			/* there may not be a compress function in opclass */
			if (OidIsValid(giststate->compressFn[i].fn_oid))
				cep = (GISTENTRY *)
					DatumGetPointer(FunctionCall1Coll(&giststate->compressFn[i],
													  giststate->supportCollation[i],
													  PointerGetDatum(&centry)));
			else
				cep = &centry;
			compatt[i] = cep->key;
		}
	}

	if (isleaf)
	{
		/*
		 * Emplace each included attribute if any.
		 */
		for (; i < r->rd_att->natts; i++)
		{
			if (isnull[i])
				compatt[i] = (Datum) 0;
			else
				compatt[i] = attdata[i];
		}
	}

	res = index_form_tuple(isleaf ? giststate->leafTupdesc :
						   giststate->nonLeafTupdesc,
						   compatt, isnull);

	/*
	 * The offset number on tuples on internal pages is unused. For historical
	 * reasons, it is set to 0xffff.
	 */
	ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
	return res;
}
633 | |
634 | /* |
635 | * initialize a GiST entry with fetched value in key field |
636 | */ |
637 | static Datum |
638 | gistFetchAtt(GISTSTATE *giststate, int nkey, Datum k, Relation r) |
639 | { |
640 | GISTENTRY fentry; |
641 | GISTENTRY *fep; |
642 | |
643 | gistentryinit(fentry, k, r, NULL, (OffsetNumber) 0, false); |
644 | |
645 | fep = (GISTENTRY *) |
646 | DatumGetPointer(FunctionCall1Coll(&giststate->fetchFn[nkey], |
647 | giststate->supportCollation[nkey], |
648 | PointerGetDatum(&fentry))); |
649 | |
650 | /* fetchFn set 'key', return it to the caller */ |
651 | return fep->key; |
652 | } |
653 | |
654 | /* |
655 | * Fetch all keys in tuple. |
656 | * Returns a new HeapTuple containing the originally-indexed data. |
657 | */ |
658 | HeapTuple |
659 | gistFetchTuple(GISTSTATE *giststate, Relation r, IndexTuple tuple) |
660 | { |
661 | MemoryContext oldcxt = MemoryContextSwitchTo(giststate->tempCxt); |
662 | Datum fetchatt[INDEX_MAX_KEYS]; |
663 | bool isnull[INDEX_MAX_KEYS]; |
664 | int i; |
665 | |
666 | for (i = 0; i < IndexRelationGetNumberOfKeyAttributes(r); i++) |
667 | { |
668 | Datum datum; |
669 | |
670 | datum = index_getattr(tuple, i + 1, giststate->leafTupdesc, &isnull[i]); |
671 | |
672 | if (giststate->fetchFn[i].fn_oid != InvalidOid) |
673 | { |
674 | if (!isnull[i]) |
675 | fetchatt[i] = gistFetchAtt(giststate, i, datum, r); |
676 | else |
677 | fetchatt[i] = (Datum) 0; |
678 | } |
679 | else if (giststate->compressFn[i].fn_oid == InvalidOid) |
680 | { |
681 | /* |
682 | * If opclass does not provide compress method that could change |
683 | * original value, att is necessarily stored in original form. |
684 | */ |
685 | if (!isnull[i]) |
686 | fetchatt[i] = datum; |
687 | else |
688 | fetchatt[i] = (Datum) 0; |
689 | } |
690 | else |
691 | { |
692 | /* |
693 | * Index-only scans not supported for this column. Since the |
694 | * planner chose an index-only scan anyway, it is not interested |
695 | * in this column, and we can replace it with a NULL. |
696 | */ |
697 | isnull[i] = true; |
698 | fetchatt[i] = (Datum) 0; |
699 | } |
700 | } |
701 | |
702 | /* |
703 | * Get each included attribute. |
704 | */ |
705 | for (; i < r->rd_att->natts; i++) |
706 | { |
707 | fetchatt[i] = index_getattr(tuple, i + 1, giststate->leafTupdesc, |
708 | &isnull[i]); |
709 | } |
710 | MemoryContextSwitchTo(oldcxt); |
711 | |
712 | return heap_form_tuple(giststate->fetchTupdesc, fetchatt, isnull); |
713 | } |
714 | |
715 | float |
716 | gistpenalty(GISTSTATE *giststate, int attno, |
717 | GISTENTRY *orig, bool isNullOrig, |
718 | GISTENTRY *add, bool isNullAdd) |
719 | { |
720 | float penalty = 0.0; |
721 | |
722 | if (giststate->penaltyFn[attno].fn_strict == false || |
723 | (isNullOrig == false && isNullAdd == false)) |
724 | { |
725 | FunctionCall3Coll(&giststate->penaltyFn[attno], |
726 | giststate->supportCollation[attno], |
727 | PointerGetDatum(orig), |
728 | PointerGetDatum(add), |
729 | PointerGetDatum(&penalty)); |
730 | /* disallow negative or NaN penalty */ |
731 | if (isnan(penalty) || penalty < 0.0) |
732 | penalty = 0.0; |
733 | } |
734 | else if (isNullOrig && isNullAdd) |
735 | penalty = 0.0; |
736 | else |
737 | { |
738 | /* try to prevent mixing null and non-null values */ |
739 | penalty = get_float4_infinity(); |
740 | } |
741 | |
742 | return penalty; |
743 | } |
744 | |
745 | /* |
746 | * Initialize a new index page |
747 | */ |
748 | void |
749 | GISTInitBuffer(Buffer b, uint32 f) |
750 | { |
751 | GISTPageOpaque opaque; |
752 | Page page; |
753 | Size pageSize; |
754 | |
755 | pageSize = BufferGetPageSize(b); |
756 | page = BufferGetPage(b); |
757 | PageInit(page, pageSize, sizeof(GISTPageOpaqueData)); |
758 | |
759 | opaque = GistPageGetOpaque(page); |
760 | /* page was already zeroed by PageInit, so this is not needed: */ |
761 | /* memset(&(opaque->nsn), 0, sizeof(GistNSN)); */ |
762 | opaque->rightlink = InvalidBlockNumber; |
763 | opaque->flags = f; |
764 | opaque->gist_page_id = GIST_PAGE_ID; |
765 | } |
766 | |
767 | /* |
768 | * Verify that a freshly-read page looks sane. |
769 | */ |
770 | void |
771 | gistcheckpage(Relation rel, Buffer buf) |
772 | { |
773 | Page page = BufferGetPage(buf); |
774 | |
775 | /* |
776 | * ReadBuffer verifies that every newly-read page passes |
777 | * PageHeaderIsValid, which means it either contains a reasonably sane |
778 | * page header or is all-zero. We have to defend against the all-zero |
779 | * case, however. |
780 | */ |
781 | if (PageIsNew(page)) |
782 | ereport(ERROR, |
783 | (errcode(ERRCODE_INDEX_CORRUPTED), |
784 | errmsg("index \"%s\" contains unexpected zero page at block %u" , |
785 | RelationGetRelationName(rel), |
786 | BufferGetBlockNumber(buf)), |
787 | errhint("Please REINDEX it." ))); |
788 | |
789 | /* |
790 | * Additionally check that the special area looks sane. |
791 | */ |
792 | if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GISTPageOpaqueData))) |
793 | ereport(ERROR, |
794 | (errcode(ERRCODE_INDEX_CORRUPTED), |
795 | errmsg("index \"%s\" contains corrupted page at block %u" , |
796 | RelationGetRelationName(rel), |
797 | BufferGetBlockNumber(buf)), |
798 | errhint("Please REINDEX it." ))); |
799 | } |
800 | |
801 | |
802 | /* |
803 | * Allocate a new page (either by recycling, or by extending the index file) |
804 | * |
805 | * The returned buffer is already pinned and exclusive-locked |
806 | * |
807 | * Caller is responsible for initializing the page by calling GISTInitBuffer |
808 | */ |
809 | Buffer |
810 | gistNewBuffer(Relation r) |
811 | { |
812 | Buffer buffer; |
813 | bool needLock; |
814 | |
815 | /* First, try to get a page from FSM */ |
816 | for (;;) |
817 | { |
818 | BlockNumber blkno = GetFreeIndexPage(r); |
819 | |
820 | if (blkno == InvalidBlockNumber) |
821 | break; /* nothing left in FSM */ |
822 | |
823 | buffer = ReadBuffer(r, blkno); |
824 | |
825 | /* |
826 | * We have to guard against the possibility that someone else already |
827 | * recycled this page; the buffer may be locked if so. |
828 | */ |
829 | if (ConditionalLockBuffer(buffer)) |
830 | { |
831 | Page page = BufferGetPage(buffer); |
832 | |
833 | /* |
834 | * If the page was never initialized, it's OK to use. |
835 | */ |
836 | if (PageIsNew(page)) |
837 | return buffer; |
838 | |
839 | gistcheckpage(r, buffer); |
840 | |
841 | /* |
842 | * Otherwise, recycle it if deleted, and too old to have any |
843 | * processes interested in it. |
844 | */ |
845 | if (gistPageRecyclable(page)) |
846 | { |
847 | /* |
848 | * If we are generating WAL for Hot Standby then create a WAL |
849 | * record that will allow us to conflict with queries running |
850 | * on standby, in case they have snapshots older than the |
851 | * page's deleteXid. |
852 | */ |
853 | if (XLogStandbyInfoActive() && RelationNeedsWAL(r)) |
854 | gistXLogPageReuse(r, blkno, GistPageGetDeleteXid(page)); |
855 | |
856 | return buffer; |
857 | } |
858 | |
859 | LockBuffer(buffer, GIST_UNLOCK); |
860 | } |
861 | |
862 | /* Can't use it, so release buffer and try again */ |
863 | ReleaseBuffer(buffer); |
864 | } |
865 | |
866 | /* Must extend the file */ |
867 | needLock = !RELATION_IS_LOCAL(r); |
868 | |
869 | if (needLock) |
870 | LockRelationForExtension(r, ExclusiveLock); |
871 | |
872 | buffer = ReadBuffer(r, P_NEW); |
873 | LockBuffer(buffer, GIST_EXCLUSIVE); |
874 | |
875 | if (needLock) |
876 | UnlockRelationForExtension(r, ExclusiveLock); |
877 | |
878 | return buffer; |
879 | } |
880 | |
881 | /* Can this page be recycled yet? */ |
882 | bool |
883 | (Page page) |
884 | { |
885 | if (PageIsNew(page)) |
886 | return true; |
887 | if (GistPageIsDeleted(page)) |
888 | { |
889 | /* |
890 | * The page was deleted, but when? If it was just deleted, a scan |
891 | * might have seen the downlink to it, and will read the page later. |
892 | * As long as that can happen, we must keep the deleted page around as |
893 | * a tombstone. |
894 | * |
895 | * Compare the deletion XID with RecentGlobalXmin. If deleteXid < |
896 | * RecentGlobalXmin, then no scan that's still in progress could have |
897 | * seen its downlink, and we can recycle it. |
898 | */ |
899 | FullTransactionId deletexid_full = GistPageGetDeleteXid(page); |
900 | FullTransactionId recentxmin_full = GetFullRecentGlobalXmin(); |
901 | |
902 | if (FullTransactionIdPrecedes(deletexid_full, recentxmin_full)) |
903 | return true; |
904 | } |
905 | return false; |
906 | } |
907 | |
/*
 * Parse reloptions for a GiST index, returning a filled GiSTOptions struct
 * (or NULL when no options are set).  "validate" makes unknown options an
 * error rather than being ignored.
 */
bytea *
gistoptions(Datum reloptions, bool validate)
{
	relopt_value *options;
	GiSTOptions *rdopts;
	int			numoptions;
	/* mapping from option names to GiSTOptions fields */
	static const relopt_parse_elt tab[] = {
		{"fillfactor" , RELOPT_TYPE_INT, offsetof(GiSTOptions, fillfactor)},
		{"buffering" , RELOPT_TYPE_STRING, offsetof(GiSTOptions, bufferingModeOffset)}
	};

	options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIST,
							  &numoptions);

	/* if none set, we're done */
	if (numoptions == 0)
		return NULL;

	rdopts = allocateReloptStruct(sizeof(GiSTOptions), options, numoptions);

	fillRelOptions((void *) rdopts, sizeof(GiSTOptions), options, numoptions,
				   validate, tab, lengthof(tab));

	pfree(options);

	return (bytea *) rdopts;
}
935 | |
936 | /* |
937 | * gistproperty() -- Check boolean properties of indexes. |
938 | * |
939 | * This is optional for most AMs, but is required for GiST because the core |
940 | * property code doesn't support AMPROP_DISTANCE_ORDERABLE. We also handle |
941 | * AMPROP_RETURNABLE here to save opening the rel to call gistcanreturn. |
942 | */ |
943 | bool |
944 | gistproperty(Oid index_oid, int attno, |
945 | IndexAMProperty prop, const char *propname, |
946 | bool *res, bool *isnull) |
947 | { |
948 | Oid opclass, |
949 | opfamily, |
950 | opcintype; |
951 | int16 procno; |
952 | |
953 | /* Only answer column-level inquiries */ |
954 | if (attno == 0) |
955 | return false; |
956 | |
957 | /* |
958 | * Currently, GiST distance-ordered scans require that there be a distance |
959 | * function in the opclass with the default types (i.e. the one loaded |
960 | * into the relcache entry, see initGISTstate). So we assume that if such |
961 | * a function exists, then there's a reason for it (rather than grubbing |
962 | * through all the opfamily's operators to find an ordered one). |
963 | * |
964 | * Essentially the same code can test whether we support returning the |
965 | * column data, since that's true if the opclass provides a fetch proc. |
966 | */ |
967 | |
968 | switch (prop) |
969 | { |
970 | case AMPROP_DISTANCE_ORDERABLE: |
971 | procno = GIST_DISTANCE_PROC; |
972 | break; |
973 | case AMPROP_RETURNABLE: |
974 | procno = GIST_FETCH_PROC; |
975 | break; |
976 | default: |
977 | return false; |
978 | } |
979 | |
980 | /* First we need to know the column's opclass. */ |
981 | opclass = get_index_column_opclass(index_oid, attno); |
982 | if (!OidIsValid(opclass)) |
983 | { |
984 | *isnull = true; |
985 | return true; |
986 | } |
987 | |
988 | /* Now look up the opclass family and input datatype. */ |
989 | if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype)) |
990 | { |
991 | *isnull = true; |
992 | return true; |
993 | } |
994 | |
995 | /* And now we can check whether the function is provided. */ |
996 | |
997 | *res = SearchSysCacheExists4(AMPROCNUM, |
998 | ObjectIdGetDatum(opfamily), |
999 | ObjectIdGetDatum(opcintype), |
1000 | ObjectIdGetDatum(opcintype), |
1001 | Int16GetDatum(procno)); |
1002 | |
1003 | /* |
1004 | * Special case: even without a fetch function, AMPROP_RETURNABLE is true |
1005 | * if the opclass has no compress function. |
1006 | */ |
1007 | if (prop == AMPROP_RETURNABLE && !*res) |
1008 | { |
1009 | *res = !SearchSysCacheExists4(AMPROCNUM, |
1010 | ObjectIdGetDatum(opfamily), |
1011 | ObjectIdGetDatum(opcintype), |
1012 | ObjectIdGetDatum(opcintype), |
1013 | Int16GetDatum(GIST_COMPRESS_PROC)); |
1014 | } |
1015 | |
1016 | *isnull = false; |
1017 | |
1018 | return true; |
1019 | } |
1020 | |
1021 | /* |
1022 | * Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs |
1023 | * to detect concurrent page splits anyway. This function provides a fake |
1024 | * sequence of LSNs for that purpose. |
1025 | */ |
1026 | XLogRecPtr |
1027 | gistGetFakeLSN(Relation rel) |
1028 | { |
1029 | static XLogRecPtr counter = FirstNormalUnloggedLSN; |
1030 | |
1031 | if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP) |
1032 | { |
1033 | /* |
1034 | * Temporary relations are only accessible in our session, so a simple |
1035 | * backend-local counter will do. |
1036 | */ |
1037 | return counter++; |
1038 | } |
1039 | else |
1040 | { |
1041 | /* |
1042 | * Unlogged relations are accessible from other backends, and survive |
1043 | * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us. |
1044 | */ |
1045 | Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED); |
1046 | return GetFakeLSNForUnloggedRel(); |
1047 | } |
1048 | } |
1049 | |