1/*-------------------------------------------------------------------------
2 *
3 * gist_private.h
4 * private declarations for GiST -- declarations related to the
5 * internal implementation of GiST, not the public API
6 *
7 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * src/include/access/gist_private.h
11 *
12 *-------------------------------------------------------------------------
13 */
14#ifndef GIST_PRIVATE_H
15#define GIST_PRIVATE_H
16
17#include "access/amapi.h"
18#include "access/gist.h"
19#include "access/itup.h"
20#include "fmgr.h"
21#include "lib/pairingheap.h"
22#include "storage/bufmgr.h"
23#include "storage/buffile.h"
24#include "utils/hsearch.h"
25#include "access/genam.h"
26
27/*
28 * Maximum number of "halves" a page can be split into in one operation.
29 * Typically a split produces 2 halves, but can be more if keys have very
30 * different lengths, or when inserting multiple keys in one operation (as
31 * when inserting downlinks to an internal node). There is no theoretical
32 * limit on this, but in practice if you get more than a handful page halves
33 * in one split, there's something wrong with the opclass implementation.
34 * GIST_MAX_SPLIT_PAGES is an arbitrary limit on that, used to size some
35 * local arrays used during split. Note that there is also a limit on the
36 * number of buffers that can be held locked at a time, MAX_SIMUL_LWLOCKS,
37 * so if you raise this higher than that limit, you'll just get a different
38 * error.
39 */
40#define GIST_MAX_SPLIT_PAGES 75
41
42/* Buffer lock modes */
43#define GIST_SHARE BUFFER_LOCK_SHARE
44#define GIST_EXCLUSIVE BUFFER_LOCK_EXCLUSIVE
45#define GIST_UNLOCK BUFFER_LOCK_UNLOCK
46
47typedef struct
48{
49 BlockNumber prev;
50 uint32 freespace;
51 char tupledata[FLEXIBLE_ARRAY_MEMBER];
52} GISTNodeBufferPage;
53
54#define BUFFER_PAGE_DATA_OFFSET MAXALIGN(offsetof(GISTNodeBufferPage, tupledata))
55/* Returns free space in node buffer page */
56#define PAGE_FREE_SPACE(nbp) (nbp->freespace)
57/* Checks if node buffer page is empty */
58#define PAGE_IS_EMPTY(nbp) (nbp->freespace == BLCKSZ - BUFFER_PAGE_DATA_OFFSET)
59/* Checks if node buffers page don't contain sufficient space for index tuple */
60#define PAGE_NO_SPACE(nbp, itup) (PAGE_FREE_SPACE(nbp) < \
61 MAXALIGN(IndexTupleSize(itup)))
62
63/*
64 * GISTSTATE: information needed for any GiST index operation
65 *
66 * This struct retains call info for the index's opclass-specific support
67 * functions (per index column), plus the index's tuple descriptor.
68 *
69 * scanCxt holds the GISTSTATE itself as well as any data that lives for the
70 * lifetime of the index operation. We pass this to the support functions
71 * via fn_mcxt, so that they can store scan-lifespan data in it. The
72 * functions are invoked in tempCxt, which is typically short-lifespan
73 * (that is, it's reset after each tuple). However, tempCxt can be the same
74 * as scanCxt if we're not bothering with per-tuple context resets.
75 */
76typedef struct GISTSTATE
77{
78 MemoryContext scanCxt; /* context for scan-lifespan data */
79 MemoryContext tempCxt; /* short-term context for calling functions */
80
81 TupleDesc leafTupdesc; /* index's tuple descriptor */
82 TupleDesc nonLeafTupdesc; /* truncated tuple descriptor for non-leaf
83 * pages */
84 TupleDesc fetchTupdesc; /* tuple descriptor for tuples returned in an
85 * index-only scan */
86
87 FmgrInfo consistentFn[INDEX_MAX_KEYS];
88 FmgrInfo unionFn[INDEX_MAX_KEYS];
89 FmgrInfo compressFn[INDEX_MAX_KEYS];
90 FmgrInfo decompressFn[INDEX_MAX_KEYS];
91 FmgrInfo penaltyFn[INDEX_MAX_KEYS];
92 FmgrInfo picksplitFn[INDEX_MAX_KEYS];
93 FmgrInfo equalFn[INDEX_MAX_KEYS];
94 FmgrInfo distanceFn[INDEX_MAX_KEYS];
95 FmgrInfo fetchFn[INDEX_MAX_KEYS];
96
97 /* Collations to pass to the support functions */
98 Oid supportCollation[INDEX_MAX_KEYS];
99} GISTSTATE;
100
101
102/*
103 * During a GiST index search, we must maintain a queue of unvisited items,
104 * which can be either individual heap tuples or whole index pages. If it
105 * is an ordered search, the unvisited items should be visited in distance
106 * order. Unvisited items at the same distance should be visited in
107 * depth-first order, that is heap items first, then lower index pages, then
108 * upper index pages; this rule avoids doing extra work during a search that
109 * ends early due to LIMIT.
110 *
111 * To perform an ordered search, we use a pairing heap to manage the
112 * distance-order queue. In a non-ordered search (no order-by operators),
113 * we use it to return heap tuples before unvisited index pages, to
114 * ensure depth-first order, but all entries are otherwise considered
115 * equal.
116 */
117
118/* Individual heap tuple to be visited */
119typedef struct GISTSearchHeapItem
120{
121 ItemPointerData heapPtr;
122 bool recheck; /* T if quals must be rechecked */
123 bool recheckDistances; /* T if distances must be rechecked */
124 HeapTuple recontup; /* data reconstructed from the index, used in
125 * index-only scans */
126 OffsetNumber offnum; /* track offset in page to mark tuple as
127 * LP_DEAD */
128} GISTSearchHeapItem;
129
130/* Unvisited item, either index page or heap tuple */
131typedef struct GISTSearchItem
132{
133 pairingheap_node phNode;
134 BlockNumber blkno; /* index page number, or InvalidBlockNumber */
135 union
136 {
137 GistNSN parentlsn; /* parent page's LSN, if index page */
138 /* we must store parentlsn to detect whether a split occurred */
139 GISTSearchHeapItem heap; /* heap info, if heap tuple */
140 } data;
141
142 /* numberOfOrderBys entries */
143 IndexOrderByDistance distances[FLEXIBLE_ARRAY_MEMBER];
144} GISTSearchItem;
145
146#define GISTSearchItemIsHeap(item) ((item).blkno == InvalidBlockNumber)
147
148#define SizeOfGISTSearchItem(n_distances) \
149 (offsetof(GISTSearchItem, distances) + \
150 sizeof(IndexOrderByDistance) * (n_distances))
151
152/*
153 * GISTScanOpaqueData: private state for a scan of a GiST index
154 */
155typedef struct GISTScanOpaqueData
156{
157 GISTSTATE *giststate; /* index information, see above */
158 Oid *orderByTypes; /* datatypes of ORDER BY expressions */
159
160 pairingheap *queue; /* queue of unvisited items */
161 MemoryContext queueCxt; /* context holding the queue */
162 bool qual_ok; /* false if qual can never be satisfied */
163 bool firstCall; /* true until first gistgettuple call */
164
165 /* pre-allocated workspace arrays */
166 IndexOrderByDistance *distances; /* output area for gistindex_keytest */
167
168 /* info about killed items if any (killedItems is NULL if never used) */
169 OffsetNumber *killedItems; /* offset numbers of killed items */
170 int numKilled; /* number of currently stored items */
171 BlockNumber curBlkno; /* current number of block */
172 GistNSN curPageLSN; /* pos in the WAL stream when page was read */
173
174 /* In a non-ordered search, returnable heap items are stored here: */
175 GISTSearchHeapItem pageData[BLCKSZ / sizeof(IndexTupleData)];
176 OffsetNumber nPageData; /* number of valid items in array */
177 OffsetNumber curPageData; /* next item to return */
178 MemoryContext pageDataCxt; /* context holding the fetched tuples, for
179 * index-only scans */
180} GISTScanOpaqueData;
181
182typedef GISTScanOpaqueData *GISTScanOpaque;
183
184/* despite the name, gistxlogPage is not part of any xlog record */
185typedef struct gistxlogPage
186{
187 BlockNumber blkno;
188 int num; /* number of index tuples following */
189} gistxlogPage;
190
191/* SplitedPageLayout - gistSplit function result */
192typedef struct SplitedPageLayout
193{
194 gistxlogPage block;
195 IndexTupleData *list;
196 int lenlist;
197 IndexTuple itup; /* union key for page */
198 Page page; /* to operate */
199 Buffer buffer; /* to write after all proceed */
200
201 struct SplitedPageLayout *next;
202} SplitedPageLayout;
203
204/*
205 * GISTInsertStack used for locking buffers and transfer arguments during
206 * insertion
207 */
208typedef struct GISTInsertStack
209{
210 /* current page */
211 BlockNumber blkno;
212 Buffer buffer;
213 Page page;
214
215 /*
216 * log sequence number from page->lsn to recognize page update and compare
217 * it with page's nsn to recognize page split
218 */
219 GistNSN lsn;
220
221 /*
222 * If set, we split the page while descending the tree to find an
223 * insertion target. It means that we need to retry from the parent,
224 * because the downlink of this page might no longer cover the new key.
225 */
226 bool retry_from_parent;
227
228 /* offset of the downlink in the parent page, that points to this page */
229 OffsetNumber downlinkoffnum;
230
231 /* pointer to parent */
232 struct GISTInsertStack *parent;
233} GISTInsertStack;
234
235/* Working state and results for multi-column split logic in gistsplit.c */
236typedef struct GistSplitVector
237{
238 GIST_SPLITVEC splitVector; /* passed to/from user PickSplit method */
239
240 Datum spl_lattr[INDEX_MAX_KEYS]; /* Union of subkeys in
241 * splitVector.spl_left */
242 bool spl_lisnull[INDEX_MAX_KEYS];
243
244 Datum spl_rattr[INDEX_MAX_KEYS]; /* Union of subkeys in
245 * splitVector.spl_right */
246 bool spl_risnull[INDEX_MAX_KEYS];
247
248 bool *spl_dontcare; /* flags tuples which could go to either side
249 * of the split for zero penalty */
250} GistSplitVector;
251
252typedef struct
253{
254 Relation r;
255 Relation heapRel;
256 Size freespace; /* free space to be left */
257 bool is_build;
258
259 GISTInsertStack *stack;
260} GISTInsertState;
261
262/* root page of a gist index */
263#define GIST_ROOT_BLKNO 0
264
265/*
266 * Before PostgreSQL 9.1, we used to rely on so-called "invalid tuples" on
267 * inner pages to finish crash recovery of incomplete page splits. If a crash
268 * happened in the middle of a page split, so that the downlink pointers were
269 * not yet inserted, crash recovery inserted a special downlink pointer. The
270 * semantics of an invalid tuple was that it if you encounter one in a scan,
271 * it must always be followed, because we don't know if the tuples on the
272 * child page match or not.
273 *
274 * We no longer create such invalid tuples, we now mark the left-half of such
275 * an incomplete split with the F_FOLLOW_RIGHT flag instead, and finish the
276 * split properly the next time we need to insert on that page. To retain
277 * on-disk compatibility for the sake of pg_upgrade, we still store 0xffff as
278 * the offset number of all inner tuples. If we encounter any invalid tuples
279 * with 0xfffe during insertion, we throw an error, though scans still handle
280 * them. You should only encounter invalid tuples if you pg_upgrade a pre-9.1
281 * gist index which already has invalid tuples in it because of a crash. That
282 * should be rare, and you are recommended to REINDEX anyway if you have any
283 * invalid tuples in an index, so throwing an error is as far as we go with
284 * supporting that.
285 */
286#define TUPLE_IS_VALID 0xffff
287#define TUPLE_IS_INVALID 0xfffe
288
289#define GistTupleIsInvalid(itup) ( ItemPointerGetOffsetNumber( &((itup)->t_tid) ) == TUPLE_IS_INVALID )
290#define GistTupleSetValid(itup) ItemPointerSetOffsetNumber( &((itup)->t_tid), TUPLE_IS_VALID )
291
292
293
294
295/*
296 * A buffer attached to an internal node, used when building an index in
297 * buffering mode.
298 */
299typedef struct
300{
301 BlockNumber nodeBlocknum; /* index block # this buffer is for */
302 int32 blocksCount; /* current # of blocks occupied by buffer */
303
304 BlockNumber pageBlocknum; /* temporary file block # */
305 GISTNodeBufferPage *pageBuffer; /* in-memory buffer page */
306
307 /* is this buffer queued for emptying? */
308 bool queuedForEmptying;
309
310 /* is this a temporary copy, not in the hash table? */
311 bool isTemp;
312
313 int level; /* 0 == leaf */
314} GISTNodeBuffer;
315
316/*
317 * Does specified level have buffers? (Beware of multiple evaluation of
318 * arguments.)
319 */
320#define LEVEL_HAS_BUFFERS(nlevel, gfbb) \
321 ((nlevel) != 0 && (nlevel) % (gfbb)->levelStep == 0 && \
322 (nlevel) != (gfbb)->rootlevel)
323
324/* Is specified buffer at least half-filled (should be queued for emptying)? */
325#define BUFFER_HALF_FILLED(nodeBuffer, gfbb) \
326 ((nodeBuffer)->blocksCount > (gfbb)->pagesPerBuffer / 2)
327
328/*
329 * Is specified buffer full? Our buffers can actually grow indefinitely,
330 * beyond the "maximum" size, so this just means whether the buffer has grown
331 * beyond the nominal maximum size.
332 */
333#define BUFFER_OVERFLOWED(nodeBuffer, gfbb) \
334 ((nodeBuffer)->blocksCount > (gfbb)->pagesPerBuffer)
335
336/*
337 * Data structure with general information about build buffers.
338 */
339typedef struct GISTBuildBuffers
340{
341 /* Persistent memory context for the buffers and metadata. */
342 MemoryContext context;
343
344 BufFile *pfile; /* Temporary file to store buffers in */
345 long nFileBlocks; /* Current size of the temporary file */
346
347 /*
348 * resizable array of free blocks.
349 */
350 long *freeBlocks;
351 int nFreeBlocks; /* # of currently free blocks in the array */
352 int freeBlocksLen; /* current allocated length of the array */
353
354 /* Hash for buffers by block number */
355 HTAB *nodeBuffersTab;
356
357 /* List of buffers scheduled for emptying */
358 List *bufferEmptyingQueue;
359
360 /*
361 * Parameters to the buffering build algorithm. levelStep determines which
362 * levels in the tree have buffers, and pagesPerBuffer determines how
363 * large each buffer is.
364 */
365 int levelStep;
366 int pagesPerBuffer;
367
368 /* Array of lists of buffers on each level, for final emptying */
369 List **buffersOnLevels;
370 int buffersOnLevelsLen;
371
372 /*
373 * Dynamically-sized array of buffers that currently have their last page
374 * loaded in main memory.
375 */
376 GISTNodeBuffer **loadedBuffers;
377 int loadedBuffersCount; /* # of entries in loadedBuffers */
378 int loadedBuffersLen; /* allocated size of loadedBuffers */
379
380 /* Level of the current root node (= height of the index tree - 1) */
381 int rootlevel;
382} GISTBuildBuffers;
383
384/*
385 * Storage type for GiST's reloptions
386 */
387typedef struct GiSTOptions
388{
389 int32 vl_len_; /* varlena header (do not touch directly!) */
390 int fillfactor; /* page fill factor in percent (0..100) */
391 int bufferingModeOffset; /* use buffering build? */
392} GiSTOptions;
393
394/* gist.c */
395extern void gistbuildempty(Relation index);
396extern bool gistinsert(Relation r, Datum *values, bool *isnull,
397 ItemPointer ht_ctid, Relation heapRel,
398 IndexUniqueCheck checkUnique,
399 struct IndexInfo *indexInfo);
400extern MemoryContext createTempGistContext(void);
401extern GISTSTATE *initGISTstate(Relation index);
402extern void freeGISTstate(GISTSTATE *giststate);
403extern void gistdoinsert(Relation r,
404 IndexTuple itup,
405 Size freespace,
406 GISTSTATE *GISTstate,
407 Relation heapRel,
408 bool is_build);
409
410/* A List of these is returned from gistplacetopage() in *splitinfo */
411typedef struct
412{
413 Buffer buf; /* the split page "half" */
414 IndexTuple downlink; /* downlink for this half. */
415} GISTPageSplitInfo;
416
417extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
418 Buffer buffer,
419 IndexTuple *itup, int ntup,
420 OffsetNumber oldoffnum, BlockNumber *newblkno,
421 Buffer leftchildbuf,
422 List **splitinfo,
423 bool markleftchild,
424 Relation heapRel,
425 bool is_build);
426
427extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup,
428 int len, GISTSTATE *giststate);
429
430/* gistxlog.c */
431extern XLogRecPtr gistXLogPageDelete(Buffer buffer,
432 FullTransactionId xid, Buffer parentBuffer,
433 OffsetNumber downlinkOffset);
434
435extern void gistXLogPageReuse(Relation rel, BlockNumber blkno,
436 FullTransactionId latestRemovedXid);
437
438extern XLogRecPtr gistXLogUpdate(Buffer buffer,
439 OffsetNumber *todelete, int ntodelete,
440 IndexTuple *itup, int ntup,
441 Buffer leftchild);
442
443extern XLogRecPtr gistXLogDelete(Buffer buffer, OffsetNumber *todelete,
444 int ntodelete, TransactionId latestRemovedXid);
445
446extern XLogRecPtr gistXLogSplit(bool page_is_leaf,
447 SplitedPageLayout *dist,
448 BlockNumber origrlink, GistNSN oldnsn,
449 Buffer leftchild, bool markfollowright);
450
451/* gistget.c */
452extern bool gistgettuple(IndexScanDesc scan, ScanDirection dir);
453extern int64 gistgetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
454extern bool gistcanreturn(Relation index, int attno);
455
456/* gistvalidate.c */
457extern bool gistvalidate(Oid opclassoid);
458
459/* gistutil.c */
460
461#define GiSTPageSize \
462 ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GISTPageOpaqueData)) )
463
464#define GIST_MIN_FILLFACTOR 10
465#define GIST_DEFAULT_FILLFACTOR 90
466
467extern bytea *gistoptions(Datum reloptions, bool validate);
468extern bool gistproperty(Oid index_oid, int attno,
469 IndexAMProperty prop, const char *propname,
470 bool *res, bool *isnull);
471extern bool gistfitpage(IndexTuple *itvec, int len);
472extern bool gistnospace(Page page, IndexTuple *itvec, int len, OffsetNumber todelete, Size freespace);
473extern void gistcheckpage(Relation rel, Buffer buf);
474extern Buffer gistNewBuffer(Relation r);
475extern bool gistPageRecyclable(Page page);
476extern void gistfillbuffer(Page page, IndexTuple *itup, int len,
477 OffsetNumber off);
478extern IndexTuple *gistextractpage(Page page, int *len /* out */ );
479extern IndexTuple *gistjoinvector(IndexTuple *itvec, int *len,
480 IndexTuple *additvec, int addlen);
481extern IndexTupleData *gistfillitupvec(IndexTuple *vec, int veclen, int *memlen);
482
483extern IndexTuple gistunion(Relation r, IndexTuple *itvec,
484 int len, GISTSTATE *giststate);
485extern IndexTuple gistgetadjusted(Relation r,
486 IndexTuple oldtup,
487 IndexTuple addtup,
488 GISTSTATE *giststate);
489extern IndexTuple gistFormTuple(GISTSTATE *giststate,
490 Relation r, Datum *attdata, bool *isnull, bool isleaf);
491
492extern OffsetNumber gistchoose(Relation r, Page p,
493 IndexTuple it,
494 GISTSTATE *giststate);
495
496extern void GISTInitBuffer(Buffer b, uint32 f);
497extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
498 Datum k, Relation r, Page pg, OffsetNumber o,
499 bool l, bool isNull);
500
501extern float gistpenalty(GISTSTATE *giststate, int attno,
502 GISTENTRY *key1, bool isNull1,
503 GISTENTRY *key2, bool isNull2);
504extern void gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, int len,
505 Datum *attr, bool *isnull);
506extern bool gistKeyIsEQ(GISTSTATE *giststate, int attno, Datum a, Datum b);
507extern void gistDeCompressAtt(GISTSTATE *giststate, Relation r, IndexTuple tuple, Page p,
508 OffsetNumber o, GISTENTRY *attdata, bool *isnull);
509extern HeapTuple gistFetchTuple(GISTSTATE *giststate, Relation r,
510 IndexTuple tuple);
511extern void gistMakeUnionKey(GISTSTATE *giststate, int attno,
512 GISTENTRY *entry1, bool isnull1,
513 GISTENTRY *entry2, bool isnull2,
514 Datum *dst, bool *dstisnull);
515
516extern XLogRecPtr gistGetFakeLSN(Relation rel);
517
518/* gistvacuum.c */
519extern IndexBulkDeleteResult *gistbulkdelete(IndexVacuumInfo *info,
520 IndexBulkDeleteResult *stats,
521 IndexBulkDeleteCallback callback,
522 void *callback_state);
523extern IndexBulkDeleteResult *gistvacuumcleanup(IndexVacuumInfo *info,
524 IndexBulkDeleteResult *stats);
525
526/* gistsplit.c */
527extern void gistSplitByKey(Relation r, Page page, IndexTuple *itup,
528 int len, GISTSTATE *giststate,
529 GistSplitVector *v,
530 int attno);
531
532/* gistbuild.c */
533extern IndexBuildResult *gistbuild(Relation heap, Relation index,
534 struct IndexInfo *indexInfo);
535extern void gistValidateBufferingOption(const char *value);
536
537/* gistbuildbuffers.c */
538extern GISTBuildBuffers *gistInitBuildBuffers(int pagesPerBuffer, int levelStep,
539 int maxLevel);
540extern GISTNodeBuffer *gistGetNodeBuffer(GISTBuildBuffers *gfbb,
541 GISTSTATE *giststate,
542 BlockNumber blkno, int level);
543extern void gistPushItupToNodeBuffer(GISTBuildBuffers *gfbb,
544 GISTNodeBuffer *nodeBuffer, IndexTuple item);
545extern bool gistPopItupFromNodeBuffer(GISTBuildBuffers *gfbb,
546 GISTNodeBuffer *nodeBuffer, IndexTuple *item);
547extern void gistFreeBuildBuffers(GISTBuildBuffers *gfbb);
548extern void gistRelocateBuildBuffersOnSplit(GISTBuildBuffers *gfbb,
549 GISTSTATE *giststate, Relation r,
550 int level, Buffer buffer,
551 List *splitinfo);
552extern void gistUnloadNodeBuffers(GISTBuildBuffers *gfbb);
553
554#endif /* GIST_PRIVATE_H */
555