| 1 | /*-------------------------------------------------------------------------- |
| 2 | * ginblock.h |
| 3 | * details of structures stored in GIN index blocks |
| 4 | * |
| 5 | * Copyright (c) 2006-2019, PostgreSQL Global Development Group |
| 6 | * |
| 7 | * src/include/access/ginblock.h |
| 8 | *-------------------------------------------------------------------------- |
| 9 | */ |
| 10 | #ifndef GINBLOCK_H |
| 11 | #define GINBLOCK_H |
| 12 | |
| 13 | #include "access/transam.h" |
| 14 | #include "storage/block.h" |
| 15 | #include "storage/itemptr.h" |
| 16 | #include "storage/off.h" |
| 17 | |
| 18 | /* |
| 19 | * Page opaque data in an inverted index page. |
| 20 | * |
| 21 | * Note: GIN does not include a page ID word as do the other index types. |
| 22 | * This is OK because the opaque data is only 8 bytes and so can be reliably |
| 23 | * distinguished by size. Revisit this if the size ever increases. |
| 24 | * Further note: as of 9.2, SP-GiST also uses 8-byte special space, as does |
| 25 | * BRIN as of 9.5. This is still OK, as long as GIN isn't using all of the |
| 26 | * high-order bits in its flags word, because that way the flags word cannot |
| 27 | * match the page IDs used by SP-GiST and BRIN. |
| 28 | */ |
| 29 | typedef struct GinPageOpaqueData |
| 30 | { |
| 31 | BlockNumber rightlink; /* next page if any */ |
| 32 | OffsetNumber maxoff; /* number of PostingItems on GIN_DATA & |
| 33 | * ~GIN_LEAF page. On GIN_LIST page, number of |
| 34 | * heap tuples. */ |
| 35 | uint16 flags; /* see bit definitions below */ |
| 36 | } GinPageOpaqueData; |
| 37 | |
| 38 | typedef GinPageOpaqueData *GinPageOpaque; |
| 39 | |
| 40 | #define GIN_DATA (1 << 0) |
| 41 | #define GIN_LEAF (1 << 1) |
| 42 | #define GIN_DELETED (1 << 2) |
| 43 | #define GIN_META (1 << 3) |
| 44 | #define GIN_LIST (1 << 4) |
| 45 | #define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */ |
| 46 | #define GIN_INCOMPLETE_SPLIT (1 << 6) /* page was split, but parent not |
| 47 | * updated */ |
| 48 | #define GIN_COMPRESSED (1 << 7) |
| 49 | |
| 50 | /* Page numbers of fixed-location pages */ |
| 51 | #define GIN_METAPAGE_BLKNO (0) |
| 52 | #define GIN_ROOT_BLKNO (1) |
| 53 | |
| 54 | typedef struct GinMetaPageData |
| 55 | { |
| 56 | /* |
| 57 | * Pointers to head and tail of pending list, which consists of GIN_LIST |
| 58 | * pages. These store fast-inserted entries that haven't yet been moved |
| 59 | * into the regular GIN structure. |
| 60 | */ |
| 61 | BlockNumber head; |
| 62 | BlockNumber tail; |
| 63 | |
| 64 | /* |
| 65 | * Free space in bytes in the pending list's tail page. |
| 66 | */ |
| 67 | uint32 tailFreeSize; |
| 68 | |
| 69 | /* |
| 70 | * We store both number of pages and number of heap tuples that are in the |
| 71 | * pending list. |
| 72 | */ |
| 73 | BlockNumber nPendingPages; |
| 74 | int64 nPendingHeapTuples; |
| 75 | |
| 76 | /* |
| 77 | * Statistics for planner use (accurate as of last VACUUM) |
| 78 | */ |
| 79 | BlockNumber nTotalPages; |
| 80 | BlockNumber nEntryPages; |
| 81 | BlockNumber nDataPages; |
| 82 | int64 nEntries; |
| 83 | |
| 84 | /* |
| 85 | * GIN version number (ideally this should have been at the front, but too |
| 86 | * late now. Don't move it!) |
| 87 | * |
| 88 | * Currently 2 (for indexes initialized in 9.4 or later) |
| 89 | * |
| 90 | * Version 1 (indexes initialized in version 9.1, 9.2 or 9.3), is |
| 91 | * compatible, but may contain uncompressed posting tree (leaf) pages and |
| 92 | * posting lists. They will be converted to compressed format when |
| 93 | * modified. |
| 94 | * |
| 95 | * Version 0 (indexes initialized in 9.0 or before) is compatible but may |
| 96 | * be missing null entries, including both null keys and placeholders. |
| 97 | * Reject full-index-scan attempts on such indexes. |
| 98 | */ |
| 99 | int32 ginVersion; |
| 100 | } GinMetaPageData; |
| 101 | |
| 102 | #define GIN_CURRENT_VERSION 2 |
| 103 | |
| 104 | #define GinPageGetMeta(p) \ |
| 105 | ((GinMetaPageData *) PageGetContents(p)) |
| 106 | |
| 107 | /* |
| 108 | * Macros for accessing a GIN index page's opaque data |
| 109 | */ |
| 110 | #define GinPageGetOpaque(page) ( (GinPageOpaque) PageGetSpecialPointer(page) ) |
| 111 | |
| 112 | #define GinPageIsLeaf(page) ( (GinPageGetOpaque(page)->flags & GIN_LEAF) != 0 ) |
| 113 | #define GinPageSetLeaf(page) ( GinPageGetOpaque(page)->flags |= GIN_LEAF ) |
| 114 | #define GinPageSetNonLeaf(page) ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF ) |
| 115 | #define GinPageIsData(page) ( (GinPageGetOpaque(page)->flags & GIN_DATA) != 0 ) |
| 116 | #define GinPageSetData(page) ( GinPageGetOpaque(page)->flags |= GIN_DATA ) |
| 117 | #define GinPageIsList(page) ( (GinPageGetOpaque(page)->flags & GIN_LIST) != 0 ) |
| 118 | #define GinPageSetList(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST ) |
| 119 | #define GinPageHasFullRow(page) ( (GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW) != 0 ) |
| 120 | #define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW ) |
| 121 | #define GinPageIsCompressed(page) ( (GinPageGetOpaque(page)->flags & GIN_COMPRESSED) != 0 ) |
| 122 | #define GinPageSetCompressed(page) ( GinPageGetOpaque(page)->flags |= GIN_COMPRESSED ) |
| 123 | |
| 124 | #define GinPageIsDeleted(page) ( (GinPageGetOpaque(page)->flags & GIN_DELETED) != 0 ) |
| 125 | #define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED) |
| 126 | #define GinPageSetNonDeleted(page) ( GinPageGetOpaque(page)->flags &= ~GIN_DELETED) |
| 127 | #define GinPageIsIncompleteSplit(page) ( (GinPageGetOpaque(page)->flags & GIN_INCOMPLETE_SPLIT) != 0 ) |
| 128 | |
| 129 | #define (page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber) |
| 130 | |
| 131 | /* |
| 132 | * We should reclaim deleted page only once every transaction started before |
| 133 | * its deletion is over. |
| 134 | */ |
| 135 | #define GinPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid ) |
| 136 | #define GinPageSetDeleteXid(page, xid) ( ((PageHeader) (page))->pd_prune_xid = xid) |
| 137 | #define GinPageIsRecyclable(page) ( PageIsNew(page) || (GinPageIsDeleted(page) \ |
| 138 | && TransactionIdPrecedes(GinPageGetDeleteXid(page), RecentGlobalXmin))) |
| 139 | |
| 140 | /* |
| 141 | * We use our own ItemPointerGet(BlockNumber|OffsetNumber) |
| 142 | * to avoid Asserts, since sometimes the ip_posid isn't "valid" |
| 143 | */ |
| 144 | #define GinItemPointerGetBlockNumber(pointer) \ |
| 145 | (ItemPointerGetBlockNumberNoCheck(pointer)) |
| 146 | |
| 147 | #define GinItemPointerGetOffsetNumber(pointer) \ |
| 148 | (ItemPointerGetOffsetNumberNoCheck(pointer)) |
| 149 | |
| 150 | #define GinItemPointerSetBlockNumber(pointer, blkno) \ |
| 151 | (ItemPointerSetBlockNumber((pointer), (blkno))) |
| 152 | |
| 153 | #define GinItemPointerSetOffsetNumber(pointer, offnum) \ |
| 154 | (ItemPointerSetOffsetNumber((pointer), (offnum))) |
| 155 | |
| 156 | |
| 157 | /* |
| 158 | * Special-case item pointer values needed by the GIN search logic. |
| 159 | * MIN: sorts less than any valid item pointer |
| 160 | * MAX: sorts greater than any valid item pointer |
| 161 | * LOSSY PAGE: indicates a whole heap page, sorts after normal item |
| 162 | * pointers for that page |
| 163 | * Note that these are all distinguishable from an "invalid" item pointer |
| 164 | * (which is InvalidBlockNumber/0) as well as from all normal item |
| 165 | * pointers (which have item numbers in the range 1..MaxHeapTuplesPerPage). |
| 166 | */ |
| 167 | #define ItemPointerSetMin(p) \ |
| 168 | ItemPointerSet((p), (BlockNumber)0, (OffsetNumber)0) |
| 169 | #define ItemPointerIsMin(p) \ |
| 170 | (GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0 && \ |
| 171 | GinItemPointerGetBlockNumber(p) == (BlockNumber)0) |
| 172 | #define ItemPointerSetMax(p) \ |
| 173 | ItemPointerSet((p), InvalidBlockNumber, (OffsetNumber)0xffff) |
| 174 | #define ItemPointerIsMax(p) \ |
| 175 | (GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \ |
| 176 | GinItemPointerGetBlockNumber(p) == InvalidBlockNumber) |
| 177 | #define ItemPointerSetLossyPage(p, b) \ |
| 178 | ItemPointerSet((p), (b), (OffsetNumber)0xffff) |
| 179 | #define ItemPointerIsLossyPage(p) \ |
| 180 | (GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \ |
| 181 | GinItemPointerGetBlockNumber(p) != InvalidBlockNumber) |
| 182 | |
| 183 | /* |
| 184 | * Posting item in a non-leaf posting-tree page |
| 185 | */ |
| 186 | typedef struct |
| 187 | { |
| 188 | /* We use BlockIdData not BlockNumber to avoid padding space wastage */ |
| 189 | BlockIdData child_blkno; |
| 190 | ItemPointerData key; |
| 191 | } PostingItem; |
| 192 | |
| 193 | #define PostingItemGetBlockNumber(pointer) \ |
| 194 | BlockIdGetBlockNumber(&(pointer)->child_blkno) |
| 195 | |
| 196 | #define PostingItemSetBlockNumber(pointer, blockNumber) \ |
| 197 | BlockIdSet(&((pointer)->child_blkno), (blockNumber)) |
| 198 | |
| 199 | /* |
| 200 | * Category codes to distinguish placeholder nulls from ordinary NULL keys. |
| 201 | * |
| 202 | * The first two code values were chosen to be compatible with the usual usage |
| 203 | * of bool isNull flags. However, casting between bool and GinNullCategory is |
| 204 | * risky because of the possibility of different bit patterns and type sizes, |
| 205 | * so it is no longer done. |
| 206 | * |
| 207 | * GIN_CAT_EMPTY_QUERY is never stored in the index; and notice that it is |
| 208 | * chosen to sort before not after regular key values. |
| 209 | */ |
| 210 | typedef signed char GinNullCategory; |
| 211 | |
| 212 | #define GIN_CAT_NORM_KEY 0 /* normal, non-null key value */ |
| 213 | #define GIN_CAT_NULL_KEY 1 /* null key value */ |
| 214 | #define GIN_CAT_EMPTY_ITEM 2 /* placeholder for zero-key item */ |
| 215 | #define GIN_CAT_NULL_ITEM 3 /* placeholder for null item */ |
| 216 | #define GIN_CAT_EMPTY_QUERY (-1) /* placeholder for full-scan query */ |
| 217 | |
| 218 | /* |
| 219 | * Access macros for null category byte in entry tuples |
| 220 | */ |
| 221 | #define GinCategoryOffset(itup,ginstate) \ |
| 222 | (IndexInfoFindDataOffset((itup)->t_info) + \ |
| 223 | ((ginstate)->oneCol ? 0 : sizeof(int16))) |
| 224 | #define GinGetNullCategory(itup,ginstate) \ |
| 225 | (*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate)))) |
| 226 | #define GinSetNullCategory(itup,ginstate,c) \ |
| 227 | (*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate))) = (c)) |
| 228 | |
| 229 | /* |
| 230 | * Access macros for leaf-page entry tuples (see discussion in README) |
| 231 | */ |
| 232 | #define GinGetNPosting(itup) GinItemPointerGetOffsetNumber(&(itup)->t_tid) |
| 233 | #define GinSetNPosting(itup,n) ItemPointerSetOffsetNumber(&(itup)->t_tid,n) |
| 234 | #define GIN_TREE_POSTING ((OffsetNumber)0xffff) |
| 235 | #define GinIsPostingTree(itup) (GinGetNPosting(itup) == GIN_TREE_POSTING) |
| 236 | #define GinSetPostingTree(itup, blkno) ( GinSetNPosting((itup),GIN_TREE_POSTING), ItemPointerSetBlockNumber(&(itup)->t_tid, blkno) ) |
| 237 | #define GinGetPostingTree(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid) |
| 238 | |
| 239 | #define GIN_ITUP_COMPRESSED (1U << 31) |
| 240 | #define GinGetPostingOffset(itup) (GinItemPointerGetBlockNumber(&(itup)->t_tid) & (~GIN_ITUP_COMPRESSED)) |
| 241 | #define GinSetPostingOffset(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,(n)|GIN_ITUP_COMPRESSED) |
| 242 | #define GinGetPosting(itup) ((Pointer) ((char*)(itup) + GinGetPostingOffset(itup))) |
| 243 | #define GinItupIsCompressed(itup) ((GinItemPointerGetBlockNumber(&(itup)->t_tid) & GIN_ITUP_COMPRESSED) != 0) |
| 244 | |
| 245 | /* |
| 246 | * Maximum size of an item on entry tree page. Make sure that we fit at least |
| 247 | * three items on each page. (On regular B-tree indexes, we must fit at least |
| 248 | * three items: two data items and the "high key". In GIN entry tree, we don't |
| 249 | * currently store the high key explicitly, we just use the rightmost item on |
| 250 | * the page, so it would actually be enough to fit two items.) |
| 251 | */ |
| 252 | #define GinMaxItemSize \ |
| 253 | Min(INDEX_SIZE_MASK, \ |
| 254 | MAXALIGN_DOWN(((BLCKSZ - \ |
| 255 | MAXALIGN(SizeOfPageHeaderData + 3 * sizeof(ItemIdData)) - \ |
| 256 | MAXALIGN(sizeof(GinPageOpaqueData))) / 3))) |
| 257 | |
| 258 | /* |
| 259 | * Access macros for non-leaf entry tuples |
| 260 | */ |
| 261 | #define GinGetDownlink(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid) |
| 262 | #define GinSetDownlink(itup,blkno) ItemPointerSet(&(itup)->t_tid, blkno, InvalidOffsetNumber) |
| 263 | |
| 264 | |
| 265 | /* |
| 266 | * Data (posting tree) pages |
| 267 | * |
| 268 | * Posting tree pages don't store regular tuples. Non-leaf pages contain |
| 269 | * PostingItems, which are pairs of ItemPointers and child block numbers. |
| 270 | * Leaf pages contain GinPostingLists and an uncompressed array of item |
| 271 | * pointers. |
| 272 | * |
| 273 | * In a leaf page, the compressed posting lists are stored after the regular |
| 274 | * page header, one after each other. Although we don't store regular tuples, |
| 275 | * pd_lower is used to indicate the end of the posting lists. After that, free |
| 276 | * space follows. This layout is compatible with the "standard" heap and |
| 277 | * index page layout described in bufpage.h, so that we can e.g set buffer_std |
| 278 | * when writing WAL records. |
| 279 | * |
| 280 | * In the special space is the GinPageOpaque struct. |
| 281 | */ |
| 282 | #define GinDataLeafPageGetPostingList(page) \ |
| 283 | (GinPostingList *) ((PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData)))) |
| 284 | #define GinDataLeafPageGetPostingListSize(page) \ |
| 285 | (((PageHeader) page)->pd_lower - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(ItemPointerData))) |
| 286 | |
| 287 | #define GinDataLeafPageIsEmpty(page) \ |
| 288 | (GinPageIsCompressed(page) ? (GinDataLeafPageGetPostingListSize(page) == 0) : (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber)) |
| 289 | |
| 290 | #define GinDataLeafPageGetFreeSpace(page) PageGetExactFreeSpace(page) |
| 291 | |
| 292 | #define GinDataPageGetRightBound(page) ((ItemPointer) PageGetContents(page)) |
| 293 | /* |
| 294 | * Pointer to the data portion of a posting tree page. For internal pages, |
| 295 | * that's the beginning of the array of PostingItems. For compressed leaf |
| 296 | * pages, the first compressed posting list. For uncompressed (pre-9.4) leaf |
| 297 | * pages, it's the beginning of the ItemPointer array. |
| 298 | */ |
| 299 | #define GinDataPageGetData(page) \ |
| 300 | (PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData))) |
| 301 | /* non-leaf pages contain PostingItems */ |
| 302 | #define GinDataPageGetPostingItem(page, i) \ |
| 303 | ((PostingItem *) (GinDataPageGetData(page) + ((i)-1) * sizeof(PostingItem))) |
| 304 | |
| 305 | /* |
| 306 | * Note: there is no GinDataPageGetDataSize macro, because before version |
| 307 | * 9.4, we didn't set pd_lower on data pages. There can be pages in the index |
| 308 | * that were binary-upgraded from earlier versions and still have an invalid |
| 309 | * pd_lower, so we cannot trust it in general. Compressed posting tree leaf |
| 310 | * pages are new in 9.4, however, so we can trust them; see |
| 311 | * GinDataLeafPageGetPostingListSize. |
| 312 | */ |
| 313 | #define GinDataPageSetDataSize(page, size) \ |
| 314 | { \ |
| 315 | Assert(size <= GinDataPageMaxDataSize); \ |
| 316 | ((PageHeader) page)->pd_lower = (size) + MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(ItemPointerData)); \ |
| 317 | } |
| 318 | |
| 319 | #define GinNonLeafDataPageGetFreeSpace(page) \ |
| 320 | (GinDataPageMaxDataSize - \ |
| 321 | GinPageGetOpaque(page)->maxoff * sizeof(PostingItem)) |
| 322 | |
| 323 | #define GinDataPageMaxDataSize \ |
| 324 | (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \ |
| 325 | - MAXALIGN(sizeof(ItemPointerData)) \ |
| 326 | - MAXALIGN(sizeof(GinPageOpaqueData))) |
| 327 | |
| 328 | /* |
| 329 | * List pages |
| 330 | */ |
| 331 | #define GinListPageSize \ |
| 332 | ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GinPageOpaqueData)) ) |
| 333 | |
| 334 | /* |
| 335 | * A compressed posting list. |
| 336 | * |
| 337 | * Note: This requires 2-byte alignment. |
| 338 | */ |
| 339 | typedef struct |
| 340 | { |
| 341 | ItemPointerData first; /* first item in this posting list (unpacked) */ |
| 342 | uint16 nbytes; /* number of bytes that follow */ |
| 343 | unsigned char bytes[FLEXIBLE_ARRAY_MEMBER]; /* varbyte encoded items */ |
| 344 | } GinPostingList; |
| 345 | |
| 346 | #define SizeOfGinPostingList(plist) (offsetof(GinPostingList, bytes) + SHORTALIGN((plist)->nbytes) ) |
| 347 | #define GinNextPostingListSegment(cur) ((GinPostingList *) (((char *) (cur)) + SizeOfGinPostingList((cur)))) |
| 348 | |
| 349 | #endif /* GINBLOCK_H */ |
| 350 | |