| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * spgutils.c |
| 4 | * various support functions for SP-GiST |
| 5 | * |
| 6 | * |
| 7 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 8 | * Portions Copyright (c) 1994, Regents of the University of California |
| 9 | * |
| 10 | * IDENTIFICATION |
| 11 | * src/backend/access/spgist/spgutils.c |
| 12 | * |
| 13 | *------------------------------------------------------------------------- |
| 14 | */ |
| 15 | |
| 16 | #include "postgres.h" |
| 17 | |
| 18 | #include "access/amvalidate.h" |
| 19 | #include "access/htup_details.h" |
| 20 | #include "access/reloptions.h" |
| 21 | #include "access/spgist_private.h" |
| 22 | #include "access/transam.h" |
| 23 | #include "access/xact.h" |
| 24 | #include "catalog/pg_amop.h" |
| 25 | #include "storage/bufmgr.h" |
| 26 | #include "storage/indexfsm.h" |
| 27 | #include "storage/lmgr.h" |
| 28 | #include "utils/builtins.h" |
| 29 | #include "utils/catcache.h" |
| 30 | #include "utils/index_selfuncs.h" |
| 31 | #include "utils/lsyscache.h" |
| 32 | #include "utils/syscache.h" |
| 33 | |
| 34 | |
| 35 | /* |
| 36 | * SP-GiST handler function: return IndexAmRoutine with access method parameters |
| 37 | * and callbacks. |
| 38 | */ |
| 39 | Datum |
| 40 | spghandler(PG_FUNCTION_ARGS) |
| 41 | { |
| 42 | IndexAmRoutine *amroutine = makeNode(IndexAmRoutine); |
| 43 | |
| 44 | amroutine->amstrategies = 0; |
| 45 | amroutine->amsupport = SPGISTNProc; |
| 46 | amroutine->amcanorder = false; |
| 47 | amroutine->amcanorderbyop = true; |
| 48 | amroutine->amcanbackward = false; |
| 49 | amroutine->amcanunique = false; |
| 50 | amroutine->amcanmulticol = false; |
| 51 | amroutine->amoptionalkey = true; |
| 52 | amroutine->amsearcharray = false; |
| 53 | amroutine->amsearchnulls = true; |
| 54 | amroutine->amstorage = false; |
| 55 | amroutine->amclusterable = false; |
| 56 | amroutine->ampredlocks = false; |
| 57 | amroutine->amcanparallel = false; |
| 58 | amroutine->amcaninclude = false; |
| 59 | amroutine->amkeytype = InvalidOid; |
| 60 | |
| 61 | amroutine->ambuild = spgbuild; |
| 62 | amroutine->ambuildempty = spgbuildempty; |
| 63 | amroutine->aminsert = spginsert; |
| 64 | amroutine->ambulkdelete = spgbulkdelete; |
| 65 | amroutine->amvacuumcleanup = spgvacuumcleanup; |
| 66 | amroutine->amcanreturn = spgcanreturn; |
| 67 | amroutine->amcostestimate = spgcostestimate; |
| 68 | amroutine->amoptions = spgoptions; |
| 69 | amroutine->amproperty = spgproperty; |
| 70 | amroutine->ambuildphasename = NULL; |
| 71 | amroutine->amvalidate = spgvalidate; |
| 72 | amroutine->ambeginscan = spgbeginscan; |
| 73 | amroutine->amrescan = spgrescan; |
| 74 | amroutine->amgettuple = spggettuple; |
| 75 | amroutine->amgetbitmap = spggetbitmap; |
| 76 | amroutine->amendscan = spgendscan; |
| 77 | amroutine->ammarkpos = NULL; |
| 78 | amroutine->amrestrpos = NULL; |
| 79 | amroutine->amestimateparallelscan = NULL; |
| 80 | amroutine->aminitparallelscan = NULL; |
| 81 | amroutine->amparallelrescan = NULL; |
| 82 | |
| 83 | PG_RETURN_POINTER(amroutine); |
| 84 | } |
| 85 | |
| 86 | /* Fill in a SpGistTypeDesc struct with info about the specified data type */ |
| 87 | static void |
| 88 | fillTypeDesc(SpGistTypeDesc *desc, Oid type) |
| 89 | { |
| 90 | desc->type = type; |
| 91 | get_typlenbyval(type, &desc->attlen, &desc->attbyval); |
| 92 | } |
| 93 | |
| 94 | /* |
| 95 | * Fetch local cache of AM-specific info about the index, initializing it |
| 96 | * if necessary |
| 97 | */ |
| 98 | SpGistCache * |
| 99 | spgGetCache(Relation index) |
| 100 | { |
| 101 | SpGistCache *cache; |
| 102 | |
| 103 | if (index->rd_amcache == NULL) |
| 104 | { |
| 105 | Oid atttype; |
| 106 | spgConfigIn in; |
| 107 | FmgrInfo *procinfo; |
| 108 | Buffer metabuffer; |
| 109 | SpGistMetaPageData *metadata; |
| 110 | |
| 111 | cache = MemoryContextAllocZero(index->rd_indexcxt, |
| 112 | sizeof(SpGistCache)); |
| 113 | |
| 114 | /* SPGiST doesn't support multi-column indexes */ |
| 115 | Assert(index->rd_att->natts == 1); |
| 116 | |
| 117 | /* |
| 118 | * Get the actual data type of the indexed column from the index |
| 119 | * tupdesc. We pass this to the opclass config function so that |
| 120 | * polymorphic opclasses are possible. |
| 121 | */ |
| 122 | atttype = TupleDescAttr(index->rd_att, 0)->atttypid; |
| 123 | |
| 124 | /* Call the config function to get config info for the opclass */ |
| 125 | in.attType = atttype; |
| 126 | |
| 127 | procinfo = index_getprocinfo(index, 1, SPGIST_CONFIG_PROC); |
| 128 | FunctionCall2Coll(procinfo, |
| 129 | index->rd_indcollation[0], |
| 130 | PointerGetDatum(&in), |
| 131 | PointerGetDatum(&cache->config)); |
| 132 | |
| 133 | /* Get the information we need about each relevant datatype */ |
| 134 | fillTypeDesc(&cache->attType, atttype); |
| 135 | |
| 136 | if (OidIsValid(cache->config.leafType) && |
| 137 | cache->config.leafType != atttype) |
| 138 | { |
| 139 | if (!OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC))) |
| 140 | ereport(ERROR, |
| 141 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| 142 | errmsg("compress method must be defined when leaf type is different from input type" ))); |
| 143 | |
| 144 | fillTypeDesc(&cache->attLeafType, cache->config.leafType); |
| 145 | } |
| 146 | else |
| 147 | { |
| 148 | cache->attLeafType = cache->attType; |
| 149 | } |
| 150 | |
| 151 | fillTypeDesc(&cache->attPrefixType, cache->config.prefixType); |
| 152 | fillTypeDesc(&cache->attLabelType, cache->config.labelType); |
| 153 | |
| 154 | /* Last, get the lastUsedPages data from the metapage */ |
| 155 | metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO); |
| 156 | LockBuffer(metabuffer, BUFFER_LOCK_SHARE); |
| 157 | |
| 158 | metadata = SpGistPageGetMeta(BufferGetPage(metabuffer)); |
| 159 | |
| 160 | if (metadata->magicNumber != SPGIST_MAGIC_NUMBER) |
| 161 | elog(ERROR, "index \"%s\" is not an SP-GiST index" , |
| 162 | RelationGetRelationName(index)); |
| 163 | |
| 164 | cache->lastUsedPages = metadata->lastUsedPages; |
| 165 | |
| 166 | UnlockReleaseBuffer(metabuffer); |
| 167 | |
| 168 | index->rd_amcache = (void *) cache; |
| 169 | } |
| 170 | else |
| 171 | { |
| 172 | /* assume it's up to date */ |
| 173 | cache = (SpGistCache *) index->rd_amcache; |
| 174 | } |
| 175 | |
| 176 | return cache; |
| 177 | } |
| 178 | |
| 179 | /* Initialize SpGistState for working with the given index */ |
| 180 | void |
| 181 | initSpGistState(SpGistState *state, Relation index) |
| 182 | { |
| 183 | SpGistCache *cache; |
| 184 | |
| 185 | /* Get cached static information about index */ |
| 186 | cache = spgGetCache(index); |
| 187 | |
| 188 | state->config = cache->config; |
| 189 | state->attType = cache->attType; |
| 190 | state->attLeafType = cache->attLeafType; |
| 191 | state->attPrefixType = cache->attPrefixType; |
| 192 | state->attLabelType = cache->attLabelType; |
| 193 | |
| 194 | /* Make workspace for constructing dead tuples */ |
| 195 | state->deadTupleStorage = palloc0(SGDTSIZE); |
| 196 | |
| 197 | /* Set XID to use in redirection tuples */ |
| 198 | state->myXid = GetTopTransactionIdIfAny(); |
| 199 | |
| 200 | /* Assume we're not in an index build (spgbuild will override) */ |
| 201 | state->isBuild = false; |
| 202 | } |
| 203 | |
/*
 * Allocate a new page (either by recycling, or by extending the index file).
 *
 * The returned buffer is already pinned and exclusive-locked.
 * Caller is responsible for initializing the page by calling SpGistInitBuffer.
 */
Buffer
SpGistNewBuffer(Relation index)
{
	Buffer		buffer;
	bool		needLock;

	/* First, try to get a page from FSM */
	for (;;)
	{
		BlockNumber blkno = GetFreeIndexPage(index);

		if (blkno == InvalidBlockNumber)
			break;				/* nothing known to FSM */

		/*
		 * The fixed pages shouldn't ever be listed in FSM, but just in case
		 * one is, ignore it.
		 */
		if (SpGistBlockIsFixed(blkno))
			continue;

		buffer = ReadBuffer(index, blkno);

		/*
		 * We have to guard against the possibility that someone else already
		 * recycled this page; the buffer may be locked if so.  Hence a
		 * conditional lock attempt rather than an unconditional one, which
		 * could deadlock or block behind the concurrent user.
		 */
		if (ConditionalLockBuffer(buffer))
		{
			Page		page = BufferGetPage(buffer);

			if (PageIsNew(page))
				return buffer;	/* OK to use, if never initialized */

			if (SpGistPageIsDeleted(page) || PageIsEmpty(page))
				return buffer;	/* OK to use */

			/* Page is in use after all; drop the lock before retrying */
			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
		}

		/* Can't use it, so release buffer and try again */
		ReleaseBuffer(buffer);
	}

	/*
	 * Must extend the file.  Temp/local relations can't be accessed
	 * concurrently, so the extension lock is only needed otherwise.
	 */
	needLock = !RELATION_IS_LOCAL(index);
	if (needLock)
		LockRelationForExtension(index, ExclusiveLock);

	buffer = ReadBuffer(index, P_NEW);
	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);

	/*
	 * Release the file-extension lock; it's now OK for someone else to extend
	 * the relation some more, since we hold the new page's buffer lock.
	 */
	if (needLock)
		UnlockRelationForExtension(index, ExclusiveLock);

	return buffer;
}
| 267 | |
| 268 | /* |
| 269 | * Update index metapage's lastUsedPages info from local cache, if possible |
| 270 | * |
| 271 | * Updating meta page isn't critical for index working, so |
| 272 | * 1 use ConditionalLockBuffer to improve concurrency |
| 273 | * 2 don't WAL-log metabuffer changes to decrease WAL traffic |
| 274 | */ |
| 275 | void |
| 276 | SpGistUpdateMetaPage(Relation index) |
| 277 | { |
| 278 | SpGistCache *cache = (SpGistCache *) index->rd_amcache; |
| 279 | |
| 280 | if (cache != NULL) |
| 281 | { |
| 282 | Buffer metabuffer; |
| 283 | |
| 284 | metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO); |
| 285 | |
| 286 | if (ConditionalLockBuffer(metabuffer)) |
| 287 | { |
| 288 | Page metapage = BufferGetPage(metabuffer); |
| 289 | SpGistMetaPageData *metadata = SpGistPageGetMeta(metapage); |
| 290 | |
| 291 | metadata->lastUsedPages = cache->lastUsedPages; |
| 292 | |
| 293 | /* |
| 294 | * Set pd_lower just past the end of the metadata. This is |
| 295 | * essential, because without doing so, metadata will be lost if |
| 296 | * xlog.c compresses the page. (We must do this here because |
| 297 | * pre-v11 versions of PG did not set the metapage's pd_lower |
| 298 | * correctly, so a pg_upgraded index might contain the wrong |
| 299 | * value.) |
| 300 | */ |
| 301 | ((PageHeader) metapage)->pd_lower = |
| 302 | ((char *) metadata + sizeof(SpGistMetaPageData)) - (char *) metapage; |
| 303 | |
| 304 | MarkBufferDirty(metabuffer); |
| 305 | UnlockReleaseBuffer(metabuffer); |
| 306 | } |
| 307 | else |
| 308 | { |
| 309 | ReleaseBuffer(metabuffer); |
| 310 | } |
| 311 | } |
| 312 | } |
| 313 | |
/*
 * Macro to select the proper element of the lastUsedPages cache depending on
 * the GBUF_xxx flags.  Masking flags with SPGIST_CACHED_PAGES is just for
 * paranoia's sake: the flag combinations used as indexes are already < that.
 */
#define GET_LUP(c, f) (&(c)->lastUsedPages.cachedPage[((unsigned int) (f)) % SPGIST_CACHED_PAGES])
| 317 | |
/*
 * Allocate and initialize a new buffer of the type and parity specified by
 * flags.  The returned buffer is already pinned and exclusive-locked.
 *
 * When requesting an inner page, if we get one with the wrong parity,
 * we just release the buffer and try again.  We will get a different page
 * because GetFreeIndexPage will have marked the page used in FSM.  The page
 * is entered in our local lastUsedPages cache, so there's some hope of
 * making use of it later in this session, but otherwise we rely on VACUUM
 * to eventually re-enter the page in FSM, making it available for recycling.
 * Note that such a page does not get marked dirty here, so unless it's used
 * fairly soon, the buffer will just get discarded and the page will remain
 * as it was on disk.
 *
 * When we return a buffer to the caller, the page is *not* entered into
 * the lastUsedPages cache; we expect the caller will do so after it's taken
 * whatever space it will use.  This is because after the caller has used up
 * some space, the page might have less space than whatever was cached already
 * so we'd rather not trash the old cache entry.
 */
static Buffer
allocNewBuffer(Relation index, int flags)
{
	SpGistCache *cache = spgGetCache(index);
	uint16		pageflags = 0;

	/* Translate the GBUF request flags into SP-GiST page flags */
	if (GBUF_REQ_LEAF(flags))
		pageflags |= SPGIST_LEAF;
	if (GBUF_REQ_NULLS(flags))
		pageflags |= SPGIST_NULLS;

	for (;;)
	{
		Buffer		buffer;

		buffer = SpGistNewBuffer(index);
		SpGistInitBuffer(buffer, pageflags);

		if (pageflags & SPGIST_LEAF)
		{
			/* Leaf pages have no parity concerns, so just use it */
			return buffer;
		}
		else
		{
			BlockNumber blkno = BufferGetBlockNumber(buffer);
			int			blkFlags = GBUF_INNER_PARITY(blkno);

			if ((flags & GBUF_PARITY_MASK) == blkFlags)
			{
				/* Page has right parity, use it */
				return buffer;
			}
			else
			{
				/* Page has wrong parity, record it in cache and try again */
				if (pageflags & SPGIST_NULLS)
					blkFlags |= GBUF_NULLS;
				cache->lastUsedPages.cachedPage[blkFlags].blkno = blkno;
				cache->lastUsedPages.cachedPage[blkFlags].freeSpace =
					PageGetExactFreeSpace(BufferGetPage(buffer));
				/* Not marked dirty: see header comment about discarding */
				UnlockReleaseBuffer(buffer);
			}
		}
	}
}
| 384 | |
/*
 * Get a buffer of the type and parity specified by flags, having at least
 * as much free space as indicated by needSpace.  We use the lastUsedPages
 * cache to assign the same buffer previously requested when possible.
 * The returned buffer is already pinned and exclusive-locked.
 *
 * *isNew is set true if the page was initialized here, false if it was
 * already valid.
 */
Buffer
SpGistGetBuffer(Relation index, int flags, int needSpace, bool *isNew)
{
	SpGistCache *cache = spgGetCache(index);
	SpGistLastUsedPage *lup;

	/* Bail out if even an empty page wouldn't meet the demand */
	if (needSpace > SPGIST_PAGE_CAPACITY)
		elog(ERROR, "desired SPGiST tuple size is too big");

	/*
	 * If possible, increase the space request to include relation's
	 * fillfactor.  This ensures that when we add unrelated tuples to a page,
	 * we try to keep 100-fillfactor% available for adding tuples that are
	 * related to the ones already on it.  But fillfactor mustn't cause an
	 * error for requests that would otherwise be legal.
	 */
	needSpace += RelationGetTargetPageFreeSpace(index,
												SPGIST_DEFAULT_FILLFACTOR);
	needSpace = Min(needSpace, SPGIST_PAGE_CAPACITY);

	/* Get the cache entry for this flags setting */
	lup = GET_LUP(cache, flags);

	/* If we have nothing cached, just turn it over to allocNewBuffer */
	if (lup->blkno == InvalidBlockNumber)
	{
		*isNew = true;
		return allocNewBuffer(index, flags);
	}

	/* fixed pages should never be in cache */
	Assert(!SpGistBlockIsFixed(lup->blkno));

	/* If cached freeSpace isn't enough, don't bother looking at the page */
	if (lup->freeSpace >= needSpace)
	{
		Buffer		buffer;
		Page		page;

		buffer = ReadBuffer(index, lup->blkno);

		if (!ConditionalLockBuffer(buffer))
		{
			/*
			 * buffer is locked by another process, so return a new buffer
			 * rather than waiting (concurrency over space efficiency)
			 */
			ReleaseBuffer(buffer);
			*isNew = true;
			return allocNewBuffer(index, flags);
		}

		page = BufferGetPage(buffer);

		if (PageIsNew(page) || SpGistPageIsDeleted(page) || PageIsEmpty(page))
		{
			/* OK to initialize the page; stamp it with the requested flags */
			uint16		pageflags = 0;

			if (GBUF_REQ_LEAF(flags))
				pageflags |= SPGIST_LEAF;
			if (GBUF_REQ_NULLS(flags))
				pageflags |= SPGIST_NULLS;
			SpGistInitBuffer(buffer, pageflags);
			/* Pre-deduct needSpace, anticipating the caller's insertion */
			lup->freeSpace = PageGetExactFreeSpace(page) - needSpace;
			*isNew = true;
			return buffer;
		}

		/*
		 * Check that page is of right type and has enough space.  We must
		 * recheck this since our cache isn't necessarily up to date.
		 */
		if ((GBUF_REQ_LEAF(flags) ? SpGistPageIsLeaf(page) : !SpGistPageIsLeaf(page)) &&
			(GBUF_REQ_NULLS(flags) ? SpGistPageStoresNulls(page) : !SpGistPageStoresNulls(page)))
		{
			int			freeSpace = PageGetExactFreeSpace(page);

			if (freeSpace >= needSpace)
			{
				/* Success, update freespace info and return the buffer */
				lup->freeSpace = freeSpace - needSpace;
				*isNew = false;
				return buffer;
			}
		}

		/*
		 * fallback to allocation of new buffer
		 */
		UnlockReleaseBuffer(buffer);
	}

	/* No success with cache, so return a new buffer */
	*isNew = true;
	return allocNewBuffer(index, flags);
}
| 491 | |
| 492 | /* |
| 493 | * Update lastUsedPages cache when done modifying a page. |
| 494 | * |
| 495 | * We update the appropriate cache entry if it already contained this page |
| 496 | * (its freeSpace is likely obsolete), or if this page has more space than |
| 497 | * whatever we had cached. |
| 498 | */ |
| 499 | void |
| 500 | SpGistSetLastUsedPage(Relation index, Buffer buffer) |
| 501 | { |
| 502 | SpGistCache *cache = spgGetCache(index); |
| 503 | SpGistLastUsedPage *lup; |
| 504 | int freeSpace; |
| 505 | Page page = BufferGetPage(buffer); |
| 506 | BlockNumber blkno = BufferGetBlockNumber(buffer); |
| 507 | int flags; |
| 508 | |
| 509 | /* Never enter fixed pages (root pages) in cache, though */ |
| 510 | if (SpGistBlockIsFixed(blkno)) |
| 511 | return; |
| 512 | |
| 513 | if (SpGistPageIsLeaf(page)) |
| 514 | flags = GBUF_LEAF; |
| 515 | else |
| 516 | flags = GBUF_INNER_PARITY(blkno); |
| 517 | if (SpGistPageStoresNulls(page)) |
| 518 | flags |= GBUF_NULLS; |
| 519 | |
| 520 | lup = GET_LUP(cache, flags); |
| 521 | |
| 522 | freeSpace = PageGetExactFreeSpace(page); |
| 523 | if (lup->blkno == InvalidBlockNumber || lup->blkno == blkno || |
| 524 | lup->freeSpace < freeSpace) |
| 525 | { |
| 526 | lup->blkno = blkno; |
| 527 | lup->freeSpace = freeSpace; |
| 528 | } |
| 529 | } |
| 530 | |
/*
 * Initialize an SPGiST page to empty, with specified flags
 *
 * The opaque (special-space) area is zeroed, then stamped with the caller's
 * flags and the SP-GiST page ID so the page type can be recognized later.
 */
void
SpGistInitPage(Page page, uint16 f)
{
	SpGistPageOpaque opaque;

	PageInit(page, BLCKSZ, MAXALIGN(sizeof(SpGistPageOpaqueData)));
	opaque = SpGistPageGetOpaque(page);
	memset(opaque, 0, sizeof(SpGistPageOpaqueData));
	opaque->flags = f;
	opaque->spgist_page_id = SPGIST_PAGE_ID;
}
| 545 | |
/*
 * Initialize a buffer's page to empty, with specified flags
 *
 * Thin wrapper around SpGistInitPage; the Assert guards against a buffer
 * whose page size differs from the BLCKSZ that SpGistInitPage assumes.
 */
void
SpGistInitBuffer(Buffer b, uint16 f)
{
	Assert(BufferGetPageSize(b) == BLCKSZ);
	SpGistInitPage(BufferGetPage(b), f);
}
| 555 | |
| 556 | /* |
| 557 | * Initialize metadata page |
| 558 | */ |
| 559 | void |
| 560 | SpGistInitMetapage(Page page) |
| 561 | { |
| 562 | SpGistMetaPageData *metadata; |
| 563 | int i; |
| 564 | |
| 565 | SpGistInitPage(page, SPGIST_META); |
| 566 | metadata = SpGistPageGetMeta(page); |
| 567 | memset(metadata, 0, sizeof(SpGistMetaPageData)); |
| 568 | metadata->magicNumber = SPGIST_MAGIC_NUMBER; |
| 569 | |
| 570 | /* initialize last-used-page cache to empty */ |
| 571 | for (i = 0; i < SPGIST_CACHED_PAGES; i++) |
| 572 | metadata->lastUsedPages.cachedPage[i].blkno = InvalidBlockNumber; |
| 573 | |
| 574 | /* |
| 575 | * Set pd_lower just past the end of the metadata. This is essential, |
| 576 | * because without doing so, metadata will be lost if xlog.c compresses |
| 577 | * the page. |
| 578 | */ |
| 579 | ((PageHeader) page)->pd_lower = |
| 580 | ((char *) metadata + sizeof(SpGistMetaPageData)) - (char *) page; |
| 581 | } |
| 582 | |
/*
 * reloptions processing for SPGiST
 *
 * SP-GiST has no AM-specific options; everything is handled by the generic
 * reloptions machinery under the RELOPT_KIND_SPGIST kind (e.g. fillfactor).
 */
bytea *
spgoptions(Datum reloptions, bool validate)
{
	return default_reloptions(reloptions, validate, RELOPT_KIND_SPGIST);
}
| 591 | |
| 592 | /* |
| 593 | * Get the space needed to store a non-null datum of the indicated type. |
| 594 | * Note the result is already rounded up to a MAXALIGN boundary. |
| 595 | * Also, we follow the SPGiST convention that pass-by-val types are |
| 596 | * just stored in their Datum representation (compare memcpyDatum). |
| 597 | */ |
| 598 | unsigned int |
| 599 | SpGistGetTypeSize(SpGistTypeDesc *att, Datum datum) |
| 600 | { |
| 601 | unsigned int size; |
| 602 | |
| 603 | if (att->attbyval) |
| 604 | size = sizeof(Datum); |
| 605 | else if (att->attlen > 0) |
| 606 | size = att->attlen; |
| 607 | else |
| 608 | size = VARSIZE_ANY(datum); |
| 609 | |
| 610 | return MAXALIGN(size); |
| 611 | } |
| 612 | |
| 613 | /* |
| 614 | * Copy the given non-null datum to *target |
| 615 | */ |
| 616 | static void |
| 617 | memcpyDatum(void *target, SpGistTypeDesc *att, Datum datum) |
| 618 | { |
| 619 | unsigned int size; |
| 620 | |
| 621 | if (att->attbyval) |
| 622 | { |
| 623 | memcpy(target, &datum, sizeof(Datum)); |
| 624 | } |
| 625 | else |
| 626 | { |
| 627 | size = (att->attlen > 0) ? att->attlen : VARSIZE_ANY(datum); |
| 628 | memcpy(target, DatumGetPointer(datum), size); |
| 629 | } |
| 630 | } |
| 631 | |
| 632 | /* |
| 633 | * Construct a leaf tuple containing the given heap TID and datum value |
| 634 | */ |
| 635 | SpGistLeafTuple |
| 636 | spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr, |
| 637 | Datum datum, bool isnull) |
| 638 | { |
| 639 | SpGistLeafTuple tup; |
| 640 | unsigned int size; |
| 641 | |
| 642 | /* compute space needed (note result is already maxaligned) */ |
| 643 | size = SGLTHDRSZ; |
| 644 | if (!isnull) |
| 645 | size += SpGistGetTypeSize(&state->attLeafType, datum); |
| 646 | |
| 647 | /* |
| 648 | * Ensure that we can replace the tuple with a dead tuple later. This |
| 649 | * test is unnecessary when !isnull, but let's be safe. |
| 650 | */ |
| 651 | if (size < SGDTSIZE) |
| 652 | size = SGDTSIZE; |
| 653 | |
| 654 | /* OK, form the tuple */ |
| 655 | tup = (SpGistLeafTuple) palloc0(size); |
| 656 | |
| 657 | tup->size = size; |
| 658 | tup->nextOffset = InvalidOffsetNumber; |
| 659 | tup->heapPtr = *heapPtr; |
| 660 | if (!isnull) |
| 661 | memcpyDatum(SGLTDATAPTR(tup), &state->attLeafType, datum); |
| 662 | |
| 663 | return tup; |
| 664 | } |
| 665 | |
| 666 | /* |
| 667 | * Construct a node (to go into an inner tuple) containing the given label |
| 668 | * |
| 669 | * Note that the node's downlink is just set invalid here. Caller will fill |
| 670 | * it in later. |
| 671 | */ |
| 672 | SpGistNodeTuple |
| 673 | spgFormNodeTuple(SpGistState *state, Datum label, bool isnull) |
| 674 | { |
| 675 | SpGistNodeTuple tup; |
| 676 | unsigned int size; |
| 677 | unsigned short infomask = 0; |
| 678 | |
| 679 | /* compute space needed (note result is already maxaligned) */ |
| 680 | size = SGNTHDRSZ; |
| 681 | if (!isnull) |
| 682 | size += SpGistGetTypeSize(&state->attLabelType, label); |
| 683 | |
| 684 | /* |
| 685 | * Here we make sure that the size will fit in the field reserved for it |
| 686 | * in t_info. |
| 687 | */ |
| 688 | if ((size & INDEX_SIZE_MASK) != size) |
| 689 | ereport(ERROR, |
| 690 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| 691 | errmsg("index row requires %zu bytes, maximum size is %zu" , |
| 692 | (Size) size, (Size) INDEX_SIZE_MASK))); |
| 693 | |
| 694 | tup = (SpGistNodeTuple) palloc0(size); |
| 695 | |
| 696 | if (isnull) |
| 697 | infomask |= INDEX_NULL_MASK; |
| 698 | /* we don't bother setting the INDEX_VAR_MASK bit */ |
| 699 | infomask |= size; |
| 700 | tup->t_info = infomask; |
| 701 | |
| 702 | /* The TID field will be filled in later */ |
| 703 | ItemPointerSetInvalid(&tup->t_tid); |
| 704 | |
| 705 | if (!isnull) |
| 706 | memcpyDatum(SGNTDATAPTR(tup), &state->attLabelType, label); |
| 707 | |
| 708 | return tup; |
| 709 | } |
| 710 | |
/*
 * Construct an inner tuple containing the given prefix and node array
 *
 * The nodes are copied verbatim after the (optional) prefix; their sizes
 * are assumed to be maxaligned already, so no padding is inserted between
 * them.
 */
SpGistInnerTuple
spgFormInnerTuple(SpGistState *state, bool hasPrefix, Datum prefix,
				  int nNodes, SpGistNodeTuple *nodes)
{
	SpGistInnerTuple tup;
	unsigned int size;
	unsigned int prefixSize;
	int			i;
	char	   *ptr;

	/* Compute size needed */
	if (hasPrefix)
		prefixSize = SpGistGetTypeSize(&state->attPrefixType, prefix);
	else
		prefixSize = 0;

	size = SGITHDRSZ + prefixSize;

	/* Note: we rely on node tuple sizes to be maxaligned already */
	for (i = 0; i < nNodes; i++)
		size += IndexTupleSize(nodes[i]);

	/*
	 * Ensure that we can replace the tuple with a dead tuple later.  This
	 * test is unnecessary given current tuple layouts, but let's be safe.
	 */
	if (size < SGDTSIZE)
		size = SGDTSIZE;

	/*
	 * Inner tuple should be small enough to fit on a page
	 */
	if (size > SPGIST_PAGE_CAPACITY - sizeof(ItemIdData))
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("SP-GiST inner tuple size %zu exceeds maximum %zu",
						(Size) size,
						SPGIST_PAGE_CAPACITY - sizeof(ItemIdData)),
				 errhint("Values larger than a buffer page cannot be indexed.")));

	/*
	 * Check for overflow of header fields --- probably can't fail if the
	 * above succeeded, but let's be paranoid
	 */
	if (size > SGITMAXSIZE ||
		prefixSize > SGITMAXPREFIXSIZE ||
		nNodes > SGITMAXNNODES)
		elog(ERROR, "SPGiST inner tuple header field is too small");

	/* OK, form the tuple */
	tup = (SpGistInnerTuple) palloc0(size);

	tup->nNodes = nNodes;
	tup->prefixSize = prefixSize;
	tup->size = size;

	if (hasPrefix)
		memcpyDatum(SGITDATAPTR(tup), &state->attPrefixType, prefix);

	/* Append the node tuples one after another following the prefix */
	ptr = (char *) SGITNODEPTR(tup);

	for (i = 0; i < nNodes; i++)
	{
		SpGistNodeTuple node = nodes[i];

		memcpy(ptr, node, IndexTupleSize(node));
		ptr += IndexTupleSize(node);
	}

	return tup;
}
| 785 | |
/*
 * Construct a "dead" tuple to replace a tuple being deleted.
 *
 * The state can be SPGIST_REDIRECT, SPGIST_DEAD, or SPGIST_PLACEHOLDER.
 * For a REDIRECT tuple, a pointer (blkno+offset) must be supplied, and
 * the xid field is filled in automatically.
 *
 * This is called in critical sections, so we don't use palloc; the tuple
 * is built in preallocated storage.  It should be copied before another
 * call with different parameters can occur.
 */
SpGistDeadTuple
spgFormDeadTuple(SpGistState *state, int tupstate,
				 BlockNumber blkno, OffsetNumber offnum)
{
	/* reuse the preallocated workspace; see header comment */
	SpGistDeadTuple tuple = (SpGistDeadTuple) state->deadTupleStorage;

	tuple->tupstate = tupstate;
	tuple->size = SGDTSIZE;
	tuple->nextOffset = InvalidOffsetNumber;

	if (tupstate == SPGIST_REDIRECT)
	{
		/* redirect carries the target TID and the deleting XID */
		ItemPointerSet(&tuple->pointer, blkno, offnum);
		Assert(TransactionIdIsValid(state->myXid));
		tuple->xid = state->myXid;
	}
	else
	{
		/* DEAD and PLACEHOLDER tuples carry no pointer or XID */
		ItemPointerSetInvalid(&tuple->pointer);
		tuple->xid = InvalidTransactionId;
	}

	return tuple;
}
| 821 | |
| 822 | /* |
| 823 | * Extract the label datums of the nodes within innerTuple |
| 824 | * |
| 825 | * Returns NULL if label datums are NULLs |
| 826 | */ |
| 827 | Datum * |
| 828 | (SpGistState *state, SpGistInnerTuple innerTuple) |
| 829 | { |
| 830 | Datum *nodeLabels; |
| 831 | int i; |
| 832 | SpGistNodeTuple node; |
| 833 | |
| 834 | /* Either all the labels must be NULL, or none. */ |
| 835 | node = SGITNODEPTR(innerTuple); |
| 836 | if (IndexTupleHasNulls(node)) |
| 837 | { |
| 838 | SGITITERATE(innerTuple, i, node) |
| 839 | { |
| 840 | if (!IndexTupleHasNulls(node)) |
| 841 | elog(ERROR, "some but not all node labels are null in SPGiST inner tuple" ); |
| 842 | } |
| 843 | /* They're all null, so just return NULL */ |
| 844 | return NULL; |
| 845 | } |
| 846 | else |
| 847 | { |
| 848 | nodeLabels = (Datum *) palloc(sizeof(Datum) * innerTuple->nNodes); |
| 849 | SGITITERATE(innerTuple, i, node) |
| 850 | { |
| 851 | if (IndexTupleHasNulls(node)) |
| 852 | elog(ERROR, "some but not all node labels are null in SPGiST inner tuple" ); |
| 853 | nodeLabels[i] = SGNTDATUM(node, state); |
| 854 | } |
| 855 | return nodeLabels; |
| 856 | } |
| 857 | } |
| 858 | |
/*
 * Add a new item to the page, replacing a PLACEHOLDER item if possible.
 * Return the location it's inserted at, or InvalidOffsetNumber on failure.
 *
 * If startOffset isn't NULL, we start searching for placeholders at
 * *startOffset, and update that to the next place to search.  This is just
 * an optimization for repeated insertions.
 *
 * If errorOK is false, we throw error when there's not enough room,
 * rather than returning InvalidOffsetNumber.
 */
OffsetNumber
SpGistPageAddNewItem(SpGistState *state, Page page, Item item, Size size,
					 OffsetNumber *startOffset, bool errorOK)
{
	SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
	OffsetNumber i,
				maxoff,
				offnum;

	/*
	 * Replacing a placeholder frees SGDTSIZE bytes, so the item fits if the
	 * page's current free space plus that is at least the (aligned) size.
	 */
	if (opaque->nPlaceholder > 0 &&
		PageGetExactFreeSpace(page) + SGDTSIZE >= MAXALIGN(size))
	{
		/* Try to replace a placeholder */
		maxoff = PageGetMaxOffsetNumber(page);
		offnum = InvalidOffsetNumber;

		for (;;)
		{
			/* Start from the caller's hint if we have one, else from the top */
			if (startOffset && *startOffset != InvalidOffsetNumber)
				i = *startOffset;
			else
				i = FirstOffsetNumber;
			for (; i <= maxoff; i++)
			{
				SpGistDeadTuple it = (SpGistDeadTuple) PageGetItem(page,
																   PageGetItemId(page, i));

				if (it->tupstate == SPGIST_PLACEHOLDER)
				{
					offnum = i;
					break;
				}
			}

			/* Done if we found a placeholder */
			if (offnum != InvalidOffsetNumber)
				break;

			if (startOffset && *startOffset != InvalidOffsetNumber)
			{
				/* Hint was no good, re-search from beginning */
				*startOffset = InvalidOffsetNumber;
				continue;
			}

			/* Hmm, no placeholder found?  Fix the counter to match reality */
			opaque->nPlaceholder = 0;
			break;
		}

		if (offnum != InvalidOffsetNumber)
		{
			/* Replace the placeholder tuple */
			PageIndexTupleDelete(page, offnum);

			offnum = PageAddItem(page, item, size, offnum, false, false);

			/*
			 * We should not have failed given the size check at the top of
			 * the function, but test anyway.  If we did fail, we must PANIC
			 * because we've already deleted the placeholder tuple, and
			 * there's no other way to keep the damage from getting to disk.
			 */
			if (offnum != InvalidOffsetNumber)
			{
				Assert(opaque->nPlaceholder > 0);
				opaque->nPlaceholder--;
				if (startOffset)
					*startOffset = offnum + 1;
			}
			else
				elog(PANIC, "failed to add item of size %u to SPGiST index page",
					 (int) size);

			return offnum;
		}
	}

	/* No luck in replacing a placeholder, so just add it to the page */
	offnum = PageAddItem(page, item, size,
						 InvalidOffsetNumber, false, false);

	if (offnum == InvalidOffsetNumber && !errorOK)
		elog(ERROR, "failed to add item of size %u to SPGiST index page",
			 (int) size);

	return offnum;
}
| 958 | |
| 959 | /* |
| 960 | * spgproperty() -- Check boolean properties of indexes. |
| 961 | * |
| 962 | * This is optional for most AMs, but is required for SP-GiST because the core |
| 963 | * property code doesn't support AMPROP_DISTANCE_ORDERABLE. |
| 964 | */ |
| 965 | bool |
| 966 | spgproperty(Oid index_oid, int attno, |
| 967 | IndexAMProperty prop, const char *propname, |
| 968 | bool *res, bool *isnull) |
| 969 | { |
| 970 | Oid opclass, |
| 971 | opfamily, |
| 972 | opcintype; |
| 973 | CatCList *catlist; |
| 974 | int i; |
| 975 | |
| 976 | /* Only answer column-level inquiries */ |
| 977 | if (attno == 0) |
| 978 | return false; |
| 979 | |
| 980 | switch (prop) |
| 981 | { |
| 982 | case AMPROP_DISTANCE_ORDERABLE: |
| 983 | break; |
| 984 | default: |
| 985 | return false; |
| 986 | } |
| 987 | |
| 988 | /* |
| 989 | * Currently, SP-GiST distance-ordered scans require that there be a |
| 990 | * distance operator in the opclass with the default types. So we assume |
| 991 | * that if such a operator exists, then there's a reason for it. |
| 992 | */ |
| 993 | |
| 994 | /* First we need to know the column's opclass. */ |
| 995 | opclass = get_index_column_opclass(index_oid, attno); |
| 996 | if (!OidIsValid(opclass)) |
| 997 | { |
| 998 | *isnull = true; |
| 999 | return true; |
| 1000 | } |
| 1001 | |
| 1002 | /* Now look up the opclass family and input datatype. */ |
| 1003 | if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype)) |
| 1004 | { |
| 1005 | *isnull = true; |
| 1006 | return true; |
| 1007 | } |
| 1008 | |
| 1009 | /* And now we can check whether the operator is provided. */ |
| 1010 | catlist = SearchSysCacheList1(AMOPSTRATEGY, |
| 1011 | ObjectIdGetDatum(opfamily)); |
| 1012 | |
| 1013 | *res = false; |
| 1014 | |
| 1015 | for (i = 0; i < catlist->n_members; i++) |
| 1016 | { |
| 1017 | HeapTuple amoptup = &catlist->members[i]->tuple; |
| 1018 | Form_pg_amop amopform = (Form_pg_amop) GETSTRUCT(amoptup); |
| 1019 | |
| 1020 | if (amopform->amoppurpose == AMOP_ORDER && |
| 1021 | (amopform->amoplefttype == opcintype || |
| 1022 | amopform->amoprighttype == opcintype) && |
| 1023 | opfamily_can_sort_type(amopform->amopsortfamily, |
| 1024 | get_op_rettype(amopform->amopopr))) |
| 1025 | { |
| 1026 | *res = true; |
| 1027 | break; |
| 1028 | } |
| 1029 | } |
| 1030 | |
| 1031 | ReleaseSysCacheList(catlist); |
| 1032 | |
| 1033 | *isnull = false; |
| 1034 | |
| 1035 | return true; |
| 1036 | } |
| 1037 | |