| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * gininsert.c |
| 4 | * insert routines for the postgres inverted index access method. |
| 5 | * |
| 6 | * |
| 7 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 8 | * Portions Copyright (c) 1994, Regents of the University of California |
| 9 | * |
| 10 | * IDENTIFICATION |
| 11 | * src/backend/access/gin/gininsert.c |
| 12 | *------------------------------------------------------------------------- |
| 13 | */ |
| 14 | |
| 15 | #include "postgres.h" |
| 16 | |
| 17 | #include "access/gin_private.h" |
| 18 | #include "access/ginxlog.h" |
| 19 | #include "access/xloginsert.h" |
| 20 | #include "access/tableam.h" |
| 21 | #include "catalog/index.h" |
| 22 | #include "miscadmin.h" |
| 23 | #include "storage/bufmgr.h" |
| 24 | #include "storage/smgr.h" |
| 25 | #include "storage/indexfsm.h" |
| 26 | #include "storage/predicate.h" |
| 27 | #include "utils/memutils.h" |
| 28 | #include "utils/rel.h" |
| 29 | |
| 30 | |
| 31 | typedef struct |
| 32 | { |
| 33 | GinState ginstate; |
| 34 | double indtuples; |
| 35 | GinStatsData buildStats; |
| 36 | MemoryContext tmpCtx; |
| 37 | MemoryContext funcCtx; |
| 38 | BuildAccumulator accum; |
| 39 | } GinBuildState; |
| 40 | |
| 41 | |
| 42 | /* |
| 43 | * Adds array of item pointers to tuple's posting list, or |
| 44 | * creates posting tree and tuple pointing to tree in case |
| 45 | * of not enough space. Max size of tuple is defined in |
| 46 | * GinFormTuple(). Returns a new, modified index tuple. |
| 47 | * items[] must be in sorted order with no duplicates. |
| 48 | */ |
| 49 | static IndexTuple |
| 50 | addItemPointersToLeafTuple(GinState *ginstate, |
| 51 | IndexTuple old, |
| 52 | ItemPointerData *items, uint32 nitem, |
| 53 | GinStatsData *buildStats, Buffer buffer) |
| 54 | { |
| 55 | OffsetNumber attnum; |
| 56 | Datum key; |
| 57 | GinNullCategory category; |
| 58 | IndexTuple res; |
| 59 | ItemPointerData *newItems, |
| 60 | *oldItems; |
| 61 | int oldNPosting, |
| 62 | newNPosting; |
| 63 | GinPostingList *compressedList; |
| 64 | |
| 65 | Assert(!GinIsPostingTree(old)); |
| 66 | |
| 67 | attnum = gintuple_get_attrnum(ginstate, old); |
| 68 | key = gintuple_get_key(ginstate, old, &category); |
| 69 | |
| 70 | /* merge the old and new posting lists */ |
| 71 | oldItems = ginReadTuple(ginstate, attnum, old, &oldNPosting); |
| 72 | |
| 73 | newItems = ginMergeItemPointers(items, nitem, |
| 74 | oldItems, oldNPosting, |
| 75 | &newNPosting); |
| 76 | |
| 77 | /* Compress the posting list, and try to a build tuple with room for it */ |
| 78 | res = NULL; |
| 79 | compressedList = ginCompressPostingList(newItems, newNPosting, GinMaxItemSize, |
| 80 | NULL); |
| 81 | pfree(newItems); |
| 82 | if (compressedList) |
| 83 | { |
| 84 | res = GinFormTuple(ginstate, attnum, key, category, |
| 85 | (char *) compressedList, |
| 86 | SizeOfGinPostingList(compressedList), |
| 87 | newNPosting, |
| 88 | false); |
| 89 | pfree(compressedList); |
| 90 | } |
| 91 | if (!res) |
| 92 | { |
| 93 | /* posting list would be too big, convert to posting tree */ |
| 94 | BlockNumber postingRoot; |
| 95 | |
| 96 | /* |
| 97 | * Initialize posting tree with the old tuple's posting list. It's |
| 98 | * surely small enough to fit on one posting-tree page, and should |
| 99 | * already be in order with no duplicates. |
| 100 | */ |
| 101 | postingRoot = createPostingTree(ginstate->index, |
| 102 | oldItems, |
| 103 | oldNPosting, |
| 104 | buildStats, |
| 105 | buffer); |
| 106 | |
| 107 | /* Now insert the TIDs-to-be-added into the posting tree */ |
| 108 | ginInsertItemPointers(ginstate->index, postingRoot, |
| 109 | items, nitem, |
| 110 | buildStats); |
| 111 | |
| 112 | /* And build a new posting-tree-only result tuple */ |
| 113 | res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true); |
| 114 | GinSetPostingTree(res, postingRoot); |
| 115 | } |
| 116 | pfree(oldItems); |
| 117 | |
| 118 | return res; |
| 119 | } |
| 120 | |
| 121 | /* |
| 122 | * Build a fresh leaf tuple, either posting-list or posting-tree format |
| 123 | * depending on whether the given items list will fit. |
| 124 | * items[] must be in sorted order with no duplicates. |
| 125 | * |
| 126 | * This is basically the same logic as in addItemPointersToLeafTuple, |
| 127 | * but working from slightly different input. |
| 128 | */ |
| 129 | static IndexTuple |
| 130 | buildFreshLeafTuple(GinState *ginstate, |
| 131 | OffsetNumber attnum, Datum key, GinNullCategory category, |
| 132 | ItemPointerData *items, uint32 nitem, |
| 133 | GinStatsData *buildStats, Buffer buffer) |
| 134 | { |
| 135 | IndexTuple res = NULL; |
| 136 | GinPostingList *compressedList; |
| 137 | |
| 138 | /* try to build a posting list tuple with all the items */ |
| 139 | compressedList = ginCompressPostingList(items, nitem, GinMaxItemSize, NULL); |
| 140 | if (compressedList) |
| 141 | { |
| 142 | res = GinFormTuple(ginstate, attnum, key, category, |
| 143 | (char *) compressedList, |
| 144 | SizeOfGinPostingList(compressedList), |
| 145 | nitem, false); |
| 146 | pfree(compressedList); |
| 147 | } |
| 148 | if (!res) |
| 149 | { |
| 150 | /* posting list would be too big, build posting tree */ |
| 151 | BlockNumber postingRoot; |
| 152 | |
| 153 | /* |
| 154 | * Build posting-tree-only result tuple. We do this first so as to |
| 155 | * fail quickly if the key is too big. |
| 156 | */ |
| 157 | res = GinFormTuple(ginstate, attnum, key, category, NULL, 0, 0, true); |
| 158 | |
| 159 | /* |
| 160 | * Initialize a new posting tree with the TIDs. |
| 161 | */ |
| 162 | postingRoot = createPostingTree(ginstate->index, items, nitem, |
| 163 | buildStats, buffer); |
| 164 | |
| 165 | /* And save the root link in the result tuple */ |
| 166 | GinSetPostingTree(res, postingRoot); |
| 167 | } |
| 168 | |
| 169 | return res; |
| 170 | } |
| 171 | |
| 172 | /* |
| 173 | * Insert one or more heap TIDs associated with the given key value. |
| 174 | * This will either add a single key entry, or enlarge a pre-existing entry. |
| 175 | * |
| 176 | * During an index build, buildStats is non-null and the counters |
| 177 | * it contains should be incremented as needed. |
| 178 | */ |
| 179 | void |
| 180 | ginEntryInsert(GinState *ginstate, |
| 181 | OffsetNumber attnum, Datum key, GinNullCategory category, |
| 182 | ItemPointerData *items, uint32 nitem, |
| 183 | GinStatsData *buildStats) |
| 184 | { |
| 185 | GinBtreeData btree; |
| 186 | GinBtreeEntryInsertData insertdata; |
| 187 | GinBtreeStack *stack; |
| 188 | IndexTuple itup; |
| 189 | Page page; |
| 190 | |
| 191 | insertdata.isDelete = false; |
| 192 | |
| 193 | /* During index build, count the to-be-inserted entry */ |
| 194 | if (buildStats) |
| 195 | buildStats->nEntries++; |
| 196 | |
| 197 | ginPrepareEntryScan(&btree, attnum, key, category, ginstate); |
| 198 | btree.isBuild = (buildStats != NULL); |
| 199 | |
| 200 | stack = ginFindLeafPage(&btree, false, false, NULL); |
| 201 | page = BufferGetPage(stack->buffer); |
| 202 | |
| 203 | if (btree.findItem(&btree, stack)) |
| 204 | { |
| 205 | /* found pre-existing entry */ |
| 206 | itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); |
| 207 | |
| 208 | if (GinIsPostingTree(itup)) |
| 209 | { |
| 210 | /* add entries to existing posting tree */ |
| 211 | BlockNumber rootPostingTree = GinGetPostingTree(itup); |
| 212 | |
| 213 | /* release all stack */ |
| 214 | LockBuffer(stack->buffer, GIN_UNLOCK); |
| 215 | freeGinBtreeStack(stack); |
| 216 | |
| 217 | /* insert into posting tree */ |
| 218 | ginInsertItemPointers(ginstate->index, rootPostingTree, |
| 219 | items, nitem, |
| 220 | buildStats); |
| 221 | return; |
| 222 | } |
| 223 | |
| 224 | CheckForSerializableConflictIn(ginstate->index, NULL, stack->buffer); |
| 225 | /* modify an existing leaf entry */ |
| 226 | itup = addItemPointersToLeafTuple(ginstate, itup, |
| 227 | items, nitem, buildStats, stack->buffer); |
| 228 | |
| 229 | insertdata.isDelete = true; |
| 230 | } |
| 231 | else |
| 232 | { |
| 233 | CheckForSerializableConflictIn(ginstate->index, NULL, stack->buffer); |
| 234 | /* no match, so construct a new leaf entry */ |
| 235 | itup = buildFreshLeafTuple(ginstate, attnum, key, category, |
| 236 | items, nitem, buildStats, stack->buffer); |
| 237 | } |
| 238 | |
| 239 | /* Insert the new or modified leaf tuple */ |
| 240 | insertdata.entry = itup; |
| 241 | ginInsertValue(&btree, stack, &insertdata, buildStats); |
| 242 | pfree(itup); |
| 243 | } |
| 244 | |
| 245 | /* |
| 246 | * Extract index entries for a single indexable item, and add them to the |
| 247 | * BuildAccumulator's state. |
| 248 | * |
| 249 | * This function is used only during initial index creation. |
| 250 | */ |
| 251 | static void |
| 252 | ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum, |
| 253 | Datum value, bool isNull, |
| 254 | ItemPointer heapptr) |
| 255 | { |
| 256 | Datum *entries; |
| 257 | GinNullCategory *categories; |
| 258 | int32 nentries; |
| 259 | MemoryContext oldCtx; |
| 260 | |
| 261 | oldCtx = MemoryContextSwitchTo(buildstate->funcCtx); |
| 262 | entries = ginExtractEntries(buildstate->accum.ginstate, attnum, |
| 263 | value, isNull, |
| 264 | &nentries, &categories); |
| 265 | MemoryContextSwitchTo(oldCtx); |
| 266 | |
| 267 | ginInsertBAEntries(&buildstate->accum, heapptr, attnum, |
| 268 | entries, categories, nentries); |
| 269 | |
| 270 | buildstate->indtuples += nentries; |
| 271 | |
| 272 | MemoryContextReset(buildstate->funcCtx); |
| 273 | } |
| 274 | |
| 275 | static void |
| 276 | ginBuildCallback(Relation index, HeapTuple htup, Datum *values, |
| 277 | bool *isnull, bool tupleIsAlive, void *state) |
| 278 | { |
| 279 | GinBuildState *buildstate = (GinBuildState *) state; |
| 280 | MemoryContext oldCtx; |
| 281 | int i; |
| 282 | |
| 283 | oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); |
| 284 | |
| 285 | for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++) |
| 286 | ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1), |
| 287 | values[i], isnull[i], |
| 288 | &htup->t_self); |
| 289 | |
| 290 | /* If we've maxed out our available memory, dump everything to the index */ |
| 291 | if (buildstate->accum.allocatedMemory >= (Size) maintenance_work_mem * 1024L) |
| 292 | { |
| 293 | ItemPointerData *list; |
| 294 | Datum key; |
| 295 | GinNullCategory category; |
| 296 | uint32 nlist; |
| 297 | OffsetNumber attnum; |
| 298 | |
| 299 | ginBeginBAScan(&buildstate->accum); |
| 300 | while ((list = ginGetBAEntry(&buildstate->accum, |
| 301 | &attnum, &key, &category, &nlist)) != NULL) |
| 302 | { |
| 303 | /* there could be many entries, so be willing to abort here */ |
| 304 | CHECK_FOR_INTERRUPTS(); |
| 305 | ginEntryInsert(&buildstate->ginstate, attnum, key, category, |
| 306 | list, nlist, &buildstate->buildStats); |
| 307 | } |
| 308 | |
| 309 | MemoryContextReset(buildstate->tmpCtx); |
| 310 | ginInitBA(&buildstate->accum); |
| 311 | } |
| 312 | |
| 313 | MemoryContextSwitchTo(oldCtx); |
| 314 | } |
| 315 | |
| 316 | IndexBuildResult * |
| 317 | ginbuild(Relation heap, Relation index, IndexInfo *indexInfo) |
| 318 | { |
| 319 | IndexBuildResult *result; |
| 320 | double reltuples; |
| 321 | GinBuildState buildstate; |
| 322 | Buffer RootBuffer, |
| 323 | MetaBuffer; |
| 324 | ItemPointerData *list; |
| 325 | Datum key; |
| 326 | GinNullCategory category; |
| 327 | uint32 nlist; |
| 328 | MemoryContext oldCtx; |
| 329 | OffsetNumber attnum; |
| 330 | |
| 331 | if (RelationGetNumberOfBlocks(index) != 0) |
| 332 | elog(ERROR, "index \"%s\" already contains data" , |
| 333 | RelationGetRelationName(index)); |
| 334 | |
| 335 | initGinState(&buildstate.ginstate, index); |
| 336 | buildstate.indtuples = 0; |
| 337 | memset(&buildstate.buildStats, 0, sizeof(GinStatsData)); |
| 338 | |
| 339 | /* initialize the meta page */ |
| 340 | MetaBuffer = GinNewBuffer(index); |
| 341 | |
| 342 | /* initialize the root page */ |
| 343 | RootBuffer = GinNewBuffer(index); |
| 344 | |
| 345 | START_CRIT_SECTION(); |
| 346 | GinInitMetabuffer(MetaBuffer); |
| 347 | MarkBufferDirty(MetaBuffer); |
| 348 | GinInitBuffer(RootBuffer, GIN_LEAF); |
| 349 | MarkBufferDirty(RootBuffer); |
| 350 | |
| 351 | |
| 352 | UnlockReleaseBuffer(MetaBuffer); |
| 353 | UnlockReleaseBuffer(RootBuffer); |
| 354 | END_CRIT_SECTION(); |
| 355 | |
| 356 | /* count the root as first entry page */ |
| 357 | buildstate.buildStats.nEntryPages++; |
| 358 | |
| 359 | /* |
| 360 | * create a temporary memory context that is used to hold data not yet |
| 361 | * dumped out to the index |
| 362 | */ |
| 363 | buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext, |
| 364 | "Gin build temporary context" , |
| 365 | ALLOCSET_DEFAULT_SIZES); |
| 366 | |
| 367 | /* |
| 368 | * create a temporary memory context that is used for calling |
| 369 | * ginExtractEntries(), and can be reset after each tuple |
| 370 | */ |
| 371 | buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext, |
| 372 | "Gin build temporary context for user-defined function" , |
| 373 | ALLOCSET_DEFAULT_SIZES); |
| 374 | |
| 375 | buildstate.accum.ginstate = &buildstate.ginstate; |
| 376 | ginInitBA(&buildstate.accum); |
| 377 | |
| 378 | /* |
| 379 | * Do the heap scan. We disallow sync scan here because dataPlaceToPage |
| 380 | * prefers to receive tuples in TID order. |
| 381 | */ |
| 382 | reltuples = table_index_build_scan(heap, index, indexInfo, false, true, |
| 383 | ginBuildCallback, (void *) &buildstate, |
| 384 | NULL); |
| 385 | |
| 386 | /* dump remaining entries to the index */ |
| 387 | oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx); |
| 388 | ginBeginBAScan(&buildstate.accum); |
| 389 | while ((list = ginGetBAEntry(&buildstate.accum, |
| 390 | &attnum, &key, &category, &nlist)) != NULL) |
| 391 | { |
| 392 | /* there could be many entries, so be willing to abort here */ |
| 393 | CHECK_FOR_INTERRUPTS(); |
| 394 | ginEntryInsert(&buildstate.ginstate, attnum, key, category, |
| 395 | list, nlist, &buildstate.buildStats); |
| 396 | } |
| 397 | MemoryContextSwitchTo(oldCtx); |
| 398 | |
| 399 | MemoryContextDelete(buildstate.funcCtx); |
| 400 | MemoryContextDelete(buildstate.tmpCtx); |
| 401 | |
| 402 | /* |
| 403 | * Update metapage stats |
| 404 | */ |
| 405 | buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index); |
| 406 | ginUpdateStats(index, &buildstate.buildStats, true); |
| 407 | |
| 408 | /* |
| 409 | * We didn't write WAL records as we built the index, so if WAL-logging is |
| 410 | * required, write all pages to the WAL now. |
| 411 | */ |
| 412 | if (RelationNeedsWAL(index)) |
| 413 | { |
| 414 | log_newpage_range(index, MAIN_FORKNUM, |
| 415 | 0, RelationGetNumberOfBlocks(index), |
| 416 | true); |
| 417 | } |
| 418 | |
| 419 | /* |
| 420 | * Return statistics |
| 421 | */ |
| 422 | result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); |
| 423 | |
| 424 | result->heap_tuples = reltuples; |
| 425 | result->index_tuples = buildstate.indtuples; |
| 426 | |
| 427 | return result; |
| 428 | } |
| 429 | |
| 430 | /* |
| 431 | * ginbuildempty() -- build an empty gin index in the initialization fork |
| 432 | */ |
| 433 | void |
| 434 | ginbuildempty(Relation index) |
| 435 | { |
| 436 | Buffer RootBuffer, |
| 437 | MetaBuffer; |
| 438 | |
| 439 | /* An empty GIN index has two pages. */ |
| 440 | MetaBuffer = |
| 441 | ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL); |
| 442 | LockBuffer(MetaBuffer, BUFFER_LOCK_EXCLUSIVE); |
| 443 | RootBuffer = |
| 444 | ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL); |
| 445 | LockBuffer(RootBuffer, BUFFER_LOCK_EXCLUSIVE); |
| 446 | |
| 447 | /* Initialize and xlog metabuffer and root buffer. */ |
| 448 | START_CRIT_SECTION(); |
| 449 | GinInitMetabuffer(MetaBuffer); |
| 450 | MarkBufferDirty(MetaBuffer); |
| 451 | log_newpage_buffer(MetaBuffer, true); |
| 452 | GinInitBuffer(RootBuffer, GIN_LEAF); |
| 453 | MarkBufferDirty(RootBuffer); |
| 454 | log_newpage_buffer(RootBuffer, false); |
| 455 | END_CRIT_SECTION(); |
| 456 | |
| 457 | /* Unlock and release the buffers. */ |
| 458 | UnlockReleaseBuffer(MetaBuffer); |
| 459 | UnlockReleaseBuffer(RootBuffer); |
| 460 | } |
| 461 | |
| 462 | /* |
| 463 | * Insert index entries for a single indexable item during "normal" |
| 464 | * (non-fast-update) insertion |
| 465 | */ |
| 466 | static void |
| 467 | ginHeapTupleInsert(GinState *ginstate, OffsetNumber attnum, |
| 468 | Datum value, bool isNull, |
| 469 | ItemPointer item) |
| 470 | { |
| 471 | Datum *entries; |
| 472 | GinNullCategory *categories; |
| 473 | int32 i, |
| 474 | nentries; |
| 475 | |
| 476 | entries = ginExtractEntries(ginstate, attnum, value, isNull, |
| 477 | &nentries, &categories); |
| 478 | |
| 479 | for (i = 0; i < nentries; i++) |
| 480 | ginEntryInsert(ginstate, attnum, entries[i], categories[i], |
| 481 | item, 1, NULL); |
| 482 | } |
| 483 | |
| 484 | bool |
| 485 | gininsert(Relation index, Datum *values, bool *isnull, |
| 486 | ItemPointer ht_ctid, Relation heapRel, |
| 487 | IndexUniqueCheck checkUnique, |
| 488 | IndexInfo *indexInfo) |
| 489 | { |
| 490 | GinState *ginstate = (GinState *) indexInfo->ii_AmCache; |
| 491 | MemoryContext oldCtx; |
| 492 | MemoryContext insertCtx; |
| 493 | int i; |
| 494 | |
| 495 | /* Initialize GinState cache if first call in this statement */ |
| 496 | if (ginstate == NULL) |
| 497 | { |
| 498 | oldCtx = MemoryContextSwitchTo(indexInfo->ii_Context); |
| 499 | ginstate = (GinState *) palloc(sizeof(GinState)); |
| 500 | initGinState(ginstate, index); |
| 501 | indexInfo->ii_AmCache = (void *) ginstate; |
| 502 | MemoryContextSwitchTo(oldCtx); |
| 503 | } |
| 504 | |
| 505 | insertCtx = AllocSetContextCreate(CurrentMemoryContext, |
| 506 | "Gin insert temporary context" , |
| 507 | ALLOCSET_DEFAULT_SIZES); |
| 508 | |
| 509 | oldCtx = MemoryContextSwitchTo(insertCtx); |
| 510 | |
| 511 | if (GinGetUseFastUpdate(index)) |
| 512 | { |
| 513 | GinTupleCollector collector; |
| 514 | |
| 515 | memset(&collector, 0, sizeof(GinTupleCollector)); |
| 516 | |
| 517 | for (i = 0; i < ginstate->origTupdesc->natts; i++) |
| 518 | ginHeapTupleFastCollect(ginstate, &collector, |
| 519 | (OffsetNumber) (i + 1), |
| 520 | values[i], isnull[i], |
| 521 | ht_ctid); |
| 522 | |
| 523 | ginHeapTupleFastInsert(ginstate, &collector); |
| 524 | } |
| 525 | else |
| 526 | { |
| 527 | for (i = 0; i < ginstate->origTupdesc->natts; i++) |
| 528 | ginHeapTupleInsert(ginstate, (OffsetNumber) (i + 1), |
| 529 | values[i], isnull[i], |
| 530 | ht_ctid); |
| 531 | } |
| 532 | |
| 533 | MemoryContextSwitchTo(oldCtx); |
| 534 | MemoryContextDelete(insertCtx); |
| 535 | |
| 536 | return false; |
| 537 | } |
| 538 | |