1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * ginentrypage.c |
4 | * routines for handling GIN entry tree pages. |
5 | * |
6 | * |
7 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
8 | * Portions Copyright (c) 1994, Regents of the University of California |
9 | * |
10 | * IDENTIFICATION |
11 | * src/backend/access/gin/ginentrypage.c |
12 | *------------------------------------------------------------------------- |
13 | */ |
14 | |
15 | #include "postgres.h" |
16 | |
17 | #include "access/gin_private.h" |
18 | #include "access/ginxlog.h" |
19 | #include "access/xloginsert.h" |
20 | #include "miscadmin.h" |
21 | #include "utils/rel.h" |
22 | |
23 | static void entrySplitPage(GinBtree btree, Buffer origbuf, |
24 | GinBtreeStack *stack, |
25 | GinBtreeEntryInsertData *insertData, |
26 | BlockNumber updateblkno, |
27 | Page *newlpage, Page *newrpage); |
28 | |
29 | /* |
30 | * Form a tuple for entry tree. |
31 | * |
32 | * If the tuple would be too big to be stored, function throws a suitable |
33 | * error if errorTooBig is true, or returns NULL if errorTooBig is false. |
34 | * |
35 | * See src/backend/access/gin/README for a description of the index tuple |
36 | * format that is being built here. We build on the assumption that we |
37 | * are making a leaf-level key entry containing a posting list of nipd items. |
38 | * If the caller is actually trying to make a posting-tree entry, non-leaf |
39 | * entry, or pending-list entry, it should pass dataSize = 0 and then overwrite |
40 | * the t_tid fields as necessary. In any case, 'data' can be NULL to skip |
41 | * filling in the posting list; the caller is responsible for filling it |
42 | * afterwards if data = NULL and nipd > 0. |
43 | */ |
44 | IndexTuple |
45 | GinFormTuple(GinState *ginstate, |
46 | OffsetNumber attnum, Datum key, GinNullCategory category, |
47 | Pointer data, Size dataSize, int nipd, |
48 | bool errorTooBig) |
49 | { |
50 | Datum datums[2]; |
51 | bool isnull[2]; |
52 | IndexTuple itup; |
53 | uint32 newsize; |
54 | |
55 | /* Build the basic tuple: optional column number, plus key datum */ |
56 | if (ginstate->oneCol) |
57 | { |
58 | datums[0] = key; |
59 | isnull[0] = (category != GIN_CAT_NORM_KEY); |
60 | } |
61 | else |
62 | { |
63 | datums[0] = UInt16GetDatum(attnum); |
64 | isnull[0] = false; |
65 | datums[1] = key; |
66 | isnull[1] = (category != GIN_CAT_NORM_KEY); |
67 | } |
68 | |
69 | itup = index_form_tuple(ginstate->tupdesc[attnum - 1], datums, isnull); |
70 | |
71 | /* |
72 | * Determine and store offset to the posting list, making sure there is |
73 | * room for the category byte if needed. |
74 | * |
75 | * Note: because index_form_tuple MAXALIGNs the tuple size, there may well |
76 | * be some wasted pad space. Is it worth recomputing the data length to |
77 | * prevent that? That would also allow us to Assert that the real data |
78 | * doesn't overlap the GinNullCategory byte, which this code currently |
79 | * takes on faith. |
80 | */ |
81 | newsize = IndexTupleSize(itup); |
82 | |
83 | if (IndexTupleHasNulls(itup)) |
84 | { |
85 | uint32 minsize; |
86 | |
87 | Assert(category != GIN_CAT_NORM_KEY); |
88 | minsize = GinCategoryOffset(itup, ginstate) + sizeof(GinNullCategory); |
89 | newsize = Max(newsize, minsize); |
90 | } |
91 | |
92 | newsize = SHORTALIGN(newsize); |
93 | |
94 | GinSetPostingOffset(itup, newsize); |
95 | GinSetNPosting(itup, nipd); |
96 | |
97 | /* |
98 | * Add space needed for posting list, if any. Then check that the tuple |
99 | * won't be too big to store. |
100 | */ |
101 | newsize += dataSize; |
102 | |
103 | newsize = MAXALIGN(newsize); |
104 | |
105 | if (newsize > GinMaxItemSize) |
106 | { |
107 | if (errorTooBig) |
108 | ereport(ERROR, |
109 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
110 | errmsg("index row size %zu exceeds maximum %zu for index \"%s\"" , |
111 | (Size) newsize, (Size) GinMaxItemSize, |
112 | RelationGetRelationName(ginstate->index)))); |
113 | pfree(itup); |
114 | return NULL; |
115 | } |
116 | |
117 | /* |
118 | * Resize tuple if needed |
119 | */ |
120 | if (newsize != IndexTupleSize(itup)) |
121 | { |
122 | itup = repalloc(itup, newsize); |
123 | |
124 | /* |
125 | * PostgreSQL 9.3 and earlier did not clear this new space, so we |
126 | * might find uninitialized padding when reading tuples from disk. |
127 | */ |
128 | memset((char *) itup + IndexTupleSize(itup), |
129 | 0, newsize - IndexTupleSize(itup)); |
130 | /* set new size in tuple header */ |
131 | itup->t_info &= ~INDEX_SIZE_MASK; |
132 | itup->t_info |= newsize; |
133 | } |
134 | |
135 | /* |
136 | * Copy in the posting list, if provided |
137 | */ |
138 | if (data) |
139 | { |
140 | char *ptr = GinGetPosting(itup); |
141 | |
142 | memcpy(ptr, data, dataSize); |
143 | } |
144 | |
145 | /* |
146 | * Insert category byte, if needed |
147 | */ |
148 | if (category != GIN_CAT_NORM_KEY) |
149 | { |
150 | Assert(IndexTupleHasNulls(itup)); |
151 | GinSetNullCategory(itup, ginstate, category); |
152 | } |
153 | return itup; |
154 | } |
155 | |
156 | /* |
157 | * Read item pointers from leaf entry tuple. |
158 | * |
159 | * Returns a palloc'd array of ItemPointers. The number of items is returned |
160 | * in *nitems. |
161 | */ |
162 | ItemPointer |
163 | ginReadTuple(GinState *ginstate, OffsetNumber attnum, IndexTuple itup, |
164 | int *nitems) |
165 | { |
166 | Pointer ptr = GinGetPosting(itup); |
167 | int nipd = GinGetNPosting(itup); |
168 | ItemPointer ipd; |
169 | int ndecoded; |
170 | |
171 | if (GinItupIsCompressed(itup)) |
172 | { |
173 | if (nipd > 0) |
174 | { |
175 | ipd = ginPostingListDecode((GinPostingList *) ptr, &ndecoded); |
176 | if (nipd != ndecoded) |
177 | elog(ERROR, "number of items mismatch in GIN entry tuple, %d in tuple header, %d decoded" , |
178 | nipd, ndecoded); |
179 | } |
180 | else |
181 | { |
182 | ipd = palloc(0); |
183 | } |
184 | } |
185 | else |
186 | { |
187 | ipd = (ItemPointer) palloc(sizeof(ItemPointerData) * nipd); |
188 | memcpy(ipd, ptr, sizeof(ItemPointerData) * nipd); |
189 | } |
190 | *nitems = nipd; |
191 | return ipd; |
192 | } |
193 | |
194 | /* |
195 | * Form a non-leaf entry tuple by copying the key data from the given tuple, |
196 | * which can be either a leaf or non-leaf entry tuple. |
197 | * |
198 | * Any posting list in the source tuple is not copied. The specified child |
199 | * block number is inserted into t_tid. |
200 | */ |
201 | static IndexTuple |
202 | GinFormInteriorTuple(IndexTuple itup, Page page, BlockNumber childblk) |
203 | { |
204 | IndexTuple nitup; |
205 | |
206 | if (GinPageIsLeaf(page) && !GinIsPostingTree(itup)) |
207 | { |
208 | /* Tuple contains a posting list, just copy stuff before that */ |
209 | uint32 origsize = GinGetPostingOffset(itup); |
210 | |
211 | origsize = MAXALIGN(origsize); |
212 | nitup = (IndexTuple) palloc(origsize); |
213 | memcpy(nitup, itup, origsize); |
214 | /* ... be sure to fix the size header field ... */ |
215 | nitup->t_info &= ~INDEX_SIZE_MASK; |
216 | nitup->t_info |= origsize; |
217 | } |
218 | else |
219 | { |
220 | /* Copy the tuple as-is */ |
221 | nitup = (IndexTuple) palloc(IndexTupleSize(itup)); |
222 | memcpy(nitup, itup, IndexTupleSize(itup)); |
223 | } |
224 | |
225 | /* Now insert the correct downlink */ |
226 | GinSetDownlink(nitup, childblk); |
227 | |
228 | return nitup; |
229 | } |
230 | |
231 | /* |
232 | * Entry tree is a "static", ie tuple never deletes from it, |
233 | * so we don't use right bound, we use rightmost key instead. |
234 | */ |
235 | static IndexTuple |
236 | getRightMostTuple(Page page) |
237 | { |
238 | OffsetNumber maxoff = PageGetMaxOffsetNumber(page); |
239 | |
240 | return (IndexTuple) PageGetItem(page, PageGetItemId(page, maxoff)); |
241 | } |
242 | |
243 | static bool |
244 | entryIsMoveRight(GinBtree btree, Page page) |
245 | { |
246 | IndexTuple itup; |
247 | OffsetNumber attnum; |
248 | Datum key; |
249 | GinNullCategory category; |
250 | |
251 | if (GinPageRightMost(page)) |
252 | return false; |
253 | |
254 | itup = getRightMostTuple(page); |
255 | attnum = gintuple_get_attrnum(btree->ginstate, itup); |
256 | key = gintuple_get_key(btree->ginstate, itup, &category); |
257 | |
258 | if (ginCompareAttEntries(btree->ginstate, |
259 | btree->entryAttnum, btree->entryKey, btree->entryCategory, |
260 | attnum, key, category) > 0) |
261 | return true; |
262 | |
263 | return false; |
264 | } |
265 | |
266 | /* |
267 | * Find correct tuple in non-leaf page. It supposed that |
268 | * page correctly chosen and searching value SHOULD be on page |
269 | */ |
270 | static BlockNumber |
271 | entryLocateEntry(GinBtree btree, GinBtreeStack *stack) |
272 | { |
273 | OffsetNumber low, |
274 | high, |
275 | maxoff; |
276 | IndexTuple itup = NULL; |
277 | int result; |
278 | Page page = BufferGetPage(stack->buffer); |
279 | |
280 | Assert(!GinPageIsLeaf(page)); |
281 | Assert(!GinPageIsData(page)); |
282 | |
283 | if (btree->fullScan) |
284 | { |
285 | stack->off = FirstOffsetNumber; |
286 | stack->predictNumber *= PageGetMaxOffsetNumber(page); |
287 | return btree->getLeftMostChild(btree, page); |
288 | } |
289 | |
290 | low = FirstOffsetNumber; |
291 | maxoff = high = PageGetMaxOffsetNumber(page); |
292 | Assert(high >= low); |
293 | |
294 | high++; |
295 | |
296 | while (high > low) |
297 | { |
298 | OffsetNumber mid = low + ((high - low) / 2); |
299 | |
300 | if (mid == maxoff && GinPageRightMost(page)) |
301 | { |
302 | /* Right infinity */ |
303 | result = -1; |
304 | } |
305 | else |
306 | { |
307 | OffsetNumber attnum; |
308 | Datum key; |
309 | GinNullCategory category; |
310 | |
311 | itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid)); |
312 | attnum = gintuple_get_attrnum(btree->ginstate, itup); |
313 | key = gintuple_get_key(btree->ginstate, itup, &category); |
314 | result = ginCompareAttEntries(btree->ginstate, |
315 | btree->entryAttnum, |
316 | btree->entryKey, |
317 | btree->entryCategory, |
318 | attnum, key, category); |
319 | } |
320 | |
321 | if (result == 0) |
322 | { |
323 | stack->off = mid; |
324 | Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO); |
325 | return GinGetDownlink(itup); |
326 | } |
327 | else if (result > 0) |
328 | low = mid + 1; |
329 | else |
330 | high = mid; |
331 | } |
332 | |
333 | Assert(high >= FirstOffsetNumber && high <= maxoff); |
334 | |
335 | stack->off = high; |
336 | itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, high)); |
337 | Assert(GinGetDownlink(itup) != GIN_ROOT_BLKNO); |
338 | return GinGetDownlink(itup); |
339 | } |
340 | |
341 | /* |
342 | * Searches correct position for value on leaf page. |
343 | * Page should be correctly chosen. |
344 | * Returns true if value found on page. |
345 | */ |
346 | static bool |
347 | entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack) |
348 | { |
349 | Page page = BufferGetPage(stack->buffer); |
350 | OffsetNumber low, |
351 | high; |
352 | |
353 | Assert(GinPageIsLeaf(page)); |
354 | Assert(!GinPageIsData(page)); |
355 | |
356 | if (btree->fullScan) |
357 | { |
358 | stack->off = FirstOffsetNumber; |
359 | return true; |
360 | } |
361 | |
362 | low = FirstOffsetNumber; |
363 | high = PageGetMaxOffsetNumber(page); |
364 | |
365 | if (high < low) |
366 | { |
367 | stack->off = FirstOffsetNumber; |
368 | return false; |
369 | } |
370 | |
371 | high++; |
372 | |
373 | while (high > low) |
374 | { |
375 | OffsetNumber mid = low + ((high - low) / 2); |
376 | IndexTuple itup; |
377 | OffsetNumber attnum; |
378 | Datum key; |
379 | GinNullCategory category; |
380 | int result; |
381 | |
382 | itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid)); |
383 | attnum = gintuple_get_attrnum(btree->ginstate, itup); |
384 | key = gintuple_get_key(btree->ginstate, itup, &category); |
385 | result = ginCompareAttEntries(btree->ginstate, |
386 | btree->entryAttnum, |
387 | btree->entryKey, |
388 | btree->entryCategory, |
389 | attnum, key, category); |
390 | if (result == 0) |
391 | { |
392 | stack->off = mid; |
393 | return true; |
394 | } |
395 | else if (result > 0) |
396 | low = mid + 1; |
397 | else |
398 | high = mid; |
399 | } |
400 | |
401 | stack->off = high; |
402 | return false; |
403 | } |
404 | |
405 | static OffsetNumber |
406 | entryFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) |
407 | { |
408 | OffsetNumber i, |
409 | maxoff = PageGetMaxOffsetNumber(page); |
410 | IndexTuple itup; |
411 | |
412 | Assert(!GinPageIsLeaf(page)); |
413 | Assert(!GinPageIsData(page)); |
414 | |
415 | /* if page isn't changed, we returns storedOff */ |
416 | if (storedOff >= FirstOffsetNumber && storedOff <= maxoff) |
417 | { |
418 | itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, storedOff)); |
419 | if (GinGetDownlink(itup) == blkno) |
420 | return storedOff; |
421 | |
422 | /* |
423 | * we hope, that needed pointer goes to right. It's true if there |
424 | * wasn't a deletion |
425 | */ |
426 | for (i = storedOff + 1; i <= maxoff; i++) |
427 | { |
428 | itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); |
429 | if (GinGetDownlink(itup) == blkno) |
430 | return i; |
431 | } |
432 | maxoff = storedOff - 1; |
433 | } |
434 | |
435 | /* last chance */ |
436 | for (i = FirstOffsetNumber; i <= maxoff; i++) |
437 | { |
438 | itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); |
439 | if (GinGetDownlink(itup) == blkno) |
440 | return i; |
441 | } |
442 | |
443 | return InvalidOffsetNumber; |
444 | } |
445 | |
446 | static BlockNumber |
447 | entryGetLeftMostPage(GinBtree btree, Page page) |
448 | { |
449 | IndexTuple itup; |
450 | |
451 | Assert(!GinPageIsLeaf(page)); |
452 | Assert(!GinPageIsData(page)); |
453 | Assert(PageGetMaxOffsetNumber(page) >= FirstOffsetNumber); |
454 | |
455 | itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, FirstOffsetNumber)); |
456 | return GinGetDownlink(itup); |
457 | } |
458 | |
459 | static bool |
460 | entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off, |
461 | GinBtreeEntryInsertData *insertData) |
462 | { |
463 | Size releasedsz = 0; |
464 | Size addedsz; |
465 | Page page = BufferGetPage(buf); |
466 | |
467 | Assert(insertData->entry); |
468 | Assert(!GinPageIsData(page)); |
469 | |
470 | if (insertData->isDelete) |
471 | { |
472 | IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); |
473 | |
474 | releasedsz = MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); |
475 | } |
476 | |
477 | addedsz = MAXALIGN(IndexTupleSize(insertData->entry)) + sizeof(ItemIdData); |
478 | |
479 | if (PageGetFreeSpace(page) + releasedsz >= addedsz) |
480 | return true; |
481 | |
482 | return false; |
483 | } |
484 | |
485 | /* |
486 | * Delete tuple on leaf page if tuples existed and we |
487 | * should update it, update old child blkno to new right page |
488 | * if child split occurred |
489 | */ |
490 | static void |
491 | entryPreparePage(GinBtree btree, Page page, OffsetNumber off, |
492 | GinBtreeEntryInsertData *insertData, BlockNumber updateblkno) |
493 | { |
494 | Assert(insertData->entry); |
495 | Assert(!GinPageIsData(page)); |
496 | |
497 | if (insertData->isDelete) |
498 | { |
499 | Assert(GinPageIsLeaf(page)); |
500 | PageIndexTupleDelete(page, off); |
501 | } |
502 | |
503 | if (!GinPageIsLeaf(page) && updateblkno != InvalidBlockNumber) |
504 | { |
505 | IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off)); |
506 | |
507 | GinSetDownlink(itup, updateblkno); |
508 | } |
509 | } |
510 | |
511 | /* |
512 | * Prepare to insert data on an entry page. |
513 | * |
514 | * If it will fit, return GPTP_INSERT after doing whatever setup is needed |
515 | * before we enter the insertion critical section. *ptp_workspace can be |
516 | * set to pass information along to the execPlaceToPage function. |
517 | * |
518 | * If it won't fit, perform a page split and return two temporary page |
519 | * images into *newlpage and *newrpage, with result GPTP_SPLIT. |
520 | * |
521 | * In neither case should the given page buffer be modified here. |
522 | * |
523 | * Note: on insertion to an internal node, in addition to inserting the given |
524 | * item, the downlink of the existing item at stack->off will be updated to |
525 | * point to updateblkno. |
526 | */ |
527 | static GinPlaceToPageRC |
528 | entryBeginPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, |
529 | void *insertPayload, BlockNumber updateblkno, |
530 | void **ptp_workspace, |
531 | Page *newlpage, Page *newrpage) |
532 | { |
533 | GinBtreeEntryInsertData *insertData = insertPayload; |
534 | OffsetNumber off = stack->off; |
535 | |
536 | /* If it doesn't fit, deal with split case */ |
537 | if (!entryIsEnoughSpace(btree, buf, off, insertData)) |
538 | { |
539 | entrySplitPage(btree, buf, stack, insertData, updateblkno, |
540 | newlpage, newrpage); |
541 | return GPTP_SPLIT; |
542 | } |
543 | |
544 | /* Else, we're ready to proceed with insertion */ |
545 | return GPTP_INSERT; |
546 | } |
547 | |
548 | /* |
549 | * Perform data insertion after beginPlaceToPage has decided it will fit. |
550 | * |
551 | * This is invoked within a critical section, and XLOG record creation (if |
552 | * needed) is already started. The target buffer is registered in slot 0. |
553 | */ |
554 | static void |
555 | entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack, |
556 | void *insertPayload, BlockNumber updateblkno, |
557 | void *ptp_workspace) |
558 | { |
559 | GinBtreeEntryInsertData *insertData = insertPayload; |
560 | Page page = BufferGetPage(buf); |
561 | OffsetNumber off = stack->off; |
562 | OffsetNumber placed; |
563 | |
564 | entryPreparePage(btree, page, off, insertData, updateblkno); |
565 | |
566 | placed = PageAddItem(page, |
567 | (Item) insertData->entry, |
568 | IndexTupleSize(insertData->entry), |
569 | off, false, false); |
570 | if (placed != off) |
571 | elog(ERROR, "failed to add item to index page in \"%s\"" , |
572 | RelationGetRelationName(btree->index)); |
573 | |
574 | if (RelationNeedsWAL(btree->index) && !btree->isBuild) |
575 | { |
576 | /* |
577 | * This must be static, because it has to survive until XLogInsert, |
578 | * and we can't palloc here. Ugly, but the XLogInsert infrastructure |
579 | * isn't reentrant anyway. |
580 | */ |
581 | static ginxlogInsertEntry data; |
582 | |
583 | data.isDelete = insertData->isDelete; |
584 | data.offset = off; |
585 | |
586 | XLogRegisterBufData(0, (char *) &data, |
587 | offsetof(ginxlogInsertEntry, tuple)); |
588 | XLogRegisterBufData(0, (char *) insertData->entry, |
589 | IndexTupleSize(insertData->entry)); |
590 | } |
591 | } |
592 | |
593 | /* |
594 | * Split entry page and insert new data. |
595 | * |
596 | * Returns new temp pages to *newlpage and *newrpage. |
597 | * The original buffer is left untouched. |
598 | */ |
599 | static void |
600 | entrySplitPage(GinBtree btree, Buffer origbuf, |
601 | GinBtreeStack *stack, |
602 | GinBtreeEntryInsertData *insertData, |
603 | BlockNumber updateblkno, |
604 | Page *newlpage, Page *newrpage) |
605 | { |
606 | OffsetNumber off = stack->off; |
607 | OffsetNumber i, |
608 | maxoff, |
609 | separator = InvalidOffsetNumber; |
610 | Size totalsize = 0; |
611 | Size lsize = 0, |
612 | size; |
613 | char *ptr; |
614 | IndexTuple itup; |
615 | Page page; |
616 | Page lpage = PageGetTempPageCopy(BufferGetPage(origbuf)); |
617 | Page rpage = PageGetTempPageCopy(BufferGetPage(origbuf)); |
618 | Size pageSize = PageGetPageSize(lpage); |
619 | PGAlignedBlock tupstore[2]; /* could need 2 pages' worth of tuples */ |
620 | |
621 | entryPreparePage(btree, lpage, off, insertData, updateblkno); |
622 | |
623 | /* |
624 | * First, append all the existing tuples and the new tuple we're inserting |
625 | * one after another in a temporary workspace. |
626 | */ |
627 | maxoff = PageGetMaxOffsetNumber(lpage); |
628 | ptr = tupstore[0].data; |
629 | for (i = FirstOffsetNumber; i <= maxoff; i++) |
630 | { |
631 | if (i == off) |
632 | { |
633 | size = MAXALIGN(IndexTupleSize(insertData->entry)); |
634 | memcpy(ptr, insertData->entry, size); |
635 | ptr += size; |
636 | totalsize += size + sizeof(ItemIdData); |
637 | } |
638 | |
639 | itup = (IndexTuple) PageGetItem(lpage, PageGetItemId(lpage, i)); |
640 | size = MAXALIGN(IndexTupleSize(itup)); |
641 | memcpy(ptr, itup, size); |
642 | ptr += size; |
643 | totalsize += size + sizeof(ItemIdData); |
644 | } |
645 | |
646 | if (off == maxoff + 1) |
647 | { |
648 | size = MAXALIGN(IndexTupleSize(insertData->entry)); |
649 | memcpy(ptr, insertData->entry, size); |
650 | ptr += size; |
651 | totalsize += size + sizeof(ItemIdData); |
652 | } |
653 | |
654 | /* |
655 | * Initialize the left and right pages, and copy all the tuples back to |
656 | * them. |
657 | */ |
658 | GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize); |
659 | GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize); |
660 | |
661 | ptr = tupstore[0].data; |
662 | maxoff++; |
663 | lsize = 0; |
664 | |
665 | page = lpage; |
666 | for (i = FirstOffsetNumber; i <= maxoff; i++) |
667 | { |
668 | itup = (IndexTuple) ptr; |
669 | |
670 | /* |
671 | * Decide where to split. We try to equalize the pages' total data |
672 | * size, not number of tuples. |
673 | */ |
674 | if (lsize > totalsize / 2) |
675 | { |
676 | if (separator == InvalidOffsetNumber) |
677 | separator = i - 1; |
678 | page = rpage; |
679 | } |
680 | else |
681 | { |
682 | lsize += MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData); |
683 | } |
684 | |
685 | if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber) |
686 | elog(ERROR, "failed to add item to index page in \"%s\"" , |
687 | RelationGetRelationName(btree->index)); |
688 | ptr += MAXALIGN(IndexTupleSize(itup)); |
689 | } |
690 | |
691 | /* return temp pages to caller */ |
692 | *newlpage = lpage; |
693 | *newrpage = rpage; |
694 | } |
695 | |
696 | /* |
697 | * Construct insertion payload for inserting the downlink for given buffer. |
698 | */ |
699 | static void * |
700 | entryPrepareDownlink(GinBtree btree, Buffer lbuf) |
701 | { |
702 | GinBtreeEntryInsertData *insertData; |
703 | Page lpage = BufferGetPage(lbuf); |
704 | BlockNumber lblkno = BufferGetBlockNumber(lbuf); |
705 | IndexTuple itup; |
706 | |
707 | itup = getRightMostTuple(lpage); |
708 | |
709 | insertData = palloc(sizeof(GinBtreeEntryInsertData)); |
710 | insertData->entry = GinFormInteriorTuple(itup, lpage, lblkno); |
711 | insertData->isDelete = false; |
712 | |
713 | return insertData; |
714 | } |
715 | |
716 | /* |
717 | * Fills new root by rightest values from child. |
718 | * Also called from ginxlog, should not use btree |
719 | */ |
720 | void |
721 | ginEntryFillRoot(GinBtree btree, Page root, |
722 | BlockNumber lblkno, Page lpage, |
723 | BlockNumber rblkno, Page rpage) |
724 | { |
725 | IndexTuple itup; |
726 | |
727 | itup = GinFormInteriorTuple(getRightMostTuple(lpage), lpage, lblkno); |
728 | if (PageAddItem(root, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber) |
729 | elog(ERROR, "failed to add item to index root page" ); |
730 | pfree(itup); |
731 | |
732 | itup = GinFormInteriorTuple(getRightMostTuple(rpage), rpage, rblkno); |
733 | if (PageAddItem(root, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber) |
734 | elog(ERROR, "failed to add item to index root page" ); |
735 | pfree(itup); |
736 | } |
737 | |
738 | /* |
739 | * Set up GinBtree for entry page access |
740 | * |
741 | * Note: during WAL recovery, there may be no valid data in ginstate |
742 | * other than a faked-up Relation pointer; the key datum is bogus too. |
743 | */ |
744 | void |
745 | ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum, |
746 | Datum key, GinNullCategory category, |
747 | GinState *ginstate) |
748 | { |
749 | memset(btree, 0, sizeof(GinBtreeData)); |
750 | |
751 | btree->index = ginstate->index; |
752 | btree->rootBlkno = GIN_ROOT_BLKNO; |
753 | btree->ginstate = ginstate; |
754 | |
755 | btree->findChildPage = entryLocateEntry; |
756 | btree->getLeftMostChild = entryGetLeftMostPage; |
757 | btree->isMoveRight = entryIsMoveRight; |
758 | btree->findItem = entryLocateLeafEntry; |
759 | btree->findChildPtr = entryFindChildPtr; |
760 | btree->beginPlaceToPage = entryBeginPlaceToPage; |
761 | btree->execPlaceToPage = entryExecPlaceToPage; |
762 | btree->fillRoot = ginEntryFillRoot; |
763 | btree->prepareDownlink = entryPrepareDownlink; |
764 | |
765 | btree->isData = false; |
766 | btree->fullScan = false; |
767 | btree->isBuild = false; |
768 | |
769 | btree->entryAttnum = attnum; |
770 | btree->entryKey = key; |
771 | btree->entryCategory = category; |
772 | } |
773 | |