1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * gistbuild.c |
4 | * implementation of the build algorithm for GiST indexes. |
5 | * |
6 | * |
7 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
8 | * Portions Copyright (c) 1994, Regents of the University of California |
9 | * |
10 | * IDENTIFICATION |
11 | * src/backend/access/gist/gistbuild.c |
12 | * |
13 | *------------------------------------------------------------------------- |
14 | */ |
15 | #include "postgres.h" |
16 | |
17 | #include <math.h> |
18 | |
19 | #include "access/genam.h" |
20 | #include "access/gist_private.h" |
21 | #include "access/gistxlog.h" |
22 | #include "access/tableam.h" |
23 | #include "access/xloginsert.h" |
24 | #include "catalog/index.h" |
25 | #include "miscadmin.h" |
26 | #include "optimizer/optimizer.h" |
27 | #include "storage/bufmgr.h" |
28 | #include "storage/smgr.h" |
29 | #include "utils/memutils.h" |
30 | #include "utils/rel.h" |
31 | |
32 | /* Interval, in index tuples, between checks of whether to switch to buffering build mode */ |
33 | #define BUFFERING_MODE_SWITCH_CHECK_STEP 256 |
34 | |
35 | /* |
36 | * Number of tuples to process in the slow way before switching to buffering |
37 | * mode, when buffering is explicitly turned on. Also, the number of tuples |
38 | * to process between readjusting the buffer size parameter, while in |
39 | * buffering mode. |
40 | */ |
41 | #define BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET 4096 |
42 | |
43 | typedef enum |
44 | { |
45 | GIST_BUFFERING_DISABLED, /* in regular build mode and aren't going to |
46 | * switch */ |
47 | GIST_BUFFERING_AUTO, /* in regular build mode, but will switch to |
48 | * buffering build mode if the index grows too |
49 | * big */ |
50 | GIST_BUFFERING_STATS, /* gathering statistics of index tuple size |
51 | * before switching to the buffering build |
52 | * mode */ |
53 | GIST_BUFFERING_ACTIVE /* in buffering build mode */ |
54 | } GistBufferingMode; |
55 | |
56 | /* Working state for gistbuild and its callback */ |
57 | typedef struct |
58 | { |
59 | Relation indexrel; |
60 | Relation heaprel; |
61 | GISTSTATE *giststate; |
62 | |
63 | int64 indtuples; /* number of tuples indexed */ |
64 | int64 indtuplesSize; /* total size of all indexed tuples */ |
65 | |
66 | Size freespace; /* amount of free space to leave on pages */ |
67 | |
68 | /* |
69 | * Extra data structures used during a buffering build. 'gfbb' contains |
70 | * information related to managing the build buffers. 'parentMap' is a |
71 | * lookup table of the parent of each internal page. |
72 | */ |
73 | GISTBuildBuffers *gfbb; |
74 | HTAB *parentMap; |
75 | |
76 | GistBufferingMode bufferingMode; |
77 | } GISTBuildState; |
78 | |
79 | /* prototypes for private functions */ |
80 | static void gistInitBuffering(GISTBuildState *buildstate); |
81 | static int calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep); |
82 | static void gistBuildCallback(Relation index, |
83 | HeapTuple htup, |
84 | Datum *values, |
85 | bool *isnull, |
86 | bool tupleIsAlive, |
87 | void *state); |
88 | static void gistBufferingBuildInsert(GISTBuildState *buildstate, |
89 | IndexTuple itup); |
90 | static bool gistProcessItup(GISTBuildState *buildstate, IndexTuple itup, |
91 | BlockNumber startblkno, int startlevel); |
92 | static BlockNumber gistbufferinginserttuples(GISTBuildState *buildstate, |
93 | Buffer buffer, int level, |
94 | IndexTuple *itup, int ntup, OffsetNumber oldoffnum, |
95 | BlockNumber parentblk, OffsetNumber downlinkoffnum); |
96 | static Buffer gistBufferingFindCorrectParent(GISTBuildState *buildstate, |
97 | BlockNumber childblkno, int level, |
98 | BlockNumber *parentblk, |
99 | OffsetNumber *downlinkoffnum); |
100 | static void gistProcessEmptyingQueue(GISTBuildState *buildstate); |
101 | static void gistEmptyAllBuffers(GISTBuildState *buildstate); |
102 | static int gistGetMaxLevel(Relation index); |
103 | |
104 | static void gistInitParentMap(GISTBuildState *buildstate); |
105 | static void gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child, |
106 | BlockNumber parent); |
107 | static void gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parent); |
108 | static BlockNumber gistGetParent(GISTBuildState *buildstate, BlockNumber child); |
109 | |
110 | /* |
111 | * Main entry point to GiST index build. Initially calls insert over and over, |
112 | * but switches to more efficient buffering build algorithm after a certain |
113 | * number of tuples (unless buffering mode is disabled). |
114 | */ |
115 | IndexBuildResult * |
116 | gistbuild(Relation heap, Relation index, IndexInfo *indexInfo) |
117 | { |
118 | IndexBuildResult *result; |
119 | double reltuples; |
120 | GISTBuildState buildstate; |
121 | Buffer buffer; |
122 | Page page; |
123 | MemoryContext oldcxt = CurrentMemoryContext; |
124 | int fillfactor; |
125 | |
126 | buildstate.indexrel = index; |
127 | buildstate.heaprel = heap; |
128 | if (index->rd_options) |
129 | { |
130 | /* Get buffering mode from the options string */ |
131 | GiSTOptions *options = (GiSTOptions *) index->rd_options; |
132 | char *bufferingMode = (char *) options + options->bufferingModeOffset; |
133 | |
134 | if (strcmp(bufferingMode, "on") == 0) |
135 | buildstate.bufferingMode = GIST_BUFFERING_STATS; |
136 | else if (strcmp(bufferingMode, "off") == 0) |
137 | buildstate.bufferingMode = GIST_BUFFERING_DISABLED; |
138 | else |
139 | buildstate.bufferingMode = GIST_BUFFERING_AUTO; |
140 | |
141 | fillfactor = options->fillfactor; |
142 | } |
143 | else |
144 | { |
145 | /* |
146 | * By default, switch to buffering mode when the index grows too large |
147 | * to fit in cache. |
148 | */ |
149 | buildstate.bufferingMode = GIST_BUFFERING_AUTO; |
150 | fillfactor = GIST_DEFAULT_FILLFACTOR; |
151 | } |
152 | /* Calculate target amount of free space to leave on pages */ |
153 | buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100; |
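| /* |
| * Worked example (illustrative numbers only): with the default 8 kB BLCKSZ |
| * and fillfactor = 90, this leaves 8192 * 10 / 100 = 819 bytes unused on |
| * each page. |
| */ |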
154 | |
155 | /* |
156 | * We expect to be called exactly once for any index relation. If that's |
157 | * not the case, big trouble's what we have. |
158 | */ |
159 | if (RelationGetNumberOfBlocks(index) != 0) |
160 | elog(ERROR, "index \"%s\" already contains data" , |
161 | RelationGetRelationName(index)); |
162 | |
163 | /* no locking is needed */ |
164 | buildstate.giststate = initGISTstate(index); |
165 | |
166 | /* |
167 | * Create a temporary memory context that is reset once for each tuple |
168 | * processed. (Note: we don't bother to make this a child of the |
169 | * giststate's scanCxt, so we have to delete it separately at the end.) |
170 | */ |
171 | buildstate.giststate->tempCxt = createTempGistContext(); |
172 | |
173 | /* initialize the root page */ |
174 | buffer = gistNewBuffer(index); |
175 | Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO); |
176 | page = BufferGetPage(buffer); |
177 | |
178 | START_CRIT_SECTION(); |
179 | |
180 | GISTInitBuffer(buffer, F_LEAF); |
181 | |
182 | MarkBufferDirty(buffer); |
183 | PageSetLSN(page, GistBuildLSN); |
184 | |
185 | UnlockReleaseBuffer(buffer); |
186 | |
187 | END_CRIT_SECTION(); |
188 | |
189 | /* build the index */ |
190 | buildstate.indtuples = 0; |
191 | buildstate.indtuplesSize = 0; |
192 | |
193 | /* |
194 | * Do the heap scan. |
195 | */ |
196 | reltuples = table_index_build_scan(heap, index, indexInfo, true, true, |
197 | gistBuildCallback, |
198 | (void *) &buildstate, NULL); |
199 | |
200 | /* |
201 | * If buffering was used, flush out all the tuples that are still in the |
202 | * buffers. |
203 | */ |
204 | if (buildstate.bufferingMode == GIST_BUFFERING_ACTIVE) |
205 | { |
206 | elog(DEBUG1, "all tuples processed, emptying buffers"); |
207 | gistEmptyAllBuffers(&buildstate); |
208 | gistFreeBuildBuffers(buildstate.gfbb); |
209 | } |
210 | |
211 | /* okay, all heap tuples are indexed */ |
212 | MemoryContextSwitchTo(oldcxt); |
213 | MemoryContextDelete(buildstate.giststate->tempCxt); |
214 | |
215 | freeGISTstate(buildstate.giststate); |
216 | |
217 | /* |
218 | * We didn't write WAL records as we built the index, so if WAL-logging is |
219 | * required, write all pages to the WAL now. |
220 | */ |
221 | if (RelationNeedsWAL(index)) |
222 | { |
223 | log_newpage_range(index, MAIN_FORKNUM, |
224 | 0, RelationGetNumberOfBlocks(index), |
225 | true); |
226 | } |
227 | |
228 | /* |
229 | * Return statistics |
230 | */ |
231 | result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult)); |
232 | |
233 | result->heap_tuples = reltuples; |
234 | result->index_tuples = (double) buildstate.indtuples; |
235 | |
236 | return result; |
237 | } |
238 | |
239 | /* |
240 | * Validator for "buffering" reloption on GiST indexes. Allows "on", "off" |
241 | * and "auto" values. |
242 | */ |
243 | void |
244 | gistValidateBufferingOption(const char *value) |
245 | { |
246 | if (value == NULL || |
247 | (strcmp(value, "on") != 0 && |
248 | strcmp(value, "off") != 0 && |
249 | strcmp(value, "auto") != 0)) |
250 | { |
251 | ereport(ERROR, |
252 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
253 | errmsg("invalid value for \"buffering\" option" ), |
254 | errdetail("Valid values are \"on\", \"off\", and \"auto\"." ))); |
255 | } |
256 | } |
257 | |
258 | /* |
259 | * Attempt to switch to buffering mode. |
260 | * |
261 | * If there is not enough memory for buffering build, sets bufferingMode |
262 | * to GIST_BUFFERING_DISABLED, so that we don't bother to try the switch |
263 | * anymore. Otherwise initializes the build buffers, and sets bufferingMode to |
264 | * GIST_BUFFERING_ACTIVE. |
265 | */ |
266 | static void |
267 | gistInitBuffering(GISTBuildState *buildstate) |
268 | { |
269 | Relation index = buildstate->indexrel; |
270 | int pagesPerBuffer; |
271 | Size pageFreeSpace; |
272 | Size itupAvgSize, |
273 | itupMinSize; |
274 | double avgIndexTuplesPerPage, |
275 | maxIndexTuplesPerPage; |
276 | int i; |
277 | int levelStep; |
278 | |
279 | /* Calculate the space on an index page that is available for index tuples */ |
280 | pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData) |
281 | - sizeof(ItemIdData) |
282 | - buildstate->freespace; |
283 | |
284 | /* |
285 | * Calculate average size of already inserted index tuples using gathered |
286 | * statistics. |
287 | */ |
288 | itupAvgSize = (double) buildstate->indtuplesSize / |
289 | (double) buildstate->indtuples; |
290 | |
291 | /* |
292 | * Calculate the minimal possible size of an index tuple from the index |
293 | * metadata. The minimal possible size of a varlena attribute is VARHDRSZ. |
294 | * |
295 | * XXX: that's not actually true, as a short varlena can be just 2 bytes. |
296 | * We should also take alignment padding into account here. |
297 | */ |
298 | itupMinSize = (Size) MAXALIGN(sizeof(IndexTupleData)); |
299 | for (i = 0; i < index->rd_att->natts; i++) |
300 | { |
301 | if (TupleDescAttr(index->rd_att, i)->attlen < 0) |
302 | itupMinSize += VARHDRSZ; |
303 | else |
304 | itupMinSize += TupleDescAttr(index->rd_att, i)->attlen; |
305 | } |
306 | |
307 | /* Calculate the average and maximal number of index tuples that fit on a page */ |
308 | avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize; |
309 | maxIndexTuplesPerPage = pageFreeSpace / itupMinSize; |
310 | |
311 | /* |
312 | * We need to calculate two parameters for the buffering algorithm: |
313 | * levelStep and pagesPerBuffer. |
314 | * |
315 | * levelStep determines the size of subtree that we operate on, while |
316 | * emptying a buffer. A higher value is better, as you need fewer buffer |
317 | * emptying steps to build the index. However, if you set it too high, the |
318 | * subtree doesn't fit in cache anymore, and you quickly lose the benefit |
319 | * of the buffers. |
320 | * |
321 | * In Arge et al's paper, levelStep is chosen as logB(M/4B), where B is |
322 | * the number of tuples on page (ie. fanout), and M is the amount of |
323 | * internal memory available. Curiously, they don't explain *why* that |
324 | * setting is optimal. We calculate it by taking the highest levelStep so |
325 | * that a subtree still fits in cache. For a small B, our way of |
326 | * calculating levelStep is very close to Arge et al's formula. For a |
327 | * large B, our formula gives a value that is 2x higher. |
328 | * |
329 | * The average size (in pages) of a subtree of depth n can be calculated |
330 | * as a geometric series: |
331 | * |
332 | * B^0 + B^1 + B^2 + ... + B^n = (1 - B^(n + 1)) / (1 - B) |
333 | * |
334 | * where B is the average number of index tuples on page. The subtree is |
335 | * cached in the shared buffer cache and the OS cache, so we choose |
336 | * levelStep so that the subtree size is comfortably smaller than |
337 | * effective_cache_size, with a safety factor of 4. |
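| * |
| * As a purely illustrative example (the numbers are made up, and the |
| * maintenance_work_mem limit below is ignored): with B = 100 the subtree |
| * sizes for levelStep = 1, 2, 3 are 101, 10101 and 1010101 pages, i.e. |
| * roughly 0.8 MB, 80 MB and 7.7 GB with 8 kB pages. With an |
| * effective_cache_size of 4 GB (524288 pages), the loop below accepts |
| * levelStep = 2 (10101 < 524288 / 4) but rejects levelStep = 3, so the |
| * build would use levelStep = 2. |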
338 | * |
339 | * The estimate on the average number of index tuples on page is based on |
340 | * average tuple sizes observed before switching to buffered build, so the |
341 | * real subtree size can be somewhat larger. Also, it would be selfish to |
342 | * gobble the whole cache for our index build. The safety factor of 4 |
343 | * should account for those effects. |
344 | * |
345 | * The other limiting factor for setting levelStep is that while |
346 | * processing a subtree, we need to hold one page for each buffer at the |
347 | * next lower buffered level. The max. number of buffers needed for that |
348 | * is maxIndexTuplesPerPage^levelStep. This is very conservative, but |
349 | * hopefully maintenance_work_mem is set high enough that you're |
350 | * constrained by effective_cache_size rather than maintenance_work_mem. |
351 | * |
352 | * XXX: the buffer hash table consumes a fair amount of memory too per |
353 | * buffer, but that is not currently taken into account. That scales with |
354 | * the total number of buffers used, i.e. with the index size and with levelStep. |
355 | * Note that a higher levelStep *reduces* the amount of memory needed for |
356 | * the hash table. |
357 | */ |
358 | levelStep = 1; |
359 | for (;;) |
360 | { |
361 | double subtreesize; |
362 | double maxlowestlevelpages; |
363 | |
364 | /* size of an average subtree at this levelStep (in pages). */ |
365 | subtreesize = |
366 | (1 - pow(avgIndexTuplesPerPage, (double) (levelStep + 1))) / |
367 | (1 - avgIndexTuplesPerPage); |
368 | |
369 | /* max number of pages at the lowest level of a subtree */ |
370 | maxlowestlevelpages = pow(maxIndexTuplesPerPage, (double) levelStep); |
371 | |
372 | /* subtree must fit in cache (with safety factor of 4) */ |
373 | if (subtreesize > effective_cache_size / 4) |
374 | break; |
375 | |
376 | /* each node in the lowest level of a subtree has one page in memory */ |
377 | if (maxlowestlevelpages > ((double) maintenance_work_mem * 1024) / BLCKSZ) |
378 | break; |
379 | |
380 | /* Good, we can handle this levelStep. See if we can go one higher. */ |
381 | levelStep++; |
382 | } |
383 | |
384 | /* |
385 | * The loop above terminated at the first unacceptable value of levelStep, |
386 | * so step back to the last acceptable value. |
387 | */ |
388 | levelStep--; |
389 | |
390 | /* |
391 | * If there's not enough cache or maintenance_work_mem, fall back to plain |
392 | * inserts. |
393 | */ |
394 | if (levelStep <= 0) |
395 | { |
396 | elog(DEBUG1, "failed to switch to buffered GiST build" ); |
397 | buildstate->bufferingMode = GIST_BUFFERING_DISABLED; |
398 | return; |
399 | } |
400 | |
401 | /* |
402 | * The second parameter to set is pagesPerBuffer, which determines the |
403 | * size of each buffer. We adjust pagesPerBuffer also during the build, |
404 | * which is why this calculation is in a separate function. |
405 | */ |
406 | pagesPerBuffer = calculatePagesPerBuffer(buildstate, levelStep); |
407 | |
408 | /* Initialize GISTBuildBuffers with these parameters */ |
409 | buildstate->gfbb = gistInitBuildBuffers(pagesPerBuffer, levelStep, |
410 | gistGetMaxLevel(index)); |
411 | |
412 | gistInitParentMap(buildstate); |
413 | |
414 | buildstate->bufferingMode = GIST_BUFFERING_ACTIVE; |
415 | |
416 | elog(DEBUG1, "switched to buffered GiST build; level step = %d, pagesPerBuffer = %d" , |
417 | levelStep, pagesPerBuffer); |
418 | } |
419 | |
420 | /* |
421 | * Calculate pagesPerBuffer parameter for the buffering algorithm. |
422 | * |
423 | * Buffer size is chosen so that, assuming tuples are distributed |
424 | * randomly, emptying half a buffer fills on average one page in every buffer |
425 | * at the next lower level. |
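| * |
| * Illustrative arithmetic only (the numbers are not from this file): with |
| * roughly 100 index tuples per page and levelStep = 2, this gives |
| * 2 * 100^2 = 20000 pages per buffer, so emptying half a buffer pushes |
| * about 100^2 pages' worth of tuples down to the ~100^2 buffers two |
| * levels below, i.e. about one page each. |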
426 | */ |
427 | static int |
428 | calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep) |
429 | { |
430 | double pagesPerBuffer; |
431 | double avgIndexTuplesPerPage; |
432 | double itupAvgSize; |
433 | Size pageFreeSpace; |
434 | |
435 | /* Calculate the space on an index page that is available for index tuples */ |
436 | pageFreeSpace = BLCKSZ - SizeOfPageHeaderData - sizeof(GISTPageOpaqueData) |
437 | - sizeof(ItemIdData) |
438 | - buildstate->freespace; |
439 | |
440 | /* |
441 | * Calculate average size of already inserted index tuples using gathered |
442 | * statistics. |
443 | */ |
444 | itupAvgSize = (double) buildstate->indtuplesSize / |
445 | (double) buildstate->indtuples; |
446 | |
447 | avgIndexTuplesPerPage = pageFreeSpace / itupAvgSize; |
448 | |
449 | /* |
450 | * Recalculate required size of buffers. |
451 | */ |
452 | pagesPerBuffer = 2 * pow(avgIndexTuplesPerPage, levelStep); |
453 | |
454 | return (int) rint(pagesPerBuffer); |
455 | } |
456 | |
457 | /* |
458 | * Per-tuple callback for table_index_build_scan. |
459 | */ |
460 | static void |
461 | gistBuildCallback(Relation index, |
462 | HeapTuple htup, |
463 | Datum *values, |
464 | bool *isnull, |
465 | bool tupleIsAlive, |
466 | void *state) |
467 | { |
468 | GISTBuildState *buildstate = (GISTBuildState *) state; |
469 | IndexTuple itup; |
470 | MemoryContext oldCtx; |
471 | |
472 | oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt); |
473 | |
474 | /* form an index tuple and point it at the heap tuple */ |
475 | itup = gistFormTuple(buildstate->giststate, index, values, isnull, true); |
476 | itup->t_tid = htup->t_self; |
477 | |
478 | if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE) |
479 | { |
480 | /* We have buffers, so use them. */ |
481 | gistBufferingBuildInsert(buildstate, itup); |
482 | } |
483 | else |
484 | { |
485 | /* |
486 | * There are no buffers (yet). Since we already have the index relation |
487 | * locked, we call gistdoinsert directly. |
488 | */ |
489 | gistdoinsert(index, itup, buildstate->freespace, |
490 | buildstate->giststate, buildstate->heaprel, true); |
491 | } |
492 | |
493 | /* Update tuple count and total size. */ |
494 | buildstate->indtuples += 1; |
495 | buildstate->indtuplesSize += IndexTupleSize(itup); |
496 | |
497 | MemoryContextSwitchTo(oldCtx); |
498 | MemoryContextReset(buildstate->giststate->tempCxt); |
499 | |
500 | if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE && |
501 | buildstate->indtuples % BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET == 0) |
502 | { |
503 | /* Adjust the target buffer size now */ |
504 | buildstate->gfbb->pagesPerBuffer = |
505 | calculatePagesPerBuffer(buildstate, buildstate->gfbb->levelStep); |
506 | } |
507 | |
508 | /* |
509 | * In 'auto' mode, check if the index has grown too large to fit in cache, |
510 | * and switch to buffering mode if it has. |
511 | * |
512 | * To avoid excessive calls to smgrnblocks(), only check this every |
513 | * BUFFERING_MODE_SWITCH_CHECK_STEP index tuples. |
514 | */ |
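| /* |
| * For instance (illustrative numbers): effective_cache_size is measured in |
| * blocks, so with a setting of 524288 (4 GB at the default 8 kB block |
| * size) the switch is attempted once the index exceeds 524288 blocks. |
| */ |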
515 | if ((buildstate->bufferingMode == GIST_BUFFERING_AUTO && |
516 | buildstate->indtuples % BUFFERING_MODE_SWITCH_CHECK_STEP == 0 && |
517 | effective_cache_size < smgrnblocks(index->rd_smgr, MAIN_FORKNUM)) || |
518 | (buildstate->bufferingMode == GIST_BUFFERING_STATS && |
519 | buildstate->indtuples >= BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET)) |
520 | { |
521 | /* |
522 | * Index doesn't fit in effective cache anymore. Try to switch to |
523 | * buffering build mode. |
524 | */ |
525 | gistInitBuffering(buildstate); |
526 | } |
527 | } |
528 | |
529 | /* |
530 | * Insert function for buffering index build. |
531 | */ |
532 | static void |
533 | gistBufferingBuildInsert(GISTBuildState *buildstate, IndexTuple itup) |
534 | { |
535 | /* Insert the tuple into the buffers. */ |
536 | gistProcessItup(buildstate, itup, 0, buildstate->gfbb->rootlevel); |
537 | |
538 | /* If we filled up (half of a) buffer, process buffer emptying. */ |
539 | gistProcessEmptyingQueue(buildstate); |
540 | } |
541 | |
542 | /* |
543 | * Process an index tuple. Runs the tuple down the tree until we reach a leaf |
544 | * page or node buffer, and inserts the tuple there. Returns true if the |
545 | * buffer emptying process has to stop (because one of the child buffers |
546 | * has filled up and can't take any more index tuples). |
547 | */ |
548 | static bool |
549 | gistProcessItup(GISTBuildState *buildstate, IndexTuple itup, |
550 | BlockNumber startblkno, int startlevel) |
551 | { |
552 | GISTSTATE *giststate = buildstate->giststate; |
553 | GISTBuildBuffers *gfbb = buildstate->gfbb; |
554 | Relation indexrel = buildstate->indexrel; |
555 | BlockNumber childblkno; |
556 | Buffer buffer; |
557 | bool result = false; |
558 | BlockNumber blkno; |
559 | int level; |
560 | OffsetNumber downlinkoffnum = InvalidOffsetNumber; |
561 | BlockNumber parentblkno = InvalidBlockNumber; |
562 | |
563 | CHECK_FOR_INTERRUPTS(); |
564 | |
565 | /* |
566 | * Loop until we reach a leaf page (level == 0) or a level with buffers |
567 | * (not including the level we start at, because we would otherwise make |
568 | * no progress). |
569 | */ |
570 | blkno = startblkno; |
571 | level = startlevel; |
572 | for (;;) |
573 | { |
574 | ItemId iid; |
575 | IndexTuple idxtuple, |
576 | newtup; |
577 | Page page; |
578 | OffsetNumber childoffnum; |
579 | |
580 | /* Have we reached a level with buffers? */ |
581 | if (LEVEL_HAS_BUFFERS(level, gfbb) && level != startlevel) |
582 | break; |
583 | |
584 | /* Have we reached a leaf page? */ |
585 | if (level == 0) |
586 | break; |
587 | |
588 | /* |
589 | * Nope. Descend down to the next level then. Choose a child to |
590 | * descend down to. |
591 | */ |
592 | |
593 | buffer = ReadBuffer(indexrel, blkno); |
594 | LockBuffer(buffer, GIST_EXCLUSIVE); |
595 | |
596 | page = (Page) BufferGetPage(buffer); |
597 | childoffnum = gistchoose(indexrel, page, itup, giststate); |
598 | iid = PageGetItemId(page, childoffnum); |
599 | idxtuple = (IndexTuple) PageGetItem(page, iid); |
600 | childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); |
601 | |
602 | if (level > 1) |
603 | gistMemorizeParent(buildstate, childblkno, blkno); |
604 | |
605 | /* |
606 | * Check that the key representing the target child node is consistent |
607 | * with the key we're inserting. Update it if it's not. |
608 | */ |
609 | newtup = gistgetadjusted(indexrel, idxtuple, itup, giststate); |
610 | if (newtup) |
611 | { |
612 | blkno = gistbufferinginserttuples(buildstate, |
613 | buffer, |
614 | level, |
615 | &newtup, |
616 | 1, |
617 | childoffnum, |
618 | InvalidBlockNumber, |
619 | InvalidOffsetNumber); |
620 | /* gistbufferinginserttuples() released the buffer */ |
621 | } |
622 | else |
623 | UnlockReleaseBuffer(buffer); |
624 | |
625 | /* Descend to the child */ |
626 | parentblkno = blkno; |
627 | blkno = childblkno; |
628 | downlinkoffnum = childoffnum; |
629 | Assert(level > 0); |
630 | level--; |
631 | } |
632 | |
633 | if (LEVEL_HAS_BUFFERS(level, gfbb)) |
634 | { |
635 | /* |
636 | * We've reached level with buffers. Place the index tuple to the |
637 | * buffer, and add the buffer to the emptying queue if it overflows. |
638 | */ |
639 | GISTNodeBuffer *childNodeBuffer; |
640 | |
641 | /* Find the buffer or create a new one */ |
642 | childNodeBuffer = gistGetNodeBuffer(gfbb, giststate, blkno, level); |
643 | |
644 | /* Add index tuple to it */ |
645 | gistPushItupToNodeBuffer(gfbb, childNodeBuffer, itup); |
646 | |
647 | if (BUFFER_OVERFLOWED(childNodeBuffer, gfbb)) |
648 | result = true; |
649 | } |
650 | else |
651 | { |
652 | /* |
653 | * We've reached a leaf page. Place the tuple here. |
654 | */ |
655 | Assert(level == 0); |
656 | buffer = ReadBuffer(indexrel, blkno); |
657 | LockBuffer(buffer, GIST_EXCLUSIVE); |
658 | gistbufferinginserttuples(buildstate, buffer, level, |
659 | &itup, 1, InvalidOffsetNumber, |
660 | parentblkno, downlinkoffnum); |
661 | /* gistbufferinginserttuples() released the buffer */ |
662 | } |
663 | |
664 | return result; |
665 | } |
666 | |
667 | /* |
668 | * Insert tuples to a given page. |
669 | * |
670 | * This is analogous to gistinserttuples() in the regular insertion code. |
671 | * |
672 | * Returns the block number of the page where the (first) new or updated tuple |
673 | * was inserted. Usually that's the original page, but might be a sibling page |
674 | * if the original page was split. |
675 | * |
676 | * Caller should hold a lock on 'buffer' on entry. This function will unlock |
677 | * and unpin it. |
678 | */ |
679 | static BlockNumber |
680 | gistbufferinginserttuples(GISTBuildState *buildstate, Buffer buffer, int level, |
681 | IndexTuple *itup, int ntup, OffsetNumber oldoffnum, |
682 | BlockNumber parentblk, OffsetNumber downlinkoffnum) |
683 | { |
684 | GISTBuildBuffers *gfbb = buildstate->gfbb; |
685 | List *splitinfo; |
686 | bool is_split; |
687 | BlockNumber placed_to_blk = InvalidBlockNumber; |
688 | |
689 | is_split = gistplacetopage(buildstate->indexrel, |
690 | buildstate->freespace, |
691 | buildstate->giststate, |
692 | buffer, |
693 | itup, ntup, oldoffnum, &placed_to_blk, |
694 | InvalidBuffer, |
695 | &splitinfo, |
696 | false, |
697 | buildstate->heaprel, true); |
698 | |
699 | /* |
700 | * If this is a root split, update the root path item kept in memory. This |
701 | * ensures that all path stacks are always complete, including all parent |
702 | * nodes up to the root. That simplifies the algorithm to re-find correct |
703 | * parent. |
704 | */ |
705 | if (is_split && BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO) |
706 | { |
707 | Page page = BufferGetPage(buffer); |
708 | OffsetNumber off; |
709 | OffsetNumber maxoff; |
710 | |
711 | Assert(level == gfbb->rootlevel); |
712 | gfbb->rootlevel++; |
713 | |
714 | elog(DEBUG2, "splitting GiST root page, now %d levels deep" , gfbb->rootlevel); |
715 | |
716 | /* |
717 | * All the downlinks on the old root page are now on one of the child |
718 | * pages. Visit all the new child pages to memorize the parents of the |
719 | * grandchildren. |
720 | */ |
721 | if (gfbb->rootlevel > 1) |
722 | { |
723 | maxoff = PageGetMaxOffsetNumber(page); |
724 | for (off = FirstOffsetNumber; off <= maxoff; off++) |
725 | { |
726 | ItemId iid = PageGetItemId(page, off); |
727 | IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); |
728 | BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); |
729 | Buffer childbuf = ReadBuffer(buildstate->indexrel, childblkno); |
730 | |
731 | LockBuffer(childbuf, GIST_SHARE); |
732 | gistMemorizeAllDownlinks(buildstate, childbuf); |
733 | UnlockReleaseBuffer(childbuf); |
734 | |
735 | /* |
736 | * Also remember that the parent of the new child page is the |
737 | * root block. |
738 | */ |
739 | gistMemorizeParent(buildstate, childblkno, GIST_ROOT_BLKNO); |
740 | } |
741 | } |
742 | } |
743 | |
744 | if (splitinfo) |
745 | { |
746 | /* |
747 | * Insert the downlinks into the parent. This is analogous to |
748 | * gistfinishsplit() in the regular insertion code, but the locking is |
749 | * simpler, and we have to maintain the buffers on internal nodes and |
750 | * the parent map. |
751 | */ |
752 | IndexTuple *downlinks; |
753 | int ndownlinks, |
754 | i; |
755 | Buffer parentBuffer; |
756 | ListCell *lc; |
757 | |
758 | /* Parent may have changed since we memorized this path. */ |
759 | parentBuffer = |
760 | gistBufferingFindCorrectParent(buildstate, |
761 | BufferGetBlockNumber(buffer), |
762 | level, |
763 | &parentblk, |
764 | &downlinkoffnum); |
765 | |
766 | /* |
767 | * If there's a buffer associated with this page, that needs to be |
768 | * split too. gistRelocateBuildBuffersOnSplit() will also adjust the |
769 | * downlinks in 'splitinfo', to make sure they're consistent not only |
770 | * with the tuples already on the pages, but also the tuples in the |
771 | * buffers that will eventually be inserted to them. |
772 | */ |
773 | gistRelocateBuildBuffersOnSplit(gfbb, |
774 | buildstate->giststate, |
775 | buildstate->indexrel, |
776 | level, |
777 | buffer, splitinfo); |
778 | |
779 | /* Create an array of all the downlink tuples */ |
780 | ndownlinks = list_length(splitinfo); |
781 | downlinks = (IndexTuple *) palloc(sizeof(IndexTuple) * ndownlinks); |
782 | i = 0; |
783 | foreach(lc, splitinfo) |
784 | { |
785 | GISTPageSplitInfo *splitinfo = lfirst(lc); |
786 | |
787 | /* |
788 | * Remember the parent of each new child page in our parent map. |
789 | * This assumes that the downlinks fit on the parent page. If the |
790 | * parent page is split, too, when we recurse up to insert the |
791 | * downlinks, the recursive gistbufferinginserttuples() call will |
792 | * update the map again. |
793 | */ |
794 | if (level > 0) |
795 | gistMemorizeParent(buildstate, |
796 | BufferGetBlockNumber(splitinfo->buf), |
797 | BufferGetBlockNumber(parentBuffer)); |
798 | |
799 | /* |
800 | * Also update the parent map for all the downlinks that got moved |
801 | * to a different page. (actually this also loops through the |
802 | * downlinks that stayed on the original page, but it does no |
803 | * harm). |
804 | */ |
805 | if (level > 1) |
806 | gistMemorizeAllDownlinks(buildstate, splitinfo->buf); |
807 | |
808 | /* |
809 | * Since there's no concurrent access, we can release the lower |
810 | * level buffers immediately. This includes the original page. |
811 | */ |
812 | UnlockReleaseBuffer(splitinfo->buf); |
813 | downlinks[i++] = splitinfo->downlink; |
814 | } |
815 | |
816 | /* Insert them into parent. */ |
817 | gistbufferinginserttuples(buildstate, parentBuffer, level + 1, |
818 | downlinks, ndownlinks, downlinkoffnum, |
819 | InvalidBlockNumber, InvalidOffsetNumber); |
820 | |
821 | list_free_deep(splitinfo); /* we don't need this anymore */ |
822 | } |
823 | else |
824 | UnlockReleaseBuffer(buffer); |
825 | |
826 | return placed_to_blk; |
827 | } |
828 | |
829 | /* |
830 | * Find the downlink pointing to a child page. |
831 | * |
832 | * 'childblkno' indicates the child page to find the parent for. 'level' is |
833 | * the level of the child. On entry, *parentblkno and *downlinkoffnum can |
834 | * point to a location where the downlink used to be - we will check that |
835 | * location first, and save some cycles if it hasn't moved. The function |
836 | * returns a buffer containing the downlink, exclusively-locked, and |
837 | * *parentblkno and *downlinkoffnum are set to the real location of the |
838 | * downlink. |
839 | * |
840 | * If the child page is a leaf (level == 0), the caller must supply a correct |
841 | * parentblkno. Otherwise we use the parent map hash table to find the parent |
842 | * block. |
843 | * |
844 | * This function serves the same purpose as gistFindCorrectParent() during |
845 | * normal index inserts, but this is simpler because we don't need to deal |
846 | * with concurrent inserts. |
847 | */ |
848 | static Buffer |
849 | gistBufferingFindCorrectParent(GISTBuildState *buildstate, |
850 | BlockNumber childblkno, int level, |
851 | BlockNumber *parentblkno, |
852 | OffsetNumber *downlinkoffnum) |
853 | { |
854 | BlockNumber parent; |
855 | Buffer buffer; |
856 | Page page; |
857 | OffsetNumber maxoff; |
858 | OffsetNumber off; |
859 | |
860 | if (level > 0) |
861 | parent = gistGetParent(buildstate, childblkno); |
862 | else |
863 | { |
864 | /* |
865 | * For a leaf page, the caller must supply a correct parent block |
866 | * number. |
867 | */ |
868 | if (*parentblkno == InvalidBlockNumber) |
869 | elog(ERROR, "no parent buffer provided of child %d" , childblkno); |
870 | parent = *parentblkno; |
871 | } |
872 | |
873 | buffer = ReadBuffer(buildstate->indexrel, parent); |
874 | page = BufferGetPage(buffer); |
875 | LockBuffer(buffer, GIST_EXCLUSIVE); |
876 | gistcheckpage(buildstate->indexrel, buffer); |
877 | maxoff = PageGetMaxOffsetNumber(page); |
878 | |
879 | /* Check if it was not moved */ |
880 | if (parent == *parentblkno && *parentblkno != InvalidBlockNumber && |
881 | *downlinkoffnum != InvalidOffsetNumber && *downlinkoffnum <= maxoff) |
882 | { |
883 | ItemId iid = PageGetItemId(page, *downlinkoffnum); |
884 | IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); |
885 | |
886 | if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno) |
887 | { |
888 | /* Still there */ |
889 | return buffer; |
890 | } |
891 | } |
892 | |
893 | /* |
894 | * Downlink was not at the offset where it used to be. Scan the page to |
895 | * find it. During normal gist insertions, it might've moved to another |
896 | * page, to the right, but during a buffering build, we keep track of the |
897 | * parent of each page in the lookup table so we should always know what |
898 | * page it's on. |
899 | */ |
900 | for (off = FirstOffsetNumber; off <= maxoff; off = OffsetNumberNext(off)) |
901 | { |
902 | ItemId iid = PageGetItemId(page, off); |
903 | IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); |
904 | |
905 | if (ItemPointerGetBlockNumber(&(idxtuple->t_tid)) == childblkno) |
906 | { |
907 | /* yes!!, found it */ |
908 | *downlinkoffnum = off; |
909 | return buffer; |
910 | } |
911 | } |
912 | |
913 | elog(ERROR, "failed to re-find parent for block %u" , childblkno); |
914 | return InvalidBuffer; /* keep compiler quiet */ |
915 | } |
916 | |
917 | /* |
918 | * Process the buffer emptying queue. Emptying one buffer can cause other |
919 | * buffers to be added to the queue. This function iterates until this |
920 | * cascading emptying process has finished, i.e. until the queue is empty. |
921 | */ |
922 | static void |
923 | gistProcessEmptyingQueue(GISTBuildState *buildstate) |
924 | { |
925 | GISTBuildBuffers *gfbb = buildstate->gfbb; |
926 | |
927 | /* Iterate while there are elements in the buffer emptying queue. */ |
928 | while (gfbb->bufferEmptyingQueue != NIL) |
929 | { |
930 | GISTNodeBuffer *emptyingNodeBuffer; |
931 | |
932 | /* Get the next node buffer from the emptying queue. */ |
933 | emptyingNodeBuffer = (GISTNodeBuffer *) linitial(gfbb->bufferEmptyingQueue); |
934 | gfbb->bufferEmptyingQueue = list_delete_first(gfbb->bufferEmptyingQueue); |
935 | emptyingNodeBuffer->queuedForEmptying = false; |
936 | |
937 | /* |
938 | * We are going to load the last pages of the buffers that this emptying |
939 | * will push tuples into, so first unload any previously loaded buffers. |
940 | */ |
941 | gistUnloadNodeBuffers(gfbb); |
942 | |
943 | /* |
944 | * Pop tuples from the buffer and run them down to the buffers at |
945 | * lower level, or leaf pages. We continue until one of the lower |
946 | * level buffers fills up, or this buffer runs empty. |
947 | * |
948 | * In Arge et al's paper, the buffer emptying is stopped after |
949 | * processing 1/2 node buffer worth of tuples, to avoid overfilling |
950 | * any of the lower level buffers. However, it's more efficient to |
951 | * keep going until one of the lower level buffers actually fills up, |
952 | * so that's what we do. This doesn't need to be exact; if a buffer |
953 | * overfills by a few tuples, there's no harm done. |
954 | */ |
955 | while (true) |
956 | { |
957 | IndexTuple itup; |
958 | |
959 | /* Get next index tuple from the buffer */ |
960 | if (!gistPopItupFromNodeBuffer(gfbb, emptyingNodeBuffer, &itup)) |
961 | break; |
962 | |
963 | /* |
964 | * Run it down to the underlying node buffer or leaf page. |
965 | * |
966 | * Note: it's possible that the buffer we're emptying splits as a |
967 | * result of this call. If that happens, our emptyingNodeBuffer |
968 | * points to the left half of the split. After split, it's very |
969 | * likely that the new left buffer is no longer over the half-full |
970 | * threshold, but we might as well keep flushing tuples from it |
971 | * until we fill a lower-level buffer. |
972 | */ |
973 | if (gistProcessItup(buildstate, itup, emptyingNodeBuffer->nodeBlocknum, emptyingNodeBuffer->level)) |
974 | { |
975 | /* |
976 | * A lower level buffer filled up. Stop emptying this buffer, |
977 | * to avoid overflowing the lower level buffer. |
978 | */ |
979 | break; |
980 | } |
981 | |
982 | /* Free all the memory allocated during index tuple processing */ |
983 | MemoryContextReset(buildstate->giststate->tempCxt); |
984 | } |
985 | } |
986 | } |
987 | |
988 | /* |
989 | * Empty all node buffers, from top to bottom. This is done at the end of |
990 | * index build to flush all remaining tuples to the index. |
991 | * |
992 | * Note: This destroys the buffersOnLevels lists, so no more tuples should |
993 | * be inserted into the buffers after this call. |
994 | */ |
995 | static void |
996 | gistEmptyAllBuffers(GISTBuildState *buildstate) |
997 | { |
998 | GISTBuildBuffers *gfbb = buildstate->gfbb; |
999 | MemoryContext oldCtx; |
1000 | int i; |
1001 | |
1002 | oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt); |
1003 | |
1004 | /* |
1005 | * Iterate through the levels from top to bottom. |
1006 | */ |
1007 | for (i = gfbb->buffersOnLevelsLen - 1; i >= 0; i--) |
1008 | { |
1009 | /* |
1010 | * Empty all buffers on this level. Note that new buffers can pop up |
1011 | * in the list during the processing, as a result of page splits, so a |
1012 | * simple walk through the list won't work. We remove buffers from the |
1013 | * list when we see them empty; a buffer can't become non-empty once |
1014 | * it's been fully emptied. |
1015 | */ |
1016 | while (gfbb->buffersOnLevels[i] != NIL) |
1017 | { |
1018 | GISTNodeBuffer *nodeBuffer; |
1019 | |
1020 | nodeBuffer = (GISTNodeBuffer *) linitial(gfbb->buffersOnLevels[i]); |
1021 | |
1022 | if (nodeBuffer->blocksCount != 0) |
1023 | { |
1024 | /* |
1025 | * Add this buffer to the emptying queue, and proceed to empty |
1026 | * the queue. |
1027 | */ |
1028 | if (!nodeBuffer->queuedForEmptying) |
1029 | { |
1030 | MemoryContextSwitchTo(gfbb->context); |
1031 | nodeBuffer->queuedForEmptying = true; |
1032 | gfbb->bufferEmptyingQueue = |
1033 | lcons(nodeBuffer, gfbb->bufferEmptyingQueue); |
1034 | MemoryContextSwitchTo(buildstate->giststate->tempCxt); |
1035 | } |
1036 | gistProcessEmptyingQueue(buildstate); |
1037 | } |
1038 | else |
1039 | gfbb->buffersOnLevels[i] = |
1040 | list_delete_first(gfbb->buffersOnLevels[i]); |
1041 | } |
1042 | elog(DEBUG2, "emptied all buffers at level %d" , i); |
1043 | } |
1044 | MemoryContextSwitchTo(oldCtx); |
1045 | } |
1046 | |
1047 | /* |
1048 | * Get the depth of the GiST index. |
1049 | */ |
1050 | static int |
1051 | gistGetMaxLevel(Relation index) |
1052 | { |
1053 | int maxLevel; |
1054 | BlockNumber blkno; |
1055 | |
1056 | /* |
1057 | * Traverse down the tree, starting from the root, until we hit the leaf |
1058 | * level. |
1059 | */ |
1060 | maxLevel = 0; |
1061 | blkno = GIST_ROOT_BLKNO; |
1062 | while (true) |
1063 | { |
1064 | Buffer buffer; |
1065 | Page page; |
1066 | IndexTuple itup; |
1067 | |
1068 | buffer = ReadBuffer(index, blkno); |
1069 | |
1070 | /* |
1071 | * There's no concurrent access during index build, so locking is just |
1072 | * pro forma. |
1073 | */ |
1074 | LockBuffer(buffer, GIST_SHARE); |
1075 | page = (Page) BufferGetPage(buffer); |
1076 | |
1077 | if (GistPageIsLeaf(page)) |
1078 | { |
1079 | /* We hit the bottom, so we're done. */ |
1080 | UnlockReleaseBuffer(buffer); |
1081 | break; |
1082 | } |
1083 | |
1084 | /* |
1085 | * Pick the first downlink on the page, and follow it. It doesn't |
1086 | * matter which downlink we choose; the tree has the same depth |
1087 | * everywhere, so we just pick the first one. |
1088 | */ |
1089 | itup = (IndexTuple) PageGetItem(page, |
1090 | PageGetItemId(page, FirstOffsetNumber)); |
1091 | blkno = ItemPointerGetBlockNumber(&(itup->t_tid)); |
1092 | UnlockReleaseBuffer(buffer); |
1093 | |
1094 | /* |
1095 | * We're descending one level in the tree, which means there is at least |
1096 | * one more level below this one. |
1097 | */ |
1098 | maxLevel++; |
1099 | } |
1100 | return maxLevel; |
1101 | } |
1102 | |
1103 | |
1104 | /* |
1105 | * Routines for managing the parent map. |
1106 | * |
1107 | * Whenever a page is split, we need to insert the downlinks into the parent. |
1108 | * We need to somehow find the parent page to do that. In normal insertions, |
1109 | * we keep a stack of nodes visited when we descend the tree. However, in |
1110 | * buffering build, we can start descending the tree from any internal node, |
1111 | * when we empty a buffer by cascading tuples to its children. So we don't |
1112 | * have a full stack up to the root available at that time. |
1113 | * |
1114 | * So instead, we maintain a hash table to track the parent of every internal |
1115 | * page. We don't need to track the parents of leaf nodes, however. Whenever |
1116 | * we insert to a leaf, we've just descended down from its parent, so we know |
1117 | * its immediate parent already. This helps a lot to limit the memory used |
1118 | * by this hash table. |
1119 | * |
1120 | * Whenever an internal node is split, the parent map needs to be updated: |
1121 | * the parent of the new child page needs to be recorded, and the entries |
1122 | * for all pages whose downlinks are moved to the new page in the split |
1123 | * need to be updated as well. |
1124 | * |
1125 | * We also update the parent map whenever we descend the tree. That might seem |
1126 | * unnecessary, because we maintain the map whenever a downlink is moved or |
1127 | * created, but it is needed because we switch to buffering mode after |
1128 | * creating a tree with regular index inserts. Any pages created before |
1129 | * switching to buffering mode will not be present in the parent map initially, |
1130 | * but will be added there the first time we visit them. |
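| * |
| * As a sketch of the bookkeeping (the block numbers are hypothetical): |
| * suppose internal page 7, whose parent is page 3, is split and some of |
| * its downlinks move to a new page 12. gistbufferinginserttuples() then |
| * records parent(12) = 3, re-records parent(c) = 12 for every child c |
| * whose downlink moved to page 12 (via gistMemorizeAllDownlinks, and only |
| * when those children are themselves internal pages), and finally inserts |
| * the downlink for page 12 into page 3, recursing if page 3 has to be |
| * split in turn. |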
1131 | */ |
1132 | |
1133 | typedef struct |
1134 | { |
1135 | BlockNumber childblkno; /* hash key */ |
1136 | BlockNumber parentblkno; |
1137 | } ParentMapEntry; |
1138 | |
1139 | static void |
1140 | gistInitParentMap(GISTBuildState *buildstate) |
1141 | { |
1142 | HASHCTL hashCtl; |
1143 | |
1144 | hashCtl.keysize = sizeof(BlockNumber); |
1145 | hashCtl.entrysize = sizeof(ParentMapEntry); |
1146 | hashCtl.hcxt = CurrentMemoryContext; |
1147 | buildstate->parentMap = hash_create("gistbuild parent map" , |
1148 | 1024, |
1149 | &hashCtl, |
1150 | HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); |
1151 | } |
1152 | |
1153 | static void |
1154 | gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child, BlockNumber parent) |
1155 | { |
1156 | ParentMapEntry *entry; |
1157 | bool found; |
1158 | |
1159 | entry = (ParentMapEntry *) hash_search(buildstate->parentMap, |
1160 | (const void *) &child, |
1161 | HASH_ENTER, |
1162 | &found); |
1163 | entry->parentblkno = parent; |
1164 | } |
1165 | |
1166 | /* |
1167 | * Scan all downlinks on a page, and memorize their parent. |
1168 | */ |
1169 | static void |
1170 | gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parentbuf) |
1171 | { |
1172 | OffsetNumber maxoff; |
1173 | OffsetNumber off; |
1174 | BlockNumber parentblkno = BufferGetBlockNumber(parentbuf); |
1175 | Page page = BufferGetPage(parentbuf); |
1176 | |
1177 | Assert(!GistPageIsLeaf(page)); |
1178 | |
1179 | maxoff = PageGetMaxOffsetNumber(page); |
1180 | for (off = FirstOffsetNumber; off <= maxoff; off++) |
1181 | { |
1182 | ItemId iid = PageGetItemId(page, off); |
1183 | IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); |
1184 | BlockNumber childblkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); |
1185 | |
1186 | gistMemorizeParent(buildstate, childblkno, parentblkno); |
1187 | } |
1188 | } |
1189 | |
1190 | static BlockNumber |
1191 | gistGetParent(GISTBuildState *buildstate, BlockNumber child) |
1192 | { |
1193 | ParentMapEntry *entry; |
1194 | bool found; |
1195 | |
1196 | /* Find the parent of the child page in the hash table */ |
1197 | entry = (ParentMapEntry *) hash_search(buildstate->parentMap, |
1198 | (const void *) &child, |
1199 | HASH_FIND, |
1200 | &found); |
1201 | if (!found) |
1202 | elog(ERROR, "could not find parent of block %d in lookup table" , child); |
1203 | |
1204 | return entry->parentblkno; |
1205 | } |
1206 | |