1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * gistsplit.c |
4 | * Multi-column page splitting algorithm |
5 | * |
6 | * This file is concerned with making good page-split decisions in multi-column |
7 | * GiST indexes. The opclass-specific picksplit functions can only be expected |
8 | * to produce answers based on a single column. We first run the picksplit |
9 | * function for column 1; then, if there are more columns, we check if any of |
10 | * the tuples are "don't cares" so far as the column 1 split is concerned |
11 | * (that is, they could go to either side for no additional penalty). If so, |
12 | * we try to redistribute those tuples on the basis of the next column. |
13 | * Repeat till we're out of columns. |
14 | * |
15 | * gistSplitByKey() is the entry point to this file. |
16 | * |
17 | * |
18 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
19 | * Portions Copyright (c) 1994, Regents of the University of California |
20 | * |
21 | * IDENTIFICATION |
22 | * src/backend/access/gist/gistsplit.c |
23 | * |
24 | *------------------------------------------------------------------------- |
25 | */ |
26 | #include "postgres.h" |
27 | |
28 | #include "access/gist_private.h" |
29 | #include "utils/rel.h" |
30 | |
31 | typedef struct |
32 | { |
33 | OffsetNumber *entries; |
34 | int len; |
35 | Datum *attr; |
36 | bool *isnull; |
37 | bool *dontcare; |
38 | } GistSplitUnion; |
39 | |
40 | |
41 | /* |
42 | * Form unions of subkeys in itvec[] entries listed in gsvp->entries[], |
43 | * ignoring any tuples that are marked in gsvp->dontcare[]. Subroutine for |
44 | * gistunionsubkey. |
45 | */ |
46 | static void |
47 | gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec, |
48 | GistSplitUnion *gsvp) |
49 | { |
50 | IndexTuple *cleanedItVec; |
51 | int i, |
52 | cleanedLen = 0; |
53 | |
54 | cleanedItVec = (IndexTuple *) palloc(sizeof(IndexTuple) * gsvp->len); |
55 | |
56 | for (i = 0; i < gsvp->len; i++) |
57 | { |
58 | if (gsvp->dontcare && gsvp->dontcare[gsvp->entries[i]]) |
59 | continue; |
60 | |
61 | cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1]; |
62 | } |
63 | |
64 | gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen, |
65 | gsvp->attr, gsvp->isnull); |
66 | |
67 | pfree(cleanedItVec); |
68 | } |
69 | |
70 | /* |
71 | * Recompute unions of left- and right-side subkeys after a page split, |
72 | * ignoring any tuples that are marked in spl->spl_dontcare[]. |
73 | * |
74 | * Note: we always recompute union keys for all index columns. In some cases |
75 | * this might represent duplicate work for the leftmost column(s), but it's |
76 | * not safe to assume that "zero penalty to move a tuple" means "the union |
77 | * key doesn't change at all". Penalty functions aren't 100% accurate. |
78 | */ |
79 | static void |
80 | gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl) |
81 | { |
82 | GistSplitUnion gsvp; |
83 | |
84 | gsvp.dontcare = spl->spl_dontcare; |
85 | |
86 | gsvp.entries = spl->splitVector.spl_left; |
87 | gsvp.len = spl->splitVector.spl_nleft; |
88 | gsvp.attr = spl->spl_lattr; |
89 | gsvp.isnull = spl->spl_lisnull; |
90 | |
91 | gistunionsubkeyvec(giststate, itvec, &gsvp); |
92 | |
93 | gsvp.entries = spl->splitVector.spl_right; |
94 | gsvp.len = spl->splitVector.spl_nright; |
95 | gsvp.attr = spl->spl_rattr; |
96 | gsvp.isnull = spl->spl_risnull; |
97 | |
98 | gistunionsubkeyvec(giststate, itvec, &gsvp); |
99 | } |
100 | |
101 | /* |
102 | * Find tuples that are "don't cares", that is could be moved to the other |
103 | * side of the split with zero penalty, so far as the attno column is |
104 | * concerned. |
105 | * |
106 | * Don't-care tuples are marked by setting the corresponding entry in |
107 | * spl->spl_dontcare[] to "true". Caller must have initialized that array |
108 | * to zeroes. |
109 | * |
110 | * Returns number of don't-cares found. |
111 | */ |
112 | static int |
113 | findDontCares(Relation r, GISTSTATE *giststate, GISTENTRY *valvec, |
114 | GistSplitVector *spl, int attno) |
115 | { |
116 | int i; |
117 | GISTENTRY entry; |
118 | int NumDontCare = 0; |
119 | |
120 | /* |
121 | * First, search the left-side tuples to see if any have zero penalty to |
122 | * be added to the right-side union key. |
123 | * |
124 | * attno column is known all-not-null (see gistSplitByKey), so we need not |
125 | * check for nulls |
126 | */ |
127 | gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL, |
128 | (OffsetNumber) 0, false); |
129 | for (i = 0; i < spl->splitVector.spl_nleft; i++) |
130 | { |
131 | int j = spl->splitVector.spl_left[i]; |
132 | float penalty = gistpenalty(giststate, attno, &entry, false, |
133 | &valvec[j], false); |
134 | |
135 | if (penalty == 0.0) |
136 | { |
137 | spl->spl_dontcare[j] = true; |
138 | NumDontCare++; |
139 | } |
140 | } |
141 | |
142 | /* And conversely for the right-side tuples */ |
143 | gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL, |
144 | (OffsetNumber) 0, false); |
145 | for (i = 0; i < spl->splitVector.spl_nright; i++) |
146 | { |
147 | int j = spl->splitVector.spl_right[i]; |
148 | float penalty = gistpenalty(giststate, attno, &entry, false, |
149 | &valvec[j], false); |
150 | |
151 | if (penalty == 0.0) |
152 | { |
153 | spl->spl_dontcare[j] = true; |
154 | NumDontCare++; |
155 | } |
156 | } |
157 | |
158 | return NumDontCare; |
159 | } |
160 | |
161 | /* |
162 | * Remove tuples that are marked don't-cares from the tuple index array a[] |
163 | * of length *len. This is applied separately to the spl_left and spl_right |
164 | * arrays. |
165 | */ |
166 | static void |
167 | removeDontCares(OffsetNumber *a, int *len, const bool *dontcare) |
168 | { |
169 | int origlen, |
170 | newlen, |
171 | i; |
172 | OffsetNumber *curwpos; |
173 | |
174 | origlen = newlen = *len; |
175 | curwpos = a; |
176 | for (i = 0; i < origlen; i++) |
177 | { |
178 | OffsetNumber ai = a[i]; |
179 | |
180 | if (dontcare[ai] == false) |
181 | { |
182 | /* re-emit item into a[] */ |
183 | *curwpos = ai; |
184 | curwpos++; |
185 | } |
186 | else |
187 | newlen--; |
188 | } |
189 | |
190 | *len = newlen; |
191 | } |
192 | |
193 | /* |
194 | * Place a single don't-care tuple into either the left or right side of the |
195 | * split, according to which has least penalty for merging the tuple into |
196 | * the previously-computed union keys. We need consider only columns starting |
197 | * at attno. |
198 | */ |
199 | static void |
200 | placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v, |
201 | IndexTuple itup, OffsetNumber off, int attno) |
202 | { |
203 | GISTENTRY identry[INDEX_MAX_KEYS]; |
204 | bool isnull[INDEX_MAX_KEYS]; |
205 | bool toLeft = true; |
206 | |
207 | gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0, |
208 | identry, isnull); |
209 | |
210 | for (; attno < giststate->nonLeafTupdesc->natts; attno++) |
211 | { |
212 | float lpenalty, |
213 | rpenalty; |
214 | GISTENTRY entry; |
215 | |
216 | gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, false); |
217 | lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno], |
218 | identry + attno, isnull[attno]); |
219 | gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, false); |
220 | rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno], |
221 | identry + attno, isnull[attno]); |
222 | |
223 | if (lpenalty != rpenalty) |
224 | { |
225 | if (lpenalty > rpenalty) |
226 | toLeft = false; |
227 | break; |
228 | } |
229 | } |
230 | |
231 | if (toLeft) |
232 | v->splitVector.spl_left[v->splitVector.spl_nleft++] = off; |
233 | else |
234 | v->splitVector.spl_right[v->splitVector.spl_nright++] = off; |
235 | } |
236 | |
237 | #define SWAPVAR( s, d, t ) \ |
238 | do { \ |
239 | (t) = (s); \ |
240 | (s) = (d); \ |
241 | (d) = (t); \ |
242 | } while(0) |
243 | |
244 | /* |
245 | * Clean up when we did a secondary split but the user-defined PickSplit |
246 | * method didn't support it (leaving spl_ldatum_exists or spl_rdatum_exists |
247 | * true). |
248 | * |
249 | * We consider whether to swap the left and right outputs of the secondary |
250 | * split; this can be worthwhile if the penalty for merging those tuples into |
251 | * the previously chosen sets is less that way. |
252 | * |
253 | * In any case we must update the union datums for the current column by |
254 | * adding in the previous union keys (oldL/oldR), since the user-defined |
255 | * PickSplit method didn't do so. |
256 | */ |
257 | static void |
258 | supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno, |
259 | GIST_SPLITVEC *sv, Datum oldL, Datum oldR) |
260 | { |
261 | bool leaveOnLeft = true, |
262 | tmpBool; |
263 | GISTENTRY entryL, |
264 | entryR, |
265 | entrySL, |
266 | entrySR; |
267 | |
268 | gistentryinit(entryL, oldL, r, NULL, 0, false); |
269 | gistentryinit(entryR, oldR, r, NULL, 0, false); |
270 | gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false); |
271 | gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false); |
272 | |
273 | if (sv->spl_ldatum_exists && sv->spl_rdatum_exists) |
274 | { |
275 | float penalty1, |
276 | penalty2; |
277 | |
278 | penalty1 = gistpenalty(giststate, attno, &entryL, false, &entrySL, false) + |
279 | gistpenalty(giststate, attno, &entryR, false, &entrySR, false); |
280 | penalty2 = gistpenalty(giststate, attno, &entryL, false, &entrySR, false) + |
281 | gistpenalty(giststate, attno, &entryR, false, &entrySL, false); |
282 | |
283 | if (penalty1 > penalty2) |
284 | leaveOnLeft = false; |
285 | } |
286 | else |
287 | { |
288 | GISTENTRY *entry1 = (sv->spl_ldatum_exists) ? &entryL : &entryR; |
289 | float penalty1, |
290 | penalty2; |
291 | |
292 | /* |
293 | * There is only one previously defined union, so we just choose swap |
294 | * or not by lowest penalty for that side. We can only get here if a |
295 | * secondary split happened to have all NULLs in its column in the |
296 | * tuples that the outer recursion level had assigned to one side. |
297 | * (Note that the null checks in gistSplitByKey don't prevent the |
298 | * case, because they'll only be checking tuples that were considered |
299 | * don't-cares at the outer recursion level, not the tuples that went |
300 | * into determining the passed-down left and right union keys.) |
301 | */ |
302 | penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false); |
303 | penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false); |
304 | |
305 | if (penalty1 < penalty2) |
306 | leaveOnLeft = (sv->spl_ldatum_exists) ? true : false; |
307 | else |
308 | leaveOnLeft = (sv->spl_rdatum_exists) ? true : false; |
309 | } |
310 | |
311 | if (leaveOnLeft == false) |
312 | { |
313 | /* |
314 | * swap left and right |
315 | */ |
316 | OffsetNumber *off, |
317 | noff; |
318 | Datum datum; |
319 | |
320 | SWAPVAR(sv->spl_left, sv->spl_right, off); |
321 | SWAPVAR(sv->spl_nleft, sv->spl_nright, noff); |
322 | SWAPVAR(sv->spl_ldatum, sv->spl_rdatum, datum); |
323 | gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false); |
324 | gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false); |
325 | } |
326 | |
327 | if (sv->spl_ldatum_exists) |
328 | gistMakeUnionKey(giststate, attno, &entryL, false, &entrySL, false, |
329 | &sv->spl_ldatum, &tmpBool); |
330 | |
331 | if (sv->spl_rdatum_exists) |
332 | gistMakeUnionKey(giststate, attno, &entryR, false, &entrySR, false, |
333 | &sv->spl_rdatum, &tmpBool); |
334 | |
335 | sv->spl_ldatum_exists = sv->spl_rdatum_exists = false; |
336 | } |
337 | |
338 | /* |
339 | * Trivial picksplit implementation. Function called only |
340 | * if user-defined picksplit puts all keys on the same side of the split. |
341 | * That is a bug of user-defined picksplit but we don't want to fail. |
342 | */ |
343 | static void |
344 | genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC *v, int attno) |
345 | { |
346 | OffsetNumber i, |
347 | maxoff; |
348 | int nbytes; |
349 | GistEntryVector *evec; |
350 | |
351 | maxoff = entryvec->n - 1; |
352 | |
353 | nbytes = (maxoff + 2) * sizeof(OffsetNumber); |
354 | |
355 | v->spl_left = (OffsetNumber *) palloc(nbytes); |
356 | v->spl_right = (OffsetNumber *) palloc(nbytes); |
357 | v->spl_nleft = v->spl_nright = 0; |
358 | |
359 | for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) |
360 | { |
361 | if (i <= (maxoff - FirstOffsetNumber + 1) / 2) |
362 | { |
363 | v->spl_left[v->spl_nleft] = i; |
364 | v->spl_nleft++; |
365 | } |
366 | else |
367 | { |
368 | v->spl_right[v->spl_nright] = i; |
369 | v->spl_nright++; |
370 | } |
371 | } |
372 | |
373 | /* |
374 | * Form union datums for each side |
375 | */ |
376 | evec = palloc(sizeof(GISTENTRY) * entryvec->n + GEVHDRSZ); |
377 | |
378 | evec->n = v->spl_nleft; |
379 | memcpy(evec->vector, entryvec->vector + FirstOffsetNumber, |
380 | sizeof(GISTENTRY) * evec->n); |
381 | v->spl_ldatum = FunctionCall2Coll(&giststate->unionFn[attno], |
382 | giststate->supportCollation[attno], |
383 | PointerGetDatum(evec), |
384 | PointerGetDatum(&nbytes)); |
385 | |
386 | evec->n = v->spl_nright; |
387 | memcpy(evec->vector, entryvec->vector + FirstOffsetNumber + v->spl_nleft, |
388 | sizeof(GISTENTRY) * evec->n); |
389 | v->spl_rdatum = FunctionCall2Coll(&giststate->unionFn[attno], |
390 | giststate->supportCollation[attno], |
391 | PointerGetDatum(evec), |
392 | PointerGetDatum(&nbytes)); |
393 | } |
394 | |
395 | /* |
396 | * Calls user picksplit method for attno column to split tuples into |
397 | * two vectors. |
398 | * |
399 | * Returns false if split is complete (there are no more index columns, or |
400 | * there is no need to consider them because split is optimal already). |
401 | * |
402 | * Returns true and v->spl_dontcare = NULL if the picksplit result is |
403 | * degenerate (all tuples seem to be don't-cares), so we should just |
404 | * disregard this column and split on the next column(s) instead. |
405 | * |
406 | * Returns true and v->spl_dontcare != NULL if there are don't-care tuples |
407 | * that could be relocated based on the next column(s). The don't-care |
408 | * tuples have been removed from the split and must be reinserted by caller. |
409 | * There is at least one non-don't-care tuple on each side of the split, |
410 | * and union keys for all columns are updated to include just those tuples. |
411 | * |
412 | * A true result implies there is at least one more index column. |
413 | */ |
414 | static bool |
415 | gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v, |
416 | IndexTuple *itup, int len, GISTSTATE *giststate) |
417 | { |
418 | GIST_SPLITVEC *sv = &v->splitVector; |
419 | |
420 | /* |
421 | * Prepare spl_ldatum/spl_rdatum/spl_ldatum_exists/spl_rdatum_exists in |
422 | * case we are doing a secondary split (see comments in gist.h). |
423 | */ |
424 | sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true; |
425 | sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true; |
426 | sv->spl_ldatum = v->spl_lattr[attno]; |
427 | sv->spl_rdatum = v->spl_rattr[attno]; |
428 | |
429 | /* |
430 | * Let the opclass-specific PickSplit method do its thing. Note that at |
431 | * this point we know there are no null keys in the entryvec. |
432 | */ |
433 | FunctionCall2Coll(&giststate->picksplitFn[attno], |
434 | giststate->supportCollation[attno], |
435 | PointerGetDatum(entryvec), |
436 | PointerGetDatum(sv)); |
437 | |
438 | if (sv->spl_nleft == 0 || sv->spl_nright == 0) |
439 | { |
440 | /* |
441 | * User-defined picksplit failed to create an actual split, ie it put |
442 | * everything on the same side. Complain but cope. |
443 | */ |
444 | ereport(DEBUG1, |
445 | (errcode(ERRCODE_INTERNAL_ERROR), |
446 | errmsg("picksplit method for column %d of index \"%s\" failed" , |
447 | attno + 1, RelationGetRelationName(r)), |
448 | errhint("The index is not optimal. To optimize it, contact a developer, or try to use the column as the second one in the CREATE INDEX command." ))); |
449 | |
450 | /* |
451 | * Reinit GIST_SPLITVEC. Although these fields are not used by |
452 | * genericPickSplit(), set them up for further processing |
453 | */ |
454 | sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true; |
455 | sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true; |
456 | sv->spl_ldatum = v->spl_lattr[attno]; |
457 | sv->spl_rdatum = v->spl_rattr[attno]; |
458 | |
459 | /* Do a generic split */ |
460 | genericPickSplit(giststate, entryvec, sv, attno); |
461 | } |
462 | else |
463 | { |
464 | /* hack for compatibility with old picksplit API */ |
465 | if (sv->spl_left[sv->spl_nleft - 1] == InvalidOffsetNumber) |
466 | sv->spl_left[sv->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1); |
467 | if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber) |
468 | sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1); |
469 | } |
470 | |
471 | /* Clean up if PickSplit didn't take care of a secondary split */ |
472 | if (sv->spl_ldatum_exists || sv->spl_rdatum_exists) |
473 | supportSecondarySplit(r, giststate, attno, sv, |
474 | v->spl_lattr[attno], v->spl_rattr[attno]); |
475 | |
476 | /* emit union datums computed by PickSplit back to v arrays */ |
477 | v->spl_lattr[attno] = sv->spl_ldatum; |
478 | v->spl_rattr[attno] = sv->spl_rdatum; |
479 | v->spl_lisnull[attno] = false; |
480 | v->spl_risnull[attno] = false; |
481 | |
482 | /* |
483 | * If index columns remain, then consider whether we can improve the split |
484 | * by using them. |
485 | */ |
486 | v->spl_dontcare = NULL; |
487 | |
488 | if (attno + 1 < giststate->nonLeafTupdesc->natts) |
489 | { |
490 | int NumDontCare; |
491 | |
492 | /* |
493 | * Make a quick check to see if left and right union keys are equal; |
494 | * if so, the split is certainly degenerate, so tell caller to |
495 | * re-split with the next column. |
496 | */ |
497 | if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum)) |
498 | return true; |
499 | |
500 | /* |
501 | * Locate don't-care tuples, if any. If there are none, the split is |
502 | * optimal, so just fall out and return false. |
503 | */ |
504 | v->spl_dontcare = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1)); |
505 | |
506 | NumDontCare = findDontCares(r, giststate, entryvec->vector, v, attno); |
507 | |
508 | if (NumDontCare > 0) |
509 | { |
510 | /* |
511 | * Remove don't-cares from spl_left[] and spl_right[]. |
512 | */ |
513 | removeDontCares(sv->spl_left, &sv->spl_nleft, v->spl_dontcare); |
514 | removeDontCares(sv->spl_right, &sv->spl_nright, v->spl_dontcare); |
515 | |
516 | /* |
517 | * If all tuples on either side were don't-cares, the split is |
518 | * degenerate, and we're best off to ignore it and split on the |
519 | * next column. (We used to try to press on with a secondary |
520 | * split by forcing a random tuple on each side to be treated as |
521 | * non-don't-care, but it seems unlikely that that technique |
522 | * really gives a better result. Note that we don't want to try a |
523 | * secondary split with empty left or right primary split sides, |
524 | * because then there is no union key on that side for the |
525 | * PickSplit function to try to expand, so it can have no good |
526 | * figure of merit for what it's doing. Also note that this check |
527 | * ensures we can't produce a bogus one-side-only split in the |
528 | * NumDontCare == 1 special case below.) |
529 | */ |
530 | if (sv->spl_nleft == 0 || sv->spl_nright == 0) |
531 | { |
532 | v->spl_dontcare = NULL; |
533 | return true; |
534 | } |
535 | |
536 | /* |
537 | * Recompute union keys, considering only non-don't-care tuples. |
538 | * NOTE: this will set union keys for remaining index columns, |
539 | * which will cause later calls of gistUserPicksplit to pass those |
540 | * values down to user-defined PickSplit methods with |
541 | * spl_ldatum_exists/spl_rdatum_exists set true. |
542 | */ |
543 | gistunionsubkey(giststate, itup, v); |
544 | |
545 | if (NumDontCare == 1) |
546 | { |
547 | /* |
548 | * If there's only one don't-care tuple then we can't do a |
549 | * PickSplit on it, so just choose whether to send it left or |
550 | * right by comparing penalties. We needed the |
551 | * gistunionsubkey step anyway so that we have appropriate |
552 | * union keys for figuring the penalties. |
553 | */ |
554 | OffsetNumber toMove; |
555 | |
556 | /* find it ... */ |
557 | for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++) |
558 | { |
559 | if (v->spl_dontcare[toMove]) |
560 | break; |
561 | } |
562 | Assert(toMove < entryvec->n); |
563 | |
564 | /* ... and assign it to cheaper side */ |
565 | placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1); |
566 | |
567 | /* |
568 | * At this point the union keys are wrong, but we don't care |
569 | * because we're done splitting. The outermost recursion |
570 | * level of gistSplitByKey will fix things before returning. |
571 | */ |
572 | } |
573 | else |
574 | return true; |
575 | } |
576 | } |
577 | |
578 | return false; |
579 | } |
580 | |
581 | /* |
582 | * simply split page in half |
583 | */ |
584 | static void |
585 | gistSplitHalf(GIST_SPLITVEC *v, int len) |
586 | { |
587 | int i; |
588 | |
589 | v->spl_nright = v->spl_nleft = 0; |
590 | v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); |
591 | v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); |
592 | for (i = 1; i <= len; i++) |
593 | if (i < len / 2) |
594 | v->spl_right[v->spl_nright++] = i; |
595 | else |
596 | v->spl_left[v->spl_nleft++] = i; |
597 | |
598 | /* we need not compute union keys, caller took care of it */ |
599 | } |
600 | |
601 | /* |
602 | * gistSplitByKey: main entry point for page-splitting algorithm |
603 | * |
604 | * r: index relation |
605 | * page: page being split |
606 | * itup: array of IndexTuples to be processed |
607 | * len: number of IndexTuples to be processed (must be at least 2) |
608 | * giststate: additional info about index |
609 | * v: working state and output area |
610 | * attno: column we are working on (zero-based index) |
611 | * |
612 | * Outside caller must initialize v->spl_lisnull and v->spl_risnull arrays |
613 | * to all-true. On return, spl_left/spl_nleft contain indexes of tuples |
614 | * to go left, spl_right/spl_nright contain indexes of tuples to go right, |
615 | * spl_lattr/spl_lisnull contain left-side union key values, and |
616 | * spl_rattr/spl_risnull contain right-side union key values. Other fields |
617 | * in this struct are workspace for this file. |
618 | * |
619 | * Outside caller must pass zero for attno. The function may internally |
620 | * recurse to the next column by passing attno+1. |
621 | */ |
622 | void |
623 | gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, |
624 | GISTSTATE *giststate, GistSplitVector *v, int attno) |
625 | { |
626 | GistEntryVector *entryvec; |
627 | OffsetNumber *offNullTuples; |
628 | int nOffNullTuples = 0; |
629 | int i; |
630 | |
631 | /* generate the item array, and identify tuples with null keys */ |
632 | /* note that entryvec->vector[0] goes unused in this code */ |
633 | entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY)); |
634 | entryvec->n = len + 1; |
635 | offNullTuples = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); |
636 | |
637 | for (i = 1; i <= len; i++) |
638 | { |
639 | Datum datum; |
640 | bool IsNull; |
641 | |
642 | datum = index_getattr(itup[i - 1], attno + 1, giststate->leafTupdesc, |
643 | &IsNull); |
644 | gistdentryinit(giststate, attno, &(entryvec->vector[i]), |
645 | datum, r, page, i, |
646 | false, IsNull); |
647 | if (IsNull) |
648 | offNullTuples[nOffNullTuples++] = i; |
649 | } |
650 | |
651 | if (nOffNullTuples == len) |
652 | { |
653 | /* |
654 | * Corner case: All keys in attno column are null, so just transfer |
655 | * our attention to the next column. If there's no next column, just |
656 | * split page in half. |
657 | */ |
658 | v->spl_risnull[attno] = v->spl_lisnull[attno] = true; |
659 | |
660 | if (attno + 1 < giststate->nonLeafTupdesc->natts) |
661 | gistSplitByKey(r, page, itup, len, giststate, v, attno + 1); |
662 | else |
663 | gistSplitHalf(&v->splitVector, len); |
664 | } |
665 | else if (nOffNullTuples > 0) |
666 | { |
667 | int j = 0; |
668 | |
669 | /* |
670 | * We don't want to mix NULL and not-NULL keys on one page, so split |
671 | * nulls to right page and not-nulls to left. |
672 | */ |
673 | v->splitVector.spl_right = offNullTuples; |
674 | v->splitVector.spl_nright = nOffNullTuples; |
675 | v->spl_risnull[attno] = true; |
676 | |
677 | v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); |
678 | v->splitVector.spl_nleft = 0; |
679 | for (i = 1; i <= len; i++) |
680 | if (j < v->splitVector.spl_nright && offNullTuples[j] == i) |
681 | j++; |
682 | else |
683 | v->splitVector.spl_left[v->splitVector.spl_nleft++] = i; |
684 | |
685 | /* Compute union keys, unless outer recursion level will handle it */ |
686 | if (attno == 0 && giststate->nonLeafTupdesc->natts == 1) |
687 | { |
688 | v->spl_dontcare = NULL; |
689 | gistunionsubkey(giststate, itup, v); |
690 | } |
691 | } |
692 | else |
693 | { |
694 | /* |
695 | * All keys are not-null, so apply user-defined PickSplit method |
696 | */ |
697 | if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate)) |
698 | { |
699 | /* |
700 | * Splitting on attno column is not optimal, so consider |
701 | * redistributing don't-care tuples according to the next column |
702 | */ |
703 | Assert(attno + 1 < giststate->nonLeafTupdesc->natts); |
704 | |
705 | if (v->spl_dontcare == NULL) |
706 | { |
707 | /* |
708 | * This split was actually degenerate, so ignore it altogether |
709 | * and just split according to the next column. |
710 | */ |
711 | gistSplitByKey(r, page, itup, len, giststate, v, attno + 1); |
712 | } |
713 | else |
714 | { |
715 | /* |
716 | * Form an array of just the don't-care tuples to pass to a |
717 | * recursive invocation of this function for the next column. |
718 | */ |
719 | IndexTuple *newitup = (IndexTuple *) palloc(len * sizeof(IndexTuple)); |
720 | OffsetNumber *map = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); |
721 | int newlen = 0; |
722 | GIST_SPLITVEC backupSplit; |
723 | |
724 | for (i = 0; i < len; i++) |
725 | { |
726 | if (v->spl_dontcare[i + 1]) |
727 | { |
728 | newitup[newlen] = itup[i]; |
729 | map[newlen] = i + 1; |
730 | newlen++; |
731 | } |
732 | } |
733 | |
734 | Assert(newlen > 0); |
735 | |
736 | /* |
737 | * Make a backup copy of v->splitVector, since the recursive |
738 | * call will overwrite that with its own result. |
739 | */ |
740 | backupSplit = v->splitVector; |
741 | backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len); |
742 | memcpy(backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber) * v->splitVector.spl_nleft); |
743 | backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len); |
744 | memcpy(backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber) * v->splitVector.spl_nright); |
745 | |
746 | /* Recursively decide how to split the don't-care tuples */ |
747 | gistSplitByKey(r, page, newitup, newlen, giststate, v, attno + 1); |
748 | |
749 | /* Merge result of subsplit with non-don't-care tuples */ |
750 | for (i = 0; i < v->splitVector.spl_nleft; i++) |
751 | backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1]; |
752 | for (i = 0; i < v->splitVector.spl_nright; i++) |
753 | backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1]; |
754 | |
755 | v->splitVector = backupSplit; |
756 | } |
757 | } |
758 | } |
759 | |
760 | /* |
761 | * If we're handling a multicolumn index, at the end of the recursion |
762 | * recompute the left and right union datums for all index columns. This |
763 | * makes sure we hand back correct union datums in all corner cases, |
764 | * including when we haven't processed all columns to start with, or when |
765 | * a secondary split moved "don't care" tuples from one side to the other |
766 | * (we really shouldn't assume that that didn't change the union datums). |
767 | * |
768 | * Note: when we're in an internal recursion (attno > 0), we do not worry |
769 | * about whether the union datums we return with are sensible, since |
770 | * calling levels won't care. Also, in a single-column index, we expect |
771 | * that PickSplit (or the special cases above) produced correct union |
772 | * datums. |
773 | */ |
774 | if (attno == 0 && giststate->nonLeafTupdesc->natts > 1) |
775 | { |
776 | v->spl_dontcare = NULL; |
777 | gistunionsubkey(giststate, itup, v); |
778 | } |
779 | } |
780 | |