| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * gistsplit.c |
| 4 | * Multi-column page splitting algorithm |
| 5 | * |
| 6 | * This file is concerned with making good page-split decisions in multi-column |
| 7 | * GiST indexes. The opclass-specific picksplit functions can only be expected |
| 8 | * to produce answers based on a single column. We first run the picksplit |
| 9 | * function for column 1; then, if there are more columns, we check if any of |
| 10 | * the tuples are "don't cares" so far as the column 1 split is concerned |
| 11 | * (that is, they could go to either side for no additional penalty). If so, |
| 12 | * we try to redistribute those tuples on the basis of the next column. |
| 13 | * Repeat till we're out of columns. |
| 14 | * |
| 15 | * gistSplitByKey() is the entry point to this file. |
| 16 | * |
| 17 | * |
| 18 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 19 | * Portions Copyright (c) 1994, Regents of the University of California |
| 20 | * |
| 21 | * IDENTIFICATION |
| 22 | * src/backend/access/gist/gistsplit.c |
| 23 | * |
| 24 | *------------------------------------------------------------------------- |
| 25 | */ |
| 26 | #include "postgres.h" |
| 27 | |
| 28 | #include "access/gist_private.h" |
| 29 | #include "utils/rel.h" |
| 30 | |
| 31 | typedef struct |
| 32 | { |
| 33 | OffsetNumber *entries; |
| 34 | int len; |
| 35 | Datum *attr; |
| 36 | bool *isnull; |
| 37 | bool *dontcare; |
| 38 | } GistSplitUnion; |
| 39 | |
| 40 | |
| 41 | /* |
| 42 | * Form unions of subkeys in itvec[] entries listed in gsvp->entries[], |
| 43 | * ignoring any tuples that are marked in gsvp->dontcare[]. Subroutine for |
| 44 | * gistunionsubkey. |
| 45 | */ |
| 46 | static void |
| 47 | gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec, |
| 48 | GistSplitUnion *gsvp) |
| 49 | { |
| 50 | IndexTuple *cleanedItVec; |
| 51 | int i, |
| 52 | cleanedLen = 0; |
| 53 | |
| 54 | cleanedItVec = (IndexTuple *) palloc(sizeof(IndexTuple) * gsvp->len); |
| 55 | |
| 56 | for (i = 0; i < gsvp->len; i++) |
| 57 | { |
| 58 | if (gsvp->dontcare && gsvp->dontcare[gsvp->entries[i]]) |
| 59 | continue; |
| 60 | |
| 61 | cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1]; |
| 62 | } |
| 63 | |
| 64 | gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen, |
| 65 | gsvp->attr, gsvp->isnull); |
| 66 | |
| 67 | pfree(cleanedItVec); |
| 68 | } |
| 69 | |
| 70 | /* |
| 71 | * Recompute unions of left- and right-side subkeys after a page split, |
| 72 | * ignoring any tuples that are marked in spl->spl_dontcare[]. |
| 73 | * |
| 74 | * Note: we always recompute union keys for all index columns. In some cases |
| 75 | * this might represent duplicate work for the leftmost column(s), but it's |
| 76 | * not safe to assume that "zero penalty to move a tuple" means "the union |
| 77 | * key doesn't change at all". Penalty functions aren't 100% accurate. |
| 78 | */ |
| 79 | static void |
| 80 | gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl) |
| 81 | { |
| 82 | GistSplitUnion gsvp; |
| 83 | |
| 84 | gsvp.dontcare = spl->spl_dontcare; |
| 85 | |
| 86 | gsvp.entries = spl->splitVector.spl_left; |
| 87 | gsvp.len = spl->splitVector.spl_nleft; |
| 88 | gsvp.attr = spl->spl_lattr; |
| 89 | gsvp.isnull = spl->spl_lisnull; |
| 90 | |
| 91 | gistunionsubkeyvec(giststate, itvec, &gsvp); |
| 92 | |
| 93 | gsvp.entries = spl->splitVector.spl_right; |
| 94 | gsvp.len = spl->splitVector.spl_nright; |
| 95 | gsvp.attr = spl->spl_rattr; |
| 96 | gsvp.isnull = spl->spl_risnull; |
| 97 | |
| 98 | gistunionsubkeyvec(giststate, itvec, &gsvp); |
| 99 | } |
| 100 | |
| 101 | /* |
| 102 | * Find tuples that are "don't cares", that is could be moved to the other |
| 103 | * side of the split with zero penalty, so far as the attno column is |
| 104 | * concerned. |
| 105 | * |
| 106 | * Don't-care tuples are marked by setting the corresponding entry in |
| 107 | * spl->spl_dontcare[] to "true". Caller must have initialized that array |
| 108 | * to zeroes. |
| 109 | * |
| 110 | * Returns number of don't-cares found. |
| 111 | */ |
| 112 | static int |
| 113 | findDontCares(Relation r, GISTSTATE *giststate, GISTENTRY *valvec, |
| 114 | GistSplitVector *spl, int attno) |
| 115 | { |
| 116 | int i; |
| 117 | GISTENTRY entry; |
| 118 | int NumDontCare = 0; |
| 119 | |
| 120 | /* |
| 121 | * First, search the left-side tuples to see if any have zero penalty to |
| 122 | * be added to the right-side union key. |
| 123 | * |
| 124 | * attno column is known all-not-null (see gistSplitByKey), so we need not |
| 125 | * check for nulls |
| 126 | */ |
| 127 | gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL, |
| 128 | (OffsetNumber) 0, false); |
| 129 | for (i = 0; i < spl->splitVector.spl_nleft; i++) |
| 130 | { |
| 131 | int j = spl->splitVector.spl_left[i]; |
| 132 | float penalty = gistpenalty(giststate, attno, &entry, false, |
| 133 | &valvec[j], false); |
| 134 | |
| 135 | if (penalty == 0.0) |
| 136 | { |
| 137 | spl->spl_dontcare[j] = true; |
| 138 | NumDontCare++; |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | /* And conversely for the right-side tuples */ |
| 143 | gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL, |
| 144 | (OffsetNumber) 0, false); |
| 145 | for (i = 0; i < spl->splitVector.spl_nright; i++) |
| 146 | { |
| 147 | int j = spl->splitVector.spl_right[i]; |
| 148 | float penalty = gistpenalty(giststate, attno, &entry, false, |
| 149 | &valvec[j], false); |
| 150 | |
| 151 | if (penalty == 0.0) |
| 152 | { |
| 153 | spl->spl_dontcare[j] = true; |
| 154 | NumDontCare++; |
| 155 | } |
| 156 | } |
| 157 | |
| 158 | return NumDontCare; |
| 159 | } |
| 160 | |
| 161 | /* |
| 162 | * Remove tuples that are marked don't-cares from the tuple index array a[] |
| 163 | * of length *len. This is applied separately to the spl_left and spl_right |
| 164 | * arrays. |
| 165 | */ |
| 166 | static void |
| 167 | removeDontCares(OffsetNumber *a, int *len, const bool *dontcare) |
| 168 | { |
| 169 | int origlen, |
| 170 | newlen, |
| 171 | i; |
| 172 | OffsetNumber *curwpos; |
| 173 | |
| 174 | origlen = newlen = *len; |
| 175 | curwpos = a; |
| 176 | for (i = 0; i < origlen; i++) |
| 177 | { |
| 178 | OffsetNumber ai = a[i]; |
| 179 | |
| 180 | if (dontcare[ai] == false) |
| 181 | { |
| 182 | /* re-emit item into a[] */ |
| 183 | *curwpos = ai; |
| 184 | curwpos++; |
| 185 | } |
| 186 | else |
| 187 | newlen--; |
| 188 | } |
| 189 | |
| 190 | *len = newlen; |
| 191 | } |
| 192 | |
| 193 | /* |
| 194 | * Place a single don't-care tuple into either the left or right side of the |
| 195 | * split, according to which has least penalty for merging the tuple into |
| 196 | * the previously-computed union keys. We need consider only columns starting |
| 197 | * at attno. |
| 198 | */ |
| 199 | static void |
| 200 | placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v, |
| 201 | IndexTuple itup, OffsetNumber off, int attno) |
| 202 | { |
| 203 | GISTENTRY identry[INDEX_MAX_KEYS]; |
| 204 | bool isnull[INDEX_MAX_KEYS]; |
| 205 | bool toLeft = true; |
| 206 | |
| 207 | gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0, |
| 208 | identry, isnull); |
| 209 | |
| 210 | for (; attno < giststate->nonLeafTupdesc->natts; attno++) |
| 211 | { |
| 212 | float lpenalty, |
| 213 | rpenalty; |
| 214 | GISTENTRY entry; |
| 215 | |
| 216 | gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, false); |
| 217 | lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno], |
| 218 | identry + attno, isnull[attno]); |
| 219 | gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, false); |
| 220 | rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno], |
| 221 | identry + attno, isnull[attno]); |
| 222 | |
| 223 | if (lpenalty != rpenalty) |
| 224 | { |
| 225 | if (lpenalty > rpenalty) |
| 226 | toLeft = false; |
| 227 | break; |
| 228 | } |
| 229 | } |
| 230 | |
| 231 | if (toLeft) |
| 232 | v->splitVector.spl_left[v->splitVector.spl_nleft++] = off; |
| 233 | else |
| 234 | v->splitVector.spl_right[v->splitVector.spl_nright++] = off; |
| 235 | } |
| 236 | |
| 237 | #define SWAPVAR( s, d, t ) \ |
| 238 | do { \ |
| 239 | (t) = (s); \ |
| 240 | (s) = (d); \ |
| 241 | (d) = (t); \ |
| 242 | } while(0) |
| 243 | |
| 244 | /* |
| 245 | * Clean up when we did a secondary split but the user-defined PickSplit |
| 246 | * method didn't support it (leaving spl_ldatum_exists or spl_rdatum_exists |
| 247 | * true). |
| 248 | * |
| 249 | * We consider whether to swap the left and right outputs of the secondary |
| 250 | * split; this can be worthwhile if the penalty for merging those tuples into |
| 251 | * the previously chosen sets is less that way. |
| 252 | * |
| 253 | * In any case we must update the union datums for the current column by |
| 254 | * adding in the previous union keys (oldL/oldR), since the user-defined |
| 255 | * PickSplit method didn't do so. |
| 256 | */ |
| 257 | static void |
| 258 | supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno, |
| 259 | GIST_SPLITVEC *sv, Datum oldL, Datum oldR) |
| 260 | { |
| 261 | bool leaveOnLeft = true, |
| 262 | tmpBool; |
| 263 | GISTENTRY entryL, |
| 264 | entryR, |
| 265 | entrySL, |
| 266 | entrySR; |
| 267 | |
| 268 | gistentryinit(entryL, oldL, r, NULL, 0, false); |
| 269 | gistentryinit(entryR, oldR, r, NULL, 0, false); |
| 270 | gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false); |
| 271 | gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false); |
| 272 | |
| 273 | if (sv->spl_ldatum_exists && sv->spl_rdatum_exists) |
| 274 | { |
| 275 | float penalty1, |
| 276 | penalty2; |
| 277 | |
| 278 | penalty1 = gistpenalty(giststate, attno, &entryL, false, &entrySL, false) + |
| 279 | gistpenalty(giststate, attno, &entryR, false, &entrySR, false); |
| 280 | penalty2 = gistpenalty(giststate, attno, &entryL, false, &entrySR, false) + |
| 281 | gistpenalty(giststate, attno, &entryR, false, &entrySL, false); |
| 282 | |
| 283 | if (penalty1 > penalty2) |
| 284 | leaveOnLeft = false; |
| 285 | } |
| 286 | else |
| 287 | { |
| 288 | GISTENTRY *entry1 = (sv->spl_ldatum_exists) ? &entryL : &entryR; |
| 289 | float penalty1, |
| 290 | penalty2; |
| 291 | |
| 292 | /* |
| 293 | * There is only one previously defined union, so we just choose swap |
| 294 | * or not by lowest penalty for that side. We can only get here if a |
| 295 | * secondary split happened to have all NULLs in its column in the |
| 296 | * tuples that the outer recursion level had assigned to one side. |
| 297 | * (Note that the null checks in gistSplitByKey don't prevent the |
| 298 | * case, because they'll only be checking tuples that were considered |
| 299 | * don't-cares at the outer recursion level, not the tuples that went |
| 300 | * into determining the passed-down left and right union keys.) |
| 301 | */ |
| 302 | penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false); |
| 303 | penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false); |
| 304 | |
| 305 | if (penalty1 < penalty2) |
| 306 | leaveOnLeft = (sv->spl_ldatum_exists) ? true : false; |
| 307 | else |
| 308 | leaveOnLeft = (sv->spl_rdatum_exists) ? true : false; |
| 309 | } |
| 310 | |
| 311 | if (leaveOnLeft == false) |
| 312 | { |
| 313 | /* |
| 314 | * swap left and right |
| 315 | */ |
| 316 | OffsetNumber *off, |
| 317 | noff; |
| 318 | Datum datum; |
| 319 | |
| 320 | SWAPVAR(sv->spl_left, sv->spl_right, off); |
| 321 | SWAPVAR(sv->spl_nleft, sv->spl_nright, noff); |
| 322 | SWAPVAR(sv->spl_ldatum, sv->spl_rdatum, datum); |
| 323 | gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, false); |
| 324 | gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, false); |
| 325 | } |
| 326 | |
| 327 | if (sv->spl_ldatum_exists) |
| 328 | gistMakeUnionKey(giststate, attno, &entryL, false, &entrySL, false, |
| 329 | &sv->spl_ldatum, &tmpBool); |
| 330 | |
| 331 | if (sv->spl_rdatum_exists) |
| 332 | gistMakeUnionKey(giststate, attno, &entryR, false, &entrySR, false, |
| 333 | &sv->spl_rdatum, &tmpBool); |
| 334 | |
| 335 | sv->spl_ldatum_exists = sv->spl_rdatum_exists = false; |
| 336 | } |
| 337 | |
| 338 | /* |
| 339 | * Trivial picksplit implementation. Function called only |
| 340 | * if user-defined picksplit puts all keys on the same side of the split. |
| 341 | * That is a bug of user-defined picksplit but we don't want to fail. |
| 342 | */ |
| 343 | static void |
| 344 | genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC *v, int attno) |
| 345 | { |
| 346 | OffsetNumber i, |
| 347 | maxoff; |
| 348 | int nbytes; |
| 349 | GistEntryVector *evec; |
| 350 | |
| 351 | maxoff = entryvec->n - 1; |
| 352 | |
| 353 | nbytes = (maxoff + 2) * sizeof(OffsetNumber); |
| 354 | |
| 355 | v->spl_left = (OffsetNumber *) palloc(nbytes); |
| 356 | v->spl_right = (OffsetNumber *) palloc(nbytes); |
| 357 | v->spl_nleft = v->spl_nright = 0; |
| 358 | |
| 359 | for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) |
| 360 | { |
| 361 | if (i <= (maxoff - FirstOffsetNumber + 1) / 2) |
| 362 | { |
| 363 | v->spl_left[v->spl_nleft] = i; |
| 364 | v->spl_nleft++; |
| 365 | } |
| 366 | else |
| 367 | { |
| 368 | v->spl_right[v->spl_nright] = i; |
| 369 | v->spl_nright++; |
| 370 | } |
| 371 | } |
| 372 | |
| 373 | /* |
| 374 | * Form union datums for each side |
| 375 | */ |
| 376 | evec = palloc(sizeof(GISTENTRY) * entryvec->n + GEVHDRSZ); |
| 377 | |
| 378 | evec->n = v->spl_nleft; |
| 379 | memcpy(evec->vector, entryvec->vector + FirstOffsetNumber, |
| 380 | sizeof(GISTENTRY) * evec->n); |
| 381 | v->spl_ldatum = FunctionCall2Coll(&giststate->unionFn[attno], |
| 382 | giststate->supportCollation[attno], |
| 383 | PointerGetDatum(evec), |
| 384 | PointerGetDatum(&nbytes)); |
| 385 | |
| 386 | evec->n = v->spl_nright; |
| 387 | memcpy(evec->vector, entryvec->vector + FirstOffsetNumber + v->spl_nleft, |
| 388 | sizeof(GISTENTRY) * evec->n); |
| 389 | v->spl_rdatum = FunctionCall2Coll(&giststate->unionFn[attno], |
| 390 | giststate->supportCollation[attno], |
| 391 | PointerGetDatum(evec), |
| 392 | PointerGetDatum(&nbytes)); |
| 393 | } |
| 394 | |
| 395 | /* |
| 396 | * Calls user picksplit method for attno column to split tuples into |
| 397 | * two vectors. |
| 398 | * |
| 399 | * Returns false if split is complete (there are no more index columns, or |
| 400 | * there is no need to consider them because split is optimal already). |
| 401 | * |
| 402 | * Returns true and v->spl_dontcare = NULL if the picksplit result is |
| 403 | * degenerate (all tuples seem to be don't-cares), so we should just |
| 404 | * disregard this column and split on the next column(s) instead. |
| 405 | * |
| 406 | * Returns true and v->spl_dontcare != NULL if there are don't-care tuples |
| 407 | * that could be relocated based on the next column(s). The don't-care |
| 408 | * tuples have been removed from the split and must be reinserted by caller. |
| 409 | * There is at least one non-don't-care tuple on each side of the split, |
| 410 | * and union keys for all columns are updated to include just those tuples. |
| 411 | * |
| 412 | * A true result implies there is at least one more index column. |
| 413 | */ |
| 414 | static bool |
| 415 | gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v, |
| 416 | IndexTuple *itup, int len, GISTSTATE *giststate) |
| 417 | { |
| 418 | GIST_SPLITVEC *sv = &v->splitVector; |
| 419 | |
| 420 | /* |
| 421 | * Prepare spl_ldatum/spl_rdatum/spl_ldatum_exists/spl_rdatum_exists in |
| 422 | * case we are doing a secondary split (see comments in gist.h). |
| 423 | */ |
| 424 | sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true; |
| 425 | sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true; |
| 426 | sv->spl_ldatum = v->spl_lattr[attno]; |
| 427 | sv->spl_rdatum = v->spl_rattr[attno]; |
| 428 | |
| 429 | /* |
| 430 | * Let the opclass-specific PickSplit method do its thing. Note that at |
| 431 | * this point we know there are no null keys in the entryvec. |
| 432 | */ |
| 433 | FunctionCall2Coll(&giststate->picksplitFn[attno], |
| 434 | giststate->supportCollation[attno], |
| 435 | PointerGetDatum(entryvec), |
| 436 | PointerGetDatum(sv)); |
| 437 | |
| 438 | if (sv->spl_nleft == 0 || sv->spl_nright == 0) |
| 439 | { |
| 440 | /* |
| 441 | * User-defined picksplit failed to create an actual split, ie it put |
| 442 | * everything on the same side. Complain but cope. |
| 443 | */ |
| 444 | ereport(DEBUG1, |
| 445 | (errcode(ERRCODE_INTERNAL_ERROR), |
| 446 | errmsg("picksplit method for column %d of index \"%s\" failed" , |
| 447 | attno + 1, RelationGetRelationName(r)), |
| 448 | errhint("The index is not optimal. To optimize it, contact a developer, or try to use the column as the second one in the CREATE INDEX command." ))); |
| 449 | |
| 450 | /* |
| 451 | * Reinit GIST_SPLITVEC. Although these fields are not used by |
| 452 | * genericPickSplit(), set them up for further processing |
| 453 | */ |
| 454 | sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true; |
| 455 | sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true; |
| 456 | sv->spl_ldatum = v->spl_lattr[attno]; |
| 457 | sv->spl_rdatum = v->spl_rattr[attno]; |
| 458 | |
| 459 | /* Do a generic split */ |
| 460 | genericPickSplit(giststate, entryvec, sv, attno); |
| 461 | } |
| 462 | else |
| 463 | { |
| 464 | /* hack for compatibility with old picksplit API */ |
| 465 | if (sv->spl_left[sv->spl_nleft - 1] == InvalidOffsetNumber) |
| 466 | sv->spl_left[sv->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1); |
| 467 | if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber) |
| 468 | sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1); |
| 469 | } |
| 470 | |
| 471 | /* Clean up if PickSplit didn't take care of a secondary split */ |
| 472 | if (sv->spl_ldatum_exists || sv->spl_rdatum_exists) |
| 473 | supportSecondarySplit(r, giststate, attno, sv, |
| 474 | v->spl_lattr[attno], v->spl_rattr[attno]); |
| 475 | |
| 476 | /* emit union datums computed by PickSplit back to v arrays */ |
| 477 | v->spl_lattr[attno] = sv->spl_ldatum; |
| 478 | v->spl_rattr[attno] = sv->spl_rdatum; |
| 479 | v->spl_lisnull[attno] = false; |
| 480 | v->spl_risnull[attno] = false; |
| 481 | |
| 482 | /* |
| 483 | * If index columns remain, then consider whether we can improve the split |
| 484 | * by using them. |
| 485 | */ |
| 486 | v->spl_dontcare = NULL; |
| 487 | |
| 488 | if (attno + 1 < giststate->nonLeafTupdesc->natts) |
| 489 | { |
| 490 | int NumDontCare; |
| 491 | |
| 492 | /* |
| 493 | * Make a quick check to see if left and right union keys are equal; |
| 494 | * if so, the split is certainly degenerate, so tell caller to |
| 495 | * re-split with the next column. |
| 496 | */ |
| 497 | if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum)) |
| 498 | return true; |
| 499 | |
| 500 | /* |
| 501 | * Locate don't-care tuples, if any. If there are none, the split is |
| 502 | * optimal, so just fall out and return false. |
| 503 | */ |
| 504 | v->spl_dontcare = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1)); |
| 505 | |
| 506 | NumDontCare = findDontCares(r, giststate, entryvec->vector, v, attno); |
| 507 | |
| 508 | if (NumDontCare > 0) |
| 509 | { |
| 510 | /* |
| 511 | * Remove don't-cares from spl_left[] and spl_right[]. |
| 512 | */ |
| 513 | removeDontCares(sv->spl_left, &sv->spl_nleft, v->spl_dontcare); |
| 514 | removeDontCares(sv->spl_right, &sv->spl_nright, v->spl_dontcare); |
| 515 | |
| 516 | /* |
| 517 | * If all tuples on either side were don't-cares, the split is |
| 518 | * degenerate, and we're best off to ignore it and split on the |
| 519 | * next column. (We used to try to press on with a secondary |
| 520 | * split by forcing a random tuple on each side to be treated as |
| 521 | * non-don't-care, but it seems unlikely that that technique |
| 522 | * really gives a better result. Note that we don't want to try a |
| 523 | * secondary split with empty left or right primary split sides, |
| 524 | * because then there is no union key on that side for the |
| 525 | * PickSplit function to try to expand, so it can have no good |
| 526 | * figure of merit for what it's doing. Also note that this check |
| 527 | * ensures we can't produce a bogus one-side-only split in the |
| 528 | * NumDontCare == 1 special case below.) |
| 529 | */ |
| 530 | if (sv->spl_nleft == 0 || sv->spl_nright == 0) |
| 531 | { |
| 532 | v->spl_dontcare = NULL; |
| 533 | return true; |
| 534 | } |
| 535 | |
| 536 | /* |
| 537 | * Recompute union keys, considering only non-don't-care tuples. |
| 538 | * NOTE: this will set union keys for remaining index columns, |
| 539 | * which will cause later calls of gistUserPicksplit to pass those |
| 540 | * values down to user-defined PickSplit methods with |
| 541 | * spl_ldatum_exists/spl_rdatum_exists set true. |
| 542 | */ |
| 543 | gistunionsubkey(giststate, itup, v); |
| 544 | |
| 545 | if (NumDontCare == 1) |
| 546 | { |
| 547 | /* |
| 548 | * If there's only one don't-care tuple then we can't do a |
| 549 | * PickSplit on it, so just choose whether to send it left or |
| 550 | * right by comparing penalties. We needed the |
| 551 | * gistunionsubkey step anyway so that we have appropriate |
| 552 | * union keys for figuring the penalties. |
| 553 | */ |
| 554 | OffsetNumber toMove; |
| 555 | |
| 556 | /* find it ... */ |
| 557 | for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++) |
| 558 | { |
| 559 | if (v->spl_dontcare[toMove]) |
| 560 | break; |
| 561 | } |
| 562 | Assert(toMove < entryvec->n); |
| 563 | |
| 564 | /* ... and assign it to cheaper side */ |
| 565 | placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1); |
| 566 | |
| 567 | /* |
| 568 | * At this point the union keys are wrong, but we don't care |
| 569 | * because we're done splitting. The outermost recursion |
| 570 | * level of gistSplitByKey will fix things before returning. |
| 571 | */ |
| 572 | } |
| 573 | else |
| 574 | return true; |
| 575 | } |
| 576 | } |
| 577 | |
| 578 | return false; |
| 579 | } |
| 580 | |
| 581 | /* |
| 582 | * simply split page in half |
| 583 | */ |
| 584 | static void |
| 585 | gistSplitHalf(GIST_SPLITVEC *v, int len) |
| 586 | { |
| 587 | int i; |
| 588 | |
| 589 | v->spl_nright = v->spl_nleft = 0; |
| 590 | v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); |
| 591 | v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); |
| 592 | for (i = 1; i <= len; i++) |
| 593 | if (i < len / 2) |
| 594 | v->spl_right[v->spl_nright++] = i; |
| 595 | else |
| 596 | v->spl_left[v->spl_nleft++] = i; |
| 597 | |
| 598 | /* we need not compute union keys, caller took care of it */ |
| 599 | } |
| 600 | |
| 601 | /* |
| 602 | * gistSplitByKey: main entry point for page-splitting algorithm |
| 603 | * |
| 604 | * r: index relation |
| 605 | * page: page being split |
| 606 | * itup: array of IndexTuples to be processed |
| 607 | * len: number of IndexTuples to be processed (must be at least 2) |
| 608 | * giststate: additional info about index |
| 609 | * v: working state and output area |
| 610 | * attno: column we are working on (zero-based index) |
| 611 | * |
| 612 | * Outside caller must initialize v->spl_lisnull and v->spl_risnull arrays |
| 613 | * to all-true. On return, spl_left/spl_nleft contain indexes of tuples |
| 614 | * to go left, spl_right/spl_nright contain indexes of tuples to go right, |
| 615 | * spl_lattr/spl_lisnull contain left-side union key values, and |
| 616 | * spl_rattr/spl_risnull contain right-side union key values. Other fields |
| 617 | * in this struct are workspace for this file. |
| 618 | * |
| 619 | * Outside caller must pass zero for attno. The function may internally |
| 620 | * recurse to the next column by passing attno+1. |
| 621 | */ |
| 622 | void |
| 623 | gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, |
| 624 | GISTSTATE *giststate, GistSplitVector *v, int attno) |
| 625 | { |
| 626 | GistEntryVector *entryvec; |
| 627 | OffsetNumber *offNullTuples; |
| 628 | int nOffNullTuples = 0; |
| 629 | int i; |
| 630 | |
| 631 | /* generate the item array, and identify tuples with null keys */ |
| 632 | /* note that entryvec->vector[0] goes unused in this code */ |
| 633 | entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY)); |
| 634 | entryvec->n = len + 1; |
| 635 | offNullTuples = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); |
| 636 | |
| 637 | for (i = 1; i <= len; i++) |
| 638 | { |
| 639 | Datum datum; |
| 640 | bool IsNull; |
| 641 | |
| 642 | datum = index_getattr(itup[i - 1], attno + 1, giststate->leafTupdesc, |
| 643 | &IsNull); |
| 644 | gistdentryinit(giststate, attno, &(entryvec->vector[i]), |
| 645 | datum, r, page, i, |
| 646 | false, IsNull); |
| 647 | if (IsNull) |
| 648 | offNullTuples[nOffNullTuples++] = i; |
| 649 | } |
| 650 | |
| 651 | if (nOffNullTuples == len) |
| 652 | { |
| 653 | /* |
| 654 | * Corner case: All keys in attno column are null, so just transfer |
| 655 | * our attention to the next column. If there's no next column, just |
| 656 | * split page in half. |
| 657 | */ |
| 658 | v->spl_risnull[attno] = v->spl_lisnull[attno] = true; |
| 659 | |
| 660 | if (attno + 1 < giststate->nonLeafTupdesc->natts) |
| 661 | gistSplitByKey(r, page, itup, len, giststate, v, attno + 1); |
| 662 | else |
| 663 | gistSplitHalf(&v->splitVector, len); |
| 664 | } |
| 665 | else if (nOffNullTuples > 0) |
| 666 | { |
| 667 | int j = 0; |
| 668 | |
| 669 | /* |
| 670 | * We don't want to mix NULL and not-NULL keys on one page, so split |
| 671 | * nulls to right page and not-nulls to left. |
| 672 | */ |
| 673 | v->splitVector.spl_right = offNullTuples; |
| 674 | v->splitVector.spl_nright = nOffNullTuples; |
| 675 | v->spl_risnull[attno] = true; |
| 676 | |
| 677 | v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); |
| 678 | v->splitVector.spl_nleft = 0; |
| 679 | for (i = 1; i <= len; i++) |
| 680 | if (j < v->splitVector.spl_nright && offNullTuples[j] == i) |
| 681 | j++; |
| 682 | else |
| 683 | v->splitVector.spl_left[v->splitVector.spl_nleft++] = i; |
| 684 | |
| 685 | /* Compute union keys, unless outer recursion level will handle it */ |
| 686 | if (attno == 0 && giststate->nonLeafTupdesc->natts == 1) |
| 687 | { |
| 688 | v->spl_dontcare = NULL; |
| 689 | gistunionsubkey(giststate, itup, v); |
| 690 | } |
| 691 | } |
| 692 | else |
| 693 | { |
| 694 | /* |
| 695 | * All keys are not-null, so apply user-defined PickSplit method |
| 696 | */ |
| 697 | if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate)) |
| 698 | { |
| 699 | /* |
| 700 | * Splitting on attno column is not optimal, so consider |
| 701 | * redistributing don't-care tuples according to the next column |
| 702 | */ |
| 703 | Assert(attno + 1 < giststate->nonLeafTupdesc->natts); |
| 704 | |
| 705 | if (v->spl_dontcare == NULL) |
| 706 | { |
| 707 | /* |
| 708 | * This split was actually degenerate, so ignore it altogether |
| 709 | * and just split according to the next column. |
| 710 | */ |
| 711 | gistSplitByKey(r, page, itup, len, giststate, v, attno + 1); |
| 712 | } |
| 713 | else |
| 714 | { |
| 715 | /* |
| 716 | * Form an array of just the don't-care tuples to pass to a |
| 717 | * recursive invocation of this function for the next column. |
| 718 | */ |
| 719 | IndexTuple *newitup = (IndexTuple *) palloc(len * sizeof(IndexTuple)); |
| 720 | OffsetNumber *map = (OffsetNumber *) palloc(len * sizeof(OffsetNumber)); |
| 721 | int newlen = 0; |
| 722 | GIST_SPLITVEC backupSplit; |
| 723 | |
| 724 | for (i = 0; i < len; i++) |
| 725 | { |
| 726 | if (v->spl_dontcare[i + 1]) |
| 727 | { |
| 728 | newitup[newlen] = itup[i]; |
| 729 | map[newlen] = i + 1; |
| 730 | newlen++; |
| 731 | } |
| 732 | } |
| 733 | |
| 734 | Assert(newlen > 0); |
| 735 | |
| 736 | /* |
| 737 | * Make a backup copy of v->splitVector, since the recursive |
| 738 | * call will overwrite that with its own result. |
| 739 | */ |
| 740 | backupSplit = v->splitVector; |
| 741 | backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len); |
| 742 | memcpy(backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber) * v->splitVector.spl_nleft); |
| 743 | backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len); |
| 744 | memcpy(backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber) * v->splitVector.spl_nright); |
| 745 | |
| 746 | /* Recursively decide how to split the don't-care tuples */ |
| 747 | gistSplitByKey(r, page, newitup, newlen, giststate, v, attno + 1); |
| 748 | |
| 749 | /* Merge result of subsplit with non-don't-care tuples */ |
| 750 | for (i = 0; i < v->splitVector.spl_nleft; i++) |
| 751 | backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1]; |
| 752 | for (i = 0; i < v->splitVector.spl_nright; i++) |
| 753 | backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1]; |
| 754 | |
| 755 | v->splitVector = backupSplit; |
| 756 | } |
| 757 | } |
| 758 | } |
| 759 | |
| 760 | /* |
| 761 | * If we're handling a multicolumn index, at the end of the recursion |
| 762 | * recompute the left and right union datums for all index columns. This |
| 763 | * makes sure we hand back correct union datums in all corner cases, |
| 764 | * including when we haven't processed all columns to start with, or when |
| 765 | * a secondary split moved "don't care" tuples from one side to the other |
| 766 | * (we really shouldn't assume that that didn't change the union datums). |
| 767 | * |
| 768 | * Note: when we're in an internal recursion (attno > 0), we do not worry |
| 769 | * about whether the union datums we return with are sensible, since |
| 770 | * calling levels won't care. Also, in a single-column index, we expect |
| 771 | * that PickSplit (or the special cases above) produced correct union |
| 772 | * datums. |
| 773 | */ |
| 774 | if (attno == 0 && giststate->nonLeafTupdesc->natts > 1) |
| 775 | { |
| 776 | v->spl_dontcare = NULL; |
| 777 | gistunionsubkey(giststate, itup, v); |
| 778 | } |
| 779 | } |
| 780 | |