| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * indextuple.c |
| 4 | * This file contains index tuple accessor and mutator routines, |
| 5 | * as well as various tuple utilities. |
| 6 | * |
| 7 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 8 | * Portions Copyright (c) 1994, Regents of the University of California |
| 9 | * |
| 10 | * |
| 11 | * IDENTIFICATION |
| 12 | * src/backend/access/common/indextuple.c |
| 13 | * |
| 14 | *------------------------------------------------------------------------- |
| 15 | */ |
| 16 | |
| 17 | #include "postgres.h" |
| 18 | |
| 19 | #include "access/htup_details.h" |
| 20 | #include "access/itup.h" |
| 21 | #include "access/tuptoaster.h" |
| 22 | |
| 23 | |
| 24 | /* ---------------------------------------------------------------- |
| 25 | * index_ tuple interface routines |
| 26 | * ---------------------------------------------------------------- |
| 27 | */ |
| 28 | |
/* ----------------
 *		index_form_tuple
 *
 *		Construct an index tuple from the given values[]/isnull[] arrays,
 *		one entry per attribute of tupleDescriptor.  The result is
 *		palloc'd in the current memory context; its t_tid is left zeroed
 *		(by palloc0) for the caller to fill in.
 *
 * This shouldn't leak any memory; otherwise, callers such as
 * tuplesort_putindextuplevalues() will be very unhappy.
 *
 * This shouldn't perform external table access provided caller
 * does not pass values that are stored EXTERNAL.
 * ----------------
 */
IndexTuple
index_form_tuple(TupleDesc tupleDescriptor,
				 Datum *values,
				 bool *isnull)
{
	char	   *tp;				/* tuple pointer */
	IndexTuple	tuple;			/* return tuple */
	Size		size,
				data_size,
				hoff;
	int			i;
	unsigned short infomask = 0;
	bool		hasnull = false;
	uint16		tupmask = 0;
	int			numberOfAttributes = tupleDescriptor->natts;

#ifdef TOAST_INDEX_HACK
	/*
	 * Working copies of values[]: any externally-TOASTed or compressible
	 * input is replaced here, and untoasted_free[] remembers which entries
	 * we allocated and must pfree before returning.
	 */
	Datum		untoasted_values[INDEX_MAX_KEYS];
	bool		untoasted_free[INDEX_MAX_KEYS];
#endif

	if (numberOfAttributes > INDEX_MAX_KEYS)
		ereport(ERROR,
				(errcode(ERRCODE_TOO_MANY_COLUMNS),
				 errmsg("number of index columns (%d) exceeds limit (%d)" ,
						numberOfAttributes, INDEX_MAX_KEYS)));

#ifdef TOAST_INDEX_HACK
	for (i = 0; i < numberOfAttributes; i++)
	{
		Form_pg_attribute att = TupleDescAttr(tupleDescriptor, i);

		untoasted_values[i] = values[i];
		untoasted_free[i] = false;

		/* Do nothing if value is NULL or not of varlena type */
		if (isnull[i] || att->attlen != -1)
			continue;

		/*
		 * If value is stored EXTERNAL, must fetch it so we are not depending
		 * on outside storage.  This should be improved someday.
		 */
		if (VARATT_IS_EXTERNAL(DatumGetPointer(values[i])))
		{
			untoasted_values[i] =
				PointerGetDatum(heap_tuple_fetch_attr((struct varlena *)
													  DatumGetPointer(values[i])));
			untoasted_free[i] = true;
		}

		/*
		 * If value is above size target, and is of a compressible datatype,
		 * try to compress it in-line.
		 */
		if (!VARATT_IS_EXTENDED(DatumGetPointer(untoasted_values[i])) &&
			VARSIZE(DatumGetPointer(untoasted_values[i])) > TOAST_INDEX_TARGET &&
			(att->attstorage == 'x' || att->attstorage == 'm'))
		{
			Datum		cvalue = toast_compress_datum(untoasted_values[i]);

			if (DatumGetPointer(cvalue) != NULL)
			{
				/* successful compression */
				if (untoasted_free[i])
					pfree(DatumGetPointer(untoasted_values[i]));
				untoasted_values[i] = cvalue;
				untoasted_free[i] = true;
			}
		}
	}
#endif

	/* Scan for any null attribute; presence determines header layout */
	for (i = 0; i < numberOfAttributes; i++)
	{
		if (isnull[i])
		{
			hasnull = true;
			break;
		}
	}

	if (hasnull)
		infomask |= INDEX_NULL_MASK;

	/* Header size depends on whether a null bitmap is needed */
	hoff = IndexInfoFindDataOffset(infomask);
#ifdef TOAST_INDEX_HACK
	data_size = heap_compute_data_size(tupleDescriptor,
									   untoasted_values, isnull);
#else
	data_size = heap_compute_data_size(tupleDescriptor,
									   values, isnull);
#endif
	size = hoff + data_size;
	size = MAXALIGN(size);		/* be conservative */

	/* palloc0 zeroes the header (t_tid, t_info) and any alignment padding */
	tp = (char *) palloc0(size);
	tuple = (IndexTuple) tp;

	heap_fill_tuple(tupleDescriptor,
#ifdef TOAST_INDEX_HACK
					untoasted_values,
#else
					values,
#endif
					isnull,
					(char *) tp + hoff,
					data_size,
					&tupmask,
					(hasnull ? (bits8 *) tp + sizeof(IndexTupleData) : NULL));

#ifdef TOAST_INDEX_HACK
	/* Release any detoasted/compressed copies we made above */
	for (i = 0; i < numberOfAttributes; i++)
	{
		if (untoasted_free[i])
			pfree(DatumGetPointer(untoasted_values[i]));
	}
#endif

	/*
	 * We do this because heap_fill_tuple wants to initialize a "tupmask"
	 * which is used for HeapTuples, but we want an indextuple infomask. The
	 * only relevant info is the "has variable attributes" field. We have
	 * already set the hasnull bit above.
	 */
	if (tupmask & HEAP_HASVARWIDTH)
		infomask |= INDEX_VAR_MASK;

	/* Also assert we got rid of external attributes */
#ifdef TOAST_INDEX_HACK
	Assert((tupmask & HEAP_HASEXTERNAL) == 0);
#endif

	/*
	 * Here we make sure that the size will fit in the field reserved for it
	 * in t_info.
	 */
	if ((size & INDEX_SIZE_MASK) != size)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("index row requires %zu bytes, maximum size is %zu" ,
						size, (Size) INDEX_SIZE_MASK)));

	/* Size occupies the low bits of t_info; flag bits were OR'd in above */
	infomask |= size;

	/*
	 * initialize metadata
	 */
	tuple->t_info = infomask;
	return tuple;
}
| 190 | |
/* ----------------
 *		nocache_index_getattr
 *
 *		Extract the value of attribute attnum (1-based) from an index
 *		tuple the hard way, by walking the tuple's data area.
 *
 *		This gets called from index_getattr() macro, and only in cases
 *		where we can't use cacheoffset and the value is not null.
 *
 *		This caches attribute offsets in the attribute descriptor.
 *
 *		An alternative way to speed things up would be to cache offsets
 *		with the tuple, but that seems more difficult unless you take
 *		the storage hit of actually putting those offsets into the
 *		tuple you send to disk.  Yuck.
 *
 *		This scheme will be slightly slower than that, but should
 *		perform well for queries which hit large #'s of tuples.  After
 *		you cache the offsets once, examining all the other tuples using
 *		the same attribute descriptor will go much quicker. -cim 5/4/91
 * ----------------
 */
Datum
nocache_index_getattr(IndexTuple tup,
					  int attnum,
					  TupleDesc tupleDesc)
{
	char	   *tp;				/* ptr to data part of tuple */
	bits8	   *bp = NULL;		/* ptr to null bitmap in tuple */
	bool		slow = false;	/* do we have to walk attrs? */
	int			data_off;		/* tuple data offset */
	int			off;			/* current offset within data */

	/* ----------------
	 *	 Three cases:
	 *
	 *	 1: No nulls and no variable-width attributes.
	 *	 2: Has a null or a var-width AFTER att.
	 *	 3: Has nulls or var-widths BEFORE att.
	 * ----------------
	 */

	data_off = IndexInfoFindDataOffset(tup->t_info);

	attnum--;					/* switch to 0-based attribute numbering */

	if (IndexTupleHasNulls(tup))
	{
		/*
		 * there's a null somewhere in the tuple
		 *
		 * check to see if desired att is null
		 */

		/* XXX "knows" t_bits are just after fixed tuple header! */
		bp = (bits8 *) ((char *) tup + sizeof(IndexTupleData));

		/*
		 * Now check to see if any preceding bits are null...  Any null
		 * before the target att forces the slow path, since nulls occupy
		 * no storage and so invalidate precomputed offsets.
		 */
		{
			int			byte = attnum >> 3;
			int			finalbit = attnum & 0x07;

			/* check for nulls "before" final bit of last byte */
			if ((~bp[byte]) & ((1 << finalbit) - 1))
				slow = true;
			else
			{
				/* check for nulls in any "earlier" bytes */
				int			i;

				for (i = 0; i < byte; i++)
				{
					if (bp[i] != 0xFF)
					{
						slow = true;
						break;
					}
				}
			}
		}
	}

	tp = (char *) tup + data_off;

	if (!slow)
	{
		Form_pg_attribute att;

		/*
		 * If we get here, there are no nulls up to and including the target
		 * attribute.  If we have a cached offset, we can use it.
		 */
		att = TupleDescAttr(tupleDesc, attnum);
		if (att->attcacheoff >= 0)
			return fetchatt(att, tp + att->attcacheoff);

		/*
		 * Otherwise, check for non-fixed-length attrs up to and including
		 * target.  If there aren't any, it's safe to cheaply initialize the
		 * cached offsets for these attrs.
		 */
		if (IndexTupleHasVarwidths(tup))
		{
			int			j;

			for (j = 0; j <= attnum; j++)
			{
				if (TupleDescAttr(tupleDesc, j)->attlen <= 0)
				{
					slow = true;
					break;
				}
			}
		}
	}

	if (!slow)
	{
		int			natts = tupleDesc->natts;
		int			j = 1;

		/*
		 * If we get here, we have a tuple with no nulls or var-widths up to
		 * and including the target attribute, so we can use the cached offset
		 * ... only we don't have it yet, or we'd not have got here.  Since
		 * it's cheap to compute offsets for fixed-width columns, we take the
		 * opportunity to initialize the cached offsets for *all* the leading
		 * fixed-width columns, in hope of avoiding future visits to this
		 * routine.
		 */
		TupleDescAttr(tupleDesc, 0)->attcacheoff = 0;

		/* we might have set some offsets in the slow path previously */
		while (j < natts && TupleDescAttr(tupleDesc, j)->attcacheoff > 0)
			j++;

		/* Resume computing offsets just past the last already-cached attr */
		off = TupleDescAttr(tupleDesc, j - 1)->attcacheoff +
			TupleDescAttr(tupleDesc, j - 1)->attlen;

		for (; j < natts; j++)
		{
			Form_pg_attribute att = TupleDescAttr(tupleDesc, j);

			/* stop caching at the first var-width column */
			if (att->attlen <= 0)
				break;

			off = att_align_nominal(off, att->attalign);

			att->attcacheoff = off;

			off += att->attlen;
		}

		/* loop above must have cached an offset for the target attribute */
		Assert(j > attnum);

		off = TupleDescAttr(tupleDesc, attnum)->attcacheoff;
	}
	else
	{
		bool		usecache = true;
		int			i;

		/*
		 * Now we know that we have to walk the tuple CAREFULLY.  But we still
		 * might be able to cache some offsets for next time.
		 *
		 * Note - This loop is a little tricky.  For each non-null attribute,
		 * we have to first account for alignment padding before the attr,
		 * then advance over the attr based on its length.  Nulls have no
		 * storage and no alignment padding either.  We can use/set
		 * attcacheoff until we reach either a null or a var-width attribute.
		 */
		off = 0;
		for (i = 0;; i++)		/* loop exit is at "break" */
		{
			Form_pg_attribute att = TupleDescAttr(tupleDesc, i);

			if (IndexTupleHasNulls(tup) && att_isnull(i, bp))
			{
				usecache = false;
				continue;		/* this cannot be the target att */
			}

			/* If we know the next offset, we can skip the rest */
			if (usecache && att->attcacheoff >= 0)
				off = att->attcacheoff;
			else if (att->attlen == -1)
			{
				/*
				 * We can only cache the offset for a varlena attribute if the
				 * offset is already suitably aligned, so that there would be
				 * no pad bytes in any case: then the offset will be valid for
				 * either an aligned or unaligned value.
				 */
				if (usecache &&
					off == att_align_nominal(off, att->attalign))
					att->attcacheoff = off;
				else
				{
					off = att_align_pointer(off, att->attalign, -1,
											tp + off);
					usecache = false;
				}
			}
			else
			{
				/* not varlena, so safe to use att_align_nominal */
				off = att_align_nominal(off, att->attalign);

				if (usecache)
					att->attcacheoff = off;
			}

			if (i == attnum)
				break;

			off = att_addlength_pointer(off, att->attlen, tp + off);

			if (usecache && att->attlen <= 0)
				usecache = false;
		}
	}

	return fetchatt(TupleDescAttr(tupleDesc, attnum), tp + off);
}
| 415 | |
/*
 * Convert an index tuple into Datum/isnull arrays.
 *
 * The caller must allocate sufficient storage for the output arrays.
 * (INDEX_MAX_KEYS entries should be enough.)
 *
 * This is nearly the same as heap_deform_tuple(), but for IndexTuples.
 * One difference is that the tuple should never have any missing columns.
 *
 * As a side effect, attcacheoff may be set in the descriptor's attribute
 * entries for leading columns whose offsets are position-independent.
 */
void
index_deform_tuple(IndexTuple tup, TupleDesc tupleDescriptor,
				   Datum *values, bool *isnull)
{
	int			hasnulls = IndexTupleHasNulls(tup);
	int			natts = tupleDescriptor->natts; /* number of atts to extract */
	int			attnum;
	char	   *tp;				/* ptr to tuple data */
	int			off;			/* offset in tuple data */
	bits8	   *bp;				/* ptr to null bitmap in tuple */
	bool		slow = false;	/* can we use/set attcacheoff? */

	/* Assert to protect callers who allocate fixed-size arrays */
	Assert(natts <= INDEX_MAX_KEYS);

	/* XXX "knows" t_bits are just after fixed tuple header! */
	bp = (bits8 *) ((char *) tup + sizeof(IndexTupleData));

	tp = (char *) tup + IndexInfoFindDataOffset(tup->t_info);
	off = 0;

	for (attnum = 0; attnum < natts; attnum++)
	{
		Form_pg_attribute thisatt = TupleDescAttr(tupleDescriptor, attnum);

		if (hasnulls && att_isnull(attnum, bp))
		{
			/* nulls occupy no storage, so "off" does not advance */
			values[attnum] = (Datum) 0;
			isnull[attnum] = true;
			slow = true;		/* can't use attcacheoff anymore */
			continue;
		}

		isnull[attnum] = false;

		if (!slow && thisatt->attcacheoff >= 0)
			off = thisatt->attcacheoff;
		else if (thisatt->attlen == -1)
		{
			/*
			 * We can only cache the offset for a varlena attribute if the
			 * offset is already suitably aligned, so that there would be no
			 * pad bytes in any case: then the offset will be valid for either
			 * an aligned or unaligned value.
			 */
			if (!slow &&
				off == att_align_nominal(off, thisatt->attalign))
				thisatt->attcacheoff = off;
			else
			{
				off = att_align_pointer(off, thisatt->attalign, -1,
										tp + off);
				slow = true;
			}
		}
		else
		{
			/* not varlena, so safe to use att_align_nominal */
			off = att_align_nominal(off, thisatt->attalign);

			if (!slow)
				thisatt->attcacheoff = off;
		}

		values[attnum] = fetchatt(thisatt, tp + off);

		off = att_addlength_pointer(off, thisatt->attlen, tp + off);

		if (thisatt->attlen <= 0)
			slow = true;		/* can't use attcacheoff anymore */
	}
}
| 497 | |
| 498 | /* |
| 499 | * Create a palloc'd copy of an index tuple. |
| 500 | */ |
| 501 | IndexTuple |
| 502 | CopyIndexTuple(IndexTuple source) |
| 503 | { |
| 504 | IndexTuple result; |
| 505 | Size size; |
| 506 | |
| 507 | size = IndexTupleSize(source); |
| 508 | result = (IndexTuple) palloc(size); |
| 509 | memcpy(result, source, size); |
| 510 | return result; |
| 511 | } |
| 512 | |
| 513 | /* |
| 514 | * Create a palloc'd copy of an index tuple, leaving only the first |
| 515 | * leavenatts attributes remaining. |
| 516 | * |
| 517 | * Truncation is guaranteed to result in an index tuple that is no |
| 518 | * larger than the original. It is safe to use the IndexTuple with |
| 519 | * the original tuple descriptor, but caller must avoid actually |
| 520 | * accessing truncated attributes from returned tuple! In practice |
| 521 | * this means that index_getattr() must be called with special care, |
| 522 | * and that the truncated tuple should only ever be accessed by code |
| 523 | * under caller's direct control. |
| 524 | * |
| 525 | * It's safe to call this function with a buffer lock held, since it |
| 526 | * never performs external table access. If it ever became possible |
| 527 | * for index tuples to contain EXTERNAL TOAST values, then this would |
| 528 | * have to be revisited. |
| 529 | */ |
| 530 | IndexTuple |
| 531 | index_truncate_tuple(TupleDesc sourceDescriptor, IndexTuple source, |
| 532 | int leavenatts) |
| 533 | { |
| 534 | TupleDesc truncdesc; |
| 535 | Datum values[INDEX_MAX_KEYS]; |
| 536 | bool isnull[INDEX_MAX_KEYS]; |
| 537 | IndexTuple truncated; |
| 538 | |
| 539 | Assert(leavenatts <= sourceDescriptor->natts); |
| 540 | |
| 541 | /* Easy case: no truncation actually required */ |
| 542 | if (leavenatts == sourceDescriptor->natts) |
| 543 | return CopyIndexTuple(source); |
| 544 | |
| 545 | /* Create temporary descriptor to scribble on */ |
| 546 | truncdesc = palloc(TupleDescSize(sourceDescriptor)); |
| 547 | TupleDescCopy(truncdesc, sourceDescriptor); |
| 548 | truncdesc->natts = leavenatts; |
| 549 | |
| 550 | /* Deform, form copy of tuple with fewer attributes */ |
| 551 | index_deform_tuple(source, truncdesc, values, isnull); |
| 552 | truncated = index_form_tuple(truncdesc, values, isnull); |
| 553 | truncated->t_tid = source->t_tid; |
| 554 | Assert(IndexTupleSize(truncated) <= IndexTupleSize(source)); |
| 555 | |
| 556 | /* |
| 557 | * Cannot leak memory here, TupleDescCopy() doesn't allocate any inner |
| 558 | * structure, so, plain pfree() should clean all allocated memory |
| 559 | */ |
| 560 | pfree(truncdesc); |
| 561 | |
| 562 | return truncated; |
| 563 | } |
| 564 | |