1/*-------------------------------------------------------------------------
2 *
3 * tuptoaster.c
4 * Support routines for external and compressed storage of
5 * variable size attributes.
6 *
7 * Copyright (c) 2000-2019, PostgreSQL Global Development Group
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/access/heap/tuptoaster.c
12 *
13 *
14 * INTERFACE ROUTINES
15 * toast_insert_or_update -
16 * Try to make a given tuple fit into one page by compressing
17 * or moving off attributes
18 *
19 * toast_delete -
20 * Reclaim toast storage when a tuple is deleted
21 *
22 * heap_tuple_untoast_attr -
23 * Fetch back a given value from the "secondary" relation
24 *
25 *-------------------------------------------------------------------------
26 */
27
28#include "postgres.h"
29
30#include <unistd.h>
31#include <fcntl.h>
32
33#include "access/genam.h"
34#include "access/heapam.h"
35#include "access/tuptoaster.h"
36#include "access/xact.h"
37#include "catalog/catalog.h"
38#include "common/pg_lzcompress.h"
39#include "miscadmin.h"
40#include "utils/expandeddatum.h"
41#include "utils/fmgroids.h"
42#include "utils/rel.h"
43#include "utils/snapmgr.h"
44#include "utils/typcache.h"
45
46
47#undef TOAST_DEBUG
48
/*
 * The information at the start of the compressed toast data.
 *
 * The TOAST_COMPRESS_* macros below overlay this struct on the first
 * bytes of a compressed datum.
 */
typedef struct toast_compress_header
{
    int32       vl_len_;        /* varlena header (do not touch directly!) */
    int32       rawsize;        /* presumably the size of the data before
                                 * compression -- TODO confirm against the
                                 * code that fills this field in */
} toast_compress_header;
57
/*
 * Utilities for manipulation of header information for compressed
 * toast entries.
 *
 * Every macro that takes "ptr" reinterprets it as pointing at a
 * toast_compress_header:
 *
 *  TOAST_COMPRESS_HDRSZ        size of toast_compress_header, as an int32
 *  TOAST_COMPRESS_RAWSIZE      the rawsize field of the header at ptr
 *  TOAST_COMPRESS_RAWDATA      address of the first byte following the
 *                              header at ptr
 *  TOAST_COMPRESS_SET_RAWSIZE  store len into the rawsize field
 */
#define TOAST_COMPRESS_HDRSZ        ((int32) sizeof(toast_compress_header))
#define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize)
#define TOAST_COMPRESS_RAWDATA(ptr) \
    (((char *) (ptr)) + TOAST_COMPRESS_HDRSZ)
#define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \
    (((toast_compress_header *) (ptr))->rawsize = (len))
68
/*
 * Prototypes for the file-local (static) helper routines used by the
 * public entry points below.
 */
static void toast_delete_datum(Relation rel, Datum value, bool is_speculative);
static Datum toast_save_datum(Relation rel, Datum value,
                              struct varlena *oldexternal, int options);
static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
static struct varlena *toast_fetch_datum(struct varlena *attr);
static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
                                               int32 sliceoffset, int32 length);
static struct varlena *toast_decompress_datum(struct varlena *attr);
static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
static int  toast_open_indexes(Relation toastrel,
                               LOCKMODE lock,
                               Relation **toastidxs,
                               int *num_indexes);
static void toast_close_indexes(Relation *toastidxs, int num_indexes,
                                LOCKMODE lock);
static void init_toast_snapshot(Snapshot toast_snapshot);
86
87
88/* ----------
89 * heap_tuple_fetch_attr -
90 *
91 * Public entry point to get back a toasted value from
92 * external source (possibly still in compressed format).
93 *
94 * This will return a datum that contains all the data internally, ie, not
95 * relying on external storage or memory, but it can still be compressed or
96 * have a short header. Note some callers assume that if the input is an
97 * EXTERNAL datum, the result will be a pfree'able chunk.
98 * ----------
99 */
100struct varlena *
101heap_tuple_fetch_attr(struct varlena *attr)
102{
103 struct varlena *result;
104
105 if (VARATT_IS_EXTERNAL_ONDISK(attr))
106 {
107 /*
108 * This is an external stored plain value
109 */
110 result = toast_fetch_datum(attr);
111 }
112 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
113 {
114 /*
115 * This is an indirect pointer --- dereference it
116 */
117 struct varatt_indirect redirect;
118
119 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
120 attr = (struct varlena *) redirect.pointer;
121
122 /* nested indirect Datums aren't allowed */
123 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
124
125 /* recurse if value is still external in some other way */
126 if (VARATT_IS_EXTERNAL(attr))
127 return heap_tuple_fetch_attr(attr);
128
129 /*
130 * Copy into the caller's memory context, in case caller tries to
131 * pfree the result.
132 */
133 result = (struct varlena *) palloc(VARSIZE_ANY(attr));
134 memcpy(result, attr, VARSIZE_ANY(attr));
135 }
136 else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
137 {
138 /*
139 * This is an expanded-object pointer --- get flat format
140 */
141 ExpandedObjectHeader *eoh;
142 Size resultsize;
143
144 eoh = DatumGetEOHP(PointerGetDatum(attr));
145 resultsize = EOH_get_flat_size(eoh);
146 result = (struct varlena *) palloc(resultsize);
147 EOH_flatten_into(eoh, (void *) result, resultsize);
148 }
149 else
150 {
151 /*
152 * This is a plain value inside of the main tuple - why am I called?
153 */
154 result = attr;
155 }
156
157 return result;
158}
159
160
161/* ----------
162 * heap_tuple_untoast_attr -
163 *
164 * Public entry point to get back a toasted value from compression
165 * or external storage. The result is always non-extended varlena form.
166 *
167 * Note some callers assume that if the input is an EXTERNAL or COMPRESSED
168 * datum, the result will be a pfree'able chunk.
169 * ----------
170 */
171struct varlena *
172heap_tuple_untoast_attr(struct varlena *attr)
173{
174 if (VARATT_IS_EXTERNAL_ONDISK(attr))
175 {
176 /*
177 * This is an externally stored datum --- fetch it back from there
178 */
179 attr = toast_fetch_datum(attr);
180 /* If it's compressed, decompress it */
181 if (VARATT_IS_COMPRESSED(attr))
182 {
183 struct varlena *tmp = attr;
184
185 attr = toast_decompress_datum(tmp);
186 pfree(tmp);
187 }
188 }
189 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
190 {
191 /*
192 * This is an indirect pointer --- dereference it
193 */
194 struct varatt_indirect redirect;
195
196 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
197 attr = (struct varlena *) redirect.pointer;
198
199 /* nested indirect Datums aren't allowed */
200 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
201
202 /* recurse in case value is still extended in some other way */
203 attr = heap_tuple_untoast_attr(attr);
204
205 /* if it isn't, we'd better copy it */
206 if (attr == (struct varlena *) redirect.pointer)
207 {
208 struct varlena *result;
209
210 result = (struct varlena *) palloc(VARSIZE_ANY(attr));
211 memcpy(result, attr, VARSIZE_ANY(attr));
212 attr = result;
213 }
214 }
215 else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
216 {
217 /*
218 * This is an expanded-object pointer --- get flat format
219 */
220 attr = heap_tuple_fetch_attr(attr);
221 /* flatteners are not allowed to produce compressed/short output */
222 Assert(!VARATT_IS_EXTENDED(attr));
223 }
224 else if (VARATT_IS_COMPRESSED(attr))
225 {
226 /*
227 * This is a compressed value inside of the main tuple
228 */
229 attr = toast_decompress_datum(attr);
230 }
231 else if (VARATT_IS_SHORT(attr))
232 {
233 /*
234 * This is a short-header varlena --- convert to 4-byte header format
235 */
236 Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
237 Size new_size = data_size + VARHDRSZ;
238 struct varlena *new_attr;
239
240 new_attr = (struct varlena *) palloc(new_size);
241 SET_VARSIZE(new_attr, new_size);
242 memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
243 attr = new_attr;
244 }
245
246 return attr;
247}
248
249
250/* ----------
251 * heap_tuple_untoast_attr_slice -
252 *
253 * Public entry point to get back part of a toasted value
254 * from compression or external storage.
255 * ----------
256 */
257struct varlena *
258heap_tuple_untoast_attr_slice(struct varlena *attr,
259 int32 sliceoffset, int32 slicelength)
260{
261 struct varlena *preslice;
262 struct varlena *result;
263 char *attrdata;
264 int32 attrsize;
265
266 if (VARATT_IS_EXTERNAL_ONDISK(attr))
267 {
268 struct varatt_external toast_pointer;
269
270 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
271
272 /* fast path for non-compressed external datums */
273 if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
274 return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
275
276 /* fetch it back (compressed marker will get set automatically) */
277 preslice = toast_fetch_datum(attr);
278 }
279 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
280 {
281 struct varatt_indirect redirect;
282
283 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
284
285 /* nested indirect Datums aren't allowed */
286 Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer));
287
288 return heap_tuple_untoast_attr_slice(redirect.pointer,
289 sliceoffset, slicelength);
290 }
291 else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
292 {
293 /* pass it off to heap_tuple_fetch_attr to flatten */
294 preslice = heap_tuple_fetch_attr(attr);
295 }
296 else
297 preslice = attr;
298
299 Assert(!VARATT_IS_EXTERNAL(preslice));
300
301 if (VARATT_IS_COMPRESSED(preslice))
302 {
303 struct varlena *tmp = preslice;
304
305 /* Decompress enough to encompass the slice and the offset */
306 if (slicelength > 0 && sliceoffset >= 0)
307 preslice = toast_decompress_datum_slice(tmp, slicelength + sliceoffset);
308 else
309 preslice = toast_decompress_datum(tmp);
310
311 if (tmp != attr)
312 pfree(tmp);
313 }
314
315 if (VARATT_IS_SHORT(preslice))
316 {
317 attrdata = VARDATA_SHORT(preslice);
318 attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT;
319 }
320 else
321 {
322 attrdata = VARDATA(preslice);
323 attrsize = VARSIZE(preslice) - VARHDRSZ;
324 }
325
326 /* slicing of datum for compressed cases and plain value */
327
328 if (sliceoffset >= attrsize)
329 {
330 sliceoffset = 0;
331 slicelength = 0;
332 }
333
334 if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
335 slicelength = attrsize - sliceoffset;
336
337 result = (struct varlena *) palloc(slicelength + VARHDRSZ);
338 SET_VARSIZE(result, slicelength + VARHDRSZ);
339
340 memcpy(VARDATA(result), attrdata + sliceoffset, slicelength);
341
342 if (preslice != attr)
343 pfree(preslice);
344
345 return result;
346}
347
348
349/* ----------
350 * toast_raw_datum_size -
351 *
352 * Return the raw (detoasted) size of a varlena datum
353 * (including the VARHDRSZ header)
354 * ----------
355 */
356Size
357toast_raw_datum_size(Datum value)
358{
359 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
360 Size result;
361
362 if (VARATT_IS_EXTERNAL_ONDISK(attr))
363 {
364 /* va_rawsize is the size of the original datum -- including header */
365 struct varatt_external toast_pointer;
366
367 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
368 result = toast_pointer.va_rawsize;
369 }
370 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
371 {
372 struct varatt_indirect toast_pointer;
373
374 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
375
376 /* nested indirect Datums aren't allowed */
377 Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));
378
379 return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
380 }
381 else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
382 {
383 result = EOH_get_flat_size(DatumGetEOHP(value));
384 }
385 else if (VARATT_IS_COMPRESSED(attr))
386 {
387 /* here, va_rawsize is just the payload size */
388 result = VARRAWSIZE_4B_C(attr) + VARHDRSZ;
389 }
390 else if (VARATT_IS_SHORT(attr))
391 {
392 /*
393 * we have to normalize the header length to VARHDRSZ or else the
394 * callers of this function will be confused.
395 */
396 result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ;
397 }
398 else
399 {
400 /* plain untoasted datum */
401 result = VARSIZE(attr);
402 }
403 return result;
404}
405
406/* ----------
407 * toast_datum_size
408 *
409 * Return the physical storage size (possibly compressed) of a varlena datum
410 * ----------
411 */
412Size
413toast_datum_size(Datum value)
414{
415 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
416 Size result;
417
418 if (VARATT_IS_EXTERNAL_ONDISK(attr))
419 {
420 /*
421 * Attribute is stored externally - return the extsize whether
422 * compressed or not. We do not count the size of the toast pointer
423 * ... should we?
424 */
425 struct varatt_external toast_pointer;
426
427 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
428 result = toast_pointer.va_extsize;
429 }
430 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
431 {
432 struct varatt_indirect toast_pointer;
433
434 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
435
436 /* nested indirect Datums aren't allowed */
437 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
438
439 return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
440 }
441 else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
442 {
443 result = EOH_get_flat_size(DatumGetEOHP(value));
444 }
445 else if (VARATT_IS_SHORT(attr))
446 {
447 result = VARSIZE_SHORT(attr);
448 }
449 else
450 {
451 /*
452 * Attribute is stored inline either compressed or not, just calculate
453 * the size of the datum in either case.
454 */
455 result = VARSIZE(attr);
456 }
457 return result;
458}
459
460
461/* ----------
462 * toast_delete -
463 *
464 * Cascaded delete toast-entries on DELETE
465 * ----------
466 */
467void
468toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
469{
470 TupleDesc tupleDesc;
471 int numAttrs;
472 int i;
473 Datum toast_values[MaxHeapAttributeNumber];
474 bool toast_isnull[MaxHeapAttributeNumber];
475
476 /*
477 * We should only ever be called for tuples of plain relations or
478 * materialized views --- recursing on a toast rel is bad news.
479 */
480 Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
481 rel->rd_rel->relkind == RELKIND_MATVIEW);
482
483 /*
484 * Get the tuple descriptor and break down the tuple into fields.
485 *
486 * NOTE: it's debatable whether to use heap_deform_tuple() here or just
487 * heap_getattr() only the varlena columns. The latter could win if there
488 * are few varlena columns and many non-varlena ones. However,
489 * heap_deform_tuple costs only O(N) while the heap_getattr way would cost
490 * O(N^2) if there are many varlena columns, so it seems better to err on
491 * the side of linear cost. (We won't even be here unless there's at
492 * least one varlena column, by the way.)
493 */
494 tupleDesc = rel->rd_att;
495 numAttrs = tupleDesc->natts;
496
497 Assert(numAttrs <= MaxHeapAttributeNumber);
498 heap_deform_tuple(oldtup, tupleDesc, toast_values, toast_isnull);
499
500 /*
501 * Check for external stored attributes and delete them from the secondary
502 * relation.
503 */
504 for (i = 0; i < numAttrs; i++)
505 {
506 if (TupleDescAttr(tupleDesc, i)->attlen == -1)
507 {
508 Datum value = toast_values[i];
509
510 if (toast_isnull[i])
511 continue;
512 else if (VARATT_IS_EXTERNAL_ONDISK(PointerGetDatum(value)))
513 toast_delete_datum(rel, value, is_speculative);
514 }
515 }
516}
517
518
/* ----------
 * toast_insert_or_update -
 *
 *  Delete no-longer-used toast-entries and create new ones to
 *  make the new tuple fit on INSERT or UPDATE
 *
 * Inputs:
 *  rel: the relation the tuple belongs to (must be a plain table or a
 *      materialized view)
 *  newtup: the candidate new tuple to be inserted
 *  oldtup: the old row version for UPDATE, or NULL for INSERT
 *  options: options to be passed to heap_insert() for toast rows
 * Result:
 *  either newtup if no toasting is needed, or a palloc'd modified tuple
 *  that is what should actually get stored
 *
 * NOTE: neither newtup nor oldtup will be modified.  This is a change
 * from the pre-8.1 API of this routine.
 *
 * Overview of the processing below:
 *  1. deform the tuple(s) and classify every attribute;
 *  2. run up to four shrinking passes (compress 'x', externalize 'x'/'e',
 *     compress 'm', externalize 'm'), each stopping as soon as the data
 *     fits the target size;
 *  3. if anything changed, build a new tuple from the modified values;
 *  4. free temporary values and delete superseded old toast entries.
 * ----------
 */
HeapTuple
toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
                       int options)
{
    HeapTuple   result_tuple;
    TupleDesc   tupleDesc;
    int         numAttrs;
    int         i;

    bool        need_change = false;    /* some value was replaced, so a new
                                         * tuple has to be built */
    bool        need_free = false;      /* some toast_free[] entry is set */
    bool        need_delold = false;    /* some toast_delold[] entry is set */
    bool        has_nulls = false;      /* new tuple has at least one NULL */

    Size        maxDataLen;     /* limit on the tuple's data area size */
    Size        hoff;           /* header size computed for the new tuple */

    /* per-attribute working state, indexed by attribute position */
    char        toast_action[MaxHeapAttributeNumber];
    bool        toast_isnull[MaxHeapAttributeNumber];
    bool        toast_oldisnull[MaxHeapAttributeNumber];
    Datum       toast_values[MaxHeapAttributeNumber];
    Datum       toast_oldvalues[MaxHeapAttributeNumber];
    struct varlena *toast_oldexternal[MaxHeapAttributeNumber];
    int32       toast_sizes[MaxHeapAttributeNumber];
    bool        toast_free[MaxHeapAttributeNumber];
    bool        toast_delold[MaxHeapAttributeNumber];

    /*
     * Ignore the INSERT_SPECULATIVE option. Speculative insertions/super
     * deletions just normally insert/delete the toast values. It seems
     * easiest to deal with that here, instead of in, potentially, multiple
     * callers.
     */
    options &= ~HEAP_INSERT_SPECULATIVE;

    /*
     * We should only ever be called for tuples of plain relations or
     * materialized views --- recursing on a toast rel is bad news.
     */
    Assert(rel->rd_rel->relkind == RELKIND_RELATION ||
           rel->rd_rel->relkind == RELKIND_MATVIEW);

    /*
     * Get the tuple descriptor and break down the tuple(s) into fields.
     * The toast_old* arrays are only filled in when there is an old tuple.
     */
    tupleDesc = rel->rd_att;
    numAttrs = tupleDesc->natts;

    Assert(numAttrs <= MaxHeapAttributeNumber);
    heap_deform_tuple(newtup, tupleDesc, toast_values, toast_isnull);
    if (oldtup != NULL)
        heap_deform_tuple(oldtup, tupleDesc, toast_oldvalues, toast_oldisnull);

    /* ----------
     * Then collect information about the values given
     *
     * NOTE: toast_action[i] can have these values:
     *      ' '     default handling
     *      'p'     already processed --- don't touch it
     *      'x'     incompressible, but OK to move off
     *
     * NOTE: toast_sizes[i] is only made valid for varlena attributes with
     *      toast_action[i] different from 'p'.
     * ----------
     */
    memset(toast_action, ' ', numAttrs * sizeof(char));
    memset(toast_oldexternal, 0, numAttrs * sizeof(struct varlena *));
    memset(toast_free, 0, numAttrs * sizeof(bool));
    memset(toast_delold, 0, numAttrs * sizeof(bool));

    for (i = 0; i < numAttrs; i++)
    {
        Form_pg_attribute att = TupleDescAttr(tupleDesc, i);
        struct varlena *old_value;
        struct varlena *new_value;

        if (oldtup != NULL)
        {
            /*
             * For UPDATE get the old and new values of this attribute
             */
            old_value = (struct varlena *) DatumGetPointer(toast_oldvalues[i]);
            new_value = (struct varlena *) DatumGetPointer(toast_values[i]);

            /*
             * If the old value is stored on disk, check if it has changed so
             * we have to delete it later.
             */
            if (att->attlen == -1 && !toast_oldisnull[i] &&
                VARATT_IS_EXTERNAL_ONDISK(old_value))
            {
                /*
                 * The value counts as changed if the new one is NULL, is not
                 * an on-disk toast pointer, or is a toast pointer whose bytes
                 * differ from the old one's.
                 */
                if (toast_isnull[i] || !VARATT_IS_EXTERNAL_ONDISK(new_value) ||
                    memcmp((char *) old_value, (char *) new_value,
                           VARSIZE_EXTERNAL(old_value)) != 0)
                {
                    /*
                     * The old external stored value isn't needed any more
                     * after the update
                     */
                    toast_delold[i] = true;
                    need_delold = true;
                }
                else
                {
                    /*
                     * This attribute isn't changed by this update so we reuse
                     * the original reference to the old value in the new
                     * tuple.
                     */
                    toast_action[i] = 'p';
                    continue;
                }
            }
        }
        else
        {
            /*
             * For INSERT simply get the new value
             */
            new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
        }

        /*
         * Handle NULL attributes
         */
        if (toast_isnull[i])
        {
            toast_action[i] = 'p';
            has_nulls = true;
            continue;
        }

        /*
         * Now look at varlena attributes
         */
        if (att->attlen == -1)
        {
            /*
             * If the table's attribute says PLAIN always, force it so.
             */
            if (att->attstorage == 'p')
                toast_action[i] = 'p';

            /*
             * We took care of UPDATE above, so any external value we find
             * still in the tuple must be someone else's that we cannot reuse
             * (this includes the case of an out-of-line in-memory datum).
             * Fetch it back (without decompression, unless we are forcing
             * PLAIN storage).  If necessary, we'll push it out as a new
             * external value below.
             */
            if (VARATT_IS_EXTERNAL(new_value))
            {
                /* remember the original pointer; it is handed to
                 * toast_save_datum() if the value is pushed out again */
                toast_oldexternal[i] = new_value;
                if (att->attstorage == 'p')
                    new_value = heap_tuple_untoast_attr(new_value);
                else
                    new_value = heap_tuple_fetch_attr(new_value);
                toast_values[i] = PointerGetDatum(new_value);
                toast_free[i] = true;
                need_change = true;
                need_free = true;
            }

            /*
             * Remember the size of this attribute
             */
            toast_sizes[i] = VARSIZE_ANY(new_value);
        }
        else
        {
            /*
             * Not a varlena attribute, plain storage always
             */
            toast_action[i] = 'p';
        }
    }

    /* ----------
     * Compress and/or save external until data fits into target length
     *
     *  1: Inline compress attributes with attstorage 'x', and store very
     *     large attributes with attstorage 'x' or 'e' external immediately
     *  2: Store attributes with attstorage 'x' or 'e' external
     *  3: Inline compress attributes with attstorage 'm'
     *  4: Store attributes with attstorage 'm' external
     * ----------
     */

    /* compute header overhead --- this should match heap_form_tuple() */
    hoff = SizeofHeapTupleHeader;
    if (has_nulls)
        hoff += BITMAPLEN(numAttrs);
    hoff = MAXALIGN(hoff);
    /* now convert to a limit on the tuple data size */
    maxDataLen = RelationGetToastTupleTarget(rel, TOAST_TUPLE_TARGET) - hoff;

    /*
     * Look for attributes with attstorage 'x' to compress.  Also find large
     * attributes with attstorage 'x' or 'e', and store them external.
     *
     * Each iteration handles the single largest eligible attribute and then
     * re-checks the tuple size.
     */
    while (heap_compute_data_size(tupleDesc,
                                  toast_values, toast_isnull) > maxDataLen)
    {
        int         biggest_attno = -1;
        /* only attributes larger than an aligned toast pointer qualify */
        int32       biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
        Datum       old_value;
        Datum       new_value;

        /*
         * Search for the biggest yet unprocessed internal attribute
         */
        for (i = 0; i < numAttrs; i++)
        {
            Form_pg_attribute att = TupleDescAttr(tupleDesc, i);

            if (toast_action[i] != ' ')
                continue;
            if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
                continue;       /* can't happen, toast_action would be 'p' */
            if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
                continue;
            if (att->attstorage != 'x' && att->attstorage != 'e')
                continue;
            if (toast_sizes[i] > biggest_size)
            {
                biggest_attno = i;
                biggest_size = toast_sizes[i];
            }
        }

        /* no candidate left: this pass cannot shrink the tuple any further */
        if (biggest_attno < 0)
            break;

        /*
         * Attempt to compress it inline, if it has attstorage 'x'
         */
        i = biggest_attno;
        if (TupleDescAttr(tupleDesc, i)->attstorage == 'x')
        {
            old_value = toast_values[i];
            new_value = toast_compress_datum(old_value);

            /* a NULL pointer result is treated as "could not compress" */
            if (DatumGetPointer(new_value) != NULL)
            {
                /* successful compression */
                if (toast_free[i])
                    pfree(DatumGetPointer(old_value));
                toast_values[i] = new_value;
                toast_free[i] = true;
                toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
                need_change = true;
                need_free = true;
            }
            else
            {
                /* incompressible, ignore on subsequent compression passes */
                toast_action[i] = 'x';
            }
        }
        else
        {
            /* has attstorage 'e', ignore on subsequent compression passes */
            toast_action[i] = 'x';
        }

        /*
         * If this value is by itself more than maxDataLen (after compression
         * if any), push it out to the toast table immediately, if possible.
         * This avoids uselessly compressing other fields in the common case
         * where we have one long field and several short ones.
         *
         * XXX maybe the threshold should be less than maxDataLen?
         */
        if (toast_sizes[i] > maxDataLen &&
            rel->rd_rel->reltoastrelid != InvalidOid)
        {
            old_value = toast_values[i];
            toast_action[i] = 'p';
            toast_values[i] = toast_save_datum(rel, toast_values[i],
                                               toast_oldexternal[i], options);
            /* release the in-memory copy that the saved value replaces */
            if (toast_free[i])
                pfree(DatumGetPointer(old_value));
            toast_free[i] = true;
            need_change = true;
            need_free = true;
        }
    }

    /*
     * Second we look for attributes of attstorage 'x' or 'e' that are still
     * inline.  But skip this if there's no toast table to push them to.
     */
    while (heap_compute_data_size(tupleDesc,
                                  toast_values, toast_isnull) > maxDataLen &&
           rel->rd_rel->reltoastrelid != InvalidOid)
    {
        int         biggest_attno = -1;
        int32       biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
        Datum       old_value;

        /*------
         * Search for the biggest yet inlined attribute with
         * attstorage equals 'x' or 'e'
         *
         * Unlike the compression pass, attributes marked 'x' in
         * toast_action are still candidates here; only 'p' is skipped.
         *------
         */
        for (i = 0; i < numAttrs; i++)
        {
            Form_pg_attribute att = TupleDescAttr(tupleDesc, i);

            if (toast_action[i] == 'p')
                continue;
            if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
                continue;       /* can't happen, toast_action would be 'p' */
            if (att->attstorage != 'x' && att->attstorage != 'e')
                continue;
            if (toast_sizes[i] > biggest_size)
            {
                biggest_attno = i;
                biggest_size = toast_sizes[i];
            }
        }

        if (biggest_attno < 0)
            break;

        /*
         * Store this external
         */
        i = biggest_attno;
        old_value = toast_values[i];
        toast_action[i] = 'p';
        toast_values[i] = toast_save_datum(rel, toast_values[i],
                                           toast_oldexternal[i], options);
        if (toast_free[i])
            pfree(DatumGetPointer(old_value));
        toast_free[i] = true;

        need_change = true;
        need_free = true;
    }

    /*
     * Round 3 - this time we take attributes with storage 'm' into
     * compression
     */
    while (heap_compute_data_size(tupleDesc,
                                  toast_values, toast_isnull) > maxDataLen)
    {
        int         biggest_attno = -1;
        int32       biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
        Datum       old_value;
        Datum       new_value;

        /*
         * Search for the biggest yet uncompressed internal attribute
         */
        for (i = 0; i < numAttrs; i++)
        {
            if (toast_action[i] != ' ')
                continue;
            if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
                continue;       /* can't happen, toast_action would be 'p' */
            if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i])))
                continue;
            if (TupleDescAttr(tupleDesc, i)->attstorage != 'm')
                continue;
            if (toast_sizes[i] > biggest_size)
            {
                biggest_attno = i;
                biggest_size = toast_sizes[i];
            }
        }

        if (biggest_attno < 0)
            break;

        /*
         * Attempt to compress it inline
         */
        i = biggest_attno;
        old_value = toast_values[i];
        new_value = toast_compress_datum(old_value);

        if (DatumGetPointer(new_value) != NULL)
        {
            /* successful compression */
            if (toast_free[i])
                pfree(DatumGetPointer(old_value));
            toast_values[i] = new_value;
            toast_free[i] = true;
            toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i]));
            need_change = true;
            need_free = true;
        }
        else
        {
            /* incompressible, ignore on subsequent compression passes */
            toast_action[i] = 'x';
        }
    }

    /*
     * Finally we store attributes of type 'm' externally.  At this point we
     * increase the target tuple size, so that 'm' attributes aren't stored
     * externally unless really necessary.
     */
    maxDataLen = TOAST_TUPLE_TARGET_MAIN - hoff;

    while (heap_compute_data_size(tupleDesc,
                                  toast_values, toast_isnull) > maxDataLen &&
           rel->rd_rel->reltoastrelid != InvalidOid)
    {
        int         biggest_attno = -1;
        int32       biggest_size = MAXALIGN(TOAST_POINTER_SIZE);
        Datum       old_value;

        /*--------
         * Search for the biggest yet inlined attribute with
         * attstorage = 'm'
         *--------
         */
        for (i = 0; i < numAttrs; i++)
        {
            if (toast_action[i] == 'p')
                continue;
            if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i])))
                continue;       /* can't happen, toast_action would be 'p' */
            if (TupleDescAttr(tupleDesc, i)->attstorage != 'm')
                continue;
            if (toast_sizes[i] > biggest_size)
            {
                biggest_attno = i;
                biggest_size = toast_sizes[i];
            }
        }

        if (biggest_attno < 0)
            break;

        /*
         * Store this external
         */
        i = biggest_attno;
        old_value = toast_values[i];
        toast_action[i] = 'p';
        toast_values[i] = toast_save_datum(rel, toast_values[i],
                                           toast_oldexternal[i], options);
        if (toast_free[i])
            pfree(DatumGetPointer(old_value));
        toast_free[i] = true;

        need_change = true;
        need_free = true;
    }

    /*
     * In the case we toasted any values, we need to build a new heap tuple
     * with the changed values.
     */
    if (need_change)
    {
        HeapTupleHeader olddata = newtup->t_data;
        HeapTupleHeader new_data;
        int32       new_header_len;
        int32       new_data_len;
        int32       new_tuple_len;

        /*
         * Calculate the new size of the tuple.
         *
         * Note: we used to assume here that the old tuple's t_hoff must equal
         * the new_header_len value, but that was incorrect.  The old tuple
         * might have a smaller-than-current natts, if there's been an ALTER
         * TABLE ADD COLUMN since it was stored; and that would lead to a
         * different conclusion about the size of the null bitmap, or even
         * whether there needs to be one at all.
         */
        new_header_len = SizeofHeapTupleHeader;
        if (has_nulls)
            new_header_len += BITMAPLEN(numAttrs);
        new_header_len = MAXALIGN(new_header_len);
        new_data_len = heap_compute_data_size(tupleDesc,
                                              toast_values, toast_isnull);
        new_tuple_len = new_header_len + new_data_len;

        /*
         * Allocate and zero the space needed, and fill HeapTupleData fields.
         * The HeapTupleData struct and the tuple body share one allocation,
         * with the body starting HEAPTUPLESIZE bytes in.
         */
        result_tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + new_tuple_len);
        result_tuple->t_len = new_tuple_len;
        result_tuple->t_self = newtup->t_self;
        result_tuple->t_tableOid = newtup->t_tableOid;
        new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE);
        result_tuple->t_data = new_data;

        /*
         * Copy the existing tuple header, but adjust natts and t_hoff.
         */
        memcpy(new_data, olddata, SizeofHeapTupleHeader);
        HeapTupleHeaderSetNatts(new_data, numAttrs);
        new_data->t_hoff = new_header_len;

        /* Copy over the data, and fill the null bitmap if needed */
        heap_fill_tuple(tupleDesc,
                        toast_values,
                        toast_isnull,
                        (char *) new_data + new_header_len,
                        new_data_len,
                        &(new_data->t_infomask),
                        has_nulls ? new_data->t_bits : NULL);
    }
    else
        result_tuple = newtup;

    /*
     * Free allocated temp values.  need_free is only ever set together with
     * need_change, so by now their contents have been copied into the new
     * tuple built above.
     */
    if (need_free)
        for (i = 0; i < numAttrs; i++)
            if (toast_free[i])
                pfree(DatumGetPointer(toast_values[i]));

    /*
     * Delete external values from the old tuple
     */
    if (need_delold)
        for (i = 0; i < numAttrs; i++)
            if (toast_delold[i])
                toast_delete_datum(rel, toast_oldvalues[i], false);

    return result_tuple;
}
1070
1071
1072/* ----------
1073 * toast_flatten_tuple -
1074 *
1075 * "Flatten" a tuple to contain no out-of-line toasted fields.
1076 * (This does not eliminate compressed or short-header datums.)
1077 *
1078 * Note: we expect the caller already checked HeapTupleHasExternal(tup),
1079 * so there is no need for a short-circuit path.
1080 * ----------
1081 */
1082HeapTuple
1083toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc)
1084{
1085 HeapTuple new_tuple;
1086 int numAttrs = tupleDesc->natts;
1087 int i;
1088 Datum toast_values[MaxTupleAttributeNumber];
1089 bool toast_isnull[MaxTupleAttributeNumber];
1090 bool toast_free[MaxTupleAttributeNumber];
1091
1092 /*
1093 * Break down the tuple into fields.
1094 */
1095 Assert(numAttrs <= MaxTupleAttributeNumber);
1096 heap_deform_tuple(tup, tupleDesc, toast_values, toast_isnull);
1097
1098 memset(toast_free, 0, numAttrs * sizeof(bool));
1099
1100 for (i = 0; i < numAttrs; i++)
1101 {
1102 /*
1103 * Look at non-null varlena attributes
1104 */
1105 if (!toast_isnull[i] && TupleDescAttr(tupleDesc, i)->attlen == -1)
1106 {
1107 struct varlena *new_value;
1108
1109 new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
1110 if (VARATT_IS_EXTERNAL(new_value))
1111 {
1112 new_value = heap_tuple_fetch_attr(new_value);
1113 toast_values[i] = PointerGetDatum(new_value);
1114 toast_free[i] = true;
1115 }
1116 }
1117 }
1118
1119 /*
1120 * Form the reconfigured tuple.
1121 */
1122 new_tuple = heap_form_tuple(tupleDesc, toast_values, toast_isnull);
1123
1124 /*
1125 * Be sure to copy the tuple's identity fields. We also make a point of
1126 * copying visibility info, just in case anybody looks at those fields in
1127 * a syscache entry.
1128 */
1129 new_tuple->t_self = tup->t_self;
1130 new_tuple->t_tableOid = tup->t_tableOid;
1131
1132 new_tuple->t_data->t_choice = tup->t_data->t_choice;
1133 new_tuple->t_data->t_ctid = tup->t_data->t_ctid;
1134 new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK;
1135 new_tuple->t_data->t_infomask |=
1136 tup->t_data->t_infomask & HEAP_XACT_MASK;
1137 new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK;
1138 new_tuple->t_data->t_infomask2 |=
1139 tup->t_data->t_infomask2 & HEAP2_XACT_MASK;
1140
1141 /*
1142 * Free allocated temp values
1143 */
1144 for (i = 0; i < numAttrs; i++)
1145 if (toast_free[i])
1146 pfree(DatumGetPointer(toast_values[i]));
1147
1148 return new_tuple;
1149}
1150
1151
1152/* ----------
1153 * toast_flatten_tuple_to_datum -
1154 *
1155 * "Flatten" a tuple containing out-of-line toasted fields into a Datum.
1156 * The result is always palloc'd in the current memory context.
1157 *
1158 * We have a general rule that Datums of container types (rows, arrays,
1159 * ranges, etc) must not contain any external TOAST pointers. Without
1160 * this rule, we'd have to look inside each Datum when preparing a tuple
1161 * for storage, which would be expensive and would fail to extend cleanly
1162 * to new sorts of container types.
1163 *
1164 * However, we don't want to say that tuples represented as HeapTuples
1165 * can't contain toasted fields, so instead this routine should be called
1166 * when such a HeapTuple is being converted into a Datum.
1167 *
1168 * While we're at it, we decompress any compressed fields too. This is not
1169 * necessary for correctness, but reflects an expectation that compression
1170 * will be more effective if applied to the whole tuple not individual
1171 * fields. We are not so concerned about that that we want to deconstruct
1172 * and reconstruct tuples just to get rid of compressed fields, however.
1173 * So callers typically won't call this unless they see that the tuple has
1174 * at least one external field.
1175 *
1176 * On the other hand, in-line short-header varlena fields are left alone.
1177 * If we "untoasted" them here, they'd just get changed back to short-header
1178 * format anyway within heap_fill_tuple.
1179 * ----------
1180 */
1181Datum
1182toast_flatten_tuple_to_datum(HeapTupleHeader tup,
1183 uint32 tup_len,
1184 TupleDesc tupleDesc)
1185{
1186 HeapTupleHeader new_data;
1187 int32 new_header_len;
1188 int32 new_data_len;
1189 int32 new_tuple_len;
1190 HeapTupleData tmptup;
1191 int numAttrs = tupleDesc->natts;
1192 int i;
1193 bool has_nulls = false;
1194 Datum toast_values[MaxTupleAttributeNumber];
1195 bool toast_isnull[MaxTupleAttributeNumber];
1196 bool toast_free[MaxTupleAttributeNumber];
1197
1198 /* Build a temporary HeapTuple control structure */
1199 tmptup.t_len = tup_len;
1200 ItemPointerSetInvalid(&(tmptup.t_self));
1201 tmptup.t_tableOid = InvalidOid;
1202 tmptup.t_data = tup;
1203
1204 /*
1205 * Break down the tuple into fields.
1206 */
1207 Assert(numAttrs <= MaxTupleAttributeNumber);
1208 heap_deform_tuple(&tmptup, tupleDesc, toast_values, toast_isnull);
1209
1210 memset(toast_free, 0, numAttrs * sizeof(bool));
1211
1212 for (i = 0; i < numAttrs; i++)
1213 {
1214 /*
1215 * Look at non-null varlena attributes
1216 */
1217 if (toast_isnull[i])
1218 has_nulls = true;
1219 else if (TupleDescAttr(tupleDesc, i)->attlen == -1)
1220 {
1221 struct varlena *new_value;
1222
1223 new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
1224 if (VARATT_IS_EXTERNAL(new_value) ||
1225 VARATT_IS_COMPRESSED(new_value))
1226 {
1227 new_value = heap_tuple_untoast_attr(new_value);
1228 toast_values[i] = PointerGetDatum(new_value);
1229 toast_free[i] = true;
1230 }
1231 }
1232 }
1233
1234 /*
1235 * Calculate the new size of the tuple.
1236 *
1237 * This should match the reconstruction code in toast_insert_or_update.
1238 */
1239 new_header_len = SizeofHeapTupleHeader;
1240 if (has_nulls)
1241 new_header_len += BITMAPLEN(numAttrs);
1242 new_header_len = MAXALIGN(new_header_len);
1243 new_data_len = heap_compute_data_size(tupleDesc,
1244 toast_values, toast_isnull);
1245 new_tuple_len = new_header_len + new_data_len;
1246
1247 new_data = (HeapTupleHeader) palloc0(new_tuple_len);
1248
1249 /*
1250 * Copy the existing tuple header, but adjust natts and t_hoff.
1251 */
1252 memcpy(new_data, tup, SizeofHeapTupleHeader);
1253 HeapTupleHeaderSetNatts(new_data, numAttrs);
1254 new_data->t_hoff = new_header_len;
1255
1256 /* Set the composite-Datum header fields correctly */
1257 HeapTupleHeaderSetDatumLength(new_data, new_tuple_len);
1258 HeapTupleHeaderSetTypeId(new_data, tupleDesc->tdtypeid);
1259 HeapTupleHeaderSetTypMod(new_data, tupleDesc->tdtypmod);
1260
1261 /* Copy over the data, and fill the null bitmap if needed */
1262 heap_fill_tuple(tupleDesc,
1263 toast_values,
1264 toast_isnull,
1265 (char *) new_data + new_header_len,
1266 new_data_len,
1267 &(new_data->t_infomask),
1268 has_nulls ? new_data->t_bits : NULL);
1269
1270 /*
1271 * Free allocated temp values
1272 */
1273 for (i = 0; i < numAttrs; i++)
1274 if (toast_free[i])
1275 pfree(DatumGetPointer(toast_values[i]));
1276
1277 return PointerGetDatum(new_data);
1278}
1279
1280
1281/* ----------
1282 * toast_build_flattened_tuple -
1283 *
1284 * Build a tuple containing no out-of-line toasted fields.
1285 * (This does not eliminate compressed or short-header datums.)
1286 *
1287 * This is essentially just like heap_form_tuple, except that it will
1288 * expand any external-data pointers beforehand.
1289 *
1290 * It's not very clear whether it would be preferable to decompress
1291 * in-line compressed datums while at it. For now, we don't.
1292 * ----------
1293 */
1294HeapTuple
1295toast_build_flattened_tuple(TupleDesc tupleDesc,
1296 Datum *values,
1297 bool *isnull)
1298{
1299 HeapTuple new_tuple;
1300 int numAttrs = tupleDesc->natts;
1301 int num_to_free;
1302 int i;
1303 Datum new_values[MaxTupleAttributeNumber];
1304 Pointer freeable_values[MaxTupleAttributeNumber];
1305
1306 /*
1307 * We can pass the caller's isnull array directly to heap_form_tuple, but
1308 * we potentially need to modify the values array.
1309 */
1310 Assert(numAttrs <= MaxTupleAttributeNumber);
1311 memcpy(new_values, values, numAttrs * sizeof(Datum));
1312
1313 num_to_free = 0;
1314 for (i = 0; i < numAttrs; i++)
1315 {
1316 /*
1317 * Look at non-null varlena attributes
1318 */
1319 if (!isnull[i] && TupleDescAttr(tupleDesc, i)->attlen == -1)
1320 {
1321 struct varlena *new_value;
1322
1323 new_value = (struct varlena *) DatumGetPointer(new_values[i]);
1324 if (VARATT_IS_EXTERNAL(new_value))
1325 {
1326 new_value = heap_tuple_fetch_attr(new_value);
1327 new_values[i] = PointerGetDatum(new_value);
1328 freeable_values[num_to_free++] = (Pointer) new_value;
1329 }
1330 }
1331 }
1332
1333 /*
1334 * Form the reconfigured tuple.
1335 */
1336 new_tuple = heap_form_tuple(tupleDesc, new_values, isnull);
1337
1338 /*
1339 * Free allocated temp values
1340 */
1341 for (i = 0; i < num_to_free; i++)
1342 pfree(freeable_values[i]);
1343
1344 return new_tuple;
1345}
1346
1347
1348/* ----------
1349 * toast_compress_datum -
1350 *
1351 * Create a compressed version of a varlena datum
1352 *
1353 * If we fail (ie, compressed result is actually bigger than original)
1354 * then return NULL. We must not use compressed data if it'd expand
1355 * the tuple!
1356 *
1357 * We use VAR{SIZE,DATA}_ANY so we can handle short varlenas here without
1358 * copying them. But we can't handle external or compressed datums.
1359 * ----------
1360 */
1361Datum
1362toast_compress_datum(Datum value)
1363{
1364 struct varlena *tmp;
1365 int32 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
1366 int32 len;
1367
1368 Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
1369 Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
1370
1371 /*
1372 * No point in wasting a palloc cycle if value size is out of the allowed
1373 * range for compression
1374 */
1375 if (valsize < PGLZ_strategy_default->min_input_size ||
1376 valsize > PGLZ_strategy_default->max_input_size)
1377 return PointerGetDatum(NULL);
1378
1379 tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) +
1380 TOAST_COMPRESS_HDRSZ);
1381
1382 /*
1383 * We recheck the actual size even if pglz_compress() reports success,
1384 * because it might be satisfied with having saved as little as one byte
1385 * in the compressed data --- which could turn into a net loss once you
1386 * consider header and alignment padding. Worst case, the compressed
1387 * format might require three padding bytes (plus header, which is
1388 * included in VARSIZE(tmp)), whereas the uncompressed format would take
1389 * only one header byte and no padding if the value is short enough. So
1390 * we insist on a savings of more than 2 bytes to ensure we have a gain.
1391 */
1392 len = pglz_compress(VARDATA_ANY(DatumGetPointer(value)),
1393 valsize,
1394 TOAST_COMPRESS_RAWDATA(tmp),
1395 PGLZ_strategy_default);
1396 if (len >= 0 &&
1397 len + TOAST_COMPRESS_HDRSZ < valsize - 2)
1398 {
1399 TOAST_COMPRESS_SET_RAWSIZE(tmp, valsize);
1400 SET_VARSIZE_COMPRESSED(tmp, len + TOAST_COMPRESS_HDRSZ);
1401 /* successful compression */
1402 return PointerGetDatum(tmp);
1403 }
1404 else
1405 {
1406 /* incompressible data */
1407 pfree(tmp);
1408 return PointerGetDatum(NULL);
1409 }
1410}
1411
1412
1413/* ----------
1414 * toast_get_valid_index
1415 *
1416 * Get OID of valid index associated to given toast relation. A toast
1417 * relation can have only one valid index at the same time.
1418 */
1419Oid
1420toast_get_valid_index(Oid toastoid, LOCKMODE lock)
1421{
1422 int num_indexes;
1423 int validIndex;
1424 Oid validIndexOid;
1425 Relation *toastidxs;
1426 Relation toastrel;
1427
1428 /* Open the toast relation */
1429 toastrel = table_open(toastoid, lock);
1430
1431 /* Look for the valid index of the toast relation */
1432 validIndex = toast_open_indexes(toastrel,
1433 lock,
1434 &toastidxs,
1435 &num_indexes);
1436 validIndexOid = RelationGetRelid(toastidxs[validIndex]);
1437
1438 /* Close the toast relation and all its indexes */
1439 toast_close_indexes(toastidxs, num_indexes, lock);
1440 table_close(toastrel, lock);
1441
1442 return validIndexOid;
1443}
1444
1445
1446/* ----------
1447 * toast_save_datum -
1448 *
1449 * Save one single datum into the secondary relation and return
1450 * a Datum reference for it.
1451 *
1452 * rel: the main relation we're working with (not the toast rel!)
1453 * value: datum to be pushed to toast storage
1454 * oldexternal: if not NULL, toast pointer previously representing the datum
1455 * options: options to be passed to heap_insert() for toast rows
1456 * ----------
1457 */
1458static Datum
1459toast_save_datum(Relation rel, Datum value,
1460 struct varlena *oldexternal, int options)
1461{
1462 Relation toastrel;
1463 Relation *toastidxs;
1464 HeapTuple toasttup;
1465 TupleDesc toasttupDesc;
1466 Datum t_values[3];
1467 bool t_isnull[3];
1468 CommandId mycid = GetCurrentCommandId(true);
1469 struct varlena *result;
1470 struct varatt_external toast_pointer;
1471 union
1472 {
1473 struct varlena hdr;
1474 /* this is to make the union big enough for a chunk: */
1475 char data[TOAST_MAX_CHUNK_SIZE + VARHDRSZ];
1476 /* ensure union is aligned well enough: */
1477 int32 align_it;
1478 } chunk_data;
1479 int32 chunk_size;
1480 int32 chunk_seq = 0;
1481 char *data_p;
1482 int32 data_todo;
1483 Pointer dval = DatumGetPointer(value);
1484 int num_indexes;
1485 int validIndex;
1486
1487 Assert(!VARATT_IS_EXTERNAL(value));
1488
1489 /*
1490 * Open the toast relation and its indexes. We can use the index to check
1491 * uniqueness of the OID we assign to the toasted item, even though it has
1492 * additional columns besides OID.
1493 */
1494 toastrel = table_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
1495 toasttupDesc = toastrel->rd_att;
1496
1497 /* Open all the toast indexes and look for the valid one */
1498 validIndex = toast_open_indexes(toastrel,
1499 RowExclusiveLock,
1500 &toastidxs,
1501 &num_indexes);
1502
1503 /*
1504 * Get the data pointer and length, and compute va_rawsize and va_extsize.
1505 *
1506 * va_rawsize is the size of the equivalent fully uncompressed datum, so
1507 * we have to adjust for short headers.
1508 *
1509 * va_extsize is the actual size of the data payload in the toast records.
1510 */
1511 if (VARATT_IS_SHORT(dval))
1512 {
1513 data_p = VARDATA_SHORT(dval);
1514 data_todo = VARSIZE_SHORT(dval) - VARHDRSZ_SHORT;
1515 toast_pointer.va_rawsize = data_todo + VARHDRSZ; /* as if not short */
1516 toast_pointer.va_extsize = data_todo;
1517 }
1518 else if (VARATT_IS_COMPRESSED(dval))
1519 {
1520 data_p = VARDATA(dval);
1521 data_todo = VARSIZE(dval) - VARHDRSZ;
1522 /* rawsize in a compressed datum is just the size of the payload */
1523 toast_pointer.va_rawsize = VARRAWSIZE_4B_C(dval) + VARHDRSZ;
1524 toast_pointer.va_extsize = data_todo;
1525 /* Assert that the numbers look like it's compressed */
1526 Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
1527 }
1528 else
1529 {
1530 data_p = VARDATA(dval);
1531 data_todo = VARSIZE(dval) - VARHDRSZ;
1532 toast_pointer.va_rawsize = VARSIZE(dval);
1533 toast_pointer.va_extsize = data_todo;
1534 }
1535
1536 /*
1537 * Insert the correct table OID into the result TOAST pointer.
1538 *
1539 * Normally this is the actual OID of the target toast table, but during
1540 * table-rewriting operations such as CLUSTER, we have to insert the OID
1541 * of the table's real permanent toast table instead. rd_toastoid is set
1542 * if we have to substitute such an OID.
1543 */
1544 if (OidIsValid(rel->rd_toastoid))
1545 toast_pointer.va_toastrelid = rel->rd_toastoid;
1546 else
1547 toast_pointer.va_toastrelid = RelationGetRelid(toastrel);
1548
1549 /*
1550 * Choose an OID to use as the value ID for this toast value.
1551 *
1552 * Normally we just choose an unused OID within the toast table. But
1553 * during table-rewriting operations where we are preserving an existing
1554 * toast table OID, we want to preserve toast value OIDs too. So, if
1555 * rd_toastoid is set and we had a prior external value from that same
1556 * toast table, re-use its value ID. If we didn't have a prior external
1557 * value (which is a corner case, but possible if the table's attstorage
1558 * options have been changed), we have to pick a value ID that doesn't
1559 * conflict with either new or existing toast value OIDs.
1560 */
1561 if (!OidIsValid(rel->rd_toastoid))
1562 {
1563 /* normal case: just choose an unused OID */
1564 toast_pointer.va_valueid =
1565 GetNewOidWithIndex(toastrel,
1566 RelationGetRelid(toastidxs[validIndex]),
1567 (AttrNumber) 1);
1568 }
1569 else
1570 {
1571 /* rewrite case: check to see if value was in old toast table */
1572 toast_pointer.va_valueid = InvalidOid;
1573 if (oldexternal != NULL)
1574 {
1575 struct varatt_external old_toast_pointer;
1576
1577 Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
1578 /* Must copy to access aligned fields */
1579 VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
1580 if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
1581 {
1582 /* This value came from the old toast table; reuse its OID */
1583 toast_pointer.va_valueid = old_toast_pointer.va_valueid;
1584
1585 /*
1586 * There is a corner case here: the table rewrite might have
1587 * to copy both live and recently-dead versions of a row, and
1588 * those versions could easily reference the same toast value.
1589 * When we copy the second or later version of such a row,
1590 * reusing the OID will mean we select an OID that's already
1591 * in the new toast table. Check for that, and if so, just
1592 * fall through without writing the data again.
1593 *
1594 * While annoying and ugly-looking, this is a good thing
1595 * because it ensures that we wind up with only one copy of
1596 * the toast value when there is only one copy in the old
1597 * toast table. Before we detected this case, we'd have made
1598 * multiple copies, wasting space; and what's worse, the
1599 * copies belonging to already-deleted heap tuples would not
1600 * be reclaimed by VACUUM.
1601 */
1602 if (toastrel_valueid_exists(toastrel,
1603 toast_pointer.va_valueid))
1604 {
1605 /* Match, so short-circuit the data storage loop below */
1606 data_todo = 0;
1607 }
1608 }
1609 }
1610 if (toast_pointer.va_valueid == InvalidOid)
1611 {
1612 /*
1613 * new value; must choose an OID that doesn't conflict in either
1614 * old or new toast table
1615 */
1616 do
1617 {
1618 toast_pointer.va_valueid =
1619 GetNewOidWithIndex(toastrel,
1620 RelationGetRelid(toastidxs[validIndex]),
1621 (AttrNumber) 1);
1622 } while (toastid_valueid_exists(rel->rd_toastoid,
1623 toast_pointer.va_valueid));
1624 }
1625 }
1626
1627 /*
1628 * Initialize constant parts of the tuple data
1629 */
1630 t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid);
1631 t_values[2] = PointerGetDatum(&chunk_data);
1632 t_isnull[0] = false;
1633 t_isnull[1] = false;
1634 t_isnull[2] = false;
1635
1636 /*
1637 * Split up the item into chunks
1638 */
1639 while (data_todo > 0)
1640 {
1641 int i;
1642
1643 CHECK_FOR_INTERRUPTS();
1644
1645 /*
1646 * Calculate the size of this chunk
1647 */
1648 chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo);
1649
1650 /*
1651 * Build a tuple and store it
1652 */
1653 t_values[1] = Int32GetDatum(chunk_seq++);
1654 SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ);
1655 memcpy(VARDATA(&chunk_data), data_p, chunk_size);
1656 toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);
1657
1658 heap_insert(toastrel, toasttup, mycid, options, NULL);
1659
1660 /*
1661 * Create the index entry. We cheat a little here by not using
1662 * FormIndexDatum: this relies on the knowledge that the index columns
1663 * are the same as the initial columns of the table for all the
1664 * indexes. We also cheat by not providing an IndexInfo: this is okay
1665 * for now because btree doesn't need one, but we might have to be
1666 * more honest someday.
1667 *
1668 * Note also that there had better not be any user-created index on
1669 * the TOAST table, since we don't bother to update anything else.
1670 */
1671 for (i = 0; i < num_indexes; i++)
1672 {
1673 /* Only index relations marked as ready can be updated */
1674 if (toastidxs[i]->rd_index->indisready)
1675 index_insert(toastidxs[i], t_values, t_isnull,
1676 &(toasttup->t_self),
1677 toastrel,
1678 toastidxs[i]->rd_index->indisunique ?
1679 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
1680 NULL);
1681 }
1682
1683 /*
1684 * Free memory
1685 */
1686 heap_freetuple(toasttup);
1687
1688 /*
1689 * Move on to next chunk
1690 */
1691 data_todo -= chunk_size;
1692 data_p += chunk_size;
1693 }
1694
1695 /*
1696 * Done - close toast relation and its indexes
1697 */
1698 toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
1699 table_close(toastrel, RowExclusiveLock);
1700
1701 /*
1702 * Create the TOAST pointer value that we'll return
1703 */
1704 result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
1705 SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK);
1706 memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));
1707
1708 return PointerGetDatum(result);
1709}
1710
1711
1712/* ----------
1713 * toast_delete_datum -
1714 *
1715 * Delete a single external stored value.
1716 * ----------
1717 */
1718static void
1719toast_delete_datum(Relation rel, Datum value, bool is_speculative)
1720{
1721 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
1722 struct varatt_external toast_pointer;
1723 Relation toastrel;
1724 Relation *toastidxs;
1725 ScanKeyData toastkey;
1726 SysScanDesc toastscan;
1727 HeapTuple toasttup;
1728 int num_indexes;
1729 int validIndex;
1730 SnapshotData SnapshotToast;
1731
1732 if (!VARATT_IS_EXTERNAL_ONDISK(attr))
1733 return;
1734
1735 /* Must copy to access aligned fields */
1736 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1737
1738 /*
1739 * Open the toast relation and its indexes
1740 */
1741 toastrel = table_open(toast_pointer.va_toastrelid, RowExclusiveLock);
1742
1743 /* Fetch valid relation used for process */
1744 validIndex = toast_open_indexes(toastrel,
1745 RowExclusiveLock,
1746 &toastidxs,
1747 &num_indexes);
1748
1749 /*
1750 * Setup a scan key to find chunks with matching va_valueid
1751 */
1752 ScanKeyInit(&toastkey,
1753 (AttrNumber) 1,
1754 BTEqualStrategyNumber, F_OIDEQ,
1755 ObjectIdGetDatum(toast_pointer.va_valueid));
1756
1757 /*
1758 * Find all the chunks. (We don't actually care whether we see them in
1759 * sequence or not, but since we've already locked the index we might as
1760 * well use systable_beginscan_ordered.)
1761 */
1762 init_toast_snapshot(&SnapshotToast);
1763 toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
1764 &SnapshotToast, 1, &toastkey);
1765 while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
1766 {
1767 /*
1768 * Have a chunk, delete it
1769 */
1770 if (is_speculative)
1771 heap_abort_speculative(toastrel, &toasttup->t_self);
1772 else
1773 simple_heap_delete(toastrel, &toasttup->t_self);
1774 }
1775
1776 /*
1777 * End scan and close relations
1778 */
1779 systable_endscan_ordered(toastscan);
1780 toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
1781 table_close(toastrel, RowExclusiveLock);
1782}
1783
1784
1785/* ----------
1786 * toastrel_valueid_exists -
1787 *
1788 * Test whether a toast value with the given ID exists in the toast relation.
1789 * For safety, we consider a value to exist if there are either live or dead
1790 * toast rows with that ID; see notes for GetNewOidWithIndex().
1791 * ----------
1792 */
1793static bool
1794toastrel_valueid_exists(Relation toastrel, Oid valueid)
1795{
1796 bool result = false;
1797 ScanKeyData toastkey;
1798 SysScanDesc toastscan;
1799 int num_indexes;
1800 int validIndex;
1801 Relation *toastidxs;
1802
1803 /* Fetch a valid index relation */
1804 validIndex = toast_open_indexes(toastrel,
1805 RowExclusiveLock,
1806 &toastidxs,
1807 &num_indexes);
1808
1809 /*
1810 * Setup a scan key to find chunks with matching va_valueid
1811 */
1812 ScanKeyInit(&toastkey,
1813 (AttrNumber) 1,
1814 BTEqualStrategyNumber, F_OIDEQ,
1815 ObjectIdGetDatum(valueid));
1816
1817 /*
1818 * Is there any such chunk?
1819 */
1820 toastscan = systable_beginscan(toastrel,
1821 RelationGetRelid(toastidxs[validIndex]),
1822 true, SnapshotAny, 1, &toastkey);
1823
1824 if (systable_getnext(toastscan) != NULL)
1825 result = true;
1826
1827 systable_endscan(toastscan);
1828
1829 /* Clean up */
1830 toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
1831
1832 return result;
1833}
1834
1835/* ----------
1836 * toastid_valueid_exists -
1837 *
1838 * As above, but work from toast rel's OID not an open relation
1839 * ----------
1840 */
1841static bool
1842toastid_valueid_exists(Oid toastrelid, Oid valueid)
1843{
1844 bool result;
1845 Relation toastrel;
1846
1847 toastrel = table_open(toastrelid, AccessShareLock);
1848
1849 result = toastrel_valueid_exists(toastrel, valueid);
1850
1851 table_close(toastrel, AccessShareLock);
1852
1853 return result;
1854}
1855
1856
1857/* ----------
1858 * toast_fetch_datum -
1859 *
1860 * Reconstruct an in memory Datum from the chunks saved
1861 * in the toast relation
1862 * ----------
1863 */
1864static struct varlena *
1865toast_fetch_datum(struct varlena *attr)
1866{
1867 Relation toastrel;
1868 Relation *toastidxs;
1869 ScanKeyData toastkey;
1870 SysScanDesc toastscan;
1871 HeapTuple ttup;
1872 TupleDesc toasttupDesc;
1873 struct varlena *result;
1874 struct varatt_external toast_pointer;
1875 int32 ressize;
1876 int32 residx,
1877 nextidx;
1878 int32 numchunks;
1879 Pointer chunk;
1880 bool isnull;
1881 char *chunkdata;
1882 int32 chunksize;
1883 int num_indexes;
1884 int validIndex;
1885 SnapshotData SnapshotToast;
1886
1887 if (!VARATT_IS_EXTERNAL_ONDISK(attr))
1888 elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
1889
1890 /* Must copy to access aligned fields */
1891 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
1892
1893 ressize = toast_pointer.va_extsize;
1894 numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
1895
1896 result = (struct varlena *) palloc(ressize + VARHDRSZ);
1897
1898 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
1899 SET_VARSIZE_COMPRESSED(result, ressize + VARHDRSZ);
1900 else
1901 SET_VARSIZE(result, ressize + VARHDRSZ);
1902
1903 /*
1904 * Open the toast relation and its indexes
1905 */
1906 toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
1907 toasttupDesc = toastrel->rd_att;
1908
1909 /* Look for the valid index of the toast relation */
1910 validIndex = toast_open_indexes(toastrel,
1911 AccessShareLock,
1912 &toastidxs,
1913 &num_indexes);
1914
1915 /*
1916 * Setup a scan key to fetch from the index by va_valueid
1917 */
1918 ScanKeyInit(&toastkey,
1919 (AttrNumber) 1,
1920 BTEqualStrategyNumber, F_OIDEQ,
1921 ObjectIdGetDatum(toast_pointer.va_valueid));
1922
1923 /*
1924 * Read the chunks by index
1925 *
1926 * Note that because the index is actually on (valueid, chunkidx) we will
1927 * see the chunks in chunkidx order, even though we didn't explicitly ask
1928 * for it.
1929 */
1930 nextidx = 0;
1931
1932 init_toast_snapshot(&SnapshotToast);
1933 toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
1934 &SnapshotToast, 1, &toastkey);
1935 while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
1936 {
1937 /*
1938 * Have a chunk, extract the sequence number and the data
1939 */
1940 residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
1941 Assert(!isnull);
1942 chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
1943 Assert(!isnull);
1944 if (!VARATT_IS_EXTENDED(chunk))
1945 {
1946 chunksize = VARSIZE(chunk) - VARHDRSZ;
1947 chunkdata = VARDATA(chunk);
1948 }
1949 else if (VARATT_IS_SHORT(chunk))
1950 {
1951 /* could happen due to heap_form_tuple doing its thing */
1952 chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
1953 chunkdata = VARDATA_SHORT(chunk);
1954 }
1955 else
1956 {
1957 /* should never happen */
1958 elog(ERROR, "found toasted toast chunk for toast value %u in %s",
1959 toast_pointer.va_valueid,
1960 RelationGetRelationName(toastrel));
1961 chunksize = 0; /* keep compiler quiet */
1962 chunkdata = NULL;
1963 }
1964
1965 /*
1966 * Some checks on the data we've found
1967 */
1968 if (residx != nextidx)
1969 elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
1970 residx, nextidx,
1971 toast_pointer.va_valueid,
1972 RelationGetRelationName(toastrel));
1973 if (residx < numchunks - 1)
1974 {
1975 if (chunksize != TOAST_MAX_CHUNK_SIZE)
1976 elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s",
1977 chunksize, (int) TOAST_MAX_CHUNK_SIZE,
1978 residx, numchunks,
1979 toast_pointer.va_valueid,
1980 RelationGetRelationName(toastrel));
1981 }
1982 else if (residx == numchunks - 1)
1983 {
1984 if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
1985 elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s",
1986 chunksize,
1987 (int) (ressize - residx * TOAST_MAX_CHUNK_SIZE),
1988 residx,
1989 toast_pointer.va_valueid,
1990 RelationGetRelationName(toastrel));
1991 }
1992 else
1993 elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
1994 residx,
1995 0, numchunks - 1,
1996 toast_pointer.va_valueid,
1997 RelationGetRelationName(toastrel));
1998
1999 /*
2000 * Copy the data into proper place in our result
2001 */
2002 memcpy(VARDATA(result) + residx * TOAST_MAX_CHUNK_SIZE,
2003 chunkdata,
2004 chunksize);
2005
2006 nextidx++;
2007 }
2008
2009 /*
2010 * Final checks that we successfully fetched the datum
2011 */
2012 if (nextidx != numchunks)
2013 elog(ERROR, "missing chunk number %d for toast value %u in %s",
2014 nextidx,
2015 toast_pointer.va_valueid,
2016 RelationGetRelationName(toastrel));
2017
2018 /*
2019 * End scan and close relations
2020 */
2021 systable_endscan_ordered(toastscan);
2022 toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
2023 table_close(toastrel, AccessShareLock);
2024
2025 return result;
2026}
2027
2028/* ----------
2029 * toast_fetch_datum_slice -
2030 *
2031 * Reconstruct a segment of a Datum from the chunks saved
2032 * in the toast relation
2033 *
2034 * Note that this function only supports non-compressed external datums.
2035 * ----------
2036 */
2037static struct varlena *
2038toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
2039{
2040 Relation toastrel;
2041 Relation *toastidxs;
2042 ScanKeyData toastkey[3];
2043 int nscankeys;
2044 SysScanDesc toastscan;
2045 HeapTuple ttup;
2046 TupleDesc toasttupDesc;
2047 struct varlena *result;
2048 struct varatt_external toast_pointer;
2049 int32 attrsize;
2050 int32 residx;
2051 int32 nextidx;
2052 int numchunks;
2053 int startchunk;
2054 int endchunk;
2055 int32 startoffset;
2056 int32 endoffset;
2057 int totalchunks;
2058 Pointer chunk;
2059 bool isnull;
2060 char *chunkdata;
2061 int32 chunksize;
2062 int32 chcpystrt;
2063 int32 chcpyend;
2064 int num_indexes;
2065 int validIndex;
2066 SnapshotData SnapshotToast;
2067
2068 if (!VARATT_IS_EXTERNAL_ONDISK(attr))
2069 elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
2070
2071 /* Must copy to access aligned fields */
2072 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
2073
2074 /*
2075 * It's nonsense to fetch slices of a compressed datum -- this isn't lo_*
2076 * we can't return a compressed datum which is meaningful to toast later
2077 */
2078 Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
2079
2080 attrsize = toast_pointer.va_extsize;
2081 totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
2082
2083 if (sliceoffset >= attrsize)
2084 {
2085 sliceoffset = 0;
2086 length = 0;
2087 }
2088
2089 if (((sliceoffset + length) > attrsize) || length < 0)
2090 length = attrsize - sliceoffset;
2091
2092 result = (struct varlena *) palloc(length + VARHDRSZ);
2093
2094 SET_VARSIZE(result, length + VARHDRSZ);
2095
2096 if (length == 0)
2097 return result; /* Can save a lot of work at this point! */
2098
2099 startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
2100 endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE;
2101 numchunks = (endchunk - startchunk) + 1;
2102
2103 startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE;
2104 endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE;
2105
2106 /*
2107 * Open the toast relation and its indexes
2108 */
2109 toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
2110 toasttupDesc = toastrel->rd_att;
2111
2112 /* Look for the valid index of toast relation */
2113 validIndex = toast_open_indexes(toastrel,
2114 AccessShareLock,
2115 &toastidxs,
2116 &num_indexes);
2117
2118 /*
2119 * Setup a scan key to fetch from the index. This is either two keys or
2120 * three depending on the number of chunks.
2121 */
2122 ScanKeyInit(&toastkey[0],
2123 (AttrNumber) 1,
2124 BTEqualStrategyNumber, F_OIDEQ,
2125 ObjectIdGetDatum(toast_pointer.va_valueid));
2126
2127 /*
2128 * Use equality condition for one chunk, a range condition otherwise:
2129 */
2130 if (numchunks == 1)
2131 {
2132 ScanKeyInit(&toastkey[1],
2133 (AttrNumber) 2,
2134 BTEqualStrategyNumber, F_INT4EQ,
2135 Int32GetDatum(startchunk));
2136 nscankeys = 2;
2137 }
2138 else
2139 {
2140 ScanKeyInit(&toastkey[1],
2141 (AttrNumber) 2,
2142 BTGreaterEqualStrategyNumber, F_INT4GE,
2143 Int32GetDatum(startchunk));
2144 ScanKeyInit(&toastkey[2],
2145 (AttrNumber) 2,
2146 BTLessEqualStrategyNumber, F_INT4LE,
2147 Int32GetDatum(endchunk));
2148 nscankeys = 3;
2149 }
2150
2151 /*
2152 * Read the chunks by index
2153 *
2154 * The index is on (valueid, chunkidx) so they will come in order
2155 */
2156 init_toast_snapshot(&SnapshotToast);
2157 nextidx = startchunk;
2158 toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
2159 &SnapshotToast, nscankeys, toastkey);
2160 while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
2161 {
2162 /*
2163 * Have a chunk, extract the sequence number and the data
2164 */
2165 residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull));
2166 Assert(!isnull);
2167 chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull));
2168 Assert(!isnull);
2169 if (!VARATT_IS_EXTENDED(chunk))
2170 {
2171 chunksize = VARSIZE(chunk) - VARHDRSZ;
2172 chunkdata = VARDATA(chunk);
2173 }
2174 else if (VARATT_IS_SHORT(chunk))
2175 {
2176 /* could happen due to heap_form_tuple doing its thing */
2177 chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT;
2178 chunkdata = VARDATA_SHORT(chunk);
2179 }
2180 else
2181 {
2182 /* should never happen */
2183 elog(ERROR, "found toasted toast chunk for toast value %u in %s",
2184 toast_pointer.va_valueid,
2185 RelationGetRelationName(toastrel));
2186 chunksize = 0; /* keep compiler quiet */
2187 chunkdata = NULL;
2188 }
2189
2190 /*
2191 * Some checks on the data we've found
2192 */
2193 if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk))
2194 elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
2195 residx, nextidx,
2196 toast_pointer.va_valueid,
2197 RelationGetRelationName(toastrel));
2198 if (residx < totalchunks - 1)
2199 {
2200 if (chunksize != TOAST_MAX_CHUNK_SIZE)
2201 elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s when fetching slice",
2202 chunksize, (int) TOAST_MAX_CHUNK_SIZE,
2203 residx, totalchunks,
2204 toast_pointer.va_valueid,
2205 RelationGetRelationName(toastrel));
2206 }
2207 else if (residx == totalchunks - 1)
2208 {
2209 if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize)
2210 elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s when fetching slice",
2211 chunksize,
2212 (int) (attrsize - residx * TOAST_MAX_CHUNK_SIZE),
2213 residx,
2214 toast_pointer.va_valueid,
2215 RelationGetRelationName(toastrel));
2216 }
2217 else
2218 elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
2219 residx,
2220 0, totalchunks - 1,
2221 toast_pointer.va_valueid,
2222 RelationGetRelationName(toastrel));
2223
2224 /*
2225 * Copy the data into proper place in our result
2226 */
2227 chcpystrt = 0;
2228 chcpyend = chunksize - 1;
2229 if (residx == startchunk)
2230 chcpystrt = startoffset;
2231 if (residx == endchunk)
2232 chcpyend = endoffset;
2233
2234 memcpy(VARDATA(result) +
2235 (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
2236 chunkdata + chcpystrt,
2237 (chcpyend - chcpystrt) + 1);
2238
2239 nextidx++;
2240 }
2241
2242 /*
2243 * Final checks that we successfully fetched the datum
2244 */
2245 if (nextidx != (endchunk + 1))
2246 elog(ERROR, "missing chunk number %d for toast value %u in %s",
2247 nextidx,
2248 toast_pointer.va_valueid,
2249 RelationGetRelationName(toastrel));
2250
2251 /*
2252 * End scan and close relations
2253 */
2254 systable_endscan_ordered(toastscan);
2255 toast_close_indexes(toastidxs, num_indexes, AccessShareLock);
2256 table_close(toastrel, AccessShareLock);
2257
2258 return result;
2259}
2260
2261/* ----------
2262 * toast_decompress_datum -
2263 *
2264 * Decompress a compressed version of a varlena datum
2265 */
2266static struct varlena *
2267toast_decompress_datum(struct varlena *attr)
2268{
2269 struct varlena *result;
2270
2271 Assert(VARATT_IS_COMPRESSED(attr));
2272
2273 result = (struct varlena *)
2274 palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
2275 SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
2276
2277 if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
2278 VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
2279 VARDATA(result),
2280 TOAST_COMPRESS_RAWSIZE(attr), true) < 0)
2281 elog(ERROR, "compressed data is corrupted");
2282
2283 return result;
2284}
2285
2286
2287/* ----------
2288 * toast_decompress_datum_slice -
2289 *
2290 * Decompress the front of a compressed version of a varlena datum.
2291 * offset handling happens in heap_tuple_untoast_attr_slice.
2292 * Here we just decompress a slice from the front.
2293 */
2294static struct varlena *
2295toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
2296{
2297 struct varlena *result;
2298 int32 rawsize;
2299
2300 Assert(VARATT_IS_COMPRESSED(attr));
2301
2302 result = (struct varlena *) palloc(slicelength + VARHDRSZ);
2303
2304 rawsize = pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
2305 VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
2306 VARDATA(result),
2307 slicelength, false);
2308 if (rawsize < 0)
2309 elog(ERROR, "compressed data is corrupted");
2310
2311 SET_VARSIZE(result, rawsize + VARHDRSZ);
2312 return result;
2313}
2314
2315
2316/* ----------
2317 * toast_open_indexes
2318 *
2319 * Get an array of the indexes associated to the given toast relation
2320 * and return as well the position of the valid index used by the toast
2321 * relation in this array. It is the responsibility of the caller of this
2322 * function to close the indexes as well as free them.
2323 */
2324static int
2325toast_open_indexes(Relation toastrel,
2326 LOCKMODE lock,
2327 Relation **toastidxs,
2328 int *num_indexes)
2329{
2330 int i = 0;
2331 int res = 0;
2332 bool found = false;
2333 List *indexlist;
2334 ListCell *lc;
2335
2336 /* Get index list of the toast relation */
2337 indexlist = RelationGetIndexList(toastrel);
2338 Assert(indexlist != NIL);
2339
2340 *num_indexes = list_length(indexlist);
2341
2342 /* Open all the index relations */
2343 *toastidxs = (Relation *) palloc(*num_indexes * sizeof(Relation));
2344 foreach(lc, indexlist)
2345 (*toastidxs)[i++] = index_open(lfirst_oid(lc), lock);
2346
2347 /* Fetch the first valid index in list */
2348 for (i = 0; i < *num_indexes; i++)
2349 {
2350 Relation toastidx = (*toastidxs)[i];
2351
2352 if (toastidx->rd_index->indisvalid)
2353 {
2354 res = i;
2355 found = true;
2356 break;
2357 }
2358 }
2359
2360 /*
2361 * Free index list, not necessary anymore as relations are opened and a
2362 * valid index has been found.
2363 */
2364 list_free(indexlist);
2365
2366 /*
2367 * The toast relation should have one valid index, so something is going
2368 * wrong if there is nothing.
2369 */
2370 if (!found)
2371 elog(ERROR, "no valid index found for toast relation with Oid %u",
2372 RelationGetRelid(toastrel));
2373
2374 return res;
2375}
2376
2377/* ----------
2378 * toast_close_indexes
2379 *
2380 * Close an array of indexes for a toast relation and free it. This should
2381 * be called for a set of indexes opened previously with toast_open_indexes.
2382 */
2383static void
2384toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock)
2385{
2386 int i;
2387
2388 /* Close relations and clean up things */
2389 for (i = 0; i < num_indexes; i++)
2390 index_close(toastidxs[i], lock);
2391 pfree(toastidxs);
2392}
2393
2394/* ----------
2395 * init_toast_snapshot
2396 *
2397 * Initialize an appropriate TOAST snapshot. We must use an MVCC snapshot
2398 * to initialize the TOAST snapshot; since we don't know which one to use,
2399 * just use the oldest one. This is safe: at worst, we will get a "snapshot
2400 * too old" error that might have been avoided otherwise.
2401 */
2402static void
2403init_toast_snapshot(Snapshot toast_snapshot)
2404{
2405 Snapshot snapshot = GetOldestSnapshot();
2406
2407 if (snapshot == NULL)
2408 elog(ERROR, "no known snapshots");
2409
2410 InitToastSnapshot(*toast_snapshot, snapshot->lsn, snapshot->whenTaken);
2411}
2412