1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * tuptoaster.c |
4 | * Support routines for external and compressed storage of |
5 | * variable size attributes. |
6 | * |
7 | * Copyright (c) 2000-2019, PostgreSQL Global Development Group |
8 | * |
9 | * |
10 | * IDENTIFICATION |
11 | * src/backend/access/heap/tuptoaster.c |
12 | * |
13 | * |
14 | * INTERFACE ROUTINES |
15 | * toast_insert_or_update - |
16 | * Try to make a given tuple fit into one page by compressing |
17 | * or moving off attributes |
18 | * |
19 | * toast_delete - |
20 | * Reclaim toast storage when a tuple is deleted |
21 | * |
22 | * heap_tuple_untoast_attr - |
23 | * Fetch back a given value from the "secondary" relation |
24 | * |
25 | *------------------------------------------------------------------------- |
26 | */ |
27 | |
#include "postgres.h"

#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

#include "access/genam.h"
#include "access/heapam.h"
#include "access/tuptoaster.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "common/pg_lzcompress.h"
#include "miscadmin.h"
#include "utils/expandeddatum.h"
#include "utils/fmgroids.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
#include "utils/typcache.h"
45 | |
46 | |
47 | #undef TOAST_DEBUG |
48 | |
49 | /* |
50 | * The information at the start of the compressed toast data. |
51 | */ |
52 | typedef struct |
53 | { |
54 | int32 ; /* varlena header (do not touch directly!) */ |
55 | int32 ; |
56 | } ; |
57 | |
58 | /* |
59 | * Utilities for manipulation of header information for compressed |
60 | * toast entries. |
61 | */ |
62 | #define TOAST_COMPRESS_HDRSZ ((int32) sizeof(toast_compress_header)) |
63 | #define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->rawsize) |
64 | #define TOAST_COMPRESS_RAWDATA(ptr) \ |
65 | (((char *) (ptr)) + TOAST_COMPRESS_HDRSZ) |
66 | #define TOAST_COMPRESS_SET_RAWSIZE(ptr, len) \ |
67 | (((toast_compress_header *) (ptr))->rawsize = (len)) |
68 | |
69 | static void toast_delete_datum(Relation rel, Datum value, bool is_speculative); |
70 | static Datum toast_save_datum(Relation rel, Datum value, |
71 | struct varlena *oldexternal, int options); |
72 | static bool toastrel_valueid_exists(Relation toastrel, Oid valueid); |
73 | static bool toastid_valueid_exists(Oid toastrelid, Oid valueid); |
74 | static struct varlena *toast_fetch_datum(struct varlena *attr); |
75 | static struct varlena *toast_fetch_datum_slice(struct varlena *attr, |
76 | int32 sliceoffset, int32 length); |
77 | static struct varlena *toast_decompress_datum(struct varlena *attr); |
78 | static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength); |
79 | static int toast_open_indexes(Relation toastrel, |
80 | LOCKMODE lock, |
81 | Relation **toastidxs, |
82 | int *num_indexes); |
83 | static void toast_close_indexes(Relation *toastidxs, int num_indexes, |
84 | LOCKMODE lock); |
85 | static void init_toast_snapshot(Snapshot toast_snapshot); |
86 | |
87 | |
88 | /* ---------- |
89 | * heap_tuple_fetch_attr - |
90 | * |
91 | * Public entry point to get back a toasted value from |
92 | * external source (possibly still in compressed format). |
93 | * |
94 | * This will return a datum that contains all the data internally, ie, not |
95 | * relying on external storage or memory, but it can still be compressed or |
96 | * have a short header. Note some callers assume that if the input is an |
97 | * EXTERNAL datum, the result will be a pfree'able chunk. |
98 | * ---------- |
99 | */ |
100 | struct varlena * |
101 | heap_tuple_fetch_attr(struct varlena *attr) |
102 | { |
103 | struct varlena *result; |
104 | |
105 | if (VARATT_IS_EXTERNAL_ONDISK(attr)) |
106 | { |
107 | /* |
108 | * This is an external stored plain value |
109 | */ |
110 | result = toast_fetch_datum(attr); |
111 | } |
112 | else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) |
113 | { |
114 | /* |
115 | * This is an indirect pointer --- dereference it |
116 | */ |
117 | struct varatt_indirect redirect; |
118 | |
119 | VARATT_EXTERNAL_GET_POINTER(redirect, attr); |
120 | attr = (struct varlena *) redirect.pointer; |
121 | |
122 | /* nested indirect Datums aren't allowed */ |
123 | Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); |
124 | |
125 | /* recurse if value is still external in some other way */ |
126 | if (VARATT_IS_EXTERNAL(attr)) |
127 | return heap_tuple_fetch_attr(attr); |
128 | |
129 | /* |
130 | * Copy into the caller's memory context, in case caller tries to |
131 | * pfree the result. |
132 | */ |
133 | result = (struct varlena *) palloc(VARSIZE_ANY(attr)); |
134 | memcpy(result, attr, VARSIZE_ANY(attr)); |
135 | } |
136 | else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) |
137 | { |
138 | /* |
139 | * This is an expanded-object pointer --- get flat format |
140 | */ |
141 | ExpandedObjectHeader *eoh; |
142 | Size resultsize; |
143 | |
144 | eoh = DatumGetEOHP(PointerGetDatum(attr)); |
145 | resultsize = EOH_get_flat_size(eoh); |
146 | result = (struct varlena *) palloc(resultsize); |
147 | EOH_flatten_into(eoh, (void *) result, resultsize); |
148 | } |
149 | else |
150 | { |
151 | /* |
152 | * This is a plain value inside of the main tuple - why am I called? |
153 | */ |
154 | result = attr; |
155 | } |
156 | |
157 | return result; |
158 | } |
159 | |
160 | |
161 | /* ---------- |
162 | * heap_tuple_untoast_attr - |
163 | * |
164 | * Public entry point to get back a toasted value from compression |
165 | * or external storage. The result is always non-extended varlena form. |
166 | * |
167 | * Note some callers assume that if the input is an EXTERNAL or COMPRESSED |
168 | * datum, the result will be a pfree'able chunk. |
169 | * ---------- |
170 | */ |
171 | struct varlena * |
172 | heap_tuple_untoast_attr(struct varlena *attr) |
173 | { |
174 | if (VARATT_IS_EXTERNAL_ONDISK(attr)) |
175 | { |
176 | /* |
177 | * This is an externally stored datum --- fetch it back from there |
178 | */ |
179 | attr = toast_fetch_datum(attr); |
180 | /* If it's compressed, decompress it */ |
181 | if (VARATT_IS_COMPRESSED(attr)) |
182 | { |
183 | struct varlena *tmp = attr; |
184 | |
185 | attr = toast_decompress_datum(tmp); |
186 | pfree(tmp); |
187 | } |
188 | } |
189 | else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) |
190 | { |
191 | /* |
192 | * This is an indirect pointer --- dereference it |
193 | */ |
194 | struct varatt_indirect redirect; |
195 | |
196 | VARATT_EXTERNAL_GET_POINTER(redirect, attr); |
197 | attr = (struct varlena *) redirect.pointer; |
198 | |
199 | /* nested indirect Datums aren't allowed */ |
200 | Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); |
201 | |
202 | /* recurse in case value is still extended in some other way */ |
203 | attr = heap_tuple_untoast_attr(attr); |
204 | |
205 | /* if it isn't, we'd better copy it */ |
206 | if (attr == (struct varlena *) redirect.pointer) |
207 | { |
208 | struct varlena *result; |
209 | |
210 | result = (struct varlena *) palloc(VARSIZE_ANY(attr)); |
211 | memcpy(result, attr, VARSIZE_ANY(attr)); |
212 | attr = result; |
213 | } |
214 | } |
215 | else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) |
216 | { |
217 | /* |
218 | * This is an expanded-object pointer --- get flat format |
219 | */ |
220 | attr = heap_tuple_fetch_attr(attr); |
221 | /* flatteners are not allowed to produce compressed/short output */ |
222 | Assert(!VARATT_IS_EXTENDED(attr)); |
223 | } |
224 | else if (VARATT_IS_COMPRESSED(attr)) |
225 | { |
226 | /* |
227 | * This is a compressed value inside of the main tuple |
228 | */ |
229 | attr = toast_decompress_datum(attr); |
230 | } |
231 | else if (VARATT_IS_SHORT(attr)) |
232 | { |
233 | /* |
234 | * This is a short-header varlena --- convert to 4-byte header format |
235 | */ |
236 | Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT; |
237 | Size new_size = data_size + VARHDRSZ; |
238 | struct varlena *new_attr; |
239 | |
240 | new_attr = (struct varlena *) palloc(new_size); |
241 | SET_VARSIZE(new_attr, new_size); |
242 | memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size); |
243 | attr = new_attr; |
244 | } |
245 | |
246 | return attr; |
247 | } |
248 | |
249 | |
250 | /* ---------- |
251 | * heap_tuple_untoast_attr_slice - |
252 | * |
253 | * Public entry point to get back part of a toasted value |
254 | * from compression or external storage. |
255 | * ---------- |
256 | */ |
257 | struct varlena * |
258 | heap_tuple_untoast_attr_slice(struct varlena *attr, |
259 | int32 sliceoffset, int32 slicelength) |
260 | { |
261 | struct varlena *preslice; |
262 | struct varlena *result; |
263 | char *attrdata; |
264 | int32 attrsize; |
265 | |
266 | if (VARATT_IS_EXTERNAL_ONDISK(attr)) |
267 | { |
268 | struct varatt_external toast_pointer; |
269 | |
270 | VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); |
271 | |
272 | /* fast path for non-compressed external datums */ |
273 | if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) |
274 | return toast_fetch_datum_slice(attr, sliceoffset, slicelength); |
275 | |
276 | /* fetch it back (compressed marker will get set automatically) */ |
277 | preslice = toast_fetch_datum(attr); |
278 | } |
279 | else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) |
280 | { |
281 | struct varatt_indirect redirect; |
282 | |
283 | VARATT_EXTERNAL_GET_POINTER(redirect, attr); |
284 | |
285 | /* nested indirect Datums aren't allowed */ |
286 | Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer)); |
287 | |
288 | return heap_tuple_untoast_attr_slice(redirect.pointer, |
289 | sliceoffset, slicelength); |
290 | } |
291 | else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) |
292 | { |
293 | /* pass it off to heap_tuple_fetch_attr to flatten */ |
294 | preslice = heap_tuple_fetch_attr(attr); |
295 | } |
296 | else |
297 | preslice = attr; |
298 | |
299 | Assert(!VARATT_IS_EXTERNAL(preslice)); |
300 | |
301 | if (VARATT_IS_COMPRESSED(preslice)) |
302 | { |
303 | struct varlena *tmp = preslice; |
304 | |
305 | /* Decompress enough to encompass the slice and the offset */ |
306 | if (slicelength > 0 && sliceoffset >= 0) |
307 | preslice = toast_decompress_datum_slice(tmp, slicelength + sliceoffset); |
308 | else |
309 | preslice = toast_decompress_datum(tmp); |
310 | |
311 | if (tmp != attr) |
312 | pfree(tmp); |
313 | } |
314 | |
315 | if (VARATT_IS_SHORT(preslice)) |
316 | { |
317 | attrdata = VARDATA_SHORT(preslice); |
318 | attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT; |
319 | } |
320 | else |
321 | { |
322 | attrdata = VARDATA(preslice); |
323 | attrsize = VARSIZE(preslice) - VARHDRSZ; |
324 | } |
325 | |
326 | /* slicing of datum for compressed cases and plain value */ |
327 | |
328 | if (sliceoffset >= attrsize) |
329 | { |
330 | sliceoffset = 0; |
331 | slicelength = 0; |
332 | } |
333 | |
334 | if (((sliceoffset + slicelength) > attrsize) || slicelength < 0) |
335 | slicelength = attrsize - sliceoffset; |
336 | |
337 | result = (struct varlena *) palloc(slicelength + VARHDRSZ); |
338 | SET_VARSIZE(result, slicelength + VARHDRSZ); |
339 | |
340 | memcpy(VARDATA(result), attrdata + sliceoffset, slicelength); |
341 | |
342 | if (preslice != attr) |
343 | pfree(preslice); |
344 | |
345 | return result; |
346 | } |
347 | |
348 | |
349 | /* ---------- |
350 | * toast_raw_datum_size - |
351 | * |
352 | * Return the raw (detoasted) size of a varlena datum |
353 | * (including the VARHDRSZ header) |
354 | * ---------- |
355 | */ |
356 | Size |
357 | toast_raw_datum_size(Datum value) |
358 | { |
359 | struct varlena *attr = (struct varlena *) DatumGetPointer(value); |
360 | Size result; |
361 | |
362 | if (VARATT_IS_EXTERNAL_ONDISK(attr)) |
363 | { |
364 | /* va_rawsize is the size of the original datum -- including header */ |
365 | struct varatt_external toast_pointer; |
366 | |
367 | VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); |
368 | result = toast_pointer.va_rawsize; |
369 | } |
370 | else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) |
371 | { |
372 | struct varatt_indirect toast_pointer; |
373 | |
374 | VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); |
375 | |
376 | /* nested indirect Datums aren't allowed */ |
377 | Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer)); |
378 | |
379 | return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer)); |
380 | } |
381 | else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) |
382 | { |
383 | result = EOH_get_flat_size(DatumGetEOHP(value)); |
384 | } |
385 | else if (VARATT_IS_COMPRESSED(attr)) |
386 | { |
387 | /* here, va_rawsize is just the payload size */ |
388 | result = VARRAWSIZE_4B_C(attr) + VARHDRSZ; |
389 | } |
390 | else if (VARATT_IS_SHORT(attr)) |
391 | { |
392 | /* |
393 | * we have to normalize the header length to VARHDRSZ or else the |
394 | * callers of this function will be confused. |
395 | */ |
396 | result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ; |
397 | } |
398 | else |
399 | { |
400 | /* plain untoasted datum */ |
401 | result = VARSIZE(attr); |
402 | } |
403 | return result; |
404 | } |
405 | |
406 | /* ---------- |
407 | * toast_datum_size |
408 | * |
409 | * Return the physical storage size (possibly compressed) of a varlena datum |
410 | * ---------- |
411 | */ |
412 | Size |
413 | toast_datum_size(Datum value) |
414 | { |
415 | struct varlena *attr = (struct varlena *) DatumGetPointer(value); |
416 | Size result; |
417 | |
418 | if (VARATT_IS_EXTERNAL_ONDISK(attr)) |
419 | { |
420 | /* |
421 | * Attribute is stored externally - return the extsize whether |
422 | * compressed or not. We do not count the size of the toast pointer |
423 | * ... should we? |
424 | */ |
425 | struct varatt_external toast_pointer; |
426 | |
427 | VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); |
428 | result = toast_pointer.va_extsize; |
429 | } |
430 | else if (VARATT_IS_EXTERNAL_INDIRECT(attr)) |
431 | { |
432 | struct varatt_indirect toast_pointer; |
433 | |
434 | VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); |
435 | |
436 | /* nested indirect Datums aren't allowed */ |
437 | Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr)); |
438 | |
439 | return toast_datum_size(PointerGetDatum(toast_pointer.pointer)); |
440 | } |
441 | else if (VARATT_IS_EXTERNAL_EXPANDED(attr)) |
442 | { |
443 | result = EOH_get_flat_size(DatumGetEOHP(value)); |
444 | } |
445 | else if (VARATT_IS_SHORT(attr)) |
446 | { |
447 | result = VARSIZE_SHORT(attr); |
448 | } |
449 | else |
450 | { |
451 | /* |
452 | * Attribute is stored inline either compressed or not, just calculate |
453 | * the size of the datum in either case. |
454 | */ |
455 | result = VARSIZE(attr); |
456 | } |
457 | return result; |
458 | } |
459 | |
460 | |
461 | /* ---------- |
462 | * toast_delete - |
463 | * |
464 | * Cascaded delete toast-entries on DELETE |
465 | * ---------- |
466 | */ |
467 | void |
468 | toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative) |
469 | { |
470 | TupleDesc tupleDesc; |
471 | int numAttrs; |
472 | int i; |
473 | Datum toast_values[MaxHeapAttributeNumber]; |
474 | bool toast_isnull[MaxHeapAttributeNumber]; |
475 | |
476 | /* |
477 | * We should only ever be called for tuples of plain relations or |
478 | * materialized views --- recursing on a toast rel is bad news. |
479 | */ |
480 | Assert(rel->rd_rel->relkind == RELKIND_RELATION || |
481 | rel->rd_rel->relkind == RELKIND_MATVIEW); |
482 | |
483 | /* |
484 | * Get the tuple descriptor and break down the tuple into fields. |
485 | * |
486 | * NOTE: it's debatable whether to use heap_deform_tuple() here or just |
487 | * heap_getattr() only the varlena columns. The latter could win if there |
488 | * are few varlena columns and many non-varlena ones. However, |
489 | * heap_deform_tuple costs only O(N) while the heap_getattr way would cost |
490 | * O(N^2) if there are many varlena columns, so it seems better to err on |
491 | * the side of linear cost. (We won't even be here unless there's at |
492 | * least one varlena column, by the way.) |
493 | */ |
494 | tupleDesc = rel->rd_att; |
495 | numAttrs = tupleDesc->natts; |
496 | |
497 | Assert(numAttrs <= MaxHeapAttributeNumber); |
498 | heap_deform_tuple(oldtup, tupleDesc, toast_values, toast_isnull); |
499 | |
500 | /* |
501 | * Check for external stored attributes and delete them from the secondary |
502 | * relation. |
503 | */ |
504 | for (i = 0; i < numAttrs; i++) |
505 | { |
506 | if (TupleDescAttr(tupleDesc, i)->attlen == -1) |
507 | { |
508 | Datum value = toast_values[i]; |
509 | |
510 | if (toast_isnull[i]) |
511 | continue; |
512 | else if (VARATT_IS_EXTERNAL_ONDISK(PointerGetDatum(value))) |
513 | toast_delete_datum(rel, value, is_speculative); |
514 | } |
515 | } |
516 | } |
517 | |
518 | |
519 | /* ---------- |
520 | * toast_insert_or_update - |
521 | * |
522 | * Delete no-longer-used toast-entries and create new ones to |
523 | * make the new tuple fit on INSERT or UPDATE |
524 | * |
525 | * Inputs: |
526 | * newtup: the candidate new tuple to be inserted |
527 | * oldtup: the old row version for UPDATE, or NULL for INSERT |
528 | * options: options to be passed to heap_insert() for toast rows |
529 | * Result: |
530 | * either newtup if no toasting is needed, or a palloc'd modified tuple |
531 | * that is what should actually get stored |
532 | * |
533 | * NOTE: neither newtup nor oldtup will be modified. This is a change |
534 | * from the pre-8.1 API of this routine. |
535 | * ---------- |
536 | */ |
537 | HeapTuple |
538 | toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, |
539 | int options) |
540 | { |
541 | HeapTuple result_tuple; |
542 | TupleDesc tupleDesc; |
543 | int numAttrs; |
544 | int i; |
545 | |
546 | bool need_change = false; |
547 | bool need_free = false; |
548 | bool need_delold = false; |
549 | bool has_nulls = false; |
550 | |
551 | Size maxDataLen; |
552 | Size hoff; |
553 | |
554 | char toast_action[MaxHeapAttributeNumber]; |
555 | bool toast_isnull[MaxHeapAttributeNumber]; |
556 | bool toast_oldisnull[MaxHeapAttributeNumber]; |
557 | Datum toast_values[MaxHeapAttributeNumber]; |
558 | Datum toast_oldvalues[MaxHeapAttributeNumber]; |
559 | struct varlena *toast_oldexternal[MaxHeapAttributeNumber]; |
560 | int32 toast_sizes[MaxHeapAttributeNumber]; |
561 | bool toast_free[MaxHeapAttributeNumber]; |
562 | bool toast_delold[MaxHeapAttributeNumber]; |
563 | |
564 | /* |
565 | * Ignore the INSERT_SPECULATIVE option. Speculative insertions/super |
566 | * deletions just normally insert/delete the toast values. It seems |
567 | * easiest to deal with that here, instead on, potentially, multiple |
568 | * callers. |
569 | */ |
570 | options &= ~HEAP_INSERT_SPECULATIVE; |
571 | |
572 | /* |
573 | * We should only ever be called for tuples of plain relations or |
574 | * materialized views --- recursing on a toast rel is bad news. |
575 | */ |
576 | Assert(rel->rd_rel->relkind == RELKIND_RELATION || |
577 | rel->rd_rel->relkind == RELKIND_MATVIEW); |
578 | |
579 | /* |
580 | * Get the tuple descriptor and break down the tuple(s) into fields. |
581 | */ |
582 | tupleDesc = rel->rd_att; |
583 | numAttrs = tupleDesc->natts; |
584 | |
585 | Assert(numAttrs <= MaxHeapAttributeNumber); |
586 | heap_deform_tuple(newtup, tupleDesc, toast_values, toast_isnull); |
587 | if (oldtup != NULL) |
588 | heap_deform_tuple(oldtup, tupleDesc, toast_oldvalues, toast_oldisnull); |
589 | |
590 | /* ---------- |
591 | * Then collect information about the values given |
592 | * |
593 | * NOTE: toast_action[i] can have these values: |
594 | * ' ' default handling |
595 | * 'p' already processed --- don't touch it |
596 | * 'x' incompressible, but OK to move off |
597 | * |
598 | * NOTE: toast_sizes[i] is only made valid for varlena attributes with |
599 | * toast_action[i] different from 'p'. |
600 | * ---------- |
601 | */ |
602 | memset(toast_action, ' ', numAttrs * sizeof(char)); |
603 | memset(toast_oldexternal, 0, numAttrs * sizeof(struct varlena *)); |
604 | memset(toast_free, 0, numAttrs * sizeof(bool)); |
605 | memset(toast_delold, 0, numAttrs * sizeof(bool)); |
606 | |
607 | for (i = 0; i < numAttrs; i++) |
608 | { |
609 | Form_pg_attribute att = TupleDescAttr(tupleDesc, i); |
610 | struct varlena *old_value; |
611 | struct varlena *new_value; |
612 | |
613 | if (oldtup != NULL) |
614 | { |
615 | /* |
616 | * For UPDATE get the old and new values of this attribute |
617 | */ |
618 | old_value = (struct varlena *) DatumGetPointer(toast_oldvalues[i]); |
619 | new_value = (struct varlena *) DatumGetPointer(toast_values[i]); |
620 | |
621 | /* |
622 | * If the old value is stored on disk, check if it has changed so |
623 | * we have to delete it later. |
624 | */ |
625 | if (att->attlen == -1 && !toast_oldisnull[i] && |
626 | VARATT_IS_EXTERNAL_ONDISK(old_value)) |
627 | { |
628 | if (toast_isnull[i] || !VARATT_IS_EXTERNAL_ONDISK(new_value) || |
629 | memcmp((char *) old_value, (char *) new_value, |
630 | VARSIZE_EXTERNAL(old_value)) != 0) |
631 | { |
632 | /* |
633 | * The old external stored value isn't needed any more |
634 | * after the update |
635 | */ |
636 | toast_delold[i] = true; |
637 | need_delold = true; |
638 | } |
639 | else |
640 | { |
641 | /* |
642 | * This attribute isn't changed by this update so we reuse |
643 | * the original reference to the old value in the new |
644 | * tuple. |
645 | */ |
646 | toast_action[i] = 'p'; |
647 | continue; |
648 | } |
649 | } |
650 | } |
651 | else |
652 | { |
653 | /* |
654 | * For INSERT simply get the new value |
655 | */ |
656 | new_value = (struct varlena *) DatumGetPointer(toast_values[i]); |
657 | } |
658 | |
659 | /* |
660 | * Handle NULL attributes |
661 | */ |
662 | if (toast_isnull[i]) |
663 | { |
664 | toast_action[i] = 'p'; |
665 | has_nulls = true; |
666 | continue; |
667 | } |
668 | |
669 | /* |
670 | * Now look at varlena attributes |
671 | */ |
672 | if (att->attlen == -1) |
673 | { |
674 | /* |
675 | * If the table's attribute says PLAIN always, force it so. |
676 | */ |
677 | if (att->attstorage == 'p') |
678 | toast_action[i] = 'p'; |
679 | |
680 | /* |
681 | * We took care of UPDATE above, so any external value we find |
682 | * still in the tuple must be someone else's that we cannot reuse |
683 | * (this includes the case of an out-of-line in-memory datum). |
684 | * Fetch it back (without decompression, unless we are forcing |
685 | * PLAIN storage). If necessary, we'll push it out as a new |
686 | * external value below. |
687 | */ |
688 | if (VARATT_IS_EXTERNAL(new_value)) |
689 | { |
690 | toast_oldexternal[i] = new_value; |
691 | if (att->attstorage == 'p') |
692 | new_value = heap_tuple_untoast_attr(new_value); |
693 | else |
694 | new_value = heap_tuple_fetch_attr(new_value); |
695 | toast_values[i] = PointerGetDatum(new_value); |
696 | toast_free[i] = true; |
697 | need_change = true; |
698 | need_free = true; |
699 | } |
700 | |
701 | /* |
702 | * Remember the size of this attribute |
703 | */ |
704 | toast_sizes[i] = VARSIZE_ANY(new_value); |
705 | } |
706 | else |
707 | { |
708 | /* |
709 | * Not a varlena attribute, plain storage always |
710 | */ |
711 | toast_action[i] = 'p'; |
712 | } |
713 | } |
714 | |
715 | /* ---------- |
716 | * Compress and/or save external until data fits into target length |
717 | * |
718 | * 1: Inline compress attributes with attstorage 'x', and store very |
719 | * large attributes with attstorage 'x' or 'e' external immediately |
720 | * 2: Store attributes with attstorage 'x' or 'e' external |
721 | * 3: Inline compress attributes with attstorage 'm' |
722 | * 4: Store attributes with attstorage 'm' external |
723 | * ---------- |
724 | */ |
725 | |
726 | /* compute header overhead --- this should match heap_form_tuple() */ |
727 | hoff = SizeofHeapTupleHeader; |
728 | if (has_nulls) |
729 | hoff += BITMAPLEN(numAttrs); |
730 | hoff = MAXALIGN(hoff); |
731 | /* now convert to a limit on the tuple data size */ |
732 | maxDataLen = RelationGetToastTupleTarget(rel, TOAST_TUPLE_TARGET) - hoff; |
733 | |
734 | /* |
735 | * Look for attributes with attstorage 'x' to compress. Also find large |
736 | * attributes with attstorage 'x' or 'e', and store them external. |
737 | */ |
738 | while (heap_compute_data_size(tupleDesc, |
739 | toast_values, toast_isnull) > maxDataLen) |
740 | { |
741 | int biggest_attno = -1; |
742 | int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE); |
743 | Datum old_value; |
744 | Datum new_value; |
745 | |
746 | /* |
747 | * Search for the biggest yet unprocessed internal attribute |
748 | */ |
749 | for (i = 0; i < numAttrs; i++) |
750 | { |
751 | Form_pg_attribute att = TupleDescAttr(tupleDesc, i); |
752 | |
753 | if (toast_action[i] != ' ') |
754 | continue; |
755 | if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i]))) |
756 | continue; /* can't happen, toast_action would be 'p' */ |
757 | if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i]))) |
758 | continue; |
759 | if (att->attstorage != 'x' && att->attstorage != 'e') |
760 | continue; |
761 | if (toast_sizes[i] > biggest_size) |
762 | { |
763 | biggest_attno = i; |
764 | biggest_size = toast_sizes[i]; |
765 | } |
766 | } |
767 | |
768 | if (biggest_attno < 0) |
769 | break; |
770 | |
771 | /* |
772 | * Attempt to compress it inline, if it has attstorage 'x' |
773 | */ |
774 | i = biggest_attno; |
775 | if (TupleDescAttr(tupleDesc, i)->attstorage == 'x') |
776 | { |
777 | old_value = toast_values[i]; |
778 | new_value = toast_compress_datum(old_value); |
779 | |
780 | if (DatumGetPointer(new_value) != NULL) |
781 | { |
782 | /* successful compression */ |
783 | if (toast_free[i]) |
784 | pfree(DatumGetPointer(old_value)); |
785 | toast_values[i] = new_value; |
786 | toast_free[i] = true; |
787 | toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i])); |
788 | need_change = true; |
789 | need_free = true; |
790 | } |
791 | else |
792 | { |
793 | /* incompressible, ignore on subsequent compression passes */ |
794 | toast_action[i] = 'x'; |
795 | } |
796 | } |
797 | else |
798 | { |
799 | /* has attstorage 'e', ignore on subsequent compression passes */ |
800 | toast_action[i] = 'x'; |
801 | } |
802 | |
803 | /* |
804 | * If this value is by itself more than maxDataLen (after compression |
805 | * if any), push it out to the toast table immediately, if possible. |
806 | * This avoids uselessly compressing other fields in the common case |
807 | * where we have one long field and several short ones. |
808 | * |
809 | * XXX maybe the threshold should be less than maxDataLen? |
810 | */ |
811 | if (toast_sizes[i] > maxDataLen && |
812 | rel->rd_rel->reltoastrelid != InvalidOid) |
813 | { |
814 | old_value = toast_values[i]; |
815 | toast_action[i] = 'p'; |
816 | toast_values[i] = toast_save_datum(rel, toast_values[i], |
817 | toast_oldexternal[i], options); |
818 | if (toast_free[i]) |
819 | pfree(DatumGetPointer(old_value)); |
820 | toast_free[i] = true; |
821 | need_change = true; |
822 | need_free = true; |
823 | } |
824 | } |
825 | |
826 | /* |
827 | * Second we look for attributes of attstorage 'x' or 'e' that are still |
828 | * inline. But skip this if there's no toast table to push them to. |
829 | */ |
830 | while (heap_compute_data_size(tupleDesc, |
831 | toast_values, toast_isnull) > maxDataLen && |
832 | rel->rd_rel->reltoastrelid != InvalidOid) |
833 | { |
834 | int biggest_attno = -1; |
835 | int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE); |
836 | Datum old_value; |
837 | |
838 | /*------ |
839 | * Search for the biggest yet inlined attribute with |
840 | * attstorage equals 'x' or 'e' |
841 | *------ |
842 | */ |
843 | for (i = 0; i < numAttrs; i++) |
844 | { |
845 | Form_pg_attribute att = TupleDescAttr(tupleDesc, i); |
846 | |
847 | if (toast_action[i] == 'p') |
848 | continue; |
849 | if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i]))) |
850 | continue; /* can't happen, toast_action would be 'p' */ |
851 | if (att->attstorage != 'x' && att->attstorage != 'e') |
852 | continue; |
853 | if (toast_sizes[i] > biggest_size) |
854 | { |
855 | biggest_attno = i; |
856 | biggest_size = toast_sizes[i]; |
857 | } |
858 | } |
859 | |
860 | if (biggest_attno < 0) |
861 | break; |
862 | |
863 | /* |
864 | * Store this external |
865 | */ |
866 | i = biggest_attno; |
867 | old_value = toast_values[i]; |
868 | toast_action[i] = 'p'; |
869 | toast_values[i] = toast_save_datum(rel, toast_values[i], |
870 | toast_oldexternal[i], options); |
871 | if (toast_free[i]) |
872 | pfree(DatumGetPointer(old_value)); |
873 | toast_free[i] = true; |
874 | |
875 | need_change = true; |
876 | need_free = true; |
877 | } |
878 | |
879 | /* |
880 | * Round 3 - this time we take attributes with storage 'm' into |
881 | * compression |
882 | */ |
883 | while (heap_compute_data_size(tupleDesc, |
884 | toast_values, toast_isnull) > maxDataLen) |
885 | { |
886 | int biggest_attno = -1; |
887 | int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE); |
888 | Datum old_value; |
889 | Datum new_value; |
890 | |
891 | /* |
892 | * Search for the biggest yet uncompressed internal attribute |
893 | */ |
894 | for (i = 0; i < numAttrs; i++) |
895 | { |
896 | if (toast_action[i] != ' ') |
897 | continue; |
898 | if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i]))) |
899 | continue; /* can't happen, toast_action would be 'p' */ |
900 | if (VARATT_IS_COMPRESSED(DatumGetPointer(toast_values[i]))) |
901 | continue; |
902 | if (TupleDescAttr(tupleDesc, i)->attstorage != 'm') |
903 | continue; |
904 | if (toast_sizes[i] > biggest_size) |
905 | { |
906 | biggest_attno = i; |
907 | biggest_size = toast_sizes[i]; |
908 | } |
909 | } |
910 | |
911 | if (biggest_attno < 0) |
912 | break; |
913 | |
914 | /* |
915 | * Attempt to compress it inline |
916 | */ |
917 | i = biggest_attno; |
918 | old_value = toast_values[i]; |
919 | new_value = toast_compress_datum(old_value); |
920 | |
921 | if (DatumGetPointer(new_value) != NULL) |
922 | { |
923 | /* successful compression */ |
924 | if (toast_free[i]) |
925 | pfree(DatumGetPointer(old_value)); |
926 | toast_values[i] = new_value; |
927 | toast_free[i] = true; |
928 | toast_sizes[i] = VARSIZE(DatumGetPointer(toast_values[i])); |
929 | need_change = true; |
930 | need_free = true; |
931 | } |
932 | else |
933 | { |
934 | /* incompressible, ignore on subsequent compression passes */ |
935 | toast_action[i] = 'x'; |
936 | } |
937 | } |
938 | |
939 | /* |
940 | * Finally we store attributes of type 'm' externally. At this point we |
941 | * increase the target tuple size, so that 'm' attributes aren't stored |
942 | * externally unless really necessary. |
943 | */ |
944 | maxDataLen = TOAST_TUPLE_TARGET_MAIN - hoff; |
945 | |
946 | while (heap_compute_data_size(tupleDesc, |
947 | toast_values, toast_isnull) > maxDataLen && |
948 | rel->rd_rel->reltoastrelid != InvalidOid) |
949 | { |
950 | int biggest_attno = -1; |
951 | int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE); |
952 | Datum old_value; |
953 | |
954 | /*-------- |
955 | * Search for the biggest yet inlined attribute with |
956 | * attstorage = 'm' |
957 | *-------- |
958 | */ |
959 | for (i = 0; i < numAttrs; i++) |
960 | { |
961 | if (toast_action[i] == 'p') |
962 | continue; |
963 | if (VARATT_IS_EXTERNAL(DatumGetPointer(toast_values[i]))) |
964 | continue; /* can't happen, toast_action would be 'p' */ |
965 | if (TupleDescAttr(tupleDesc, i)->attstorage != 'm') |
966 | continue; |
967 | if (toast_sizes[i] > biggest_size) |
968 | { |
969 | biggest_attno = i; |
970 | biggest_size = toast_sizes[i]; |
971 | } |
972 | } |
973 | |
974 | if (biggest_attno < 0) |
975 | break; |
976 | |
977 | /* |
978 | * Store this external |
979 | */ |
980 | i = biggest_attno; |
981 | old_value = toast_values[i]; |
982 | toast_action[i] = 'p'; |
983 | toast_values[i] = toast_save_datum(rel, toast_values[i], |
984 | toast_oldexternal[i], options); |
985 | if (toast_free[i]) |
986 | pfree(DatumGetPointer(old_value)); |
987 | toast_free[i] = true; |
988 | |
989 | need_change = true; |
990 | need_free = true; |
991 | } |
992 | |
993 | /* |
994 | * In the case we toasted any values, we need to build a new heap tuple |
995 | * with the changed values. |
996 | */ |
997 | if (need_change) |
998 | { |
999 | HeapTupleHeader olddata = newtup->t_data; |
1000 | HeapTupleHeader new_data; |
1001 | int32 ; |
1002 | int32 new_data_len; |
1003 | int32 new_tuple_len; |
1004 | |
1005 | /* |
1006 | * Calculate the new size of the tuple. |
1007 | * |
1008 | * Note: we used to assume here that the old tuple's t_hoff must equal |
1009 | * the new_header_len value, but that was incorrect. The old tuple |
1010 | * might have a smaller-than-current natts, if there's been an ALTER |
1011 | * TABLE ADD COLUMN since it was stored; and that would lead to a |
1012 | * different conclusion about the size of the null bitmap, or even |
1013 | * whether there needs to be one at all. |
1014 | */ |
1015 | new_header_len = SizeofHeapTupleHeader; |
1016 | if (has_nulls) |
1017 | new_header_len += BITMAPLEN(numAttrs); |
1018 | new_header_len = MAXALIGN(new_header_len); |
1019 | new_data_len = heap_compute_data_size(tupleDesc, |
1020 | toast_values, toast_isnull); |
1021 | new_tuple_len = new_header_len + new_data_len; |
1022 | |
1023 | /* |
1024 | * Allocate and zero the space needed, and fill HeapTupleData fields. |
1025 | */ |
1026 | result_tuple = (HeapTuple) palloc0(HEAPTUPLESIZE + new_tuple_len); |
1027 | result_tuple->t_len = new_tuple_len; |
1028 | result_tuple->t_self = newtup->t_self; |
1029 | result_tuple->t_tableOid = newtup->t_tableOid; |
1030 | new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE); |
1031 | result_tuple->t_data = new_data; |
1032 | |
1033 | /* |
1034 | * Copy the existing tuple header, but adjust natts and t_hoff. |
1035 | */ |
1036 | memcpy(new_data, olddata, SizeofHeapTupleHeader); |
1037 | HeapTupleHeaderSetNatts(new_data, numAttrs); |
1038 | new_data->t_hoff = new_header_len; |
1039 | |
1040 | /* Copy over the data, and fill the null bitmap if needed */ |
1041 | heap_fill_tuple(tupleDesc, |
1042 | toast_values, |
1043 | toast_isnull, |
1044 | (char *) new_data + new_header_len, |
1045 | new_data_len, |
1046 | &(new_data->t_infomask), |
1047 | has_nulls ? new_data->t_bits : NULL); |
1048 | } |
1049 | else |
1050 | result_tuple = newtup; |
1051 | |
1052 | /* |
1053 | * Free allocated temp values |
1054 | */ |
1055 | if (need_free) |
1056 | for (i = 0; i < numAttrs; i++) |
1057 | if (toast_free[i]) |
1058 | pfree(DatumGetPointer(toast_values[i])); |
1059 | |
1060 | /* |
1061 | * Delete external values from the old tuple |
1062 | */ |
1063 | if (need_delold) |
1064 | for (i = 0; i < numAttrs; i++) |
1065 | if (toast_delold[i]) |
1066 | toast_delete_datum(rel, toast_oldvalues[i], false); |
1067 | |
1068 | return result_tuple; |
1069 | } |
1070 | |
1071 | |
1072 | /* ---------- |
1073 | * toast_flatten_tuple - |
1074 | * |
1075 | * "Flatten" a tuple to contain no out-of-line toasted fields. |
1076 | * (This does not eliminate compressed or short-header datums.) |
1077 | * |
1078 | * Note: we expect the caller already checked HeapTupleHasExternal(tup), |
1079 | * so there is no need for a short-circuit path. |
1080 | * ---------- |
1081 | */ |
1082 | HeapTuple |
1083 | toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc) |
1084 | { |
1085 | HeapTuple new_tuple; |
1086 | int numAttrs = tupleDesc->natts; |
1087 | int i; |
1088 | Datum toast_values[MaxTupleAttributeNumber]; |
1089 | bool toast_isnull[MaxTupleAttributeNumber]; |
1090 | bool toast_free[MaxTupleAttributeNumber]; |
1091 | |
1092 | /* |
1093 | * Break down the tuple into fields. |
1094 | */ |
1095 | Assert(numAttrs <= MaxTupleAttributeNumber); |
1096 | heap_deform_tuple(tup, tupleDesc, toast_values, toast_isnull); |
1097 | |
1098 | memset(toast_free, 0, numAttrs * sizeof(bool)); |
1099 | |
1100 | for (i = 0; i < numAttrs; i++) |
1101 | { |
1102 | /* |
1103 | * Look at non-null varlena attributes |
1104 | */ |
1105 | if (!toast_isnull[i] && TupleDescAttr(tupleDesc, i)->attlen == -1) |
1106 | { |
1107 | struct varlena *new_value; |
1108 | |
1109 | new_value = (struct varlena *) DatumGetPointer(toast_values[i]); |
1110 | if (VARATT_IS_EXTERNAL(new_value)) |
1111 | { |
1112 | new_value = heap_tuple_fetch_attr(new_value); |
1113 | toast_values[i] = PointerGetDatum(new_value); |
1114 | toast_free[i] = true; |
1115 | } |
1116 | } |
1117 | } |
1118 | |
1119 | /* |
1120 | * Form the reconfigured tuple. |
1121 | */ |
1122 | new_tuple = heap_form_tuple(tupleDesc, toast_values, toast_isnull); |
1123 | |
1124 | /* |
1125 | * Be sure to copy the tuple's identity fields. We also make a point of |
1126 | * copying visibility info, just in case anybody looks at those fields in |
1127 | * a syscache entry. |
1128 | */ |
1129 | new_tuple->t_self = tup->t_self; |
1130 | new_tuple->t_tableOid = tup->t_tableOid; |
1131 | |
1132 | new_tuple->t_data->t_choice = tup->t_data->t_choice; |
1133 | new_tuple->t_data->t_ctid = tup->t_data->t_ctid; |
1134 | new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK; |
1135 | new_tuple->t_data->t_infomask |= |
1136 | tup->t_data->t_infomask & HEAP_XACT_MASK; |
1137 | new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK; |
1138 | new_tuple->t_data->t_infomask2 |= |
1139 | tup->t_data->t_infomask2 & HEAP2_XACT_MASK; |
1140 | |
1141 | /* |
1142 | * Free allocated temp values |
1143 | */ |
1144 | for (i = 0; i < numAttrs; i++) |
1145 | if (toast_free[i]) |
1146 | pfree(DatumGetPointer(toast_values[i])); |
1147 | |
1148 | return new_tuple; |
1149 | } |
1150 | |
1151 | |
1152 | /* ---------- |
1153 | * toast_flatten_tuple_to_datum - |
1154 | * |
1155 | * "Flatten" a tuple containing out-of-line toasted fields into a Datum. |
1156 | * The result is always palloc'd in the current memory context. |
1157 | * |
1158 | * We have a general rule that Datums of container types (rows, arrays, |
1159 | * ranges, etc) must not contain any external TOAST pointers. Without |
1160 | * this rule, we'd have to look inside each Datum when preparing a tuple |
1161 | * for storage, which would be expensive and would fail to extend cleanly |
1162 | * to new sorts of container types. |
1163 | * |
1164 | * However, we don't want to say that tuples represented as HeapTuples |
1165 | * can't contain toasted fields, so instead this routine should be called |
1166 | * when such a HeapTuple is being converted into a Datum. |
1167 | * |
1168 | * While we're at it, we decompress any compressed fields too. This is not |
1169 | * necessary for correctness, but reflects an expectation that compression |
1170 | * will be more effective if applied to the whole tuple not individual |
1171 | * fields. We are not so concerned about that that we want to deconstruct |
1172 | * and reconstruct tuples just to get rid of compressed fields, however. |
1173 | * So callers typically won't call this unless they see that the tuple has |
1174 | * at least one external field. |
1175 | * |
1176 | * On the other hand, in-line short-header varlena fields are left alone. |
1177 | * If we "untoasted" them here, they'd just get changed back to short-header |
1178 | * format anyway within heap_fill_tuple. |
1179 | * ---------- |
1180 | */ |
1181 | Datum |
1182 | toast_flatten_tuple_to_datum(HeapTupleHeader tup, |
1183 | uint32 tup_len, |
1184 | TupleDesc tupleDesc) |
1185 | { |
1186 | HeapTupleHeader new_data; |
1187 | int32 ; |
1188 | int32 new_data_len; |
1189 | int32 new_tuple_len; |
1190 | HeapTupleData tmptup; |
1191 | int numAttrs = tupleDesc->natts; |
1192 | int i; |
1193 | bool has_nulls = false; |
1194 | Datum toast_values[MaxTupleAttributeNumber]; |
1195 | bool toast_isnull[MaxTupleAttributeNumber]; |
1196 | bool toast_free[MaxTupleAttributeNumber]; |
1197 | |
1198 | /* Build a temporary HeapTuple control structure */ |
1199 | tmptup.t_len = tup_len; |
1200 | ItemPointerSetInvalid(&(tmptup.t_self)); |
1201 | tmptup.t_tableOid = InvalidOid; |
1202 | tmptup.t_data = tup; |
1203 | |
1204 | /* |
1205 | * Break down the tuple into fields. |
1206 | */ |
1207 | Assert(numAttrs <= MaxTupleAttributeNumber); |
1208 | heap_deform_tuple(&tmptup, tupleDesc, toast_values, toast_isnull); |
1209 | |
1210 | memset(toast_free, 0, numAttrs * sizeof(bool)); |
1211 | |
1212 | for (i = 0; i < numAttrs; i++) |
1213 | { |
1214 | /* |
1215 | * Look at non-null varlena attributes |
1216 | */ |
1217 | if (toast_isnull[i]) |
1218 | has_nulls = true; |
1219 | else if (TupleDescAttr(tupleDesc, i)->attlen == -1) |
1220 | { |
1221 | struct varlena *new_value; |
1222 | |
1223 | new_value = (struct varlena *) DatumGetPointer(toast_values[i]); |
1224 | if (VARATT_IS_EXTERNAL(new_value) || |
1225 | VARATT_IS_COMPRESSED(new_value)) |
1226 | { |
1227 | new_value = heap_tuple_untoast_attr(new_value); |
1228 | toast_values[i] = PointerGetDatum(new_value); |
1229 | toast_free[i] = true; |
1230 | } |
1231 | } |
1232 | } |
1233 | |
1234 | /* |
1235 | * Calculate the new size of the tuple. |
1236 | * |
1237 | * This should match the reconstruction code in toast_insert_or_update. |
1238 | */ |
1239 | new_header_len = SizeofHeapTupleHeader; |
1240 | if (has_nulls) |
1241 | new_header_len += BITMAPLEN(numAttrs); |
1242 | new_header_len = MAXALIGN(new_header_len); |
1243 | new_data_len = heap_compute_data_size(tupleDesc, |
1244 | toast_values, toast_isnull); |
1245 | new_tuple_len = new_header_len + new_data_len; |
1246 | |
1247 | new_data = (HeapTupleHeader) palloc0(new_tuple_len); |
1248 | |
1249 | /* |
1250 | * Copy the existing tuple header, but adjust natts and t_hoff. |
1251 | */ |
1252 | memcpy(new_data, tup, SizeofHeapTupleHeader); |
1253 | HeapTupleHeaderSetNatts(new_data, numAttrs); |
1254 | new_data->t_hoff = new_header_len; |
1255 | |
1256 | /* Set the composite-Datum header fields correctly */ |
1257 | HeapTupleHeaderSetDatumLength(new_data, new_tuple_len); |
1258 | HeapTupleHeaderSetTypeId(new_data, tupleDesc->tdtypeid); |
1259 | HeapTupleHeaderSetTypMod(new_data, tupleDesc->tdtypmod); |
1260 | |
1261 | /* Copy over the data, and fill the null bitmap if needed */ |
1262 | heap_fill_tuple(tupleDesc, |
1263 | toast_values, |
1264 | toast_isnull, |
1265 | (char *) new_data + new_header_len, |
1266 | new_data_len, |
1267 | &(new_data->t_infomask), |
1268 | has_nulls ? new_data->t_bits : NULL); |
1269 | |
1270 | /* |
1271 | * Free allocated temp values |
1272 | */ |
1273 | for (i = 0; i < numAttrs; i++) |
1274 | if (toast_free[i]) |
1275 | pfree(DatumGetPointer(toast_values[i])); |
1276 | |
1277 | return PointerGetDatum(new_data); |
1278 | } |
1279 | |
1280 | |
1281 | /* ---------- |
1282 | * toast_build_flattened_tuple - |
1283 | * |
1284 | * Build a tuple containing no out-of-line toasted fields. |
1285 | * (This does not eliminate compressed or short-header datums.) |
1286 | * |
1287 | * This is essentially just like heap_form_tuple, except that it will |
1288 | * expand any external-data pointers beforehand. |
1289 | * |
1290 | * It's not very clear whether it would be preferable to decompress |
1291 | * in-line compressed datums while at it. For now, we don't. |
1292 | * ---------- |
1293 | */ |
1294 | HeapTuple |
1295 | toast_build_flattened_tuple(TupleDesc tupleDesc, |
1296 | Datum *values, |
1297 | bool *isnull) |
1298 | { |
1299 | HeapTuple new_tuple; |
1300 | int numAttrs = tupleDesc->natts; |
1301 | int num_to_free; |
1302 | int i; |
1303 | Datum new_values[MaxTupleAttributeNumber]; |
1304 | Pointer freeable_values[MaxTupleAttributeNumber]; |
1305 | |
1306 | /* |
1307 | * We can pass the caller's isnull array directly to heap_form_tuple, but |
1308 | * we potentially need to modify the values array. |
1309 | */ |
1310 | Assert(numAttrs <= MaxTupleAttributeNumber); |
1311 | memcpy(new_values, values, numAttrs * sizeof(Datum)); |
1312 | |
1313 | num_to_free = 0; |
1314 | for (i = 0; i < numAttrs; i++) |
1315 | { |
1316 | /* |
1317 | * Look at non-null varlena attributes |
1318 | */ |
1319 | if (!isnull[i] && TupleDescAttr(tupleDesc, i)->attlen == -1) |
1320 | { |
1321 | struct varlena *new_value; |
1322 | |
1323 | new_value = (struct varlena *) DatumGetPointer(new_values[i]); |
1324 | if (VARATT_IS_EXTERNAL(new_value)) |
1325 | { |
1326 | new_value = heap_tuple_fetch_attr(new_value); |
1327 | new_values[i] = PointerGetDatum(new_value); |
1328 | freeable_values[num_to_free++] = (Pointer) new_value; |
1329 | } |
1330 | } |
1331 | } |
1332 | |
1333 | /* |
1334 | * Form the reconfigured tuple. |
1335 | */ |
1336 | new_tuple = heap_form_tuple(tupleDesc, new_values, isnull); |
1337 | |
1338 | /* |
1339 | * Free allocated temp values |
1340 | */ |
1341 | for (i = 0; i < num_to_free; i++) |
1342 | pfree(freeable_values[i]); |
1343 | |
1344 | return new_tuple; |
1345 | } |
1346 | |
1347 | |
1348 | /* ---------- |
1349 | * toast_compress_datum - |
1350 | * |
1351 | * Create a compressed version of a varlena datum |
1352 | * |
1353 | * If we fail (ie, compressed result is actually bigger than original) |
1354 | * then return NULL. We must not use compressed data if it'd expand |
1355 | * the tuple! |
1356 | * |
1357 | * We use VAR{SIZE,DATA}_ANY so we can handle short varlenas here without |
1358 | * copying them. But we can't handle external or compressed datums. |
1359 | * ---------- |
1360 | */ |
1361 | Datum |
1362 | toast_compress_datum(Datum value) |
1363 | { |
1364 | struct varlena *tmp; |
1365 | int32 valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value)); |
1366 | int32 len; |
1367 | |
1368 | Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value))); |
1369 | Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value))); |
1370 | |
1371 | /* |
1372 | * No point in wasting a palloc cycle if value size is out of the allowed |
1373 | * range for compression |
1374 | */ |
1375 | if (valsize < PGLZ_strategy_default->min_input_size || |
1376 | valsize > PGLZ_strategy_default->max_input_size) |
1377 | return PointerGetDatum(NULL); |
1378 | |
1379 | tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) + |
1380 | TOAST_COMPRESS_HDRSZ); |
1381 | |
1382 | /* |
1383 | * We recheck the actual size even if pglz_compress() reports success, |
1384 | * because it might be satisfied with having saved as little as one byte |
1385 | * in the compressed data --- which could turn into a net loss once you |
1386 | * consider header and alignment padding. Worst case, the compressed |
1387 | * format might require three padding bytes (plus header, which is |
1388 | * included in VARSIZE(tmp)), whereas the uncompressed format would take |
1389 | * only one header byte and no padding if the value is short enough. So |
1390 | * we insist on a savings of more than 2 bytes to ensure we have a gain. |
1391 | */ |
1392 | len = pglz_compress(VARDATA_ANY(DatumGetPointer(value)), |
1393 | valsize, |
1394 | TOAST_COMPRESS_RAWDATA(tmp), |
1395 | PGLZ_strategy_default); |
1396 | if (len >= 0 && |
1397 | len + TOAST_COMPRESS_HDRSZ < valsize - 2) |
1398 | { |
1399 | TOAST_COMPRESS_SET_RAWSIZE(tmp, valsize); |
1400 | SET_VARSIZE_COMPRESSED(tmp, len + TOAST_COMPRESS_HDRSZ); |
1401 | /* successful compression */ |
1402 | return PointerGetDatum(tmp); |
1403 | } |
1404 | else |
1405 | { |
1406 | /* incompressible data */ |
1407 | pfree(tmp); |
1408 | return PointerGetDatum(NULL); |
1409 | } |
1410 | } |
1411 | |
1412 | |
1413 | /* ---------- |
1414 | * toast_get_valid_index |
1415 | * |
1416 | * Get OID of valid index associated to given toast relation. A toast |
1417 | * relation can have only one valid index at the same time. |
1418 | */ |
1419 | Oid |
1420 | toast_get_valid_index(Oid toastoid, LOCKMODE lock) |
1421 | { |
1422 | int num_indexes; |
1423 | int validIndex; |
1424 | Oid validIndexOid; |
1425 | Relation *toastidxs; |
1426 | Relation toastrel; |
1427 | |
1428 | /* Open the toast relation */ |
1429 | toastrel = table_open(toastoid, lock); |
1430 | |
1431 | /* Look for the valid index of the toast relation */ |
1432 | validIndex = toast_open_indexes(toastrel, |
1433 | lock, |
1434 | &toastidxs, |
1435 | &num_indexes); |
1436 | validIndexOid = RelationGetRelid(toastidxs[validIndex]); |
1437 | |
1438 | /* Close the toast relation and all its indexes */ |
1439 | toast_close_indexes(toastidxs, num_indexes, lock); |
1440 | table_close(toastrel, lock); |
1441 | |
1442 | return validIndexOid; |
1443 | } |
1444 | |
1445 | |
1446 | /* ---------- |
1447 | * toast_save_datum - |
1448 | * |
1449 | * Save one single datum into the secondary relation and return |
1450 | * a Datum reference for it. |
1451 | * |
1452 | * rel: the main relation we're working with (not the toast rel!) |
1453 | * value: datum to be pushed to toast storage |
1454 | * oldexternal: if not NULL, toast pointer previously representing the datum |
1455 | * options: options to be passed to heap_insert() for toast rows |
1456 | * ---------- |
1457 | */ |
1458 | static Datum |
1459 | toast_save_datum(Relation rel, Datum value, |
1460 | struct varlena *oldexternal, int options) |
1461 | { |
1462 | Relation toastrel; |
1463 | Relation *toastidxs; |
1464 | HeapTuple toasttup; |
1465 | TupleDesc toasttupDesc; |
1466 | Datum t_values[3]; |
1467 | bool t_isnull[3]; |
1468 | CommandId mycid = GetCurrentCommandId(true); |
1469 | struct varlena *result; |
1470 | struct varatt_external toast_pointer; |
1471 | union |
1472 | { |
1473 | struct varlena hdr; |
1474 | /* this is to make the union big enough for a chunk: */ |
1475 | char data[TOAST_MAX_CHUNK_SIZE + VARHDRSZ]; |
1476 | /* ensure union is aligned well enough: */ |
1477 | int32 align_it; |
1478 | } chunk_data; |
1479 | int32 chunk_size; |
1480 | int32 chunk_seq = 0; |
1481 | char *data_p; |
1482 | int32 data_todo; |
1483 | Pointer dval = DatumGetPointer(value); |
1484 | int num_indexes; |
1485 | int validIndex; |
1486 | |
1487 | Assert(!VARATT_IS_EXTERNAL(value)); |
1488 | |
1489 | /* |
1490 | * Open the toast relation and its indexes. We can use the index to check |
1491 | * uniqueness of the OID we assign to the toasted item, even though it has |
1492 | * additional columns besides OID. |
1493 | */ |
1494 | toastrel = table_open(rel->rd_rel->reltoastrelid, RowExclusiveLock); |
1495 | toasttupDesc = toastrel->rd_att; |
1496 | |
1497 | /* Open all the toast indexes and look for the valid one */ |
1498 | validIndex = toast_open_indexes(toastrel, |
1499 | RowExclusiveLock, |
1500 | &toastidxs, |
1501 | &num_indexes); |
1502 | |
1503 | /* |
1504 | * Get the data pointer and length, and compute va_rawsize and va_extsize. |
1505 | * |
1506 | * va_rawsize is the size of the equivalent fully uncompressed datum, so |
1507 | * we have to adjust for short headers. |
1508 | * |
1509 | * va_extsize is the actual size of the data payload in the toast records. |
1510 | */ |
1511 | if (VARATT_IS_SHORT(dval)) |
1512 | { |
1513 | data_p = VARDATA_SHORT(dval); |
1514 | data_todo = VARSIZE_SHORT(dval) - VARHDRSZ_SHORT; |
1515 | toast_pointer.va_rawsize = data_todo + VARHDRSZ; /* as if not short */ |
1516 | toast_pointer.va_extsize = data_todo; |
1517 | } |
1518 | else if (VARATT_IS_COMPRESSED(dval)) |
1519 | { |
1520 | data_p = VARDATA(dval); |
1521 | data_todo = VARSIZE(dval) - VARHDRSZ; |
1522 | /* rawsize in a compressed datum is just the size of the payload */ |
1523 | toast_pointer.va_rawsize = VARRAWSIZE_4B_C(dval) + VARHDRSZ; |
1524 | toast_pointer.va_extsize = data_todo; |
1525 | /* Assert that the numbers look like it's compressed */ |
1526 | Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)); |
1527 | } |
1528 | else |
1529 | { |
1530 | data_p = VARDATA(dval); |
1531 | data_todo = VARSIZE(dval) - VARHDRSZ; |
1532 | toast_pointer.va_rawsize = VARSIZE(dval); |
1533 | toast_pointer.va_extsize = data_todo; |
1534 | } |
1535 | |
1536 | /* |
1537 | * Insert the correct table OID into the result TOAST pointer. |
1538 | * |
1539 | * Normally this is the actual OID of the target toast table, but during |
1540 | * table-rewriting operations such as CLUSTER, we have to insert the OID |
1541 | * of the table's real permanent toast table instead. rd_toastoid is set |
1542 | * if we have to substitute such an OID. |
1543 | */ |
1544 | if (OidIsValid(rel->rd_toastoid)) |
1545 | toast_pointer.va_toastrelid = rel->rd_toastoid; |
1546 | else |
1547 | toast_pointer.va_toastrelid = RelationGetRelid(toastrel); |
1548 | |
1549 | /* |
1550 | * Choose an OID to use as the value ID for this toast value. |
1551 | * |
1552 | * Normally we just choose an unused OID within the toast table. But |
1553 | * during table-rewriting operations where we are preserving an existing |
1554 | * toast table OID, we want to preserve toast value OIDs too. So, if |
1555 | * rd_toastoid is set and we had a prior external value from that same |
1556 | * toast table, re-use its value ID. If we didn't have a prior external |
1557 | * value (which is a corner case, but possible if the table's attstorage |
1558 | * options have been changed), we have to pick a value ID that doesn't |
1559 | * conflict with either new or existing toast value OIDs. |
1560 | */ |
1561 | if (!OidIsValid(rel->rd_toastoid)) |
1562 | { |
1563 | /* normal case: just choose an unused OID */ |
1564 | toast_pointer.va_valueid = |
1565 | GetNewOidWithIndex(toastrel, |
1566 | RelationGetRelid(toastidxs[validIndex]), |
1567 | (AttrNumber) 1); |
1568 | } |
1569 | else |
1570 | { |
1571 | /* rewrite case: check to see if value was in old toast table */ |
1572 | toast_pointer.va_valueid = InvalidOid; |
1573 | if (oldexternal != NULL) |
1574 | { |
1575 | struct varatt_external old_toast_pointer; |
1576 | |
1577 | Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal)); |
1578 | /* Must copy to access aligned fields */ |
1579 | VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal); |
1580 | if (old_toast_pointer.va_toastrelid == rel->rd_toastoid) |
1581 | { |
1582 | /* This value came from the old toast table; reuse its OID */ |
1583 | toast_pointer.va_valueid = old_toast_pointer.va_valueid; |
1584 | |
1585 | /* |
1586 | * There is a corner case here: the table rewrite might have |
1587 | * to copy both live and recently-dead versions of a row, and |
1588 | * those versions could easily reference the same toast value. |
1589 | * When we copy the second or later version of such a row, |
1590 | * reusing the OID will mean we select an OID that's already |
1591 | * in the new toast table. Check for that, and if so, just |
1592 | * fall through without writing the data again. |
1593 | * |
1594 | * While annoying and ugly-looking, this is a good thing |
1595 | * because it ensures that we wind up with only one copy of |
1596 | * the toast value when there is only one copy in the old |
1597 | * toast table. Before we detected this case, we'd have made |
1598 | * multiple copies, wasting space; and what's worse, the |
1599 | * copies belonging to already-deleted heap tuples would not |
1600 | * be reclaimed by VACUUM. |
1601 | */ |
1602 | if (toastrel_valueid_exists(toastrel, |
1603 | toast_pointer.va_valueid)) |
1604 | { |
1605 | /* Match, so short-circuit the data storage loop below */ |
1606 | data_todo = 0; |
1607 | } |
1608 | } |
1609 | } |
1610 | if (toast_pointer.va_valueid == InvalidOid) |
1611 | { |
1612 | /* |
1613 | * new value; must choose an OID that doesn't conflict in either |
1614 | * old or new toast table |
1615 | */ |
1616 | do |
1617 | { |
1618 | toast_pointer.va_valueid = |
1619 | GetNewOidWithIndex(toastrel, |
1620 | RelationGetRelid(toastidxs[validIndex]), |
1621 | (AttrNumber) 1); |
1622 | } while (toastid_valueid_exists(rel->rd_toastoid, |
1623 | toast_pointer.va_valueid)); |
1624 | } |
1625 | } |
1626 | |
1627 | /* |
1628 | * Initialize constant parts of the tuple data |
1629 | */ |
1630 | t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid); |
1631 | t_values[2] = PointerGetDatum(&chunk_data); |
1632 | t_isnull[0] = false; |
1633 | t_isnull[1] = false; |
1634 | t_isnull[2] = false; |
1635 | |
1636 | /* |
1637 | * Split up the item into chunks |
1638 | */ |
1639 | while (data_todo > 0) |
1640 | { |
1641 | int i; |
1642 | |
1643 | CHECK_FOR_INTERRUPTS(); |
1644 | |
1645 | /* |
1646 | * Calculate the size of this chunk |
1647 | */ |
1648 | chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo); |
1649 | |
1650 | /* |
1651 | * Build a tuple and store it |
1652 | */ |
1653 | t_values[1] = Int32GetDatum(chunk_seq++); |
1654 | SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ); |
1655 | memcpy(VARDATA(&chunk_data), data_p, chunk_size); |
1656 | toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull); |
1657 | |
1658 | heap_insert(toastrel, toasttup, mycid, options, NULL); |
1659 | |
1660 | /* |
1661 | * Create the index entry. We cheat a little here by not using |
1662 | * FormIndexDatum: this relies on the knowledge that the index columns |
1663 | * are the same as the initial columns of the table for all the |
1664 | * indexes. We also cheat by not providing an IndexInfo: this is okay |
1665 | * for now because btree doesn't need one, but we might have to be |
1666 | * more honest someday. |
1667 | * |
1668 | * Note also that there had better not be any user-created index on |
1669 | * the TOAST table, since we don't bother to update anything else. |
1670 | */ |
1671 | for (i = 0; i < num_indexes; i++) |
1672 | { |
1673 | /* Only index relations marked as ready can be updated */ |
1674 | if (toastidxs[i]->rd_index->indisready) |
1675 | index_insert(toastidxs[i], t_values, t_isnull, |
1676 | &(toasttup->t_self), |
1677 | toastrel, |
1678 | toastidxs[i]->rd_index->indisunique ? |
1679 | UNIQUE_CHECK_YES : UNIQUE_CHECK_NO, |
1680 | NULL); |
1681 | } |
1682 | |
1683 | /* |
1684 | * Free memory |
1685 | */ |
1686 | heap_freetuple(toasttup); |
1687 | |
1688 | /* |
1689 | * Move on to next chunk |
1690 | */ |
1691 | data_todo -= chunk_size; |
1692 | data_p += chunk_size; |
1693 | } |
1694 | |
1695 | /* |
1696 | * Done - close toast relation and its indexes |
1697 | */ |
1698 | toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock); |
1699 | table_close(toastrel, RowExclusiveLock); |
1700 | |
1701 | /* |
1702 | * Create the TOAST pointer value that we'll return |
1703 | */ |
1704 | result = (struct varlena *) palloc(TOAST_POINTER_SIZE); |
1705 | SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK); |
1706 | memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer)); |
1707 | |
1708 | return PointerGetDatum(result); |
1709 | } |
1710 | |
1711 | |
1712 | /* ---------- |
1713 | * toast_delete_datum - |
1714 | * |
1715 | * Delete a single external stored value. |
1716 | * ---------- |
1717 | */ |
1718 | static void |
1719 | toast_delete_datum(Relation rel, Datum value, bool is_speculative) |
1720 | { |
1721 | struct varlena *attr = (struct varlena *) DatumGetPointer(value); |
1722 | struct varatt_external toast_pointer; |
1723 | Relation toastrel; |
1724 | Relation *toastidxs; |
1725 | ScanKeyData toastkey; |
1726 | SysScanDesc toastscan; |
1727 | HeapTuple toasttup; |
1728 | int num_indexes; |
1729 | int validIndex; |
1730 | SnapshotData SnapshotToast; |
1731 | |
1732 | if (!VARATT_IS_EXTERNAL_ONDISK(attr)) |
1733 | return; |
1734 | |
1735 | /* Must copy to access aligned fields */ |
1736 | VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); |
1737 | |
1738 | /* |
1739 | * Open the toast relation and its indexes |
1740 | */ |
1741 | toastrel = table_open(toast_pointer.va_toastrelid, RowExclusiveLock); |
1742 | |
1743 | /* Fetch valid relation used for process */ |
1744 | validIndex = toast_open_indexes(toastrel, |
1745 | RowExclusiveLock, |
1746 | &toastidxs, |
1747 | &num_indexes); |
1748 | |
1749 | /* |
1750 | * Setup a scan key to find chunks with matching va_valueid |
1751 | */ |
1752 | ScanKeyInit(&toastkey, |
1753 | (AttrNumber) 1, |
1754 | BTEqualStrategyNumber, F_OIDEQ, |
1755 | ObjectIdGetDatum(toast_pointer.va_valueid)); |
1756 | |
1757 | /* |
1758 | * Find all the chunks. (We don't actually care whether we see them in |
1759 | * sequence or not, but since we've already locked the index we might as |
1760 | * well use systable_beginscan_ordered.) |
1761 | */ |
1762 | init_toast_snapshot(&SnapshotToast); |
1763 | toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex], |
1764 | &SnapshotToast, 1, &toastkey); |
1765 | while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL) |
1766 | { |
1767 | /* |
1768 | * Have a chunk, delete it |
1769 | */ |
1770 | if (is_speculative) |
1771 | heap_abort_speculative(toastrel, &toasttup->t_self); |
1772 | else |
1773 | simple_heap_delete(toastrel, &toasttup->t_self); |
1774 | } |
1775 | |
1776 | /* |
1777 | * End scan and close relations |
1778 | */ |
1779 | systable_endscan_ordered(toastscan); |
1780 | toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock); |
1781 | table_close(toastrel, RowExclusiveLock); |
1782 | } |
1783 | |
1784 | |
1785 | /* ---------- |
1786 | * toastrel_valueid_exists - |
1787 | * |
1788 | * Test whether a toast value with the given ID exists in the toast relation. |
1789 | * For safety, we consider a value to exist if there are either live or dead |
1790 | * toast rows with that ID; see notes for GetNewOidWithIndex(). |
1791 | * ---------- |
1792 | */ |
1793 | static bool |
1794 | toastrel_valueid_exists(Relation toastrel, Oid valueid) |
1795 | { |
1796 | bool result = false; |
1797 | ScanKeyData toastkey; |
1798 | SysScanDesc toastscan; |
1799 | int num_indexes; |
1800 | int validIndex; |
1801 | Relation *toastidxs; |
1802 | |
1803 | /* Fetch a valid index relation */ |
1804 | validIndex = toast_open_indexes(toastrel, |
1805 | RowExclusiveLock, |
1806 | &toastidxs, |
1807 | &num_indexes); |
1808 | |
1809 | /* |
1810 | * Setup a scan key to find chunks with matching va_valueid |
1811 | */ |
1812 | ScanKeyInit(&toastkey, |
1813 | (AttrNumber) 1, |
1814 | BTEqualStrategyNumber, F_OIDEQ, |
1815 | ObjectIdGetDatum(valueid)); |
1816 | |
1817 | /* |
1818 | * Is there any such chunk? |
1819 | */ |
1820 | toastscan = systable_beginscan(toastrel, |
1821 | RelationGetRelid(toastidxs[validIndex]), |
1822 | true, SnapshotAny, 1, &toastkey); |
1823 | |
1824 | if (systable_getnext(toastscan) != NULL) |
1825 | result = true; |
1826 | |
1827 | systable_endscan(toastscan); |
1828 | |
1829 | /* Clean up */ |
1830 | toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock); |
1831 | |
1832 | return result; |
1833 | } |
1834 | |
1835 | /* ---------- |
1836 | * toastid_valueid_exists - |
1837 | * |
1838 | * As above, but work from toast rel's OID not an open relation |
1839 | * ---------- |
1840 | */ |
1841 | static bool |
1842 | toastid_valueid_exists(Oid toastrelid, Oid valueid) |
1843 | { |
1844 | bool result; |
1845 | Relation toastrel; |
1846 | |
1847 | toastrel = table_open(toastrelid, AccessShareLock); |
1848 | |
1849 | result = toastrel_valueid_exists(toastrel, valueid); |
1850 | |
1851 | table_close(toastrel, AccessShareLock); |
1852 | |
1853 | return result; |
1854 | } |
1855 | |
1856 | |
1857 | /* ---------- |
1858 | * toast_fetch_datum - |
1859 | * |
1860 | * Reconstruct an in memory Datum from the chunks saved |
1861 | * in the toast relation |
1862 | * ---------- |
1863 | */ |
1864 | static struct varlena * |
1865 | toast_fetch_datum(struct varlena *attr) |
1866 | { |
1867 | Relation toastrel; |
1868 | Relation *toastidxs; |
1869 | ScanKeyData toastkey; |
1870 | SysScanDesc toastscan; |
1871 | HeapTuple ttup; |
1872 | TupleDesc toasttupDesc; |
1873 | struct varlena *result; |
1874 | struct varatt_external toast_pointer; |
1875 | int32 ressize; |
1876 | int32 residx, |
1877 | nextidx; |
1878 | int32 numchunks; |
1879 | Pointer chunk; |
1880 | bool isnull; |
1881 | char *chunkdata; |
1882 | int32 chunksize; |
1883 | int num_indexes; |
1884 | int validIndex; |
1885 | SnapshotData SnapshotToast; |
1886 | |
1887 | if (!VARATT_IS_EXTERNAL_ONDISK(attr)) |
1888 | elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums" ); |
1889 | |
1890 | /* Must copy to access aligned fields */ |
1891 | VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); |
1892 | |
1893 | ressize = toast_pointer.va_extsize; |
1894 | numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; |
1895 | |
1896 | result = (struct varlena *) palloc(ressize + VARHDRSZ); |
1897 | |
1898 | if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)) |
1899 | SET_VARSIZE_COMPRESSED(result, ressize + VARHDRSZ); |
1900 | else |
1901 | SET_VARSIZE(result, ressize + VARHDRSZ); |
1902 | |
1903 | /* |
1904 | * Open the toast relation and its indexes |
1905 | */ |
1906 | toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock); |
1907 | toasttupDesc = toastrel->rd_att; |
1908 | |
1909 | /* Look for the valid index of the toast relation */ |
1910 | validIndex = toast_open_indexes(toastrel, |
1911 | AccessShareLock, |
1912 | &toastidxs, |
1913 | &num_indexes); |
1914 | |
1915 | /* |
1916 | * Setup a scan key to fetch from the index by va_valueid |
1917 | */ |
1918 | ScanKeyInit(&toastkey, |
1919 | (AttrNumber) 1, |
1920 | BTEqualStrategyNumber, F_OIDEQ, |
1921 | ObjectIdGetDatum(toast_pointer.va_valueid)); |
1922 | |
1923 | /* |
1924 | * Read the chunks by index |
1925 | * |
1926 | * Note that because the index is actually on (valueid, chunkidx) we will |
1927 | * see the chunks in chunkidx order, even though we didn't explicitly ask |
1928 | * for it. |
1929 | */ |
1930 | nextidx = 0; |
1931 | |
1932 | init_toast_snapshot(&SnapshotToast); |
1933 | toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex], |
1934 | &SnapshotToast, 1, &toastkey); |
1935 | while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL) |
1936 | { |
1937 | /* |
1938 | * Have a chunk, extract the sequence number and the data |
1939 | */ |
1940 | residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull)); |
1941 | Assert(!isnull); |
1942 | chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull)); |
1943 | Assert(!isnull); |
1944 | if (!VARATT_IS_EXTENDED(chunk)) |
1945 | { |
1946 | chunksize = VARSIZE(chunk) - VARHDRSZ; |
1947 | chunkdata = VARDATA(chunk); |
1948 | } |
1949 | else if (VARATT_IS_SHORT(chunk)) |
1950 | { |
1951 | /* could happen due to heap_form_tuple doing its thing */ |
1952 | chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT; |
1953 | chunkdata = VARDATA_SHORT(chunk); |
1954 | } |
1955 | else |
1956 | { |
1957 | /* should never happen */ |
1958 | elog(ERROR, "found toasted toast chunk for toast value %u in %s" , |
1959 | toast_pointer.va_valueid, |
1960 | RelationGetRelationName(toastrel)); |
1961 | chunksize = 0; /* keep compiler quiet */ |
1962 | chunkdata = NULL; |
1963 | } |
1964 | |
1965 | /* |
1966 | * Some checks on the data we've found |
1967 | */ |
1968 | if (residx != nextidx) |
1969 | elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s" , |
1970 | residx, nextidx, |
1971 | toast_pointer.va_valueid, |
1972 | RelationGetRelationName(toastrel)); |
1973 | if (residx < numchunks - 1) |
1974 | { |
1975 | if (chunksize != TOAST_MAX_CHUNK_SIZE) |
1976 | elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s" , |
1977 | chunksize, (int) TOAST_MAX_CHUNK_SIZE, |
1978 | residx, numchunks, |
1979 | toast_pointer.va_valueid, |
1980 | RelationGetRelationName(toastrel)); |
1981 | } |
1982 | else if (residx == numchunks - 1) |
1983 | { |
1984 | if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize) |
1985 | elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s" , |
1986 | chunksize, |
1987 | (int) (ressize - residx * TOAST_MAX_CHUNK_SIZE), |
1988 | residx, |
1989 | toast_pointer.va_valueid, |
1990 | RelationGetRelationName(toastrel)); |
1991 | } |
1992 | else |
1993 | elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s" , |
1994 | residx, |
1995 | 0, numchunks - 1, |
1996 | toast_pointer.va_valueid, |
1997 | RelationGetRelationName(toastrel)); |
1998 | |
1999 | /* |
2000 | * Copy the data into proper place in our result |
2001 | */ |
2002 | memcpy(VARDATA(result) + residx * TOAST_MAX_CHUNK_SIZE, |
2003 | chunkdata, |
2004 | chunksize); |
2005 | |
2006 | nextidx++; |
2007 | } |
2008 | |
2009 | /* |
2010 | * Final checks that we successfully fetched the datum |
2011 | */ |
2012 | if (nextidx != numchunks) |
2013 | elog(ERROR, "missing chunk number %d for toast value %u in %s" , |
2014 | nextidx, |
2015 | toast_pointer.va_valueid, |
2016 | RelationGetRelationName(toastrel)); |
2017 | |
2018 | /* |
2019 | * End scan and close relations |
2020 | */ |
2021 | systable_endscan_ordered(toastscan); |
2022 | toast_close_indexes(toastidxs, num_indexes, AccessShareLock); |
2023 | table_close(toastrel, AccessShareLock); |
2024 | |
2025 | return result; |
2026 | } |
2027 | |
2028 | /* ---------- |
2029 | * toast_fetch_datum_slice - |
2030 | * |
2031 | * Reconstruct a segment of a Datum from the chunks saved |
2032 | * in the toast relation |
2033 | * |
2034 | * Note that this function only supports non-compressed external datums. |
2035 | * ---------- |
2036 | */ |
2037 | static struct varlena * |
2038 | toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length) |
2039 | { |
2040 | Relation toastrel; |
2041 | Relation *toastidxs; |
2042 | ScanKeyData toastkey[3]; |
2043 | int nscankeys; |
2044 | SysScanDesc toastscan; |
2045 | HeapTuple ttup; |
2046 | TupleDesc toasttupDesc; |
2047 | struct varlena *result; |
2048 | struct varatt_external toast_pointer; |
2049 | int32 attrsize; |
2050 | int32 residx; |
2051 | int32 nextidx; |
2052 | int numchunks; |
2053 | int startchunk; |
2054 | int endchunk; |
2055 | int32 startoffset; |
2056 | int32 endoffset; |
2057 | int totalchunks; |
2058 | Pointer chunk; |
2059 | bool isnull; |
2060 | char *chunkdata; |
2061 | int32 chunksize; |
2062 | int32 chcpystrt; |
2063 | int32 chcpyend; |
2064 | int num_indexes; |
2065 | int validIndex; |
2066 | SnapshotData SnapshotToast; |
2067 | |
2068 | if (!VARATT_IS_EXTERNAL_ONDISK(attr)) |
2069 | elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums" ); |
2070 | |
2071 | /* Must copy to access aligned fields */ |
2072 | VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr); |
2073 | |
2074 | /* |
2075 | * It's nonsense to fetch slices of a compressed datum -- this isn't lo_* |
2076 | * we can't return a compressed datum which is meaningful to toast later |
2077 | */ |
2078 | Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)); |
2079 | |
2080 | attrsize = toast_pointer.va_extsize; |
2081 | totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1; |
2082 | |
2083 | if (sliceoffset >= attrsize) |
2084 | { |
2085 | sliceoffset = 0; |
2086 | length = 0; |
2087 | } |
2088 | |
2089 | if (((sliceoffset + length) > attrsize) || length < 0) |
2090 | length = attrsize - sliceoffset; |
2091 | |
2092 | result = (struct varlena *) palloc(length + VARHDRSZ); |
2093 | |
2094 | SET_VARSIZE(result, length + VARHDRSZ); |
2095 | |
2096 | if (length == 0) |
2097 | return result; /* Can save a lot of work at this point! */ |
2098 | |
2099 | startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE; |
2100 | endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE; |
2101 | numchunks = (endchunk - startchunk) + 1; |
2102 | |
2103 | startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE; |
2104 | endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE; |
2105 | |
2106 | /* |
2107 | * Open the toast relation and its indexes |
2108 | */ |
2109 | toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock); |
2110 | toasttupDesc = toastrel->rd_att; |
2111 | |
2112 | /* Look for the valid index of toast relation */ |
2113 | validIndex = toast_open_indexes(toastrel, |
2114 | AccessShareLock, |
2115 | &toastidxs, |
2116 | &num_indexes); |
2117 | |
2118 | /* |
2119 | * Setup a scan key to fetch from the index. This is either two keys or |
2120 | * three depending on the number of chunks. |
2121 | */ |
2122 | ScanKeyInit(&toastkey[0], |
2123 | (AttrNumber) 1, |
2124 | BTEqualStrategyNumber, F_OIDEQ, |
2125 | ObjectIdGetDatum(toast_pointer.va_valueid)); |
2126 | |
2127 | /* |
2128 | * Use equality condition for one chunk, a range condition otherwise: |
2129 | */ |
2130 | if (numchunks == 1) |
2131 | { |
2132 | ScanKeyInit(&toastkey[1], |
2133 | (AttrNumber) 2, |
2134 | BTEqualStrategyNumber, F_INT4EQ, |
2135 | Int32GetDatum(startchunk)); |
2136 | nscankeys = 2; |
2137 | } |
2138 | else |
2139 | { |
2140 | ScanKeyInit(&toastkey[1], |
2141 | (AttrNumber) 2, |
2142 | BTGreaterEqualStrategyNumber, F_INT4GE, |
2143 | Int32GetDatum(startchunk)); |
2144 | ScanKeyInit(&toastkey[2], |
2145 | (AttrNumber) 2, |
2146 | BTLessEqualStrategyNumber, F_INT4LE, |
2147 | Int32GetDatum(endchunk)); |
2148 | nscankeys = 3; |
2149 | } |
2150 | |
2151 | /* |
2152 | * Read the chunks by index |
2153 | * |
2154 | * The index is on (valueid, chunkidx) so they will come in order |
2155 | */ |
2156 | init_toast_snapshot(&SnapshotToast); |
2157 | nextidx = startchunk; |
2158 | toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex], |
2159 | &SnapshotToast, nscankeys, toastkey); |
2160 | while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL) |
2161 | { |
2162 | /* |
2163 | * Have a chunk, extract the sequence number and the data |
2164 | */ |
2165 | residx = DatumGetInt32(fastgetattr(ttup, 2, toasttupDesc, &isnull)); |
2166 | Assert(!isnull); |
2167 | chunk = DatumGetPointer(fastgetattr(ttup, 3, toasttupDesc, &isnull)); |
2168 | Assert(!isnull); |
2169 | if (!VARATT_IS_EXTENDED(chunk)) |
2170 | { |
2171 | chunksize = VARSIZE(chunk) - VARHDRSZ; |
2172 | chunkdata = VARDATA(chunk); |
2173 | } |
2174 | else if (VARATT_IS_SHORT(chunk)) |
2175 | { |
2176 | /* could happen due to heap_form_tuple doing its thing */ |
2177 | chunksize = VARSIZE_SHORT(chunk) - VARHDRSZ_SHORT; |
2178 | chunkdata = VARDATA_SHORT(chunk); |
2179 | } |
2180 | else |
2181 | { |
2182 | /* should never happen */ |
2183 | elog(ERROR, "found toasted toast chunk for toast value %u in %s" , |
2184 | toast_pointer.va_valueid, |
2185 | RelationGetRelationName(toastrel)); |
2186 | chunksize = 0; /* keep compiler quiet */ |
2187 | chunkdata = NULL; |
2188 | } |
2189 | |
2190 | /* |
2191 | * Some checks on the data we've found |
2192 | */ |
2193 | if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk)) |
2194 | elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s" , |
2195 | residx, nextidx, |
2196 | toast_pointer.va_valueid, |
2197 | RelationGetRelationName(toastrel)); |
2198 | if (residx < totalchunks - 1) |
2199 | { |
2200 | if (chunksize != TOAST_MAX_CHUNK_SIZE) |
2201 | elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s when fetching slice" , |
2202 | chunksize, (int) TOAST_MAX_CHUNK_SIZE, |
2203 | residx, totalchunks, |
2204 | toast_pointer.va_valueid, |
2205 | RelationGetRelationName(toastrel)); |
2206 | } |
2207 | else if (residx == totalchunks - 1) |
2208 | { |
2209 | if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize) |
2210 | elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s when fetching slice" , |
2211 | chunksize, |
2212 | (int) (attrsize - residx * TOAST_MAX_CHUNK_SIZE), |
2213 | residx, |
2214 | toast_pointer.va_valueid, |
2215 | RelationGetRelationName(toastrel)); |
2216 | } |
2217 | else |
2218 | elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s" , |
2219 | residx, |
2220 | 0, totalchunks - 1, |
2221 | toast_pointer.va_valueid, |
2222 | RelationGetRelationName(toastrel)); |
2223 | |
2224 | /* |
2225 | * Copy the data into proper place in our result |
2226 | */ |
2227 | chcpystrt = 0; |
2228 | chcpyend = chunksize - 1; |
2229 | if (residx == startchunk) |
2230 | chcpystrt = startoffset; |
2231 | if (residx == endchunk) |
2232 | chcpyend = endoffset; |
2233 | |
2234 | memcpy(VARDATA(result) + |
2235 | (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt, |
2236 | chunkdata + chcpystrt, |
2237 | (chcpyend - chcpystrt) + 1); |
2238 | |
2239 | nextidx++; |
2240 | } |
2241 | |
2242 | /* |
2243 | * Final checks that we successfully fetched the datum |
2244 | */ |
2245 | if (nextidx != (endchunk + 1)) |
2246 | elog(ERROR, "missing chunk number %d for toast value %u in %s" , |
2247 | nextidx, |
2248 | toast_pointer.va_valueid, |
2249 | RelationGetRelationName(toastrel)); |
2250 | |
2251 | /* |
2252 | * End scan and close relations |
2253 | */ |
2254 | systable_endscan_ordered(toastscan); |
2255 | toast_close_indexes(toastidxs, num_indexes, AccessShareLock); |
2256 | table_close(toastrel, AccessShareLock); |
2257 | |
2258 | return result; |
2259 | } |
2260 | |
2261 | /* ---------- |
2262 | * toast_decompress_datum - |
2263 | * |
2264 | * Decompress a compressed version of a varlena datum |
2265 | */ |
2266 | static struct varlena * |
2267 | toast_decompress_datum(struct varlena *attr) |
2268 | { |
2269 | struct varlena *result; |
2270 | |
2271 | Assert(VARATT_IS_COMPRESSED(attr)); |
2272 | |
2273 | result = (struct varlena *) |
2274 | palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ); |
2275 | SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ); |
2276 | |
2277 | if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr), |
2278 | VARSIZE(attr) - TOAST_COMPRESS_HDRSZ, |
2279 | VARDATA(result), |
2280 | TOAST_COMPRESS_RAWSIZE(attr), true) < 0) |
2281 | elog(ERROR, "compressed data is corrupted" ); |
2282 | |
2283 | return result; |
2284 | } |
2285 | |
2286 | |
2287 | /* ---------- |
2288 | * toast_decompress_datum_slice - |
2289 | * |
2290 | * Decompress the front of a compressed version of a varlena datum. |
2291 | * offset handling happens in heap_tuple_untoast_attr_slice. |
2292 | * Here we just decompress a slice from the front. |
2293 | */ |
2294 | static struct varlena * |
2295 | toast_decompress_datum_slice(struct varlena *attr, int32 slicelength) |
2296 | { |
2297 | struct varlena *result; |
2298 | int32 rawsize; |
2299 | |
2300 | Assert(VARATT_IS_COMPRESSED(attr)); |
2301 | |
2302 | result = (struct varlena *) palloc(slicelength + VARHDRSZ); |
2303 | |
2304 | rawsize = pglz_decompress(TOAST_COMPRESS_RAWDATA(attr), |
2305 | VARSIZE(attr) - TOAST_COMPRESS_HDRSZ, |
2306 | VARDATA(result), |
2307 | slicelength, false); |
2308 | if (rawsize < 0) |
2309 | elog(ERROR, "compressed data is corrupted" ); |
2310 | |
2311 | SET_VARSIZE(result, rawsize + VARHDRSZ); |
2312 | return result; |
2313 | } |
2314 | |
2315 | |
2316 | /* ---------- |
2317 | * toast_open_indexes |
2318 | * |
2319 | * Get an array of the indexes associated to the given toast relation |
2320 | * and return as well the position of the valid index used by the toast |
2321 | * relation in this array. It is the responsibility of the caller of this |
2322 | * function to close the indexes as well as free them. |
2323 | */ |
2324 | static int |
2325 | toast_open_indexes(Relation toastrel, |
2326 | LOCKMODE lock, |
2327 | Relation **toastidxs, |
2328 | int *num_indexes) |
2329 | { |
2330 | int i = 0; |
2331 | int res = 0; |
2332 | bool found = false; |
2333 | List *indexlist; |
2334 | ListCell *lc; |
2335 | |
2336 | /* Get index list of the toast relation */ |
2337 | indexlist = RelationGetIndexList(toastrel); |
2338 | Assert(indexlist != NIL); |
2339 | |
2340 | *num_indexes = list_length(indexlist); |
2341 | |
2342 | /* Open all the index relations */ |
2343 | *toastidxs = (Relation *) palloc(*num_indexes * sizeof(Relation)); |
2344 | foreach(lc, indexlist) |
2345 | (*toastidxs)[i++] = index_open(lfirst_oid(lc), lock); |
2346 | |
2347 | /* Fetch the first valid index in list */ |
2348 | for (i = 0; i < *num_indexes; i++) |
2349 | { |
2350 | Relation toastidx = (*toastidxs)[i]; |
2351 | |
2352 | if (toastidx->rd_index->indisvalid) |
2353 | { |
2354 | res = i; |
2355 | found = true; |
2356 | break; |
2357 | } |
2358 | } |
2359 | |
2360 | /* |
2361 | * Free index list, not necessary anymore as relations are opened and a |
2362 | * valid index has been found. |
2363 | */ |
2364 | list_free(indexlist); |
2365 | |
2366 | /* |
2367 | * The toast relation should have one valid index, so something is going |
2368 | * wrong if there is nothing. |
2369 | */ |
2370 | if (!found) |
2371 | elog(ERROR, "no valid index found for toast relation with Oid %u" , |
2372 | RelationGetRelid(toastrel)); |
2373 | |
2374 | return res; |
2375 | } |
2376 | |
2377 | /* ---------- |
2378 | * toast_close_indexes |
2379 | * |
2380 | * Close an array of indexes for a toast relation and free it. This should |
2381 | * be called for a set of indexes opened previously with toast_open_indexes. |
2382 | */ |
2383 | static void |
2384 | toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock) |
2385 | { |
2386 | int i; |
2387 | |
2388 | /* Close relations and clean up things */ |
2389 | for (i = 0; i < num_indexes; i++) |
2390 | index_close(toastidxs[i], lock); |
2391 | pfree(toastidxs); |
2392 | } |
2393 | |
2394 | /* ---------- |
2395 | * init_toast_snapshot |
2396 | * |
2397 | * Initialize an appropriate TOAST snapshot. We must use an MVCC snapshot |
2398 | * to initialize the TOAST snapshot; since we don't know which one to use, |
2399 | * just use the oldest one. This is safe: at worst, we will get a "snapshot |
2400 | * too old" error that might have been avoided otherwise. |
2401 | */ |
2402 | static void |
2403 | init_toast_snapshot(Snapshot toast_snapshot) |
2404 | { |
2405 | Snapshot snapshot = GetOldestSnapshot(); |
2406 | |
2407 | if (snapshot == NULL) |
2408 | elog(ERROR, "no known snapshots" ); |
2409 | |
2410 | InitToastSnapshot(*toast_snapshot, snapshot->lsn, snapshot->whenTaken); |
2411 | } |
2412 | |