1/*
2 * brin_tuple.c
3 * Method implementations for tuples in BRIN indexes.
4 *
5 * Intended usage is that code outside this file only deals with
6 * BrinMemTuples, and convert to and from the on-disk representation through
7 * functions in this file.
8 *
9 * NOTES
10 *
11 * A BRIN tuple is similar to a heap tuple, with a few key differences. The
12 * first interesting difference is that the tuple header is much simpler, only
13 * containing its total length and a small area for flags. Also, the stored
14 * data does not match the relation tuple descriptor exactly: for each
15 * attribute in the descriptor, the index tuple carries an arbitrary number
16 * of values, depending on the opclass.
17 *
18 * Also, for each column of the index relation there are two null bits: one
19 * (hasnulls) stores whether any tuple within the page range has that column
20 * set to null; the other one (allnulls) stores whether the column values are
21 * all null. If allnulls is true, then the tuple data area does not contain
22 * values for that column at all; whereas it does if the hasnulls is set.
23 * Note the size of the null bitmask may not be the same as that of the
24 * datum array.
25 *
26 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
27 * Portions Copyright (c) 1994, Regents of the University of California
28 *
29 * IDENTIFICATION
30 * src/backend/access/brin/brin_tuple.c
31 */
32#include "postgres.h"
33
34#include "access/htup_details.h"
35#include "access/brin_tuple.h"
36#include "access/tupdesc.h"
37#include "access/tupmacs.h"
38#include "utils/datum.h"
39#include "utils/memutils.h"
40
41
42static inline void brin_deconstruct_tuple(BrinDesc *brdesc,
43 char *tp, bits8 *nullbits, bool nulls,
44 Datum *values, bool *allnulls, bool *hasnulls);
45
46
47/*
48 * Return a tuple descriptor used for on-disk storage of BRIN tuples.
49 */
50static TupleDesc
51brtuple_disk_tupdesc(BrinDesc *brdesc)
52{
53 /* We cache these in the BrinDesc */
54 if (brdesc->bd_disktdesc == NULL)
55 {
56 int i;
57 int j;
58 AttrNumber attno = 1;
59 TupleDesc tupdesc;
60 MemoryContext oldcxt;
61
62 /* make sure it's in the bdesc's context */
63 oldcxt = MemoryContextSwitchTo(brdesc->bd_context);
64
65 tupdesc = CreateTemplateTupleDesc(brdesc->bd_totalstored);
66
67 for (i = 0; i < brdesc->bd_tupdesc->natts; i++)
68 {
69 for (j = 0; j < brdesc->bd_info[i]->oi_nstored; j++)
70 TupleDescInitEntry(tupdesc, attno++, NULL,
71 brdesc->bd_info[i]->oi_typcache[j]->type_id,
72 -1, 0);
73 }
74
75 MemoryContextSwitchTo(oldcxt);
76
77 brdesc->bd_disktdesc = tupdesc;
78 }
79
80 return brdesc->bd_disktdesc;
81}
82
83/*
84 * Generate a new on-disk tuple to be inserted in a BRIN index.
85 *
86 * See brin_form_placeholder_tuple if you touch this.
87 */
88BrinTuple *
89brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple,
90 Size *size)
91{
92 Datum *values;
93 bool *nulls;
94 bool anynulls = false;
95 BrinTuple *rettuple;
96 int keyno;
97 int idxattno;
98 uint16 phony_infomask = 0;
99 bits8 *phony_nullbitmap;
100 Size len,
101 hoff,
102 data_len;
103
104 Assert(brdesc->bd_totalstored > 0);
105
106 values = (Datum *) palloc(sizeof(Datum) * brdesc->bd_totalstored);
107 nulls = (bool *) palloc0(sizeof(bool) * brdesc->bd_totalstored);
108 phony_nullbitmap = (bits8 *)
109 palloc(sizeof(bits8) * BITMAPLEN(brdesc->bd_totalstored));
110
111 /*
112 * Set up the values/nulls arrays for heap_fill_tuple
113 */
114 idxattno = 0;
115 for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
116 {
117 int datumno;
118
119 /*
120 * "allnulls" is set when there's no nonnull value in any row in the
121 * column; when this happens, there is no data to store. Thus set the
122 * nullable bits for all data elements of this column and we're done.
123 */
124 if (tuple->bt_columns[keyno].bv_allnulls)
125 {
126 for (datumno = 0;
127 datumno < brdesc->bd_info[keyno]->oi_nstored;
128 datumno++)
129 nulls[idxattno++] = true;
130 anynulls = true;
131 continue;
132 }
133
134 /*
135 * The "hasnulls" bit is set when there are some null values in the
136 * data. We still need to store a real value, but the presence of
137 * this means we need a null bitmap.
138 */
139 if (tuple->bt_columns[keyno].bv_hasnulls)
140 anynulls = true;
141
142 for (datumno = 0;
143 datumno < brdesc->bd_info[keyno]->oi_nstored;
144 datumno++)
145 values[idxattno++] = tuple->bt_columns[keyno].bv_values[datumno];
146 }
147
148 /* Assert we did not overrun temp arrays */
149 Assert(idxattno <= brdesc->bd_totalstored);
150
151 /* compute total space needed */
152 len = SizeOfBrinTuple;
153 if (anynulls)
154 {
155 /*
156 * We need a double-length bitmap on an on-disk BRIN index tuple; the
157 * first half stores the "allnulls" bits, the second stores
158 * "hasnulls".
159 */
160 len += BITMAPLEN(brdesc->bd_tupdesc->natts * 2);
161 }
162
163 len = hoff = MAXALIGN(len);
164
165 data_len = heap_compute_data_size(brtuple_disk_tupdesc(brdesc),
166 values, nulls);
167 len += data_len;
168
169 len = MAXALIGN(len);
170
171 rettuple = palloc0(len);
172 rettuple->bt_blkno = blkno;
173 rettuple->bt_info = hoff;
174
175 /* Assert that hoff fits in the space available */
176 Assert((rettuple->bt_info & BRIN_OFFSET_MASK) == hoff);
177
178 /*
179 * The infomask and null bitmap as computed by heap_fill_tuple are useless
180 * to us. However, that function will not accept a null infomask; and we
181 * need to pass a valid null bitmap so that it will correctly skip
182 * outputting null attributes in the data area.
183 */
184 heap_fill_tuple(brtuple_disk_tupdesc(brdesc),
185 values,
186 nulls,
187 (char *) rettuple + hoff,
188 data_len,
189 &phony_infomask,
190 phony_nullbitmap);
191
192 /* done with these */
193 pfree(values);
194 pfree(nulls);
195 pfree(phony_nullbitmap);
196
197 /*
198 * Now fill in the real null bitmasks. allnulls first.
199 */
200 if (anynulls)
201 {
202 bits8 *bitP;
203 int bitmask;
204
205 rettuple->bt_info |= BRIN_NULLS_MASK;
206
207 /*
208 * Note that we reverse the sense of null bits in this module: we
209 * store a 1 for a null attribute rather than a 0. So we must reverse
210 * the sense of the att_isnull test in brin_deconstruct_tuple as well.
211 */
212 bitP = ((bits8 *) ((char *) rettuple + SizeOfBrinTuple)) - 1;
213 bitmask = HIGHBIT;
214 for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
215 {
216 if (bitmask != HIGHBIT)
217 bitmask <<= 1;
218 else
219 {
220 bitP += 1;
221 *bitP = 0x0;
222 bitmask = 1;
223 }
224
225 if (!tuple->bt_columns[keyno].bv_allnulls)
226 continue;
227
228 *bitP |= bitmask;
229 }
230 /* hasnulls bits follow */
231 for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
232 {
233 if (bitmask != HIGHBIT)
234 bitmask <<= 1;
235 else
236 {
237 bitP += 1;
238 *bitP = 0x0;
239 bitmask = 1;
240 }
241
242 if (!tuple->bt_columns[keyno].bv_hasnulls)
243 continue;
244
245 *bitP |= bitmask;
246 }
247 bitP = ((bits8 *) (rettuple + SizeOfBrinTuple)) - 1;
248 }
249
250 if (tuple->bt_placeholder)
251 rettuple->bt_info |= BRIN_PLACEHOLDER_MASK;
252
253 *size = len;
254 return rettuple;
255}
256
257/*
258 * Generate a new on-disk tuple with no data values, marked as placeholder.
259 *
260 * This is a cut-down version of brin_form_tuple.
261 */
262BrinTuple *
263brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size)
264{
265 Size len;
266 Size hoff;
267 BrinTuple *rettuple;
268 int keyno;
269 bits8 *bitP;
270 int bitmask;
271
272 /* compute total space needed: always add nulls */
273 len = SizeOfBrinTuple;
274 len += BITMAPLEN(brdesc->bd_tupdesc->natts * 2);
275 len = hoff = MAXALIGN(len);
276
277 rettuple = palloc0(len);
278 rettuple->bt_blkno = blkno;
279 rettuple->bt_info = hoff;
280 rettuple->bt_info |= BRIN_NULLS_MASK | BRIN_PLACEHOLDER_MASK;
281
282 bitP = ((bits8 *) ((char *) rettuple + SizeOfBrinTuple)) - 1;
283 bitmask = HIGHBIT;
284 /* set allnulls true for all attributes */
285 for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
286 {
287 if (bitmask != HIGHBIT)
288 bitmask <<= 1;
289 else
290 {
291 bitP += 1;
292 *bitP = 0x0;
293 bitmask = 1;
294 }
295
296 *bitP |= bitmask;
297 }
298 /* no need to set hasnulls */
299
300 *size = len;
301 return rettuple;
302}
303
304/*
305 * Free a tuple created by brin_form_tuple
306 */
307void
308brin_free_tuple(BrinTuple *tuple)
309{
310 pfree(tuple);
311}
312
313/*
314 * Given a brin tuple of size len, create a copy of it. If 'dest' is not
315 * NULL, its size is destsz, and can be used as output buffer; if the tuple
316 * to be copied does not fit, it is enlarged by repalloc, and the size is
317 * updated to match. This avoids palloc/free cycles when many brin tuples
318 * are being processed in loops.
319 */
320BrinTuple *
321brin_copy_tuple(BrinTuple *tuple, Size len, BrinTuple *dest, Size *destsz)
322{
323 if (!destsz || *destsz == 0)
324 dest = palloc(len);
325 else if (len > *destsz)
326 {
327 dest = repalloc(dest, len);
328 *destsz = len;
329 }
330
331 memcpy(dest, tuple, len);
332
333 return dest;
334}
335
336/*
337 * Return whether two BrinTuples are bitwise identical.
338 */
339bool
340brin_tuples_equal(const BrinTuple *a, Size alen, const BrinTuple *b, Size blen)
341{
342 if (alen != blen)
343 return false;
344 if (memcmp(a, b, alen) != 0)
345 return false;
346 return true;
347}
348
349/*
350 * Create a new BrinMemTuple from scratch, and initialize it to an empty
351 * state.
352 *
353 * Note: we don't provide any means to free a deformed tuple, so make sure to
354 * use a temporary memory context.
355 */
356BrinMemTuple *
357brin_new_memtuple(BrinDesc *brdesc)
358{
359 BrinMemTuple *dtup;
360 long basesize;
361
362 basesize = MAXALIGN(sizeof(BrinMemTuple) +
363 sizeof(BrinValues) * brdesc->bd_tupdesc->natts);
364 dtup = palloc0(basesize + sizeof(Datum) * brdesc->bd_totalstored);
365
366 dtup->bt_values = palloc(sizeof(Datum) * brdesc->bd_totalstored);
367 dtup->bt_allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
368 dtup->bt_hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts);
369
370 dtup->bt_context = AllocSetContextCreate(CurrentMemoryContext,
371 "brin dtuple",
372 ALLOCSET_DEFAULT_SIZES);
373
374 brin_memtuple_initialize(dtup, brdesc);
375
376 return dtup;
377}
378
379/*
380 * Reset a BrinMemTuple to initial state. We return the same tuple, for
381 * notational convenience.
382 */
383BrinMemTuple *
384brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc)
385{
386 int i;
387 char *currdatum;
388
389 MemoryContextReset(dtuple->bt_context);
390
391 currdatum = (char *) dtuple +
392 MAXALIGN(sizeof(BrinMemTuple) +
393 sizeof(BrinValues) * brdesc->bd_tupdesc->natts);
394 for (i = 0; i < brdesc->bd_tupdesc->natts; i++)
395 {
396 dtuple->bt_columns[i].bv_allnulls = true;
397 dtuple->bt_columns[i].bv_hasnulls = false;
398
399 dtuple->bt_columns[i].bv_attno = i + 1;
400 dtuple->bt_columns[i].bv_allnulls = true;
401 dtuple->bt_columns[i].bv_hasnulls = false;
402 dtuple->bt_columns[i].bv_values = (Datum *) currdatum;
403 currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored;
404 }
405
406 return dtuple;
407}
408
409/*
410 * Convert a BrinTuple back to a BrinMemTuple. This is the reverse of
411 * brin_form_tuple.
412 *
413 * As an optimization, the caller can pass a previously allocated 'dMemtuple'.
414 * This avoids having to allocate it here, which can be useful when this
415 * function is called many times in a loop. It is caller's responsibility
416 * that the given BrinMemTuple matches what we need here.
417 *
418 * Note we don't need the "on disk tupdesc" here; we rely on our own routine to
419 * deconstruct the tuple from the on-disk format.
420 */
421BrinMemTuple *
422brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple)
423{
424 BrinMemTuple *dtup;
425 Datum *values;
426 bool *allnulls;
427 bool *hasnulls;
428 char *tp;
429 bits8 *nullbits;
430 int keyno;
431 int valueno;
432 MemoryContext oldcxt;
433
434 dtup = dMemtuple ? brin_memtuple_initialize(dMemtuple, brdesc) :
435 brin_new_memtuple(brdesc);
436
437 if (BrinTupleIsPlaceholder(tuple))
438 dtup->bt_placeholder = true;
439 dtup->bt_blkno = tuple->bt_blkno;
440
441 values = dtup->bt_values;
442 allnulls = dtup->bt_allnulls;
443 hasnulls = dtup->bt_hasnulls;
444
445 tp = (char *) tuple + BrinTupleDataOffset(tuple);
446
447 if (BrinTupleHasNulls(tuple))
448 nullbits = (bits8 *) ((char *) tuple + SizeOfBrinTuple);
449 else
450 nullbits = NULL;
451 brin_deconstruct_tuple(brdesc,
452 tp, nullbits, BrinTupleHasNulls(tuple),
453 values, allnulls, hasnulls);
454
455 /*
456 * Iterate to assign each of the values to the corresponding item in the
457 * values array of each column. The copies occur in the tuple's context.
458 */
459 oldcxt = MemoryContextSwitchTo(dtup->bt_context);
460 for (valueno = 0, keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++)
461 {
462 int i;
463
464 if (allnulls[keyno])
465 {
466 valueno += brdesc->bd_info[keyno]->oi_nstored;
467 continue;
468 }
469
470 /*
471 * We would like to skip datumCopy'ing the values datum in some cases,
472 * caller permitting ...
473 */
474 for (i = 0; i < brdesc->bd_info[keyno]->oi_nstored; i++)
475 dtup->bt_columns[keyno].bv_values[i] =
476 datumCopy(values[valueno++],
477 brdesc->bd_info[keyno]->oi_typcache[i]->typbyval,
478 brdesc->bd_info[keyno]->oi_typcache[i]->typlen);
479
480 dtup->bt_columns[keyno].bv_hasnulls = hasnulls[keyno];
481 dtup->bt_columns[keyno].bv_allnulls = false;
482 }
483
484 MemoryContextSwitchTo(oldcxt);
485
486 return dtup;
487}
488
489/*
490 * brin_deconstruct_tuple
491 * Guts of attribute extraction from an on-disk BRIN tuple.
492 *
493 * Its arguments are:
494 * brdesc BRIN descriptor for the stored tuple
495 * tp pointer to the tuple data area
496 * nullbits pointer to the tuple nulls bitmask
497 * nulls "has nulls" bit in tuple infomask
498 * values output values, array of size brdesc->bd_totalstored
499 * allnulls output "allnulls", size brdesc->bd_tupdesc->natts
500 * hasnulls output "hasnulls", size brdesc->bd_tupdesc->natts
501 *
502 * Output arrays must have been allocated by caller.
503 */
504static inline void
505brin_deconstruct_tuple(BrinDesc *brdesc,
506 char *tp, bits8 *nullbits, bool nulls,
507 Datum *values, bool *allnulls, bool *hasnulls)
508{
509 int attnum;
510 int stored;
511 TupleDesc diskdsc;
512 long off;
513
514 /*
515 * First iterate to natts to obtain both null flags for each attribute.
516 * Note that we reverse the sense of the att_isnull test, because we store
517 * 1 for a null value (rather than a 1 for a not null value as is the
518 * att_isnull convention used elsewhere.) See brin_form_tuple.
519 */
520 for (attnum = 0; attnum < brdesc->bd_tupdesc->natts; attnum++)
521 {
522 /*
523 * the "all nulls" bit means that all values in the page range for
524 * this column are nulls. Therefore there are no values in the tuple
525 * data area.
526 */
527 allnulls[attnum] = nulls && !att_isnull(attnum, nullbits);
528
529 /*
530 * the "has nulls" bit means that some tuples have nulls, but others
531 * have not-null values. Therefore we know the tuple contains data
532 * for this column.
533 *
534 * The hasnulls bits follow the allnulls bits in the same bitmask.
535 */
536 hasnulls[attnum] =
537 nulls && !att_isnull(brdesc->bd_tupdesc->natts + attnum, nullbits);
538 }
539
540 /*
541 * Iterate to obtain each attribute's stored values. Note that since we
542 * may reuse attribute entries for more than one column, we cannot cache
543 * offsets here.
544 */
545 diskdsc = brtuple_disk_tupdesc(brdesc);
546 stored = 0;
547 off = 0;
548 for (attnum = 0; attnum < brdesc->bd_tupdesc->natts; attnum++)
549 {
550 int datumno;
551
552 if (allnulls[attnum])
553 {
554 stored += brdesc->bd_info[attnum]->oi_nstored;
555 continue;
556 }
557
558 for (datumno = 0;
559 datumno < brdesc->bd_info[attnum]->oi_nstored;
560 datumno++)
561 {
562 Form_pg_attribute thisatt = TupleDescAttr(diskdsc, stored);
563
564 if (thisatt->attlen == -1)
565 {
566 off = att_align_pointer(off, thisatt->attalign, -1,
567 tp + off);
568 }
569 else
570 {
571 /* not varlena, so safe to use att_align_nominal */
572 off = att_align_nominal(off, thisatt->attalign);
573 }
574
575 values[stored++] = fetchatt(thisatt, tp + off);
576
577 off = att_addlength_pointer(off, thisatt->attlen, tp + off);
578 }
579 }
580}
581