1 | /* |
2 | * brin_tuple.c |
3 | * Method implementations for tuples in BRIN indexes. |
4 | * |
5 | * Intended usage is that code outside this file only deals with |
6 | * BrinMemTuples, and convert to and from the on-disk representation through |
7 | * functions in this file. |
8 | * |
9 | * NOTES |
10 | * |
11 | * A BRIN tuple is similar to a heap tuple, with a few key differences. The |
12 | * first interesting difference is that the tuple header is much simpler, only |
13 | * containing its total length and a small area for flags. Also, the stored |
14 | * data does not match the relation tuple descriptor exactly: for each |
15 | * attribute in the descriptor, the index tuple carries an arbitrary number |
16 | * of values, depending on the opclass. |
17 | * |
18 | * Also, for each column of the index relation there are two null bits: one |
19 | * (hasnulls) stores whether any tuple within the page range has that column |
20 | * set to null; the other one (allnulls) stores whether the column values are |
21 | * all null. If allnulls is true, then the tuple data area does not contain |
22 | * values for that column at all; whereas it does if the hasnulls is set. |
23 | * Note the size of the null bitmask may not be the same as that of the |
24 | * datum array. |
25 | * |
26 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
27 | * Portions Copyright (c) 1994, Regents of the University of California |
28 | * |
29 | * IDENTIFICATION |
30 | * src/backend/access/brin/brin_tuple.c |
31 | */ |
32 | #include "postgres.h" |
33 | |
34 | #include "access/htup_details.h" |
35 | #include "access/brin_tuple.h" |
36 | #include "access/tupdesc.h" |
37 | #include "access/tupmacs.h" |
38 | #include "utils/datum.h" |
39 | #include "utils/memutils.h" |
40 | |
41 | |
42 | static inline void brin_deconstruct_tuple(BrinDesc *brdesc, |
43 | char *tp, bits8 *nullbits, bool nulls, |
44 | Datum *values, bool *allnulls, bool *hasnulls); |
45 | |
46 | |
47 | /* |
48 | * Return a tuple descriptor used for on-disk storage of BRIN tuples. |
49 | */ |
50 | static TupleDesc |
51 | brtuple_disk_tupdesc(BrinDesc *brdesc) |
52 | { |
53 | /* We cache these in the BrinDesc */ |
54 | if (brdesc->bd_disktdesc == NULL) |
55 | { |
56 | int i; |
57 | int j; |
58 | AttrNumber attno = 1; |
59 | TupleDesc tupdesc; |
60 | MemoryContext oldcxt; |
61 | |
62 | /* make sure it's in the bdesc's context */ |
63 | oldcxt = MemoryContextSwitchTo(brdesc->bd_context); |
64 | |
65 | tupdesc = CreateTemplateTupleDesc(brdesc->bd_totalstored); |
66 | |
67 | for (i = 0; i < brdesc->bd_tupdesc->natts; i++) |
68 | { |
69 | for (j = 0; j < brdesc->bd_info[i]->oi_nstored; j++) |
70 | TupleDescInitEntry(tupdesc, attno++, NULL, |
71 | brdesc->bd_info[i]->oi_typcache[j]->type_id, |
72 | -1, 0); |
73 | } |
74 | |
75 | MemoryContextSwitchTo(oldcxt); |
76 | |
77 | brdesc->bd_disktdesc = tupdesc; |
78 | } |
79 | |
80 | return brdesc->bd_disktdesc; |
81 | } |
82 | |
83 | /* |
84 | * Generate a new on-disk tuple to be inserted in a BRIN index. |
85 | * |
86 | * See brin_form_placeholder_tuple if you touch this. |
87 | */ |
88 | BrinTuple * |
89 | brin_form_tuple(BrinDesc *brdesc, BlockNumber blkno, BrinMemTuple *tuple, |
90 | Size *size) |
91 | { |
92 | Datum *values; |
93 | bool *nulls; |
94 | bool anynulls = false; |
95 | BrinTuple *rettuple; |
96 | int keyno; |
97 | int idxattno; |
98 | uint16 phony_infomask = 0; |
99 | bits8 *phony_nullbitmap; |
100 | Size len, |
101 | hoff, |
102 | data_len; |
103 | |
104 | Assert(brdesc->bd_totalstored > 0); |
105 | |
106 | values = (Datum *) palloc(sizeof(Datum) * brdesc->bd_totalstored); |
107 | nulls = (bool *) palloc0(sizeof(bool) * brdesc->bd_totalstored); |
108 | phony_nullbitmap = (bits8 *) |
109 | palloc(sizeof(bits8) * BITMAPLEN(brdesc->bd_totalstored)); |
110 | |
111 | /* |
112 | * Set up the values/nulls arrays for heap_fill_tuple |
113 | */ |
114 | idxattno = 0; |
115 | for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++) |
116 | { |
117 | int datumno; |
118 | |
119 | /* |
120 | * "allnulls" is set when there's no nonnull value in any row in the |
121 | * column; when this happens, there is no data to store. Thus set the |
122 | * nullable bits for all data elements of this column and we're done. |
123 | */ |
124 | if (tuple->bt_columns[keyno].bv_allnulls) |
125 | { |
126 | for (datumno = 0; |
127 | datumno < brdesc->bd_info[keyno]->oi_nstored; |
128 | datumno++) |
129 | nulls[idxattno++] = true; |
130 | anynulls = true; |
131 | continue; |
132 | } |
133 | |
134 | /* |
135 | * The "hasnulls" bit is set when there are some null values in the |
136 | * data. We still need to store a real value, but the presence of |
137 | * this means we need a null bitmap. |
138 | */ |
139 | if (tuple->bt_columns[keyno].bv_hasnulls) |
140 | anynulls = true; |
141 | |
142 | for (datumno = 0; |
143 | datumno < brdesc->bd_info[keyno]->oi_nstored; |
144 | datumno++) |
145 | values[idxattno++] = tuple->bt_columns[keyno].bv_values[datumno]; |
146 | } |
147 | |
148 | /* Assert we did not overrun temp arrays */ |
149 | Assert(idxattno <= brdesc->bd_totalstored); |
150 | |
151 | /* compute total space needed */ |
152 | len = SizeOfBrinTuple; |
153 | if (anynulls) |
154 | { |
155 | /* |
156 | * We need a double-length bitmap on an on-disk BRIN index tuple; the |
157 | * first half stores the "allnulls" bits, the second stores |
158 | * "hasnulls". |
159 | */ |
160 | len += BITMAPLEN(brdesc->bd_tupdesc->natts * 2); |
161 | } |
162 | |
163 | len = hoff = MAXALIGN(len); |
164 | |
165 | data_len = heap_compute_data_size(brtuple_disk_tupdesc(brdesc), |
166 | values, nulls); |
167 | len += data_len; |
168 | |
169 | len = MAXALIGN(len); |
170 | |
171 | rettuple = palloc0(len); |
172 | rettuple->bt_blkno = blkno; |
173 | rettuple->bt_info = hoff; |
174 | |
175 | /* Assert that hoff fits in the space available */ |
176 | Assert((rettuple->bt_info & BRIN_OFFSET_MASK) == hoff); |
177 | |
178 | /* |
179 | * The infomask and null bitmap as computed by heap_fill_tuple are useless |
180 | * to us. However, that function will not accept a null infomask; and we |
181 | * need to pass a valid null bitmap so that it will correctly skip |
182 | * outputting null attributes in the data area. |
183 | */ |
184 | heap_fill_tuple(brtuple_disk_tupdesc(brdesc), |
185 | values, |
186 | nulls, |
187 | (char *) rettuple + hoff, |
188 | data_len, |
189 | &phony_infomask, |
190 | phony_nullbitmap); |
191 | |
192 | /* done with these */ |
193 | pfree(values); |
194 | pfree(nulls); |
195 | pfree(phony_nullbitmap); |
196 | |
197 | /* |
198 | * Now fill in the real null bitmasks. allnulls first. |
199 | */ |
200 | if (anynulls) |
201 | { |
202 | bits8 *bitP; |
203 | int bitmask; |
204 | |
205 | rettuple->bt_info |= BRIN_NULLS_MASK; |
206 | |
207 | /* |
208 | * Note that we reverse the sense of null bits in this module: we |
209 | * store a 1 for a null attribute rather than a 0. So we must reverse |
210 | * the sense of the att_isnull test in brin_deconstruct_tuple as well. |
211 | */ |
212 | bitP = ((bits8 *) ((char *) rettuple + SizeOfBrinTuple)) - 1; |
213 | bitmask = HIGHBIT; |
214 | for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++) |
215 | { |
216 | if (bitmask != HIGHBIT) |
217 | bitmask <<= 1; |
218 | else |
219 | { |
220 | bitP += 1; |
221 | *bitP = 0x0; |
222 | bitmask = 1; |
223 | } |
224 | |
225 | if (!tuple->bt_columns[keyno].bv_allnulls) |
226 | continue; |
227 | |
228 | *bitP |= bitmask; |
229 | } |
230 | /* hasnulls bits follow */ |
231 | for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++) |
232 | { |
233 | if (bitmask != HIGHBIT) |
234 | bitmask <<= 1; |
235 | else |
236 | { |
237 | bitP += 1; |
238 | *bitP = 0x0; |
239 | bitmask = 1; |
240 | } |
241 | |
242 | if (!tuple->bt_columns[keyno].bv_hasnulls) |
243 | continue; |
244 | |
245 | *bitP |= bitmask; |
246 | } |
247 | bitP = ((bits8 *) (rettuple + SizeOfBrinTuple)) - 1; |
248 | } |
249 | |
250 | if (tuple->bt_placeholder) |
251 | rettuple->bt_info |= BRIN_PLACEHOLDER_MASK; |
252 | |
253 | *size = len; |
254 | return rettuple; |
255 | } |
256 | |
257 | /* |
258 | * Generate a new on-disk tuple with no data values, marked as placeholder. |
259 | * |
260 | * This is a cut-down version of brin_form_tuple. |
261 | */ |
262 | BrinTuple * |
263 | brin_form_placeholder_tuple(BrinDesc *brdesc, BlockNumber blkno, Size *size) |
264 | { |
265 | Size len; |
266 | Size hoff; |
267 | BrinTuple *rettuple; |
268 | int keyno; |
269 | bits8 *bitP; |
270 | int bitmask; |
271 | |
272 | /* compute total space needed: always add nulls */ |
273 | len = SizeOfBrinTuple; |
274 | len += BITMAPLEN(brdesc->bd_tupdesc->natts * 2); |
275 | len = hoff = MAXALIGN(len); |
276 | |
277 | rettuple = palloc0(len); |
278 | rettuple->bt_blkno = blkno; |
279 | rettuple->bt_info = hoff; |
280 | rettuple->bt_info |= BRIN_NULLS_MASK | BRIN_PLACEHOLDER_MASK; |
281 | |
282 | bitP = ((bits8 *) ((char *) rettuple + SizeOfBrinTuple)) - 1; |
283 | bitmask = HIGHBIT; |
284 | /* set allnulls true for all attributes */ |
285 | for (keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++) |
286 | { |
287 | if (bitmask != HIGHBIT) |
288 | bitmask <<= 1; |
289 | else |
290 | { |
291 | bitP += 1; |
292 | *bitP = 0x0; |
293 | bitmask = 1; |
294 | } |
295 | |
296 | *bitP |= bitmask; |
297 | } |
298 | /* no need to set hasnulls */ |
299 | |
300 | *size = len; |
301 | return rettuple; |
302 | } |
303 | |
304 | /* |
305 | * Free a tuple created by brin_form_tuple |
306 | */ |
307 | void |
308 | brin_free_tuple(BrinTuple *tuple) |
309 | { |
310 | pfree(tuple); |
311 | } |
312 | |
313 | /* |
314 | * Given a brin tuple of size len, create a copy of it. If 'dest' is not |
315 | * NULL, its size is destsz, and can be used as output buffer; if the tuple |
316 | * to be copied does not fit, it is enlarged by repalloc, and the size is |
317 | * updated to match. This avoids palloc/free cycles when many brin tuples |
318 | * are being processed in loops. |
319 | */ |
320 | BrinTuple * |
321 | brin_copy_tuple(BrinTuple *tuple, Size len, BrinTuple *dest, Size *destsz) |
322 | { |
323 | if (!destsz || *destsz == 0) |
324 | dest = palloc(len); |
325 | else if (len > *destsz) |
326 | { |
327 | dest = repalloc(dest, len); |
328 | *destsz = len; |
329 | } |
330 | |
331 | memcpy(dest, tuple, len); |
332 | |
333 | return dest; |
334 | } |
335 | |
336 | /* |
337 | * Return whether two BrinTuples are bitwise identical. |
338 | */ |
339 | bool |
340 | brin_tuples_equal(const BrinTuple *a, Size alen, const BrinTuple *b, Size blen) |
341 | { |
342 | if (alen != blen) |
343 | return false; |
344 | if (memcmp(a, b, alen) != 0) |
345 | return false; |
346 | return true; |
347 | } |
348 | |
349 | /* |
350 | * Create a new BrinMemTuple from scratch, and initialize it to an empty |
351 | * state. |
352 | * |
353 | * Note: we don't provide any means to free a deformed tuple, so make sure to |
354 | * use a temporary memory context. |
355 | */ |
356 | BrinMemTuple * |
357 | brin_new_memtuple(BrinDesc *brdesc) |
358 | { |
359 | BrinMemTuple *dtup; |
360 | long basesize; |
361 | |
362 | basesize = MAXALIGN(sizeof(BrinMemTuple) + |
363 | sizeof(BrinValues) * brdesc->bd_tupdesc->natts); |
364 | dtup = palloc0(basesize + sizeof(Datum) * brdesc->bd_totalstored); |
365 | |
366 | dtup->bt_values = palloc(sizeof(Datum) * brdesc->bd_totalstored); |
367 | dtup->bt_allnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts); |
368 | dtup->bt_hasnulls = palloc(sizeof(bool) * brdesc->bd_tupdesc->natts); |
369 | |
370 | dtup->bt_context = AllocSetContextCreate(CurrentMemoryContext, |
371 | "brin dtuple" , |
372 | ALLOCSET_DEFAULT_SIZES); |
373 | |
374 | brin_memtuple_initialize(dtup, brdesc); |
375 | |
376 | return dtup; |
377 | } |
378 | |
379 | /* |
380 | * Reset a BrinMemTuple to initial state. We return the same tuple, for |
381 | * notational convenience. |
382 | */ |
383 | BrinMemTuple * |
384 | brin_memtuple_initialize(BrinMemTuple *dtuple, BrinDesc *brdesc) |
385 | { |
386 | int i; |
387 | char *currdatum; |
388 | |
389 | MemoryContextReset(dtuple->bt_context); |
390 | |
391 | currdatum = (char *) dtuple + |
392 | MAXALIGN(sizeof(BrinMemTuple) + |
393 | sizeof(BrinValues) * brdesc->bd_tupdesc->natts); |
394 | for (i = 0; i < brdesc->bd_tupdesc->natts; i++) |
395 | { |
396 | dtuple->bt_columns[i].bv_allnulls = true; |
397 | dtuple->bt_columns[i].bv_hasnulls = false; |
398 | |
399 | dtuple->bt_columns[i].bv_attno = i + 1; |
400 | dtuple->bt_columns[i].bv_allnulls = true; |
401 | dtuple->bt_columns[i].bv_hasnulls = false; |
402 | dtuple->bt_columns[i].bv_values = (Datum *) currdatum; |
403 | currdatum += sizeof(Datum) * brdesc->bd_info[i]->oi_nstored; |
404 | } |
405 | |
406 | return dtuple; |
407 | } |
408 | |
409 | /* |
410 | * Convert a BrinTuple back to a BrinMemTuple. This is the reverse of |
411 | * brin_form_tuple. |
412 | * |
413 | * As an optimization, the caller can pass a previously allocated 'dMemtuple'. |
414 | * This avoids having to allocate it here, which can be useful when this |
415 | * function is called many times in a loop. It is caller's responsibility |
416 | * that the given BrinMemTuple matches what we need here. |
417 | * |
418 | * Note we don't need the "on disk tupdesc" here; we rely on our own routine to |
419 | * deconstruct the tuple from the on-disk format. |
420 | */ |
421 | BrinMemTuple * |
422 | brin_deform_tuple(BrinDesc *brdesc, BrinTuple *tuple, BrinMemTuple *dMemtuple) |
423 | { |
424 | BrinMemTuple *dtup; |
425 | Datum *values; |
426 | bool *allnulls; |
427 | bool *hasnulls; |
428 | char *tp; |
429 | bits8 *nullbits; |
430 | int keyno; |
431 | int valueno; |
432 | MemoryContext oldcxt; |
433 | |
434 | dtup = dMemtuple ? brin_memtuple_initialize(dMemtuple, brdesc) : |
435 | brin_new_memtuple(brdesc); |
436 | |
437 | if (BrinTupleIsPlaceholder(tuple)) |
438 | dtup->bt_placeholder = true; |
439 | dtup->bt_blkno = tuple->bt_blkno; |
440 | |
441 | values = dtup->bt_values; |
442 | allnulls = dtup->bt_allnulls; |
443 | hasnulls = dtup->bt_hasnulls; |
444 | |
445 | tp = (char *) tuple + BrinTupleDataOffset(tuple); |
446 | |
447 | if (BrinTupleHasNulls(tuple)) |
448 | nullbits = (bits8 *) ((char *) tuple + SizeOfBrinTuple); |
449 | else |
450 | nullbits = NULL; |
451 | brin_deconstruct_tuple(brdesc, |
452 | tp, nullbits, BrinTupleHasNulls(tuple), |
453 | values, allnulls, hasnulls); |
454 | |
455 | /* |
456 | * Iterate to assign each of the values to the corresponding item in the |
457 | * values array of each column. The copies occur in the tuple's context. |
458 | */ |
459 | oldcxt = MemoryContextSwitchTo(dtup->bt_context); |
460 | for (valueno = 0, keyno = 0; keyno < brdesc->bd_tupdesc->natts; keyno++) |
461 | { |
462 | int i; |
463 | |
464 | if (allnulls[keyno]) |
465 | { |
466 | valueno += brdesc->bd_info[keyno]->oi_nstored; |
467 | continue; |
468 | } |
469 | |
470 | /* |
471 | * We would like to skip datumCopy'ing the values datum in some cases, |
472 | * caller permitting ... |
473 | */ |
474 | for (i = 0; i < brdesc->bd_info[keyno]->oi_nstored; i++) |
475 | dtup->bt_columns[keyno].bv_values[i] = |
476 | datumCopy(values[valueno++], |
477 | brdesc->bd_info[keyno]->oi_typcache[i]->typbyval, |
478 | brdesc->bd_info[keyno]->oi_typcache[i]->typlen); |
479 | |
480 | dtup->bt_columns[keyno].bv_hasnulls = hasnulls[keyno]; |
481 | dtup->bt_columns[keyno].bv_allnulls = false; |
482 | } |
483 | |
484 | MemoryContextSwitchTo(oldcxt); |
485 | |
486 | return dtup; |
487 | } |
488 | |
489 | /* |
490 | * brin_deconstruct_tuple |
491 | * Guts of attribute extraction from an on-disk BRIN tuple. |
492 | * |
493 | * Its arguments are: |
494 | * brdesc BRIN descriptor for the stored tuple |
495 | * tp pointer to the tuple data area |
496 | * nullbits pointer to the tuple nulls bitmask |
497 | * nulls "has nulls" bit in tuple infomask |
498 | * values output values, array of size brdesc->bd_totalstored |
499 | * allnulls output "allnulls", size brdesc->bd_tupdesc->natts |
500 | * hasnulls output "hasnulls", size brdesc->bd_tupdesc->natts |
501 | * |
502 | * Output arrays must have been allocated by caller. |
503 | */ |
504 | static inline void |
505 | brin_deconstruct_tuple(BrinDesc *brdesc, |
506 | char *tp, bits8 *nullbits, bool nulls, |
507 | Datum *values, bool *allnulls, bool *hasnulls) |
508 | { |
509 | int attnum; |
510 | int stored; |
511 | TupleDesc diskdsc; |
512 | long off; |
513 | |
514 | /* |
515 | * First iterate to natts to obtain both null flags for each attribute. |
516 | * Note that we reverse the sense of the att_isnull test, because we store |
517 | * 1 for a null value (rather than a 1 for a not null value as is the |
518 | * att_isnull convention used elsewhere.) See brin_form_tuple. |
519 | */ |
520 | for (attnum = 0; attnum < brdesc->bd_tupdesc->natts; attnum++) |
521 | { |
522 | /* |
523 | * the "all nulls" bit means that all values in the page range for |
524 | * this column are nulls. Therefore there are no values in the tuple |
525 | * data area. |
526 | */ |
527 | allnulls[attnum] = nulls && !att_isnull(attnum, nullbits); |
528 | |
529 | /* |
530 | * the "has nulls" bit means that some tuples have nulls, but others |
531 | * have not-null values. Therefore we know the tuple contains data |
532 | * for this column. |
533 | * |
534 | * The hasnulls bits follow the allnulls bits in the same bitmask. |
535 | */ |
536 | hasnulls[attnum] = |
537 | nulls && !att_isnull(brdesc->bd_tupdesc->natts + attnum, nullbits); |
538 | } |
539 | |
540 | /* |
541 | * Iterate to obtain each attribute's stored values. Note that since we |
542 | * may reuse attribute entries for more than one column, we cannot cache |
543 | * offsets here. |
544 | */ |
545 | diskdsc = brtuple_disk_tupdesc(brdesc); |
546 | stored = 0; |
547 | off = 0; |
548 | for (attnum = 0; attnum < brdesc->bd_tupdesc->natts; attnum++) |
549 | { |
550 | int datumno; |
551 | |
552 | if (allnulls[attnum]) |
553 | { |
554 | stored += brdesc->bd_info[attnum]->oi_nstored; |
555 | continue; |
556 | } |
557 | |
558 | for (datumno = 0; |
559 | datumno < brdesc->bd_info[attnum]->oi_nstored; |
560 | datumno++) |
561 | { |
562 | Form_pg_attribute thisatt = TupleDescAttr(diskdsc, stored); |
563 | |
564 | if (thisatt->attlen == -1) |
565 | { |
566 | off = att_align_pointer(off, thisatt->attalign, -1, |
567 | tp + off); |
568 | } |
569 | else |
570 | { |
571 | /* not varlena, so safe to use att_align_nominal */ |
572 | off = att_align_nominal(off, thisatt->attalign); |
573 | } |
574 | |
575 | values[stored++] = fetchatt(thisatt, tp + off); |
576 | |
577 | off = att_addlength_pointer(off, thisatt->attlen, tp + off); |
578 | } |
579 | } |
580 | } |
581 | |