1/*-------------------------------------------------------------------------
2 *
3 * mcv.c
4 * POSTGRES multivariate MCV lists
5 *
6 *
7 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * IDENTIFICATION
11 * src/backend/statistics/mcv.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include <math.h>
18
19#include "access/htup_details.h"
20#include "catalog/pg_collation.h"
21#include "catalog/pg_statistic_ext.h"
22#include "catalog/pg_statistic_ext_data.h"
23#include "fmgr.h"
24#include "funcapi.h"
25#include "nodes/nodeFuncs.h"
26#include "optimizer/clauses.h"
27#include "statistics/extended_stats_internal.h"
28#include "statistics/statistics.h"
29#include "utils/builtins.h"
30#include "utils/bytea.h"
31#include "utils/fmgroids.h"
32#include "utils/fmgrprotos.h"
33#include "utils/lsyscache.h"
34#include "utils/syscache.h"
35#include "utils/typcache.h"
36
37/*
38 * Computes size of a serialized MCV item, depending on the number of
39 * dimensions (columns) the statistic is defined on. The datum values are
40 * stored in a separate array (deduplicated, to minimize the size), and
41 * so the serialized items only store uint16 indexes into that array.
42 *
43 * Each serialized item stores (in this order):
44 *
45 * - indexes to values (ndim * sizeof(uint16))
46 * - null flags (ndim * sizeof(bool))
47 * - frequency (sizeof(double))
48 * - base_frequency (sizeof(double))
49 *
50 * There is no alignment padding within an MCV item.
51 * So in total each MCV item requires this many bytes:
52 *
53 * ndim * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double)
54 */
55#define ITEM_SIZE(ndims) \
56 ((ndims) * (sizeof(uint16) + sizeof(bool)) + 2 * sizeof(double))
57
58/*
59 * Used to compute size of serialized MCV list representation.
60 */
61#define MinSizeOfMCVList \
62 (VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
63
64/*
65 * Size of the serialized MCV list, excluding the space needed for
66 * deduplicated per-dimension values. The macro is meant to be used
67 * when it's not yet safe to access the serialized info about amount
68 * of data for each column.
69 */
70#define SizeOfMCVList(ndims,nitems) \
71 ((MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
72 ((ndims) * sizeof(DimensionInfo)) + \
73 ((nitems) * ITEM_SIZE(ndims)))
74
75static MultiSortSupport build_mss(VacAttrStats **stats, int numattrs);
76
77static SortItem *build_distinct_groups(int numrows, SortItem *items,
78 MultiSortSupport mss, int *ndistinct);
79
80static SortItem **build_column_frequencies(SortItem *groups, int ngroups,
81 MultiSortSupport mss, int *ncounts);
82
83static int count_distinct_groups(int numrows, SortItem *items,
84 MultiSortSupport mss);
85
86/*
87 * Compute new value for bitmap item, considering whether it's used for
88 * clauses connected by AND/OR.
89 */
90#define RESULT_MERGE(value, is_or, match) \
91 ((is_or) ? ((value) || (match)) : ((value) && (match)))
92
93/*
94 * When processing a list of clauses, the bitmap item may get set to a value
95 * such that additional clauses can't change it. For example, when processing
96 * a list of clauses connected to AND, as soon as the item gets set to 'false'
97 * then it'll remain like that. Similarly clauses connected by OR and 'true'.
98 *
99 * Returns true when the value in the bitmap can't change no matter how the
100 * remaining clauses are evaluated.
101 */
102#define RESULT_IS_FINAL(value, is_or) ((is_or) ? (value) : (!(value)))
103
104/*
105 * get_mincount_for_mcv_list
106 * Determine the minimum number of times a value needs to appear in
107 * the sample for it to be included in the MCV list.
108 *
109 * We want to keep only values that appear sufficiently often in the
110 * sample that it is reasonable to extrapolate their sample frequencies to
111 * the entire table. We do this by placing an upper bound on the relative
112 * standard error of the sample frequency, so that any estimates the
113 * planner generates from the MCV statistics can be expected to be
114 * reasonably accurate.
115 *
116 * Since we are sampling without replacement, the sample frequency of a
117 * particular value is described by a hypergeometric distribution. A
118 * common rule of thumb when estimating errors in this situation is to
119 * require at least 10 instances of the value in the sample, in which case
120 * the distribution can be approximated by a normal distribution, and
121 * standard error analysis techniques can be applied. Given a sample size
122 * of n, a population size of N, and a sample frequency of p=cnt/n, the
123 * standard error of the proportion p is given by
124 * SE = sqrt(p*(1-p)/n) * sqrt((N-n)/(N-1))
125 * where the second term is the finite population correction. To get
126 * reasonably accurate planner estimates, we impose an upper bound on the
127 * relative standard error of 20% -- i.e., SE/p < 0.2. This 20% relative
128 * error bound is fairly arbitrary, but has been found empirically to work
129 * well. Rearranging this formula gives a lower bound on the number of
130 * instances of the value seen:
131 * cnt > n*(N-n) / (N-n+0.04*n*(N-1))
132 * This bound is at most 25, and approaches 0 as n approaches 0 or N. The
133 * case where n approaches 0 cannot happen in practice, since the sample
134 * size is at least 300. The case where n approaches N corresponds to
135 * sampling the whole the table, in which case it is reasonable to keep
136 * the whole MCV list (have no lower bound), so it makes sense to apply
137 * this formula for all inputs, even though the above derivation is
138 * technically only valid when the right hand side is at least around 10.
139 *
140 * An alternative way to look at this formula is as follows -- assume that
141 * the number of instances of the value seen scales up to the entire
142 * table, so that the population count is K=N*cnt/n. Then the distribution
143 * in the sample is a hypergeometric distribution parameterised by N, n
144 * and K, and the bound above is mathematically equivalent to demanding
145 * that the standard deviation of that distribution is less than 20% of
146 * its mean. Thus the relative errors in any planner estimates produced
147 * from the MCV statistics are likely to be not too large.
148 */
149static double
150get_mincount_for_mcv_list(int samplerows, double totalrows)
151{
152 double n = samplerows;
153 double N = totalrows;
154 double numer,
155 denom;
156
157 numer = n * (N - n);
158 denom = N - n + 0.04 * n * (N - 1);
159
160 /* Guard against division by zero (possible if n = N = 1) */
161 if (denom == 0.0)
162 return 0.0;
163
164 return numer / denom;
165}
166
167/*
168 * Builds MCV list from the set of sampled rows.
169 *
170 * The algorithm is quite simple:
171 *
172 * (1) sort the data (default collation, '<' for the data type)
173 *
174 * (2) count distinct groups, decide how many to keep
175 *
176 * (3) build the MCV list using the threshold determined in (2)
177 *
178 * (4) remove rows represented by the MCV from the sample
179 *
180 */
181MCVList *
182statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
183 VacAttrStats **stats, double totalrows)
184{
185 int i,
186 numattrs,
187 ngroups,
188 nitems;
189 AttrNumber *attnums;
190 double mincount;
191 SortItem *items;
192 SortItem *groups;
193 MCVList *mcvlist = NULL;
194 MultiSortSupport mss;
195
196 attnums = build_attnums_array(attrs, &numattrs);
197
198 /* comparator for all the columns */
199 mss = build_mss(stats, numattrs);
200
201 /* sort the rows */
202 items = build_sorted_items(numrows, &nitems, rows, stats[0]->tupDesc,
203 mss, numattrs, attnums);
204
205 if (!items)
206 return NULL;
207
208 /* transform the sorted rows into groups (sorted by frequency) */
209 groups = build_distinct_groups(nitems, items, mss, &ngroups);
210
211 /*
212 * Maximum number of MCV items to store, based on the attribute with the
213 * largest stats target (and the number of groups we have available).
214 */
215 nitems = stats[0]->attr->attstattarget;
216 for (i = 1; i < numattrs; i++)
217 {
218 if (stats[i]->attr->attstattarget > nitems)
219 nitems = stats[i]->attr->attstattarget;
220 }
221 if (nitems > ngroups)
222 nitems = ngroups;
223
224 /*
225 * Decide how many items to keep in the MCV list. We can't use the same
226 * algorithm as per-column MCV lists, because that only considers the
227 * actual group frequency - but we're primarily interested in how the
228 * actual frequency differs from the base frequency (product of simple
229 * per-column frequencies, as if the columns were independent).
230 *
231 * Using the same algorithm might exclude items that are close to the
232 * "average" frequency of the sample. But that does not say whether the
233 * observed frequency is close to the base frequency or not. We also need
234 * to consider unexpectedly uncommon items (again, compared to the base
235 * frequency), and the single-column algorithm does not have to.
236 *
237 * We simply decide how many items to keep by computing minimum count
238 * using get_mincount_for_mcv_list() and then keep all items that seem to
239 * be more common than that.
240 */
241 mincount = get_mincount_for_mcv_list(numrows, totalrows);
242
243 /*
244 * Walk the groups until we find the first group with a count below the
245 * mincount threshold (the index of that group is the number of groups we
246 * want to keep).
247 */
248 for (i = 0; i < nitems; i++)
249 {
250 if (groups[i].count < mincount)
251 {
252 nitems = i;
253 break;
254 }
255 }
256
257 /*
258 * At this point we know the number of items for the MCV list. There might
259 * be none (for uniform distribution with many groups), and in that case
260 * there will be no MCV list. Otherwise construct the MCV list.
261 */
262 if (nitems > 0)
263 {
264 int j;
265 SortItem key;
266 MultiSortSupport tmp;
267
268 /* frequencies for values in each attribute */
269 SortItem **freqs;
270 int *nfreqs;
271
272 /* used to search values */
273 tmp = (MultiSortSupport) palloc(offsetof(MultiSortSupportData, ssup)
274 + sizeof(SortSupportData));
275
276 /* compute frequencies for values in each column */
277 nfreqs = (int *) palloc0(sizeof(int) * numattrs);
278 freqs = build_column_frequencies(groups, ngroups, mss, nfreqs);
279
280 /*
281 * Allocate the MCV list structure, set the global parameters.
282 */
283 mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) +
284 sizeof(MCVItem) * nitems);
285
286 mcvlist->magic = STATS_MCV_MAGIC;
287 mcvlist->type = STATS_MCV_TYPE_BASIC;
288 mcvlist->ndimensions = numattrs;
289 mcvlist->nitems = nitems;
290
291 /* store info about data type OIDs */
292 for (i = 0; i < numattrs; i++)
293 mcvlist->types[i] = stats[i]->attrtypid;
294
295 /* Copy the first chunk of groups into the result. */
296 for (i = 0; i < nitems; i++)
297 {
298 /* just pointer to the proper place in the list */
299 MCVItem *item = &mcvlist->items[i];
300
301 item->values = (Datum *) palloc(sizeof(Datum) * numattrs);
302 item->isnull = (bool *) palloc(sizeof(bool) * numattrs);
303
304 /* copy values for the group */
305 memcpy(item->values, groups[i].values, sizeof(Datum) * numattrs);
306 memcpy(item->isnull, groups[i].isnull, sizeof(bool) * numattrs);
307
308 /* groups should be sorted by frequency in descending order */
309 Assert((i == 0) || (groups[i - 1].count >= groups[i].count));
310
311 /* group frequency */
312 item->frequency = (double) groups[i].count / numrows;
313
314 /* base frequency, if the attributes were independent */
315 item->base_frequency = 1.0;
316 for (j = 0; j < numattrs; j++)
317 {
318 SortItem *freq;
319
320 /* single dimension */
321 tmp->ndims = 1;
322 tmp->ssup[0] = mss->ssup[j];
323
324 /* fill search key */
325 key.values = &groups[i].values[j];
326 key.isnull = &groups[i].isnull[j];
327
328 freq = (SortItem *) bsearch_arg(&key, freqs[j], nfreqs[j],
329 sizeof(SortItem),
330 multi_sort_compare, tmp);
331
332 item->base_frequency *= ((double) freq->count) / numrows;
333 }
334 }
335
336 pfree(nfreqs);
337 pfree(freqs);
338 }
339
340 pfree(items);
341 pfree(groups);
342
343 return mcvlist;
344}
345
346/*
347 * build_mss
348 * build MultiSortSupport for the attributes passed in attrs
349 */
350static MultiSortSupport
351build_mss(VacAttrStats **stats, int numattrs)
352{
353 int i;
354
355 /* Sort by multiple columns (using array of SortSupport) */
356 MultiSortSupport mss = multi_sort_init(numattrs);
357
358 /* prepare the sort functions for all the attributes */
359 for (i = 0; i < numattrs; i++)
360 {
361 VacAttrStats *colstat = stats[i];
362 TypeCacheEntry *type;
363
364 type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
365 if (type->lt_opr == InvalidOid) /* shouldn't happen */
366 elog(ERROR, "cache lookup failed for ordering operator for type %u",
367 colstat->attrtypid);
368
369 multi_sort_add_dimension(mss, i, type->lt_opr, colstat->attrcollid);
370 }
371
372 return mss;
373}
374
375/*
376 * count_distinct_groups
377 * count distinct combinations of SortItems in the array
378 *
379 * The array is assumed to be sorted according to the MultiSortSupport.
380 */
381static int
382count_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss)
383{
384 int i;
385 int ndistinct;
386
387 ndistinct = 1;
388 for (i = 1; i < numrows; i++)
389 {
390 /* make sure the array really is sorted */
391 Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
392
393 if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
394 ndistinct += 1;
395 }
396
397 return ndistinct;
398}
399
400/*
401 * compare_sort_item_count
402 * comparator for sorting items by count (frequencies) in descending order
403 */
404static int
405compare_sort_item_count(const void *a, const void *b)
406{
407 SortItem *ia = (SortItem *) a;
408 SortItem *ib = (SortItem *) b;
409
410 if (ia->count == ib->count)
411 return 0;
412 else if (ia->count > ib->count)
413 return -1;
414
415 return 1;
416}
417
418/*
419 * build_distinct_groups
420 * build an array of SortItems for distinct groups and counts matching items
421 *
422 * The input array is assumed to be sorted
423 */
424static SortItem *
425build_distinct_groups(int numrows, SortItem *items, MultiSortSupport mss,
426 int *ndistinct)
427{
428 int i,
429 j;
430 int ngroups = count_distinct_groups(numrows, items, mss);
431
432 SortItem *groups = (SortItem *) palloc(ngroups * sizeof(SortItem));
433
434 j = 0;
435 groups[0] = items[0];
436 groups[0].count = 1;
437
438 for (i = 1; i < numrows; i++)
439 {
440 /* Assume sorted in ascending order. */
441 Assert(multi_sort_compare(&items[i], &items[i - 1], mss) >= 0);
442
443 /* New distinct group detected. */
444 if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
445 {
446 groups[++j] = items[i];
447 groups[j].count = 0;
448 }
449
450 groups[j].count++;
451 }
452
453 /* ensure we filled the expected number of distinct groups */
454 Assert(j + 1 == ngroups);
455
456 /* Sort the distinct groups by frequency (in descending order). */
457 pg_qsort((void *) groups, ngroups, sizeof(SortItem),
458 compare_sort_item_count);
459
460 *ndistinct = ngroups;
461 return groups;
462}
463
464/* compare sort items (single dimension) */
465static int
466sort_item_compare(const void *a, const void *b, void *arg)
467{
468 SortSupport ssup = (SortSupport) arg;
469 SortItem *ia = (SortItem *) a;
470 SortItem *ib = (SortItem *) b;
471
472 return ApplySortComparator(ia->values[0], ia->isnull[0],
473 ib->values[0], ib->isnull[0],
474 ssup);
475}
476
477/*
478 * build_column_frequencies
479 * compute frequencies of values in each column
480 *
481 * This returns an array of SortItems for each attibute the MCV is built
482 * on, with a frequency (number of occurrences) for each value. This is
483 * then used to compute "base" frequency of MCV items.
484 *
485 * All the memory is allocated in a single chunk, so that a single pfree
486 * is enough to release it. We do not allocate space for values/isnull
487 * arrays in the SortItems, because we can simply point into the input
488 * groups directly.
489 */
490static SortItem **
491build_column_frequencies(SortItem *groups, int ngroups,
492 MultiSortSupport mss, int *ncounts)
493{
494 int i,
495 dim;
496 SortItem **result;
497 char *ptr;
498
499 Assert(groups);
500 Assert(ncounts);
501
502 /* allocate arrays for all columns as a single chunk */
503 ptr = palloc(MAXALIGN(sizeof(SortItem *) * mss->ndims) +
504 mss->ndims * MAXALIGN(sizeof(SortItem) * ngroups));
505
506 /* initial array of pointers */
507 result = (SortItem **) ptr;
508 ptr += MAXALIGN(sizeof(SortItem *) * mss->ndims);
509
510 for (dim = 0; dim < mss->ndims; dim++)
511 {
512 SortSupport ssup = &mss->ssup[dim];
513
514 /* array of values for a single column */
515 result[dim] = (SortItem *) ptr;
516 ptr += MAXALIGN(sizeof(SortItem) * ngroups);
517
518 /* extract data for the dimension */
519 for (i = 0; i < ngroups; i++)
520 {
521 /* point into the input groups */
522 result[dim][i].values = &groups[i].values[dim];
523 result[dim][i].isnull = &groups[i].isnull[dim];
524 result[dim][i].count = groups[i].count;
525 }
526
527 /* sort the values, deduplicate */
528 qsort_arg((void *) result[dim], ngroups, sizeof(SortItem),
529 sort_item_compare, ssup);
530
531 /*
532 * Identify distinct values, compute frequency (there might be
533 * multiple MCV items containing this value, so we need to sum
534 * counts from all of them.
535 */
536 ncounts[dim] = 1;
537 for (i = 1; i < ngroups; i++)
538 {
539 if (sort_item_compare(&result[dim][i-1], &result[dim][i], ssup) == 0)
540 {
541 result[dim][ncounts[dim]-1].count += result[dim][i].count;
542 continue;
543 }
544
545 result[dim][ncounts[dim]] = result[dim][i];
546
547 ncounts[dim]++;
548 }
549 }
550
551 return result;
552}
553
554/*
555 * statext_mcv_load
556 * Load the MCV list for the indicated pg_statistic_ext tuple
557 */
558MCVList *
559statext_mcv_load(Oid mvoid)
560{
561 MCVList *result;
562 bool isnull;
563 Datum mcvlist;
564 HeapTuple htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(mvoid));
565
566 if (!HeapTupleIsValid(htup))
567 elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
568
569 mcvlist = SysCacheGetAttr(STATEXTDATASTXOID, htup,
570 Anum_pg_statistic_ext_data_stxdmcv, &isnull);
571
572 if (isnull)
573 elog(ERROR,
574 "requested statistic kind \"%c\" is not yet built for statistics object %u",
575 STATS_EXT_DEPENDENCIES, mvoid);
576
577 result = statext_mcv_deserialize(DatumGetByteaP(mcvlist));
578
579 ReleaseSysCache(htup);
580
581 return result;
582}
583
584
585/*
586 * statext_mcv_serialize
587 * Serialize MCV list into a pg_mcv_list value.
588 *
589 * The MCV items may include values of various data types, and it's reasonable
590 * to expect redundancy (values for a given attribute, repeated for multiple
591 * MCV list items). So we deduplicate the values into arrays, and then replace
592 * the values by indexes into those arrays.
593 *
594 * The overall structure of the serialized representation looks like this:
595 *
596 * +---------------+----------------+---------------------+-------+
597 * | header fields | dimension info | deduplicated values | items |
598 * +---------------+----------------+---------------------+-------+
599 *
600 * Where dimension info stores information about type of K-th attribute (e.g.
601 * typlen, typbyval and length of deduplicated values). Deduplicated values
602 * store deduplicated values for each attribute. And items store the actual
603 * MCV list items, with values replaced by indexes into the arrays.
604 *
605 * When serializing the items, we use uint16 indexes. The number of MCV items
606 * is limited by the statistics target (which is capped to 10k at the moment).
607 * We might increase this to 65k and still fit into uint16, so there's a bit of
608 * slack. Furthermore, this limit is on the number of distinct values per column,
609 * and we usually have few of those (and various combinations of them for the
610 * those MCV list). So uint16 seems fine for now.
611 *
612 * We don't really expect the serialization to save as much space as for
613 * histograms, as we are not doing any bucket splits (which is the source
614 * of high redundancy in histograms).
615 *
616 * TODO: Consider packing boolean flags (NULL) for each item into a single char
617 * (or a longer type) instead of using an array of bool items.
618 */
619bytea *
620statext_mcv_serialize(MCVList *mcvlist, VacAttrStats **stats)
621{
622 int i;
623 int dim;
624 int ndims = mcvlist->ndimensions;
625
626 SortSupport ssup;
627 DimensionInfo *info;
628
629 Size total_length;
630
631 /* serialized items (indexes into arrays, etc.) */
632 bytea *raw;
633 char *ptr;
634 char *endptr PG_USED_FOR_ASSERTS_ONLY;
635
636 /* values per dimension (and number of non-NULL values) */
637 Datum **values = (Datum **) palloc0(sizeof(Datum *) * ndims);
638 int *counts = (int *) palloc0(sizeof(int) * ndims);
639
640 /*
641 * We'll include some rudimentary information about the attribute types
642 * (length, by-val flag), so that we don't have to look them up while
643 * deserializating the MCV list (we already have the type OID in the
644 * header). This is safe, because when changing type of the attribute the
645 * statistics gets dropped automatically. We need to store the info about
646 * the arrays of deduplicated values anyway.
647 */
648 info = (DimensionInfo *) palloc0(sizeof(DimensionInfo) * ndims);
649
650 /* sort support data for all attributes included in the MCV list */
651 ssup = (SortSupport) palloc0(sizeof(SortSupportData) * ndims);
652
653 /* collect and deduplicate values for each dimension (attribute) */
654 for (dim = 0; dim < ndims; dim++)
655 {
656 int ndistinct;
657 TypeCacheEntry *typentry;
658
659 /*
660 * Lookup the LT operator (can't get it from stats extra_data, as we
661 * don't know how to interpret that - scalar vs. array etc.).
662 */
663 typentry = lookup_type_cache(stats[dim]->attrtypid, TYPECACHE_LT_OPR);
664
665 /* copy important info about the data type (length, by-value) */
666 info[dim].typlen = stats[dim]->attrtype->typlen;
667 info[dim].typbyval = stats[dim]->attrtype->typbyval;
668
669 /* allocate space for values in the attribute and collect them */
670 values[dim] = (Datum *) palloc0(sizeof(Datum) * mcvlist->nitems);
671
672 for (i = 0; i < mcvlist->nitems; i++)
673 {
674 /* skip NULL values - we don't need to deduplicate those */
675 if (mcvlist->items[i].isnull[dim])
676 continue;
677
678 /* append the value at the end */
679 values[dim][counts[dim]] = mcvlist->items[i].values[dim];
680 counts[dim] += 1;
681 }
682
683 /* if there are just NULL values in this dimension, we're done */
684 if (counts[dim] == 0)
685 continue;
686
687 /* sort and deduplicate the data */
688 ssup[dim].ssup_cxt = CurrentMemoryContext;
689 ssup[dim].ssup_collation = stats[dim]->attrcollid;
690 ssup[dim].ssup_nulls_first = false;
691
692 PrepareSortSupportFromOrderingOp(typentry->lt_opr, &ssup[dim]);
693
694 qsort_arg(values[dim], counts[dim], sizeof(Datum),
695 compare_scalars_simple, &ssup[dim]);
696
697 /*
698 * Walk through the array and eliminate duplicate values, but keep the
699 * ordering (so that we can do bsearch later). We know there's at
700 * least one item as (counts[dim] != 0), so we can skip the first
701 * element.
702 */
703 ndistinct = 1; /* number of distinct values */
704 for (i = 1; i < counts[dim]; i++)
705 {
706 /* expect sorted array */
707 Assert(compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]) <= 0);
708
709 /* if the value is the same as the previous one, we can skip it */
710 if (!compare_datums_simple(values[dim][i - 1], values[dim][i], &ssup[dim]))
711 continue;
712
713 values[dim][ndistinct] = values[dim][i];
714 ndistinct += 1;
715 }
716
717 /* we must not exceed PG_UINT16_MAX, as we use uint16 indexes */
718 Assert(ndistinct <= PG_UINT16_MAX);
719
720 /*
721 * Store additional info about the attribute - number of deduplicated
722 * values, and also size of the serialized data. For fixed-length data
723 * types this is trivial to compute, for varwidth types we need to
724 * actually walk the array and sum the sizes.
725 */
726 info[dim].nvalues = ndistinct;
727
728 if (info[dim].typbyval) /* by-value data types */
729 {
730 info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
731
732 /*
733 * We copy the data into the MCV item during deserialization, so
734 * we don't need to allocate any extra space.
735 */
736 info[dim].nbytes_aligned = 0;
737 }
738 else if (info[dim].typlen > 0) /* fixed-length by-ref */
739 {
740 /*
741 * We don't care about alignment in the serialized data, so we
742 * pack the data as much as possible. But we also track how much
743 * data will be needed after deserialization, and in that case
744 * we need to account for alignment of each item.
745 *
746 * Note: As the items are fixed-length, we could easily compute
747 * this during deserialization, but we do it here anyway.
748 */
749 info[dim].nbytes = info[dim].nvalues * info[dim].typlen;
750 info[dim].nbytes_aligned = info[dim].nvalues * MAXALIGN(info[dim].typlen);
751 }
752 else if (info[dim].typlen == -1) /* varlena */
753 {
754 info[dim].nbytes = 0;
755 info[dim].nbytes_aligned = 0;
756 for (i = 0; i < info[dim].nvalues; i++)
757 {
758 Size len;
759
760 /*
761 * For varlena values, we detoast the values and store the
762 * length and data separately. We don't bother with alignment
763 * here, which means that during deserialization we need to
764 * copy the fields and only access the copies.
765 */
766 values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
767
768 /* serialized length (uint32 length + data) */
769 len = VARSIZE_ANY_EXHDR(values[dim][i]);
770 info[dim].nbytes += sizeof(uint32); /* length */
771 info[dim].nbytes += len; /* value (no header) */
772
773 /*
774 * During deserialization we'll build regular varlena values
775 * with full headers, and we need to align them properly.
776 */
777 info[dim].nbytes_aligned += MAXALIGN(VARHDRSZ + len);
778 }
779 }
780 else if (info[dim].typlen == -2) /* cstring */
781 {
782 info[dim].nbytes = 0;
783 info[dim].nbytes_aligned = 0;
784 for (i = 0; i < info[dim].nvalues; i++)
785 {
786 Size len;
787
788 /*
789 * For cstring, we do similar thing as for varlena - first we
790 * store the length as uint32 and then the data. We don't care
791 * about alignment, which means that during deserialization we
792 * need to copy the fields and only access the copies.
793 */
794
795 /* c-strings include terminator, so +1 byte */
796 len = strlen(DatumGetCString(values[dim][i])) + 1;
797 info[dim].nbytes += sizeof(uint32); /* length */
798 info[dim].nbytes += len; /* value */
799
800 /* space needed for properly aligned deserialized copies */
801 info[dim].nbytes_aligned += MAXALIGN(len);
802 }
803 }
804
805 /* we know (count>0) so there must be some data */
806 Assert(info[dim].nbytes > 0);
807 }
808
809 /*
810 * Now we can finally compute how much space we'll actually need for the
811 * whole serialized MCV list (varlena header, MCV header, dimension info
812 * for each attribute, deduplicated values and items).
813 */
814 total_length = (3 * sizeof(uint32)) /* magic + type + nitems */
815 + sizeof(AttrNumber) /* ndimensions */
816 + (ndims * sizeof(Oid)); /* attribute types */
817
818 /* dimension info */
819 total_length += ndims * sizeof(DimensionInfo);
820
821 /* add space for the arrays of deduplicated values */
822 for (i = 0; i < ndims; i++)
823 total_length += info[i].nbytes;
824
825 /*
826 * And finally account for the items (those are fixed-length, thanks to
827 * replacing values with uint16 indexes into the deduplicated arrays).
828 */
829 total_length += mcvlist->nitems * ITEM_SIZE(dim);
830
831 /*
832 * Allocate space for the whole serialized MCV list (we'll skip bytes, so
833 * we set them to zero to make the result more compressible).
834 */
835 raw = (bytea *) palloc0(VARHDRSZ + total_length);
836 SET_VARSIZE(raw, VARHDRSZ + total_length);
837
838 ptr = VARDATA(raw);
839 endptr = ptr + total_length;
840
841 /* copy the MCV list header fields, one by one */
842 memcpy(ptr, &mcvlist->magic, sizeof(uint32));
843 ptr += sizeof(uint32);
844
845 memcpy(ptr, &mcvlist->type, sizeof(uint32));
846 ptr += sizeof(uint32);
847
848 memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
849 ptr += sizeof(uint32);
850
851 memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
852 ptr += sizeof(AttrNumber);
853
854 memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
855 ptr += (sizeof(Oid) * ndims);
856
857 /* store information about the attributes (data amounts, ...) */
858 memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
859 ptr += sizeof(DimensionInfo) * ndims;
860
861 /* Copy the deduplicated values for all attributes to the output. */
862 for (dim = 0; dim < ndims; dim++)
863 {
864 /* remember the starting point for Asserts later */
865 char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
866
867 for (i = 0; i < info[dim].nvalues; i++)
868 {
869 Datum value = values[dim][i];
870
871 if (info[dim].typbyval) /* passed by value */
872 {
873 Datum tmp;
874
875 /*
876 * For values passed by value, we need to copy just the
877 * significant bytes - we can't use memcpy directly, as that
878 * assumes little endian behavior. store_att_byval does
879 * almost what we need, but it requires properly aligned
880 * buffer - the output buffer does not guarantee that. So we
881 * simply use a local Datum variable (which guarantees proper
882 * alignment), and then copy the value from it.
883 */
884 store_att_byval(&tmp, value, info[dim].typlen);
885
886 memcpy(ptr, &tmp, info[dim].typlen);
887 ptr += info[dim].typlen;
888 }
889 else if (info[dim].typlen > 0) /* passed by reference */
890 {
891 /* no special alignment needed, treated as char array */
892 memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
893 ptr += info[dim].typlen;
894 }
895 else if (info[dim].typlen == -1) /* varlena */
896 {
897 uint32 len = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
898
899 /* copy the length */
900 memcpy(ptr, &len, sizeof(uint32));
901 ptr += sizeof(uint32);
902
903 /* data from the varlena value (without the header) */
904 memcpy(ptr, VARDATA_ANY(DatumGetPointer(value)), len);
905 ptr += len;
906 }
907 else if (info[dim].typlen == -2) /* cstring */
908 {
909 uint32 len = (uint32) strlen(DatumGetCString(value)) + 1;
910
911 /* copy the length */
912 memcpy(ptr, &len, sizeof(uint32));
913 ptr += sizeof(uint32);
914
915 /* value */
916 memcpy(ptr, DatumGetCString(value), len);
917 ptr += len;
918 }
919
920 /* no underflows or overflows */
921 Assert((ptr > start) && ((ptr - start) <= info[dim].nbytes));
922 }
923
924 /* we should get exactly nbytes of data for this dimension */
925 Assert((ptr - start) == info[dim].nbytes);
926 }
927
928 /* Serialize the items, with uint16 indexes instead of the values. */
929 for (i = 0; i < mcvlist->nitems; i++)
930 {
931 MCVItem *mcvitem = &mcvlist->items[i];
932
933 /* don't write beyond the allocated space */
934 Assert(ptr <= (endptr - ITEM_SIZE(dim)));
935
936 /* copy NULL and frequency flags into the serialized MCV */
937 memcpy(ptr, mcvitem->isnull, sizeof(bool) * ndims);
938 ptr += sizeof(bool) * ndims;
939
940 memcpy(ptr, &mcvitem->frequency, sizeof(double));
941 ptr += sizeof(double);
942
943 memcpy(ptr, &mcvitem->base_frequency, sizeof(double));
944 ptr += sizeof(double);
945
946 /* store the indexes last */
947 for (dim = 0; dim < ndims; dim++)
948 {
949 uint16 index = 0;
950 Datum *value;
951
952 /* do the lookup only for non-NULL values */
953 if (!mcvitem->isnull[dim])
954 {
955 value = (Datum *) bsearch_arg(&mcvitem->values[dim], values[dim],
956 info[dim].nvalues, sizeof(Datum),
957 compare_scalars_simple, &ssup[dim]);
958
959 Assert(value != NULL); /* serialization or deduplication error */
960
961 /* compute index within the deduplicated array */
962 index = (uint16) (value - values[dim]);
963
964 /* check the index is within expected bounds */
965 Assert(index < info[dim].nvalues);
966 }
967
968 /* copy the index into the serialized MCV */
969 memcpy(ptr, &index, sizeof(uint16));
970 ptr += sizeof(uint16);
971 }
972
973 /* make sure we don't overflow the allocated value */
974 Assert(ptr <= endptr);
975 }
976
977 /* at this point we expect to match the total_length exactly */
978 Assert(ptr == endptr);
979
980 pfree(values);
981 pfree(counts);
982
983 return raw;
984}
985
986/*
987 * statext_mcv_deserialize
988 * Reads serialized MCV list into MCVList structure.
989 *
990 * All the memory needed by the MCV list is allocated as a single chunk, so
991 * it's possible to simply pfree() it at once.
992 */
993MCVList *
994statext_mcv_deserialize(bytea *data)
995{
996 int dim,
997 i;
998 Size expected_size;
999 MCVList *mcvlist;
1000 char *raw;
1001 char *ptr;
1002 char *endptr PG_USED_FOR_ASSERTS_ONLY;
1003
1004 int ndims,
1005 nitems;
1006 DimensionInfo *info = NULL;
1007
1008 /* local allocation buffer (used only for deserialization) */
1009 Datum **map = NULL;
1010
1011 /* MCV list */
1012 Size mcvlen;
1013
1014 /* buffer used for the result */
1015 Size datalen;
1016 char *dataptr;
1017 char *valuesptr;
1018 char *isnullptr;
1019
1020 if (data == NULL)
1021 return NULL;
1022
1023 /*
1024 * We can't possibly deserialize a MCV list if there's not even a complete
1025 * header. We need an explicit formula here, because we serialize the
1026 * header fields one by one, so we need to ignore struct alignment.
1027 */
1028 if (VARSIZE_ANY(data) < MinSizeOfMCVList)
1029 elog(ERROR, "invalid MCV size %zd (expected at least %zu)",
1030 VARSIZE_ANY(data), MinSizeOfMCVList);
1031
1032 /* read the MCV list header */
1033 mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
1034
1035 /* pointer to the data part (skip the varlena header) */
1036 raw = (char *) data;
1037 ptr = VARDATA_ANY(raw);
1038 endptr = (char *) raw + VARSIZE_ANY(data);
1039
1040 /* get the header and perform further sanity checks */
1041 memcpy(&mcvlist->magic, ptr, sizeof(uint32));
1042 ptr += sizeof(uint32);
1043
1044 memcpy(&mcvlist->type, ptr, sizeof(uint32));
1045 ptr += sizeof(uint32);
1046
1047 memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
1048 ptr += sizeof(uint32);
1049
1050 memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
1051 ptr += sizeof(AttrNumber);
1052
1053 if (mcvlist->magic != STATS_MCV_MAGIC)
1054 elog(ERROR, "invalid MCV magic %u (expected %u)",
1055 mcvlist->magic, STATS_MCV_MAGIC);
1056
1057 if (mcvlist->type != STATS_MCV_TYPE_BASIC)
1058 elog(ERROR, "invalid MCV type %u (expected %u)",
1059 mcvlist->type, STATS_MCV_TYPE_BASIC);
1060
1061 if (mcvlist->ndimensions == 0)
1062 elog(ERROR, "invalid zero-length dimension array in MCVList");
1063 else if ((mcvlist->ndimensions > STATS_MAX_DIMENSIONS) ||
1064 (mcvlist->ndimensions < 0))
1065 elog(ERROR, "invalid length (%d) dimension array in MCVList",
1066 mcvlist->ndimensions);
1067
1068 if (mcvlist->nitems == 0)
1069 elog(ERROR, "invalid zero-length item array in MCVList");
1070 else if (mcvlist->nitems > STATS_MCVLIST_MAX_ITEMS)
1071 elog(ERROR, "invalid length (%u) item array in MCVList",
1072 mcvlist->nitems);
1073
1074 nitems = mcvlist->nitems;
1075 ndims = mcvlist->ndimensions;
1076
1077 /*
1078 * Check amount of data including DimensionInfo for all dimensions and
1079 * also the serialized items (including uint16 indexes). Also, walk
1080 * through the dimension information and add it to the sum.
1081 */
1082 expected_size = SizeOfMCVList(ndims, nitems);
1083
1084 /*
1085 * Check that we have at least the dimension and info records, along with
1086 * the items. We don't know the size of the serialized values yet. We need
1087 * to do this check first, before accessing the dimension info.
1088 */
1089 if (VARSIZE_ANY(data) < expected_size)
1090 elog(ERROR, "invalid MCV size %zd (expected %zu)",
1091 VARSIZE_ANY(data), expected_size);
1092
1093 /* Now copy the array of type Oids. */
1094 memcpy(mcvlist->types, ptr, sizeof(Oid) * ndims);
1095 ptr += (sizeof(Oid) * ndims);
1096
1097 /* Now it's safe to access the dimension info. */
1098 info = palloc(ndims * sizeof(DimensionInfo));
1099
1100 memcpy(info, ptr, ndims * sizeof(DimensionInfo));
1101 ptr += (ndims * sizeof(DimensionInfo));
1102
1103 /* account for the value arrays */
1104 for (dim = 0; dim < ndims; dim++)
1105 {
1106 /*
1107 * XXX I wonder if we can/should rely on asserts here. Maybe those
1108 * checks should be done every time?
1109 */
1110 Assert(info[dim].nvalues >= 0);
1111 Assert(info[dim].nbytes >= 0);
1112
1113 expected_size += info[dim].nbytes;
1114 }
1115
1116 /*
1117 * Now we know the total expected MCV size, including all the pieces
1118 * (header, dimension info. items and deduplicated data). So do the final
1119 * check on size.
1120 */
1121 if (VARSIZE_ANY(data) != expected_size)
1122 elog(ERROR, "invalid MCV size %zd (expected %zu)",
1123 VARSIZE_ANY(data), expected_size);
1124
1125 /*
1126 * We need an array of Datum values for each dimension, so that we can
1127 * easily translate the uint16 indexes later. We also need a top-level
1128 * array of pointers to those per-dimension arrays.
1129 *
1130 * While allocating the arrays for dimensions, compute how much space we
1131 * need for a copy of the by-ref data, as we can't simply point to the
1132 * original values (it might go away).
1133 */
1134 datalen = 0; /* space for by-ref data */
1135 map = (Datum **) palloc(ndims * sizeof(Datum *));
1136
1137 for (dim = 0; dim < ndims; dim++)
1138 {
1139 map[dim] = (Datum *) palloc(sizeof(Datum) * info[dim].nvalues);
1140
1141 /* space needed for a copy of data for by-ref types */
1142 datalen += info[dim].nbytes_aligned;
1143 }
1144
1145 /*
1146 * Now resize the MCV list so that the allocation includes all the data.
1147 *
1148 * Allocate space for a copy of the data, as we can't simply reference the
1149 * serialized data - it's not aligned properly, and it may disappear while
1150 * we're still using the MCV list, e.g. due to catcache release.
1151 *
1152 * We do care about alignment here, because we will allocate all the pieces
1153 * at once, but then use pointers to different parts.
1154 */
1155 mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1156
1157 /* arrays of values and isnull flags for all MCV items */
1158 mcvlen += nitems * MAXALIGN(sizeof(Datum) * ndims);
1159 mcvlen += nitems * MAXALIGN(sizeof(bool) * ndims);
1160
1161 /* we don't quite need to align this, but it makes some asserts easier */
1162 mcvlen += MAXALIGN(datalen);
1163
1164 /* now resize the deserialized MCV list, and compute pointers to parts */
1165 mcvlist = repalloc(mcvlist, mcvlen);
1166
1167 /* pointer to the beginning of values/isnull arrays */
1168 valuesptr = (char *) mcvlist
1169 + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
1170
1171 isnullptr = valuesptr + (nitems * MAXALIGN(sizeof(Datum) * ndims));
1172
1173 dataptr = isnullptr + (nitems * MAXALIGN(sizeof(bool) * ndims));
1174
1175 /*
1176 * Build mapping (index => value) for translating the serialized data into
1177 * the in-memory representation.
1178 */
1179 for (dim = 0; dim < ndims; dim++)
1180 {
1181 /* remember start position in the input array */
1182 char *start PG_USED_FOR_ASSERTS_ONLY = ptr;
1183
1184 if (info[dim].typbyval)
1185 {
1186 /* for by-val types we simply copy data into the mapping */
1187 for (i = 0; i < info[dim].nvalues; i++)
1188 {
1189 Datum v = 0;
1190
1191 memcpy(&v, ptr, info[dim].typlen);
1192 ptr += info[dim].typlen;
1193
1194 map[dim][i] = fetch_att(&v, true, info[dim].typlen);
1195
1196 /* no under/overflow of input array */
1197 Assert(ptr <= (start + info[dim].nbytes));
1198 }
1199 }
1200 else
1201 {
1202 /* for by-ref types we need to also make a copy of the data */
1203
1204 /* passed by reference, but fixed length (name, tid, ...) */
1205 if (info[dim].typlen > 0)
1206 {
1207 for (i = 0; i < info[dim].nvalues; i++)
1208 {
1209 memcpy(dataptr, ptr, info[dim].typlen);
1210 ptr += info[dim].typlen;
1211
1212 /* just point into the array */
1213 map[dim][i] = PointerGetDatum(dataptr);
1214 dataptr += MAXALIGN(info[dim].typlen);
1215 }
1216 }
1217 else if (info[dim].typlen == -1)
1218 {
1219 /* varlena */
1220 for (i = 0; i < info[dim].nvalues; i++)
1221 {
1222 uint32 len;
1223
1224 /* read the uint32 length */
1225 memcpy(&len, ptr, sizeof(uint32));
1226 ptr += sizeof(uint32);
1227
1228 /* the length is data-only */
1229 SET_VARSIZE(dataptr, len + VARHDRSZ);
1230 memcpy(VARDATA(dataptr), ptr, len);
1231 ptr += len;
1232
1233 /* just point into the array */
1234 map[dim][i] = PointerGetDatum(dataptr);
1235
1236 /* skip to place of the next deserialized value */
1237 dataptr += MAXALIGN(len + VARHDRSZ);
1238 }
1239 }
1240 else if (info[dim].typlen == -2)
1241 {
1242 /* cstring */
1243 for (i = 0; i < info[dim].nvalues; i++)
1244 {
1245 uint32 len;
1246
1247 memcpy(&len, ptr, sizeof(uint32));
1248 ptr += sizeof(uint32);
1249
1250 memcpy(dataptr, ptr, len);
1251 ptr += len;
1252
1253 /* just point into the array */
1254 map[dim][i] = PointerGetDatum(dataptr);
1255 dataptr += MAXALIGN(len);
1256 }
1257 }
1258
1259 /* no under/overflow of input array */
1260 Assert(ptr <= (start + info[dim].nbytes));
1261
1262 /* no overflow of the output mcv value */
1263 Assert(dataptr <= ((char *) mcvlist + mcvlen));
1264 }
1265
1266 /* check we consumed input data for this dimension exactly */
1267 Assert(ptr == (start + info[dim].nbytes));
1268 }
1269
1270 /* we should have also filled the MCV list exactly */
1271 Assert(dataptr == ((char *) mcvlist + mcvlen));
1272
1273 /* deserialize the MCV items and translate the indexes to Datums */
1274 for (i = 0; i < nitems; i++)
1275 {
1276 MCVItem *item = &mcvlist->items[i];
1277
1278 item->values = (Datum *) valuesptr;
1279 valuesptr += MAXALIGN(sizeof(Datum) * ndims);
1280
1281 item->isnull = (bool *) isnullptr;
1282 isnullptr += MAXALIGN(sizeof(bool) * ndims);
1283
1284 memcpy(item->isnull, ptr, sizeof(bool) * ndims);
1285 ptr += sizeof(bool) * ndims;
1286
1287 memcpy(&item->frequency, ptr, sizeof(double));
1288 ptr += sizeof(double);
1289
1290 memcpy(&item->base_frequency, ptr, sizeof(double));
1291 ptr += sizeof(double);
1292
1293 /* finally translate the indexes (for non-NULL only) */
1294 for (dim = 0; dim < ndims; dim++)
1295 {
1296 uint16 index;
1297
1298 memcpy(&index, ptr, sizeof(uint16));
1299 ptr += sizeof(uint16);
1300
1301 if (item->isnull[dim])
1302 continue;
1303
1304 item->values[dim] = map[dim][index];
1305 }
1306
1307 /* check we're not overflowing the input */
1308 Assert(ptr <= endptr);
1309 }
1310
1311 /* check that we processed all the data */
1312 Assert(ptr == endptr);
1313
1314 /* release the buffers used for mapping */
1315 for (dim = 0; dim < ndims; dim++)
1316 pfree(map[dim]);
1317
1318 pfree(map);
1319
1320 return mcvlist;
1321}
1322
1323/*
1324 * SRF with details about buckets of a histogram:
1325 *
1326 * - item ID (0...nitems)
1327 * - values (string array)
1328 * - nulls only (boolean array)
1329 * - frequency (double precision)
1330 * - base_frequency (double precision)
1331 *
1332 * The input is the OID of the statistics, and there are no rows returned if
1333 * the statistics contains no histogram.
1334 */
1335Datum
1336pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
1337{
1338 FuncCallContext *funcctx;
1339
1340 /* stuff done only on the first call of the function */
1341 if (SRF_IS_FIRSTCALL())
1342 {
1343 MemoryContext oldcontext;
1344 MCVList *mcvlist;
1345 TupleDesc tupdesc;
1346
1347 /* create a function context for cross-call persistence */
1348 funcctx = SRF_FIRSTCALL_INIT();
1349
1350 /* switch to memory context appropriate for multiple function calls */
1351 oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
1352
1353 mcvlist = statext_mcv_deserialize(PG_GETARG_BYTEA_P(0));
1354
1355 funcctx->user_fctx = mcvlist;
1356
1357 /* total number of tuples to be returned */
1358 funcctx->max_calls = 0;
1359 if (funcctx->user_fctx != NULL)
1360 funcctx->max_calls = mcvlist->nitems;
1361
1362 /* Build a tuple descriptor for our result type */
1363 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1364 ereport(ERROR,
1365 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1366 errmsg("function returning record called in context "
1367 "that cannot accept type record")));
1368 tupdesc = BlessTupleDesc(tupdesc);
1369
1370 /*
1371 * generate attribute metadata needed later to produce tuples from raw
1372 * C strings
1373 */
1374 funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
1375
1376 MemoryContextSwitchTo(oldcontext);
1377 }
1378
1379 /* stuff done on every call of the function */
1380 funcctx = SRF_PERCALL_SETUP();
1381
1382 if (funcctx->call_cntr < funcctx->max_calls) /* do when there is more left to send */
1383 {
1384 Datum values[5];
1385 bool nulls[5];
1386 HeapTuple tuple;
1387 Datum result;
1388 ArrayBuildState *astate_values = NULL;
1389 ArrayBuildState *astate_nulls = NULL;
1390
1391 int i;
1392 MCVList *mcvlist;
1393 MCVItem *item;
1394
1395 mcvlist = (MCVList *) funcctx->user_fctx;
1396
1397 Assert(funcctx->call_cntr < mcvlist->nitems);
1398
1399 item = &mcvlist->items[funcctx->call_cntr];
1400
1401 for (i = 0; i < mcvlist->ndimensions; i++)
1402 {
1403
1404 astate_nulls = accumArrayResult(astate_nulls,
1405 BoolGetDatum(item->isnull[i]),
1406 false,
1407 BOOLOID,
1408 CurrentMemoryContext);
1409
1410 if (!item->isnull[i])
1411 {
1412 bool isvarlena;
1413 Oid outfunc;
1414 FmgrInfo fmgrinfo;
1415 Datum val;
1416 text *txt;
1417
1418 /* lookup output func for the type */
1419 getTypeOutputInfo(mcvlist->types[i], &outfunc, &isvarlena);
1420 fmgr_info(outfunc, &fmgrinfo);
1421
1422 val = FunctionCall1(&fmgrinfo, item->values[i]);
1423 txt = cstring_to_text(DatumGetPointer(val));
1424
1425 astate_values = accumArrayResult(astate_values,
1426 PointerGetDatum(txt),
1427 false,
1428 TEXTOID,
1429 CurrentMemoryContext);
1430 }
1431 else
1432 astate_values = accumArrayResult(astate_values,
1433 (Datum) 0,
1434 true,
1435 TEXTOID,
1436 CurrentMemoryContext);
1437 }
1438
1439 values[0] = Int32GetDatum(funcctx->call_cntr);
1440 values[1] = PointerGetDatum(makeArrayResult(astate_values, CurrentMemoryContext));
1441 values[2] = PointerGetDatum(makeArrayResult(astate_nulls, CurrentMemoryContext));
1442 values[3] = Float8GetDatum(item->frequency);
1443 values[4] = Float8GetDatum(item->base_frequency);
1444
1445 /* no NULLs in the tuple */
1446 memset(nulls, 0, sizeof(nulls));
1447
1448 /* build a tuple */
1449 tuple = heap_form_tuple(funcctx->attinmeta->tupdesc, values, nulls);
1450
1451 /* make the tuple into a datum */
1452 result = HeapTupleGetDatum(tuple);
1453
1454 SRF_RETURN_NEXT(funcctx, result);
1455 }
1456 else /* do when there is no more left */
1457 {
1458 SRF_RETURN_DONE(funcctx);
1459 }
1460}
1461
1462/*
1463 * pg_mcv_list_in - input routine for type pg_mcv_list.
1464 *
1465 * pg_mcv_list is real enough to be a table column, but it has no operations
1466 * of its own, and disallows input too
1467 */
1468Datum
1469pg_mcv_list_in(PG_FUNCTION_ARGS)
1470{
1471 /*
1472 * pg_mcv_list stores the data in binary form and parsing text input is
1473 * not needed, so disallow this.
1474 */
1475 ereport(ERROR,
1476 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1477 errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1478
1479 PG_RETURN_VOID(); /* keep compiler quiet */
1480}
1481
1482
1483/*
1484 * pg_mcv_list_out - output routine for type pg_mcv_list.
1485 *
1486 * MCV lists are serialized into a bytea value, so we simply call byteaout()
1487 * to serialize the value into text. But it'd be nice to serialize that into
1488 * a meaningful representation (e.g. for inspection by people).
1489 *
1490 * XXX This should probably return something meaningful, similar to what
1491 * pg_dependencies_out does. Not sure how to deal with the deduplicated
1492 * values, though - do we want to expand that or not?
1493 */
1494Datum
1495pg_mcv_list_out(PG_FUNCTION_ARGS)
1496{
1497 return byteaout(fcinfo);
1498}
1499
1500/*
1501 * pg_mcv_list_recv - binary input routine for type pg_mcv_list.
1502 */
1503Datum
1504pg_mcv_list_recv(PG_FUNCTION_ARGS)
1505{
1506 ereport(ERROR,
1507 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1508 errmsg("cannot accept a value of type %s", "pg_mcv_list")));
1509
1510 PG_RETURN_VOID(); /* keep compiler quiet */
1511}
1512
1513/*
1514 * pg_mcv_list_send - binary output routine for type pg_mcv_list.
1515 *
1516 * MCV lists are serialized in a bytea value (although the type is named
1517 * differently), so let's just send that.
1518 */
1519Datum
1520pg_mcv_list_send(PG_FUNCTION_ARGS)
1521{
1522 return byteasend(fcinfo);
1523}
1524
1525/*
1526 * mcv_get_match_bitmap
1527 * Evaluate clauses using the MCV list, and update the match bitmap.
1528 *
1529 * A match bitmap keeps match/mismatch status for each MCV item, and we
1530 * update it based on additional clauses. We also use it to skip items
1531 * that can't possibly match (e.g. item marked as "mismatch" can't change
1532 * to "match" when evaluating AND clause list).
1533 *
1534 * The function also returns a flag indicating whether there was an
1535 * equality condition for all attributes, the minimum frequency in the MCV
1536 * list, and a total MCV frequency (sum of frequencies for all items).
1537 *
1538 * XXX Currently the match bitmap uses a bool for each MCV item, which is
1539 * somewhat wasteful as we could do with just a single bit, thus reducing
1540 * the size to ~1/8. It would also allow us to combine bitmaps simply using
1541 * & and |, which should be faster than min/max. The bitmaps are fairly
1542 * small, though (thanks to the cap on the MCV list size).
1543 */
1544static bool *
1545mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
1546 Bitmapset *keys, MCVList *mcvlist, bool is_or)
1547{
1548 int i;
1549 ListCell *l;
1550 bool *matches;
1551
1552 /* The bitmap may be partially built. */
1553 Assert(clauses != NIL);
1554 Assert(list_length(clauses) >= 1);
1555 Assert(mcvlist != NULL);
1556 Assert(mcvlist->nitems > 0);
1557 Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS);
1558
1559 matches = palloc(sizeof(bool) * mcvlist->nitems);
1560 memset(matches, (is_or) ? false : true,
1561 sizeof(bool) * mcvlist->nitems);
1562
1563 /*
1564 * Loop through the list of clauses, and for each of them evaluate all the
1565 * MCV items not yet eliminated by the preceding clauses.
1566 */
1567 foreach(l, clauses)
1568 {
1569 Node *clause = (Node *) lfirst(l);
1570
1571 /* if it's a RestrictInfo, then extract the clause */
1572 if (IsA(clause, RestrictInfo))
1573 clause = (Node *) ((RestrictInfo *) clause)->clause;
1574
1575 /*
1576 * Handle the various types of clauses - OpClause, NullTest and
1577 * AND/OR/NOT
1578 */
1579 if (is_opclause(clause))
1580 {
1581 OpExpr *expr = (OpExpr *) clause;
1582 FmgrInfo opproc;
1583
1584 /* valid only after examine_opclause_expression returns true */
1585 Var *var;
1586 Const *cst;
1587 bool varonleft;
1588
1589 fmgr_info(get_opcode(expr->opno), &opproc);
1590
1591 /* extract the var and const from the expression */
1592 if (examine_opclause_expression(expr, &var, &cst, &varonleft))
1593 {
1594 int idx;
1595
1596 /* match the attribute to a dimension of the statistic */
1597 idx = bms_member_index(keys, var->varattno);
1598
1599 /*
1600 * Walk through the MCV items and evaluate the current clause.
1601 * We can skip items that were already ruled out, and
1602 * terminate if there are no remaining MCV items that might
1603 * possibly match.
1604 */
1605 for (i = 0; i < mcvlist->nitems; i++)
1606 {
1607 bool match = true;
1608 MCVItem *item = &mcvlist->items[i];
1609
1610 /*
1611 * When the MCV item or the Const value is NULL we can treat
1612 * this as a mismatch. We must not call the operator because
1613 * of strictness.
1614 */
1615 if (item->isnull[idx] || cst->constisnull)
1616 {
1617 matches[i] = RESULT_MERGE(matches[i], is_or, false);
1618 continue;
1619 }
1620
1621 /*
1622 * Skip MCV items that can't change result in the bitmap.
1623 * Once the value gets false for AND-lists, or true for
1624 * OR-lists, we don't need to look at more clauses.
1625 */
1626 if (RESULT_IS_FINAL(matches[i], is_or))
1627 continue;
1628
1629 /*
1630 * First check whether the constant is below the lower
1631 * boundary (in that case we can skip the bucket, because
1632 * there's no overlap).
1633 *
1634 * We don't store collations used to build the statistics,
1635 * but we can use the collation for the attribute itself,
1636 * as stored in varcollid. We do reset the statistics after
1637 * a type change (including collation change), so this is
1638 * OK. We may need to relax this after allowing extended
1639 * statistics on expressions.
1640 */
1641 if (varonleft)
1642 match = DatumGetBool(FunctionCall2Coll(&opproc,
1643 var->varcollid,
1644 item->values[idx],
1645 cst->constvalue));
1646 else
1647 match = DatumGetBool(FunctionCall2Coll(&opproc,
1648 var->varcollid,
1649 cst->constvalue,
1650 item->values[idx]));
1651
1652 /* update the match bitmap with the result */
1653 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1654 }
1655 }
1656 }
1657 else if (IsA(clause, NullTest))
1658 {
1659 NullTest *expr = (NullTest *) clause;
1660 Var *var = (Var *) (expr->arg);
1661
1662 /* match the attribute to a dimension of the statistic */
1663 int idx = bms_member_index(keys, var->varattno);
1664
1665 /*
1666 * Walk through the MCV items and evaluate the current clause. We
1667 * can skip items that were already ruled out, and terminate if
1668 * there are no remaining MCV items that might possibly match.
1669 */
1670 for (i = 0; i < mcvlist->nitems; i++)
1671 {
1672 bool match = false; /* assume mismatch */
1673 MCVItem *item = &mcvlist->items[i];
1674
1675 /* if the clause mismatches the MCV item, update the bitmap */
1676 switch (expr->nulltesttype)
1677 {
1678 case IS_NULL:
1679 match = (item->isnull[idx]) ? true : match;
1680 break;
1681
1682 case IS_NOT_NULL:
1683 match = (!item->isnull[idx]) ? true : match;
1684 break;
1685 }
1686
1687 /* now, update the match bitmap, depending on OR/AND type */
1688 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1689 }
1690 }
1691 else if (is_orclause(clause) || is_andclause(clause))
1692 {
1693 /* AND/OR clause, with all subclauses being compatible */
1694
1695 int i;
1696 BoolExpr *bool_clause = ((BoolExpr *) clause);
1697 List *bool_clauses = bool_clause->args;
1698
1699 /* match/mismatch bitmap for each MCV item */
1700 bool *bool_matches = NULL;
1701
1702 Assert(bool_clauses != NIL);
1703 Assert(list_length(bool_clauses) >= 2);
1704
1705 /* build the match bitmap for the OR-clauses */
1706 bool_matches = mcv_get_match_bitmap(root, bool_clauses, keys,
1707 mcvlist, is_orclause(clause));
1708
1709 /*
1710 * Merge the bitmap produced by mcv_get_match_bitmap into the
1711 * current one. We need to consider if we're evaluating AND or OR
1712 * condition when merging the results.
1713 */
1714 for (i = 0; i < mcvlist->nitems; i++)
1715 matches[i] = RESULT_MERGE(matches[i], is_or, bool_matches[i]);
1716
1717 pfree(bool_matches);
1718 }
1719 else if (is_notclause(clause))
1720 {
1721 /* NOT clause, with all subclauses compatible */
1722
1723 int i;
1724 BoolExpr *not_clause = ((BoolExpr *) clause);
1725 List *not_args = not_clause->args;
1726
1727 /* match/mismatch bitmap for each MCV item */
1728 bool *not_matches = NULL;
1729
1730 Assert(not_args != NIL);
1731 Assert(list_length(not_args) == 1);
1732
1733 /* build the match bitmap for the NOT-clause */
1734 not_matches = mcv_get_match_bitmap(root, not_args, keys,
1735 mcvlist, false);
1736
1737 /*
1738 * Merge the bitmap produced by mcv_get_match_bitmap into the
1739 * current one. We're handling a NOT clause, so invert the result
1740 * before merging it into the global bitmap.
1741 */
1742 for (i = 0; i < mcvlist->nitems; i++)
1743 matches[i] = RESULT_MERGE(matches[i], is_or, !not_matches[i]);
1744
1745 pfree(not_matches);
1746 }
1747 else if (IsA(clause, Var))
1748 {
1749 /* Var (has to be a boolean Var, possibly from below NOT) */
1750
1751 Var *var = (Var *) (clause);
1752
1753 /* match the attribute to a dimension of the statistic */
1754 int idx = bms_member_index(keys, var->varattno);
1755
1756 Assert(var->vartype == BOOLOID);
1757
1758 /*
1759 * Walk through the MCV items and evaluate the current clause. We
1760 * can skip items that were already ruled out, and terminate if
1761 * there are no remaining MCV items that might possibly match.
1762 */
1763 for (i = 0; i < mcvlist->nitems; i++)
1764 {
1765 MCVItem *item = &mcvlist->items[i];
1766 bool match = false;
1767
1768 /* if the item is NULL, it's a mismatch */
1769 if (!item->isnull[idx] && DatumGetBool(item->values[idx]))
1770 match = true;
1771
1772 /* update the result bitmap */
1773 matches[i] = RESULT_MERGE(matches[i], is_or, match);
1774 }
1775 }
1776 else
1777 elog(ERROR, "unknown clause type: %d", clause->type);
1778 }
1779
1780 return matches;
1781}
1782
1783
1784/*
1785 * mcv_clauselist_selectivity
1786 * Return the selectivity estimate computed using an MCV list.
1787 *
1788 * First builds a bitmap of MCV items matching the clauses, and then sums
1789 * the frequencies of matching items.
1790 *
1791 * It also produces two additional interesting selectivities - total
1792 * selectivity of all the MCV items (not just the matching ones), and the
1793 * base frequency computed on the assumption of independence.
1794 */
1795Selectivity
1796mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
1797 List *clauses, int varRelid,
1798 JoinType jointype, SpecialJoinInfo *sjinfo,
1799 RelOptInfo *rel,
1800 Selectivity *basesel, Selectivity *totalsel)
1801{
1802 int i;
1803 MCVList *mcv;
1804 Selectivity s = 0.0;
1805
1806 /* match/mismatch bitmap for each MCV item */
1807 bool *matches = NULL;
1808
1809 /* load the MCV list stored in the statistics object */
1810 mcv = statext_mcv_load(stat->statOid);
1811
1812 /* build a match bitmap for the clauses */
1813 matches = mcv_get_match_bitmap(root, clauses, stat->keys, mcv, false);
1814
1815 /* sum frequencies for all the matching MCV items */
1816 *basesel = 0.0;
1817 *totalsel = 0.0;
1818 for (i = 0; i < mcv->nitems; i++)
1819 {
1820 *totalsel += mcv->items[i].frequency;
1821
1822 if (matches[i] != false)
1823 {
1824 /* XXX Shouldn't the basesel be outside the if condition? */
1825 *basesel += mcv->items[i].base_frequency;
1826 s += mcv->items[i].frequency;
1827 }
1828 }
1829
1830 return s;
1831}
1832